001package org.jsoup.parser; 002 003import org.jsoup.nodes.DocumentType; 004 005import java.util.Arrays; 006 007/** 008 * States and transition activations for the Tokeniser. 009 */ 010enum TokeniserState { 011 Data { 012 // in data state, gather characters until a character reference or tag is found 013 void read(Tokeniser t, CharacterReader r) { 014 switch (r.current()) { 015 case '&': 016 t.advanceTransition(CharacterReferenceInData); 017 break; 018 case '<': 019 t.advanceTransition(TagOpen); 020 break; 021 case nullChar: 022 t.error(this); // NOT replacement character (oddly?) 023 t.emit(r.consume()); 024 break; 025 case eof: 026 t.emit(new Token.EOF()); 027 break; 028 default: 029 String data = r.consumeData(); 030 t.emit(data); 031 break; 032 } 033 } 034 }, 035 CharacterReferenceInData { 036 // from & in data 037 void read(Tokeniser t, CharacterReader r) { 038 readCharRef(t, Data); 039 } 040 }, 041 Rcdata { 042 /// handles data in title, textarea etc 043 void read(Tokeniser t, CharacterReader r) { 044 switch (r.current()) { 045 case '&': 046 t.advanceTransition(CharacterReferenceInRcdata); 047 break; 048 case '<': 049 t.advanceTransition(RcdataLessthanSign); 050 break; 051 case nullChar: 052 t.error(this); 053 r.advance(); 054 t.emit(replacementChar); 055 break; 056 case eof: 057 t.emit(new Token.EOF()); 058 break; 059 default: 060 String data = r.consumeToAny('&', '<', nullChar); 061 t.emit(data); 062 break; 063 } 064 } 065 }, 066 CharacterReferenceInRcdata { 067 void read(Tokeniser t, CharacterReader r) { 068 readCharRef(t, Rcdata); 069 } 070 }, 071 Rawtext { 072 void read(Tokeniser t, CharacterReader r) { 073 readData(t, r, this, RawtextLessthanSign); 074 } 075 }, 076 ScriptData { 077 void read(Tokeniser t, CharacterReader r) { 078 readData(t, r, this, ScriptDataLessthanSign); 079 } 080 }, 081 PLAINTEXT { 082 void read(Tokeniser t, CharacterReader r) { 083 switch (r.current()) { 084 case nullChar: 085 t.error(this); 086 r.advance(); 087 
t.emit(replacementChar); 088 break; 089 case eof: 090 t.emit(new Token.EOF()); 091 break; 092 default: 093 String data = r.consumeTo(nullChar); 094 t.emit(data); 095 break; 096 } 097 } 098 }, 099 TagOpen { 100 // from < in data 101 void read(Tokeniser t, CharacterReader r) { 102 switch (r.current()) { 103 case '!': 104 t.advanceTransition(MarkupDeclarationOpen); 105 break; 106 case '/': 107 t.advanceTransition(EndTagOpen); 108 break; 109 case '?': 110 t.advanceTransition(BogusComment); 111 break; 112 default: 113 if (r.matchesLetter()) { 114 t.createTagPending(true); 115 t.transition(TagName); 116 } else { 117 t.error(this); 118 t.emit('<'); // char that got us here 119 t.transition(Data); 120 } 121 break; 122 } 123 } 124 }, 125 EndTagOpen { 126 void read(Tokeniser t, CharacterReader r) { 127 if (r.isEmpty()) { 128 t.eofError(this); 129 t.emit("</"); 130 t.transition(Data); 131 } else if (r.matchesLetter()) { 132 t.createTagPending(false); 133 t.transition(TagName); 134 } else if (r.matches('>')) { 135 t.error(this); 136 t.advanceTransition(Data); 137 } else { 138 t.error(this); 139 t.advanceTransition(BogusComment); 140 } 141 } 142 }, 143 TagName { 144 // from < or </ in data, will have start or end tag pending 145 void read(Tokeniser t, CharacterReader r) { 146 // previous TagOpen state did NOT consume, will have a letter char in current 147 //String tagName = r.consumeToAnySorted(tagCharsSorted).toLowerCase(); 148 String tagName = r.consumeTagName(); 149 t.tagPending.appendTagName(tagName); 150 151 switch (r.consume()) { 152 case '\t': 153 case '\n': 154 case '\r': 155 case '\f': 156 case ' ': 157 t.transition(BeforeAttributeName); 158 break; 159 case '/': 160 t.transition(SelfClosingStartTag); 161 break; 162 case '>': 163 t.emitTagPending(); 164 t.transition(Data); 165 break; 166 case nullChar: // replacement 167 t.tagPending.appendTagName(replacementStr); 168 break; 169 case eof: // should emit pending tag? 
170 t.eofError(this); 171 t.transition(Data); 172 // no default, as covered with above consumeToAny 173 } 174 } 175 }, 176 RcdataLessthanSign { 177 // from < in rcdata 178 void read(Tokeniser t, CharacterReader r) { 179 if (r.matches('/')) { 180 t.createTempBuffer(); 181 t.advanceTransition(RCDATAEndTagOpen); 182 } else if (r.matchesLetter() && t.appropriateEndTagName() != null && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) { 183 // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than 184 // consuming to EOF; break out here 185 t.tagPending = t.createTagPending(false).name(t.appropriateEndTagName()); 186 t.emitTagPending(); 187 r.unconsume(); // undo "<" 188 t.transition(Data); 189 } else { 190 t.emit("<"); 191 t.transition(Rcdata); 192 } 193 } 194 }, 195 RCDATAEndTagOpen { 196 void read(Tokeniser t, CharacterReader r) { 197 if (r.matchesLetter()) { 198 t.createTagPending(false); 199 t.tagPending.appendTagName(r.current()); 200 t.dataBuffer.append(r.current()); 201 t.advanceTransition(RCDATAEndTagName); 202 } else { 203 t.emit("</"); 204 t.transition(Rcdata); 205 } 206 } 207 }, 208 RCDATAEndTagName { 209 void read(Tokeniser t, CharacterReader r) { 210 if (r.matchesLetter()) { 211 String name = r.consumeLetterSequence(); 212 t.tagPending.appendTagName(name); 213 t.dataBuffer.append(name); 214 return; 215 } 216 217 char c = r.consume(); 218 switch (c) { 219 case '\t': 220 case '\n': 221 case '\r': 222 case '\f': 223 case ' ': 224 if (t.isAppropriateEndTagToken()) 225 t.transition(BeforeAttributeName); 226 else 227 anythingElse(t, r); 228 break; 229 case '/': 230 if (t.isAppropriateEndTagToken()) 231 t.transition(SelfClosingStartTag); 232 else 233 anythingElse(t, r); 234 break; 235 case '>': 236 if (t.isAppropriateEndTagToken()) { 237 t.emitTagPending(); 238 t.transition(Data); 239 } 240 else 241 anythingElse(t, r); 242 break; 243 default: 244 anythingElse(t, r); 245 } 246 } 247 248 private void 
anythingElse(Tokeniser t, CharacterReader r) { 249 t.emit("</" + t.dataBuffer.toString()); 250 r.unconsume(); 251 t.transition(Rcdata); 252 } 253 }, 254 RawtextLessthanSign { 255 void read(Tokeniser t, CharacterReader r) { 256 if (r.matches('/')) { 257 t.createTempBuffer(); 258 t.advanceTransition(RawtextEndTagOpen); 259 } else { 260 t.emit('<'); 261 t.transition(Rawtext); 262 } 263 } 264 }, 265 RawtextEndTagOpen { 266 void read(Tokeniser t, CharacterReader r) { 267 readEndTag(t, r, RawtextEndTagName, Rawtext); 268 } 269 }, 270 RawtextEndTagName { 271 void read(Tokeniser t, CharacterReader r) { 272 handleDataEndTag(t, r, Rawtext); 273 } 274 }, 275 ScriptDataLessthanSign { 276 void read(Tokeniser t, CharacterReader r) { 277 switch (r.consume()) { 278 case '/': 279 t.createTempBuffer(); 280 t.transition(ScriptDataEndTagOpen); 281 break; 282 case '!': 283 t.emit("<!"); 284 t.transition(ScriptDataEscapeStart); 285 break; 286 default: 287 t.emit("<"); 288 r.unconsume(); 289 t.transition(ScriptData); 290 } 291 } 292 }, 293 ScriptDataEndTagOpen { 294 void read(Tokeniser t, CharacterReader r) { 295 readEndTag(t, r, ScriptDataEndTagName, ScriptData); 296 } 297 }, 298 ScriptDataEndTagName { 299 void read(Tokeniser t, CharacterReader r) { 300 handleDataEndTag(t, r, ScriptData); 301 } 302 }, 303 ScriptDataEscapeStart { 304 void read(Tokeniser t, CharacterReader r) { 305 if (r.matches('-')) { 306 t.emit('-'); 307 t.advanceTransition(ScriptDataEscapeStartDash); 308 } else { 309 t.transition(ScriptData); 310 } 311 } 312 }, 313 ScriptDataEscapeStartDash { 314 void read(Tokeniser t, CharacterReader r) { 315 if (r.matches('-')) { 316 t.emit('-'); 317 t.advanceTransition(ScriptDataEscapedDashDash); 318 } else { 319 t.transition(ScriptData); 320 } 321 } 322 }, 323 ScriptDataEscaped { 324 void read(Tokeniser t, CharacterReader r) { 325 if (r.isEmpty()) { 326 t.eofError(this); 327 t.transition(Data); 328 return; 329 } 330 331 switch (r.current()) { 332 case '-': 333 t.emit('-'); 334 
t.advanceTransition(ScriptDataEscapedDash); 335 break; 336 case '<': 337 t.advanceTransition(ScriptDataEscapedLessthanSign); 338 break; 339 case nullChar: 340 t.error(this); 341 r.advance(); 342 t.emit(replacementChar); 343 break; 344 default: 345 String data = r.consumeToAny('-', '<', nullChar); 346 t.emit(data); 347 } 348 } 349 }, 350 ScriptDataEscapedDash { 351 void read(Tokeniser t, CharacterReader r) { 352 if (r.isEmpty()) { 353 t.eofError(this); 354 t.transition(Data); 355 return; 356 } 357 358 char c = r.consume(); 359 switch (c) { 360 case '-': 361 t.emit(c); 362 t.transition(ScriptDataEscapedDashDash); 363 break; 364 case '<': 365 t.transition(ScriptDataEscapedLessthanSign); 366 break; 367 case nullChar: 368 t.error(this); 369 t.emit(replacementChar); 370 t.transition(ScriptDataEscaped); 371 break; 372 default: 373 t.emit(c); 374 t.transition(ScriptDataEscaped); 375 } 376 } 377 }, 378 ScriptDataEscapedDashDash { 379 void read(Tokeniser t, CharacterReader r) { 380 if (r.isEmpty()) { 381 t.eofError(this); 382 t.transition(Data); 383 return; 384 } 385 386 char c = r.consume(); 387 switch (c) { 388 case '-': 389 t.emit(c); 390 break; 391 case '<': 392 t.transition(ScriptDataEscapedLessthanSign); 393 break; 394 case '>': 395 t.emit(c); 396 t.transition(ScriptData); 397 break; 398 case nullChar: 399 t.error(this); 400 t.emit(replacementChar); 401 t.transition(ScriptDataEscaped); 402 break; 403 default: 404 t.emit(c); 405 t.transition(ScriptDataEscaped); 406 } 407 } 408 }, 409 ScriptDataEscapedLessthanSign { 410 void read(Tokeniser t, CharacterReader r) { 411 if (r.matchesLetter()) { 412 t.createTempBuffer(); 413 t.dataBuffer.append(r.current()); 414 t.emit("<" + r.current()); 415 t.advanceTransition(ScriptDataDoubleEscapeStart); 416 } else if (r.matches('/')) { 417 t.createTempBuffer(); 418 t.advanceTransition(ScriptDataEscapedEndTagOpen); 419 } else { 420 t.emit('<'); 421 t.transition(ScriptDataEscaped); 422 } 423 } 424 }, 425 ScriptDataEscapedEndTagOpen { 426 
void read(Tokeniser t, CharacterReader r) { 427 if (r.matchesLetter()) { 428 t.createTagPending(false); 429 t.tagPending.appendTagName(r.current()); 430 t.dataBuffer.append(r.current()); 431 t.advanceTransition(ScriptDataEscapedEndTagName); 432 } else { 433 t.emit("</"); 434 t.transition(ScriptDataEscaped); 435 } 436 } 437 }, 438 ScriptDataEscapedEndTagName { 439 void read(Tokeniser t, CharacterReader r) { 440 handleDataEndTag(t, r, ScriptDataEscaped); 441 } 442 }, 443 ScriptDataDoubleEscapeStart { 444 void read(Tokeniser t, CharacterReader r) { 445 handleDataDoubleEscapeTag(t, r, ScriptDataDoubleEscaped, ScriptDataEscaped); 446 } 447 }, 448 ScriptDataDoubleEscaped { 449 void read(Tokeniser t, CharacterReader r) { 450 char c = r.current(); 451 switch (c) { 452 case '-': 453 t.emit(c); 454 t.advanceTransition(ScriptDataDoubleEscapedDash); 455 break; 456 case '<': 457 t.emit(c); 458 t.advanceTransition(ScriptDataDoubleEscapedLessthanSign); 459 break; 460 case nullChar: 461 t.error(this); 462 r.advance(); 463 t.emit(replacementChar); 464 break; 465 case eof: 466 t.eofError(this); 467 t.transition(Data); 468 break; 469 default: 470 String data = r.consumeToAny('-', '<', nullChar); 471 t.emit(data); 472 } 473 } 474 }, 475 ScriptDataDoubleEscapedDash { 476 void read(Tokeniser t, CharacterReader r) { 477 char c = r.consume(); 478 switch (c) { 479 case '-': 480 t.emit(c); 481 t.transition(ScriptDataDoubleEscapedDashDash); 482 break; 483 case '<': 484 t.emit(c); 485 t.transition(ScriptDataDoubleEscapedLessthanSign); 486 break; 487 case nullChar: 488 t.error(this); 489 t.emit(replacementChar); 490 t.transition(ScriptDataDoubleEscaped); 491 break; 492 case eof: 493 t.eofError(this); 494 t.transition(Data); 495 break; 496 default: 497 t.emit(c); 498 t.transition(ScriptDataDoubleEscaped); 499 } 500 } 501 }, 502 ScriptDataDoubleEscapedDashDash { 503 void read(Tokeniser t, CharacterReader r) { 504 char c = r.consume(); 505 switch (c) { 506 case '-': 507 t.emit(c); 508 break; 509 
case '<': 510 t.emit(c); 511 t.transition(ScriptDataDoubleEscapedLessthanSign); 512 break; 513 case '>': 514 t.emit(c); 515 t.transition(ScriptData); 516 break; 517 case nullChar: 518 t.error(this); 519 t.emit(replacementChar); 520 t.transition(ScriptDataDoubleEscaped); 521 break; 522 case eof: 523 t.eofError(this); 524 t.transition(Data); 525 break; 526 default: 527 t.emit(c); 528 t.transition(ScriptDataDoubleEscaped); 529 } 530 } 531 }, 532 ScriptDataDoubleEscapedLessthanSign { 533 void read(Tokeniser t, CharacterReader r) { 534 if (r.matches('/')) { 535 t.emit('/'); 536 t.createTempBuffer(); 537 t.advanceTransition(ScriptDataDoubleEscapeEnd); 538 } else { 539 t.transition(ScriptDataDoubleEscaped); 540 } 541 } 542 }, 543 ScriptDataDoubleEscapeEnd { 544 void read(Tokeniser t, CharacterReader r) { 545 handleDataDoubleEscapeTag(t,r, ScriptDataEscaped, ScriptDataDoubleEscaped); 546 } 547 }, 548 BeforeAttributeName { 549 // from tagname <xxx 550 void read(Tokeniser t, CharacterReader r) { 551 char c = r.consume(); 552 switch (c) { 553 case '\t': 554 case '\n': 555 case '\r': 556 case '\f': 557 case ' ': 558 break; // ignore whitespace 559 case '/': 560 t.transition(SelfClosingStartTag); 561 break; 562 case '>': 563 t.emitTagPending(); 564 t.transition(Data); 565 break; 566 case nullChar: 567 t.error(this); 568 t.tagPending.newAttribute(); 569 r.unconsume(); 570 t.transition(AttributeName); 571 break; 572 case eof: 573 t.eofError(this); 574 t.transition(Data); 575 break; 576 case '"': 577 case '\'': 578 case '<': 579 case '=': 580 t.error(this); 581 t.tagPending.newAttribute(); 582 t.tagPending.appendAttributeName(c); 583 t.transition(AttributeName); 584 break; 585 default: // A-Z, anything else 586 t.tagPending.newAttribute(); 587 r.unconsume(); 588 t.transition(AttributeName); 589 } 590 } 591 }, 592 AttributeName { 593 // from before attribute name 594 void read(Tokeniser t, CharacterReader r) { 595 String name = r.consumeToAnySorted(attributeNameCharsSorted); 596 
t.tagPending.appendAttributeName(name); 597 598 char c = r.consume(); 599 switch (c) { 600 case '\t': 601 case '\n': 602 case '\r': 603 case '\f': 604 case ' ': 605 t.transition(AfterAttributeName); 606 break; 607 case '/': 608 t.transition(SelfClosingStartTag); 609 break; 610 case '=': 611 t.transition(BeforeAttributeValue); 612 break; 613 case '>': 614 t.emitTagPending(); 615 t.transition(Data); 616 break; 617 case nullChar: 618 t.error(this); 619 t.tagPending.appendAttributeName(replacementChar); 620 break; 621 case eof: 622 t.eofError(this); 623 t.transition(Data); 624 break; 625 case '"': 626 case '\'': 627 case '<': 628 t.error(this); 629 t.tagPending.appendAttributeName(c); 630 // no default, as covered in consumeToAny 631 } 632 } 633 }, 634 AfterAttributeName { 635 void read(Tokeniser t, CharacterReader r) { 636 char c = r.consume(); 637 switch (c) { 638 case '\t': 639 case '\n': 640 case '\r': 641 case '\f': 642 case ' ': 643 // ignore 644 break; 645 case '/': 646 t.transition(SelfClosingStartTag); 647 break; 648 case '=': 649 t.transition(BeforeAttributeValue); 650 break; 651 case '>': 652 t.emitTagPending(); 653 t.transition(Data); 654 break; 655 case nullChar: 656 t.error(this); 657 t.tagPending.appendAttributeName(replacementChar); 658 t.transition(AttributeName); 659 break; 660 case eof: 661 t.eofError(this); 662 t.transition(Data); 663 break; 664 case '"': 665 case '\'': 666 case '<': 667 t.error(this); 668 t.tagPending.newAttribute(); 669 t.tagPending.appendAttributeName(c); 670 t.transition(AttributeName); 671 break; 672 default: // A-Z, anything else 673 t.tagPending.newAttribute(); 674 r.unconsume(); 675 t.transition(AttributeName); 676 } 677 } 678 }, 679 BeforeAttributeValue { 680 void read(Tokeniser t, CharacterReader r) { 681 char c = r.consume(); 682 switch (c) { 683 case '\t': 684 case '\n': 685 case '\r': 686 case '\f': 687 case ' ': 688 // ignore 689 break; 690 case '"': 691 t.transition(AttributeValue_doubleQuoted); 692 break; 693 case 
'&': 694 r.unconsume(); 695 t.transition(AttributeValue_unquoted); 696 break; 697 case '\'': 698 t.transition(AttributeValue_singleQuoted); 699 break; 700 case nullChar: 701 t.error(this); 702 t.tagPending.appendAttributeValue(replacementChar); 703 t.transition(AttributeValue_unquoted); 704 break; 705 case eof: 706 t.eofError(this); 707 t.emitTagPending(); 708 t.transition(Data); 709 break; 710 case '>': 711 t.error(this); 712 t.emitTagPending(); 713 t.transition(Data); 714 break; 715 case '<': 716 case '=': 717 case '`': 718 t.error(this); 719 t.tagPending.appendAttributeValue(c); 720 t.transition(AttributeValue_unquoted); 721 break; 722 default: 723 r.unconsume(); 724 t.transition(AttributeValue_unquoted); 725 } 726 } 727 }, 728 AttributeValue_doubleQuoted { 729 void read(Tokeniser t, CharacterReader r) { 730 String value = r.consumeToAny(attributeDoubleValueCharsSorted); 731 if (value.length() > 0) 732 t.tagPending.appendAttributeValue(value); 733 else 734 t.tagPending.setEmptyAttributeValue(); 735 736 char c = r.consume(); 737 switch (c) { 738 case '"': 739 t.transition(AfterAttributeValue_quoted); 740 break; 741 case '&': 742 int[] ref = t.consumeCharacterReference('"', true); 743 if (ref != null) 744 t.tagPending.appendAttributeValue(ref); 745 else 746 t.tagPending.appendAttributeValue('&'); 747 break; 748 case nullChar: 749 t.error(this); 750 t.tagPending.appendAttributeValue(replacementChar); 751 break; 752 case eof: 753 t.eofError(this); 754 t.transition(Data); 755 break; 756 // no default, handled in consume to any above 757 } 758 } 759 }, 760 AttributeValue_singleQuoted { 761 void read(Tokeniser t, CharacterReader r) { 762 String value = r.consumeToAny(attributeSingleValueCharsSorted); 763 if (value.length() > 0) 764 t.tagPending.appendAttributeValue(value); 765 else 766 t.tagPending.setEmptyAttributeValue(); 767 768 char c = r.consume(); 769 switch (c) { 770 case '\'': 771 t.transition(AfterAttributeValue_quoted); 772 break; 773 case '&': 774 int[] ref 
= t.consumeCharacterReference('\'', true); 775 if (ref != null) 776 t.tagPending.appendAttributeValue(ref); 777 else 778 t.tagPending.appendAttributeValue('&'); 779 break; 780 case nullChar: 781 t.error(this); 782 t.tagPending.appendAttributeValue(replacementChar); 783 break; 784 case eof: 785 t.eofError(this); 786 t.transition(Data); 787 break; 788 // no default, handled in consume to any above 789 } 790 } 791 }, 792 AttributeValue_unquoted { 793 void read(Tokeniser t, CharacterReader r) { 794 String value = r.consumeToAnySorted(attributeValueUnquoted); 795 if (value.length() > 0) 796 t.tagPending.appendAttributeValue(value); 797 798 char c = r.consume(); 799 switch (c) { 800 case '\t': 801 case '\n': 802 case '\r': 803 case '\f': 804 case ' ': 805 t.transition(BeforeAttributeName); 806 break; 807 case '&': 808 int[] ref = t.consumeCharacterReference('>', true); 809 if (ref != null) 810 t.tagPending.appendAttributeValue(ref); 811 else 812 t.tagPending.appendAttributeValue('&'); 813 break; 814 case '>': 815 t.emitTagPending(); 816 t.transition(Data); 817 break; 818 case nullChar: 819 t.error(this); 820 t.tagPending.appendAttributeValue(replacementChar); 821 break; 822 case eof: 823 t.eofError(this); 824 t.transition(Data); 825 break; 826 case '"': 827 case '\'': 828 case '<': 829 case '=': 830 case '`': 831 t.error(this); 832 t.tagPending.appendAttributeValue(c); 833 break; 834 // no default, handled in consume to any above 835 } 836 837 } 838 }, 839 // CharacterReferenceInAttributeValue state handled inline 840 AfterAttributeValue_quoted { 841 void read(Tokeniser t, CharacterReader r) { 842 char c = r.consume(); 843 switch (c) { 844 case '\t': 845 case '\n': 846 case '\r': 847 case '\f': 848 case ' ': 849 t.transition(BeforeAttributeName); 850 break; 851 case '/': 852 t.transition(SelfClosingStartTag); 853 break; 854 case '>': 855 t.emitTagPending(); 856 t.transition(Data); 857 break; 858 case eof: 859 t.eofError(this); 860 t.transition(Data); 861 break; 862 
default: 863 t.error(this); 864 r.unconsume(); 865 t.transition(BeforeAttributeName); 866 } 867 868 } 869 }, 870 SelfClosingStartTag { 871 void read(Tokeniser t, CharacterReader r) { 872 char c = r.consume(); 873 switch (c) { 874 case '>': 875 t.tagPending.selfClosing = true; 876 t.emitTagPending(); 877 t.transition(Data); 878 break; 879 case eof: 880 t.eofError(this); 881 t.transition(Data); 882 break; 883 default: 884 t.error(this); 885 r.unconsume(); 886 t.transition(BeforeAttributeName); 887 } 888 } 889 }, 890 BogusComment { 891 void read(Tokeniser t, CharacterReader r) { 892 // todo: handle bogus comment starting from eof. when does that trigger? 893 // rewind to capture character that lead us here 894 r.unconsume(); 895 Token.Comment comment = new Token.Comment(); 896 comment.bogus = true; 897 comment.data.append(r.consumeTo('>')); 898 // todo: replace nullChar with replaceChar 899 t.emit(comment); 900 t.advanceTransition(Data); 901 } 902 }, 903 MarkupDeclarationOpen { 904 void read(Tokeniser t, CharacterReader r) { 905 if (r.matchConsume("--")) { 906 t.createCommentPending(); 907 t.transition(CommentStart); 908 } else if (r.matchConsumeIgnoreCase("DOCTYPE")) { 909 t.transition(Doctype); 910 } else if (r.matchConsume("[CDATA[")) { 911 // todo: should actually check current namepspace, and only non-html allows cdata. 
// until namespace is implemented properly, keep handling as cdata
                //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
                t.transition(CdataSection);
            } else {
                t.error(this);
                t.advanceTransition(BogusComment); // advance so this character gets in bogus comment data's rewind
            }
        }
    },
    /** Entered right after {@code <!--}; the comment token is pending and still has no data. */
    CommentStart {
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '-':
                    t.transition(CommentStartDash);
                    break;
                case nullChar:
                    t.error(this);
                    t.commentPending.data.append(replacementChar);
                    t.transition(Comment);
                    break;
                case '>':
                    // "<!-->": error, emit the empty comment
                    t.error(this);
                    t.emitCommentPending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.emitCommentPending();
                    t.transition(Data);
                    break;
                default:
                    t.commentPending.data.append(c);
                    t.transition(Comment);
            }
        }
    },
    /**
     * Entered after {@code <!---}: one dash has been consumed beyond the comment opener and has not yet
     * been added to the comment data.
     */
    CommentStartDash {
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '-':
                    // Two dashes in a row: this may be the "--" of the closing "-->", so hand over to
                    // CommentEnd, which either closes the comment or puts the dashes back into the data.
                    // (Looping on this state silently discarded every dash.)
                    t.transition(CommentEnd);
                    break;
                case nullChar:
                    t.error(this);
                    // keep the dash that brought us here, as CommentEndDash does
                    t.commentPending.data.append('-').append(replacementChar);
                    t.transition(Comment);
                    break;
                case '>':
                    // "<!--->": error, emit the empty comment
                    t.error(this);
                    t.emitCommentPending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.emitCommentPending();
                    t.transition(Data);
                    break;
                default:
                    // the earlier dash was ordinary comment text after all; keep it
                    t.commentPending.data.append('-').append(c);
                    t.transition(Comment);
            }
        }
    },
    /** Inside comment data: accumulates text until a dash that may start the closing sequence. */
    Comment {
        void read(Tokeniser t, CharacterReader r) {
            char c = r.current();
            switch (c) {
                case '-':
                    t.advanceTransition(CommentEndDash);
                    break;
                case nullChar:
                    t.error(this);
                    r.advance();
                    t.commentPending.data.append(replacementChar);
                    break;
                case eof:
                    t.eofError(this);
                    t.emitCommentPending();
                    t.transition(Data);
                    break;
                default:
                    // bulk-consume up to the next character that needs special handling
                    t.commentPending.data.append(r.consumeToAny('-', nullChar));
            }
        }
    },
999 CommentEndDash { 1000 void read(Tokeniser t, CharacterReader r) { 1001 char c = r.consume(); 1002 switch (c) { 1003 case '-': 1004 t.transition(CommentEnd); 1005 break; 1006 case nullChar: 1007 t.error(this); 1008 t.commentPending.data.append('-').append(replacementChar); 1009 t.transition(Comment); 1010 break; 1011 case eof: 1012 t.eofError(this); 1013 t.emitCommentPending(); 1014 t.transition(Data); 1015 break; 1016 default: 1017 t.commentPending.data.append('-').append(c); 1018 t.transition(Comment); 1019 } 1020 } 1021 }, 1022 CommentEnd { 1023 void read(Tokeniser t, CharacterReader r) { 1024 char c = r.consume(); 1025 switch (c) { 1026 case '>': 1027 t.emitCommentPending(); 1028 t.transition(Data); 1029 break; 1030 case nullChar: 1031 t.error(this); 1032 t.commentPending.data.append("--").append(replacementChar); 1033 t.transition(Comment); 1034 break; 1035 case '!': 1036 t.error(this); 1037 t.transition(CommentEndBang); 1038 break; 1039 case '-': 1040 t.error(this); 1041 t.commentPending.data.append('-'); 1042 break; 1043 case eof: 1044 t.eofError(this); 1045 t.emitCommentPending(); 1046 t.transition(Data); 1047 break; 1048 default: 1049 t.error(this); 1050 t.commentPending.data.append("--").append(c); 1051 t.transition(Comment); 1052 } 1053 } 1054 }, 1055 CommentEndBang { 1056 void read(Tokeniser t, CharacterReader r) { 1057 char c = r.consume(); 1058 switch (c) { 1059 case '-': 1060 t.commentPending.data.append("--!"); 1061 t.transition(CommentEndDash); 1062 break; 1063 case '>': 1064 t.emitCommentPending(); 1065 t.transition(Data); 1066 break; 1067 case nullChar: 1068 t.error(this); 1069 t.commentPending.data.append("--!").append(replacementChar); 1070 t.transition(Comment); 1071 break; 1072 case eof: 1073 t.eofError(this); 1074 t.emitCommentPending(); 1075 t.transition(Data); 1076 break; 1077 default: 1078 t.commentPending.data.append("--!").append(c); 1079 t.transition(Comment); 1080 } 1081 } 1082 }, 1083 Doctype { 1084 void read(Tokeniser t, 
CharacterReader r) { 1085 char c = r.consume(); 1086 switch (c) { 1087 case '\t': 1088 case '\n': 1089 case '\r': 1090 case '\f': 1091 case ' ': 1092 t.transition(BeforeDoctypeName); 1093 break; 1094 case eof: 1095 t.eofError(this); 1096 // note: fall through to > case 1097 case '>': // catch invalid <!DOCTYPE> 1098 t.error(this); 1099 t.createDoctypePending(); 1100 t.doctypePending.forceQuirks = true; 1101 t.emitDoctypePending(); 1102 t.transition(Data); 1103 break; 1104 default: 1105 t.error(this); 1106 t.transition(BeforeDoctypeName); 1107 } 1108 } 1109 }, 1110 BeforeDoctypeName { 1111 void read(Tokeniser t, CharacterReader r) { 1112 if (r.matchesLetter()) { 1113 t.createDoctypePending(); 1114 t.transition(DoctypeName); 1115 return; 1116 } 1117 char c = r.consume(); 1118 switch (c) { 1119 case '\t': 1120 case '\n': 1121 case '\r': 1122 case '\f': 1123 case ' ': 1124 break; // ignore whitespace 1125 case nullChar: 1126 t.error(this); 1127 t.createDoctypePending(); 1128 t.doctypePending.name.append(replacementChar); 1129 t.transition(DoctypeName); 1130 break; 1131 case eof: 1132 t.eofError(this); 1133 t.createDoctypePending(); 1134 t.doctypePending.forceQuirks = true; 1135 t.emitDoctypePending(); 1136 t.transition(Data); 1137 break; 1138 default: 1139 t.createDoctypePending(); 1140 t.doctypePending.name.append(c); 1141 t.transition(DoctypeName); 1142 } 1143 } 1144 }, 1145 DoctypeName { 1146 void read(Tokeniser t, CharacterReader r) { 1147 if (r.matchesLetter()) { 1148 String name = r.consumeLetterSequence(); 1149 t.doctypePending.name.append(name); 1150 return; 1151 } 1152 char c = r.consume(); 1153 switch (c) { 1154 case '>': 1155 t.emitDoctypePending(); 1156 t.transition(Data); 1157 break; 1158 case '\t': 1159 case '\n': 1160 case '\r': 1161 case '\f': 1162 case ' ': 1163 t.transition(AfterDoctypeName); 1164 break; 1165 case nullChar: 1166 t.error(this); 1167 t.doctypePending.name.append(replacementChar); 1168 break; 1169 case eof: 1170 t.eofError(this); 1171 
t.doctypePending.forceQuirks = true; 1172 t.emitDoctypePending(); 1173 t.transition(Data); 1174 break; 1175 default: 1176 t.doctypePending.name.append(c); 1177 } 1178 } 1179 }, 1180 AfterDoctypeName { 1181 void read(Tokeniser t, CharacterReader r) { 1182 if (r.isEmpty()) { 1183 t.eofError(this); 1184 t.doctypePending.forceQuirks = true; 1185 t.emitDoctypePending(); 1186 t.transition(Data); 1187 return; 1188 } 1189 if (r.matchesAny('\t', '\n', '\r', '\f', ' ')) 1190 r.advance(); // ignore whitespace 1191 else if (r.matches('>')) { 1192 t.emitDoctypePending(); 1193 t.advanceTransition(Data); 1194 } else if (r.matchConsumeIgnoreCase(DocumentType.PUBLIC_KEY)) { 1195 t.doctypePending.pubSysKey = DocumentType.PUBLIC_KEY; 1196 t.transition(AfterDoctypePublicKeyword); 1197 } else if (r.matchConsumeIgnoreCase(DocumentType.SYSTEM_KEY)) { 1198 t.doctypePending.pubSysKey = DocumentType.SYSTEM_KEY; 1199 t.transition(AfterDoctypeSystemKeyword); 1200 } else { 1201 t.error(this); 1202 t.doctypePending.forceQuirks = true; 1203 t.advanceTransition(BogusDoctype); 1204 } 1205 1206 } 1207 }, 1208 AfterDoctypePublicKeyword { 1209 void read(Tokeniser t, CharacterReader r) { 1210 char c = r.consume(); 1211 switch (c) { 1212 case '\t': 1213 case '\n': 1214 case '\r': 1215 case '\f': 1216 case ' ': 1217 t.transition(BeforeDoctypePublicIdentifier); 1218 break; 1219 case '"': 1220 t.error(this); 1221 // set public id to empty string 1222 t.transition(DoctypePublicIdentifier_doubleQuoted); 1223 break; 1224 case '\'': 1225 t.error(this); 1226 // set public id to empty string 1227 t.transition(DoctypePublicIdentifier_singleQuoted); 1228 break; 1229 case '>': 1230 t.error(this); 1231 t.doctypePending.forceQuirks = true; 1232 t.emitDoctypePending(); 1233 t.transition(Data); 1234 break; 1235 case eof: 1236 t.eofError(this); 1237 t.doctypePending.forceQuirks = true; 1238 t.emitDoctypePending(); 1239 t.transition(Data); 1240 break; 1241 default: 1242 t.error(this); 1243 t.doctypePending.forceQuirks = 
true;
                    t.transition(BogusDoctype);
            }
        }
    },
    BeforeDoctypePublicIdentifier {
        // skips whitespace, then expects the opening quote of the public identifier
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    break;
                case '"':
                    // set public id to empty string
                    t.transition(DoctypePublicIdentifier_doubleQuoted);
                    break;
                case '\'':
                    // set public id to empty string
                    t.transition(DoctypePublicIdentifier_singleQuoted);
                    break;
                case '>':
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.transition(BogusDoctype);
            }
        }
    },
    DoctypePublicIdentifier_doubleQuoted {
        // accumulates the public identifier one char at a time until the closing double quote
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '"':
                    t.transition(AfterDoctypePublicIdentifier);
                    break;
                case nullChar:
                    t.error(this);
                    t.doctypePending.publicIdentifier.append(replacementChar);
                    break;
                case '>':
                    // doctype closed before the identifier was terminated
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.doctypePending.publicIdentifier.append(c);
            }
        }
    },
    DoctypePublicIdentifier_singleQuoted {
        // accumulates the public identifier one char at a time until the closing single quote
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\'':
                    t.transition(AfterDoctypePublicIdentifier);
                    break;
                case nullChar:
                    t.error(this);
                    t.doctypePending.publicIdentifier.append(replacementChar);
                    break;
                case '>':
                    // doctype closed before the identifier was terminated
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.doctypePending.publicIdentifier.append(c);
            }
        }
    },
    AfterDoctypePublicIdentifier {
        // directly after the closing quote of the public identifier
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    t.transition(BetweenDoctypePublicAndSystemIdentifiers);
                    break;
                case '>':
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case '"':
                    // error: no whitespace between the public and system identifiers
                    t.error(this);
                    // system id empty
                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
                    break;
                case '\'':
                    // error: no whitespace between the public and system identifiers
                    t.error(this);
                    // system id empty
                    t.transition(DoctypeSystemIdentifier_singleQuoted);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.transition(BogusDoctype);
            }
        }
    },
    BetweenDoctypePublicAndSystemIdentifiers {
        // whitespace has been seen after the public identifier; a system identifier may follow
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    break;
                case '>':
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case '"':
                    // well-formed (whitespace separated the identifiers), so not an error
                    // system id empty
                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
                    break;
                case '\'':
                    // well-formed (whitespace separated the identifiers), so not an error
                    // system id empty
                    t.transition(DoctypeSystemIdentifier_singleQuoted);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.transition(BogusDoctype);
            }
        }
    },
    AfterDoctypeSystemKeyword {
        // directly after the SYSTEM keyword; whitespace is expected before the identifier
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    t.transition(BeforeDoctypeSystemIdentifier);
                    break;
                case '>':
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case '"':
                    // error: no whitespace after the keyword
                    t.error(this);
                    // system id empty
                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
                    break;
                case '\'':
                    // error: no whitespace after the keyword
                    t.error(this);
                    // system id empty
                    t.transition(DoctypeSystemIdentifier_singleQuoted);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    // Hand over to BogusDoctype (as the sibling doctype states do) rather than emitting
                    // here: emitting without leaving this state would let the same pending doctype be
                    // emitted again on a following character.
                    t.transition(BogusDoctype);
            }
        }
    },
    BeforeDoctypeSystemIdentifier {
        // skips whitespace, then expects the opening quote of the system identifier
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    break;
                case '"':
                    // set system id to empty string
                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
                    break;
                case '\'':
                    // set system id to empty string
                    t.transition(DoctypeSystemIdentifier_singleQuoted);
                    break;
                case '>':
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.transition(BogusDoctype);
            }
        }
    },
    DoctypeSystemIdentifier_doubleQuoted {
        // accumulates the system identifier one char at a time until the closing double quote
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '"':
                    t.transition(AfterDoctypeSystemIdentifier);
                    break;
                case nullChar:
                    t.error(this);
                    t.doctypePending.systemIdentifier.append(replacementChar);
                    break;
                case '>':
                    // doctype closed before the identifier was terminated
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.doctypePending.systemIdentifier.append(c);
            }
        }
    },
    DoctypeSystemIdentifier_singleQuoted {
        // accumulates the system identifier one char at a time until the closing single quote
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\'':
                    t.transition(AfterDoctypeSystemIdentifier);
                    break;
                case nullChar:
                    t.error(this);
                    t.doctypePending.systemIdentifier.append(replacementChar);
                    break;
                case '>':
                    // doctype closed before the identifier was terminated
                    t.error(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.doctypePending.systemIdentifier.append(c);
            }
        }
    },
    AfterDoctypeSystemIdentifier {
        // after the closing quote of the system identifier; only whitespace or '>' is expected
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    break;
                case '>':
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.eofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    t.error(this);
                    t.transition(BogusDoctype);
                    // NOT force quirks
            }
        }
    },
    BogusDoctype {
        // discards characters until '>' or end of input, then emits the pending doctype
        void read(Tokeniser t, CharacterReader r) {
            char c = r.consume();
            switch (c) {
                case '>':
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                case eof:
                    t.emitDoctypePending();
                    t.transition(Data);
                    break;
                default:
                    // ignore char
                    break;
            }
        }
    },
    CdataSection {
        // emits everything up to the "]]>" terminator as character data, then returns to Data
        void read(Tokeniser t, CharacterReader r) {
            String data = r.consumeTo("]]>");
            t.emit(data);
            r.matchConsume("]]>");
            t.transition(Data);
        }
    };


    /**
     * Runs one step of this state: inspects the reader and drives the tokeniser (emit / transition / error).
     *
     * @param t the tokeniser whose pending tokens and current state are updated
     * @param r the character source being tokenised
     */
    abstract void read(Tokeniser t, CharacterReader r);

    static final char nullChar = '\u0000';
    // Delimiter sets; sorted once in the static initialiser below.
    private static final char[] attributeSingleValueCharsSorted = new char[]{'\'', '&', nullChar};
    private static final char[] attributeDoubleValueCharsSorted = new char[]{'"', '&', nullChar};
    private static final char[] attributeNameCharsSorted = new char[]{'\t', '\n', '\r', '\f', ' ', '/', '=', '>', nullChar, '"', '\'', '<'};
    private static final char[] attributeValueUnquoted = new char[]{'\t', '\n', '\r', '\f', ' ', '&', '>', nullChar, '"', '\'', '<', '=', '`'};

    private static final char replacementChar = Tokeniser.replacementChar;
    private static final String replacementStr = String.valueOf(Tokeniser.replacementChar);
    private static final char eof = CharacterReader.EOF;

    static {
        Arrays.sort(attributeSingleValueCharsSorted);
        Arrays.sort(attributeDoubleValueCharsSorted);
        Arrays.sort(attributeNameCharsSorted);
        Arrays.sort(attributeValueUnquoted);
    }

    /**
     * Handles RawtextEndTagName, ScriptDataEndTagName, and ScriptDataEscapedEndTagName. Same body impl, just
     * different else exit transitions.
     * <p>
     * While the reader is on letters, they are appended to both the pending tag name and the data buffer.
     * Otherwise, if the tokeniser reports an appropriate end tag and input remains, the next char decides
     * whether the tag continues (whitespace, '/', '>'). In every other case the buffered text is emitted as
     * character data prefixed with {@code "</"} and the tokeniser moves to {@code elseTransition}.
     *
     * @param t the tokeniser holding the pending tag and data buffer
     * @param r the character source
     * @param elseTransition the state to move to when this turns out not to be a real end tag
     */
    private static void handleDataEndTag(Tokeniser t, CharacterReader r, TokeniserState elseTransition) {
        if (r.matchesLetter()) {
            String name = r.consumeLetterSequence();
            t.tagPending.appendTagName(name);
            t.dataBuffer.append(name);
            return;
        }

        boolean needsExitTransition = false;
        if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
            char c = r.consume();
            switch (c) {
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case ' ':
                    t.transition(BeforeAttributeName);
                    break;
                case '/':
                    t.transition(SelfClosingStartTag);
                    break;
                case '>':
                    t.emitTagPending();
                    t.transition(Data);
                    break;
                default:
                    // not a tag terminator: keep the char as part of the text that gets emitted below
                    t.dataBuffer.append(c);
                    needsExitTransition = true;
            }
        } else {
            needsExitTransition = true;
        }

        if (needsExitTransition) {
            t.emit("</" + t.dataBuffer.toString());
            t.transition(elseTransition);
        }
    }

    /**
     * Shared body for the raw text style data states (used by Rawtext and ScriptData): emits text up to the
     * next '&lt;' or null char, replaces a null char with the replacement char, and emits EOF at end of input.
     *
     * @param t the tokeniser to emit to
     * @param r the character source
     * @param current the calling state, used when reporting the null-char error
     * @param advance the state to move to when '&lt;' is found
     */
    private static void readData(Tokeniser t, CharacterReader r, TokeniserState current, TokeniserState advance) {
        switch (r.current()) {
            case '<':
                t.advanceTransition(advance);
                break;
            case nullChar:
                t.error(current);
                r.advance();
                t.emit(replacementChar);
                break;
            case eof:
                t.emit(new Token.EOF());
                break;
            default:
                String data = r.consumeToAny('<', nullChar); // todo - why hunt for null here? Just consumeTo'<'?
                t.emit(data);
                break;
        }
    }

    /**
     * Shared body for the character reference states: asks the tokeniser to consume a character reference
     * and emits the result, or a literal '&amp;' when none was recognised (null result), then moves on.
     *
     * @param t the tokeniser to consume from and emit to
     * @param advance the state to move to afterwards
     */
    private static void readCharRef(Tokeniser t, TokeniserState advance) {
        int[] c = t.consumeCharacterReference(null, false);
        if (c == null)
            t.emit('&');
        else
            t.emit(c);
        t.transition(advance);
    }

    /**
     * Shared body for end-tag-open style states: when the reader is on a letter, starts a pending end tag
     * and moves to {@code a}; otherwise emits {@code "</"} as text and moves to {@code b}.
     *
     * @param t the tokeniser
     * @param r the character source
     * @param a the state to move to when a tag name starts here
     * @param b the state to move to otherwise
     */
    private static void readEndTag(Tokeniser t, CharacterReader r, TokeniserState a, TokeniserState b) {
        if (r.matchesLetter()) {
            t.createTagPending(false);
            t.transition(a);
        } else {
            t.emit("</");
            t.transition(b);
        }
    }

    /**
     * Shared body for the script double-escape start/end states. Letters are buffered and emitted as text.
     * On whitespace, '/' or '>' the state becomes {@code primary} if the buffered name is exactly
     * {@code "script"}, else {@code fallback}, and the char is emitted. Any other char is put back and the
     * state becomes {@code fallback}.
     *
     * @param t the tokeniser holding the data buffer
     * @param r the character source
     * @param primary the state to move to when the buffered name is "script"
     * @param fallback the state to move to otherwise
     */
    private static void handleDataDoubleEscapeTag(Tokeniser t, CharacterReader r, TokeniserState primary, TokeniserState fallback) {
        if (r.matchesLetter()) {
            String name = r.consumeLetterSequence();
            t.dataBuffer.append(name);
            t.emit(name);
            return;
        }

        char c = r.consume();
        switch (c) {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
            case '/':
            case '>':
                if (t.dataBuffer.toString().equals("script"))
                    t.transition(primary);
                else
                    t.transition(fallback);
                t.emit(c);
                break;
            default:
                r.unconsume();
                t.transition(fallback);
        }
    }
}