001package org.jsoup.select; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.nodes.Comment; 005import org.jsoup.nodes.Document; 006import org.jsoup.nodes.DocumentType; 007import org.jsoup.nodes.Element; 008import org.jsoup.nodes.Node; 009import org.jsoup.nodes.XmlDeclaration; 010 011import java.util.List; 012import java.util.regex.Matcher; 013import java.util.regex.Pattern; 014 015import static org.jsoup.internal.Normalizer.lowerCase; 016import static org.jsoup.internal.Normalizer.normalize; 017 018 019/** 020 * Evaluates that an element matches the selector. 021 */ 022public abstract class Evaluator { 023 protected Evaluator() { 024 } 025 026 /** 027 * Test if the element meets the evaluator's requirements. 028 * 029 * @param root Root of the matching subtree 030 * @param element tested element 031 * @return Returns <tt>true</tt> if the requirements are met or 032 * <tt>false</tt> otherwise 033 */ 034 public abstract boolean matches(Element root, Element element); 035 036 /** 037 * Evaluator for tag name 038 */ 039 public static final class Tag extends Evaluator { 040 private String tagName; 041 042 public Tag(String tagName) { 043 this.tagName = tagName; 044 } 045 046 @Override 047 public boolean matches(Element root, Element element) { 048 return (element.tagName().equalsIgnoreCase(tagName)); 049 } 050 051 @Override 052 public String toString() { 053 return String.format("%s", tagName); 054 } 055 } 056 057 058 /** 059 * Evaluator for tag name that ends with 060 */ 061 public static final class TagEndsWith extends Evaluator { 062 private String tagName; 063 064 public TagEndsWith(String tagName) { 065 this.tagName = tagName; 066 } 067 068 @Override 069 public boolean matches(Element root, Element element) { 070 return (element.tagName().endsWith(tagName)); 071 } 072 073 @Override 074 public String toString() { 075 return String.format("%s", tagName); 076 } 077 } 078 079 /** 080 * Evaluator for element id 081 */ 082 public static final class Id extends Evaluator { 083 private String id; 084 085 public Id(String id) { 086 this.id = id; 087 } 088 089 @Override 090 public boolean matches(Element root, Element element) { 091 return (id.equals(element.id())); 092 } 093 094 @Override 095 public String toString() { 096 return String.format("#%s", id); 097 } 098 099 } 100 101 /** 102 * Evaluator for element class 103 */ 104 public static final class Class extends Evaluator { 105 private String className; 106 107 public Class(String className) { 108 this.className = className; 109 } 110 111 @Override 112 public boolean matches(Element root, Element element) { 113 return (element.hasClass(className)); 114 } 115 116 @Override 117 public String toString() { 118 return String.format(".%s", className); 119 } 120 121 } 122 123 /** 124 * Evaluator for attribute name matching 125 */ 126 public static final class Attribute extends Evaluator { 127 private String key; 128 129 public Attribute(String key) { 130 this.key = key; 131 } 132 133 @Override 134 public boolean matches(Element root, Element element) { 135 return element.hasAttr(key); 136 } 137 138 @Override 139 public String toString() { 140 return String.format("[%s]", key); 141 } 142 143 } 144 145 /** 146 * Evaluator for attribute name prefix matching 147 */ 148 public static final class AttributeStarting extends Evaluator { 149 private String keyPrefix; 150 151 public AttributeStarting(String keyPrefix) { 152 Validate.notEmpty(keyPrefix); 153 this.keyPrefix = lowerCase(keyPrefix); 154 } 155 156 @Override 157 public boolean matches(Element root, Element element) { 158 List<org.jsoup.nodes.Attribute> values = element.attributes().asList(); 159 for (org.jsoup.nodes.Attribute attribute : values) { 160 if (lowerCase(attribute.getKey()).startsWith(keyPrefix)) 161 return true; 162 } 163 return false; 164 } 165 166 @Override 167 public String toString() { 168 return String.format("[^%s]", keyPrefix); 169 } 170 171 } 172 173 /** 174 * Evaluator for attribute name/value matching 175 */ 176 public static final class AttributeWithValue extends AttributeKeyPair { 177 public AttributeWithValue(String key, String value) { 178 super(key, value); 179 } 180 181 @Override 182 public boolean matches(Element root, Element element) { 183 return element.hasAttr(key) && value.equalsIgnoreCase(element.attr(key).trim()); 184 } 185 186 @Override 187 public String toString() { 188 return String.format("[%s=%s]", key, value); 189 } 190 191 } 192 193 /** 194 * Evaluator for attribute name != value matching 195 */ 196 public static final class AttributeWithValueNot extends AttributeKeyPair { 197 public AttributeWithValueNot(String key, String value) { 198 super(key, value); 199 } 200 201 @Override 202 public boolean matches(Element root, Element element) { 203 return !value.equalsIgnoreCase(element.attr(key)); 204 } 205 206 @Override 207 public String toString() { 208 return String.format("[%s!=%s]", key, value); 209 } 210 211 } 212 213 /** 214 * Evaluator for attribute name/value matching (value prefix) 215 */ 216 public static final class AttributeWithValueStarting extends AttributeKeyPair { 217 public AttributeWithValueStarting(String key, String value) { 218 super(key, value); 219 } 220 221 @Override 222 public boolean matches(Element root, Element element) { 223 return element.hasAttr(key) && lowerCase(element.attr(key)).startsWith(value); // value is lower case already 224 } 225 226 @Override 227 public String toString() { 228 return String.format("[%s^=%s]", key, value); 229 } 230 231 } 232 233 /** 234 * Evaluator for attribute name/value matching (value ending) 235 */ 236 public static final class AttributeWithValueEnding extends AttributeKeyPair { 237 public AttributeWithValueEnding(String key, String value) { 238 super(key, value); 239 } 240 241 @Override 242 public boolean matches(Element root, Element element) { 243 return element.hasAttr(key) && lowerCase(element.attr(key)).endsWith(value); // value is lower case 244 } 245 246 @Override 247 public String toString() { 248 return String.format("[%s$=%s]", key, value); 249 } 250 251 } 252 253 /** 254 * Evaluator for attribute name/value matching (value containing) 255 */ 256 public static final class AttributeWithValueContaining extends AttributeKeyPair { 257 public AttributeWithValueContaining(String key, String value) { 258 super(key, value); 259 } 260 261 @Override 262 public boolean matches(Element root, Element element) { 263 return element.hasAttr(key) && lowerCase(element.attr(key)).contains(value); // value is lower case 264 } 265 266 @Override 267 public String toString() { 268 return String.format("[%s*=%s]", key, value); 269 } 270 271 } 272 273 /** 274 * Evaluator for attribute name/value matching (value regex matching) 275 */ 276 public static final class AttributeWithValueMatching extends Evaluator { 277 String key; 278 Pattern pattern; 279 280 public AttributeWithValueMatching(String key, Pattern pattern) { 281 this.key = normalize(key); 282 this.pattern = pattern; 283 } 284 285 @Override 286 public boolean matches(Element root, Element element) { 287 return element.hasAttr(key) && pattern.matcher(element.attr(key)).find(); 288 } 289 290 @Override 291 public String toString() { 292 return String.format("[%s~=%s]", key, pattern.toString()); 293 } 294 295 } 296 297 /** 298 * Abstract evaluator for attribute name/value matching 299 */ 300 public abstract static class AttributeKeyPair extends Evaluator { 301 String key; 302 String value; 303 304 public AttributeKeyPair(String key, String value) { 305 Validate.notEmpty(key); 306 Validate.notEmpty(value); 307 308 this.key = normalize(key); 309 if (value.startsWith("\"") && value.endsWith("\"") 310 || value.startsWith("'") && value.endsWith("'")) { 311 value = value.substring(1, value.length()-1); 312 } 313 this.value = normalize(value); 314 } 315 } 316 317 /** 318 * Evaluator for any / all element matching 319 */ 320 public static final class AllElements extends Evaluator { 321 322 @Override 323 public boolean matches(Element root, Element element) { 324 return true; 325 } 326 327 @Override 328 public String toString() { 329 return "*"; 330 } 331 } 332 333 /** 334 * Evaluator for matching by sibling index number (e {@literal <} idx) 335 */ 336 public static final class IndexLessThan extends IndexEvaluator { 337 public IndexLessThan(int index) { 338 super(index); 339 } 340 341 @Override 342 public boolean matches(Element root, Element element) { 343 return root != element && element.elementSiblingIndex() < index; 344 } 345 346 @Override 347 public String toString() { 348 return String.format(":lt(%d)", index); 349 } 350 351 } 352 353 /** 354 * Evaluator for matching by sibling index number (e {@literal >} idx) 355 */ 356 public static final class IndexGreaterThan extends IndexEvaluator { 357 public IndexGreaterThan(int index) { 358 super(index); 359 } 360 361 @Override 362 public boolean matches(Element root, Element element) { 363 return element.elementSiblingIndex() > index; 364 } 365 366 @Override 367 public String toString() { 368 return String.format(":gt(%d)", index); 369 } 370 371 } 372 373 /** 374 * Evaluator for matching by sibling index number (e = idx) 375 */ 376 public static final class IndexEquals extends IndexEvaluator { 377 public IndexEquals(int index) { 378 super(index); 379 } 380 381 @Override 382 public boolean matches(Element root, Element element) { 383 return element.elementSiblingIndex() == index; 384 } 385 386 @Override 387 public String toString() { 388 return String.format(":eq(%d)", index); 389 } 390 391 } 392 393 /** 394 * Evaluator for matching the last sibling (css :last-child) 395 */ 396 public static final class IsLastChild extends Evaluator { 397 @Override 398 public boolean matches(Element root, Element element) { 399 final Element p = element.parent(); 400 return p != null && !(p instanceof Document) && element.elementSiblingIndex() == p.children().size()-1; 401 } 402 403 @Override 404 public String toString() { 405 return ":last-child"; 406 } 407 } 408 409 public static final class IsFirstOfType extends IsNthOfType { 410 public IsFirstOfType() { 411 super(0,1); 412 } 413 @Override 414 public String toString() { 415 return ":first-of-type"; 416 } 417 } 418 419 public static final class IsLastOfType extends IsNthLastOfType { 420 public IsLastOfType() { 421 super(0,1); 422 } 423 @Override 424 public String toString() { 425 return ":last-of-type"; 426 } 427 } 428 429 430 public static abstract class CssNthEvaluator extends Evaluator { 431 protected final int a, b; 432 433 public CssNthEvaluator(int a, int b) { 434 this.a = a; 435 this.b = b; 436 } 437 public CssNthEvaluator(int b) { 438 this(0,b); 439 } 440 441 @Override 442 public boolean matches(Element root, Element element) { 443 final Element p = element.parent(); 444 if (p == null || (p instanceof Document)) return false; 445 446 final int pos = calculatePosition(root, element); 447 if (a == 0) return pos == b; 448 449 return (pos-b)*a >= 0 && (pos-b)%a==0; 450 } 451 452 @Override 453 public String toString() { 454 if (a == 0) 455 return String.format(":%s(%d)",getPseudoClass(), b); 456 if (b == 0) 457 return String.format(":%s(%dn)",getPseudoClass(), a); 458 return String.format(":%s(%dn%+d)", getPseudoClass(),a, b); 459 } 460 461 protected abstract String getPseudoClass(); 462 protected abstract int calculatePosition(Element root, Element element); 463 } 464 465 466 /** 467 * css-compatible Evaluator for :eq (css :nth-child) 468 * 469 * @see IndexEquals 470 */ 471 public static final class IsNthChild extends CssNthEvaluator { 472 473 public IsNthChild(int a, int b) { 474 super(a,b); 475 } 476 477 protected int calculatePosition(Element root, Element element) { 478 return element.elementSiblingIndex()+1; 479 } 480 481 482 protected String getPseudoClass() { 483 return "nth-child"; 484 } 485 } 486 487 /** 488 * css pseudo class :nth-last-child) 489 * 490 * @see IndexEquals 491 */ 492 public static final class IsNthLastChild extends CssNthEvaluator { 493 public IsNthLastChild(int a, int b) { 494 super(a,b); 495 } 496 497 @Override 498 protected int calculatePosition(Element root, Element element) { 499 return element.parent().children().size() - element.elementSiblingIndex(); 500 } 501 502 @Override 503 protected String getPseudoClass() { 504 return "nth-last-child"; 505 } 506 } 507 508 /** 509 * css pseudo class nth-of-type 510 * 511 */ 512 public static class IsNthOfType extends CssNthEvaluator { 513 public IsNthOfType(int a, int b) { 514 super(a,b); 515 } 516 517 protected int calculatePosition(Element root, Element element) { 518 int pos = 0; 519 Elements family = element.parent().children(); 520 for (Element el : family) { 521 if (el.tag().equals(element.tag())) pos++; 522 if (el == element) break; 523 } 524 return pos; 525 } 526 527 @Override 528 protected String getPseudoClass() { 529 return "nth-of-type"; 530 } 531 } 532 533 public static class IsNthLastOfType extends CssNthEvaluator { 534 535 public IsNthLastOfType(int a, int b) { 536 super(a, b); 537 } 538 539 @Override 540 protected int calculatePosition(Element root, Element element) { 541 int pos = 0; 542 Elements family = element.parent().children(); 543 for (int i = element.elementSiblingIndex(); i < family.size(); i++) { 544 if (family.get(i).tag().equals(element.tag())) pos++; 545 } 546 return pos; 547 } 548 549 @Override 550 protected String getPseudoClass() { 551 return "nth-last-of-type"; 552 } 553 } 554 555 /** 556 * Evaluator for matching the first sibling (css :first-child) 557 */ 558 public static final class IsFirstChild extends Evaluator { 559 @Override 560 public boolean matches(Element root, Element element) { 561 final Element p = element.parent(); 562 return p != null && !(p instanceof Document) && element.elementSiblingIndex() == 0; 563 } 564 565 @Override 566 public String toString() { 567 return ":first-child"; 568 } 569 } 570 571 /** 572 * css3 pseudo-class :root 573 * @see <a href="http://www.w3.org/TR/selectors/#root-pseudo">:root selector</a> 574 * 575 */ 576 public static final class IsRoot extends Evaluator { 577 @Override 578 public boolean matches(Element root, Element element) { 579 final Element r = root instanceof Document?root.child(0):root; 580 return element == r; 581 } 582 @Override 583 public String toString() { 584 return ":root"; 585 } 586 } 587 588 public static final class IsOnlyChild extends Evaluator { 589 @Override 590 public boolean matches(Element root, Element element) { 591 final Element p = element.parent(); 592 return p!=null && !(p instanceof Document) && element.siblingElements().size() == 0; 593 } 594 @Override 595 public String toString() { 596 return ":only-child"; 597 } 598 } 599 600 public static final class IsOnlyOfType extends Evaluator { 601 @Override 602 public boolean matches(Element root, Element element) { 603 final Element p = element.parent(); 604 if (p==null || p instanceof Document) return false; 605 606 int pos = 0; 607 Elements family = p.children(); 608 for (Element el : family) { 609 if (el.tag().equals(element.tag())) pos++; 610 } 611 return pos == 1; 612 } 613 @Override 614 public String toString() { 615 return ":only-of-type"; 616 } 617 } 618 619 public static final class IsEmpty extends Evaluator { 620 @Override 621 public boolean matches(Element root, Element element) { 622 List<Node> family = element.childNodes(); 623 for (Node n : family) { 624 if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType)) return false; 625 } 626 return true; 627 } 628 @Override 629 public String toString() { 630 return ":empty"; 631 } 632 } 633 634 /** 635 * Abstract evaluator for sibling index matching 636 * 637 * @author ant 638 */ 639 public abstract static class IndexEvaluator extends Evaluator { 640 int index; 641 642 public IndexEvaluator(int index) { 643 this.index = index; 644 } 645 } 646 647 /** 648 * Evaluator for matching Element (and its descendants) text 649 */ 650 public static final class ContainsText extends Evaluator { 651 private String searchText; 652 653 public ContainsText(String searchText) { 654 this.searchText = lowerCase(searchText); 655 } 656 657 @Override 658 public boolean matches(Element root, Element element) { 659 return lowerCase(element.text()).contains(searchText); 660 } 661 662 @Override 663 public String toString() { 664 return String.format(":contains(%s)", searchText); 665 } 666 } 667 668 /** 669 * Evaluator for matching Element (and its descendants) data 670 */ 671 public static final class ContainsData extends Evaluator { 672 private String searchText; 673 674 public ContainsData(String searchText) { 675 this.searchText = lowerCase(searchText); 676 } 677 678 @Override 679 public boolean matches(Element root, Element element) { 680 return lowerCase(element.data()).contains(searchText); 681 } 682 683 @Override 684 public String toString() { 685 return String.format(":containsData(%s)", searchText); 686 } 687 } 688 689 /** 690 * Evaluator for matching Element's own text 691 */ 692 public static final class ContainsOwnText extends Evaluator { 693 private String searchText; 694 695 public ContainsOwnText(String searchText) { 696 this.searchText = lowerCase(searchText); 697 } 698 699 @Override 700 public boolean matches(Element root, Element element) { 701 return lowerCase(element.ownText()).contains(searchText); 702 } 703 704 @Override 705 public String toString() { 706 return String.format(":containsOwn(%s)", searchText); 707 } 708 } 709 710 /** 711 * Evaluator for matching Element (and its descendants) text with regex 712 */ 713 public static final class Matches extends Evaluator { 714 private Pattern pattern; 715 716 public Matches(Pattern pattern) { 717 this.pattern = pattern; 718 } 719 720 @Override 721 public boolean matches(Element root, Element element) { 722 Matcher m = pattern.matcher(element.text()); 723 return m.find(); 724 } 725 726 @Override 727 public String toString() { 728 return String.format(":matches(%s)", pattern); 729 } 730 } 731 732 /** 733 * Evaluator for matching Element's own text with regex 734 */ 735 public static final class MatchesOwn extends Evaluator { 736 private Pattern pattern; 737 738 public MatchesOwn(Pattern pattern) { 739 this.pattern = pattern; 740 } 741 742 @Override 743 public boolean matches(Element root, Element element) { 744 Matcher m = pattern.matcher(element.ownText()); 745 return m.find(); 746 } 747 748 @Override 749 public String toString() { 750 return String.format(":matchesOwn(%s)", pattern); 751 } 752 } 753}