1 module html.dom; 2 3 4 import std.algorithm; 5 import std.array; 6 import std.ascii; 7 import std.conv; 8 import std.experimental.allocator; 9 import std.range; 10 import std.string; 11 import std.typecons; 12 13 import html.parser; 14 import html.utils; 15 16 17 alias HTMLString = const(char)[]; 18 static if(__VERSION__ >= 2079){ 19 alias IAllocator = RCIAllocator; 20 } 21 22 enum DOMCreateOptions { 23 None = 0, 24 DecodeEntities = 1 << 0, 25 26 ValidateClosed = 1 << 10, 27 ValidateSelfClosing = 1 << 11, 28 ValidateDuplicateAttr = 1 << 12, 29 ValidateBasic = ValidateClosed | ValidateSelfClosing | ValidateDuplicateAttr, 30 ValidateAll = ValidateBasic, 31 32 Default = DecodeEntities, 33 } 34 35 36 enum ValidationError : size_t { 37 None, 38 MismatchingClose = 1, 39 MissingClose, 40 StrayClose, 41 SelfClosingNonVoidElement, 42 DuplicateAttr, 43 } 44 45 46 alias ValidationErrorCallable = void delegate(ValidationError, HTMLString, HTMLString); 47 48 49 alias onlyElements = a => a.isElementNode; 50 51 52 private struct ChildrenForward(NodeType, alias Condition = null) { 53 this(NodeType first) { 54 curr_ = cast(Node)first; 55 static if (!is(typeof(Condition) == typeof(null))) { 56 while (curr_ && !Condition(curr_)) 57 curr_ = curr_.next_; 58 } 59 } 60 61 bool empty() const { 62 return (curr_ is null); 63 } 64 65 NodeType front() const { 66 return cast(NodeType)curr_; 67 } 68 69 void popFront() { 70 curr_ = curr_.next_; 71 72 static if (!is(typeof(Condition) == typeof(null))) { 73 while (curr_) { 74 if (Condition(curr_)) 75 break; 76 curr_ = curr_.next_; 77 } 78 } 79 } 80 81 private Node curr_; 82 } 83 84 85 private struct AncestorsForward(NodeType, alias Condition = null) { 86 this(NodeType first) { 87 curr_ = cast(Node)first; 88 static if (!is(typeof(Condition) == typeof(null))) { 89 while (curr_ && !Condition(curr_)) 90 curr_ = curr_.parent_; 91 } 92 } 93 94 bool empty() const { 95 return (curr_ is null); 96 } 97 98 NodeType front() const { 99 return cast(NodeType)curr_; 100 } 101 102 void popFront() { 103 curr_ = curr_.parent_; 104 105 static if (!is(typeof(Condition) == typeof(null))) { 106 while (curr_) { 107 if (Condition(curr_)) 108 break; 109 curr_ = curr_.parent_; 110 } 111 } 112 } 113 114 private Node curr_; 115 } 116 117 118 private struct DescendantsForward(NodeType, alias Condition = null) { 119 this(NodeType top) { 120 if (top is null) 121 return; 122 curr_ = cast(Node)top.firstChild_; // top itself is excluded 123 top_ = cast(Node)top; 124 static if (!is(typeof(Condition) == typeof(null))) { 125 if (!Condition(curr_)) 126 popFront; 127 } 128 } 129 130 bool empty() const { 131 return (curr_ is null); 132 } 133 134 NodeType front() const { 135 return cast(NodeType)curr_; 136 } 137 138 void popFront() { 139 while (curr_) { 140 if (curr_.firstChild_) { 141 curr_ = curr_.firstChild_; 142 } else if (curr_ !is top_) { 143 auto next = curr_.next_; 144 if (!next) { 145 Node parent = curr_.parent_; 146 while (parent) { 147 if (parent !is top_) { 148 if (parent.next_) { 149 next = parent.next_; 150 break; 151 } 152 parent = parent.parent_; 153 } else { 154 next = null; 155 break; 156 } 157 } 158 } 159 160 curr_ = next; 161 if (!curr_) 162 break; 163 } else { 164 curr_ = null; 165 break; 166 } 167 168 static if (is(typeof(Condition) == typeof(null))) { 169 break; 170 } else { 171 if (Condition(curr_)) 172 break; 173 } 174 } 175 } 176 177 private Node curr_; 178 private Node top_; 179 } 180 181 unittest { 182 const doc = createDocument(`<div id=a><div id=b></div><div id=c><div id=e></div><div id=f><div id=h></div></div><div id=g></div></div><div id=d></div></div>`); 183 assert(DescendantsForward!(const(Node))(doc.root).count() == 8); 184 auto fs = DescendantsForward!(const(Node), x => x.attr("id") == "f")(doc.root); 185 assert(fs.count() == 1); 186 assert(fs.front().attr("id") == "f"); 187 auto hs = DescendantsForward!(const(Node), x => x.attr("id") == "h")(fs.front()); 188 assert(hs.count() == 1); 189 assert(hs.front().attr("id") == "h"); 190 auto divs = DescendantsForward!(const(Node))(fs.front()); 191 assert(divs.count() == 1); 192 } 193 194 unittest { 195 // multiple top-level nodes 196 const doc = createDocument(`<div id="left"></div><div id="mid"></div><div id="right"></div>`); 197 assert(DescendantsForward!(const Node)(doc.root).count() == 3); 198 assert(doc.nodes.count() == 3); 199 } 200 201 unittest { 202 const doc = createDocument(``); 203 assert(DescendantsForward!(const Node)(doc.root).empty); 204 assert(doc.nodes.empty); 205 } 206 207 private struct QuerySelectorMatcher(NodeType, Nodes) if (isInputRange!Nodes) { 208 this(Selector selector, Nodes nodes) { 209 selector_ = selector; 210 nodes_ = nodes; 211 212 while (!nodes_.empty()) { 213 auto node = nodes_.front(); 214 if (node.isElementNode && selector_.matches(node)) 215 break; 216 217 nodes_.popFront(); 218 } 219 } 220 221 bool empty() const { 222 return nodes_.empty(); 223 } 224 225 NodeType front() const { 226 return cast(NodeType)nodes_.front(); 227 } 228 229 void popFront() { 230 nodes_.popFront(); 231 while (!nodes_.empty()) { 232 auto node = nodes_.front(); 233 if (node.isElementNode && selector_.matches(node)) 234 break; 235 236 nodes_.popFront(); 237 } 238 } 239 240 private Nodes nodes_; 241 private Selector selector_; 242 } 243 244 245 enum NodeTypes : ubyte { 246 Text = 0, 247 Element, 248 Comment, 249 CDATA, 250 Declaration, 251 ProcessingInstruction, 252 } 253 254 255 class Node { 256 @disable this(); 257 258 private this(HTMLString tag, Document document, size_t flags) { 259 flags_ = flags; 260 tag_ = tag; 261 262 parent_ = null; 263 firstChild_ = null; 264 lastChild_ = null; 265 266 prev_ = null; 267 next_ = null; 268 269 document_ = document; 270 } 271 272 auto opIndex(HTMLString name) const { 273 return attr(name); 274 } 275 276 void opIndexAssign(HTMLString value, HTMLString name) { 277 attr(name, value); 278 } 279 280 void opIndexAssign(T)(T value, HTMLString name) { 281 attr(name, value.to!string); 282 } 283 284 @property auto id() const { 285 return attr("id"); 286 } 287 288 @property void id(T)(T value) { 289 attr("id", value.to!string); 290 } 291 292 @property auto type() const { 293 return (flags_ & TypeMask) >> TypeShift; 294 } 295 296 @property auto tag() const { 297 return isElementNode ? tag_ : null; 298 } 299 300 @property auto tagHash() const { 301 return isElementNode ? tagHash_ : 0; 302 } 303 304 @property auto comment() const { 305 return isCommentNode ? tag_ : null; 306 } 307 308 @property auto cdata() const { 309 return isCDATANode ? tag_ : null; 310 } 311 312 @property auto declaration() const { 313 return isDeclarationNode ? tag_ : null; 314 } 315 316 @property auto processingInstruction() const { 317 return isProcessingInstructionNode ? tag_ : null; 318 } 319 320 void text(Appender)(ref Appender app) const { 321 if (isTextNode) { 322 app.put(tag_); 323 } else { 324 Node child = cast(Node)firstChild_; 325 while (child) { 326 child.text(app); 327 child = child.next_; 328 } 329 } 330 } 331 332 @property auto text() const { 333 if (isTextNode) { 334 return tag_; 335 } else { 336 Appender!HTMLString app; 337 text(app); 338 return app.data; 339 } 340 } 341 342 @property void text(HTMLString text) { 343 if (isTextNode) { 344 tag_ = text; 345 } else { 346 destroyChildren(); 347 appendText(text); 348 } 349 } 350 351 @property void attr(HTMLString name, HTMLString value) { 352 assert(isElementNode, "cannot set attributes of non-element nodes"); 353 354 attrs_[name] = value; 355 356 if (name == "id") { 357 if (!value.empty()) { 358 flags_ |= Node.Flags.HasID; 359 } else { 360 flags_ &= ~Node.Flags.HasID; 361 } 362 } 363 } 364 365 @property HTMLString attr(HTMLString name) const { 366 if (auto pattr = name in attrs_) 367 return *pattr; 368 return null; 369 } 370 371 bool hasAttr(HTMLString name) const { 372 return (name in attrs_) != null; 373 } 374 375 void removeAttr(HTMLString name) { 376 attrs_.remove(name); 377 } 378 379 @property void html(size_t Options = DOMCreateOptions.Default)(HTMLString html) { 380 assert(isElementNode, "cannot add html to non-element nodes"); 381 382 enum parserOptions = ((Options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0); 383 384 destroyChildren(); 385 386 auto builder = DOMBuilder!(Document, Options)(document_, this); 387 parseHTML!(typeof(builder), parserOptions)(html, builder); 388 } 389 390 @property auto html() const { 391 Appender!HTMLString app; 392 innerHTML(app); 393 return app.data; 394 } 395 396 @property auto compactHTML() const { 397 Appender!HTMLString app; 398 compactInnerHTML(app); 399 return app.data; 400 } 401 402 @property auto outerHTML() const { 403 Appender!HTMLString app; 404 outerHTML(app); 405 return app.data; 406 } 407 408 @property auto compactOuterHTML() const { 409 Appender!HTMLString app; 410 compactOuterHTML(app); 411 return app.data; 412 } 413 414 void prependChild(Node node) { 415 assert(document_ == node.document_); 416 assert(isElementNode, "cannot prepend to non-element nodes"); 417 418 if (node.parent_) 419 node.detach(); 420 node.parent_ = this; 421 if (firstChild_) { 422 assert(!firstChild_.prev_); 423 firstChild_.prev_ = node; 424 node.next_ = firstChild_; 425 firstChild_ = node; 426 } else { 427 assert(!lastChild_); 428 firstChild_ = node; 429 lastChild_ = node; 430 } 431 } 432 433 void appendChild(Node node) { 434 assert(document_ == node.document_); 435 assert(isElementNode, "cannot append to non-element nodes"); 436 437 if (node.parent_) 438 node.detach(); 439 node.parent_ = this; 440 if (lastChild_) { 441 assert(!lastChild_.next_); 442 lastChild_.next_ = node; 443 node.prev_ = lastChild_; 444 lastChild_ = node; 445 } else { 446 assert(!firstChild_); 447 firstChild_ = node; 448 lastChild_ = node; 449 } 450 } 451 452 void removeChild(Node node) { 453 assert(node.parent_ is this); 454 node.detach(); 455 } 456 457 void destroyChildren() { 458 auto child = firstChild_; 459 while (child) { 460 auto next = child.next_; 461 child.destroy(); 462 child = next; 463 } 464 465 firstChild_ = null; 466 lastChild_ = null; 467 } 468 469 void prependText(HTMLString text) { 470 auto node = document_.createTextNode(text); 471 if (firstChild_) { 472 assert(firstChild_.prev_ is null); 473 firstChild_.prev_ = node; 474 node.next_ = firstChild_; 475 firstChild_ = node; 476 } else { 477 assert(lastChild_ is null); 478 firstChild_ = node; 479 lastChild_ = node; 480 } 481 node.parent_ = this; 482 } 483 484 void appendText(HTMLString text) { 485 auto node = document_.createTextNode(text); 486 if (lastChild_) { 487 assert(lastChild_.next_ is null); 488 lastChild_.next_ = node; 489 node.prev_ = lastChild_; 490 lastChild_ = node; 491 } else { 492 assert(firstChild_ is null); 493 firstChild_ = node; 494 lastChild_ = node; 495 } 496 node.parent_ = this; 497 } 498 499 void insertBefore(Node node) { 500 assert(document_ is node.document_); 501 502 parent_ = node.parent_; 503 prev_ = node.prev_; 504 next_ = node; 505 node.prev_ = this; 506 507 if (prev_) 508 prev_.next_ = this; 509 else if (parent_) 510 parent_.firstChild_ = this; 511 } 512 513 void insertAfter(Node node) { 514 assert(document_ is node.document_); 515 516 parent_ = node.parent_; 517 prev_ = node; 518 next_ = node.next_; 519 node.next_ = this; 520 521 if (next_) 522 next_.prev_ = this; 523 else if (parent_) 524 parent_.lastChild_ = this; 525 } 526 527 void detach() { 528 if (parent_) { 529 if (parent_.firstChild_ is this) { 530 parent_.firstChild_ = next_; 531 if (next_) { 532 next_.prev_ = null; 533 next_ = null; 534 } else { 535 parent_.lastChild_ = null; 536 } 537 538 assert(prev_ is null); 539 } else if (parent_.lastChild_ is this) { 540 parent_.lastChild_ = prev_; 541 assert(prev_); 542 assert(next_ is null); 543 prev_.next_ = null; 544 prev_ = null; 545 } else { 546 assert(prev_); 547 548 prev_.next_ = next_; 549 if (next_) { 550 next_.prev_ = prev_; 551 next_ = null; 552 } 553 prev_ = null; 554 } 555 parent_ = null; 556 } 557 } 558 559 package void detachFast() { 560 if (parent_) { 561 if (parent_.firstChild_ is this) { 562 parent_.firstChild_ = next_; 563 if (next_) { 564 next_.prev_ = null; 565 } else { 566 parent_.lastChild_ = null; 567 } 568 569 assert(prev_ is null); 570 } else if (parent_.lastChild_ is this) { 571 parent_.lastChild_ = prev_; 572 assert(prev_); 573 assert(next_ is null); 574 prev_.next_ = null; 575 } else { 576 assert(prev_); 577 578 prev_.next_ = next_; 579 if (next_) { 580 next_.prev_ = prev_; 581 } 582 } 583 } 584 } 585 586 void destroy() { 587 detachFast(); 588 destroyChildren(); 589 document_.destroyNode(this); 590 } 591 592 void innerHTML(Appender)(ref Appender app) const { 593 auto child = cast(Node)firstChild_; 594 while (child) { 595 child.outerHTML(app); 596 child = child.next_; 597 } 598 } 599 600 void compactInnerHTML(Appender)(ref Appender app) const { 601 auto child = cast(Node)firstChild_; 602 while (child) { 603 child.compactOuterHTML(app); 604 child = child.next_; 605 } 606 } 607 608 void outerHTML(Appender)(ref Appender app) const { 609 final switch (type) with (NodeTypes) { 610 case Element: 611 app.put('<'); 612 app.put(tag_); 613 614 foreach(HTMLString attr, HTMLString value; attrs_) { 615 app.put(' '); 616 app.put(attr); 617 618 if (value.length) { 619 if (value.requiresQuotes) { 620 app.put("=\""); 621 app.writeQuotesEscaped(value); 622 app.put("\""); 623 } else { 624 app.put('='); 625 app.put(value); 626 } 627 } 628 } 629 630 if (isVoidElement) { 631 app.put(">"); 632 } else { 633 app.put('>'); 634 if (isScriptOrStyleElement) { 635 if (firstChild_) 636 app.put(firstChild_.tag_); 637 } else { 638 innerHTML(app); 639 } 640 app.put("</"); 641 app.put(tag_); 642 app.put('>'); 643 } 644 break; 645 case Text: 646 app.put(tag_); 647 break; 648 case Comment: 649 app.put("<!--"); 650 app.put(tag_); 651 app.put("-->"); 652 break; 653 case CDATA: 654 app.put("<![CDATA["); 655 app.put(tag_); 656 app.put("]]>"); 657 break; 658 case Declaration: 659 app.put("<!"); 660 app.put(tag_); 661 app.put(">"); 662 break; 663 case ProcessingInstruction: 664 app.put("<?"); 665 app.put(tag_); 666 app.put(">"); 667 break; 668 } 669 } 670 671 void compactOuterHTML(Appender)(ref Appender app) const { 672 final switch (type) with (NodeTypes) { 673 case Element: 674 app.put('<'); 675 app.put(tag_); 676 677 foreach (HTMLString attr, HTMLString value; attrs_) { 678 app.put(' '); 679 app.put(attr); 680 681 if (value.length) { 682 if (value.requiresQuotes) { 683 app.put("=\""); 684 app.writeQuotesEscaped(value); 685 app.put("\""); 686 } else { 687 app.put('='); 688 app.put(value); 689 } 690 } 691 } 692 693 if (isVoidElement) { 694 app.put(">"); 695 } else { 696 app.put('>'); 697 if (isScriptOrStyleElement) { 698 if (firstChild_) 699 app.put(firstChild_.tag_.strip()); 700 } else { 701 compactInnerHTML(app); 702 } 703 app.put("</"); 704 app.put(tag_); 705 app.put('>'); 706 } 707 break; 708 case Text: 709 auto ptr = tag_.ptr; 710 const end = ptr + tag_.length; 711 712 if (tag_.isAllWhite()) { 713 size_t aroundCount; 714 Node[2] around; 715 716 around.ptr[aroundCount] = cast(Node)prev_; 717 if (!around.ptr[aroundCount]) 718 around.ptr[aroundCount] = cast(Node)parent_; 719 if (around.ptr[aroundCount]) 720 ++aroundCount; 721 around.ptr[aroundCount] = cast(Node)next_; 722 if (!around.ptr[aroundCount]) 723 around.ptr[aroundCount] = cast(Node)parent_; 724 if (around.ptr[aroundCount] && (!aroundCount || (around.ptr[aroundCount] !is around.ptr[aroundCount - 1]))) 725 ++aroundCount; 726 727 auto tagsMatch = true; 728 Laround: foreach (i; 0..aroundCount) { 729 if (around.ptr[i].isElementNode) { 730 if (!around.ptr[i].isBlockElement) { 731 tagsMatch = false; 732 break Laround; 733 } 734 } 735 } 736 737 if (!tagsMatch && (ptr != end)) 738 app.put(*ptr); 739 } else { 740 auto space = false; 741 while (ptr != end) { 742 auto ch = *ptr++; 743 if (isWhite(ch)) { 744 if (space) 745 continue; 746 space = true; 747 } else { 748 space = false; 749 } 750 app.put(ch); 751 } 752 } 753 break; 754 case Comment: 755 auto stripped = tag_.strip(); 756 if (!stripped.empty() && (tag_.front() == '[')) { 757 app.put("<!--"); 758 app.put(tag_); 759 app.put("-->"); 760 } 761 break; 762 case CDATA: 763 app.put("<![CDATA["); 764 app.put(tag_); 765 app.put("]]>"); 766 break; 767 case Declaration: 768 app.put("<!"); 769 app.put(tag_); 770 app.put(">"); 771 break; 772 case ProcessingInstruction: 773 app.put("<?"); 774 app.put(tag_); 775 app.put(">"); 776 break; 777 } 778 } 779 780 void toString(Appender)(ref Appender app) const { 781 final switch (type) with (NodeTypes) { 782 case Element: 783 app.put('<'); 784 app.put(tag_); 785 786 foreach(HTMLString attr, HTMLString value; attrs_) { 787 app.put(' '); 788 app.put(attr); 789 790 if (value.length) { 791 app.put("=\""); 792 writeHTMLEscaped!(Yes.escapeQuotes)(app, value); 793 app.put("\""); 794 } 795 } 796 if (isVoidElement) { 797 app.put(">"); 798 } else { 799 app.put("/>"); 800 } 801 break; 802 case Text: 803 writeHTMLEscaped!(No.escapeQuotes)(app, tag_); 804 break; 805 case Comment: 806 app.put("<!--"); 807 app.put(tag_); 808 app.put("-->"); 809 break; 810 case CDATA: 811 app.put("<![CDATA["); 812 app.put(tag_); 813 app.put("]]>"); 814 break; 815 case Declaration: 816 app.put("<!"); 817 app.put(tag_); 818 app.put(">"); 819 break; 820 case ProcessingInstruction: 821 app.put("<?"); 822 app.put(tag_); 823 app.put(">"); 824 break; 825 } 826 } 827 828 override string toString() const { 829 Appender!HTMLString app; 830 toString(app); 831 return cast(string)app.data; 832 } 833 834 @property auto parent() const { 835 return parent_; 836 } 837 838 @property auto parent() { 839 return parent_; 840 } 841 842 @property auto firstChild() const { 843 return firstChild_; 844 } 845 846 @property auto firstChild() { 847 return firstChild_; 848 } 849 850 @property auto lastChild() const { 851 return lastChild_; 852 } 853 854 @property auto lastChild() { 855 return lastChild_; 856 } 857 858 @property auto previousSibling() const { 859 return prev_; 860 } 861 862 @property auto previousSibling() { 863 return prev_; 864 } 865 866 @property auto nextSibling() const { 867 return next_; 868 } 869 870 @property auto nextSibling() { 871 return next_; 872 } 873 874 @property auto children() const { 875 return ChildrenForward!(const(Node))(firstChild_); 876 } 877 878 @property auto children() { 879 return ChildrenForward!Node(firstChild_); 880 } 881 882 @property auto attrs() const { 883 return attrs_; 884 } 885 886 @property auto attrs() { 887 return attrs_; 888 } 889 890 auto find(HTMLString selector) const { 891 return document_.querySelectorAll(selector, this); 892 } 893 894 auto find(HTMLString selector) { 895 return document_.querySelectorAll(selector, this); 896 } 897 898 auto find(Selector selector) const { 899 return document_.querySelectorAll(selector, this); 900 } 901 902 auto find(Selector selector) { 903 return document_.querySelectorAll(selector, this); 904 } 905 906 auto closest(HTMLString selector) const { 907 auto rules = Selector.parse(selector); 908 return closest(rules); 909 } 910 911 Node closest(HTMLString selector) { 912 auto rules = Selector.parse(selector); 913 return closest(rules); 914 } 915 916 auto closest(Selector selector) const { 917 if (selector.matches(this)) 918 return this; 919 920 foreach (node; ancestors) { 921 if (selector.matches(node)) 922 return node; 923 } 924 return null; 925 } 926 927 Node closest(Selector selector) { 928 if (selector.matches(this)) 929 return this; 930 931 foreach (node; ancestors) { 932 if (selector.matches(node)) 933 return node; 934 } 935 return null; 936 } 937 938 @property auto ancestors() const { 939 return AncestorsForward!(const(Node))(parent_); 940 } 941 942 @property auto ancestors() { 943 return AncestorsForward!Node(parent_); 944 } 945 946 @property auto descendants() const { 947 return DescendantsForward!(const(Node))(this); 948 } 949 950 @property auto descendants() { 951 return DescendantsForward!Node(this); 952 } 953 954 @property isSelfClosing() const { 955 return (flags_ & Flags.SelfClosing) != 0; 956 } 957 958 @property isVoidElement() const { 959 return (flags_ & Flags.VoidElement) != 0; 960 } 961 962 @property isBlockElement() const { 963 return (flags_ & Flags.BlockElement) != 0; 964 } 965 966 @property isScriptElement() const { 967 return (flags_ & Flags.ScriptElement) != 0; 968 } 969 970 @property isStyleElement() const { 971 return (flags_ & Flags.StyleElement) != 0; 972 } 973 974 @property isScriptOrStyleElement() const { 975 return (flags_ & (Flags.ScriptElement | Flags.StyleElement)) != 0; 976 } 977 978 @property bool hasID() const { 979 return (flags_ & Flags.HasID) != 0; 980 } 981 982 @property isElementNode() const { 983 return type == NodeTypes.Element; 984 } 985 986 @property isTextNode() const { 987 return type == NodeTypes.Text; 988 } 989 990 @property isCommentNode() const { 991 return type == NodeTypes.Comment; 992 } 993 994 @property isCDATANode() const { 995 return type == NodeTypes.CDATA; 996 } 997 998 @property isDeclarationNode() const { 999 return type == NodeTypes.Declaration; 1000 } 1001 1002 @property isProcessingInstructionNode() const { 1003 return type == NodeTypes.ProcessingInstruction; 1004 } 1005 1006 Node clone(Document document) const { 1007 auto node = document.allocNode(tag_, flags_); 1008 node.tagHash_ = tagHash_; 1009 node.flags_ = flags_; 1010 1011 foreach (HTMLString attr, HTMLString value; attrs_) 1012 node.attrs_[attr] = value; 1013 1014 Node current = cast(Node)firstChild_; 1015 1016 while (current !is null) { 1017 auto newChild = current.clone(document); 1018 node.appendChild(newChild); 1019 current = current.next_; 1020 } 1021 return node; 1022 } 1023 1024 Node clone() const { 1025 return document_.cloneNode(this); 1026 } 1027 1028 package: 1029 enum TypeMask = 0x7UL; 1030 enum TypeShift = 0UL; 1031 enum FlagsBit = TypeMask + 1; 1032 enum Flags { 1033 SelfClosing = FlagsBit << 1, 1034 VoidElement = FlagsBit << 2, 1035 BlockElement = FlagsBit << 3, 1036 ScriptElement = FlagsBit << 4, 1037 StyleElement = FlagsBit << 5, 1038 1039 HasID = FlagsBit << 6, 1040 } 1041 1042 size_t flags_; 1043 hash_t tagHash_; 1044 HTMLString tag_; // when Text flag is set, will contain the text itself 1045 HTMLString[HTMLString] attrs_; 1046 1047 Node parent_; 1048 Node firstChild_; 1049 Node lastChild_; 1050 1051 // siblings 1052 Node prev_; 1053 Node next_; 1054 1055 Document document_; 1056 } 1057 1058 1059 auto createDocument(size_t Options = DOMCreateOptions.Default)(HTMLString source, IAllocator alloc = theAllocator) { 1060 enum parserOptions = ((Options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0); 1061 static assert((Options & DOMCreateOptions.ValidateAll) == 0, "requested validation with no error callable"); 1062 1063 auto document = createDocument(alloc); 1064 auto builder = DOMBuilder!(Document, Options)(document); 1065 1066 parseHTML!(typeof(builder), parserOptions)(source, builder); 1067 return document; 1068 } 1069 1070 1071 auto createDocument(size_t Options = DOMCreateOptions.Default | DOMCreateOptions.ValidateAll)(HTMLString source, ValidationErrorCallable errorCallable, IAllocator alloc = theAllocator) { 1072 enum parserOptions = ((Options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0); 1073 static assert((Options & DOMCreateOptions.ValidateAll) != 0, "error callable but validation not requested"); 1074 1075 auto document = createDocument(alloc); 1076 auto builder = DOMBuilder!(Document, Options)(document, errorCallable); 1077 1078 parseHTML!(typeof(builder), parserOptions)(source, builder); 1079 return document; 1080 } 1081 1082 1083 unittest { 1084 auto doc = createDocument("<html> <body> \n\r\f\t </body> </html>"); 1085 assert(doc.root.compactOuterHTML == "<root><html><body></body></html></root>"); 1086 doc = createDocument("<html> <body> <div> <p> <a> </a> </p> </div> </body> </html>"); 1087 assert(doc.root.compactOuterHTML == "<root><html><body><div><p> <a> </a> </p></div></body></html></root>"); 1088 } 1089 1090 unittest { 1091 auto doc = createDocument(`<html><body> </body></html>`); 1092 assert(doc.root.outerHTML == "<root><html><body>\ </body></html></root>"); 1093 doc = createDocument!(DOMCreateOptions.None)(`<html><body> </body></html>`); 1094 assert(doc.root.outerHTML == `<root><html><body> </body></html></root>`); 1095 doc = createDocument(`<script> </script>`); 1096 assert(doc.root.outerHTML == `<root><script> </script></root>`, doc.root.outerHTML); 1097 doc = createDocument(`<style> </style>`); 1098 assert(doc.root.outerHTML == `<root><style> </style></root>`, doc.root.outerHTML); 1099 } 1100 1101 unittest { 1102 const doc = createDocument(`<html><body><div title='"Тест'"'>"К"ириллица</div></body></html>`); 1103 assert(doc.root.html == `<html><body><div title=""Тест'"">"К"ириллица</div></body></html>`); 1104 } 1105 1106 unittest { 1107 // void elements should not be self-closed 1108 auto doc = createDocument(`<area><base><br><col>`); 1109 assert(doc.root.outerHTML == `<root><area><base><br><col></root>`, doc.root.outerHTML); 1110 doc = createDocument(`<svg /><math /><svg></svg>`); 1111 assert(doc.root.outerHTML == `<root><svg></svg><math></math><svg></svg></root>`, doc.root.outerHTML); 1112 doc = createDocument(`<br /><div />`); 1113 assert(doc.root.outerHTML == `<root><br><div></div></root>`, doc.root.outerHTML); 1114 } 1115 1116 // toString prints elements with content as <tag attr="value"/> 1117 unittest { 1118 // self-closed element w/o content 1119 auto doc = createDocument(`<svg />`); 1120 assert(doc.root.firstChild.toString == `<svg/>`, doc.root.firstChild.toString); 1121 // elements w/ content 1122 doc = createDocument(`<svg></svg>`); 1123 assert(doc.root.firstChild.toString == `<svg/>`, doc.root.firstChild.toString); 1124 doc = createDocument(`<div class="mydiv"></div>`); 1125 assert(doc.root.firstChild.toString == `<div class="mydiv"/>`, doc.root.firstChild.toString); 1126 // void element 1127 doc = createDocument(`<br>`); 1128 assert(doc.root.firstChild.toString == `<br>`, doc.root.firstChild.toString); 1129 // "invalid" self-closed void element 1130 doc = createDocument(`<br />`); 1131 assert(doc.root.firstChild.toString == `<br>`, doc.root.firstChild.toString); 1132 } 1133 1134 unittest { 1135 const doc = createDocument(`<html><body><div> </div></body></html>`); 1136 assert(doc.root.find("html").front.outerHTML == "<html><body><div>\ </div></body></html>"); 1137 assert(doc.root.find("html").front.find("div").front.outerHTML == "<div>\ </div>"); 1138 assert(doc.root.find("body").front.outerHTML == "<body><div>\ </div></body>"); 1139 assert(doc.root.find("body").front.closest("body").outerHTML == "<body><div>\ </div></body>"); // closest() tests self 1140 assert(doc.root.find("body").front.closest("html").outerHTML == "<html><body><div>\ </div></body></html>"); 1141 } 1142 1143 unittest { 1144 const doc = createDocument(`<html><body><![CDATA[test]]></body></html>`); 1145 assert(doc.root.firstChild.firstChild.firstChild.isCDATANode); 1146 assert(doc.root.html == `<html><body><![CDATA[test]]></body></html>`); 1147 } 1148 1149 1150 static auto createDocument(IAllocator alloc = theAllocator) { 1151 auto document = new Document(alloc); 1152 document.root(document.createElement("root")); 1153 return document; 1154 } 1155 1156 1157 class Document { 1158 private this(IAllocator alloc) { 1159 alloc_ = alloc; 1160 } 1161 1162 auto createElement(HTMLString tagName, Node parent = null) { 1163 auto node = allocNode(tagName, NodeTypes.Element << Node.TypeShift); 1164 node.tagHash_ = tagHashOf(tagName); 1165 node.flags_ |= elementFlags(node.tagHash_); 1166 if (parent) 1167 parent.appendChild(node); 1168 return node; 1169 } 1170 1171 auto createTextNode(HTMLString text, Node parent = null) { 1172 auto node = allocNode(text, NodeTypes.Text << Node.TypeShift); 1173 if (parent) 1174 parent.appendChild(node); 1175 return node; 1176 } 1177 1178 auto createCommentNode(HTMLString comment, Node parent = null) { 1179 auto node = allocNode(comment, NodeTypes.Comment << Node.TypeShift); 1180 if (parent) 1181 parent.appendChild(node); 1182 return node; 1183 } 1184 1185 auto createCDATANode(HTMLString cdata, Node parent = null) { 1186 auto node = allocNode(cdata, NodeTypes.CDATA << Node.TypeShift); 1187 if (parent) 1188 parent.appendChild(node); 1189 return node; 1190 } 1191 1192 auto createDeclarationNode(HTMLString data, Node parent = null) { 1193 auto node = allocNode(data, NodeTypes.Declaration << Node.TypeShift); 1194 if (parent) 1195 parent.appendChild(node); 1196 return node; 1197 } 1198 1199 auto createProcessingInstructionNode(HTMLString data, Node parent = null) { 1200 auto node = allocNode(data, NodeTypes.ProcessingInstruction << Node.TypeShift); 1201 if (parent) 1202 parent.appendChild(node); 1203 return node; 1204 } 1205 1206 Node cloneNode(const(Node) other) const { 1207 return other.clone(cast(Document)this); 1208 } 1209 1210 Document clone(IAllocator alloc = theAllocator) { 1211 Document other = new Document(alloc); 1212 other.root(other.cloneNode(this.root_)); 1213 return other; 1214 } 1215 1216 @property auto root() { 1217 return root_; 1218 } 1219 1220 @property auto root() const { 1221 return root_; 1222 } 1223 1224 @property void root(Node root) { 1225 if (root && (root.document_.alloc_ != alloc_)) 1226 alloc_ = root.document_.alloc_; 1227 root_ = root; 1228 } 1229 1230 @property auto nodes() const { 1231 return DescendantsForward!(const(Node))(root_); 1232 } 1233 1234 @property auto nodes() { 1235 return DescendantsForward!Node(root_); 1236 } 1237 1238 @property auto elements() const { 1239 return DescendantsForward!(const(Node), onlyElements)(root_); 1240 } 1241 1242 @property auto elements() { 1243 return DescendantsForward!(Node, onlyElements)(root_); 1244 } 1245 1246 @property auto elementsByTagName(HTMLString tag) const { 1247 const tagHash = tagHashOf(tag); 1248 return DescendantsForward!(const(Node), (a) { return a.isElementNode && (a.tagHash == tagHash); })(root_); 1249 } 1250 1251 @property auto elementsByTagName(HTMLString tag) { 1252 const tagHash = tagHashOf(tag); 1253 return DescendantsForward!(Node, (a) { return a.isElementNode && (a.tagHash == tagHash); })(root_); 1254 } 1255 1256 const(Node) querySelector(HTMLString selector, Node context = null) const { 1257 auto rules = Selector.parse(selector); 1258 return querySelector(rules, context); 1259 } 1260 1261 Node querySelector(HTMLString selector, Node context = null) { 1262 auto rules = Selector.parse(selector); 1263 return querySelector(rules, context); 1264 } 1265 1266 const(Node) querySelector(Selector selector, const(Node) context = null) const { 1267 auto top = context ? context : root_; 1268 1269 foreach(node; DescendantsForward!(const(Node), onlyElements)(top)) { 1270 if (selector.matches(node)) 1271 return node; 1272 } 1273 return null; 1274 } 1275 1276 Node querySelector(Selector selector, Node context = null) { 1277 auto top = context ? context : root_; 1278 1279 foreach(node; DescendantsForward!(Node, onlyElements)(top)) { 1280 if (selector.matches(node)) 1281 return node; 1282 } 1283 return null; 1284 } 1285 1286 alias QuerySelectorAllResult = QuerySelectorMatcher!(Node, DescendantsForward!Node); 1287 alias QuerySelectorAllConstResult = QuerySelectorMatcher!(const(Node), DescendantsForward!(const(Node))); 1288 1289 QuerySelectorAllResult querySelectorAll(HTMLString selector, Node context = null) { 1290 auto rules = Selector.parse(selector); 1291 return querySelectorAll(rules, context); 1292 } 1293 1294 QuerySelectorAllConstResult querySelectorAll(HTMLString selector, const(Node) context = null) const { 1295 auto rules = Selector.parse(selector); 1296 return querySelectorAll(rules, context); 1297 } 1298 1299 QuerySelectorAllConstResult querySelectorAll(Selector selector, const(Node) context = null) const { 1300 auto top = context ? context : root_; 1301 return QuerySelectorMatcher!(const(Node), DescendantsForward!(const(Node)))(selector, DescendantsForward!(const(Node))(top)); 1302 } 1303 1304 QuerySelectorAllResult querySelectorAll(Selector selector, Node context = null) { 1305 auto top = context ? context : root_; 1306 return QuerySelectorMatcher!(Node, DescendantsForward!Node)(selector, DescendantsForward!Node(top)); 1307 } 1308 1309 void toString(Appender)(ref Appender app) const { 1310 root_.outerHTML(app); 1311 } 1312 1313 override string toString() const { 1314 auto app = appender!HTMLString; 1315 root_.outerHTML(app); 1316 return cast(string)app.data; 1317 } 1318 1319 auto allocNode()(HTMLString tag, size_t flags) { 1320 enum NodeSize = __traits(classInstanceSize, Node); 1321 auto ptr = cast(Node)alloc_.allocate(NodeSize).ptr; 1322 (cast(void*)ptr)[0..NodeSize] = typeid(Node).initializer[]; 1323 ptr.__ctor(tag, this, flags); 1324 return ptr; 1325 } 1326 1327 void destroyNode(Node node) { 1328 assert(node.firstChild_ is null); 1329 alloc_.dispose(node); 1330 } 1331 1332 private: 1333 Node root_; 1334 IAllocator alloc_; 1335 } 1336 1337 1338 unittest { 1339 const(char)[] src = `<parent attr=value><child></child>text</parent>`; 1340 auto doc = createDocument(src); 1341 assert(doc.root.html == src, doc.root.html); 1342 1343 const(char)[] srcq = `<parent attr="v a l u e"><child></child>text</parent>`; 1344 auto docq = createDocument(srcq); 1345 assert(docq.root.html == srcq, docq.root.html); 1346 1347 // basic cloning 1348 auto cloned = doc.cloneNode(doc.root); 1349 assert(cloned.html == src, cloned.html); 1350 assert(doc.root.html == src, cloned.html); 1351 1352 assert(!cloned.find("child").empty); 1353 assert(!cloned.find("parent").empty); 1354 1355 // clone mutation 1356 auto child = cloned.find("child").front.clone; 1357 child.attr("attr", "test"); 1358 cloned.find("parent").front.appendChild(child); 1359 assert(cloned.html == `<parent attr=value><child></child>text<child attr=test></child></parent>`, cloned.html); 1360 assert(doc.root.html == src, doc.root.html); 1361 1362 child.text = "text"; 1363 assert(cloned.html == `<parent attr=value><child></child>text<child attr=test>text</child></parent>`, cloned.html); 1364 assert(doc.root.html == src, doc.root.html); 1365 1366 // document cloning 1367 auto docc = doc.clone; 1368 assert(docc.root.html == doc.root.html, docc.root.html); 1369 } 1370 1371 1372 static struct DOMBuilder(Document, size_t Options) { 1373 enum { 1374 Validate = (Options & DOMCreateOptions.ValidateAll) != 0, 1375 ValidateClosed = (Options & DOMCreateOptions.ValidateClosed) != 0, 1376 ValidateSelfClosing = (Options & DOMCreateOptions.ValidateSelfClosing) != 0, 1377 ValidateDuplicateAttr = (Options & DOMCreateOptions.ValidateDuplicateAttr) != 0, 1378 } 1379 1380 static if (Validate) { 1381 this(Document document, ValidationErrorCallable errorCallable, Node parent = null) { 1382 document_ = document; 1383 element_ = parent ? parent : document.root; 1384 error_ = errorCallable; 1385 } 1386 } else { 1387 this(Document document, Node parent = null) { 1388 document_ = document; 1389 element_ = parent ? parent : document.root; 1390 } 1391 } 1392 1393 void onText(HTMLString data) { 1394 if (data.ptr == (text_.ptr + text_.length)) { 1395 text_ = text_.ptr[0..text_.length + data.length]; 1396 } else { 1397 text_ ~= data; 1398 } 1399 } 1400 1401 void onSelfClosing() { 1402 if (element_.isVoidElement) { 1403 element_.flags_ |= Node.Flags.SelfClosing; 1404 } else { 1405 static if (ValidateSelfClosing) { 1406 error_(ValidationError.SelfClosingNonVoidElement, element_.tag_, null); 1407 } 1408 } 1409 element_ = element_.parent_; 1410 } 1411 1412 void onOpenStart(HTMLString data) { 1413 if (!text_.empty) { 1414 element_.appendText(text_); 1415 text_.length = 0; 1416 } 1417 1418 element_ = document_.createElement(data, element_); 1419 element_.flags_ |= elementFlags(element_.tagHash_); 1420 } 1421 1422 void onOpenEnd(HTMLString data) { 1423 // void elements have neither content nor a closing tag, so 1424 // we're done w/ them on the end of the open tag 1425 if (element_.isVoidElement) 1426 element_ = element_.parent_; 1427 } 1428 1429 void onClose(HTMLString data) { 1430 if (!text_.empty) { 1431 if (element_) { 1432 element_.appendText(text_); 1433 text_.length = 0; 1434 } else { 1435 document_.root.appendText(text_); 1436 } 1437 } 1438 1439 if (element_) { 1440 assert(!element_.isTextNode); 1441 if (element_.tag.equalsCI(data)) { 1442 element_ = element_.parent_; 1443 } else { 1444 auto element = element_.parent_; 1445 while (element) { 1446 if (element.tag.equalsCI(data)) { 1447 static if (ValidateClosed) { 1448 error_(ValidationError.MismatchingClose, data, element_.tag); 1449 } 1450 element_ = element.parent_; 1451 break; 1452 } 1453 element = element.parent_; 1454 } 1455 static if (ValidateClosed) { 1456 if (!element) 1457 error_(ValidationError.StrayClose, data, null); 1458 } 1459 } 1460 } else { 1461 static if (ValidateClosed) { 1462 error_(ValidationError.StrayClose, data, null); 1463 } 1464 } 1465 } 1466 1467 void onAttrName(HTMLString data) { 1468 attr_ = data; 1469 state_ = States.Attr; 1470 } 1471 1472 void onAttrEnd() { 1473 if (!attr_.empty) { 1474 static if (ValidateDuplicateAttr) { 1475 if (attr_ in element_.attrs_) 1476 error_(ValidationError.DuplicateAttr, element_.tag_, attr_); 1477 } 1478 element_.attr(attr_, value_); 1479 } 1480 value_.length = 0; 1481 attr_.length = 0; 1482 state_ = States.Global; 1483 } 1484 1485 void onAttrValue(HTMLString data) { 1486 if (data.ptr == (value_.ptr + value_.length)) { 1487 value_ = value_.ptr[0..value_.length + data.length]; 1488 } else { 1489 value_ ~= data; 1490 } 1491 } 1492 1493 void onComment(HTMLString data) { 1494 document_.createCommentNode(data, element_); 1495 } 1496 1497 void onDeclaration(HTMLString data) { 1498 document_.createDeclarationNode(data, element_); 1499 } 1500 1501 void onProcessingInstruction(HTMLString data) { 1502 document_.createProcessingInstructionNode(data, element_); 1503 } 1504 1505 void onCDATA(HTMLString data) { 1506 document_.createCDATANode(data, element_); 1507 } 1508 1509 void onDocumentEnd() { 1510 if (!text_.empty) { 1511 if (element_) { 1512 element_.appendText(text_); 1513 text_.length = 0; 1514 } else { 1515 document_.root.appendText(text_); 1516 } 1517 } 1518 1519 static if (ValidateClosed) { 1520 while (element_ && element_ !is document_.root_) { 1521 error_(ValidationError.MissingClose, element_.tag, null); 1522 element_ = element_.parent_; 1523 } 1524 } 1525 } 1526 1527 void onNamedEntity(HTMLString data) { 1528 } 1529 1530 void onNumericEntity(HTMLString data) { 1531 } 1532 1533 void onHexEntity(HTMLString data) { 1534 } 1535 1536 void onEntity(HTMLString data, HTMLString decoded) { 1537 if (state_ == States.Global) { 1538 text_ ~= decoded; 1539 } else { 1540 value_ ~= decoded; 1541 } 1542 } 1543 1544 private: 1545 Document document_; 1546 Node element_; 1547 States state_; 1548 static if (Validate) { 1549 ValidationErrorCallable error_; 1550 } 1551 1552 enum States { 1553 Global = 0, 1554 Attr, 1555 } 1556 1557 HTMLString attr_; 1558 HTMLString value_; 1559 HTMLString text_; 1560 } 1561 1562 1563 1564 unittest { 1565 static struct Error { 1566 ValidationError error; 1567 HTMLString tag; 1568 HTMLString related; 1569 } 1570 1571 Error[] errors; 1572 void errorHandler(ValidationError error, HTMLString tag, HTMLString related) { 1573 errors ~= Error(error, tag, related); 1574 } 1575 1576 errors.length = 0; 1577 auto doc = createDocument(`<div><div></div>`, &errorHandler); 1578 assert(errors == [Error(ValidationError.MissingClose, "div")]); 1579 1580 errors.length = 0; 1581 doc = createDocument(`<div></div></div>`, &errorHandler); 1582 assert(errors == [Error(ValidationError.StrayClose, "div")]); 1583 1584 errors.length = 0; 1585 doc = createDocument(`<span><div></span>`, &errorHandler); 1586 assert(errors == [Error(ValidationError.MismatchingClose, "span", "div")]); 1587 1588 errors.length = 0; 1589 doc = createDocument(`<div />`, &errorHandler); 1590 assert(errors == [Error(ValidationError.SelfClosingNonVoidElement, "div")]); 1591 1592 errors.length = 0; 1593 doc = createDocument(`<hr></hr>`, &errorHandler); 1594 assert(errors == [Error(ValidationError.StrayClose, "hr")]); 1595 1596 errors.length = 0; 1597 doc = createDocument(`<span id=moo id=foo class=moo class=></span>`, &errorHandler); 1598 assert(errors == [Error(ValidationError.DuplicateAttr, "span", "id"), Error(ValidationError.DuplicateAttr, "span", "class")]); 1599 } 1600 1601 1602 private struct Rule { 1603 enum Flags : ushort { 1604 HasTag = 1 << 0, 1605 HasID = 1 << 1, 1606 HasAttr = 1 << 2, 1607 HasPseudo = 1 << 3, 1608 CaseSensitive = 1 << 4, 1609 HasAny = 1 << 5, 1610 } 1611 1612 enum MatchType : ubyte { 1613 None = 0, 1614 Set, 1615 Exact, 1616 ContainWord, 1617 Contain, 1618 Begin, 1619 BeginHyphen, 1620 End, 1621 } 1622 1623 enum Relation : ubyte { 1624 None = 0, 1625 Descendant, 1626 Child, 1627 DirectAdjacent, 1628 IndirectAdjacent, 1629 } 1630 1631 bool matches(NodeType)(NodeType element) const { 1632 if (flags_ == 0) 1633 return false; 1634 1635 if (flags_ & Flags.HasTag) { 1636 if (tagHash_ != element.tagHash) 1637 return false; 1638 } 1639 1640 if (flags_ & Flags.HasID) { 1641 if (value_.empty || !element.hasID) return false; 1642 if (value_ != element.id()) return false; 1643 } 1644 1645 if (flags_ & Flags.HasAttr) { 1646 auto cs = (flags_ & Flags.CaseSensitive) != 0; 1647 final switch (match_) with (MatchType) { 1648 case None: 1649 break; 1650 case Set: 1651 if (element.attr(attr_) == null) 1652 return false; 1653 break; 1654 case Exact: 1655 if (value_.empty) return false; 1656 auto attr = element.attr(attr_); 1657 if (!attr || (cs ? (value_ != attr) : !value_.equalsCI(attr))) 1658 return false; 1659 break; 1660 case Contain: 1661 if (value_.empty) return false; 1662 auto attr = element.attr(attr_); 1663 if (!attr || ((attr.indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) == -1)) 1664 return false; 1665 break; 1666 case ContainWord: 1667 if (value_.empty) return false; 1668 auto attr = element.attr(attr_); 1669 if (!attr) 1670 return false; 1671 1672 size_t start = 0; 1673 while (true) { 1674 auto index = attr.indexOf(value_, start, cs ? CaseSensitive.yes : CaseSensitive.no); 1675 if (index == -1) 1676 return false; 1677 if (index && !isWhite(attr[index - 1])) 1678 return false; 1679 if ((index + value_.length == attr.length) || isWhite(attr[index + value_.length])) 1680 break; 1681 start = index + 1; 1682 } 1683 break; 1684 case Begin: 1685 if (value_.empty) return false; 1686 auto attr = element.attr(attr_); 1687 if (!attr || ((attr.indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0)) 1688 return false; 1689 break; 1690 case End: 1691 if (value_.empty) return false; 1692 auto attr = element.attr(attr_); 1693 if (!attr || ((attr.lastIndexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != (attr.length - value_.length))) 1694 return false; 1695 break; 1696 case BeginHyphen: 1697 if (value_.empty) return false; 1698 auto attr = element.attr(attr_); 1699 if (!attr || ((attr.indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0) || ((attr.length > value_.length) && (attr[value_.length] != '-'))) 1700 return false; 1701 break; 1702 } 1703 } 1704 1705 if (flags_ & Flags.HasPseudo) { 1706 switch (pseudo_) { 1707 case quickHashOf("checked"): 1708 if (!element.hasAttr("checked")) 1709 return false; 1710 break; 1711 1712 case quickHashOf("enabled"): 1713 if (element.hasAttr("disabled")) 1714 return false; 1715 break; 1716 1717 case quickHashOf("disabled"): 1718 if (!element.hasAttr("disabled")) 1719 return false; 1720 break; 1721 1722 case quickHashOf("empty"): 1723 if (element.firstChild_) 1724 return false; 1725 break; 1726 1727 case quickHashOf("optional"): 1728 if (element.hasAttr("required")) 1729 return false; 1730 break; 1731 1732 case quickHashOf("read-only"): 1733 if (!element.hasAttr("readonly")) 1734 return false; 1735 break; 1736 1737 case quickHashOf("read-write"): 1738 if (element.hasAttr("readonly")) 1739 return false; 1740 break; 1741 1742 case quickHashOf("required"): 1743 if (!element.hasAttr("required")) 1744 return false; 1745 break; 1746 1747 case quickHashOf("lang"): 1748 if (element.attr("lang") != pseudoArg_) 1749 return false; 1750 break; 1751 1752 case quickHashOf("first-child"): 1753 if (!element.parent_ || (element.parent_.firstChild !is element)) 1754 return false; 1755 break; 1756 1757 case quickHashOf("last-child"): 1758 if (!element.parent_ || (element.parent_.lastChild !is element)) 1759 return false; 1760 break; 1761 1762 case quickHashOf("first-of-type"): 1763 Node sibling = element.previousSibling; 1764 while (sibling) { 1765 if (sibling.isElementNode && sibling.tag.equalsCI(element.tag)) 1766 return false; 1767 sibling = sibling.previousSibling; 1768 } 1769 break; 1770 1771 case quickHashOf("last-of-type"): 1772 auto sibling = element.nextSibling; 1773 while (sibling) { 1774 if (sibling.isElementNode && sibling.tag.equalsCI(element.tag)) 1775 return false; 1776 sibling = sibling.nextSibling; 1777 } 1778 break; 1779 1780 case quickHashOf("nth-child"): 1781 auto ith = 1; 1782 auto sibling = element.previousSibling; 1783 while (sibling) { 1784 if (ith > pseudoArgNum_) 1785 return false; 1786 if (sibling.isElementNode) 1787 ++ith; 1788 sibling = sibling.previousSibling; 1789 } 1790 if (ith != pseudoArgNum_) 1791 return false; 1792 break; 1793 1794 case quickHashOf("nth-last-child"): 1795 auto ith = 1; 1796 auto sibling = element.nextSibling; 1797 while (sibling) { 1798 if (ith > pseudoArgNum_) 1799 return false; 1800 if (sibling.isElementNode) 1801 ++ith; 1802 sibling = sibling.nextSibling; 1803 } 1804 if (ith != pseudoArgNum_) 1805 return false; 1806 break; 1807 1808 case quickHashOf("nth-of-type"): 1809 auto ith = 1; 1810 auto sibling = element.previousSibling; 1811 while (sibling) { 1812 if (ith > pseudoArgNum_) 1813 return false; 1814 if (sibling.isElementNode && sibling.tag.equalsCI(element.tag)) 1815 ++ith; 1816 sibling = sibling.previousSibling; 1817 } 1818 if (ith != pseudoArgNum_) 1819 return false; 1820 break; 1821 1822 case quickHashOf("nth-last-of-type"): 1823 auto ith = 1; 1824 auto sibling = element.nextSibling; 1825 while (sibling) { 1826 if (ith > pseudoArgNum_) 1827 return false; 1828 if (sibling.isElementNode && sibling.tag.equalsCI(element.tag)) 1829 ++ith; 1830 sibling = sibling.nextSibling; 1831 } 1832 if (ith != pseudoArgNum_) 1833 return false; 1834 break; 1835 1836 case quickHashOf("only-of-type"): 1837 auto sibling = element.previousSibling; 1838 while (sibling) { 1839 if (sibling.isElementNode && sibling.tag.equalsCI(element.tag)) 1840 return false; 1841 sibling = sibling.previousSibling; 1842 } 1843 sibling = element.nextSibling; 1844 while (sibling) { 1845 if (sibling.isElementNode && sibling.tag.equalsCI(element.tag)) 1846 return false; 1847 sibling = sibling.nextSibling; 1848 } 1849 break; 1850 1851 case quickHashOf("only-child"): 1852 auto parent = element.parent_; 1853 if (parent is null) 1854 return false; 1855 Node sibling = parent.firstChild_; 1856 while (sibling) { 1857 if ((sibling !is element) && sibling.isElementNode) 1858 return false; 1859 sibling = sibling.next_; 1860 } 1861 break; 1862 1863 default: 1864 break; 1865 } 1866 } 1867 1868 return true; 1869 } 1870 1871 @property Relation relation() const { 1872 return relation_; 1873 } 1874 1875 package: 1876 ushort flags_; 1877 MatchType match_; 1878 Relation relation_; 1879 hash_t tagHash_; 1880 size_t pseudo_; 1881 HTMLString attr_; 1882 HTMLString value_; 1883 HTMLString pseudoArg_; 1884 uint pseudoArgNum_; 1885 } 1886 1887 1888 struct Selector { 1889 static Selector parse(HTMLString value) { 1890 enum ParserStates { 1891 Identifier = 0, 1892 PostIdentifier, 1893 Tag, 1894 Class, 1895 ID, 1896 AttrName, 1897 AttrOp, 1898 PreAttrValue, 1899 AttrValueDQ, 1900 AttrValueSQ, 1901 AttrValueNQ, 1902 PostAttrValue, 1903 Pseudo, 1904 PseudoArgs, 1905 Relation, 1906 } 1907 1908 size_t ids; 1909 size_t tags; 1910 size_t classes; 1911 1912 value = value.strip; 1913 auto source = uninitializedArray!(char[])(value.length + 1); 1914 source[0..value.length] = value; 1915 source[$-1] = ' '; // add a padding space to ease parsing 1916 1917 auto selector = Selector(source); 1918 Rule[] rules; 1919 ++rules.length; 1920 1921 auto rule = &rules.back; 1922 1923 auto ptr = source.ptr; 1924 auto end = source.ptr + source.length; 1925 auto start = ptr; 1926 1927 ParserStates state = ParserStates.Identifier; 1928 1929 while (ptr != end) { 1930 final switch (state) with (ParserStates) { 1931 case Identifier: 1932 if (*ptr == '#') { 1933 state = ID; 1934 start = ptr + 1; 1935 } else if (*ptr == '.') { 1936 state = Class; 1937 start = ptr + 1; 1938 } else if (*ptr == '[') { 1939 state = AttrName; 1940 start = ptr + 1; 1941 } else if (isAlpha(*ptr)) { 1942 state = Tag; 1943 start = ptr; 1944 continue; 1945 } else if (*ptr == '*') { 1946 rule.flags_ |= Rule.Flags.HasAny; 1947 state = PostIdentifier; 1948 } else if (*ptr == ':') { 1949 rule.flags_ |= Rule.Flags.HasAny; 1950 state = PostIdentifier; 1951 continue; 1952 } 1953 break; 1954 1955 case PostIdentifier: 1956 switch (*ptr) { 1957 case '#': 1958 state = ID; 1959 start = ptr + 1; 1960 break; 1961 case '.': 1962 state = Class; 1963 start = ptr + 1; 1964 break; 1965 case '[': 1966 state = AttrName; 1967 start = ptr + 1; 1968 break; 1969 case ':': 1970 state = Pseudo; 1971 if ((ptr + 1 != end) && (*(ptr + 1) == ':')) 1972 ++ptr; 1973 start = ptr + 1; 1974 break; 1975 default: 1976 state = Relation; 1977 continue; 1978 } 1979 break; 1980 1981 case Tag: 1982 while ((ptr != end) && isAlphaNum(*ptr)) 1983 ++ptr; 1984 if (ptr == end) 1985 continue; 1986 1987 rule.flags_ |= Rule.Flags.HasTag; 1988 rule.tagHash_ = tagHashOf(start[0..ptr-start]); 1989 ++tags; 1990 1991 state = PostIdentifier; 1992 continue; 1993 1994 case Class: 1995 while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_'))) 1996 ++ptr; 1997 if (ptr == end) 1998 continue; 1999 2000 rule.flags_ |= Rule.Flags.HasAttr; 2001 rule.match_ = Rule.MatchType.ContainWord; 2002 rule.attr_ = "class"; 2003 rule.value_ = start[0..ptr-start]; 2004 ++classes; 2005 2006 state = PostIdentifier; 2007 break; 2008 2009 case ID: 2010 while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_'))) 2011 ++ptr; 2012 if (ptr == end) 2013 continue; 2014 2015 rule.flags_ |= Rule.Flags.HasID; 2016 rule.value_ = start[0..ptr-start]; 2017 ++ids; 2018 2019 state = PostIdentifier; 2020 break; 2021 2022 case AttrName: 2023 while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_'))) 2024 ++ptr; 2025 if (ptr == end) 2026 continue; 2027 2028 rule.flags_ |= Rule.Flags.HasAttr; 2029 rule.flags_ |= Rule.Flags.CaseSensitive; 2030 rule.attr_ = start[0..ptr-start]; 2031 ++classes; 2032 2033 state = AttrOp; 2034 continue; 2035 2036 case AttrOp: 2037 while ((ptr != end) && (isWhite(*ptr))) 2038 ++ptr; 2039 if (ptr == end) 2040 continue; 2041 2042 switch (*ptr) { 2043 case ']': 2044 rule.match_ = Rule.MatchType.Set; 2045 state = PostIdentifier; 2046 break; 2047 case '=': 2048 rule.match_ = Rule.MatchType.Exact; 2049 state = PreAttrValue; 2050 break; 2051 default: 2052 if ((ptr + 1 != end) && (*(ptr + 1) == '=')) { 2053 switch (*ptr) { 2054 case '~': 2055 rule.match_ = Rule.MatchType.ContainWord; 2056 break; 2057 case '^': 2058 rule.match_ = Rule.MatchType.Begin; 2059 break; 2060 case '$': 2061 rule.match_ = Rule.MatchType.End; 2062 break; 2063 case '*': 2064 rule.match_ = Rule.MatchType.Contain; 2065 break; 2066 case '|': 2067 rule.match_ = Rule.MatchType.BeginHyphen; 2068 break; 2069 default: 2070 rule.flags_ = 0; // error 2071 ptr = end - 1; 2072 break; 2073 } 2074 2075 state = PreAttrValue; 2076 ++ptr; 2077 } 2078 break; 2079 } 2080 break; 2081 2082 case PreAttrValue: 2083 while ((ptr != end) && isWhite(*ptr)) 2084 ++ptr; 2085 if (ptr == end) 2086 continue; 2087 2088 if (*ptr == '\"') { 2089 state = AttrValueDQ; 2090 start = ptr + 1; 2091 } else if (*ptr == '\'') { 2092 state = AttrValueSQ; 2093 start = ptr + 1; 2094 } else { 2095 state = AttrValueNQ; 2096 start = ptr; 2097 } 2098 break; 2099 2100 case AttrValueDQ: 2101 while ((ptr != end) && (*ptr != '\"')) 2102 ++ptr; 2103 if (ptr == end) 2104 continue; 2105 2106 rule.value_ = start[0..ptr-start]; 2107 state = PostAttrValue; 2108 break; 2109 2110 case AttrValueSQ: 2111 while ((ptr != end) && (*ptr != '\'')) 2112 ++ptr; 2113 if (ptr == end) 2114 continue; 2115 2116 rule.value_ = start[0..ptr-start]; 2117 state = PostAttrValue; 2118 break; 2119 2120 case AttrValueNQ: 2121 while ((ptr != end) && !isWhite(*ptr) && (*ptr != ']')) 2122 ++ptr; 2123 if (ptr == end) 2124 continue; 2125 2126 rule.value_ = start[0..ptr-start]; 2127 state = PostAttrValue; 2128 continue; 2129 2130 case PostAttrValue: 2131 while ((ptr != end) && (*ptr != ']') && (*ptr != 'i')) 2132 ++ptr; 2133 if (ptr == end) 2134 continue; 2135 2136 if (*ptr == ']') { 2137 state = PostIdentifier; 2138 } else if (*ptr == 'i') { 2139 rule.flags_ &= ~cast(int)Rule.Flags.CaseSensitive; 2140 } 2141 break; 2142 2143 case Pseudo: 2144 while ((ptr != end) && (isAlpha(*ptr) || (*ptr == '-'))) 2145 ++ptr; 2146 if (ptr == end) 2147 continue; 2148 2149 rule.pseudo_ = quickHashOf(start[0..ptr-start]); 2150 rule.flags_ |= Rule.Flags.HasPseudo; 2151 ++classes; 2152 if (*ptr != '(') { 2153 state = PostIdentifier; 2154 continue; 2155 } else { 2156 state = PseudoArgs; 2157 start = ptr + 1; 2158 } 2159 break; 2160 2161 case PseudoArgs: 2162 while ((ptr != end) && (*ptr != ')')) 2163 ++ptr; 2164 if (ptr == end) 2165 continue; 2166 2167 rule.pseudoArg_ = start[0..ptr-start]; 2168 if (isNumeric(rule.pseudoArg_)) 2169 rule.pseudoArgNum_ = to!uint(rule.pseudoArg_); 2170 state = PostIdentifier; 2171 break; 2172 2173 case Relation: 2174 while ((ptr != end) && isWhite(*ptr)) 2175 ++ptr; 2176 if (ptr == end) 2177 continue; 2178 2179 ++rules.length; 2180 rule = &rules.back; 2181 2182 state = Identifier; 2183 switch (*ptr) { 2184 case '>': 2185 rule.relation_ = Rule.Relation.Child; 2186 break; 2187 case '+': 2188 rule.relation_ = Rule.Relation.DirectAdjacent; 2189 break; 2190 case '~': 2191 rule.relation_ = Rule.Relation.IndirectAdjacent; 2192 break; 2193 default: 2194 rule.relation_ = Rule.Relation.Descendant; 2195 continue; 2196 } 2197 break; 2198 } 2199 2200 ++ptr; 2201 } 2202 2203 rules.reverse(); 2204 selector.rules_ = rules; 2205 selector.specificity_ = (ids << 14) | (classes << 7) | (tags & 127); 2206 2207 return selector; 2208 } 2209 2210 bool matches(const(Node) node) { 2211 auto element = cast(Node)node; 2212 if (rules_.empty) 2213 return false; 2214 2215 Rule.Relation relation = Rule.Relation.None; 2216 foreach(ref rule; rules_) { 2217 final switch (relation) with (Rule.Relation) { 2218 case None: 2219 if (!rule.matches(element)) 2220 return false; 2221 break; 2222 case Descendant: 2223 auto parent = element.parent_; 2224 while (parent) { 2225 if (rule.matches(parent)) { 2226 element = parent; 2227 break; 2228 } 2229 parent = parent.parent_; 2230 } 2231 if (!parent) 2232 return false; 2233 2234 break; 2235 case Child: 2236 auto parent = element.parent_; 2237 if (!parent || !rule.matches(parent)) 2238 return false; 2239 element = parent; 2240 break; 2241 case DirectAdjacent: 2242 auto adjacent = element.previousSibling; 2243 if (!adjacent || !rule.matches(adjacent)) 2244 return false; 2245 element = adjacent; 2246 break; 2247 case IndirectAdjacent: 2248 auto adjacent = element.previousSibling; 2249 while (adjacent) { 2250 if (rule.matches(adjacent)) { 2251 element = adjacent; 2252 break; 2253 } 2254 adjacent = adjacent.previousSibling; 2255 } 2256 if (!adjacent) 2257 return false; 2258 2259 break; 2260 } 2261 2262 relation = rule.relation; 2263 } 2264 2265 return true; 2266 } 2267 2268 @property size_t specificity() const { 2269 return specificity_; 2270 } 2271 2272 private: 2273 HTMLString source_; 2274 Rule[] rules_; 2275 size_t specificity_; 2276 } 2277 2278 2279 private size_t elementFlags(hash_t tagHash) { 2280 size_t flags; 2281 switch (tagHash) { 2282 case tagHashOf("body"): 2283 case tagHashOf("center"): 2284 case tagHashOf("div"): 2285 case tagHashOf("dl"): 2286 case tagHashOf("form"): 2287 case tagHashOf("head"): 2288 case tagHashOf("html"): 2289 case tagHashOf("noscript"): 2290 case tagHashOf("ol"): 2291 case tagHashOf("p"): 2292 case tagHashOf("table"): 2293 case tagHashOf("tbody"): 2294 case tagHashOf("td"): 2295 case tagHashOf("tfoot"): 2296 case tagHashOf("th"): 2297 case tagHashOf("thead"): 2298 case tagHashOf("title"): 2299 case tagHashOf("tr"): 2300 case tagHashOf("ul"): 2301 flags |= Node.Flags.BlockElement; 2302 break; 2303 case tagHashOf("br"): 2304 case tagHashOf("hr"): 2305 case tagHashOf("link"): 2306 case tagHashOf("meta"): 2307 flags |= Node.Flags.BlockElement; 2308 flags |= Node.Flags.VoidElement; 2309 break; 2310 case tagHashOf("area"): 2311 case tagHashOf("base"): 2312 case tagHashOf("basefont"): 2313 case tagHashOf("col"): 2314 case tagHashOf("embed"): 2315 case tagHashOf("img"): 2316 case tagHashOf("input"): 2317 case tagHashOf("isindex"): 2318 case tagHashOf("param"): 2319 case tagHashOf("source"): 2320 case tagHashOf("track"): 2321 case tagHashOf("wbr"): 2322 flags |= Node.Flags.VoidElement; 2323 break; 2324 case tagHashOf("script"): 2325 flags |= Node.Flags.BlockElement; 2326 flags |= Node.Flags.ScriptElement; 2327 break; 2328 case tagHashOf("style"): 2329 flags |= Node.Flags.BlockElement; 2330 flags |= Node.Flags.StyleElement; 2331 break; 2332 default: 2333 break; 2334 } 2335 return flags; 2336 }