1 module html.dom;
2 
3 
4 import std.algorithm;
5 import std.array;
6 import std.ascii;
7 import std.conv;
8 import std.experimental.allocator;
9 import std.range;
10 import std.string;
11 import std.typecons;
12 
13 import html.parser;
14 import html.utils;
15 
16 
17 alias HTMLString = const(char)[];
18 static if(__VERSION__ >= 2079){
19 	alias IAllocator = RCIAllocator;
20 }
21 
22 enum DOMCreateOptions {
23 	None = 0,
24 	DecodeEntities			= 1 << 0,
25 
26 	ValidateClosed			= 1 << 10,
27 	ValidateSelfClosing		= 1 << 11,
28 	ValidateDuplicateAttr	= 1 << 12,
29 	ValidateBasic			= ValidateClosed | ValidateSelfClosing | ValidateDuplicateAttr,
30 	ValidateAll				= ValidateBasic,
31 
32 	Default = DecodeEntities,
33 }
34 
35 
36 enum ValidationError : size_t {
37 	None,
38 	MismatchingClose = 1,
39 	MissingClose,
40 	StrayClose,
41 	SelfClosingNonVoidElement,
42 	DuplicateAttr,
43 }
44 
45 
46 alias ValidationErrorCallable = void delegate(ValidationError, HTMLString, HTMLString);
47 
48 
49 alias onlyElements = a => a.isElementNode;
50 
51 
52 private struct ChildrenForward(NodeType, alias Condition = null) {
53 	this(NodeType first) {
54 		curr_ = cast(Node)first;
55 		static if (!is(typeof(Condition) == typeof(null))) {
56 			while (curr_ && !Condition(curr_))
57 				curr_ = curr_.next_;
58 		}
59 	}
60 
61 	bool empty() const {
62 		return (curr_ is null);
63 	}
64 
65 	NodeType front() const {
66 		return cast(NodeType)curr_;
67 	}
68 
69 	void popFront() {
70 		curr_ = curr_.next_;
71 
72 		static if (!is(typeof(Condition) == typeof(null))) {
73 			while (curr_) {
74 				if (Condition(curr_))
75 					break;
76 				curr_ = curr_.next_;
77 			}
78 		}
79 	}
80 
81 	private Node curr_;
82 }
83 
84 
85 private struct AncestorsForward(NodeType, alias Condition = null) {
86 	this(NodeType first) {
87 		curr_ = cast(Node)first;
88 		static if (!is(typeof(Condition) == typeof(null))) {
89 			while (curr_ && !Condition(curr_))
90 				curr_ = curr_.parent_;
91 		}
92 	}
93 
94 	bool empty() const {
95 		return (curr_ is null);
96 	}
97 
98 	NodeType front() const {
99 		return cast(NodeType)curr_;
100 	}
101 
102 	void popFront() {
103 		curr_ = curr_.parent_;
104 
105 		static if (!is(typeof(Condition) == typeof(null))) {
106 			while (curr_) {
107 				if (Condition(curr_))
108 					break;
109 				curr_ = curr_.parent_;
110 			}
111 		}
112 	}
113 
114 	private Node curr_;
115 }
116 
117 
118 private struct DescendantsForward(NodeType, alias Condition = null) {
119 	this(NodeType top) {
120 		if (top is null)
121 			return;
122 		curr_ = cast(Node)top.firstChild_; // top itself is excluded
123 		top_ = cast(Node)top;
124 		static if (!is(typeof(Condition) == typeof(null))) {
125 			if (!Condition(curr_))
126 				popFront;
127 		}
128 	}
129 
130 	bool empty() const {
131 		return (curr_ is null);
132 	}
133 
134 	NodeType front() const {
135 		return cast(NodeType)curr_;
136 	}
137 
138 	void popFront() {
139 		while (curr_) {
140 			if (curr_.firstChild_) {
141 				curr_ = curr_.firstChild_;
142 			} else if (curr_ !is top_) {
143 				auto next = curr_.next_;
144 				if (!next) {
145 					Node parent = curr_.parent_;
146 					while (parent) {
147 						if (parent !is top_) {
148 							if (parent.next_) {
149 								next = parent.next_;
150 								break;
151 							}
152 							parent = parent.parent_;
153 						} else {
154 							next = null;
155 							break;
156 						}
157 					}
158 				}
159 
160 				curr_ = next;
161 				if (!curr_)
162 					break;
163 			} else {
164 				curr_ = null;
165 				break;
166 			}
167 
168 			static if (is(typeof(Condition) == typeof(null))) {
169 				break;
170 			} else {
171 				if (Condition(curr_))
172 					break;
173 			}
174 		}
175 	}
176 
177 	private Node curr_;
178 	private Node top_;
179 }
180 
181 unittest {
182 	const doc = createDocument(`<div id=a><div id=b></div><div id=c><div id=e></div><div id=f><div id=h></div></div><div id=g></div></div><div id=d></div></div>`);
183 	assert(DescendantsForward!(const(Node))(doc.root).count() == 8);
184 	auto fs = DescendantsForward!(const(Node), x => x.attr("id") == "f")(doc.root);
185 	assert(fs.count() == 1);
186 	assert(fs.front().attr("id") == "f");
187 	auto hs = DescendantsForward!(const(Node), x => x.attr("id") == "h")(fs.front());
188 	assert(hs.count() == 1);
189 	assert(hs.front().attr("id") == "h");
190 	auto divs = DescendantsForward!(const(Node))(fs.front());
191 	assert(divs.count() == 1);
192 }
193 
194 unittest {
195 	// multiple top-level nodes
196 	const doc = createDocument(`<div id="left"></div><div id="mid"></div><div id="right"></div>`);
197 	assert(DescendantsForward!(const Node)(doc.root).count() == 3);
198 	assert(doc.nodes.count() == 3);
199 }
200 
201 unittest {
202 	const doc = createDocument(``);
203 	assert(DescendantsForward!(const Node)(doc.root).empty);
204 	assert(doc.nodes.empty);
205 }
206 
207 private struct QuerySelectorMatcher(NodeType, Nodes) if (isInputRange!Nodes) {
208 	this(Selector selector, Nodes nodes) {
209 		selector_ = selector;
210 		nodes_ = nodes;
211 
212 		while (!nodes_.empty()) {
213 			auto node = nodes_.front();
214 			if (node.isElementNode && selector_.matches(node))
215 				break;
216 
217 			nodes_.popFront();
218 		}
219 	}
220 
221 	bool empty() const {
222 		return nodes_.empty();
223 	}
224 
225 	NodeType front() const {
226 		return cast(NodeType)nodes_.front();
227 	}
228 
229 	void popFront() {
230 		nodes_.popFront();
231 		while (!nodes_.empty()) {
232 			auto node = nodes_.front();
233 			if (node.isElementNode && selector_.matches(node))
234 				break;
235 
236 			nodes_.popFront();
237 		}
238 	}
239 
240 	private Nodes nodes_;
241 	private Selector selector_;
242 }
243 
244 
245 enum NodeTypes : ubyte {
246 	Text = 0,
247 	Element,
248 	Comment,
249 	CDATA,
250 	Declaration,
251 	ProcessingInstruction,
252 }
253 
254 
255 class Node {
256 	@disable this();
257 
258 	private this(HTMLString tag, Document document, size_t flags) {
259 		flags_ = flags;
260 		tag_ = tag;
261 
262 		parent_ = null;
263 		firstChild_ = null;
264 		lastChild_ = null;
265 
266 		prev_ = null;
267 		next_ = null;
268 
269 		document_ = document;
270 	}
271 
272 	auto opIndex(HTMLString name) const {
273 		return attr(name);
274 	}
275 
276 	void opIndexAssign(HTMLString value, HTMLString name) {
277 		attr(name, value);
278 	}
279 
280 	void opIndexAssign(T)(T value, HTMLString name) {
281 		attr(name, value.to!string);
282 	}
283 
284 	@property auto id() const {
285 		return attr("id");
286 	}
287 
288 	@property void id(T)(T value) {
289 		attr("id", value.to!string);
290 	}
291 
292 	@property auto type() const {
293 		return (flags_ & TypeMask) >> TypeShift;
294 	}
295 
296 	@property auto tag() const {
297 		return isElementNode ? tag_ : null;
298 	}
299 
300 	@property auto tagHash() const {
301 		return isElementNode ? tagHash_ : 0;
302 	}
303 
304 	@property auto comment() const {
305 		return isCommentNode ? tag_ : null;
306 	}
307 
308 	@property auto cdata() const {
309 		return isCDATANode ? tag_ : null;
310 	}
311 
312 	@property auto declaration() const {
313 		return isDeclarationNode ? tag_ : null;
314 	}
315 
316 	@property auto processingInstruction() const {
317 		return isProcessingInstructionNode ? tag_ : null;
318 	}
319 
320 	void text(Appender)(ref Appender app) const {
321 		if (isTextNode) {
322 			app.put(tag_);
323 		} else {
324 			Node child = cast(Node)firstChild_;
325 			while (child) {
326 				child.text(app);
327 				child = child.next_;
328 			}
329 		}
330 	}
331 
332 	@property auto text() const {
333 		if (isTextNode) {
334 			return tag_;
335 		} else {
336 			Appender!HTMLString app;
337 			text(app);
338 			return app.data;
339 		}
340 	}
341 
342 	@property void text(HTMLString text) {
343 		if (isTextNode) {
344 			tag_ = text;
345 		} else {
346 			destroyChildren();
347 			appendText(text);
348 		}
349 	}
350 
351 	@property void attr(HTMLString name, HTMLString value) {
352 		assert(isElementNode, "cannot set attributes of non-element nodes");
353 
354 		attrs_[name] = value;
355 
356 		if (name == "id") {
357 			if (!value.empty()) {
358 				flags_ |= Node.Flags.HasID;
359 			} else {
360 				flags_ &= ~Node.Flags.HasID;
361 			}
362 		}
363 	}
364 
365 	@property HTMLString attr(HTMLString name) const {
366 		if (auto pattr = name in attrs_)
367 			return *pattr;
368 		return null;
369 	}
370 
371 	bool hasAttr(HTMLString name) const {
372 		return (name in attrs_) != null;
373 	}
374 
375 	void removeAttr(HTMLString name) {
376 		attrs_.remove(name);
377 	}
378 
379 	@property void html(size_t Options = DOMCreateOptions.Default)(HTMLString html) {
380 		assert(isElementNode, "cannot add html to non-element nodes");
381 
382 		enum parserOptions = ((Options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0);
383 
384 		destroyChildren();
385 
386 		auto builder = DOMBuilder!(Document, Options)(document_, this);
387 		parseHTML!(typeof(builder), parserOptions)(html, builder);
388 	}
389 
390 	@property auto html() const {
391 		Appender!HTMLString app;
392 		innerHTML(app);
393 		return app.data;
394 	}
395 
396 	@property auto compactHTML() const {
397 		Appender!HTMLString app;
398 		compactInnerHTML(app);
399 		return app.data;
400 	}
401 
402 	@property auto outerHTML() const {
403 		Appender!HTMLString app;
404 		outerHTML(app);
405 		return app.data;
406 	}
407 
408 	@property auto compactOuterHTML() const {
409 		Appender!HTMLString app;
410 		compactOuterHTML(app);
411 		return app.data;
412 	}
413 
414 	void prependChild(Node node) {
415 		assert(document_ == node.document_);
416 		assert(isElementNode, "cannot prepend to non-element nodes");
417 
418 		if (node.parent_)
419 			node.detach();
420 		node.parent_ = this;
421 		if (firstChild_) {
422 			assert(!firstChild_.prev_);
423 			firstChild_.prev_ = node;
424 			node.next_ = firstChild_;
425 			firstChild_ = node;
426 		} else {
427 			assert(!lastChild_);
428 			firstChild_ = node;
429 			lastChild_ = node;
430 		}
431 	}
432 
433 	void appendChild(Node node) {
434 		assert(document_ == node.document_);
435 		assert(isElementNode, "cannot append to non-element nodes");
436 
437 		if (node.parent_)
438 			node.detach();
439 		node.parent_ = this;
440 		if (lastChild_) {
441 			assert(!lastChild_.next_);
442 			lastChild_.next_ = node;
443 			node.prev_ = lastChild_;
444 			lastChild_ = node;
445 		} else {
446 			assert(!firstChild_);
447 			firstChild_ = node;
448 			lastChild_ = node;
449 		}
450 	}
451 
452 	void removeChild(Node node) {
453 		assert(node.parent_ is this);
454 		node.detach();
455 	}
456 
457 	void destroyChildren() {
458 		auto child = firstChild_;
459 		while (child) {
460 			auto next = child.next_;
461 			child.destroy();
462 			child = next;
463 		}
464 
465 		firstChild_ = null;
466 		lastChild_ = null;
467 	}
468 
469 	void prependText(HTMLString text) {
470 		auto node = document_.createTextNode(text);
471 		if (firstChild_) {
472 			assert(firstChild_.prev_ is null);
473 			firstChild_.prev_ = node;
474 			node.next_ = firstChild_;
475 			firstChild_ = node;
476 		} else {
477 			assert(lastChild_ is null);
478 			firstChild_ = node;
479 			lastChild_ = node;
480 		}
481 		node.parent_ = this;
482 	}
483 
484 	void appendText(HTMLString text) {
485 		auto node = document_.createTextNode(text);
486 		if (lastChild_) {
487 			assert(lastChild_.next_ is null);
488 			lastChild_.next_ = node;
489 			node.prev_ = lastChild_;
490 			lastChild_ = node;
491 		} else {
492 			assert(firstChild_ is null);
493 			firstChild_ = node;
494 			lastChild_ = node;
495 		}
496 		node.parent_ = this;
497 	}
498 
499 	void insertBefore(Node node) {
500 		assert(document_ is node.document_);
501 
502 		parent_ = node.parent_;
503 		prev_ = node.prev_;
504 		next_ = node;
505 		node.prev_ = this;
506 
507 		if (prev_)
508 			prev_.next_ = this;
509 		else if (parent_)
510 			parent_.firstChild_ = this;
511 	}
512 
513 	void insertAfter(Node node) {
514 		assert(document_ is node.document_);
515 
516 		parent_ = node.parent_;
517 		prev_ = node;
518 		next_ = node.next_;
519 		node.next_ = this;
520 
521 		if (next_)
522 			next_.prev_ = this;
523 		else if (parent_)
524 			parent_.lastChild_ = this;
525 	}
526 
527 	void detach() {
528 		if (parent_) {
529 			if (parent_.firstChild_ is this) {
530 				parent_.firstChild_ = next_;
531 				if (next_) {
532 					next_.prev_ = null;
533 					next_ = null;
534 				} else {
535 					parent_.lastChild_ = null;
536 				}
537 
538 				assert(prev_ is null);
539 			} else if (parent_.lastChild_ is this) {
540 				parent_.lastChild_ = prev_;
541 				assert(prev_);
542 				assert(next_ is null);
543 				prev_.next_ = null;
544 				prev_ = null;
545 			} else {
546 				assert(prev_);
547 
548 				prev_.next_ = next_;
549 				if (next_) {
550 					next_.prev_ = prev_;
551 					next_ = null;
552 				}
553 				prev_ = null;
554 			}
555 			parent_ = null;
556 		}
557 	}
558 
559 	package void detachFast() {
560 		if (parent_) {
561 			if (parent_.firstChild_ is this) {
562 				parent_.firstChild_ = next_;
563 				if (next_) {
564 					next_.prev_ = null;
565 				} else {
566 					parent_.lastChild_ = null;
567 				}
568 
569 				assert(prev_ is null);
570 			} else if (parent_.lastChild_ is this) {
571 				parent_.lastChild_ = prev_;
572 				assert(prev_);
573 				assert(next_ is null);
574 				prev_.next_ = null;
575 			} else {
576 				assert(prev_);
577 
578 				prev_.next_ = next_;
579 				if (next_) {
580 					next_.prev_ = prev_;
581 				}
582 			}
583 		}
584 	}
585 
586 	void destroy() {
587 		detachFast();
588 		destroyChildren();
589 		document_.destroyNode(this);
590 	}
591 
592 	void innerHTML(Appender)(ref Appender app) const {
593 		auto child = cast(Node)firstChild_;
594 		while (child) {
595 			child.outerHTML(app);
596 			child = child.next_;
597 		}
598 	}
599 
600 	void compactInnerHTML(Appender)(ref Appender app) const {
601 		auto child = cast(Node)firstChild_;
602 		while (child) {
603 			child.compactOuterHTML(app);
604 			child = child.next_;
605 		}
606 	}
607 
608 	void outerHTML(Appender)(ref Appender app) const {
609 		final switch (type) with (NodeTypes) {
610 		case Element:
611 			app.put('<');
612 			app.put(tag_);
613 
614 			foreach(HTMLString attr, HTMLString value; attrs_) {
615 				app.put(' ');
616 				app.put(attr);
617 
618 				if (value.length) {
619 					if (value.requiresQuotes) {
620 						app.put("=\"");
621 						app.writeQuotesEscaped(value);
622 						app.put("\"");
623 					} else {
624 						app.put('=');
625 						app.put(value);
626 					}
627 				}
628 			}
629 
630 			if (isVoidElement) {
631 				app.put(">");
632 			} else {
633 				app.put('>');
634 				if (isScriptOrStyleElement) {
635 					if (firstChild_)
636 						app.put(firstChild_.tag_);
637 				} else {
638 					innerHTML(app);
639 				}
640 				app.put("</");
641 				app.put(tag_);
642 				app.put('>');
643 			}
644 			break;
645 		case Text:
646 			app.put(tag_);
647 			break;
648 		case Comment:
649 			app.put("<!--");
650 			app.put(tag_);
651 			app.put("-->");
652 			break;
653 		case CDATA:
654 			app.put("<![CDATA[");
655 			app.put(tag_);
656 			app.put("]]>");
657 			break;
658 		case Declaration:
659 			app.put("<!");
660 			app.put(tag_);
661 			app.put(">");
662 			break;
663 		case ProcessingInstruction:
664 			app.put("<?");
665 			app.put(tag_);
666 			app.put(">");
667 			break;
668 		}
669 	}
670 
671 	void compactOuterHTML(Appender)(ref Appender app) const {
672 		final switch (type) with (NodeTypes) {
673 		case Element:
674 			app.put('<');
675 			app.put(tag_);
676 
677 			foreach (HTMLString attr, HTMLString value; attrs_) {
678 				app.put(' ');
679 				app.put(attr);
680 
681 				if (value.length) {
682 					if (value.requiresQuotes) {
683 						app.put("=\"");
684 						app.writeQuotesEscaped(value);
685 						app.put("\"");
686 					} else {
687 						app.put('=');
688 						app.put(value);
689 					}
690 				}
691 			}
692 
693 			if (isVoidElement) {
694 				app.put(">");
695 			} else {
696 				app.put('>');
697 				if (isScriptOrStyleElement) {
698 					if (firstChild_)
699 						app.put(firstChild_.tag_.strip());
700 				} else {
701 					compactInnerHTML(app);
702 				}
703 				app.put("</");
704 				app.put(tag_);
705 				app.put('>');
706 			}
707 			break;
708 		case Text:
709 			auto ptr = tag_.ptr;
710 			const end = ptr + tag_.length;
711 
712 			if (tag_.isAllWhite()) {
713 				size_t aroundCount;
714 				Node[2] around;
715 
716 				around.ptr[aroundCount] = cast(Node)prev_;
717 				if (!around.ptr[aroundCount])
718 					around.ptr[aroundCount] = cast(Node)parent_;
719 				if (around.ptr[aroundCount])
720 					++aroundCount;
721 				around.ptr[aroundCount] = cast(Node)next_;
722 				if (!around.ptr[aroundCount])
723 					around.ptr[aroundCount] = cast(Node)parent_;
724 				if (around.ptr[aroundCount] && (!aroundCount || (around.ptr[aroundCount] !is around.ptr[aroundCount - 1])))
725 					++aroundCount;
726 
727 				auto tagsMatch = true;
728 				Laround: foreach (i; 0..aroundCount) {
729 					if (around.ptr[i].isElementNode) {
730 						if (!around.ptr[i].isBlockElement) {
731 							tagsMatch = false;
732 							break Laround;
733 						}
734 					}
735 				}
736 
737 				if (!tagsMatch && (ptr != end))
738 					app.put(*ptr);
739 			} else {
740 				auto space = false;
741 				while (ptr != end) {
742 					auto ch = *ptr++;
743 					if (isWhite(ch)) {
744 						if (space)
745 							continue;
746 						space = true;
747 					} else {
748 						space = false;
749 					}
750 					app.put(ch);
751 				}
752 			}
753 			break;
754 		case Comment:
755 			auto stripped = tag_.strip();
756 			if (!stripped.empty() && (tag_.front() == '[')) {
757 				app.put("<!--");
758 				app.put(tag_);
759 				app.put("-->");
760 			}
761 			break;
762 		case CDATA:
763 			app.put("<![CDATA[");
764 			app.put(tag_);
765 			app.put("]]>");
766 			break;
767 		case Declaration:
768 			app.put("<!");
769 			app.put(tag_);
770 			app.put(">");
771 			break;
772 		case ProcessingInstruction:
773 			app.put("<?");
774 			app.put(tag_);
775 			app.put(">");
776 			break;
777 		}
778 	}
779 
780 	void toString(Appender)(ref Appender app) const {
781 		final switch (type) with (NodeTypes) {
782 		case Element:
783 			app.put('<');
784 			app.put(tag_);
785 
786 			foreach(HTMLString attr, HTMLString value; attrs_) {
787 				app.put(' ');
788 				app.put(attr);
789 
790 				if (value.length) {
791 					app.put("=\"");
792 					writeHTMLEscaped!(Yes.escapeQuotes)(app, value);
793 					app.put("\"");
794 				}
795 			}
796 			if (isVoidElement) {
797 				app.put(">");
798 			} else {
799 				app.put("/>");
800 			}
801 			break;
802 		case Text:
803 			writeHTMLEscaped!(No.escapeQuotes)(app, tag_);
804 			break;
805 		case Comment:
806 			app.put("<!--");
807 			app.put(tag_);
808 			app.put("-->");
809 			break;
810 		case CDATA:
811 			app.put("<![CDATA[");
812 			app.put(tag_);
813 			app.put("]]>");
814 			break;
815 		case Declaration:
816 			app.put("<!");
817 			app.put(tag_);
818 			app.put(">");
819 			break;
820 		case ProcessingInstruction:
821 			app.put("<?");
822 			app.put(tag_);
823 			app.put(">");
824 			break;
825 		}
826 	}
827 
828 	override string toString() const {
829 		Appender!HTMLString app;
830 		toString(app);
831 		return cast(string)app.data;
832 	}
833 
834 	@property auto parent() const {
835 		return parent_;
836 	}
837 
838 	@property auto parent() {
839 		return parent_;
840 	}
841 
842 	@property auto firstChild() const {
843 		return firstChild_;
844 	}
845 
846 	@property auto firstChild() {
847 		return firstChild_;
848 	}
849 
850 	@property auto lastChild() const {
851 		return lastChild_;
852 	}
853 
854 	@property auto lastChild() {
855 		return lastChild_;
856 	}
857 
858 	@property auto previousSibling() const {
859 		return prev_;
860 	}
861 
862 	@property auto previousSibling() {
863 		return prev_;
864 	}
865 
866 	@property auto nextSibling() const {
867 		return next_;
868 	}
869 
870 	@property auto nextSibling() {
871 		return next_;
872 	}
873 
874 	@property auto children() const {
875 		return ChildrenForward!(const(Node))(firstChild_);
876 	}
877 
878 	@property auto children() {
879 		return ChildrenForward!Node(firstChild_);
880 	}
881 
882 	@property auto attrs() const {
883 		return attrs_;
884 	}
885 
886 	@property auto attrs() {
887 		return attrs_;
888 	}
889 
890 	auto find(HTMLString selector) const {
891 		return document_.querySelectorAll(selector, this);
892 	}
893 
894 	auto find(HTMLString selector) {
895 		return document_.querySelectorAll(selector, this);
896 	}
897 
898 	auto find(Selector selector) const {
899 		return document_.querySelectorAll(selector, this);
900 	}
901 
902 	auto find(Selector selector) {
903 		return document_.querySelectorAll(selector, this);
904 	}
905 
906 	auto closest(HTMLString selector) const {
907 		auto rules = Selector.parse(selector);
908 		return closest(rules);
909 	}
910 
911 	Node closest(HTMLString selector) {
912 		auto rules = Selector.parse(selector);
913 		return closest(rules);
914 	}
915 
916 	auto closest(Selector selector) const {
917 		if (selector.matches(this))
918 			return this;
919 
920 		foreach (node; ancestors) {
921 			if (selector.matches(node))
922 				return node;
923 		}
924 		return null;
925 	}
926 
927 	Node closest(Selector selector) {
928 		if (selector.matches(this))
929 			return this;
930 
931 		foreach (node; ancestors) {
932 			if (selector.matches(node))
933 				return node;
934 		}
935 		return null;
936 	}
937 
938 	@property auto ancestors() const {
939 		return AncestorsForward!(const(Node))(parent_);
940 	}
941 
942 	@property auto ancestors() {
943 		return AncestorsForward!Node(parent_);
944 	}
945 
946 	@property auto descendants() const {
947 		return DescendantsForward!(const(Node))(this);
948 	}
949 
950 	@property auto descendants() {
951 		return DescendantsForward!Node(this);
952 	}
953 
954 	@property isSelfClosing() const {
955 		return (flags_ & Flags.SelfClosing) != 0;
956 	}
957 
958 	@property isVoidElement() const {
959 		return (flags_ & Flags.VoidElement) != 0;
960 	}
961 
962 	@property isBlockElement() const {
963 		return (flags_ & Flags.BlockElement) != 0;
964 	}
965 
966 	@property isScriptElement() const {
967 		return (flags_ & Flags.ScriptElement) != 0;
968 	}
969 
970 	@property isStyleElement() const {
971 		return (flags_ & Flags.StyleElement) != 0;
972 	}
973 
974 	@property isScriptOrStyleElement() const {
975 		return (flags_ & (Flags.ScriptElement | Flags.StyleElement)) != 0;
976 	}
977 
978 	@property bool hasID() const {
979 		return (flags_ & Flags.HasID) != 0;
980 	}
981 
982 	@property isElementNode() const {
983 		return type == NodeTypes.Element;
984 	}
985 
986 	@property isTextNode() const {
987 		return type == NodeTypes.Text;
988 	}
989 
990 	@property isCommentNode() const {
991 		return type == NodeTypes.Comment;
992 	}
993 
994 	@property isCDATANode() const {
995 		return type == NodeTypes.CDATA;
996 	}
997 
998 	@property isDeclarationNode() const {
999 		return type == NodeTypes.Declaration;
1000 	}
1001 
1002 	@property isProcessingInstructionNode() const {
1003 		return type == NodeTypes.ProcessingInstruction;
1004 	}
1005 
1006 	Node clone(Document document) const {
1007 		auto node = document.allocNode(tag_, flags_);
1008 		node.tagHash_ = tagHash_;
1009 		node.flags_ = flags_;
1010 
1011 		foreach (HTMLString attr, HTMLString value; attrs_)
1012 			node.attrs_[attr] = value;
1013 
1014 		Node current = cast(Node)firstChild_;
1015 
1016 		while (current !is null) {
1017 			auto newChild = current.clone(document);
1018 			node.appendChild(newChild);
1019 			current = current.next_;
1020 		}
1021 		return node;
1022 	}
1023 
1024 	Node clone() const {
1025 		return document_.cloneNode(this);
1026 	}
1027 
1028 package:
1029 	enum TypeMask	= 0x7UL;
1030 	enum TypeShift	= 0UL;
1031 	enum FlagsBit	= TypeMask + 1;
1032 	enum Flags {
1033 		SelfClosing		= FlagsBit << 1,
1034 		VoidElement		= FlagsBit << 2,
1035 		BlockElement	= FlagsBit << 3,
1036 		ScriptElement	= FlagsBit << 4,
1037 		StyleElement	= FlagsBit << 5,
1038 
1039 		HasID			= FlagsBit << 6,
1040 	}
1041 
1042 	size_t flags_;
1043 	hash_t tagHash_;
1044 	HTMLString tag_; // when Text flag is set, will contain the text itself
1045 	HTMLString[HTMLString] attrs_;
1046 
1047 	Node parent_;
1048 	Node firstChild_;
1049 	Node lastChild_;
1050 
1051 	// siblings
1052 	Node prev_;
1053 	Node next_;
1054 
1055 	Document document_;
1056 }
1057 
1058 
1059 auto createDocument(size_t Options = DOMCreateOptions.Default)(HTMLString source, IAllocator alloc = theAllocator) {
1060 	enum parserOptions = ((Options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0);
1061 	static assert((Options & DOMCreateOptions.ValidateAll) == 0, "requested validation with no error callable");
1062 
1063 	auto document = createDocument(alloc);
1064 	auto builder = DOMBuilder!(Document, Options)(document);
1065 
1066 	parseHTML!(typeof(builder), parserOptions)(source, builder);
1067 	return document;
1068 }
1069 
1070 
1071 auto createDocument(size_t Options = DOMCreateOptions.Default | DOMCreateOptions.ValidateAll)(HTMLString source, ValidationErrorCallable errorCallable, IAllocator alloc = theAllocator) {
1072 	enum parserOptions = ((Options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0);
1073 	static assert((Options & DOMCreateOptions.ValidateAll) != 0, "error callable but validation not requested");
1074 
1075 	auto document = createDocument(alloc);
1076 	auto builder = DOMBuilder!(Document, Options)(document, errorCallable);
1077 
1078 	parseHTML!(typeof(builder), parserOptions)(source, builder);
1079 	return document;
1080 }
1081 
1082 
1083 unittest {
1084 	auto doc = createDocument("<html> <body> \n\r\f\t </body> </html>");
1085 	assert(doc.root.compactOuterHTML == "<root><html><body></body></html></root>");
1086 	doc = createDocument("<html> <body> <div> <p>  <a>  </a> </p> </div> </body> </html>");
1087 	assert(doc.root.compactOuterHTML == "<root><html><body><div><p> <a> </a> </p></div></body></html></root>");
1088 }
1089 
1090 unittest {
1091 	auto doc = createDocument(`<html><body>&nbsp;</body></html>`);
1092 	assert(doc.root.outerHTML == "<root><html><body>\&nbsp;</body></html></root>");
1093 	doc = createDocument!(DOMCreateOptions.None)(`<html><body>&nbsp;</body></html>`);
1094 	assert(doc.root.outerHTML == `<root><html><body>&nbsp;</body></html></root>`);
1095 	doc = createDocument(`<script>&nbsp;</script>`);
1096 	assert(doc.root.outerHTML == `<root><script>&nbsp;</script></root>`, doc.root.outerHTML);
1097 	doc = createDocument(`<style>&nbsp;</style>`);
1098 	assert(doc.root.outerHTML == `<root><style>&nbsp;</style></root>`, doc.root.outerHTML);
1099 }
1100 
1101 unittest {
1102 	const doc = createDocument(`<html><body><div title='"Тест&apos;"'>"К"ириллица</div></body></html>`);
1103 	assert(doc.root.html == `<html><body><div title="&#34;Тест'&#34;">"К"ириллица</div></body></html>`);
1104 }
1105 
1106 unittest {
1107 	// void elements should not be self-closed
1108 	auto doc = createDocument(`<area><base><br><col>`);
1109 	assert(doc.root.outerHTML == `<root><area><base><br><col></root>`, doc.root.outerHTML);
1110 	doc = createDocument(`<svg /><math /><svg></svg>`);
1111 	assert(doc.root.outerHTML == `<root><svg></svg><math></math><svg></svg></root>`, doc.root.outerHTML);
1112 	doc = createDocument(`<br /><div />`);
1113 	assert(doc.root.outerHTML == `<root><br><div></div></root>`, doc.root.outerHTML);
1114 }
1115 
1116 // toString prints elements with content as <tag attr="value"/>
1117 unittest {
1118 	// self-closed element w/o content
1119 	auto doc = createDocument(`<svg />`);
1120 	assert(doc.root.firstChild.toString == `<svg/>`, doc.root.firstChild.toString);
1121 	// elements w/ content
1122 	doc = createDocument(`<svg></svg>`);
1123 	assert(doc.root.firstChild.toString == `<svg/>`, doc.root.firstChild.toString);
1124 	doc = createDocument(`<div class="mydiv"></div>`);
1125 	assert(doc.root.firstChild.toString == `<div class="mydiv"/>`, doc.root.firstChild.toString);
1126 	// void element
1127 	doc = createDocument(`<br>`);
1128 	assert(doc.root.firstChild.toString == `<br>`, doc.root.firstChild.toString);
1129 	// "invalid" self-closed void element
1130 	doc = createDocument(`<br />`);
1131 	assert(doc.root.firstChild.toString == `<br>`, doc.root.firstChild.toString);
1132 }
1133 
1134 unittest {
1135 	const doc = createDocument(`<html><body><div>&nbsp;</div></body></html>`);
1136 	assert(doc.root.find("html").front.outerHTML == "<html><body><div>\&nbsp;</div></body></html>");
1137 	assert(doc.root.find("html").front.find("div").front.outerHTML == "<div>\&nbsp;</div>");
1138 	assert(doc.root.find("body").front.outerHTML == "<body><div>\&nbsp;</div></body>");
1139 	assert(doc.root.find("body").front.closest("body").outerHTML == "<body><div>\&nbsp;</div></body>"); // closest() tests self
1140 	assert(doc.root.find("body").front.closest("html").outerHTML == "<html><body><div>\&nbsp;</div></body></html>");
1141 }
1142 
1143 unittest {
1144 	const doc = createDocument(`<html><body><![CDATA[test]]></body></html>`);
1145 	assert(doc.root.firstChild.firstChild.firstChild.isCDATANode);
1146 	assert(doc.root.html == `<html><body><![CDATA[test]]></body></html>`);
1147 }
1148 
1149 
1150 static auto createDocument(IAllocator alloc = theAllocator) {
1151 	auto document = new Document(alloc);
1152 	document.root(document.createElement("root"));
1153 	return document;
1154 }
1155 
1156 
1157 class Document {
1158 	private this(IAllocator alloc) {
1159 		alloc_ = alloc;
1160 	}
1161 
1162 	auto createElement(HTMLString tagName, Node parent = null) {
1163 		auto node = allocNode(tagName, NodeTypes.Element << Node.TypeShift);
1164 		node.tagHash_ = tagHashOf(tagName);
1165 		node.flags_ |= elementFlags(node.tagHash_);
1166 		if (parent)
1167 			parent.appendChild(node);
1168 		return node;
1169 	}
1170 
1171 	auto createTextNode(HTMLString text, Node parent = null) {
1172 		auto node = allocNode(text, NodeTypes.Text << Node.TypeShift);
1173 		if (parent)
1174 			parent.appendChild(node);
1175 		return node;
1176 	}
1177 
1178 	auto createCommentNode(HTMLString comment, Node parent = null) {
1179 		auto node = allocNode(comment, NodeTypes.Comment << Node.TypeShift);
1180 		if (parent)
1181 			parent.appendChild(node);
1182 		return node;
1183 	}
1184 
1185 	auto createCDATANode(HTMLString cdata, Node parent = null) {
1186 		auto node = allocNode(cdata, NodeTypes.CDATA << Node.TypeShift);
1187 		if (parent)
1188 			parent.appendChild(node);
1189 		return node;
1190 	}
1191 
1192 	auto createDeclarationNode(HTMLString data, Node parent = null) {
1193 		auto node = allocNode(data, NodeTypes.Declaration << Node.TypeShift);
1194 		if (parent)
1195 			parent.appendChild(node);
1196 		return node;
1197 	}
1198 
1199 	auto createProcessingInstructionNode(HTMLString data, Node parent = null) {
1200 		auto node = allocNode(data, NodeTypes.ProcessingInstruction << Node.TypeShift);
1201 		if (parent)
1202 			parent.appendChild(node);
1203 		return node;
1204 	}
1205 
1206 	Node cloneNode(const(Node) other) const {
1207 		return other.clone(cast(Document)this);
1208 	}
1209 
1210 	Document clone(IAllocator alloc = theAllocator) {
1211 		Document other = new Document(alloc);
1212 		other.root(other.cloneNode(this.root_));
1213 		return other;
1214 	}
1215 
1216 	@property auto root() {
1217 		return root_;
1218 	}
1219 
1220 	@property auto root() const {
1221 		return root_;
1222 	}
1223 
1224 	@property void root(Node root) {
1225 		if (root && (root.document_.alloc_ != alloc_))
1226 			alloc_ = root.document_.alloc_;
1227 		root_ = root;
1228 	}
1229 
1230 	@property auto nodes() const {
1231 		return DescendantsForward!(const(Node))(root_);
1232 	}
1233 
1234 	@property auto nodes() {
1235 		return DescendantsForward!Node(root_);
1236 	}
1237 
1238 	@property auto elements() const {
1239 		return DescendantsForward!(const(Node), onlyElements)(root_);
1240 	}
1241 
1242 	@property auto elements() {
1243 		return DescendantsForward!(Node, onlyElements)(root_);
1244 	}
1245 
1246 	@property auto elementsByTagName(HTMLString tag) const {
1247 		const tagHash = tagHashOf(tag);
1248 		return DescendantsForward!(const(Node), (a) { return a.isElementNode && (a.tagHash == tagHash); })(root_);
1249 	}
1250 
1251 	@property auto elementsByTagName(HTMLString tag) {
1252 		const tagHash = tagHashOf(tag);
1253 		return DescendantsForward!(Node, (a) { return a.isElementNode && (a.tagHash == tagHash); })(root_);
1254 	}
1255 
1256 	const(Node) querySelector(HTMLString selector, Node context = null) const {
1257 		auto rules = Selector.parse(selector);
1258 		return querySelector(rules, context);
1259 	}
1260 
1261 	Node querySelector(HTMLString selector, Node context = null) {
1262 		auto rules = Selector.parse(selector);
1263 		return querySelector(rules, context);
1264 	}
1265 
1266 	const(Node) querySelector(Selector selector, const(Node) context = null) const {
1267 		auto top = context ? context : root_;
1268 
1269 		foreach(node; DescendantsForward!(const(Node), onlyElements)(top)) {
1270 			if (selector.matches(node))
1271 				return node;
1272 		}
1273 		return null;
1274 	}
1275 
1276 	Node querySelector(Selector selector, Node context = null) {
1277 		auto top = context ? context : root_;
1278 
1279 		foreach(node; DescendantsForward!(Node, onlyElements)(top)) {
1280 			if (selector.matches(node))
1281 				return node;
1282 		}
1283 		return null;
1284 	}
1285 
1286 	alias QuerySelectorAllResult = QuerySelectorMatcher!(Node, DescendantsForward!Node);
1287 	alias QuerySelectorAllConstResult = QuerySelectorMatcher!(const(Node), DescendantsForward!(const(Node)));
1288 
1289 	QuerySelectorAllResult querySelectorAll(HTMLString selector, Node context = null) {
1290 		auto rules = Selector.parse(selector);
1291 		return querySelectorAll(rules, context);
1292 	}
1293 
1294 	QuerySelectorAllConstResult querySelectorAll(HTMLString selector, const(Node) context = null) const {
1295 		auto rules = Selector.parse(selector);
1296 		return querySelectorAll(rules, context);
1297 	}
1298 
1299 	QuerySelectorAllConstResult querySelectorAll(Selector selector, const(Node) context = null) const {
1300 		auto top = context ? context : root_;
1301 		return QuerySelectorMatcher!(const(Node), DescendantsForward!(const(Node)))(selector, DescendantsForward!(const(Node))(top));
1302 	}
1303 
1304 	QuerySelectorAllResult querySelectorAll(Selector selector, Node context = null) {
1305 		auto top = context ? context : root_;
1306 		return QuerySelectorMatcher!(Node, DescendantsForward!Node)(selector, DescendantsForward!Node(top));
1307 	}
1308 
1309 	void toString(Appender)(ref Appender app) const {
1310 		root_.outerHTML(app);
1311 	}
1312 
1313 	override string toString() const {
1314 		auto app = appender!HTMLString;
1315 		root_.outerHTML(app);
1316 		return cast(string)app.data;
1317 	}
1318 
1319 	auto allocNode()(HTMLString tag, size_t flags) {
1320 		enum NodeSize = __traits(classInstanceSize, Node);
1321 		auto ptr = cast(Node)alloc_.allocate(NodeSize).ptr;
1322 		(cast(void*)ptr)[0..NodeSize] = typeid(Node).initializer[];
1323 		ptr.__ctor(tag, this, flags);
1324 		return ptr;
1325 	}
1326 
1327 	void destroyNode(Node node) {
1328 		assert(node.firstChild_ is null);
1329 		alloc_.dispose(node);
1330 	}
1331 
1332 private:
1333 	Node root_;
1334 	IAllocator alloc_;
1335 }
1336 
1337 
1338 unittest {
1339 	const(char)[] src = `<parent attr=value><child></child>text</parent>`;
1340 	auto doc = createDocument(src);
1341 	assert(doc.root.html == src, doc.root.html);
1342 
1343 	const(char)[] srcq = `<parent attr="v a l u e"><child></child>text</parent>`;
1344 	auto docq = createDocument(srcq);
1345 	assert(docq.root.html == srcq, docq.root.html);
1346 
1347 	// basic cloning
1348 	auto cloned = doc.cloneNode(doc.root);
1349 	assert(cloned.html == src, cloned.html);
1350 	assert(doc.root.html == src, cloned.html);
1351 
1352 	assert(!cloned.find("child").empty);
1353 	assert(!cloned.find("parent").empty);
1354 
1355 	// clone mutation
1356 	auto child = cloned.find("child").front.clone;
1357 	child.attr("attr", "test");
1358 	cloned.find("parent").front.appendChild(child);
1359 	assert(cloned.html == `<parent attr=value><child></child>text<child attr=test></child></parent>`, cloned.html);
1360 	assert(doc.root.html == src, doc.root.html);
1361 
1362 	child.text = "text";
1363 	assert(cloned.html == `<parent attr=value><child></child>text<child attr=test>text</child></parent>`, cloned.html);
1364 	assert(doc.root.html == src, doc.root.html);
1365 
1366 	// document cloning
1367 	auto docc = doc.clone;
1368 	assert(docc.root.html == doc.root.html, docc.root.html);
1369 }
1370 
1371 
1372 static struct DOMBuilder(Document, size_t Options) {
1373 	enum {
1374 		Validate 				= (Options & DOMCreateOptions.ValidateAll) != 0,
1375 		ValidateClosed 			= (Options & DOMCreateOptions.ValidateClosed) != 0,
1376 		ValidateSelfClosing		= (Options & DOMCreateOptions.ValidateSelfClosing) != 0,
1377 		ValidateDuplicateAttr	= (Options & DOMCreateOptions.ValidateDuplicateAttr) != 0,
1378 	}
1379 
1380 	static if (Validate) {
1381 		this(Document document, ValidationErrorCallable errorCallable, Node parent = null) {
1382 			document_ = document;
1383 			element_ = parent ? parent : document.root;
1384 			error_ = errorCallable;
1385 		}
1386 	} else {
1387 		this(Document document, Node parent = null) {
1388 			document_ = document;
1389 			element_ = parent ? parent : document.root;
1390 		}
1391 	}
1392 
1393 	void onText(HTMLString data) {
1394 		if (data.ptr == (text_.ptr + text_.length)) {
1395 			text_ = text_.ptr[0..text_.length + data.length];
1396 		} else {
1397 			text_ ~= data;
1398 		}
1399 	}
1400 
1401 	void onSelfClosing() {
1402 		if (element_.isVoidElement) {
1403 			element_.flags_ |= Node.Flags.SelfClosing;
1404 		} else {
1405 			static if (ValidateSelfClosing) {
1406 				error_(ValidationError.SelfClosingNonVoidElement, element_.tag_, null);
1407 			}
1408 		}
1409 		element_ = element_.parent_;
1410 	}
1411 
1412 	void onOpenStart(HTMLString data) {
1413 		if (!text_.empty) {
1414 			element_.appendText(text_);
1415 			text_.length = 0;
1416 		}
1417 
1418 		element_ = document_.createElement(data, element_);
1419 		element_.flags_ |= elementFlags(element_.tagHash_);
1420 	}
1421 
1422 	void onOpenEnd(HTMLString data) {
1423 		// void elements have neither content nor a closing tag, so
1424 		// we're done w/ them on the end of the open tag
1425 		if (element_.isVoidElement)
1426 			element_ = element_.parent_;
1427 	}
1428 
1429 	void onClose(HTMLString data) {
1430 		if (!text_.empty) {
1431 			if (element_) {
1432 				element_.appendText(text_);
1433 				text_.length = 0;
1434 			} else {
1435 				document_.root.appendText(text_);
1436 			}
1437 		}
1438 
1439 		if (element_) {
1440 			assert(!element_.isTextNode);
1441 			if (element_.tag.equalsCI(data)) {
1442 				element_ = element_.parent_;
1443 			} else {
1444 				auto element = element_.parent_;
1445 				while (element) {
1446 					if (element.tag.equalsCI(data)) {
1447 						static if (ValidateClosed) {
1448 							error_(ValidationError.MismatchingClose, data, element_.tag);
1449 						}
1450 						element_ = element.parent_;
1451 						break;
1452 					}
1453 					element = element.parent_;
1454 				}
1455 				static if (ValidateClosed) {
1456 					if (!element)
1457 						error_(ValidationError.StrayClose, data, null);
1458 				}
1459 			}
1460 		} else {
1461 			static if (ValidateClosed) {
1462 				error_(ValidationError.StrayClose, data, null);
1463 			}
1464 		}
1465 	}
1466 
1467 	void onAttrName(HTMLString data) {
1468 		attr_ = data;
1469 		state_ = States.Attr;
1470 	}
1471 
1472 	void onAttrEnd() {
1473 		if (!attr_.empty) {
1474 			static if (ValidateDuplicateAttr) {
1475 				if (attr_ in element_.attrs_)
1476 					error_(ValidationError.DuplicateAttr, element_.tag_, attr_);
1477 			}
1478 			element_.attr(attr_, value_);
1479 		}
1480 		value_.length = 0;
1481 		attr_.length = 0;
1482 		state_ = States.Global;
1483 	}
1484 
1485 	void onAttrValue(HTMLString data) {
1486 		if (data.ptr == (value_.ptr + value_.length)) {
1487 			value_ = value_.ptr[0..value_.length + data.length];
1488 		} else {
1489 			value_ ~= data;
1490 		}
1491 	}
1492 
1493 	void onComment(HTMLString data) {
1494 		document_.createCommentNode(data, element_);
1495 	}
1496 
1497 	void onDeclaration(HTMLString data) {
1498 		document_.createDeclarationNode(data, element_);
1499 	}
1500 
1501 	void onProcessingInstruction(HTMLString data) {
1502 		document_.createProcessingInstructionNode(data, element_);
1503 	}
1504 
1505 	void onCDATA(HTMLString data) {
1506 		document_.createCDATANode(data, element_);
1507 	}
1508 
1509 	void onDocumentEnd() {
1510 		if (!text_.empty) {
1511 			if (element_) {
1512 				element_.appendText(text_);
1513 				text_.length = 0;
1514 			} else {
1515 				document_.root.appendText(text_);
1516 			}
1517 		}
1518 
1519 		static if (ValidateClosed) {
1520 			while (element_ && element_ !is document_.root_) {
1521 				error_(ValidationError.MissingClose, element_.tag, null);
1522 				element_ = element_.parent_;
1523 			}
1524 		}
1525 	}
1526 
1527 	void onNamedEntity(HTMLString data) {
1528 	}
1529 
1530 	void onNumericEntity(HTMLString data) {
1531 	}
1532 
1533 	void onHexEntity(HTMLString data) {
1534 	}
1535 
1536 	void onEntity(HTMLString data, HTMLString decoded) {
1537 		if (state_ == States.Global) {
1538 			text_ ~= decoded;
1539 		} else {
1540 			value_ ~= decoded;
1541 		}
1542 	}
1543 
1544 private:
1545 	Document document_;
1546 	Node element_;
1547 	States state_;
1548 	static if (Validate) {
1549 		ValidationErrorCallable error_;
1550 	}
1551 
1552 	enum States {
1553 		Global = 0,
1554 		Attr,
1555 	}
1556 
1557 	HTMLString attr_;
1558 	HTMLString value_;
1559 	HTMLString text_;
1560 }
1561 
1562 
1563 
1564 unittest {
1565 	static struct Error {
1566 		ValidationError error;
1567 		HTMLString tag;
1568 		HTMLString related;
1569 	}
1570 
1571 	Error[] errors;
1572 	void errorHandler(ValidationError error, HTMLString tag, HTMLString related) {
1573 		errors ~= Error(error, tag, related);
1574 	}
1575 
1576 	errors.length = 0;
1577 	auto doc = createDocument(`<div><div></div>`, &errorHandler);
1578 	assert(errors == [Error(ValidationError.MissingClose, "div")]);
1579 
1580 	errors.length = 0;
1581 	doc = createDocument(`<div></div></div>`, &errorHandler);
1582 	assert(errors == [Error(ValidationError.StrayClose, "div")]);
1583 
1584 	errors.length = 0;
1585 	doc = createDocument(`<span><div></span>`, &errorHandler);
1586 	assert(errors == [Error(ValidationError.MismatchingClose, "span", "div")]);
1587 
1588 	errors.length = 0;
1589 	doc = createDocument(`<div />`, &errorHandler);
1590 	assert(errors == [Error(ValidationError.SelfClosingNonVoidElement, "div")]);
1591 
1592 	errors.length = 0;
1593 	doc = createDocument(`<hr></hr>`, &errorHandler);
1594 	assert(errors == [Error(ValidationError.StrayClose, "hr")]);
1595 
1596 	errors.length = 0;
1597 	doc = createDocument(`<span id=moo id=foo class=moo class=></span>`, &errorHandler);
1598 	assert(errors == [Error(ValidationError.DuplicateAttr, "span", "id"), Error(ValidationError.DuplicateAttr, "span", "class")]);
1599 }
1600 
1601 
1602 private struct Rule {
1603 	enum Flags : ushort {
1604 		HasTag          = 1 << 0,
1605 		HasID			= 1 << 1,
1606 		HasAttr         = 1 << 2,
1607 		HasPseudo       = 1 << 3,
1608 		CaseSensitive   = 1 << 4,
1609 		HasAny          = 1 << 5,
1610 	}
1611 
1612 	enum MatchType : ubyte {
1613 		None = 0,
1614 		Set,
1615 		Exact,
1616 		ContainWord,
1617 		Contain,
1618 		Begin,
1619 		BeginHyphen,
1620 		End,
1621 	}
1622 
1623 	enum Relation : ubyte {
1624 		None = 0,
1625 		Descendant,
1626 		Child,
1627 		DirectAdjacent,
1628 		IndirectAdjacent,
1629 	}
1630 
1631 	bool matches(NodeType)(NodeType element) const {
1632 		if (flags_ == 0)
1633 			return false;
1634 
1635 		if (flags_ & Flags.HasTag) {
1636 			if (tagHash_ != element.tagHash)
1637 				return false;
1638 		}
1639 
1640 		if (flags_ & Flags.HasID) {
1641 			if (value_.empty || !element.hasID) return false;
1642 			if (value_ != element.id()) return false;
1643 		}
1644 
1645 		if (flags_ & Flags.HasAttr) {
1646 			auto cs = (flags_ & Flags.CaseSensitive) != 0;
1647 			final switch (match_) with (MatchType) {
1648 			case None:
1649 				break;
1650 			case Set:
1651 				if (element.attr(attr_) == null)
1652 					return false;
1653 				break;
1654 			case Exact:
1655 				if (value_.empty) return false;
1656 				auto attr = element.attr(attr_);
1657 				if (!attr || (cs ? (value_ != attr) : !value_.equalsCI(attr)))
1658 					return false;
1659 				break;
1660 			case Contain:
1661 				if (value_.empty) return false;
1662 				auto attr = element.attr(attr_);
1663 				if (!attr || ((attr.indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) == -1))
1664 					return false;
1665 				break;
1666 			case ContainWord:
1667 				if (value_.empty) return false;
1668 				auto attr = element.attr(attr_);
1669 				if (!attr)
1670 					return false;
1671 
1672 				size_t start = 0;
1673 				while (true) {
1674 					auto index = attr.indexOf(value_, start, cs ? CaseSensitive.yes : CaseSensitive.no);
1675 					if (index == -1)
1676 						return false;
1677 					if (index && !isWhite(attr[index - 1]))
1678 						return false;
1679 					if ((index + value_.length == attr.length) || isWhite(attr[index + value_.length]))
1680 						break;
1681 					start = index + 1;
1682 				}
1683 				break;
1684 			case Begin:
1685 				if (value_.empty) return false;
1686 				auto attr = element.attr(attr_);
1687 				if (!attr || ((attr.indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0))
1688 					return false;
1689 				break;
1690 			case End:
1691 				if (value_.empty) return false;
1692 				auto attr = element.attr(attr_);
1693 				if (!attr || ((attr.lastIndexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != (attr.length - value_.length)))
1694 					return false;
1695 				break;
1696 			case BeginHyphen:
1697 				if (value_.empty) return false;
1698 				auto attr = element.attr(attr_);
1699 				if (!attr || ((attr.indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0) || ((attr.length > value_.length) && (attr[value_.length] != '-')))
1700 					return false;
1701 				break;
1702 			}
1703 		}
1704 
1705 		if (flags_ & Flags.HasPseudo) {
1706 			switch (pseudo_) {
1707 			case quickHashOf("checked"):
1708 				if (!element.hasAttr("checked"))
1709 					return false;
1710 				break;
1711 
1712 			case quickHashOf("enabled"):
1713 				if (element.hasAttr("disabled"))
1714 					return false;
1715 				break;
1716 
1717 			case quickHashOf("disabled"):
1718 				if (!element.hasAttr("disabled"))
1719 					return false;
1720 				break;
1721 
1722 			case quickHashOf("empty"):
1723 				if (element.firstChild_)
1724 					return false;
1725 				break;
1726 
1727 			case quickHashOf("optional"):
1728 				if (element.hasAttr("required"))
1729 					return false;
1730 				break;
1731 
1732 			case quickHashOf("read-only"):
1733 				if (!element.hasAttr("readonly"))
1734 					return false;
1735 				break;
1736 
1737 			case quickHashOf("read-write"):
1738 				if (element.hasAttr("readonly"))
1739 					return false;
1740 				break;
1741 
1742 			case quickHashOf("required"):
1743 				if (!element.hasAttr("required"))
1744 					return false;
1745 				break;
1746 
1747 			case quickHashOf("lang"):
1748 				if (element.attr("lang") != pseudoArg_)
1749 					return false;
1750 				break;
1751 
1752 			case quickHashOf("first-child"):
1753 				if (!element.parent_ || (element.parent_.firstChild !is element))
1754 					return false;
1755 				break;
1756 
1757 			case quickHashOf("last-child"):
1758 				if (!element.parent_ || (element.parent_.lastChild !is element))
1759 					return false;
1760 				break;
1761 
1762 			case quickHashOf("first-of-type"):
1763 				Node sibling = element.previousSibling;
1764 				while (sibling) {
1765 					if (sibling.isElementNode && sibling.tag.equalsCI(element.tag))
1766 						return false;
1767 					sibling = sibling.previousSibling;
1768 				}
1769 				break;
1770 
1771 			case quickHashOf("last-of-type"):
1772 				auto sibling = element.nextSibling;
1773 				while (sibling) {
1774 					if (sibling.isElementNode && sibling.tag.equalsCI(element.tag))
1775 						return false;
1776 					sibling = sibling.nextSibling;
1777 				}
1778 				break;
1779 
1780 			case quickHashOf("nth-child"):
1781 				auto ith = 1;
1782 				auto sibling = element.previousSibling;
1783 				while (sibling) {
1784 					if (ith > pseudoArgNum_)
1785 						return false;
1786 					if (sibling.isElementNode)
1787 						++ith;
1788 					sibling = sibling.previousSibling;
1789 				}
1790 				if (ith != pseudoArgNum_)
1791 					return false;
1792 				break;
1793 
1794 			case quickHashOf("nth-last-child"):
1795 				auto ith = 1;
1796 				auto sibling = element.nextSibling;
1797 				while (sibling) {
1798 					if (ith > pseudoArgNum_)
1799 						return false;
1800 					if (sibling.isElementNode)
1801 						++ith;
1802 					sibling = sibling.nextSibling;
1803 				}
1804 				if (ith != pseudoArgNum_)
1805 					return false;
1806 				break;
1807 
1808 			case quickHashOf("nth-of-type"):
1809 				auto ith = 1;
1810 				auto sibling = element.previousSibling;
1811 				while (sibling) {
1812 					if (ith > pseudoArgNum_)
1813 						return false;
1814 					if (sibling.isElementNode && sibling.tag.equalsCI(element.tag))
1815 						++ith;
1816 					sibling = sibling.previousSibling;
1817 				}
1818 				if (ith != pseudoArgNum_)
1819 					return false;
1820 				break;
1821 
1822 			case quickHashOf("nth-last-of-type"):
1823 				auto ith = 1;
1824 				auto sibling = element.nextSibling;
1825 				while (sibling) {
1826 					if (ith > pseudoArgNum_)
1827 						return false;
1828 					if (sibling.isElementNode && sibling.tag.equalsCI(element.tag))
1829 						++ith;
1830 					sibling = sibling.nextSibling;
1831 				}
1832 				if (ith != pseudoArgNum_)
1833 					return false;
1834 				break;
1835 
1836 			case quickHashOf("only-of-type"):
1837 				auto sibling = element.previousSibling;
1838 				while (sibling) {
1839 					if (sibling.isElementNode && sibling.tag.equalsCI(element.tag))
1840 						return false;
1841 					sibling = sibling.previousSibling;
1842 				}
1843 				sibling = element.nextSibling;
1844 				while (sibling) {
1845 					if (sibling.isElementNode && sibling.tag.equalsCI(element.tag))
1846 						return false;
1847 					sibling = sibling.nextSibling;
1848 				}
1849 				break;
1850 
1851 			case quickHashOf("only-child"):
1852 				auto parent = element.parent_;
1853 				if (parent is null)
1854 					return false;
1855 				Node sibling = parent.firstChild_;
1856 				while (sibling) {
1857 					if ((sibling !is element) && sibling.isElementNode)
1858 						return false;
1859 					sibling = sibling.next_;
1860 				}
1861 				break;
1862 
1863 			default:
1864 				break;
1865 			}
1866 		}
1867 
1868 		return true;
1869 	}
1870 
1871 	@property Relation relation() const {
1872 		return relation_;
1873 	}
1874 
1875 package:
1876 	ushort flags_;
1877 	MatchType match_;
1878 	Relation relation_;
1879 	hash_t tagHash_;
1880 	size_t pseudo_;
1881 	HTMLString attr_;
1882 	HTMLString value_;
1883 	HTMLString pseudoArg_;
1884 	uint pseudoArgNum_;
1885 }
1886 
1887 
1888 struct Selector {
1889 	static Selector parse(HTMLString value) {
1890 		enum ParserStates {
1891 			Identifier = 0,
1892 			PostIdentifier,
1893 			Tag,
1894 			Class,
1895 			ID,
1896 			AttrName,
1897 			AttrOp,
1898 			PreAttrValue,
1899 			AttrValueDQ,
1900 			AttrValueSQ,
1901 			AttrValueNQ,
1902 			PostAttrValue,
1903 			Pseudo,
1904 			PseudoArgs,
1905 			Relation,
1906 		}
1907 
1908 		size_t ids;
1909 		size_t tags;
1910 		size_t classes;
1911 
1912 		value = value.strip;
1913 		auto source = uninitializedArray!(char[])(value.length + 1);
1914 		source[0..value.length] = value;
1915 		source[$-1] = ' '; // add a padding space to ease parsing
1916 
1917 		auto selector = Selector(source);
1918 		Rule[] rules;
1919 		++rules.length;
1920 
1921 		auto rule = &rules.back;
1922 
1923 		auto ptr = source.ptr;
1924 		auto end = source.ptr + source.length;
1925 		auto start = ptr;
1926 
1927 		ParserStates state = ParserStates.Identifier;
1928 
1929 		while (ptr != end) {
1930 			final switch (state) with (ParserStates) {
1931 			case Identifier:
1932 				if (*ptr == '#') {
1933 					state = ID;
1934 					start = ptr + 1;
1935 				} else if (*ptr == '.') {
1936 					state = Class;
1937 					start = ptr + 1;
1938 				} else if (*ptr == '[') {
1939 					state = AttrName;
1940 					start = ptr + 1;
1941 				} else if (isAlpha(*ptr)) {
1942 					state = Tag;
1943 					start = ptr;
1944 					continue;
1945 				} else if (*ptr == '*') {
1946 					rule.flags_ |= Rule.Flags.HasAny;
1947 					state = PostIdentifier;
1948 				} else if (*ptr == ':') {
1949 					rule.flags_ |= Rule.Flags.HasAny;
1950 					state = PostIdentifier;
1951 					continue;
1952 				}
1953 				break;
1954 
1955 			case PostIdentifier:
1956 				switch (*ptr) {
1957 				case '#':
1958 					state = ID;
1959 					start = ptr + 1;
1960 					break;
1961 				case '.':
1962 					state = Class;
1963 					start = ptr + 1;
1964 					break;
1965 				case '[':
1966 					state = AttrName;
1967 					start = ptr + 1;
1968 					break;
1969 				case ':':
1970 					state = Pseudo;
1971 					if ((ptr + 1 != end) && (*(ptr + 1) == ':'))
1972 						++ptr;
1973 					start = ptr + 1;
1974 					break;
1975 				default:
1976 					state = Relation;
1977 					continue;
1978 				}
1979 				break;
1980 
1981 			case Tag:
1982 				while ((ptr != end) && isAlphaNum(*ptr))
1983 					++ptr;
1984 				if (ptr == end)
1985 					continue;
1986 
1987 				rule.flags_ |= Rule.Flags.HasTag;
1988 				rule.tagHash_ = tagHashOf(start[0..ptr-start]);
1989 				++tags;
1990 
1991 				state = PostIdentifier;
1992 				continue;
1993 
1994 			case Class:
1995 				while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_')))
1996 					++ptr;
1997 				if (ptr == end)
1998 					continue;
1999 
2000 				rule.flags_ |= Rule.Flags.HasAttr;
2001 				rule.match_ = Rule.MatchType.ContainWord;
2002 				rule.attr_ = "class";
2003 				rule.value_ = start[0..ptr-start];
2004 				++classes;
2005 
2006 				state = PostIdentifier;
2007 				break;
2008 
2009 			case ID:
2010 				while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_')))
2011 					++ptr;
2012 				if (ptr == end)
2013 					continue;
2014 
2015 				rule.flags_ |= Rule.Flags.HasID;
2016 				rule.value_ = start[0..ptr-start];
2017 				++ids;
2018 
2019 				state = PostIdentifier;
2020 				break;
2021 
2022 			case AttrName:
2023 				while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_')))
2024 					++ptr;
2025 				if (ptr == end)
2026 					continue;
2027 
2028 				rule.flags_ |= Rule.Flags.HasAttr;
2029 				rule.flags_ |= Rule.Flags.CaseSensitive;
2030 				rule.attr_ = start[0..ptr-start];
2031 				++classes;
2032 
2033 				state = AttrOp;
2034 				continue;
2035 
2036 			case AttrOp:
2037 				while ((ptr != end) && (isWhite(*ptr)))
2038 					++ptr;
2039 				if (ptr == end)
2040 					continue;
2041 
2042 				switch (*ptr) {
2043 				case ']':
2044 					rule.match_ = Rule.MatchType.Set;
2045 					state = PostIdentifier;
2046 					break;
2047 				case '=':
2048 					rule.match_ = Rule.MatchType.Exact;
2049 					state = PreAttrValue;
2050 					break;
2051 				default:
2052 					if ((ptr + 1 != end) && (*(ptr + 1) == '=')) {
2053 						switch (*ptr) {
2054 						case '~':
2055 							rule.match_ = Rule.MatchType.ContainWord;
2056 							break;
2057 						case '^':
2058 							rule.match_ = Rule.MatchType.Begin;
2059 							break;
2060 						case '$':
2061 							rule.match_ = Rule.MatchType.End;
2062 							break;
2063 						case '*':
2064 							rule.match_ = Rule.MatchType.Contain;
2065 							break;
2066 						case '|':
2067 							rule.match_ = Rule.MatchType.BeginHyphen;
2068 							break;
2069 						default:
2070 							rule.flags_ = 0; // error
2071 							ptr = end - 1;
2072 							break;
2073 						}
2074 
2075 						state = PreAttrValue;
2076 						++ptr;
2077 					}
2078 					break;
2079 				}
2080 				break;
2081 
2082 			case PreAttrValue:
2083 				while ((ptr != end) && isWhite(*ptr))
2084 					++ptr;
2085 				if (ptr == end)
2086 					continue;
2087 
2088 				if (*ptr == '\"') {
2089 					state = AttrValueDQ;
2090 					start = ptr + 1;
2091 				} else if (*ptr == '\'') {
2092 					state = AttrValueSQ;
2093 					start = ptr + 1;
2094 				} else {
2095 					state = AttrValueNQ;
2096 					start = ptr;
2097 				}
2098 				break;
2099 
2100 			case AttrValueDQ:
2101 				while ((ptr != end) && (*ptr != '\"'))
2102 					++ptr;
2103 				if (ptr == end)
2104 					continue;
2105 
2106 				rule.value_ = start[0..ptr-start];
2107 				state = PostAttrValue;
2108 				break;
2109 
2110 			case AttrValueSQ:
2111 				while ((ptr != end) && (*ptr != '\''))
2112 					++ptr;
2113 				if (ptr == end)
2114 					continue;
2115 
2116 				rule.value_ = start[0..ptr-start];
2117 				state = PostAttrValue;
2118 				break;
2119 
2120 			case AttrValueNQ:
2121 				while ((ptr != end) && !isWhite(*ptr) && (*ptr != ']'))
2122 					++ptr;
2123 				if (ptr == end)
2124 					continue;
2125 
2126 				rule.value_ = start[0..ptr-start];
2127 				state = PostAttrValue;
2128 				continue;
2129 
2130 			case PostAttrValue:
2131 				while ((ptr != end) && (*ptr != ']') && (*ptr != 'i'))
2132 					++ptr;
2133 				if (ptr == end)
2134 					continue;
2135 
2136 				if (*ptr == ']') {
2137 					state = PostIdentifier;
2138 				} else if (*ptr == 'i') {
2139 					rule.flags_ &= ~cast(int)Rule.Flags.CaseSensitive;
2140 				}
2141 				break;
2142 
2143 			case Pseudo:
2144 				while ((ptr != end) && (isAlpha(*ptr) || (*ptr == '-')))
2145 					++ptr;
2146 				if (ptr == end)
2147 					continue;
2148 
2149 				rule.pseudo_ = quickHashOf(start[0..ptr-start]);
2150 				rule.flags_ |= Rule.Flags.HasPseudo;
2151 				++classes;
2152 				if (*ptr != '(') {
2153 					state = PostIdentifier;
2154 					continue;
2155 				} else {
2156 					state = PseudoArgs;
2157 					start = ptr + 1;
2158 				}
2159 				break;
2160 
2161 			case PseudoArgs:
2162 				while ((ptr != end) && (*ptr != ')'))
2163 					++ptr;
2164 				if (ptr == end)
2165 					continue;
2166 
2167 				rule.pseudoArg_ = start[0..ptr-start];
2168 				if (isNumeric(rule.pseudoArg_))
2169 					rule.pseudoArgNum_ = to!uint(rule.pseudoArg_);
2170 				state = PostIdentifier;
2171 				break;
2172 
2173 			case Relation:
2174 				while ((ptr != end) && isWhite(*ptr))
2175 					++ptr;
2176 				if (ptr == end)
2177 					continue;
2178 
2179 				++rules.length;
2180 				rule = &rules.back;
2181 
2182 				state = Identifier;
2183 				switch (*ptr) {
2184 				case '>':
2185 					rule.relation_ = Rule.Relation.Child;
2186 					break;
2187 				case '+':
2188 					rule.relation_ = Rule.Relation.DirectAdjacent;
2189 					break;
2190 				case '~':
2191 					rule.relation_ = Rule.Relation.IndirectAdjacent;
2192 					break;
2193 				default:
2194 					rule.relation_ = Rule.Relation.Descendant;
2195 					continue;
2196 				}
2197 				break;
2198 			}
2199 
2200 			++ptr;
2201 		}
2202 
2203 		rules.reverse();
2204 		selector.rules_ = rules;
2205 		selector.specificity_ = (ids << 14) | (classes << 7) | (tags & 127);
2206 
2207 		return selector;
2208 	}
2209 
2210 	bool matches(const(Node) node) {
2211 		auto element = cast(Node)node;
2212 		if (rules_.empty)
2213 			return false;
2214 
2215 		Rule.Relation relation = Rule.Relation.None;
2216 		foreach(ref rule; rules_) {
2217 			final switch (relation) with (Rule.Relation) {
2218 			case None:
2219 				if (!rule.matches(element))
2220 					return false;
2221 				break;
2222 			case Descendant:
2223 				auto parent = element.parent_;
2224 				while (parent) {
2225 					if (rule.matches(parent)) {
2226 						element = parent;
2227 						break;
2228 					}
2229 					parent = parent.parent_;
2230 				}
2231 				if (!parent)
2232 					return false;
2233 
2234 				break;
2235 			case Child:
2236 				auto parent = element.parent_;
2237 				if (!parent || !rule.matches(parent))
2238 					return false;
2239 				element = parent;
2240 				break;
2241 			case DirectAdjacent:
2242 				auto adjacent = element.previousSibling;
2243 				if (!adjacent || !rule.matches(adjacent))
2244 					return false;
2245 				element = adjacent;
2246 				break;
2247 			case IndirectAdjacent:
2248 				auto adjacent = element.previousSibling;
2249 				while (adjacent) {
2250 					if (rule.matches(adjacent)) {
2251 						element = adjacent;
2252 						break;
2253 					}
2254 					adjacent = adjacent.previousSibling;
2255 				}
2256 				if (!adjacent)
2257 					return false;
2258 
2259 				break;
2260 			}
2261 
2262 			relation = rule.relation;
2263 		}
2264 
2265 		return true;
2266 	}
2267 
2268 	@property size_t specificity() const {
2269 		return specificity_;
2270 	}
2271 
2272 private:
2273 	HTMLString source_;
2274 	Rule[] rules_;
2275 	size_t specificity_;
2276 }
2277 
2278 
2279 private size_t elementFlags(hash_t tagHash) {
2280 	size_t flags;
2281 	switch (tagHash) {
2282 	case tagHashOf("body"):
2283 	case tagHashOf("center"):
2284 	case tagHashOf("div"):
2285 	case tagHashOf("dl"):
2286 	case tagHashOf("form"):
2287 	case tagHashOf("head"):
2288 	case tagHashOf("html"):
2289 	case tagHashOf("noscript"):
2290 	case tagHashOf("ol"):
2291 	case tagHashOf("p"):
2292 	case tagHashOf("table"):
2293 	case tagHashOf("tbody"):
2294 	case tagHashOf("td"):
2295 	case tagHashOf("tfoot"):
2296 	case tagHashOf("th"):
2297 	case tagHashOf("thead"):
2298 	case tagHashOf("title"):
2299 	case tagHashOf("tr"):
2300 	case tagHashOf("ul"):
2301 		flags |= Node.Flags.BlockElement;
2302 		break;
2303 	case tagHashOf("br"):
2304 	case tagHashOf("hr"):
2305 	case tagHashOf("link"):
2306 	case tagHashOf("meta"):
2307 		flags |= Node.Flags.BlockElement;
2308 		flags |= Node.Flags.VoidElement;
2309 		break;
2310 	case tagHashOf("area"):
2311 	case tagHashOf("base"):
2312 	case tagHashOf("basefont"):
2313 	case tagHashOf("col"):
2314 	case tagHashOf("embed"):
2315 	case tagHashOf("img"):
2316 	case tagHashOf("input"):
2317 	case tagHashOf("isindex"):
2318 	case tagHashOf("param"):
2319 	case tagHashOf("source"):
2320 	case tagHashOf("track"):
2321 	case tagHashOf("wbr"):
2322 		flags |= Node.Flags.VoidElement;
2323 		break;
2324 	case tagHashOf("script"):
2325 		flags |= Node.Flags.BlockElement;
2326 		flags |= Node.Flags.ScriptElement;
2327 		break;
2328 	case tagHashOf("style"):
2329 		flags |= Node.Flags.BlockElement;
2330 		flags |= Node.Flags.StyleElement;
2331 		break;
2332 	default:
2333 		break;
2334 	}
2335 	return flags;
2336 }