1 // Written in the D programming language
2 
3 /++
4     This implements a DOM for representing an XML 1.0 document. $(LREF parseDOM)
5     uses an $(REF EntityRange, dxml, parser) to parse the document, and
6     $(LREF DOMEntity) recursively represents the DOM tree.
7 
8     See the documentation for $(MREF dxml, parser) and
9     $(REF EntityRange, dxml, parser) for details on the parser and its
10     configuration options.
11 
12     For convenience, $(REF EntityType, dxml, parser) and
13     $(REF simpleXML, dxml, parser) are publicly imported by this module,
14     since $(REF_ALTTEXT EntityType, EntityType, dxml, parser) is required
15     to correctly use $(LREF DOMEntity), and
16     $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) is highly likely to
17     be used when calling $(LREF parseDOM).
18 
19     Copyright: Copyright 2018 - 2020
20     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
21     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
22     Source:    $(LINK_TO_SRC dxml/_dom.d)
23 
24     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
25   +/
26 module dxml.dom;
27 
28 ///
unittest
{
    import std.range.primitives : empty;

    // One document, parsed below with two different parser configurations.
    auto xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // Default configuration: the comment shows up as a child of the document,
    // and empty element tags are reported as EntityType.elementEmpty.
    {
        auto doc = parseDOM(xml);
        assert(doc.name.empty);
        assert(doc.type == EntityType.elementStart);

        auto topLevel = doc.children;
        assert(topLevel.length == 2);
        assert(topLevel[0].type == EntityType.comment);
        assert(topLevel[0].text == " comment ");

        auto rootTag = topLevel[1];
        assert(rootTag.name == "root");
        assert(rootTag.type == EntityType.elementStart);

        auto rootKids = rootTag.children;
        assert(rootKids.length == 3);

        // <foo>some text<whatever/></foo>
        auto fooTag = rootKids[0];
        assert(fooTag.name == "foo");
        assert(fooTag.type == EntityType.elementStart);

        auto fooKids = fooTag.children;
        assert(fooKids.length == 2);
        assert(fooKids[0].type == EntityType.text);
        assert(fooKids[0].text == "some text");
        assert(fooKids[1].type == EntityType.elementEmpty);
        assert(fooKids[1].name == "whatever");

        // <bar/>
        assert(rootKids[1].name == "bar");
        assert(rootKids[1].type == EntityType.elementEmpty);

        // <baz></baz>
        assert(rootKids[2].name == "baz");
        assert(rootKids[2].type == EntityType.elementStart);
        assert(rootKids[2].children.length == 0);
    }

    // simpleXML: the comment no longer appears, and empty element tags are
    // reported as EntityType.elementStart with no children.
    {
        auto doc = parseDOM!simpleXML(xml);
        assert(doc.name.empty);
        assert(doc.type == EntityType.elementStart);

        auto topLevel = doc.children;
        assert(topLevel.length == 1);

        auto rootTag = topLevel[0];
        assert(rootTag.name == "root");
        assert(rootTag.type == EntityType.elementStart);

        auto rootKids = rootTag.children;
        assert(rootKids.length == 3);

        // <foo>some text<whatever/></foo>
        auto fooTag = rootKids[0];
        assert(fooTag.name == "foo");
        assert(fooTag.type == EntityType.elementStart);

        auto fooKids = fooTag.children;
        assert(fooKids.length == 2);
        assert(fooKids[0].type == EntityType.text);
        assert(fooKids[0].text == "some text");
        assert(fooKids[1].type == EntityType.elementStart);
        assert(fooKids[1].name == "whatever");
        assert(fooKids[1].children.length == 0);

        // <bar/>
        assert(rootKids[1].name == "bar");
        assert(rootKids[1].type == EntityType.elementStart);
        assert(rootKids[1].children.length == 0);

        // <baz></baz>
        assert(rootKids[2].name == "baz");
        assert(rootKids[2].type == EntityType.elementStart);
        assert(rootKids[2].children.length == 0);
    }
}
103 
104 
105 import std.range.primitives;
106 import std.traits;
107 
108 public import dxml.parser : EntityType, simpleXML;
109 import dxml.parser : Config, EntityRange;
110 
111 
112 /++
113     Represents an entity in an XML document as a DOM tree.
114 
115     parseDOM either takes a range of characters or an
116     $(REF EntityRange, dxml, parser) and generates a DOMEntity from that XML.
117 
118     When parseDOM processes the XML, it returns a DOMEntity representing the
119     entire document. Even though the XML document itself isn't technically an
120     entity in the XML document, it's simplest to treat it as if it were an
121     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser)
122     with an empty $(LREF2 name, _DOMEntity.name). That DOMEntity then contains
123     child entities that recursively define the DOM tree through their children.
124 
125     For DOMEntities of type
126     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser),
127     $(LREF _DOMEntity.children) gives access to all of the child entities of
128     that start tag. Other DOMEntities have no children.
129 
130     Note that the $(LREF2 type, _DOMEntity.type) determines which
131     properties of the DOMEntity can be used, and it can determine whether
132     functions which a DOMEntity is passed to are allowed to be called. Each
133     function lists which $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s
134     are allowed, and it is an error to call them with any other
135     $(REF_ALTTEXT EntityType, EntityType, dxml, parser).
136 
137     If parseDOM is given a range of characters, it in turn passes that to
138     $(REF parseXML, dxml, parser) to do the actual XML parsing. As such, that
139     overload accepts an optional $(REF Config, dxml, parser) as a template
140     argument to configure the parser.
141 
    If parseDOM is given an
    $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the range does
    not have to be at the start of the document. It can be used to create a DOM
    for a portion of the document. Just like when a character range is passed
    to it, it will return a DOMEntity with the $(LREF2 type, _DOMEntity.type)
    $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser)
    and an empty $(LREF2 name, _DOMEntity.name). It will iterate the range until
    it either reaches the end of the range, or it reaches the end tag which
    matches the start tag which is the parent of the entity that was the
    $(D front) of the range when it was passed to parseDOM. The
    $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser)
    is passed by $(K_REF), so if it was not at the top level when it was passed
    to parseDOM (and thus still has elements in it when parseDOM returns), the
    range will then be at the entity after that matching end tag, and the
    application can continue to process the range after that if it so chooses.
157 
158     Params:
159         config = The $(REF Config, dxml, parser) to use with
160                  $(REF parseXML, dxml, parser) if the range passed to parseDOM
161                  is a range of characters.
162         range = Either a range of characters representing an entire XML document
163                 or a $(REF EntityRange, dxml, parser) which may refer to some
164                 or all of an XML document.
165 
    Returns: A DOMEntity representing the DOM tree from the point in the
             document that was passed to parseDOM (the start of the document if
             a range of characters was passed, and wherever in the document the
             range was if an
             $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) was passed).
171 
172     Throws: $(REF_ALTTEXT XMLParsingException, XMLParsingException, dxml, parser)
173             if the parser encounters invalid XML.
174   +/
175 struct DOMEntity(R)
176 {
177 public:
178 
179     import std.algorithm.searching : canFind;
180     import std.range : only, takeExactly;
181     import std.typecons : Tuple;
182     import dxml.parser : TextPos;
183 
184     private enum compileInTests = is(R == DOMCompileTests);
185 
186     /++
187         The type used when any slice of the original range of characters is
188         used. If the range was a string or supports slicing, then SliceOfR is
189         the same type as the range; otherwise, it's the result of calling
190         $(PHOBOS_REF takeExactly, std, range) on it.
191 
192         ---
193         import std.algorithm : filter;
194         import std.range : takeExactly;
195 
196         static assert(is(DOMEntity!string.SliceOfR == string));
197 
198         auto range = filter!(a => true)("some xml");
199 
200         static assert(is(DOMEntity!(typeof(range)).SliceOfR ==
201                          typeof(takeExactly(range, 42))));
202         ---
203       +/
204     static if(isDynamicArray!R || hasSlicing!R)
205         alias SliceOfR = R;
206     else
207         alias SliceOfR = typeof(takeExactly(R.init, 42));
208 
209     // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
210     // a ddoc-ed unit test.
211     static if(compileInTests) @safe unittest
212     {
213         import std.algorithm : filter;
214         import std.range : takeExactly;
215 
216         static assert(is(DOMEntity!string.SliceOfR == string));
217 
218         auto range = filter!(a => true)("some xml");
219 
220         static assert(is(DOMEntity!(typeof(range)).SliceOfR ==
221                          typeof(takeExactly(range, 42))));
222     }
223 
224 
225     /++
226         The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
227         $(LREF2 attributes, DOMEntity) returns a range of.
228 
229         See_Also: $(LREF2 attributes, DOMEntity)
230       +/
231     alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos,  "pos");
232 
233 
234     /++
235         The $(REF_ALTTEXT EntityType, EntityType, dxml, parser) for this
236         DOMEntity.
237 
238         The type can never be
239         $(REF_ALTTEXT EntityType.elementEnd, EntityType.elementEnd, dxml, parser),
240         because the end of $(LREF2 children, DOMEntity.children) already
241         indicates where the contents of the start tag end.
242 
243         type determines which properties of the DOMEntity can be used, and it
244         can determine whether functions which a DOMEntity is passed to are
245         allowed to be called. Each function lists which
246         $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s are allowed, and it
247         is an error to call them with any other
248         $(REF_ALTTEXT EntityType, EntityType, dxml, parser).
249       +/
250     @property EntityType type() @safe const pure nothrow @nogc
251     {
252         return _type;
253     }
254 
255     ///
256     static if(compileInTests) unittest
257     {
258         import std.range.primitives;
259 
260         auto xml = "<root>\n" ~
261                    "    <!--no comment-->\n" ~
262                    "    <![CDATA[cdata run]]>\n" ~
263                    "    <text>I am text!</text>\n" ~
264                    "    <empty/>\n" ~
265                    "    <?pi?>\n" ~
266                    "</root>";
267 
268         auto dom = parseDOM(xml);
269         assert(dom.type == EntityType.elementStart);
270         assert(dom.name.empty);
271         assert(dom.children.length == 1);
272 
273         auto root = dom.children[0];
274         assert(root.type == EntityType.elementStart);
275         assert(root.name == "root");
276         assert(root.children.length == 5);
277 
278         assert(root.children[0].type == EntityType.comment);
279         assert(root.children[0].text == "no comment");
280 
281         assert(root.children[1].type == EntityType.cdata);
282         assert(root.children[1].text == "cdata run");
283 
284         auto textTag = root.children[2];
285         assert(textTag.type == EntityType.elementStart);
286         assert(textTag.name == "text");
287         assert(textTag.children.length == 1);
288 
289         assert(textTag.children[0].type == EntityType.text);
290         assert(textTag.children[0].text == "I am text!");
291 
292         assert(root.children[3].type == EntityType.elementEmpty);
293         assert(root.children[3].name == "empty");
294 
295         assert(root.children[4].type == EntityType.pi);
296         assert(root.children[4].name == "pi");
297     }
298 
299 
300     /++
        The position in the original text where the entity starts.
302 
303         See_Also: $(REF_ALTTEXT TextPos, TextPos, dxml, parser)$(BR)
304                   $(REF_ALTTEXT XMLParsingException._pos, XMLParsingException._pos, dxml, parser)
305       +/
306     @property TextPos pos() @safe const pure nothrow @nogc
307     {
308         return _pos;
309     }
310 
311     ///
312     static if(compileInTests) unittest
313     {
314         import std.range.primitives : empty;
315         import dxml.parser : TextPos;
316         import dxml.util : stripIndent;
317 
318         auto xml = "<root>\n" ~
319                    "    <foo>\n" ~
320                    "        Foo and bar. Always foo and bar...\n" ~
321                    "    </foo>\n" ~
322                    "</root>";
323 
324         auto dom = parseDOM(xml);
325         assert(dom.type == EntityType.elementStart);
326         assert(dom.name.empty);
327         assert(dom.pos == TextPos(1, 1));
328 
329         auto root = dom.children[0];
330         assert(root.type == EntityType.elementStart);
331         assert(root.name == "root");
332         assert(root.pos == TextPos(1, 1));
333 
334         auto foo = root.children[0];
335         assert(foo.type == EntityType.elementStart);
336         assert(foo.name == "foo");
337         assert(foo.pos == TextPos(2, 5));
338 
339         auto text = foo.children[0];
340         assert(text.type == EntityType.text);
341         assert(text.text.stripIndent() ==
342                "Foo and bar. Always foo and bar...");
343         assert(text.pos == TextPos(2, 10));
344     }
345 
346 
347     /++
348         Gives the name of this DOMEntity.
349 
350         Note that this is the direct name in the XML for this entity and
351         does not contain any of the names of any of the parent entities that
352         this entity has.
353 
354         $(TABLE
355             $(TR $(TH Supported $(LREF EntityType)s:))
356             $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
357             $(TR $(TD $(REF_ALTTEXT elementEnd, EntityType.elementEnd, dxml, parser)))
358             $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser)))
359             $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser)))
360         )
361 
362         See_Also: $(LREF2 path, DOMEntity.path)
363       +/
364     @property SliceOfR name()
365     {
366         import dxml.internal : checkedSave;
367         with(EntityType)
368         {
369             import std.format : format;
370             assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type),
371                    format("name cannot be called with %s", _type));
372         }
373         return checkedSave(_name);
374     }
375 
376     ///
377     static if(compileInTests) unittest
378     {
379         import std.range.primitives : empty;
380 
381         auto xml = "<root>\n" ~
382                    "    <empty/>\n" ~
383                    "    <?pi?>\n" ~
384                    "</root>";
385 
386         auto dom = parseDOM(xml);
387         assert(dom.type == EntityType.elementStart);
388         assert(dom.name.empty);
389 
390         auto root = dom.children[0];
391         assert(root.type == EntityType.elementStart);
392         assert(root.name == "root");
393 
394         assert(root.children[0].type == EntityType.elementEmpty);
395         assert(root.children[0].name == "empty");
396 
397         assert(root.children[1].type == EntityType.pi);
398         assert(root.children[1].name == "pi");
399     }
400 
401 
402     /++
403         Gives the list of the names of the parent start tags of this DOMEntity.
404 
405         The name of the current entity (if it has one) is not included in the
406         path.
407 
408         Note that if parseDOM were given an
409         $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the path
410         starts where the range started. So, it doesn't necessarily contain the
411         entire path from the start of the XML document.
412 
413         See_Also: $(LREF2 name, DOMEntity.name)
414       +/
415     @property SliceOfR[] path()
416     {
417         return _path;
418     }
419 
420     ///
421     static if(compileInTests) unittest
422     {
423         import std.range.primitives : empty;
424 
425         auto xml = "<root>\n" ~
426                    "    <bar>\n" ~
427                    "        <baz>\n" ~
428                    "            <xyzzy/>\n" ~
429                    "        </baz>\n" ~
430                    "        <frobozz>\n" ~
431                    "            <!-- comment -->\n" ~
432                    "            It's magic!\n" ~
433                    "        </frobozz>\n" ~
434                    "    </bar>\n" ~
435                    "    <foo></foo>\n" ~
436                    "</root>";
437 
438         auto dom = parseDOM(xml);
439         assert(dom.type == EntityType.elementStart);
440         assert(dom.name.empty);
441         assert(dom.path.empty);
442 
443         auto root = dom.children[0];
444         assert(root.type == EntityType.elementStart);
445         assert(root.name == "root");
446         assert(root.path.empty);
447 
448         auto bar = root.children[0];
449         assert(bar.type == EntityType.elementStart);
450         assert(bar.name == "bar");
451         assert(bar.path == ["root"]);
452 
453         auto baz = bar.children[0];
454         assert(baz.type == EntityType.elementStart);
455         assert(baz.name == "baz");
456         assert(baz.path == ["root", "bar"]);
457 
458         auto xyzzy = baz.children[0];
459         assert(xyzzy.type == EntityType.elementEmpty);
460         assert(xyzzy.name == "xyzzy");
461         assert(xyzzy.path == ["root", "bar", "baz"]);
462 
463         auto frobozz = bar.children[1];
464         assert(frobozz.type == EntityType.elementStart);
465         assert(frobozz.name == "frobozz");
466         assert(frobozz.path == ["root", "bar"]);
467 
468         auto comment = frobozz.children[0];
469         assert(comment.type == EntityType.comment);
470         assert(comment.text == " comment ");
471         assert(comment.path == ["root", "bar", "frobozz"]);
472 
473         auto text = frobozz.children[1];
474         assert(text.type == EntityType.text);
475         assert(text.text == "\n            It's magic!\n        ");
476         assert(text.path == ["root", "bar", "frobozz"]);
477 
478         auto foo = root.children[1];
479         assert(foo.type == EntityType.elementStart);
480         assert(foo.name == "foo");
481         assert(foo.path == ["root"]);
482     }
483 
484 
485     /++
486         Returns a dynamic array of attributes for a start tag where each
487         attribute is represented as a$(BR)
        $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
                  $(LREF2 SliceOfR, DOMEntity), $(D_STRING "name"),
                  $(LREF2 SliceOfR, DOMEntity), $(D_STRING "value"),
                  $(REF_ALTTEXT TextPos, TextPos, dxml, parser), $(D_STRING "pos"))).
492 
493         $(TABLE
494             $(TR $(TH Supported $(LREF EntityType)s:))
495             $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
496             $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser)))
497         )
498 
        See_Also: $(LREF2 Attribute, DOMEntity)$(BR)
500                   $(REF normalize, dxml, util)$(BR)
501                   $(REF asNormalized, dxml, util)
502       +/
503     @property auto attributes()
504     {
505         with(EntityType)
506         {
507             import std.format : format;
508             assert(_type == elementStart || _type == elementEmpty,
509                    format("attributes cannot be called with %s", _type));
510         }
511         return _attributes;
512     }
513 
514     ///
515     static if(compileInTests) unittest
516     {
517         import std.algorithm.comparison : equal;
518         import std.algorithm.iteration : filter;
519         import std.range.primitives : empty;
520         import dxml.parser : TextPos;
521 
522         {
523             auto xml = "<root/>";
524             auto root = parseDOM(xml).children[0];
525             assert(root.type == EntityType.elementEmpty);
526             assert(root.attributes.empty);
527 
528             static assert(is(ElementType!(typeof(root.attributes)) ==
529                              typeof(root).Attribute));
530         }
531         {
532             auto xml = "<root a='42' q='29' w='hello'/>";
533             auto root = parseDOM(xml).children[0];
534             assert(root.type == EntityType.elementEmpty);
535 
536             auto attrs = root.attributes;
537             assert(attrs.length == 3);
538 
539             assert(attrs[0].name == "a");
540             assert(attrs[0].value == "42");
541             assert(attrs[0].pos == TextPos(1, 7));
542 
543             assert(attrs[1].name == "q");
544             assert(attrs[1].value == "29");
545             assert(attrs[1].pos == TextPos(1, 14));
546 
547             assert(attrs[2].name == "w");
548             assert(attrs[2].value == "hello");
549             assert(attrs[2].pos == TextPos(1, 21));
550         }
551         // Because the type of name and value is SliceOfR, == with a string
552         // only works if the range passed to parseXML was string.
553         {
554             auto xml = filter!"true"("<root a='42' q='29' w='hello'/>");
555             auto root = parseDOM(xml).children[0];
556             assert(root.type == EntityType.elementEmpty);
557 
558             auto attrs = root.attributes;
559             assert(attrs.length == 3);
560 
561             assert(equal(attrs[0].name, "a"));
562             assert(equal(attrs[0].value, "42"));
563             assert(attrs[0].pos == TextPos(1, 7));
564 
565             assert(equal(attrs[1].name, "q"));
566             assert(equal(attrs[1].value, "29"));
567             assert(attrs[1].pos == TextPos(1, 14));
568 
569             assert(equal(attrs[2].name, "w"));
570             assert(equal(attrs[2].value, "hello"));
571             assert(attrs[2].pos == TextPos(1, 21));
572         }
573     }
574 
575 
576     /++
577         Returns the textual value of this DOMEntity.
578 
579         In the case of
580         $(REF_ALTTEXT EntityType.pi, EntityType.pi, dxml, parser), this is the
581         text that follows the name, whereas in the other cases, the text is the
        entire contents of the entity (save for the delimiters on the ends if
583         that entity has them).
584 
585         $(TABLE
586             $(TR $(TH Supported $(LREF EntityType)s:))
587             $(TR $(TD $(REF_ALTTEXT cdata, EntityType.cdata, dxml, parser)))
588             $(TR $(TD $(REF_ALTTEXT comment, EntityType.comment, dxml, parser)))
589             $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser)))
590             $(TR $(TD $(REF_ALTTEXT _text, EntityType._text, dxml, parser)))
591         )
592 
593         See_Also: $(REF normalize, dxml, util)$(BR)
594                   $(REF asNormalized, dxml, util)$(BR)
595                   $(REF stripIndent, dxml, util)$(BR)
596                   $(REF withoutIndent, dxml, util)
597       +/
598     @property SliceOfR text()
599     {
600         import dxml.internal : checkedSave;
601         with(EntityType)
602         {
603             import std.format : format;
604             assert(only(cdata, comment, pi, text).canFind(_type),
605                    format("text cannot be called with %s", _type));
606         }
607         return checkedSave(_text);
608     }
609 
610     ///
611     static if(compileInTests) unittest
612     {
613         import std.range.primitives : empty;
614 
615         auto xml = "<?xml version='1.0'?>\n" ~
616                    "<?instructionName?>\n" ~
617                    "<?foo here is something to say?>\n" ~
618                    "<root>\n" ~
619                    "    <![CDATA[ Yay! random text >> << ]]>\n" ~
620                    "    <!-- some random comment -->\n" ~
621                    "    <p>something here</p>\n" ~
622                    "    <p>\n" ~
623                    "       something else\n" ~
624                    "       here</p>\n" ~
625                    "</root>";
626         auto dom = parseDOM(xml);
627 
628         // "<?instructionName?>\n" ~
629         auto pi1 = dom.children[0];
630         assert(pi1.type == EntityType.pi);
631         assert(pi1.name == "instructionName");
632         assert(pi1.text.empty);
633 
634         // "<?foo here is something to say?>\n" ~
635         auto pi2 = dom.children[1];
636         assert(pi2.type == EntityType.pi);
637         assert(pi2.name == "foo");
638         assert(pi2.text == "here is something to say");
639 
640         // "<root>\n" ~
641         auto root = dom.children[2];
642         assert(root.type == EntityType.elementStart);
643 
644         // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
645         auto cdata = root.children[0];
646         assert(cdata.type == EntityType.cdata);
647         assert(cdata.text == " Yay! random text >> << ");
648 
649         // "    <!-- some random comment -->\n" ~
650         auto comment = root.children[1];
651         assert(comment.type == EntityType.comment);
652         assert(comment.text == " some random comment ");
653 
654         // "    <p>something here</p>\n" ~
655         auto p1 = root.children[2];
656         assert(p1.type == EntityType.elementStart);
657         assert(p1.name == "p");
658 
659         assert(p1.children[0].type == EntityType.text);
660         assert(p1.children[0].text == "something here");
661 
662         // "    <p>\n" ~
663         // "       something else\n" ~
664         // "       here</p>\n" ~
665         auto p2 = root.children[3];
666         assert(p2.type == EntityType.elementStart);
667 
668         assert(p2.children[0].type == EntityType.text);
669         assert(p2.children[0].text == "\n       something else\n       here");
670     }
671 
672 
673     /++
674         Returns the child entities of the current entity.
675 
676         They are in the same order that they were in the XML document.
677 
678         $(TABLE
679             $(TR $(TH Supported $(LREF EntityType)s:))
            $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
681         )
682       +/
683     @property DOMEntity[] children()
684     {
685         import std.format : format;
686         assert(_type == EntityType.elementStart,
687                format!"children cannot be called with %s"(_type));
688         return _children;
689     }
690 
691     ///
692     static if(compileInTests) unittest
693     {
694         auto xml = "<potato>\n" ~
695                    "    <!--comment-->\n" ~
696                    "    <foo>bar</foo>\n" ~
697                    "    <tag>\n" ~
698                    "        <silly>you</silly>\n" ~
699                    "        <empty/>\n" ~
700                    "        <nocontent></nocontent>\n" ~
701                    "    </tag>\n" ~
702                    "</potato>\n" ~
703                    "<!--the end-->";
704         auto dom = parseDOM(xml);
705         assert(dom.children.length == 2);
706 
707         auto potato = dom.children[0];
708         assert(potato.type == EntityType.elementStart);
709         assert(potato.name == "potato");
710         assert(potato.children.length == 3);
711 
712         auto comment = potato.children[0];
713         assert(comment.type == EntityType.comment);
714         assert(comment.text == "comment");
715 
716         auto foo = potato.children[1];
717         assert(foo.type == EntityType.elementStart);
718         assert(foo.name == "foo");
719         assert(foo.children.length == 1);
720 
721         assert(foo.children[0].type == EntityType.text);
722         assert(foo.children[0].text == "bar");
723 
724         auto tag = potato.children[2];
725         assert(tag.type == EntityType.elementStart);
726         assert(tag.name == "tag");
727         assert(tag.children.length == 3);
728 
729         auto silly = tag.children[0];
730         assert(silly.type == EntityType.elementStart);
731         assert(silly.name == "silly");
732         assert(silly.children.length == 1);
733 
734         assert(silly.children[0].type == EntityType.text);
735         assert(silly.children[0].text == "you");
736 
737         auto empty = tag.children[1];
738         assert(empty.type == EntityType.elementEmpty);
739         assert(empty.name == "empty");
740 
741         auto nocontent = tag.children[2];
742         assert(nocontent.type == EntityType.elementStart);
743         assert(nocontent.name == "nocontent");
744         assert(nocontent.children.length == 0);
745 
746         auto endComment = dom.children[1];
747         assert(endComment.type == EntityType.comment);
748         assert(endComment.text == "the end");
749     }
750 
751 
752     // Reduce the chance of bugs if reference-type ranges are involved.
753     static if(!isDynamicArray!R) this(this)
754     {
755         with(EntityType) final switch(_type)
756         {
757             case cdata: goto case text;
758             case comment: goto case text;
759             case elementStart:
760             {
761                 _name = _name.save;
762                 break;
763             }
764             case elementEnd: goto case elementStart;
765             case elementEmpty: goto case elementStart;
766             case text:
767             {
768                 _text = _text.save;
769                 break;
770             }
771             case pi:
772             {
773                 _text = _text.save;
774                 goto case elementStart;
775             }
776         }
777     }
778 
779 
780 private:
781 
782     this(EntityType type, TextPos pos)
783     {
784         _type = type;
785         _pos = pos;
786 
787         // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
788         _name = typeof(_name).init;
789         _text = typeof(_text).init;
790     }
791 
792     auto _type = EntityType.elementStart;
793     TextPos _pos;
794     SliceOfR _name;
795     SliceOfR[] _path;
796     Attribute[] _attributes;
797     SliceOfR _text;
798     DOMEntity[] _children;
799 }
800 
801 /// Ditto
DOMEntity!R parseDOM(Config config = Config.init, R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import dxml.parser : parseXML;

    // Wrap the characters in an entity range configured by config, then let
    // _parseDOM fill in a default-initialized DOMEntity for the document.
    auto entities = parseXML!config(range);
    DOMEntity!R document;
    _parseDOM(entities, document);
    return document;
}
811 
812 /// Ditto
DOMEntity!(ER.Input) parseDOM(ER)(ref ER range)
    if(isInstanceOf!(EntityRange, ER))
{
    DOMEntity!(ER.Input) document;

    // An exhausted range yields the default-initialized DOMEntity untouched.
    if(!range.empty)
    {
        // The result starts wherever the range currently is.
        document._pos = range.front.pos;

        // If the range is sitting on an end tag, there is nothing to parse,
        // so only the position is filled in.
        if(range.front.type != EntityType.elementEnd)
            _parseDOM(range, document);
    }

    return document;
}
825 
826 /++
827     parseDOM with the default $(REF_ALTTEXT Config, Config, dxml, parser) and a
828     range of characters.
829   +/
@safe unittest
{
    import std.range.primitives;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // The entity returned by parseDOM is a nameless start tag which holds the
    // top-level entities of the document.
    auto document = parseDOM(xml);
    assert(document.type == EntityType.elementStart);
    assert(document.name.empty);
    assert(document.children.length == 1);

    auto rootElem = document.children[0];
    assert(rootElem.type == EntityType.elementStart);
    assert(rootElem.name == "root");
    assert(rootElem.children.length == 4);

    // The comment is part of the tree here.
    auto remark = rootElem.children[0];
    assert(remark.type == EntityType.comment);
    assert(remark.text == " no comment ");

    auto fooElem = rootElem.children[1];
    assert(fooElem.type == EntityType.elementStart);
    assert(fooElem.name == "foo");
    assert(fooElem.children.length == 0);

    auto bazElem = rootElem.children[2];
    assert(bazElem.type == EntityType.elementStart);
    assert(bazElem.name == "baz");
    assert(bazElem.children.length == 1);

    auto xyzzyElem = bazElem.children[0];
    assert(xyzzyElem.type == EntityType.elementStart);
    assert(xyzzyElem.name == "xyzzy");
    assert(xyzzyElem.children.length == 1);

    auto adventure = xyzzyElem.children[0];
    assert(adventure.type == EntityType.text);
    assert(adventure.text == "It's an adventure!");

    // <tag/> is reported as an empty element tag here.
    auto tagElem = rootElem.children[3];
    assert(tagElem.type == EntityType.elementEmpty);
    assert(tagElem.name == "tag");
}
876 
877 /++
878     parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and a range
879     of characters.
880   +/
unittest
{
    import std.range.primitives : empty;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    auto document = parseDOM!simpleXML(xml);
    assert(document.type == EntityType.elementStart);
    assert(document.name.empty);
    assert(document.children.length == 1);

    // With simpleXML, the comment does not show up among the children.
    auto rootElem = document.children[0];
    assert(rootElem.type == EntityType.elementStart);
    assert(rootElem.name == "root");
    assert(rootElem.children.length == 3);

    auto fooElem = rootElem.children[0];
    assert(fooElem.type == EntityType.elementStart);
    assert(fooElem.name == "foo");
    assert(fooElem.children.length == 0);

    auto bazElem = rootElem.children[1];
    assert(bazElem.type == EntityType.elementStart);
    assert(bazElem.name == "baz");
    assert(bazElem.children.length == 1);

    auto xyzzyElem = bazElem.children[0];
    assert(xyzzyElem.type == EntityType.elementStart);
    assert(xyzzyElem.name == "xyzzy");
    assert(xyzzyElem.children.length == 1);

    auto adventure = xyzzyElem.children[0];
    assert(adventure.type == EntityType.text);
    assert(adventure.text == "It's an adventure!");

    // With simpleXML, <tag/> is reported as a start tag without children.
    auto tagElem = rootElem.children[2];
    assert(tagElem.type == EntityType.elementStart);
    assert(tagElem.name == "tag");
    assert(tagElem.children.length == 0);
}
925 
926 /++
927     parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and an
928     $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser).
929   +/
unittest
{
    import std.range.primitives : empty;
    import dxml.parser : parseXML;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // The range is taken by ref, so parsing the whole document consumes it.
    auto range = parseXML!simpleXML(xml);
    auto document = parseDOM(range);
    assert(range.empty);

    assert(document.type == EntityType.elementStart);
    assert(document.name.empty);
    assert(document.children.length == 1);

    auto rootElem = document.children[0];
    assert(rootElem.type == EntityType.elementStart);
    assert(rootElem.name == "root");
    assert(rootElem.children.length == 3);

    auto fooElem = rootElem.children[0];
    assert(fooElem.type == EntityType.elementStart);
    assert(fooElem.name == "foo");
    assert(fooElem.children.length == 0);

    auto bazElem = rootElem.children[1];
    assert(bazElem.type == EntityType.elementStart);
    assert(bazElem.name == "baz");
    assert(bazElem.children.length == 1);

    auto xyzzyElem = bazElem.children[0];
    assert(xyzzyElem.type == EntityType.elementStart);
    assert(xyzzyElem.name == "xyzzy");
    assert(xyzzyElem.children.length == 1);

    auto adventure = xyzzyElem.children[0];
    assert(adventure.type == EntityType.text);
    assert(adventure.text == "It's an adventure!");

    auto tagElem = rootElem.children[2];
    assert(tagElem.type == EntityType.elementStart);
    assert(tagElem.name == "tag");
    assert(tagElem.children.length == 0);
}
978 
979 /++
980     parseDOM with an $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser)
981     which is not at the start of the document.
982   +/
unittest
{
    import std.range.primitives : empty;
    import dxml.parser : parseXML, skipToPath;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // Move to <xyzzy> before handing the range to parseDOM.
    auto range = parseXML!simpleXML(xml).skipToPath("baz/xyzzy");
    {
        auto current = range.front;
        assert(current.type == EntityType.elementStart);
        assert(current.name == "xyzzy");
    }

    // Afterwards, the range sits on the entity following </baz>.
    auto subtree = parseDOM(range);
    {
        auto current = range.front;
        assert(current.type == EntityType.elementStart);
        assert(current.name == "tag");
    }

    assert(subtree.type == EntityType.elementStart);
    assert(subtree.name.empty);
    assert(subtree.children.length == 1);

    auto xyzzyElem = subtree.children[0];
    assert(xyzzyElem.type == EntityType.elementStart);
    assert(xyzzyElem.name == "xyzzy");
    assert(xyzzyElem.children.length == 1);

    auto adventure = xyzzyElem.children[0];
    assert(adventure.type == EntityType.text);
    assert(adventure.text == "It's an adventure!");
}
1017 
1018 /// parseDOM at compile-time
unittest
{
    // Imported locally, as in the other documented examples, so that the
    // example is self-contained when it is rendered in the documentation.
    import std.range.primitives : empty;

    enum xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // Using enum forces parseDOM to be evaluated at compile time.
    enum dom = parseDOM(xml);
    static assert(dom.type == EntityType.elementStart);
    static assert(dom.name.empty);
    static assert(dom.children.length == 2);

    static assert(dom.children[0].type == EntityType.comment);
    static assert(dom.children[0].text == " comment ");
}
1036 
1037 // This is purely to provide a way to trigger the unittest blocks in DOMEntity
1038 // without compiling them in normally.
private struct DOMCompileTests
{
    // Every member only exists to satisfy the forward range API of a range of
    // char; none of them is meant to ever be called.
@safe pure nothrow @nogc:

    @property bool empty() { assert(0); }

    @property char front() { assert(0); }

    void popFront() { assert(0); }

    @property typeof(this) save() { assert(0); }
}
1046 
unittest
{
    // Declaring a variable of this type instantiates DOMEntity with
    // DOMCompileTests; the variable itself is never used.
    DOMEntity!DOMCompileTests _domTests;
}
1051 
1052 
1053 private:
1054 
// Reads entities from range and stores them as the children of parent.
//
// Entities are consumed until either the range is empty or an elementEnd has
// been popped off (the end tag which closes the element that parent
// represents). Start tags with content are handled by recursing, so the
// subtree below each child is filled in as well.
//
// Params:
//     range  = The range of entities to read from. It must not be empty, and
//              its front must not be an elementEnd.
//     parent = The entity whose _children gets assigned.
//     path   = The names of the start tags enclosing the entities which are
//              about to be read. It is stored as the _path of each child.
void _parseDOM(ER, DE)(ref ER range, ref DE parent, ER.SliceOfR[] path = null)
{
    assert(!range.empty);
    assert(range.front.type != EntityType.elementEnd);

    import std.array : appender, array;
    auto children = appender!(DE[])();

    while(!range.empty)
    {
        auto entity = range.front;
        range.popFront();

        // The end tag belonging to parent terminates this level.
        if(entity.type == EntityType.elementEnd)
            break;

        auto child = DE(entity.type, entity.pos);
        child._path = path;

        with(EntityType) final switch(entity.type)
        {
            case cdata: goto case text;
            case comment: goto case text;
            case elementStart:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();

                // A start tag immediately followed by its end tag has no
                // children, so the end tag is simply consumed.
                if(range.front.type == EntityType.elementEnd)
                    range.popFront();
                else
                {
                    // The path for the nested level is built as a separate
                    // value rather than pushing onto path and popping off
                    // again afterwards. That way, path is never modified, so
                    // it cannot lose an element (or underflow its length) if
                    // the name is skipped for being empty.
                    auto childPath = entity.name.empty ? path : path ~ entity.name;

                    // TODO The explicit instantiation doesn't hurt, but it
                    // shouldn't be necessary, and if it's not there, we get
                    // a compiler error. It should be reduced and reported.
                    _parseDOM!(ER, DE)(range, child, childPath);
                }
                break;
            }
            case elementEnd: assert(0);
            case elementEmpty:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();
                break;
            }
            case text:
            {
                child._text = entity.text;
                break;
            }
            case pi:
            {
                child._name = entity.name;
                child._text = entity.text;
                break;
            }
        }

        put(children, child);
    }

    parent._children = children.data;
}
1121 
unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;
    import dxml.parser : parseXML, TextPos;

    // Parses a DOM from a copy of entityRange and checks the type and position
    // of the resulting entity as well as the types of its children.
    // Afterwards, entityRange itself is advanced by one entity (unless it is
    // already empty) so that consecutive calls walk through the document one
    // entity at a time.
    static void testChildren(ER, size_t line = __LINE__)(ref ER entityRange, int row, int col, EntityType[] expected...)
    {
        import core.exception : AssertError;
        import std.exception : enforce;

        auto copied = entityRange.save;
        auto parsed = parseDOM(copied);
        enforce!AssertError(parsed.type == EntityType.elementStart, "unittest 1", __FILE__, line);
        enforce!AssertError(parsed.children.length == expected.length, "unittest 2", __FILE__, line);
        foreach(idx, ref kid; parsed._children)
            enforce!AssertError(kid.type == expected[idx], "unittest 3", __FILE__, line);
        enforce!AssertError(parsed.pos == TextPos(row, col), "unittest 4", __FILE__, line);

        if(entityRange.empty)
            return;
        entityRange.popFront();
    }

    static foreach(func; testRangeFuncs)
    {{
        // Parsing from every possible starting point of two tiny documents
        // which only differ in how the <su> element is written.
        {
            foreach(i, xml; ["<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su></su>",
                            "<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su/>"])
            {
                immutable suType = i == 0 ? EntityType.elementStart : EntityType.elementEmpty;
                auto range = parseXML(func(xml));

                // Each iteration starts one entity further into the document,
                // so one fewer child is expected each time.
                foreach(j; 0 .. 4 - i)
                {
                    auto copied = range.save;
                    auto partial = parseDOM(copied);
                    assert(partial.type == EntityType.elementStart);
                    assert(partial.children.length == 3 - j);
                    if(j < 3)
                    {
                        assert(partial.children[2 - j].type == suType);
                        assert(equal(partial.children[2 - j].name, "su"));
                        if(j < 2)
                        {
                            assert(partial.children[1 - j].type == EntityType.pi);
                            assert(equal(partial.children[1 - j].name, "pi"));
                            assert(equal(partial.children[1 - j].text, "foo"));
                            if(j < 1)
                            {
                                assert(partial.children[0].type == EntityType.comment);
                                assert(equal(partial.children[0].text, " comment "));
                            }
                        }
                    }
                    range.popFront();
                }

                // Parsing an exhausted range gives a default-initialized entity.
                assert(range.empty);
                auto nothing = parseDOM(range);
                assert(nothing.type == EntityType.elementStart);
                assert(nothing.name is typeof(nothing.name).init);
                assert(nothing.children.length == 0);
            }
        }
        {
            auto xml = "<root>\n" ~
                       "    <foo>\n" ~
                       "        <bar>\n" ~
                       "            <baz>\n" ~
                       "            It's silly, Charley\n" ~
                       "            </baz>\n" ~
                       "            <frobozz>\n" ~
                       "                <is>the Wiz</is>\n" ~
                       "            </frobozz>\n" ~
                       "            <empty></empty>\n" ~
                       "            <xyzzy/>\n" ~
                       "        </bar>\n" ~
                       "    </foo>\n" ~
                       "    <!--This isn't the end-->\n" ~
                       "</root>\n" ~
                       "<?Poirot?>\n" ~
                       "<!--It's the end!-->";

            // Walk through the document one entity at a time and parse a DOM
            // from each position.
            {
                auto range = parseXML(func(xml));
                with(EntityType)
                {
                    testChildren(range, 1, 1, elementStart, pi, comment); // <root>
                    testChildren(range, 2, 5, elementStart, comment); // <foo>
                    testChildren(range, 3, 9, elementStart); // <bar>
                    testChildren(range, 4, 13, elementStart, elementStart, elementStart, elementEmpty); // <baz>
                    testChildren(range, 4, 18, text); // It's silly, Charley
                    testChildren(range, 6, 13); // </baz>
                    testChildren(range, 7, 13, elementStart, elementStart, elementEmpty); // <frobozz>
                    testChildren(range, 8, 17, elementStart); // <is>
                    testChildren(range, 8, 21, text); // the Wiz
                    testChildren(range, 8, 28); // </is>
                    testChildren(range, 9, 13); // </frobozz>
                    testChildren(range, 10, 13, elementStart, elementEmpty); // <empty>
                    testChildren(range, 10, 20); // </empty>
                    testChildren(range, 11, 13, elementEmpty); // <xyzzy/>
                    testChildren(range, 12, 9); // </bar>
                    testChildren(range, 13, 5); // </foo>
                    testChildren(range, 14, 5, comment); // <!--This isn't the end-->
                    testChildren(range, 15, 1); // </root>
                    testChildren(range, 16, 1, pi, comment); // <?Poirot?>
                    testChildren(range, 17, 1, comment); // <!--It's the end!-->
                    testChildren(range, 1, 1); // empty range
                }
            }

            // Parse the whole document at once and inspect the complete tree.
            {
                auto tree = parseDOM(func(xml));
                assert(tree.children.length == 3);

                auto rootElem = tree.children[0];
                assert(rootElem.type == EntityType.elementStart);
                assert(rootElem.pos == TextPos(1, 1));
                assert(rootElem.children.length == 2);
                assert(equal(rootElem.name, "root"));

                auto fooElem = rootElem.children[0];
                assert(fooElem.type == EntityType.elementStart);
                assert(fooElem.pos == TextPos(2, 5));
                assert(fooElem.children.length == 1);
                assert(equal(fooElem.name, "foo"));

                auto barElem = fooElem.children[0];
                assert(barElem.type == EntityType.elementStart);
                assert(barElem.pos == TextPos(3, 9));
                assert(barElem.children.length == 4);
                assert(equal(barElem.name, "bar"));

                auto bazElem = barElem.children[0];
                assert(bazElem.type == EntityType.elementStart);
                assert(bazElem.pos == TextPos(4, 13));
                assert(bazElem.children.length == 1);
                assert(equal(bazElem.name, "baz"));

                auto sillyText = bazElem.children[0];
                assert(sillyText.type == EntityType.text);
                assert(sillyText.pos == TextPos(4, 18));
                assert(equal(sillyText.text, "\n            It's silly, Charley\n            "));

                auto frobozzElem = barElem.children[1];
                assert(frobozzElem.type == EntityType.elementStart);
                assert(frobozzElem.pos == TextPos(7, 13));
                assert(frobozzElem.children.length == 1);
                assert(equal(frobozzElem.name, "frobozz"));

                auto isElem = frobozzElem.children[0];
                assert(isElem.type == EntityType.elementStart);
                assert(isElem.pos == TextPos(8, 17));
                assert(isElem.children.length == 1);
                assert(equal(isElem.name, "is"));

                auto wizText = isElem.children[0];
                assert(wizText.type == EntityType.text);
                assert(wizText.pos == TextPos(8, 21));
                assert(equal(wizText.text, "the Wiz"));

                auto emptyElem = barElem.children[2];
                assert(emptyElem.type == EntityType.elementStart);
                assert(emptyElem.pos == TextPos(10, 13));
                assert(emptyElem.children.length == 0);
                assert(equal(emptyElem.name, "empty"));

                auto xyzzyElem = barElem.children[3];
                assert(xyzzyElem.type == EntityType.elementEmpty);
                assert(xyzzyElem.pos == TextPos(11, 13));
                assert(equal(xyzzyElem.name, "xyzzy"));

                auto innerComment = rootElem.children[1];
                assert(innerComment.type == EntityType.comment);
                assert(innerComment.pos == TextPos(14, 5));
                assert(equal(innerComment.text, "This isn't the end"));

                auto poirotPI = tree.children[1];
                assert(poirotPI.type == EntityType.pi);
                assert(poirotPI.pos == TextPos(16, 1));
                assert(equal(poirotPI.name, "Poirot"));
                assert(poirotPI.text.empty);

                auto finalComment = tree.children[2];
                assert(finalComment.type == EntityType.comment);
                assert(finalComment.pos == TextPos(17, 1));
                assert(equal(finalComment.text, "It's the end!"));
            }
        }
    }}
}