1 // Written in the D programming language
2 
3 /++
4     This implements a DOM for representing an XML 1.0 document. $(LREF parseDOM)
5     uses an $(REF EntityRange, dxml, parser) to parse the document, and
6     $(LREF DOMEntity) recursively represents the DOM tree.
7 
8     See the documentation for $(MREF dxml, parser) and
9     $(REF EntityRange, dxml, parser) for details on the parser and its
10     configuration options.
11 
12     For convenience, $(REF EntityType, dxml, parser) and
13     $(REF simpleXML, dxml, parser) are publicly imported by this module,
14     since $(REF_ALTTEXT EntityType, EntityType, dxml, parser) is required
15     to correctly use $(LREF DOMEntity), and
16     $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) is highly likely to
17     be used when calling $(LREF parseDOM).
18 
19     Copyright: Copyright 2018
20     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
21     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
22     Source:    $(LINK_TO_SRC dxml/_dom.d)
23 
24     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
25   +/
26 module dxml.dom;
27 
28 ///
version(dxmlTests) unittest
{
    import std.range.primitives : empty;

    auto xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // Default configuration: the leading comment shows up in the DOM, and
    // empty element tags are reported as EntityType.elementEmpty.
    {
        auto document = parseDOM(xml);
        assert(document.type == EntityType.elementStart);
        assert(document.name.empty);

        auto topLevel = document.children;
        assert(topLevel.length == 2);

        assert(topLevel[0].type == EntityType.comment);
        assert(topLevel[0].text == " comment ");

        auto rootTag = topLevel[1];
        assert(rootTag.type == EntityType.elementStart);
        assert(rootTag.name == "root");

        auto rootKids = rootTag.children;
        assert(rootKids.length == 3);

        auto fooTag = rootKids[0];
        assert(fooTag.type == EntityType.elementStart);
        assert(fooTag.name == "foo");

        auto fooKids = fooTag.children;
        assert(fooKids.length == 2);

        assert(fooKids[0].type == EntityType.text);
        assert(fooKids[0].text == "some text");

        assert(fooKids[1].type == EntityType.elementEmpty);
        assert(fooKids[1].name == "whatever");

        assert(rootKids[1].type == EntityType.elementEmpty);
        assert(rootKids[1].name == "bar");

        assert(rootKids[2].type == EntityType.elementStart);
        assert(rootKids[2].name == "baz");
        assert(rootKids[2].children.length == 0);
    }

    // simpleXML: the comment is no longer part of the DOM, and the empty
    // element tags come back as EntityType.elementStart with no children.
    {
        auto document = parseDOM!simpleXML(xml);
        assert(document.type == EntityType.elementStart);
        assert(document.name.empty);

        auto topLevel = document.children;
        assert(topLevel.length == 1);

        auto rootTag = topLevel[0];
        assert(rootTag.type == EntityType.elementStart);
        assert(rootTag.name == "root");

        auto rootKids = rootTag.children;
        assert(rootKids.length == 3);

        auto fooTag = rootKids[0];
        assert(fooTag.type == EntityType.elementStart);
        assert(fooTag.name == "foo");

        auto fooKids = fooTag.children;
        assert(fooKids.length == 2);

        assert(fooKids[0].type == EntityType.text);
        assert(fooKids[0].text == "some text");

        assert(fooKids[1].type == EntityType.elementStart);
        assert(fooKids[1].name == "whatever");
        assert(fooKids[1].children.length == 0);

        assert(rootKids[1].type == EntityType.elementStart);
        assert(rootKids[1].name == "bar");
        assert(rootKids[1].children.length == 0);

        assert(rootKids[2].type == EntityType.elementStart);
        assert(rootKids[2].name == "baz");
        assert(rootKids[2].children.length == 0);
    }
}
103 
104 
105 import std.range.primitives;
106 import std.traits;
107 
108 public import dxml.parser : EntityType, simpleXML;
109 import dxml.parser : Config, EntityRange;
110 
111 
112 /++
113     Represents an entity in an XML document as a DOM tree.
114 
115     parseDOM either takes a range of characters or an
116     $(REF EntityRange, dxml, parser) and generates a DOMEntity from that XML.
117 
118     When parseDOM processes the XML, it returns a DOMEntity representing the
119     entire document. Even though the XML document itself isn't technically an
120     entity in the XML document, it's simplest to treat it as if it were an
121     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser)
122     with an empty $(LREF2 name, _DOMEntity.name). That DOMEntity then contains
123     child entities that recursively define the DOM tree through their children.
124 
125     For DOMEntities of type
126     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser),
127     $(LREF _DOMEntity.children) gives access to all of the child entities of
128     that start tag. Other DOMEntities have no children.
129 
130     Note that the $(LREF2 type, _DOMEntity.type) determines which
131     properties of the DOMEntity can be used, and it can determine whether
132     functions which a DOMEntity is passed to are allowed to be called. Each
133     function lists which $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s
134     are allowed, and it is an error to call them with any other
135     $(REF_ALTTEXT EntityType, EntityType, dxml, parser).
136 
137     If parseDOM is given a range of characters, it in turn passes that to
138     $(REF parseXML, dxml, parser) to do the actual XML parsing. As such, that
139     overload accepts an optional $(REF Config, dxml, parser) as a template
140     argument to configure the parser.
141 
142     If parseDOM is given an
143     $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the range does
144     not have to be at the start of the document. It can be used to create a DOM
    for a portion of the document. When such a range is passed to it, it
146     will return a DOMEntity with the $(LREF2 type, _DOMEntity.type)
147     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser)
148     and an empty $(LREF2 name, _DOMEntity.name). It will iterate the range until
149     it either reaches the end of the range, or it reaches the end tag which
150     matches the start tag which is the parent of the entity that was the
151     $(D front) of the range when it was passed to parseDOM. The
    $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser)
153     is passed by $(K_REF), so if it was not at the top level when it was passed
154     to parseDOM (and thus still has elements in it when parseDOM returns), the
155     range will then be at the entity after that matching end tag, and the
156     application can continue to process the range after that if it so chooses.
157 
158     Params:
159         config = The $(REF Config, dxml, parser) to use with
160                  $(REF parseXML, dxml, parser) if the range passed to parseDOM
161                  is a range of characters.
162         range = Either a range of characters representing an entire XML document
163                 or a $(REF EntityRange, dxml, parser) which may refer to some
164                 or all of an XML document.
165 
166     Returns: A DOMEntity representing the DOM tree from the point in the
167              document that was passed to parseDOM (the start of the document if
168              a range of characters was passed, and wherever in the document the
169              range was if an
             $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) was passed).
171 
172     Throws: $(REF_ALTTEXT XMLParsingException, XMLParsingException, dxml, parser)
173             if the parser encounters invalid XML.
174   +/
struct DOMEntity(R)
{
public:

    import std.algorithm.searching : canFind;
    import std.range : only, takeExactly;
    import std.typecons : Tuple;
    import dxml.parser : TextPos;

    // Gates the unittest blocks inside this struct so that they are only
    // compiled for the DOMEntity!DOMCompileTests instantiation
    // (DOMCompileTests is not declared in this part of the module).
    private enum compileInTests = is(R == DOMCompileTests);

    /++
        The type used when any slice of the original range of characters is
        used. If the range was a string or supports slicing, then SliceOfR is
        the same type as the range; otherwise, it's the result of calling
        $(PHOBOS_REF takeExactly, std, range) on it.

        ---
        import std.algorithm : filter;
        import std.range : takeExactly;

        static assert(is(DOMEntity!string.SliceOfR == string));

        auto range = filter!(a => true)("some xml");

        static assert(is(DOMEntity!(typeof(range)).SliceOfR ==
                         typeof(takeExactly(range, 42))));
        ---
      +/
    static if(isDynamicArray!R || hasSlicing!R)
        alias SliceOfR = R;
    else
        alias SliceOfR = typeof(takeExactly(R.init, 42));

    // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
    // a ddoc-ed unit test.
    static if(compileInTests) @safe unittest
    {
        import std.algorithm : filter;
        import std.range : takeExactly;

        static assert(is(DOMEntity!string.SliceOfR == string));

        auto range = filter!(a => true)("some xml");

        static assert(is(DOMEntity!(typeof(range)).SliceOfR ==
                         typeof(takeExactly(range, 42))));
    }


    /++
        The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
        $(LREF2 attributes, DOMEntity) returns a range of.

        See_Also: $(LREF2 attributes, DOMEntity)
      +/
    alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos, "pos");


    /++
        The $(REF_ALTTEXT EntityType, EntityType, dxml, parser) for this
        DOMEntity.

        The type can never be
        $(REF_ALTTEXT EntityType.elementEnd, EntityType.elementEnd, dxml, parser),
        because the end of $(LREF2 children, DOMEntity.children) already
        indicates where the contents of the start tag end.

        type determines which properties of the DOMEntity can be used, and it
        can determine whether functions which a DOMEntity is passed to are
        allowed to be called. Each function lists which
        $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s are allowed, and it
        is an error to call them with any other
        $(REF_ALTTEXT EntityType, EntityType, dxml, parser).
      +/
    @property EntityType type() @safe const pure nothrow @nogc
    {
        return _type;
    }

    ///
    static if(compileInTests) unittest
    {
        import std.range.primitives;

        auto xml = "<root>\n" ~
                   "    <!--no comment-->\n" ~
                   "    <![CDATA[cdata run]]>\n" ~
                   "    <text>I am text!</text>\n" ~
                   "    <empty/>\n" ~
                   "    <?pi?>\n" ~
                   "</root>";

        auto dom = parseDOM(xml);
        assert(dom.type == EntityType.elementStart);
        assert(dom.name.empty);
        assert(dom.children.length == 1);

        auto root = dom.children[0];
        assert(root.type == EntityType.elementStart);
        assert(root.name == "root");
        assert(root.children.length == 5);

        assert(root.children[0].type == EntityType.comment);
        assert(root.children[0].text == "no comment");

        assert(root.children[1].type == EntityType.cdata);
        assert(root.children[1].text == "cdata run");

        auto textTag = root.children[2];
        assert(textTag.type == EntityType.elementStart);
        assert(textTag.name == "text");
        assert(textTag.children.length == 1);

        assert(textTag.children[0].type == EntityType.text);
        assert(textTag.children[0].text == "I am text!");

        assert(root.children[3].type == EntityType.elementEmpty);
        assert(root.children[3].name == "empty");

        assert(root.children[4].type == EntityType.pi);
        assert(root.children[4].name == "pi");
    }


    /++
        The position in the original text where the entity starts.

        See_Also: $(REF_ALTTEXT TextPos, TextPos, dxml, parser)$(BR)
                  $(REF_ALTTEXT XMLParsingException._pos, XMLParsingException._pos, dxml, parser)
      +/
    @property TextPos pos() @safe const pure nothrow @nogc
    {
        return _pos;
    }

    ///
    static if(compileInTests) unittest
    {
        import std.range.primitives : empty;
        import dxml.parser : TextPos;
        import dxml.util : stripIndent;

        auto xml = "<root>\n" ~
                   "    <foo>\n" ~
                   "        Foo and bar. Always foo and bar...\n" ~
                   "    </foo>\n" ~
                   "</root>";

        auto dom = parseDOM(xml);
        assert(dom.type == EntityType.elementStart);
        assert(dom.name.empty);
        assert(dom.pos == TextPos(1, 1));

        auto root = dom.children[0];
        assert(root.type == EntityType.elementStart);
        assert(root.name == "root");
        assert(root.pos == TextPos(1, 1));

        auto foo = root.children[0];
        assert(foo.type == EntityType.elementStart);
        assert(foo.name == "foo");
        assert(foo.pos == TextPos(2, 5));

        auto text = foo.children[0];
        assert(text.type == EntityType.text);
        assert(text.text.stripIndent() ==
               "Foo and bar. Always foo and bar...");
        assert(text.pos == TextPos(2, 10));
    }


    /++
        Gives the name of this DOMEntity.

        Note that this is the direct name in the XML for this entity and
        does not contain any of the names of any of the parent entities that
        this entity has.

        $(TABLE
            $(TR $(TH Supported $(LREF EntityType)s:))
            $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT elementEnd, EntityType.elementEnd, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser)))
        )

        See_Also: $(LREF2 path, DOMEntity.path)
      +/
    @property SliceOfR name()
    {
        import dxml.internal : checkedSave;
        with(EntityType)
        {
            import std.format : format;
            assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type),
                   format("name cannot be called with %s", _type));
        }
        return checkedSave(_name);
    }

    ///
    static if(compileInTests) unittest
    {
        import std.range.primitives : empty;

        auto xml = "<root>\n" ~
                   "    <empty/>\n" ~
                   "    <?pi?>\n" ~
                   "</root>";

        auto dom = parseDOM(xml);
        assert(dom.type == EntityType.elementStart);
        assert(dom.name.empty);

        auto root = dom.children[0];
        assert(root.type == EntityType.elementStart);
        assert(root.name == "root");

        assert(root.children[0].type == EntityType.elementEmpty);
        assert(root.children[0].name == "empty");

        assert(root.children[1].type == EntityType.pi);
        assert(root.children[1].name == "pi");
    }


    /++
        Gives the list of the names of the parent start tags of this DOMEntity.

        The name of the current entity (if it has one) is not included in the
        path.

        Note that if parseDOM were given an
        $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the path
        starts where the range started. So, it doesn't necessarily contain the
        entire path from the start of the XML document.

        See_Also: $(LREF2 name, DOMEntity.name)
      +/
    @property SliceOfR[] path()
    {
        return _path;
    }

    ///
    static if(compileInTests) unittest
    {
        import std.range.primitives : empty;

        auto xml = "<root>\n" ~
                   "    <bar>\n" ~
                   "        <baz>\n" ~
                   "            <xyzzy/>\n" ~
                   "        </baz>\n" ~
                   "        <frobozz>\n" ~
                   "            <!-- comment -->\n" ~
                   "            It's magic!\n" ~
                   "        </frobozz>\n" ~
                   "    </bar>\n" ~
                   "    <foo></foo>\n" ~
                   "</root>";

        auto dom = parseDOM(xml);
        assert(dom.type == EntityType.elementStart);
        assert(dom.name.empty);
        assert(dom.path.empty);

        auto root = dom.children[0];
        assert(root.type == EntityType.elementStart);
        assert(root.name == "root");
        assert(root.path.empty);

        auto bar = root.children[0];
        assert(bar.type == EntityType.elementStart);
        assert(bar.name == "bar");
        assert(bar.path == ["root"]);

        auto baz = bar.children[0];
        assert(baz.type == EntityType.elementStart);
        assert(baz.name == "baz");
        assert(baz.path == ["root", "bar"]);

        auto xyzzy = baz.children[0];
        assert(xyzzy.type == EntityType.elementEmpty);
        assert(xyzzy.name == "xyzzy");
        assert(xyzzy.path == ["root", "bar", "baz"]);

        auto frobozz = bar.children[1];
        assert(frobozz.type == EntityType.elementStart);
        assert(frobozz.name == "frobozz");
        assert(frobozz.path == ["root", "bar"]);

        auto comment = frobozz.children[0];
        assert(comment.type == EntityType.comment);
        assert(comment.text == " comment ");
        assert(comment.path == ["root", "bar", "frobozz"]);

        auto text = frobozz.children[1];
        assert(text.type == EntityType.text);
        assert(text.text == "\n            It's magic!\n        ");
        assert(text.path == ["root", "bar", "frobozz"]);

        auto foo = root.children[1];
        assert(foo.type == EntityType.elementStart);
        assert(foo.name == "foo");
        assert(foo.path == ["root"]);
    }


    /++
        Returns a dynamic array of attributes for a start tag where each
        attribute is represented as a$(BR)
        $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
                  $(LREF2 SliceOfR, DOMEntity), $(D_STRING "name"),
                  $(LREF2 SliceOfR, DOMEntity), $(D_STRING "value"),
                  $(REF_ALTTEXT TextPos, TextPos, dxml, parser), $(D_STRING "pos"))).

        $(TABLE
            $(TR $(TH Supported $(LREF EntityType)s:))
            $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser)))
        )

        See_Also: $(LREF DOMEntity.Attribute)$(BR)
                  $(REF normalize, dxml, util)$(BR)
                  $(REF asNormalized, dxml, util)
      +/
    @property auto attributes()
    {
        with(EntityType)
        {
            import std.format : format;
            assert(_type == elementStart || _type == elementEmpty,
                   format("attributes cannot be called with %s", _type));
        }
        return _attributes;
    }

    ///
    static if(compileInTests) unittest
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.iteration : filter;
        import std.range.primitives : empty;
        import dxml.parser : TextPos;

        {
            auto xml = "<root/>";
            auto root = parseDOM(xml).children[0];
            assert(root.type == EntityType.elementEmpty);
            assert(root.attributes.empty);

            static assert(is(ElementType!(typeof(root.attributes)) ==
                             typeof(root).Attribute));
        }
        {
            auto xml = "<root a='42' q='29' w='hello'/>";
            auto root = parseDOM(xml).children[0];
            assert(root.type == EntityType.elementEmpty);

            auto attrs = root.attributes;
            assert(attrs.length == 3);

            assert(attrs[0].name == "a");
            assert(attrs[0].value == "42");
            assert(attrs[0].pos == TextPos(1, 7));

            assert(attrs[1].name == "q");
            assert(attrs[1].value == "29");
            assert(attrs[1].pos == TextPos(1, 14));

            assert(attrs[2].name == "w");
            assert(attrs[2].value == "hello");
            assert(attrs[2].pos == TextPos(1, 21));
        }
        // Because the type of name and value is SliceOfR, == with a string
        // only works if the range passed to parseDOM was string.
        {
            auto xml = filter!"true"("<root a='42' q='29' w='hello'/>");
            auto root = parseDOM(xml).children[0];
            assert(root.type == EntityType.elementEmpty);

            auto attrs = root.attributes;
            assert(attrs.length == 3);

            assert(equal(attrs[0].name, "a"));
            assert(equal(attrs[0].value, "42"));
            assert(attrs[0].pos == TextPos(1, 7));

            assert(equal(attrs[1].name, "q"));
            assert(equal(attrs[1].value, "29"));
            assert(attrs[1].pos == TextPos(1, 14));

            assert(equal(attrs[2].name, "w"));
            assert(equal(attrs[2].value, "hello"));
            assert(attrs[2].pos == TextPos(1, 21));
        }
    }


    /++
        Returns the textual value of this DOMEntity.

        In the case of
        $(REF_ALTTEXT EntityType.pi, EntityType.pi, dxml, parser), this is the
        text that follows the name, whereas in the other cases, the text is the
        entire contents of the entity (save for the delimiters on the ends if
        that entity has them).

        $(TABLE
            $(TR $(TH Supported $(LREF EntityType)s:))
            $(TR $(TD $(REF_ALTTEXT cdata, EntityType.cdata, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT comment, EntityType.comment, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser)))
            $(TR $(TD $(REF_ALTTEXT _text, EntityType._text, dxml, parser)))
        )

        See_Also: $(REF normalize, dxml, util)$(BR)
                  $(REF asNormalized, dxml, util)$(BR)
                  $(REF stripIndent, dxml, util)$(BR)
                  $(REF withoutIndent, dxml, util)
      +/
    @property SliceOfR text()
    {
        import dxml.internal : checkedSave;
        with(EntityType)
        {
            import std.format : format;
            assert(only(cdata, comment, pi, text).canFind(_type),
                   format("text cannot be called with %s", _type));
        }
        return checkedSave(_text);
    }

    ///
    static if(compileInTests) unittest
    {
        import std.range.primitives : empty;

        auto xml = "<?xml version='1.0'?>\n" ~
                   "<?instructionName?>\n" ~
                   "<?foo here is something to say?>\n" ~
                   "<root>\n" ~
                   "    <![CDATA[ Yay! random text >> << ]]>\n" ~
                   "    <!-- some random comment -->\n" ~
                   "    <p>something here</p>\n" ~
                   "    <p>\n" ~
                   "       something else\n" ~
                   "       here</p>\n" ~
                   "</root>";
        auto dom = parseDOM(xml);

        // "<?instructionName?>\n" ~
        auto pi1 = dom.children[0];
        assert(pi1.type == EntityType.pi);
        assert(pi1.name == "instructionName");
        assert(pi1.text.empty);

        // "<?foo here is something to say?>\n" ~
        auto pi2 = dom.children[1];
        assert(pi2.type == EntityType.pi);
        assert(pi2.name == "foo");
        assert(pi2.text == "here is something to say");

        // "<root>\n" ~
        auto root = dom.children[2];
        assert(root.type == EntityType.elementStart);

        // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
        auto cdata = root.children[0];
        assert(cdata.type == EntityType.cdata);
        assert(cdata.text == " Yay! random text >> << ");

        // "    <!-- some random comment -->\n" ~
        auto comment = root.children[1];
        assert(comment.type == EntityType.comment);
        assert(comment.text == " some random comment ");

        // "    <p>something here</p>\n" ~
        auto p1 = root.children[2];
        assert(p1.type == EntityType.elementStart);
        assert(p1.name == "p");

        assert(p1.children[0].type == EntityType.text);
        assert(p1.children[0].text == "something here");

        // "    <p>\n" ~
        // "       something else\n" ~
        // "       here</p>\n" ~
        auto p2 = root.children[3];
        assert(p2.type == EntityType.elementStart);

        assert(p2.children[0].type == EntityType.text);
        assert(p2.children[0].text == "\n       something else\n       here");
    }


    /++
        Returns the child entities of the current entity.

        They are in the same order that they were in the XML document.

        $(TABLE
            $(TR $(TH Supported $(LREF EntityType)s:))
            $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
        )
      +/
    @property DOMEntity[] children()
    {
        // Report the offending type on misuse, matching the diagnostics given
        // by name, attributes, and text.
        import std.format : format;
        assert(_type == EntityType.elementStart,
               format("children cannot be called with %s", _type));
        return _children;
    }

    ///
    static if(compileInTests) unittest
    {
        auto xml = "<potato>\n" ~
                   "    <!--comment-->\n" ~
                   "    <foo>bar</foo>\n" ~
                   "    <tag>\n" ~
                   "        <silly>you</silly>\n" ~
                   "        <empty/>\n" ~
                   "        <nocontent></nocontent>\n" ~
                   "    </tag>\n" ~
                   "</potato>\n" ~
                   "<!--the end-->";
        auto dom = parseDOM(xml);
        assert(dom.children.length == 2);

        auto potato = dom.children[0];
        assert(potato.type == EntityType.elementStart);
        assert(potato.name == "potato");
        assert(potato.children.length == 3);

        auto comment = potato.children[0];
        assert(comment.type == EntityType.comment);
        assert(comment.text == "comment");

        auto foo = potato.children[1];
        assert(foo.type == EntityType.elementStart);
        assert(foo.name == "foo");
        assert(foo.children.length == 1);

        assert(foo.children[0].type == EntityType.text);
        assert(foo.children[0].text == "bar");

        auto tag = potato.children[2];
        assert(tag.type == EntityType.elementStart);
        assert(tag.name == "tag");
        assert(tag.children.length == 3);

        auto silly = tag.children[0];
        assert(silly.type == EntityType.elementStart);
        assert(silly.name == "silly");
        assert(silly.children.length == 1);

        assert(silly.children[0].type == EntityType.text);
        assert(silly.children[0].text == "you");

        auto empty = tag.children[1];
        assert(empty.type == EntityType.elementEmpty);
        assert(empty.name == "empty");

        auto nocontent = tag.children[2];
        assert(nocontent.type == EntityType.elementStart);
        assert(nocontent.name == "nocontent");
        assert(nocontent.children.length == 0);

        auto endComment = dom.children[1];
        assert(endComment.type == EntityType.comment);
        assert(endComment.text == "the end");
    }


    // Reduce the chance of bugs if reference-type ranges are involved.
    //
    // On copy, the slices that are meaningful for the entity's type are
    // replaced with the result of calling save on them: _name for the element
    // types, _text for cdata/comment/text, and both for pi. The postblit is
    // not compiled in when R is a dynamic array.
    static if(!isDynamicArray!R) this(this)
    {
        with(EntityType) final switch(_type)
        {
            case cdata: goto case text;
            case comment: goto case text;
            case elementStart:
            {
                _name = _name.save;
                break;
            }
            case elementEnd: goto case elementStart;
            case elementEmpty: goto case elementStart;
            case text:
            {
                _text = _text.save;
                break;
            }
            case pi:
            {
                // A processing instruction has both text and a name, so save
                // the text here and then fall into the name handling.
                _text = _text.save;
                goto case elementStart;
            }
        }
    }


private:

    // Creates an entity of the given type at the given position; the name,
    // path, attributes, text, and children are left at their initial values.
    this(EntityType type, TextPos pos)
    {
        _type = type;
        _pos = pos;

        // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
        _name = typeof(_name).init;
        _text = typeof(_text).init;
    }

    // Defaults to elementStart, so a default-initialized DOMEntity has the
    // type that parseDOM uses for the entity representing the document.
    auto _type = EntityType.elementStart;
    TextPos _pos;               // returned by pos
    SliceOfR _name;             // returned (saved) by name
    SliceOfR[] _path;           // returned by path
    Attribute[] _attributes;    // returned by attributes
    SliceOfR _text;             // returned (saved) by text
    DOMEntity[] _children;      // returned by children
}
798 
799 /// Ditto
DOMEntity!R parseDOM(Config config = Config.init, R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import dxml.parser : parseXML;

    // A default-initialized DOMEntity is an elementStart with an empty name,
    // which is what stands in for the document as a whole.
    DOMEntity!R document;

    // Let the parser turn the characters into entities, then build the tree
    // underneath the document entity.
    auto entities = parseXML!config(range);
    _parseDOM(entities, document);

    return document;
}
809 
810 /// Ditto
DOMEntity!(ER.Input) parseDOM(ER)(ref ER range)
    if(isInstanceOf!(EntityRange, ER))
{
    // Default-initialized: an elementStart with an empty name and no children.
    DOMEntity!(ER.Input) subtree;

    // An empty range yields the default entity untouched.
    if(!range.empty)
    {
        subtree._pos = range.front.pos;

        // If the range is sitting on an end tag, there is nothing to build;
        // only the position is recorded and the range is left where it is.
        if(range.front.type != EntityType.elementEnd)
            _parseDOM(range, subtree);
    }

    return subtree;
}
823 
824 /++
825     parseDOM with the default $(REF_ALTTEXT Config, Config, dxml, parser) and a
826     range of characters.
827   +/
version(dxmlTests) @safe unittest
{
    import std.range.primitives;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // The returned entity stands for the document: a nameless elementStart
    // whose only child here is <root>.
    auto document = parseDOM(xml);
    assert(document.type == EntityType.elementStart);
    assert(document.name.empty);
    assert(document.children.length == 1);

    auto rootTag = document.children[0];
    assert(rootTag.type == EntityType.elementStart);
    assert(rootTag.name == "root");

    auto rootKids = rootTag.children;
    assert(rootKids.length == 4);

    // <!-- no comment -->
    assert(rootKids[0].type == EntityType.comment);
    assert(rootKids[0].text == " no comment ");

    // <foo></foo>
    assert(rootKids[1].type == EntityType.elementStart);
    assert(rootKids[1].name == "foo");
    assert(rootKids[1].children.length == 0);

    // <baz> ... </baz>
    auto bazTag = rootKids[2];
    assert(bazTag.type == EntityType.elementStart);
    assert(bazTag.name == "baz");
    assert(bazTag.children.length == 1);

    // <xyzzy>It's an adventure!</xyzzy>
    auto xyzzyTag = bazTag.children[0];
    assert(xyzzyTag.type == EntityType.elementStart);
    assert(xyzzyTag.name == "xyzzy");
    assert(xyzzyTag.children.length == 1);

    auto adventure = xyzzyTag.children[0];
    assert(adventure.type == EntityType.text);
    assert(adventure.text == "It's an adventure!");

    // <tag/>
    assert(rootKids[3].type == EntityType.elementEmpty);
    assert(rootKids[3].name == "tag");
}
874 
875 /++
876     parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and a range
877     of characters.
878   +/
version(dxmlTests) unittest
{
    import std.range.primitives : empty;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    auto doc = parseDOM!simpleXML(xml);

    // The entity returned by parseDOM is a nameless elementStart holding the
    // top-level entities of the document.
    assert(doc.type == EntityType.elementStart);
    assert(doc.name.empty);
    assert(doc.children.length == 1);

    auto rootElem = doc.children[0];
    assert(rootElem.type == EntityType.elementStart);
    assert(rootElem.name == "root");

    // With simpleXML, the comment does not show up in the DOM.
    auto rootKids = rootElem.children;
    assert(rootKids.length == 3);

    {
        auto foo = rootKids[0];
        assert(foo.type == EntityType.elementStart);
        assert(foo.name == "foo");
        assert(foo.children.length == 0);
    }
    {
        auto baz = rootKids[1];
        assert(baz.type == EntityType.elementStart);
        assert(baz.name == "baz");
        assert(baz.children.length == 1);

        auto xyzzy = baz.children[0];
        assert(xyzzy.type == EntityType.elementStart);
        assert(xyzzy.name == "xyzzy");
        assert(xyzzy.children.length == 1);

        auto adventure = xyzzy.children[0];
        assert(adventure.type == EntityType.text);
        assert(adventure.text == "It's an adventure!");
    }
    {
        // With simpleXML, <tag/> shows up as an elementStart without
        // children.
        auto tag = rootKids[2];
        assert(tag.type == EntityType.elementStart);
        assert(tag.name == "tag");
        assert(tag.children.length == 0);
    }
}
923 
924 /++
925     parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and an
926     $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser).
927   +/
version(dxmlTests) unittest
{
    import std.range.primitives : empty;
    import dxml.parser : parseXML;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    auto entities = parseXML!simpleXML(xml);
    auto doc = parseDOM(entities);

    // The range is taken by ref, and the whole document was consumed.
    assert(entities.empty);

    assert(doc.type == EntityType.elementStart);
    assert(doc.name.empty);
    assert(doc.children.length == 1);

    auto rootElem = doc.children[0];
    assert(rootElem.type == EntityType.elementStart);
    assert(rootElem.name == "root");

    auto rootKids = rootElem.children;
    assert(rootKids.length == 3);

    {
        auto foo = rootKids[0];
        assert(foo.type == EntityType.elementStart);
        assert(foo.name == "foo");
        assert(foo.children.length == 0);
    }
    {
        auto baz = rootKids[1];
        assert(baz.type == EntityType.elementStart);
        assert(baz.name == "baz");
        assert(baz.children.length == 1);

        auto xyzzy = baz.children[0];
        assert(xyzzy.type == EntityType.elementStart);
        assert(xyzzy.name == "xyzzy");
        assert(xyzzy.children.length == 1);

        auto adventure = xyzzy.children[0];
        assert(adventure.type == EntityType.text);
        assert(adventure.text == "It's an adventure!");
    }
    {
        auto tag = rootKids[2];
        assert(tag.type == EntityType.elementStart);
        assert(tag.name == "tag");
        assert(tag.children.length == 0);
    }
}
976 
977 /++
978     parseDOM with an $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser)
979     which is not at the start of the document.
980   +/
version(dxmlTests) unittest
{
    import std.range.primitives : empty;
    import dxml.parser : parseXML, skipToPath;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // Move to <xyzzy> before building the DOM.
    auto entities = parseXML!simpleXML(xml).skipToPath("baz/xyzzy");
    assert(entities.front.type == EntityType.elementStart);
    assert(entities.front.name == "xyzzy");

    auto partial = parseDOM(entities);

    // Parsing stopped once the end tag for <baz> had been consumed, so the
    // range is now at <tag>.
    assert(entities.front.type == EntityType.elementStart);
    assert(entities.front.name == "tag");

    assert(partial.type == EntityType.elementStart);
    assert(partial.name.empty);
    assert(partial.children.length == 1);

    auto xyzzy = partial.children[0];
    assert(xyzzy.type == EntityType.elementStart);
    assert(xyzzy.name == "xyzzy");
    assert(xyzzy.children.length == 1);

    auto adventure = xyzzy.children[0];
    assert(adventure.type == EntityType.text);
    assert(adventure.text == "It's an adventure!");
}
1015 
1016 /// parseDOM at compile-time
version(dxmlTests) unittest
{
    // Needed for dom.name.empty below; imported locally so that the example
    // is self-contained.
    import std.range.primitives : empty;

    enum xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // Because dom is an enum, parseDOM is evaluated during compilation.
    enum dom = parseDOM(xml);
    static assert(dom.type == EntityType.elementStart);
    static assert(dom.name.empty);
    static assert(dom.children.length == 2);

    static assert(dom.children[0].type == EntityType.comment);
    static assert(dom.children[0].text == " comment ");
}
1034 
1035 // This is purely to provide a way to trigger the unittest blocks in DOMEntity
1036 // without compiling them in normally.
private struct DOMCompileTests
{
    // Every member shares the same attributes. None of them is meant to be
    // called; each one fails immediately if it ever is.
    @safe pure nothrow @nogc:

    @property bool empty() { assert(0); }
    @property char front() { assert(0); }
    void popFront() { assert(0); }
    @property DOMCompileTests save() { assert(0); }
}
1044 
// Only declared when building with version dxmlTests: instantiates DOMEntity
// with the dummy range type.
version(dxmlTests)
{
    DOMEntity!DOMCompileTests _domTests;
}
1047 
1048 
1049 private:
1050 
// Recursively builds the DOM for the entities at the front of range.
//
// Entities are consumed from range and collected as the children of parent
// until either the range is exhausted or the elementEnd which closes the
// current nesting level has been consumed. A start tag which has content
// recurses so that its content ends up as the children of its own entity.
//
// Params:
//     range  = The entity range to consume. It must not be empty, and its
//              front must not be an elementEnd.
//     parent = The entity whose _children is replaced with the entities that
//              were parsed at this level.
//     path   = The names of the start tags which were entered to reach this
//              level. It is stored as the _path of every child.
void _parseDOM(ER, DE)(ref ER range, ref DE parent, ER.SliceOfR[] path = null)
{
    assert(!range.empty);
    assert(range.front.type != EntityType.elementEnd);

    import std.array : appender, array;
    auto children = appender!(DE[])();

    while(!range.empty)
    {
        auto entity = range.front;
        range.popFront();

        // The end tag of the enclosing element finishes this level. It has
        // already been popped, so the caller continues with what follows it.
        if(entity.type == EntityType.elementEnd)
            break;

        auto child = DE(entity.type, entity.pos);
        child._path = path;

        with(EntityType) final switch(entity.type)
        {
            case cdata: goto case text;
            case comment: goto case text;
            case elementStart:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();

                // A start tag immediately followed by its end tag has no
                // content, so the end tag just gets consumed.
                if(range.front.type == EntityType.elementEnd)
                    range.popFront();
                else
                {
                    // The pop below must mirror the push. Shrinking path when
                    // nothing was appended would either underflow the length
                    // of an empty path or drop an ancestor's name from the
                    // path given to the siblings which follow.
                    immutable pushed = !entity.name.empty;
                    if(pushed)
                        path ~= entity.name;
                    // TODO The explicit instantiation doesn't hurt, but it
                    // shouldn't be necessary, and if it's not there, we get
                    // a compiler error. It should be reduced and reported.
                    _parseDOM!(ER, DE)(range, child, path);
                    if(pushed)
                        --path.length;
                }
                break;
            }
            case elementEnd: assert(0);
            case elementEmpty:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();
                break;
            }
            case text:
            {
                child._text = entity.text;
                break;
            }
            case pi:
            {
                // Processing instructions have both a name and text.
                child._name = entity.name;
                child._text = entity.text;
                break;
            }
        }

        put(children, child);
    }

    parent._children = children.data;
}
1117 
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;
    import dxml.parser : parseXML, TextPos;

    // Runs parseDOM on a copy of entityRange and checks the result: it must
    // be an elementStart positioned at (row, col) whose children have exactly
    // the expected types. Afterwards, entityRange itself is advanced by one
    // entity if it has any left. Failures are reported with the line of the
    // caller.
    static void testChildren(ER, size_t line = __LINE__)(ref ER entityRange, int row, int col, EntityType[] expected...)
    {
        import core.exception : AssertError;
        import std.exception : enforce;

        auto copy = entityRange.save;
        auto tree = parseDOM(copy);
        enforce!AssertError(tree.type == EntityType.elementStart, "unittest 1", __FILE__, line);
        enforce!AssertError(tree.children.length == expected.length, "unittest 2", __FILE__, line);
        foreach(idx, ref kid; tree._children)
            enforce!AssertError(kid.type == expected[idx], "unittest 3", __FILE__, line);
        enforce!AssertError(tree.pos == TextPos(row, col), "unittest 4", __FILE__, line);

        if(entityRange.empty)
            return;
        entityRange.popFront();
    }

    static foreach(func; testRangeFuncs)
    {{
        {
            // The same document twice: once with <su></su> and once with
            // <su/>.
            auto docs = ["<!-- comment -->\n" ~
                         "<?pi foo?>\n" ~
                         "<su></su>",
                         "<!-- comment -->\n" ~
                         "<?pi foo?>\n" ~
                         "<su/>"];

            foreach(i, xml; docs)
            {
                immutable suType = i == 0 ? EntityType.elementStart : EntityType.elementEmpty;
                auto range = parseXML(func(xml));

                // Build a DOM from each successive position in the range.
                // Every step forward leaves one entity fewer to be found.
                foreach(j; 0 .. 4 - i)
                {
                    auto copy = range.save;
                    auto partial = parseDOM(copy);
                    assert(partial.type == EntityType.elementStart);
                    assert(partial.children.length == 3 - j);

                    if(j <= 2)
                    {
                        assert(partial.children[2 - j].type == suType);
                        assert(equal(partial.children[2 - j].name, "su"));
                    }
                    if(j <= 1)
                    {
                        assert(partial.children[1 - j].type == EntityType.pi);
                        assert(equal(partial.children[1 - j].name, "pi"));
                        assert(equal(partial.children[1 - j].text, "foo"));
                    }
                    if(j == 0)
                    {
                        assert(partial.children[0].type == EntityType.comment);
                        assert(equal(partial.children[0].text, " comment "));
                    }

                    range.popFront();
                }

                // An exhausted range results in a default-initialized entity.
                assert(range.empty);
                auto nothing = parseDOM(range);
                assert(nothing.type == EntityType.elementStart);
                assert(nothing.name is typeof(nothing.name).init);
                assert(nothing.children.length == 0);
            }
        }
        {
            auto xml = "<root>\n" ~
                       "    <foo>\n" ~
                       "        <bar>\n" ~
                       "            <baz>\n" ~
                       "            It's silly, Charley\n" ~
                       "            </baz>\n" ~
                       "            <frobozz>\n" ~
                       "                <is>the Wiz</is>\n" ~
                       "            </frobozz>\n" ~
                       "            <empty></empty>\n" ~
                       "            <xyzzy/>\n" ~
                       "        </bar>\n" ~
                       "    </foo>\n" ~
                       "    <!--This isn't the end-->\n" ~
                       "</root>\n" ~
                       "<?Poirot?>\n" ~
                       "<!--It's the end!-->";

            {
                // Walk through the document one entity at a time. The
                // trailing comment on each line names the entity that the
                // range is at when parseDOM gets called.
                auto range = parseXML(func(xml));
                with(EntityType)
                {
                    testChildren(range, 1, 1, elementStart, pi, comment); // <root>
                    testChildren(range, 2, 5, elementStart, comment); // <foo>
                    testChildren(range, 3, 9, elementStart); // <bar>
                    testChildren(range, 4, 13, elementStart, elementStart, elementStart, elementEmpty); // <baz>
                    testChildren(range, 4, 18, text); // It's silly, Charley
                    testChildren(range, 6, 13); // </baz>
                    testChildren(range, 7, 13, elementStart, elementStart, elementEmpty); // <frobozz>
                    testChildren(range, 8, 17, elementStart); // <is>
                    testChildren(range, 8, 21, text); // the Wiz
                    testChildren(range, 8, 28); // </is>
                    testChildren(range, 9, 13); // </frobozz>
                    testChildren(range, 10, 13, elementStart, elementEmpty); // <empty>
                    testChildren(range, 10, 20); // </empty>
                    testChildren(range, 11, 13, elementEmpty); // <xyzzy/>
                    testChildren(range, 12, 9); // </bar>
                    testChildren(range, 13, 5); // </foo>
                    testChildren(range, 14, 5, comment); // <!--This isn't the end-->
                    testChildren(range, 15, 1); // </root>
                    testChildren(range, 16, 1, pi, comment); // <?Poirot?>
                    testChildren(range, 17, 1, comment); // <!--It's the end!-->
                    testChildren(range, 1, 1); // empty range
                }
            }
            {
                // Check the complete tree for the whole document.
                auto doc = parseDOM(func(xml));
                assert(doc.children.length == 3);

                auto rootElem = doc.children[0];
                assert(rootElem.type == EntityType.elementStart);
                assert(equal(rootElem.name, "root"));
                assert(rootElem.pos == TextPos(1, 1));
                assert(rootElem.children.length == 2);

                auto fooElem = rootElem.children[0];
                assert(fooElem.type == EntityType.elementStart);
                assert(equal(fooElem.name, "foo"));
                assert(fooElem.pos == TextPos(2, 5));
                assert(fooElem.children.length == 1);

                auto barElem = fooElem.children[0];
                assert(barElem.type == EntityType.elementStart);
                assert(equal(barElem.name, "bar"));
                assert(barElem.pos == TextPos(3, 9));
                assert(barElem.children.length == 4);

                auto bazElem = barElem.children[0];
                assert(bazElem.type == EntityType.elementStart);
                assert(equal(bazElem.name, "baz"));
                assert(bazElem.pos == TextPos(4, 13));
                assert(bazElem.children.length == 1);

                auto sillyText = bazElem.children[0];
                assert(sillyText.type == EntityType.text);
                assert(sillyText.pos == TextPos(4, 18));
                assert(equal(sillyText.text, "\n            It's silly, Charley\n            "));

                auto frobozzElem = barElem.children[1];
                assert(frobozzElem.type == EntityType.elementStart);
                assert(equal(frobozzElem.name, "frobozz"));
                assert(frobozzElem.pos == TextPos(7, 13));
                assert(frobozzElem.children.length == 1);

                auto isElem = frobozzElem.children[0];
                assert(isElem.type == EntityType.elementStart);
                assert(equal(isElem.name, "is"));
                assert(isElem.pos == TextPos(8, 17));
                assert(isElem.children.length == 1);

                auto wizText = isElem.children[0];
                assert(wizText.type == EntityType.text);
                assert(wizText.pos == TextPos(8, 21));
                assert(equal(wizText.text, "the Wiz"));

                auto emptyElem = barElem.children[2];
                assert(emptyElem.type == EntityType.elementStart);
                assert(equal(emptyElem.name, "empty"));
                assert(emptyElem.pos == TextPos(10, 13));
                assert(emptyElem.children.length == 0);

                auto xyzzyElem = barElem.children[3];
                assert(xyzzyElem.type == EntityType.elementEmpty);
                assert(equal(xyzzyElem.name, "xyzzy"));
                assert(xyzzyElem.pos == TextPos(11, 13));

                auto innerComment = rootElem.children[1];
                assert(innerComment.type == EntityType.comment);
                assert(innerComment.pos == TextPos(14, 5));
                assert(equal(innerComment.text, "This isn't the end"));

                auto poirot = doc.children[1];
                assert(poirot.type == EntityType.pi);
                assert(equal(poirot.name, "Poirot"));
                assert(poirot.pos == TextPos(16, 1));
                assert(poirot.text.empty);

                auto endComment = doc.children[2];
                assert(endComment.type == EntityType.comment);
                assert(endComment.pos == TextPos(17, 1));
                assert(equal(endComment.text, "It's the end!"));
            }
        }
    }}
}