1 // Written in the D programming language
2 
3 /++
4     This implements a DOM for representing an XML 1.0 document. $(LREF parseDOM)
5     uses an $(REF EntityRange, dxml, parser) to parse the document, and
6     $(LREF DOMEntity) recursively represents the DOM tree.
7 
8     See the documentation for $(MREF dxml, parser) and
9     $(REF EntityRange, dxml, parser) for details on the parser and its
10     configuration options.
11 
12     For convenience, $(REF EntityType, dxml, parser) and
13     $(REF simpleXML, dxml, parser) are publicly imported by this module,
14     since $(REF_ALTTEXT EntityType, EntityType, dxml, parser) is required
15     to correctly use $(LREF DOMEntity), and
16     $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) is highly likely to
17     be used when calling $(LREF parseDOM).
18 
19     Copyright: Copyright 2018 - 2023
20     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
21     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
22     Source:    $(LINK_TO_SRC dxml/_dom.d)
23 
24     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
25   +/
26 module dxml.dom;
27 
28 ///
29 version(dxmlTests) unittest // walks the same document with the default config and with simpleXML
30 {
31     import std.range.primitives : empty;
32 
33     auto xml = "<!-- comment -->\n" ~
34                "<root>\n" ~
35                "    <foo>some text<whatever/></foo>\n" ~
36                "    <bar/>\n" ~
37                "    <baz></baz>\n" ~
38                "</root>";
39     {
40         auto dom = parseDOM(xml); // no Config argument, so Config.init is used
41         assert(dom.type == EntityType.elementStart);
42         assert(dom.name.empty); // the document-level entity has an empty name
43         assert(dom.children.length == 2); // the comment and <root>
44 
45         assert(dom.children[0].type == EntityType.comment);
46         assert(dom.children[0].text == " comment ");
47 
48         auto root = dom.children[1];
49         assert(root.type == EntityType.elementStart);
50         assert(root.name == "root");
51         assert(root.children.length == 3); // <foo>, <bar/> and <baz>
52 
53         auto foo = root.children[0];
54         assert(foo.type == EntityType.elementStart);
55         assert(foo.name == "foo");
56         assert(foo.children.length == 2); // the text and <whatever/>
57 
58         assert(foo.children[0].type == EntityType.text);
59         assert(foo.children[0].text == "some text");
60 
61         assert(foo.children[1].type == EntityType.elementEmpty); // an empty tag is reported as elementEmpty here
62         assert(foo.children[1].name == "whatever");
63 
64         assert(root.children[1].type == EntityType.elementEmpty);
65         assert(root.children[1].name == "bar");
66 
67         assert(root.children[2].type == EntityType.elementStart);
68         assert(root.children[2].name == "baz");
69         assert(root.children[2].children.length == 0);
70     }
71     {
72         auto dom = parseDOM!simpleXML(xml); // same document, parsed with the simpleXML config
73         assert(dom.type == EntityType.elementStart);
74         assert(dom.name.empty);
75         assert(dom.children.length == 1); // only <root>; the comment is not among the children
76 
77         auto root = dom.children[0];
78         assert(root.type == EntityType.elementStart);
79         assert(root.name == "root");
80         assert(root.children.length == 3);
81 
82         auto foo = root.children[0];
83         assert(foo.type == EntityType.elementStart);
84         assert(foo.name == "foo");
85         assert(foo.children.length == 2);
86 
87         assert(foo.children[0].type == EntityType.text);
88         assert(foo.children[0].text == "some text");
89 
90         assert(foo.children[1].type == EntityType.elementStart); // <whatever/> is reported as elementStart here...
91         assert(foo.children[1].name == "whatever");
92         assert(foo.children[1].children.length == 0); // ...with no children
93 
94         assert(root.children[1].type == EntityType.elementStart); // likewise for <bar/>
95         assert(root.children[1].name == "bar");
96         assert(root.children[1].children.length == 0);
97 
98         assert(root.children[2].type == EntityType.elementStart);
99         assert(root.children[2].name == "baz");
100         assert(root.children[2].children.length == 0);
101     }
102 }
103 
104 
105 import std.range.primitives;
106 import std.traits;
107 
108 public import dxml.parser : EntityType, simpleXML;
109 import dxml.parser : Config, EntityRange;
110 
111 
112 /++
113     Represents an entity in an XML document as a DOM tree.
114 
115     parseDOM either takes a range of characters or an
116     $(REF EntityRange, dxml, parser) and generates a DOMEntity from that XML.
117 
118     When parseDOM processes the XML, it returns a DOMEntity representing the
119     entire document. Even though the XML document itself isn't technically an
120     entity in the XML document, it's simplest to treat it as if it were an
121     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser)
122     with an empty $(LREF2 name, _DOMEntity.name). That DOMEntity then contains
123     child entities that recursively define the DOM tree through their children.
124 
125     For DOMEntities of type
126     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser),
127     $(LREF _DOMEntity.children) gives access to all of the child entities of
128     that start tag. Other DOMEntities have no children.
129 
130     Note that the $(LREF2 type, _DOMEntity.type) determines which
131     properties of the DOMEntity can be used, and it can determine whether
132     functions which a DOMEntity is passed to are allowed to be called. Each
133     function lists which $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s
134     are allowed, and it is an error to call them with any other
135     $(REF_ALTTEXT EntityType, EntityType, dxml, parser).
136 
137     If parseDOM is given a range of characters, it in turn passes that to
138     $(REF parseXML, dxml, parser) to do the actual XML parsing. As such, that
139     overload accepts an optional $(REF Config, dxml, parser) as a template
140     argument to configure the parser.
141 
142     If parseDOM is given an
143     $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the range does
144     not have to be at the start of the document. It can be used to create a DOM
145     for a portion of the document. As when a character range is passed, it
146     will return a DOMEntity with the $(LREF2 type, _DOMEntity.type)
147     $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser)
148     and an empty $(LREF2 name, _DOMEntity.name). It will iterate the range until
149     it either reaches the end of the range, or it reaches the end tag which
150     matches the start tag which is the parent of the entity that was the
151     $(D front) of the range when it was passed to parseDOM. The
152     $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser)
153     is passed by $(K_REF), so if it was not at the top level when it was passed
154     to parseDOM (and thus still has elements in it when parseDOM returns), the
155     range will then be at the entity after that matching end tag, and the
156     application can continue to process the range after that if it so chooses.
157 
158     Params:
159         config = The $(REF Config, dxml, parser) to use with
160                  $(REF parseXML, dxml, parser) if the range passed to parseDOM
161                  is a range of characters.
162         range = Either a range of characters representing an entire XML document
163                 or a $(REF EntityRange, dxml, parser) which may refer to some
164                 or all of an XML document.
165 
166     Returns: A DOMEntity representing the DOM tree from the point in the
167              document that was passed to parseDOM (the start of the document if
168              a range of characters was passed, and wherever in the document the
169              range was if an
170              $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) was passed).
171 
172     Throws: $(REF_ALTTEXT XMLParsingException, XMLParsingException, dxml, parser)
173             if the parser encounters invalid XML.
174   +/
175 struct DOMEntity(R)
176 {
177 public:
178 
179     import std.algorithm.searching : canFind;
180     import std.range : only, takeExactly;
181     import std.typecons : Tuple;
182     import dxml.parser : TextPos;
183 
184     private enum compileInTests = is(R == DOMCompileTests); // true only when R is DOMCompileTests; gates the unittest blocks inside this struct
185 
186     /++
187         The type used when any slice of the original range of characters is
188         used. If the range was a string or supports slicing, then SliceOfR is
189         the same type as the range; otherwise, it's the result of calling
190         $(PHOBOS_REF takeExactly, std, range) on it.
191 
192         ---
193         import std.algorithm : filter;
194         import std.range : takeExactly;
195 
196         static assert(is(DOMEntity!string.SliceOfR == string));
197 
198         auto range = filter!(a => true)("some xml");
199 
200         static assert(is(DOMEntity!(typeof(range)).SliceOfR ==
201                          typeof(takeExactly(range, 42))));
202         ---
203       +/
204     static if(isDynamicArray!R || hasSlicing!R)
205         alias SliceOfR = R; // arrays and sliceable ranges: slices have the range's own type
206     else
207         alias SliceOfR = typeof(takeExactly(R.init, 42)); // only the type is used; 42 is an arbitrary length
208 
209     // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
210     // a ddoc-ed unit test.
211     static if(compileInTests) @safe unittest // mirrors the example in the SliceOfR documentation
212     {
213         import std.algorithm : filter;
214         import std.range : takeExactly;
215 
216         static assert(is(DOMEntity!string.SliceOfR == string)); // string: SliceOfR is the range type itself
217 
218         auto range = filter!(a => true)("some xml");
219 
220         static assert(is(DOMEntity!(typeof(range)).SliceOfR == // for the filter result, SliceOfR is the takeExactly type
221                          typeof(takeExactly(range, 42))));
222     }
223 
224 
225     /++
226         The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
227         $(LREF2 attributes, DOMEntity) returns a range of.
228 
229         See_Also: $(LREF2 attributes, DOMEntity)
230       +/
231     alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos,  "pos"); // named fields: name, value, pos
232 
233 
234     /++
235         The $(REF_ALTTEXT EntityType, EntityType, dxml, parser) for this
236         DOMEntity.
237 
238         The type can never be
239         $(REF_ALTTEXT EntityType.elementEnd, EntityType.elementEnd, dxml, parser),
240         because the end of $(LREF2 children, DOMEntity.children) already
241         indicates where the contents of the start tag end.
242 
243         type determines which properties of the DOMEntity can be used, and it
244         can determine whether functions which a DOMEntity is passed to are
245         allowed to be called. Each function lists which
246         $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s are allowed, and it
247         is an error to call them with any other
248         $(REF_ALTTEXT EntityType, EntityType, dxml, parser).
249       +/
250     @property EntityType type() @safe const pure nothrow @nogc
251     {
252         return _type; // set by the private constructor; EntityType.elementStart by default
253     }
254 
255     ///
256     static if(compileInTests) unittest // shows the EntityType reported for each kind of child entity
257     {
258         import std.range.primitives;
259 
260         auto xml = "<root>\n" ~
261                    "    <!--no comment-->\n" ~
262                    "    <![CDATA[cdata run]]>\n" ~
263                    "    <text>I am text!</text>\n" ~
264                    "    <empty/>\n" ~
265                    "    <?pi?>\n" ~
266                    "</root>";
267 
268         auto dom = parseDOM(xml);
269         assert(dom.type == EntityType.elementStart); // the document-level entity is an unnamed start tag
270         assert(dom.name.empty);
271         assert(dom.children.length == 1);
272 
273         auto root = dom.children[0];
274         assert(root.type == EntityType.elementStart);
275         assert(root.name == "root");
276         assert(root.children.length == 5); // comment, cdata, <text>, <empty/> and the pi
277 
278         assert(root.children[0].type == EntityType.comment);
279         assert(root.children[0].text == "no comment");
280 
281         assert(root.children[1].type == EntityType.cdata);
282         assert(root.children[1].text == "cdata run");
283 
284         auto textTag = root.children[2];
285         assert(textTag.type == EntityType.elementStart);
286         assert(textTag.name == "text");
287         assert(textTag.children.length == 1);
288 
289         assert(textTag.children[0].type == EntityType.text);
290         assert(textTag.children[0].text == "I am text!");
291 
292         assert(root.children[3].type == EntityType.elementEmpty);
293         assert(root.children[3].name == "empty");
294 
295         assert(root.children[4].type == EntityType.pi);
296         assert(root.children[4].name == "pi");
297     }
298 
299 
300     /++
301         The position in the original text where the entity starts.
302 
303         See_Also: $(REF_ALTTEXT TextPos, TextPos, dxml, parser)$(BR)
304                   $(REF_ALTTEXT XMLParsingException._pos, XMLParsingException._pos, dxml, parser)
305       +/
306     @property TextPos pos() @safe const pure nothrow @nogc
307     {
308         return _pos; // assigned by the private constructor, or directly by the EntityRange overload of parseDOM
309     }
310 
311     ///
312     static if(compileInTests) unittest // shows the TextPos reported for nested entities
313     {
314         import std.range.primitives : empty;
315         import dxml.parser : TextPos;
316         import dxml.util : stripIndent;
317 
318         auto xml = "<root>\n" ~
319                    "    <foo>\n" ~
320                    "        Foo and bar. Always foo and bar...\n" ~
321                    "    </foo>\n" ~
322                    "</root>";
323 
324         auto dom = parseDOM(xml);
325         assert(dom.type == EntityType.elementStart);
326         assert(dom.name.empty);
327         assert(dom.pos == TextPos(1, 1)); // the document-level entity reports the start of the text
328 
329         auto root = dom.children[0];
330         assert(root.type == EntityType.elementStart);
331         assert(root.name == "root");
332         assert(root.pos == TextPos(1, 1)); // <root> is the first thing in the text
333 
334         auto foo = root.children[0];
335         assert(foo.type == EntityType.elementStart);
336         assert(foo.name == "foo");
337         assert(foo.pos == TextPos(2, 5)); // line 2, after four spaces of indentation
338 
339         auto text = foo.children[0];
340         assert(text.type == EntityType.text);
341         assert(text.text.stripIndent() ==
342                "Foo and bar. Always foo and bar...");
343         assert(text.pos == TextPos(2, 10)); // the text begins right after <foo>, which spans columns 5-9
344     }
345 
346 
347     /++
348         Gives the name of this DOMEntity.
349 
350         Note that this is the direct name in the XML for this entity and
351         does not contain any of the names of any of the parent entities that
352         this entity has.
353 
354         $(TABLE
355             $(TR $(TH Supported $(LREF EntityType)s:))
356             $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
357             $(TR $(TD $(REF_ALTTEXT elementEnd, EntityType.elementEnd, dxml, parser)))
358             $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser)))
359             $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser)))
360         )
361 
362         See_Also: $(LREF2 path, DOMEntity.path)
363       +/
364     @property SliceOfR name()
365     {
366         import dxml.internal : checkedSave;
367         with(EntityType)
368         {
369             import std.format : format;
370             assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type), // the entity types for which name may be called
371                    format("name cannot be called with %s", _type));
372         }
373         return checkedSave(_name); // NOTE(review): presumably returns a saved copy of _name - confirm in dxml.internal
374     }
375 
376     ///
377     static if(compileInTests) unittest // shows name for a start tag, an empty tag and a processing instruction
378     {
379         import std.range.primitives : empty;
380 
381         auto xml = "<root>\n" ~
382                    "    <empty/>\n" ~
383                    "    <?pi?>\n" ~
384                    "</root>";
385 
386         auto dom = parseDOM(xml);
387         assert(dom.type == EntityType.elementStart);
388         assert(dom.name.empty); // the document-level entity has an empty name
389 
390         auto root = dom.children[0];
391         assert(root.type == EntityType.elementStart);
392         assert(root.name == "root");
393 
394         assert(root.children[0].type == EntityType.elementEmpty);
395         assert(root.children[0].name == "empty");
396 
397         assert(root.children[1].type == EntityType.pi);
398         assert(root.children[1].name == "pi");
399     }
400 
401 
402     /++
403         Gives the list of the names of the parent start tags of this DOMEntity.
404 
405         The name of the current entity (if it has one) is not included in the
406         path.
407 
408         Note that if parseDOM were given an
409         $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the path
410         starts where the range started. So, it doesn't necessarily contain the
411         entire path from the start of the XML document.
412 
413         See_Also: $(LREF2 name, DOMEntity.name)
414       +/
415     @property SliceOfR[] path()
416     {
417         return _path; // the stored array itself is returned, not a copy
418     }
419 
420     ///
421     static if(compileInTests) unittest // shows path at each nesting level of a small document
422     {
423         import std.range.primitives : empty;
424 
425         auto xml = "<root>\n" ~
426                    "    <bar>\n" ~
427                    "        <baz>\n" ~
428                    "            <xyzzy/>\n" ~
429                    "        </baz>\n" ~
430                    "        <frobozz>\n" ~
431                    "            <!-- comment -->\n" ~
432                    "            It's magic!\n" ~
433                    "        </frobozz>\n" ~
434                    "    </bar>\n" ~
435                    "    <foo></foo>\n" ~
436                    "</root>";
437 
438         auto dom = parseDOM(xml);
439         assert(dom.type == EntityType.elementStart);
440         assert(dom.name.empty);
441         assert(dom.path.empty); // document level: no parent tags
442 
443         auto root = dom.children[0];
444         assert(root.type == EntityType.elementStart);
445         assert(root.name == "root");
446         assert(root.path.empty); // the entity's own name is not part of its path
447 
448         auto bar = root.children[0];
449         assert(bar.type == EntityType.elementStart);
450         assert(bar.name == "bar");
451         assert(bar.path == ["root"]);
452 
453         auto baz = bar.children[0];
454         assert(baz.type == EntityType.elementStart);
455         assert(baz.name == "baz");
456         assert(baz.path == ["root", "bar"]);
457 
458         auto xyzzy = baz.children[0];
459         assert(xyzzy.type == EntityType.elementEmpty);
460         assert(xyzzy.name == "xyzzy");
461         assert(xyzzy.path == ["root", "bar", "baz"]);
462 
463         auto frobozz = bar.children[1];
464         assert(frobozz.type == EntityType.elementStart);
465         assert(frobozz.name == "frobozz");
466         assert(frobozz.path == ["root", "bar"]);
467 
468         auto comment = frobozz.children[0];
469         assert(comment.type == EntityType.comment);
470         assert(comment.text == " comment ");
471         assert(comment.path == ["root", "bar", "frobozz"]); // entities without a name still have a path
472 
473         auto text = frobozz.children[1];
474         assert(text.type == EntityType.text);
475         assert(text.text == "\n            It's magic!\n        ");
476         assert(text.path == ["root", "bar", "frobozz"]);
477 
478         auto foo = root.children[1];
479         assert(foo.type == EntityType.elementStart);
480         assert(foo.name == "foo");
481         assert(foo.path == ["root"]);
482     }
483 
484 
485     /++
486         Returns a dynamic array of attributes for a start tag where each
487         attribute is represented as a$(BR)
488         $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
489                   $(LREF2 SliceOfR, DOMEntity), $(D_STRING "name"),
490                   $(LREF2 SliceOfR, DOMEntity), $(D_STRING "value"),
491                   $(REF_ALTTEXT TextPos, TextPos, dxml, parser), $(D_STRING "pos"))).
492 
493         $(TABLE
494             $(TR $(TH Supported $(LREF EntityType)s:))
495             $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
496             $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser)))
497         )
498 
499         See_Also: $(LREF DOMEntity.Attribute)$(BR)
500                   $(REF normalize, dxml, util)$(BR)
501                   $(REF asNormalized, dxml, util)
502       +/
503     @property auto attributes()
504     {
505         with(EntityType)
506         {
507             import std.format : format;
508             assert(_type == elementStart || _type == elementEmpty, // only these two entity types may be asked for attributes
509                    format("attributes cannot be called with %s", _type));
510         }
511         return _attributes; // an Attribute[] (see the _attributes field); the stored array itself is returned
512     }
513 
514     ///
515     static if(compileInTests) unittest // shows attributes for tags with and without attributes, and for a non-string range
516     {
517         import std.algorithm.comparison : equal;
518         import std.algorithm.iteration : filter;
519         import std.range.primitives : empty;
520         import dxml.parser : TextPos;
521 
522         {
523             auto xml = "<root/>";
524             auto root = parseDOM(xml).children[0];
525             assert(root.type == EntityType.elementEmpty);
526             assert(root.attributes.empty); // <root/> has no attributes
527 
528             static assert(is(ElementType!(typeof(root.attributes)) ==
529                              typeof(root).Attribute));
530         }
531         {
532             auto xml = "<root a='42' q='29' w='hello'/>";
533             auto root = parseDOM(xml).children[0];
534             assert(root.type == EntityType.elementEmpty);
535 
536             auto attrs = root.attributes;
537             assert(attrs.length == 3);
538 
539             assert(attrs[0].name == "a");
540             assert(attrs[0].value == "42");
541             assert(attrs[0].pos == TextPos(1, 7)); // column of the attribute's name: 'a' is the 7th character
542 
543             assert(attrs[1].name == "q");
544             assert(attrs[1].value == "29");
545             assert(attrs[1].pos == TextPos(1, 14));
546 
547             assert(attrs[2].name == "w");
548             assert(attrs[2].value == "hello");
549             assert(attrs[2].pos == TextPos(1, 21));
550         }
551         // Because the type of name and value is SliceOfR, == with a string
552         // only works if the range passed to parseXML was string.
553         {
554             auto xml = filter!"true"("<root a='42' q='29' w='hello'/>"); // the "true" predicate keeps every character; only the range type changes
555             auto root = parseDOM(xml).children[0];
556             assert(root.type == EntityType.elementEmpty);
557 
558             auto attrs = root.attributes;
559             assert(attrs.length == 3);
560 
561             assert(equal(attrs[0].name, "a"));
562             assert(equal(attrs[0].value, "42"));
563             assert(attrs[0].pos == TextPos(1, 7));
564 
565             assert(equal(attrs[1].name, "q"));
566             assert(equal(attrs[1].value, "29"));
567             assert(attrs[1].pos == TextPos(1, 14));
568 
569             assert(equal(attrs[2].name, "w"));
570             assert(equal(attrs[2].value, "hello"));
571             assert(attrs[2].pos == TextPos(1, 21));
572         }
573     }
574 
575 
576     /++
577         Returns the textual value of this DOMEntity.
578 
579         In the case of
580         $(REF_ALTTEXT EntityType.pi, EntityType.pi, dxml, parser), this is the
581         text that follows the name, whereas in the other cases, the text is the
582         entire contents of the entity (save for the delimiters on the ends if
583         that entity has them).
584 
585         $(TABLE
586             $(TR $(TH Supported $(LREF EntityType)s:))
587             $(TR $(TD $(REF_ALTTEXT cdata, EntityType.cdata, dxml, parser)))
588             $(TR $(TD $(REF_ALTTEXT comment, EntityType.comment, dxml, parser)))
589             $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser)))
590             $(TR $(TD $(REF_ALTTEXT _text, EntityType._text, dxml, parser)))
591         )
592 
593         See_Also: $(REF normalize, dxml, util)$(BR)
594                   $(REF asNormalized, dxml, util)$(BR)
595                   $(REF stripIndent, dxml, util)$(BR)
596                   $(REF withoutIndent, dxml, util)
597       +/
598     @property SliceOfR text()
599     {
600         import dxml.internal : checkedSave;
601         with(EntityType)
602         {
603             import std.format : format;
604             assert(only(cdata, comment, pi, text).canFind(_type), // the entity types for which text may be called
605                    format("text cannot be called with %s", _type));
606         }
607         return checkedSave(_text); // NOTE(review): presumably returns a saved copy of _text - confirm in dxml.internal
608     }
609 
610     ///
611     static if(compileInTests) unittest // shows text for processing instructions, cdata, comments and plain text
612     {
613         import std.range.primitives : empty;
614 
615         auto xml = "<?xml version='1.0'?>\n" ~
616                    "<?instructionName?>\n" ~
617                    "<?foo here is something to say?>\n" ~
618                    "<root>\n" ~
619                    "    <![CDATA[ Yay! random text >> << ]]>\n" ~
620                    "    <!-- some random comment -->\n" ~
621                    "    <p>something here</p>\n" ~
622                    "    <p>\n" ~
623                    "       something else\n" ~
624                    "       here</p>\n" ~
625                    "</root>";
626         auto dom = parseDOM(xml); // the <?xml ...?> declaration is not among the children: children[0] is the first pi
627 
628         // "<?instructionName?>\n" ~
629         auto pi1 = dom.children[0];
630         assert(pi1.type == EntityType.pi);
631         assert(pi1.name == "instructionName");
632         assert(pi1.text.empty); // nothing follows the name in this pi
633 
634         // "<?foo here is something to say?>\n" ~
635         auto pi2 = dom.children[1];
636         assert(pi2.type == EntityType.pi);
637         assert(pi2.name == "foo");
638         assert(pi2.text == "here is something to say"); // the text after the name
639 
640         // "<root>\n" ~
641         auto root = dom.children[2];
642         assert(root.type == EntityType.elementStart);
643 
644         // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
645         auto cdata = root.children[0];
646         assert(cdata.type == EntityType.cdata);
647         assert(cdata.text == " Yay! random text >> << ");
648 
649         // "    <!-- some random comment -->\n" ~
650         auto comment = root.children[1];
651         assert(comment.type == EntityType.comment);
652         assert(comment.text == " some random comment ");
653 
654         // "    <p>something here</p>\n" ~
655         auto p1 = root.children[2];
656         assert(p1.type == EntityType.elementStart);
657         assert(p1.name == "p");
658 
659         assert(p1.children[0].type == EntityType.text);
660         assert(p1.children[0].text == "something here");
661 
662         // "    <p>\n" ~
663         // "       something else\n" ~
664         // "       here</p>\n" ~
665         auto p2 = root.children[3];
666         assert(p2.type == EntityType.elementStart);
667 
668         assert(p2.children[0].type == EntityType.text);
669         assert(p2.children[0].text == "\n       something else\n       here"); // newlines and indentation are kept as written
670     }
671 
672 
673     /++
674         Returns the child entities of the current entity.
675 
676         They are in the same order that they were in the XML document.
677 
678         $(TABLE
679             $(TR $(TH Supported $(LREF EntityType)s:))
680             $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser)))
681         )
682       +/
683     @property DOMEntity[] children()
684     {
685         import std.format : format;
686         assert(_type == EntityType.elementStart, // children may only be asked of a start tag
687                format!"children cannot be called with %s"(_type));
688         return _children; // the stored array itself is returned, not a copy
689     }
690 
691     ///
692     static if(compileInTests) unittest // walks children through several levels of nesting
693     {
694         auto xml = "<potato>\n" ~
695                    "    <!--comment-->\n" ~
696                    "    <foo>bar</foo>\n" ~
697                    "    <tag>\n" ~
698                    "        <silly>you</silly>\n" ~
699                    "        <empty/>\n" ~
700                    "        <nocontent></nocontent>\n" ~
701                    "    </tag>\n" ~
702                    "</potato>\n" ~
703                    "<!--the end-->";
704         auto dom = parseDOM(xml);
705         assert(dom.children.length == 2); // <potato> and the trailing comment
706 
707         auto potato = dom.children[0];
708         assert(potato.type == EntityType.elementStart);
709         assert(potato.name == "potato");
710         assert(potato.children.length == 3); // the comment, <foo> and <tag>
711 
712         auto comment = potato.children[0];
713         assert(comment.type == EntityType.comment);
714         assert(comment.text == "comment");
715 
716         auto foo = potato.children[1];
717         assert(foo.type == EntityType.elementStart);
718         assert(foo.name == "foo");
719         assert(foo.children.length == 1);
720 
721         assert(foo.children[0].type == EntityType.text);
722         assert(foo.children[0].text == "bar");
723 
724         auto tag = potato.children[2];
725         assert(tag.type == EntityType.elementStart);
726         assert(tag.name == "tag");
727         assert(tag.children.length == 3); // <silly>, <empty/> and <nocontent>
728 
729         auto silly = tag.children[0];
730         assert(silly.type == EntityType.elementStart);
731         assert(silly.name == "silly");
732         assert(silly.children.length == 1);
733 
734         assert(silly.children[0].type == EntityType.text);
735         assert(silly.children[0].text == "you");
736 
737         auto empty = tag.children[1];
738         assert(empty.type == EntityType.elementEmpty);
739         assert(empty.name == "empty");
740 
741         auto nocontent = tag.children[2];
742         assert(nocontent.type == EntityType.elementStart);
743         assert(nocontent.name == "nocontent");
744         assert(nocontent.children.length == 0); // a start tag immediately followed by its end tag has no children
745 
746         auto endComment = dom.children[1];
747         assert(endComment.type == EntityType.comment);
748         assert(endComment.text == "the end");
749     }
750 
751 
752     // Reduce the chance of bugs if reference-type ranges are involved.
753     static if(!isDynamicArray!R) this(this) // postblit, compiled in only when R is not a dynamic array
754     {
755         with(EntityType) final switch(_type) // NOTE(review): _path and _attributes elements are not re-saved here
756         {
757             case cdata: goto case text; // only _text is saved for this type
758             case comment: goto case text; // only _text is saved for this type
759             case elementStart:
760             {
761                 _name = _name.save; // replace the copied _name with the result of save
762                 break;
763             }
764             case elementEnd: goto case elementStart; // only _name is saved for this type
765             case elementEmpty: goto case elementStart; // only _name is saved for this type
766             case text:
767             {
768                 _text = _text.save; // replace the copied _text with the result of save
769                 break;
770             }
771             case pi:
772             {
773                 _text = _text.save; // for pi both members are saved: _text here...
774                 goto case elementStart; // ...and _name via the elementStart case
775             }
776         }
777     }
778 
779 
780 private:
781 
782     this(EntityType type, TextPos pos) // private: creates an entity of the given type at the given position
783     {
784         _type = type;
785         _pos = pos;
786 
787         // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
788         _name = typeof(_name).init; // explicit .init as a workaround for the issue linked above
789         _text = typeof(_text).init; // explicit .init as a workaround for the issue linked above
790     }
791 
792     auto _type = EntityType.elementStart; // so a default-initialized DOMEntity is an unnamed start tag
793     TextPos _pos; // returned by pos
794     SliceOfR _name; // returned (through checkedSave) by name
795     SliceOfR[] _path; // returned by path
796     Attribute[] _attributes; // returned by attributes
797     SliceOfR _text; // returned (through checkedSave) by text
798     DOMEntity[] _children; // returned by children
799 }
800 
801 /// Ditto
802 DOMEntity!R parseDOM(Config config = Config.init, R)(R range)
803     if(isForwardRange!R && isSomeChar!(ElementType!R)) // accepts any forward range of characters
804 {
805     import dxml.parser : parseXML;
806     auto entityRange = parseXML!config(range); // hand the characters to the parser, configured by config
807     typeof(return) retval; // default-initialized, so its _type is EntityType.elementStart
808     _parseDOM(entityRange, retval); // NOTE(review): presumably builds the tree into retval; _parseDOM is not shown here
809     return retval;
810 }
811 
812 /// Ditto
813 DOMEntity!(ER.Input) parseDOM(ER)(ref ER range) // range is taken by ref, so the caller sees how far it was advanced
814     if(isInstanceOf!(EntityRange, ER))
815 {
816     typeof(return) retval; // default-initialized, so its _type is EntityType.elementStart
817     if(range.empty)
818         return retval; // nothing to parse: return the default-initialized entity
819     retval._pos = range.front.pos; // the returned entity takes the position of the range's current front
820     if(range.front.type == EntityType.elementEnd)
821         return retval; // already at an end tag: return without calling _parseDOM; the range is left untouched
822     _parseDOM(range, retval); // NOTE(review): presumably builds the tree into retval; _parseDOM is not shown here
823     return retval;
824 }
825 
826 /++
827     parseDOM with the default $(REF_ALTTEXT Config, Config, dxml, parser) and a
828     range of characters.
829   +/
version(dxmlTests) @safe unittest
{
    import std.range.primitives;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // parseDOM returns a nameless elementStart which wraps the document.
    auto dom = parseDOM(xml);
    assert(dom.name.empty);
    assert(dom.type == EntityType.elementStart);

    auto topLevel = dom.children;
    assert(topLevel.length == 1);

    auto root = topLevel[0];
    assert(root.name == "root");
    assert(root.type == EntityType.elementStart);

    // With the default Config, the comment is one of the four children.
    auto rootKids = root.children;
    assert(rootKids.length == 4);

    auto comment = rootKids[0];
    assert(comment.type == EntityType.comment);
    assert(comment.text == " no comment ");

    auto foo = rootKids[1];
    assert(foo.type == EntityType.elementStart);
    assert(foo.name == "foo");
    assert(foo.children.length == 0);

    auto baz = rootKids[2];
    assert(baz.name == "baz");
    assert(baz.type == EntityType.elementStart);

    auto bazKids = baz.children;
    assert(bazKids.length == 1);

    auto xyzzy = bazKids[0];
    assert(xyzzy.name == "xyzzy");
    assert(xyzzy.type == EntityType.elementStart);

    auto xyzzyKids = xyzzy.children;
    assert(xyzzyKids.length == 1);
    assert(xyzzyKids[0].type == EntityType.text);
    assert(xyzzyKids[0].text == "It's an adventure!");

    // <tag/> is reported as an elementEmpty with the default Config.
    auto tag = rootKids[3];
    assert(tag.type == EntityType.elementEmpty);
    assert(tag.name == "tag");
}
876 
877 /++
878     parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and a range
879     of characters.
880   +/
version(dxmlTests) unittest
{
    import std.range.primitives : empty;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // parseDOM returns a nameless elementStart which wraps the document.
    auto dom = parseDOM!simpleXML(xml);
    assert(dom.name.empty);
    assert(dom.type == EntityType.elementStart);

    auto topLevel = dom.children;
    assert(topLevel.length == 1);

    auto root = topLevel[0];
    assert(root.name == "root");
    assert(root.type == EntityType.elementStart);

    // With simpleXML, the comment does not show up among the children.
    auto rootKids = root.children;
    assert(rootKids.length == 3);

    auto foo = rootKids[0];
    assert(foo.type == EntityType.elementStart);
    assert(foo.name == "foo");
    assert(foo.children.length == 0);

    auto baz = rootKids[1];
    assert(baz.name == "baz");
    assert(baz.type == EntityType.elementStart);

    auto bazKids = baz.children;
    assert(bazKids.length == 1);

    auto xyzzy = bazKids[0];
    assert(xyzzy.name == "xyzzy");
    assert(xyzzy.type == EntityType.elementStart);

    auto xyzzyKids = xyzzy.children;
    assert(xyzzyKids.length == 1);
    assert(xyzzyKids[0].type == EntityType.text);
    assert(xyzzyKids[0].text == "It's an adventure!");

    // With simpleXML, <tag/> is an elementStart without any children.
    auto tag = rootKids[2];
    assert(tag.type == EntityType.elementStart);
    assert(tag.name == "tag");
    assert(tag.children.length == 0);
}
925 
926 /++
927     parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and an
928     $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser).
929   +/
version(dxmlTests) unittest
{
    import std.range.primitives : empty;
    import dxml.parser : parseXML;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // The range is taken by ref, so parsing the whole document consumes it.
    auto range = parseXML!simpleXML(xml);
    auto dom = parseDOM(range);
    assert(range.empty);

    assert(dom.name.empty);
    assert(dom.type == EntityType.elementStart);

    auto topLevel = dom.children;
    assert(topLevel.length == 1);

    auto root = topLevel[0];
    assert(root.name == "root");
    assert(root.type == EntityType.elementStart);

    auto rootKids = root.children;
    assert(rootKids.length == 3);

    auto foo = rootKids[0];
    assert(foo.type == EntityType.elementStart);
    assert(foo.name == "foo");
    assert(foo.children.length == 0);

    auto baz = rootKids[1];
    assert(baz.name == "baz");
    assert(baz.type == EntityType.elementStart);

    auto bazKids = baz.children;
    assert(bazKids.length == 1);

    auto xyzzy = bazKids[0];
    assert(xyzzy.name == "xyzzy");
    assert(xyzzy.type == EntityType.elementStart);

    auto xyzzyKids = xyzzy.children;
    assert(xyzzyKids.length == 1);
    assert(xyzzyKids[0].type == EntityType.text);
    assert(xyzzyKids[0].text == "It's an adventure!");

    auto tag = rootKids[2];
    assert(tag.type == EntityType.elementStart);
    assert(tag.name == "tag");
    assert(tag.children.length == 0);
}
978 
979 /++
980     parseDOM with an $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser)
981     which is not at the start of the document.
982   +/
version(dxmlTests) unittest
{
    import std.range.primitives : empty;
    import dxml.parser : parseXML, skipToPath;

    auto xml = "<root>\n" ~
               "    <!-- no comment -->\n" ~
               "    <foo></foo>\n" ~
               "    <baz>\n" ~
               "        <xyzzy>It's an adventure!</xyzzy>\n" ~
               "    </baz>\n" ~
               "    <tag/>\n" ~
               "</root>";

    // Move the entity range to <xyzzy> before building the DOM.
    auto range = parseXML!simpleXML(xml).skipToPath("baz/xyzzy");
    assert(range.front.name == "xyzzy");
    assert(range.front.type == EntityType.elementStart);

    auto dom = parseDOM(range);

    // Afterwards, the range is on the start tag for <tag/>.
    assert(range.front.name == "tag");
    assert(range.front.type == EntityType.elementStart);

    // The returned entity is still a nameless elementStart; <xyzzy> is its
    // only child.
    assert(dom.name.empty);
    assert(dom.type == EntityType.elementStart);

    auto topLevel = dom.children;
    assert(topLevel.length == 1);

    auto xyzzy = topLevel[0];
    assert(xyzzy.name == "xyzzy");
    assert(xyzzy.type == EntityType.elementStart);

    auto xyzzyKids = xyzzy.children;
    assert(xyzzyKids.length == 1);
    assert(xyzzyKids[0].type == EntityType.text);
    assert(xyzzyKids[0].text == "It's an adventure!");
}
1017 
1018 /// parseDOM at compile-time
version(dxmlTests) unittest
{
    // Imported locally so that the example is self-contained when it is
    // rendered in the documentation: dom.name.empty below needs it when the
    // name is an array.
    import std.range.primitives : empty;

    enum xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // Declaring the result as an enum forces parseDOM to run during
    // compilation, so everything below can be checked with static assert.
    enum dom = parseDOM(xml);
    static assert(dom.type == EntityType.elementStart);
    static assert(dom.name.empty);
    static assert(dom.children.length == 2);

    static assert(dom.children[0].type == EntityType.comment);
    static assert(dom.children[0].text == " comment ");
}
1036 
// Dummy forward range of char whose primitives must never actually be called.
// Instantiating DOMEntity with it is purely a way to trigger the unittest
// blocks in DOMEntity without compiling them in normally.
private struct DOMCompileTests
{
@safe pure nothrow @nogc:

    @property bool empty() { assert(0); }
    @property char front() { assert(0); }
    void popFront() { assert(0); }
    @property DOMCompileTests save() { assert(0); }
}

version(dxmlTests)
{
    DOMEntity!DOMCompileTests _domTests;
}
1049 
1050 
1051 private:
1052 
// Converts the entities at the front of range into DE values and stores them
// as the children of parent.
//
// Entities are consumed until either the range is empty or an elementEnd is
// popped off (that elementEnd is consumed but produces no child). A start tag
// which is not immediately followed by its end tag is handled by recursing,
// with the children being stored in the DE for that start tag.
//
// Params:
//     range  = The entity range to consume. It must not be empty, and its
//              front must not be an elementEnd.
//     parent = The entity whose _children is assigned.
//     path   = The names of the start tags that have been recursed into so
//              far. It is stored as the _path of each child created by this
//              call.
void _parseDOM(ER, DE)(ref ER range, ref DE parent, ER.SliceOfR[] path = null)
{
    assert(!range.empty);
    assert(range.front.type != EntityType.elementEnd);

    import std.array : appender, array;
    auto children = appender!(DE[])();

    while(!range.empty)
    {
        auto entity = range.front;
        range.popFront();
        if(entity.type == EntityType.elementEnd)
            break;

        auto child = DE(entity.type, entity.pos);
        child._path = path;

        with(EntityType) final switch(entity.type)
        {
            case cdata: goto case text;
            case comment: goto case text;
            case elementStart:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();

                // A start tag directly followed by its end tag has no
                // children, so the end tag is simply consumed.
                if(range.front.type == EntityType.elementEnd)
                    range.popFront();
                else
                {
                    // The path for the nested entities is built as a separate
                    // value rather than by appending to path and shrinking it
                    // again afterwards. That way, path is never modified, so
                    // there is no way to remove an element from it that was
                    // never added (which is what shrinking it unconditionally
                    // after a conditional append would do for an empty name).
                    auto childPath = entity.name.empty ? path : path ~ entity.name;

                    // TODO The explicit instantiation doesn't hurt, but it
                    // shouldn't be necessary, and if it's not there, we get
                    // a compiler error. It should be reduced and reported.
                    _parseDOM!(ER, DE)(range, child, childPath);
                }
                break;
            }
            // The loop breaks on elementEnd before a child is ever created.
            case elementEnd: assert(0);
            case elementEmpty:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();
                break;
            }
            case text:
            {
                child._text = entity.text;
                break;
            }
            case pi:
            {
                child._name = entity.name;
                child._text = entity.text;
                break;
            }
        }

        put(children, child);
    }

    parent._children = children.data;
}
1119 
// Tests parseDOM with each of the range types produced by testRangeFuncs,
// both when starting from arbitrary positions within an entity range and when
// parsing an entire document.
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;
    import dxml.parser : parseXML, TextPos;

    // Calls parseDOM on a saved copy of entityRange and verifies that the
    // result is an elementStart at TextPos(row, col) whose children have
    // exactly the types in expected. entityRange itself is then advanced by
    // one entity (if it isn't empty) so that consecutive calls test
    // consecutive starting positions. Failures are reported with the line of
    // the caller.
    static void testChildren(ER, size_t line = __LINE__)(ref ER entityRange, int row, int col, EntityType[] expected...)
    {
        import core.exception : AssertError;
        import std.exception : enforce;
        auto temp = entityRange.save;
        auto dom = parseDOM(temp);
        enforce!AssertError(dom.type == EntityType.elementStart, "unittest 1", __FILE__, line);
        enforce!AssertError(dom.children.length == expected.length, "unittest 2", __FILE__, line);
        foreach(i; 0 .. dom._children.length)
            enforce!AssertError(dom._children[i].type == expected[i], "unittest 3", __FILE__, line);
        enforce!AssertError(dom.pos == TextPos(row, col), "unittest 4", __FILE__, line);
        if(!entityRange.empty)
            entityRange.popFront();
    }

    static foreach(func; testRangeFuncs)
    {{
        {
            // The first document ends with a start tag and an end tag, whereas
            // the second ends with an empty element tag, which is why 4 - i
            // starting positions are tested.
            foreach(i, xml; ["<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su></su>",
                            "<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su/>"])
            {
                auto range = parseXML(func(xml));
                // j is the number of entities that have been popped off of
                // range before parseDOM is called on a copy of it.
                foreach(j; 0 .. 4 - i)
                {
                    auto temp = range.save;
                    auto dom = parseDOM(temp);
                    assert(dom.type == EntityType.elementStart);
                    assert(dom.children.length == 3 - j);
                    if(j <= 2)
                    {
                        assert(dom.children[2 - j].type ==
                               (i == 0 ? EntityType.elementStart : EntityType.elementEmpty));
                        assert(equal(dom.children[2 - j].name, "su"));
                        if(j <= 1)
                        {
                            assert(dom.children[1 - j].type == EntityType.pi);
                            assert(equal(dom.children[1 - j].name, "pi"));
                            assert(equal(dom.children[1 - j].text, "foo"));
                            if(j == 0)
                            {
                                assert(dom.children[0].type == EntityType.comment);
                                assert(equal(dom.children[0].text, " comment "));
                            }
                        }
                    }
                    range.popFront();
                }
                // parseDOM on an empty entity range gives a default-initialized
                // entity without any children.
                assert(range.empty);
                auto dom = parseDOM(range);
                assert(dom.type == EntityType.elementStart);
                assert(dom.name is typeof(dom.name).init);
                assert(dom.children.length == 0);
            }
        }
        {
            auto xml = "<root>\n" ~
                       "    <foo>\n" ~
                       "        <bar>\n" ~
                       "            <baz>\n" ~
                       "            It's silly, Charley\n" ~
                       "            </baz>\n" ~
                       "            <frobozz>\n" ~
                       "                <is>the Wiz</is>\n" ~
                       "            </frobozz>\n" ~
                       "            <empty></empty>\n" ~
                       "            <xyzzy/>\n" ~
                       "        </bar>\n" ~
                       "    </foo>\n" ~
                       "    <!--This isn't the end-->\n" ~
                       "</root>\n" ~
                       "<?Poirot?>\n" ~
                       "<!--It's the end!-->";

            {
                // Each call tests parseDOM starting at the entity named in the
                // trailing comment and then advances range to the next entity,
                // so the order of these calls matters.
                auto range = parseXML(func(xml));
                with(EntityType)
                {
                    testChildren(range, 1, 1, elementStart, pi, comment); // <root>
                    testChildren(range, 2, 5, elementStart, comment); // <foo>
                    testChildren(range, 3, 9, elementStart); // <bar>
                    testChildren(range, 4, 13, elementStart, elementStart, elementStart, elementEmpty); // <baz>
                    testChildren(range, 4, 18, text); // It's silly, Charley
                    testChildren(range, 6, 13); // </baz>
                    testChildren(range, 7, 13, elementStart, elementStart, elementEmpty); // <frobozz>
                    testChildren(range, 8, 17, elementStart); // <is>
                    testChildren(range, 8, 21, text); // the Wiz
                    testChildren(range, 8, 28); // </is>
                    testChildren(range, 9, 13); // </frobozz>
                    testChildren(range, 10, 13, elementStart, elementEmpty); // <empty>
                    testChildren(range, 10, 20); // </empty>
                    testChildren(range, 11, 13, elementEmpty); // <xyzzy/>
                    testChildren(range, 12, 9); // </bar>
                    testChildren(range, 13, 5); // </foo>
                    testChildren(range, 14, 5, comment); // <!--This isn't the end-->
                    testChildren(range, 15, 1); // </root>
                    testChildren(range, 16, 1, pi, comment); // <?Poirot?>
                    testChildren(range, 17, 1, comment); // <!--It's the end!-->
                    testChildren(range, 1, 1); // empty range
                }
            }
            {
                // Parse the whole document and walk the resulting tree,
                // checking the type, position, name/text, and number of
                // children of every entity.
                auto dom = parseDOM(func(xml));
                assert(dom.children.length == 3);

                auto root = dom.children[0];
                assert(root.type == EntityType.elementStart);
                assert(root.pos == TextPos(1, 1));
                assert(root.children.length == 2);
                assert(equal(root.name, "root"));

                auto foo = root.children[0];
                assert(foo.type == EntityType.elementStart);
                assert(foo.pos == TextPos(2, 5));
                assert(foo.children.length == 1);
                assert(equal(foo.name, "foo"));

                auto bar = foo.children[0];
                assert(bar.type == EntityType.elementStart);
                assert(bar.pos == TextPos(3, 9));
                assert(bar.children.length == 4);
                assert(equal(bar.name, "bar"));

                auto baz = bar.children[0];
                assert(baz.type == EntityType.elementStart);
                assert(baz.pos == TextPos(4, 13));
                assert(baz.children.length == 1);
                assert(equal(baz.name, "baz"));

                auto silly = baz.children[0];
                assert(silly.type == EntityType.text);
                assert(silly.pos == TextPos(4, 18));
                assert(equal(silly.text, "\n            It's silly, Charley\n            "));

                auto frobozz = bar.children[1];
                assert(frobozz.type == EntityType.elementStart);
                assert(frobozz.pos == TextPos(7, 13));
                assert(frobozz.children.length == 1);
                assert(equal(frobozz.name, "frobozz"));

                auto is_ = frobozz.children[0];
                assert(is_.type == EntityType.elementStart);
                assert(is_.pos == TextPos(8, 17));
                assert(is_.children.length == 1);
                assert(equal(is_.name, "is"));

                auto wiz = is_.children[0];
                assert(wiz.type == EntityType.text);
                assert(wiz.pos == TextPos(8, 21));
                assert(equal(wiz.text, "the Wiz"));

                auto empty = bar.children[2];
                assert(empty.type == EntityType.elementStart);
                assert(empty.pos == TextPos(10, 13));
                assert(empty.children.length == 0);
                assert(equal(empty.name, "empty"));

                auto xyzzy = bar.children[3];
                assert(xyzzy.type == EntityType.elementEmpty);
                assert(xyzzy.pos == TextPos(11, 13));
                assert(equal(xyzzy.name, "xyzzy"));

                auto comment = root.children[1];
                assert(comment.type == EntityType.comment);
                assert(comment.pos == TextPos(14, 5));
                assert(equal(comment.text, "This isn't the end"));

                auto poirot = dom.children[1];
                assert(poirot.type == EntityType.pi);
                assert(poirot.pos == TextPos(16, 1));
                assert(equal(poirot.name, "Poirot"));
                assert(poirot.text.empty);

                auto endComment = dom.children[2];
                assert(endComment.type == EntityType.comment);
                assert(endComment.pos == TextPos(17, 1));
                assert(equal(endComment.text, "It's the end!"));
            }
        }
    }}
}