1 // Written in the D programming language
2 
3 /++
4     This implements a range-based
5     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX _parser) for XML 1.0 (which
6     will work with XML 1.1 documents assuming that they don't use any
7     1.1-specific features). For the sake of simplicity, sanity, and efficiency,
8     the $(LINK2 https://en.wikipedia.org/wiki/Document_type_definition, DTD)
9     section is not supported beyond what is required to parse past it.
10 
11     Start tags, end tags, comments, cdata sections, and processing instructions
12     are all supported and reported to the application. Anything in the DTD is
13     skipped (though it's parsed enough to parse past it correctly, and that
14     $(I can) result in an $(LREF XMLParsingException) if that XML isn't valid
15     enough to be correctly skipped), and the
16     $(LINK2 http://www.w3.org/TR/REC-xml/#NT-XMLDecl, XML declaration) at the
17     top is skipped if present (XML 1.1 requires that it be there, but XML 1.0
18     does not).
19 
20     Regardless of what the XML declaration says (if present), any range of
21     $(K_CHAR) will be treated as being encoded in UTF-8, any range of $(K_WCHAR)
22     will be treated as being encoded in UTF-16, and any range of $(K_DCHAR) will
23     be treated as having been encoded in UTF-32. Strings will be treated as
24     ranges of their code units, not code points.
25 
26     Since the DTD is skipped, entity references other than the five which are
27     predefined by the XML spec cannot be fully processed (since wherever they
28     were used in the document would be replaced by what they referred to, which
29     could be arbitrarily complex XML). As such, by default, if any entity
30     references which are not predefined are encountered outside of the DTD, an
31     $(LREF XMLParsingException) will be thrown (see
32     $(LREF Config.throwOnEntityRef) for how that can be configured). The
33     predefined entity references and any character references encountered will
34     be checked to verify that they're valid, but they will not be replaced
35     (since that does not work with returning slices of the original input).
36 
37     However, $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
38     $(REF_ALTTEXT parseStdEntityRef, parseStdEntityRef, dxml, util) from
39     $(MREF dxml, util) can be used to convert the predefined entity references
    to what they refer to, and $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
41     $(REF_ALTTEXT parseCharRef, parseCharRef, dxml, util) from
42     $(MREF dxml, util) can be used to convert character references to what they
43     refer to.
44 
45     $(H3 Primary Symbols)
46     $(TABLE
47         $(TR $(TH Symbol) $(TH Description))
48         $(TR $(TD $(LREF parseXML))
49              $(TD The function used to initiate the parsing of an XML
50                   document.))
51         $(TR $(TD $(LREF EntityRange))
52              $(TD The range returned by $(LREF parseXML).))
53         $(TR $(TD $(LREF EntityRange.Entity))
54              $(TD The element type of $(LREF EntityRange).))
55     )
56 
57     $(H3 Parser Configuration Helpers)
58     $(TABLE
59         $(TR $(TH Symbol) $(TH Description))
60         $(TR $(TD $(LREF Config))
61              $(TD Used to configure how $(LREF EntityRange) parses the XML.))
62         $(TR $(TD $(LREF simpleXML))
63              $(TD A user-friendly configuration for when the application just
64                   wants the element tags and the data in between them.))
65         $(TR $(TD $(LREF makeConfig))
66              $(TD A convenience function for constructing a custom
67                   $(LREF Config).))
68         $(TR $(TD $(LREF SkipComments))
69              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
70                   to tell the parser to skip comments.))
71         $(TR $(TD $(LREF SkipPI))
72              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
73                   to tell the parser to skip processing instructions.))
74         $(TR $(TD $(LREF SplitEmpty))
75              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
76                   to configure how the parser deals with empty element tags.))
77     )
78 
79     $(H3 Helper Types Used When Parsing)
80     $(TABLE
81         $(TR $(TH Symbol) $(TH Description))
82         $(TR $(TD $(LREF EntityType))
83              $(TD The type of an entity in the XML (e.g. a
84                   $(LREF_ALTTEXT start tag, EntityType.elementStart) or a
85                   $(LREF_ALTTEXT comment, EntityType.comment)).))
86         $(TR $(TD $(LREF TextPos))
87              $(TD Gives the line and column number in the XML document.))
88         $(TR $(TD $(LREF XMLParsingException))
89              $(TD Thrown by $(LREF EntityRange) when it encounters invalid
90                   XML.))
91     )
92 
93     $(H3 Helper Functions Used When Parsing)
94     $(TABLE
95         $(TR $(TH Symbol) $(TH Description))
96         $(TR $(TD $(LREF getAttrs))
97              $(TD A function similar to $(PHOBOS_REF getopt, std, getopt) which
98                   allows for the easy processing of start tag attributes.))
99         $(TR $(TD $(LREF skipContents))
100              $(TD Iterates an $(LREF EntityRange) from a start tag to its
101                   matching end tag.))
102         $(TR $(TD $(LREF skipToPath))
103              $(TD Used to navigate from one start tag to another as if the start
104                   tag names formed a file path.))
105         $(TR $(TD $(LREF skipToEntityType))
106              $(TD Skips to the next entity of the given type in the range.))
107         $(TR $(TD $(LREF skipToParentEndTag))
108              $(TD Iterates an $(LREF EntityRange) until it reaches the end tag
109                   that matches the start tag which is the parent of the
110                   current entity.))
111     )
112 
113     $(H3 Helper Traits)
114     $(TABLE
115         $(TR $(TH Symbol) $(TH Description))
116         $(TR $(TD $(LREF isAttrRange))
117              $(TD Whether the given range is a range of attributes.)))
118 
119     Copyright: Copyright 2017 - 2018
120     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
121     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
122     Source:    $(LINK_TO_SRC dxml/_parser.d)
123 
124     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
125   +/
126 module dxml.parser;
127 
///
version(dxmlTests) unittest
{
    // A small document containing a comment, nested elements, text, empty
    // element tags, and a start tag / end tag pair with nothing in between.
    string doc = "<!-- comment -->\n" ~
                 "<root>\n" ~
                 "    <foo>some text<whatever/></foo>\n" ~
                 "    <bar/>\n" ~
                 "    <baz></baz>\n" ~
                 "</root>";

    // With the default configuration, every entity is reported as it
    // appears in the document.
    {
        auto entities = parseXML(doc);

        with(EntityType)
        {
            assert(entities.front.type == comment);
            assert(entities.front.text == " comment ");
            entities.popFront();

            assert(entities.front.type == elementStart);
            assert(entities.front.name == "root");
            entities.popFront();

            assert(entities.front.type == elementStart);
            assert(entities.front.name == "foo");
            entities.popFront();

            assert(entities.front.type == text);
            assert(entities.front.text == "some text");
            entities.popFront();

            assert(entities.front.type == elementEmpty);
            assert(entities.front.name == "whatever");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "foo");
            entities.popFront();

            assert(entities.front.type == elementEmpty);
            assert(entities.front.name == "bar");
            entities.popFront();

            assert(entities.front.type == elementStart);
            assert(entities.front.name == "baz");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "baz");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "root");
            entities.popFront();
        }

        assert(entities.empty);
    }

    // With simpleXML, the same document is reported without its comment and
    // with its empty element tags split.
    {
        auto entities = parseXML!simpleXML(doc);

        with(EntityType)
        {
            // The leading comment is skipped, so the root start tag is
            // first.
            assert(entities.front.type == elementStart);
            assert(entities.front.name == "root");
            entities.popFront();

            assert(entities.front.type == elementStart);
            assert(entities.front.name == "foo");
            entities.popFront();

            assert(entities.front.type == text);
            assert(entities.front.text == "some text");
            entities.popFront();

            // <whatever/> is reported as a start tag followed by an end
            // tag, so the calling code does not need to care whether a
            // start tag with no content is an empty tag or a start tag and
            // end tag with nothing but whitespace in between.
            assert(entities.front.type == elementStart);
            assert(entities.front.name == "whatever");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "whatever");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "foo");
            entities.popFront();

            // <bar/> is split in the same way.
            assert(entities.front.type == elementStart);
            assert(entities.front.name == "bar");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "bar");
            entities.popFront();

            assert(entities.front.type == elementStart);
            assert(entities.front.name == "baz");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "baz");
            entities.popFront();

            assert(entities.front.type == elementEnd);
            assert(entities.front.name == "root");
            entities.popFront();
        }

        assert(entities.empty);
    }
}
237 
238 
239 import std.range.primitives;
240 import std.traits;
241 import std.typecons : Flag;
242 
243 
/++
    Thrown by the XML parser when the XML that it is parsing is invalid.

    The exception message is prefixed with the line and column of the problem
    in the form $(D_CODE_STRING "[line:col]: ").
  +/
class XMLParsingException : Exception
{
    /++
        Where in the XML input the problem was encountered.
      +/
    TextPos pos;

package:

    /+
        Params:
            msg = Description of what is wrong with the XML.
            textPos = Where in the XML input the problem is.
            file = The source file that the exception is thrown from.
            line = The source line that the exception is thrown from.
      +/
    this(string msg, TextPos textPos, string file = __FILE__, size_t line = __LINE__) @safe pure
    {
        import std.format : format;

        // Put the position first so that it is visible even when only the
        // message of the exception gets printed.
        string fullMsg = format!"[%s:%s]: %s"(textPos.line, textPos.col, msg);
        super(fullMsg, file, line);
        pos = textPos;
    }
}
263 
264 
/++
    Where in the XML document an entity is.

    The line and column numbers are 1-based, so a default-initialized TextPos
    refers to the first column of the first line.

    The primary use case for TextPos is $(LREF XMLParsingException), but an
    application may have other uses for it. The TextPos for an
    $(LREF2 Entity, EntityRange) can be obtained from
    $(LREF2 Entity.pos, EntityRange).

    See_Also: $(LREF XMLParsingException.pos)$(BR)
              $(LREF EntityRange.Entity.pos)
  +/
struct TextPos
{
    /// A 1-based line number in the XML file.
    int line = 1;

    /++
        A 1-based column number in a line of the XML file.

        Each code unit is considered a column, so depending on what a program
        is looking to do with the column number, it may need to examine the
        actual text on that line and calculate the number that represents
        what the program wants to display (e.g. the number of graphemes).
      +/
    int col = 1;
}
293 
294 
/++
    Used to configure how the parser works.

    Each member has a distinct $(PHOBOS_REF Flag, std, typecons) type, which
    is what allows $(LREF makeConfig) to work out which member an argument is
    for from the argument's type alone.

    See_Also:
        $(LREF makeConfig)$(BR)
        $(LREF parseXML)$(BR)
        $(LREF simpleXML)
  +/
struct Config
{
    /++
        Whether the comments should be skipped while parsing.

        If $(D skipComments == SkipComments.yes), any entities of type
        $(LREF EntityType.comment) will be omitted from the parsing results,
        and they will not be validated beyond what is required to parse past
        them.

        Defaults to $(D SkipComments.no).
      +/
    auto skipComments = SkipComments.no;

    /++
        Whether processing instructions should be skipped.

        If $(D skipPI == SkipPI.yes), any entities of type
        $(LREF EntityType.pi) will be skipped, and they will not be validated
        beyond what is required to parse past them.

        Defaults to $(D SkipPI.no).
      +/
    auto skipPI = SkipPI.no;

    /++
        Whether the parser should report empty element tags as if they were a
        start tag followed by an end tag with nothing in between.

        If $(D splitEmpty == SplitEmpty.yes), then whenever an
        $(LREF EntityType.elementEmpty) is encountered, the parser will claim
        that that entity is an $(LREF EntityType.elementStart), and then it
        will provide an $(LREF EntityType.elementEnd) as the next entity before
        the entity that actually follows it.

        The purpose of this is to simplify the code using the parser, since most
        code does not care about the difference between an empty tag and a start
        and end tag with nothing in between. But since some code may care about
        the difference, the behavior is configurable.

        Defaults to $(D SplitEmpty.no).
      +/
    auto splitEmpty = SplitEmpty.no;

    ///
    version(dxmlTests) unittest
    {
        enum configSplitYes = makeConfig(SplitEmpty.yes);

        {
            auto range = parseXML("<root></root>");
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.empty);
        }
        {
            // No difference if the tags are already split.
            auto range = parseXML!configSplitYes("<root></root>");
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.empty);
        }
        {
            // This treats <root></root> and <root/> as distinct.
            auto range = parseXML("<root/>");
            assert(range.front.type == EntityType.elementEmpty);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.empty);
        }
        {
            // This is parsed as if it were <root></root> instead of <root/>.
            auto range = parseXML!configSplitYes("<root/>");
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.empty);
        }
    }

    /++
        Whether the parser should throw when it encounters any entity references
        other than the five entity references defined in the XML standard.

        Any other entity references would have to be defined in the DTD in
        order to be valid. And in order to know what XML they represent (which
        could be arbitrarily complex, even effectively inserting entire XML
        documents into the middle of the XML), the DTD would have to be parsed.
        However, dxml does not support parsing the DTD beyond what is required
        to correctly parse past it, and replacing entity references with what
        they represent would not work with the slicing semantics that
        $(LREF EntityRange) provides. As such, it is not possible for dxml to
        correctly handle any entity references other than the five which are
        defined in the XML standard, and even those are only parsed by using
        $(REF decodeXML, dxml, util) or $(REF parseStdEntityRef, dxml, util).
        By default, $(LREF EntityRange) validates that entity references are
        one of the five predefined entity references, but otherwise, it lets
        them pass through as normal text. It does not replace them with what
        they represent.

        As such, the default behavior of $(LREF EntityRange) is to throw an
        $(LREF XMLParsingException) when it encounters an entity reference
        which is not one of the five defined by the XML standard. With that
        behavior, there is no risk of processing an XML document as if it had
        no entity references and ending up with what the program using the
        parser would probably consider incorrect results. However, there are
        cases where a program may find it acceptable to treat entity references
        as normal text and ignore them. As such, if a program wishes to take
        that approach, it can set throwOnEntityRef to $(D ThrowOnEntityRef.no).

        If $(D throwOnEntityRef == ThrowOnEntityRef.no), then any entity
        reference that it encounters will be validated to ensure that it is
        syntactically valid (i.e. that the characters it contains form what
        could be a valid entity reference assuming that the DTD declared it
        properly), but otherwise, $(LREF EntityRange) will treat it as normal
        text, just like it treats the five, predefined entity references as
        normal text.

        Note that any valid XML entity reference which contains start or end
        tags must contain matching start or end tags, and entity references
        cannot contain incomplete fragments of XML (e.g. the start or end of a
        comment). So, missing entity references should only affect the data in
        the XML document and not its overall structure (if that were not _true,
        attempting to ignore entity references such as $(D ThrowOnEntityRef.no)
        does would be a disaster in the making). However, how reasonable it is
        to miss that data depends entirely on the application and what the XML
        documents it's parsing contain - hence, the behavior is configurable.

        Defaults to $(D ThrowOnEntityRef.yes).

        See_Also: $(REF StdEntityRef, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(REF encodeCharRef, dxml, util)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)
      +/
    auto throwOnEntityRef = ThrowOnEntityRef.yes;

    ///
    version(dxmlTests) unittest
    {
        import std.exception : assertThrown;
        import dxml.util : decodeXML;

        auto xml = "<root>\n" ~
                   "    <std>&amp;&apos;&gt;&lt;&quot;</std>\n" ~
                   "    <other>&foobar;</other>\n" ~
                   "    <invalid>&--;</invalid>\n" ~
                   "</root>";

        // ThrowOnEntityRef.yes
        {
            auto range = parseXML(xml);
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "std");

            range.popFront();
            assert(range.front.type == EntityType.text);
            assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;");
            assert(range.front.text.decodeXML() == `&'><"`);

            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "std");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "other");

            // Attempted to parse past "&foobar;", which is syntactically
            // valid, but it's not one of the five predefined entity references.
            assertThrown!XMLParsingException(range.popFront());
        }

        // ThrowOnEntityRef.no
        {
            auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml);
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "std");

            range.popFront();
            assert(range.front.type == EntityType.text);
            assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;");
            assert(range.front.text.decodeXML() == `&'><"`);

            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "std");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "other");

            // Doesn't throw, because "&foobar;" is syntactically valid.
            range.popFront();
            assert(range.front.type == EntityType.text);
            assert(range.front.text == "&foobar;");

            // decodeXML has no effect on non-standard entity references.
            assert(range.front.text.decodeXML() == "&foobar;");

            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "other");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "invalid");

            // Attempted to parse past "&--;", which is not syntactically valid,
            // because -- is not a valid name for an entity reference.
            assertThrown!XMLParsingException(range.popFront());
        }
    }
}
536 
537 
/++
    A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) to tell the
    parser whether to skip comments.

    See_Also: $(LREF2 skipComments, Config)
  +/
alias SkipComments = Flag!"SkipComments";

/++
    A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) to tell the
    parser whether to skip processing instructions.

    See_Also: $(LREF2 skipPI, Config)
  +/
alias SkipPI = Flag!"SkipPI";

/++
    A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) to configure
    whether the parser reports an empty element tag as a start tag followed
    by an end tag.

    See_Also: $(LREF2 splitEmpty, Config)
  +/
alias SplitEmpty = Flag!"SplitEmpty";

/++
    A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) to configure
    whether the parser throws when it encounters an entity reference other
    than the five which are predefined by the XML standard.

    See_Also: $(LREF2 throwOnEntityRef, Config)
  +/
alias ThrowOnEntityRef = Flag!"ThrowOnEntityRef";
549 
550 
/++
    Helper function for creating a custom config. It makes it easy to set one
    or more of the member variables to something other than the default without
    having to worry about explicitly setting them individually or setting them
    all at once via a constructor.

    The order of the arguments does not matter. The types of each of the members
    of Config are unique, so that information alone is sufficient to determine
    which argument should be assigned to which member.

    It is a compile-time error to pass an argument whose type is not the type
    of a member of $(LREF Config) or to pass two arguments of the same type.

    Params:
        args = The values to assign to the members of $(LREF Config) with the
               corresponding types. Passing no arguments is allowed.

    Returns: A $(LREF Config) where each member whose type matches the type of
             an argument has the value of that argument and where every other
             member has its default value.
  +/
Config makeConfig(Args...)(Args args)
{
    import std.format : format;
    import std.meta : AliasSeq, staticIndexOf, staticMap;

    Config config;

    // The types of all of the members of Config, in declaration order.
    alias TypeOfMember(string memberName) = typeof(__traits(getMember, config, memberName));
    alias MemberTypes = staticMap!(TypeOfMember, AliasSeq!(__traits(allMembers, Config)));

    // args is a compile-time sequence, so this loop is unrolled, and i is
    // usable in the static conditions below.
    foreach(i, arg; args)
    {
        // staticIndexOf gives -1 when the type is not in the list.
        static assert(staticIndexOf!(typeof(arg), MemberTypes) != -1,
                      format!"Argument %s does not match the type of any members of Config"(i));

        // Two arguments of the same type would target the same member, and
        // one of them would silently be lost.
        static foreach(j, Other; Args)
        {
            static if(i != j)
                static assert(!is(typeof(arg) == Other), format!"Argument %s and %s have the same type"(i, j));
        }

        // Assign the argument to the member whose type matches it.
        foreach(memberName; __traits(allMembers, Config))
        {
            static if(is(typeof(__traits(getMember, config, memberName)) == typeof(arg)))
                mixin("config." ~ memberName ~ " = arg;");
        }
    }

    return config;
}
601 
///
version(dxmlTests) @safe pure nothrow @nogc unittest
{
    // Members whose types are not passed to makeConfig keep their defaults.
    enum defaults = Config.init;

    // A single flag.
    {
        const cfg = makeConfig(SkipComments.yes);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == defaults.skipPI);
        assert(cfg.splitEmpty == defaults.splitEmpty);
        assert(cfg.throwOnEntityRef == defaults.throwOnEntityRef);
    }

    // Two flags.
    {
        const cfg = makeConfig(SkipComments.yes, SkipPI.yes);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == SkipPI.yes);
        assert(cfg.splitEmpty == defaults.splitEmpty);
        assert(cfg.throwOnEntityRef == defaults.throwOnEntityRef);
    }

    // The order of the arguments does not need to match the order of the
    // members of Config.
    {
        const cfg = makeConfig(SplitEmpty.yes, SkipComments.yes, ThrowOnEntityRef.no);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == defaults.skipPI);
        assert(cfg.splitEmpty == SplitEmpty.yes);
        assert(cfg.throwOnEntityRef == ThrowOnEntityRef.no);
    }
}
627 
version(dxmlTests) unittest
{
    import std.typecons : Flag;

    // Whether a call to makeConfig with the given values compiles.
    enum accepts(values...) = __traits(compiles, makeConfig(values));

    // Types which are not the type of any member of Config are rejected.
    static assert(!accepts!(42));
    static assert(!accepts!("hello"));
    static assert(!accepts!(Flag!"SomeOtherFlag".yes));

    // Two arguments of the same type are rejected.
    static assert(!accepts!(SplitEmpty.yes, SplitEmpty.no));
}
636 
637 
/++
    A $(LREF Config) for applications which only care about the element tags
    and the data in between them. Comments and processing instructions are
    skipped, and empty element tags are split so that they are treated the
    same as a start tag and end tag with nothing but whitespace between them.
  +/
enum Config simpleXML = makeConfig(SkipComments.yes, SkipPI.yes, SplitEmpty.yes);
645 
///
version(dxmlTests) @safe pure nothrow @nogc unittest
{
    // simpleXML is a compile-time constant, so its members can be checked
    // statically.
    enum cfg = simpleXML;

    static assert(cfg.skipComments == SkipComments.yes);
    static assert(cfg.skipPI == SkipPI.yes);
    static assert(cfg.splitEmpty == SplitEmpty.yes);

    // Not passed to makeConfig, so it has its default value.
    static assert(cfg.throwOnEntityRef == ThrowOnEntityRef.yes);
}
654 
655 
/++
    Represents the type of an XML entity. Used by $(LREF EntityRange.Entity).
  +/
enum EntityType
{
    /++
        A cdata section: `<![CDATA[ ... ]]>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-cdata-sect)
      +/
    cdata,

    /++
        An XML comment: `<!-- ... -->`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-comments)
      +/
    comment,

    /++
        The start tag for an element. e.g. `<foo name="value">`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementStart,

    /++
        The end tag for an element. e.g. `</foo>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEnd,

    /++
        The tag for an element with no contents or matching end tag. e.g.
        `<foo name="value"/>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEmpty,

    /++
        A processing instruction such as `<?foo?>`. Note that the
        `<?xml ... ?>` is skipped and not treated as an $(LREF EntityType._pi).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-pi)
      +/
    pi,

    /++
        The content of an element tag that is simple text.

        If there is an entity other than the end tag following the text, then
        the text includes up to that entity.

        Note however that character references (e.g.
        $(D_CODE_STRING "$(AMP)#42;")) and the predefined entity references
        (e.g. $(D_CODE_STRING "$(AMP)apos;")) are left unprocessed in the text.
        In order for them to be processed, the text should be passed to either
        $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
        $(REF_ALTTEXT asDecodedXML, asDecodedXML, dxml, util). By default,
        entity references which are not predefined are treated as invalid XML
        (see $(LREF Config.throwOnEntityRef)), because the DTD section is
        skipped, and thus they cannot be processed properly.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(LREF EntityRange.Entity._text)
      +/
    text,
}
729 
730 
731 /++
732     Lazily parses the given range of characters as an XML document.
733 
734     EntityRange is essentially a
735     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX) parser, though it evolved
736     into that rather than being based on what Java did, and it's range-based
737     rather than iterator-based, so its API is likely to differ from other
738     implementations. The basic concept should be the same though.
739 
740     One of the core design goals of this parser is to slice the original input
741     rather than having to allocate strings for the output or wrap it in a lazy
742     range that produces a mutated version of the data. So, all of the text that
743     the parser provides is either a slice or
744     $(PHOBOS_REF takeExactly, std, range) of the input. However, in some cases,
745     for the parser to be fully compliant with the XML spec,
746     $(REF decodeXML, dxml, util) must be called on the text to mutate certain
747     constructs (e.g. removing any $(D_CODE_STRING '\r') in the text or
748     converting $(D_CODE_STRING "$(AMP)lt;") to $(D_CODE_STRING '<')). But
749     that's left up to the application.
750 
751     The parser is not $(K_NOGC), but it allocates memory very minimally. It
752     allocates some of its state on the heap so it can validate attributes and
753     end tags. However, that state is shared among all the ranges that came from
754     the same call to parseXML (only the range farthest along in parsing
755     validates attributes or end tags), so $(LREF2 save, _EntityRange) does not
756     allocate memory unless $(D save) on the underlying range allocates memory.
757     The shared state currently uses a couple of dynamic arrays to validate the
758     tags and attributes, and if the document has a particularly deep tag depth
759     or has a lot of attributes on a start tag, then some reallocations may
760     occur until the maximum is reached, but enough is reserved that for most
761     documents, no reallocations will occur. The only other times that the
762     parser would allocate would be if an exception were thrown or if the range
763     that was passed to parseXML allocates for any reason when calling any of the
764     range primitives.
765 
766     If invalid XML is encountered at any point during the parsing process, an
767     $(LREF XMLParsingException) will be thrown. If an exception has been thrown,
768     then the parser is in an invalid state, and it is an error to call any
769     functions on it.
770 
771     However, note that XML validation is reduced for any entities that are
772     skipped (e.g. for anything in the DTD, validation is reduced to what is
773     required to correctly parse past it, and when
774     $(D Config.skipPI == SkipPI.yes), processing instructions are only validated
775     enough to correctly skip past them).
776 
777     As the module documentation says, this parser does not provide any DTD
778     support. It is not possible to properly support the DTD while returning
779     slices of the original input, and the DTD portion of the spec makes parsing
780     XML far, far more complicated.
781 
782     A quick note about carriage returns$(COLON) per the XML spec, they are all
783     supposed to either be stripped out or replaced with newlines or spaces
784     before the XML parser even processes the text. That doesn't work when the
785     parser is slicing the original text and not mutating it at all. So, for the
786     purposes of parsing, this parser treats all carriage returns as if they
787     were newlines or spaces (though they won't count as newlines when counting
788     the lines for $(LREF TextPos)). However, they $(I will) appear in any text
789     fields or attribute values if they are in the document (since the text
790     fields and attribute values are slices of the original text).
791     $(REF decodeXML, dxml, util) can be used to strip them along with
792     converting any character references in the text. Alternatively, the
793     application can remove them all before calling parseXML, but it's not
794     necessary.
795   +/
796 struct EntityRange(Config cfg, R)
797     if(isForwardRange!R && isSomeChar!(ElementType!R))
798 {
799     import std.algorithm : canFind;
800     import std.range : only, takeExactly;
801     import std.typecons : Nullable;
802     import std.utf : byCodeUnit;
803 
804     enum compileInTests = is(R == EntityRangeCompileTests);
805 
806 public:
807 
    /// The Config used when parsing the XML.
809     alias config = cfg;
810 
811     /// The type of the range that EntityRange is parsing.
812     alias Input = R;
813 
814     /++
815         The type used when any slice of the original input is used. If $(D R)
816         is a string or supports slicing, then SliceOfR is the same as $(D R);
817         otherwise, it's the result of calling
818         $(PHOBOS_REF takeExactly, std, range) on the input.
819 
820         ---
821         import std.algorithm : filter;
822         import std.range : takeExactly;
823 
824         static assert(is(EntityRange!(Config.init, string).SliceOfR == string));
825 
826         auto range = filter!(a => true)("some xml");
827 
828         static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
829                          typeof(takeExactly(range, 42))));
830         ---
831       +/
    static if(isDynamicArray!R || hasSlicing!R)
        alias SliceOfR = R; // arrays and sliceable ranges: slices are reported as R itself
    else
        alias SliceOfR = typeof(takeExactly(R.init, 42)); // only the type is used; 42 is an arbitrary length
836 
837     // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
838     // a ddoc-ed unit test.
    static if(compileInTests) @safe unittest
    {
        // Compile-time checks only; the code mirrors the example given in the
        // documentation for SliceOfR.
        import std.algorithm : filter;
        import std.range : takeExactly;

        // For a string, SliceOfR is the string type itself.
        static assert(is(EntityRange!(Config.init, string).SliceOfR == string));

        auto range = filter!(a => true)("some xml");

        // For the result of filter, SliceOfR is whatever takeExactly returns.
        static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
                         typeof(takeExactly(range, 42))));
    }
851 
852 
853     /++
854         Represents an entity in the XML document.
855 
856         Note that the $(LREF2 type, EntityRange._Entity) determines which
857         properties can be used, and it can determine whether functions which
858         an Entity or $(LREF EntityRange) is passed to are allowed to be called.
859         Each function lists which $(LREF EntityType)s are allowed, and it is an
860         error to call them with any other $(LREF EntityType).
861       +/
862     struct Entity
863     {
864     public:
865 
866         import std.typecons : Tuple;
867 
868         /++
869             The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
            $(LREF2 attributes, EntityRange.Entity) returns a range of.
871 
872             See_Also: $(LREF2 attributes, EntityRange.Entity)
873           +/
874         alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos,  "pos");
875 
876 
877         /++
878             The $(LREF EntityType) for this Entity.
879           +/
880         @property EntityType type() @safe const pure nothrow @nogc
881         {
882             return _type;
883         }
884 
885         ///
886         static if(compileInTests) unittest
887         {
888             auto xml = "<root>\n" ~
889                        "    <!--no comment-->\n" ~
890                        "    <![CDATA[cdata run]]>\n" ~
891                        "    <text>I am text!</text>\n" ~
892                        "    <empty/>\n" ~
893                        "    <?pi?>\n" ~
894                        "</root>";
895 
896             auto range = parseXML(xml);
897             assert(range.front.type == EntityType.elementStart);
898             assert(range.front.name == "root");
899             range.popFront();
900 
901             assert(range.front.type == EntityType.comment);
902             assert(range.front.text == "no comment");
903             range.popFront();
904 
905             assert(range.front.type == EntityType.cdata);
906             assert(range.front.text == "cdata run");
907             range.popFront();
908 
909             assert(range.front.type == EntityType.elementStart);
910             assert(range.front.name == "text");
911             range.popFront();
912 
913             assert(range.front.type == EntityType.text);
914             assert(range.front.text == "I am text!");
915             range.popFront();
916 
917             assert(range.front.type == EntityType.elementEnd);
918             assert(range.front.name == "text");
919             range.popFront();
920 
921             assert(range.front.type == EntityType.elementEmpty);
922             assert(range.front.name == "empty");
923             range.popFront();
924 
925             assert(range.front.type == EntityType.pi);
926             assert(range.front.name == "pi");
927             range.popFront();
928 
929             assert(range.front.type == EntityType.elementEnd);
930             assert(range.front.name == "root");
931             range.popFront();
932 
933             assert(range.empty);
934         }
935 
936 
937         /++
            The position in the original text where the entity starts.
939 
940             See_Also: $(LREF TextPos)$(BR)
941                       $(LREF XMLParsingException._pos)
942           +/
943         @property TextPos pos() @safe const pure nothrow @nogc
944         {
945             return _pos;
946         }
947 
948         ///
949         static if(compileInTests) unittest
950         {
951             auto xml = "<root>\n" ~
952                        "    <foo>\n" ~
953                        "        Foo and bar. Always foo and bar...\n" ~
954                        "    </foo>\n" ~
955                        "</root>";
956 
957             auto range = parseXML(xml);
958             assert(range.front.type == EntityType.elementStart);
959             assert(range.front.name == "root");
960             assert(range.front.pos == TextPos(1, 1));
961             range.popFront();
962 
963             assert(range.front.type == EntityType.elementStart);
964             assert(range.front.name == "foo");
965             assert(range.front.pos == TextPos(2, 5));
966             range.popFront();
967 
968             assert(range.front.type == EntityType.text);
969             assert(range.front.text ==
970                    "\n" ~
971                    "        Foo and bar. Always foo and bar...\n" ~
972                    "    ");
973             assert(range.front.pos == TextPos(2, 10));
974             range.popFront();
975 
976             assert(range.front.type == EntityType.elementEnd);
977             assert(range.front.name == "foo");
978             assert(range.front.pos == TextPos(4, 5));
979             range.popFront();
980 
981             assert(range.front.type == EntityType.elementEnd);
982             assert(range.front.name == "root");
983             assert(range.front.pos == TextPos(5, 1));
984             range.popFront();
985 
986             assert(range.empty);
987         }
988 
989         static if(compileInTests) unittest
990         {
991             import core.exception : AssertError;
992             import std.exception : enforce;
993 
994             static void test(ER)(ref ER range, EntityType type, int row, int col, size_t line = __LINE__)
995             {
996                 enforce!AssertError(!range.empty, "unittest failure 1", __FILE__, line);
997                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
998                 enforce!AssertError(range.front.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
999                 range.popFront();
1000             }
1001 
1002             auto xml = "<?xml?>\n" ~
1003                        "   <!--comment-->\n" ~
1004                        "   <?pi?>\n" ~
1005                        " <root>\n" ~
1006                        "          <!--comment--><!--comment-->\n" ~
1007                        "       <?pi?>\n" ~
1008                        "  <![CDATA[]]>\n" ~
1009                        "              <empty/>     </root>\n" ~
1010                        " <!--comment-->\n" ~
1011                        " <?pi?>\n";
1012 
1013             {
1014                 auto range = parseXML(xml);
1015                 test(range, EntityType.comment, 2, 4);
1016                 test(range, EntityType.pi, 3, 4);
1017                 test(range, EntityType.elementStart, 4, 2);
1018                 test(range, EntityType.comment, 5, 11);
1019                 test(range, EntityType.comment, 5, 25);
1020                 test(range, EntityType.pi, 6, 8);
1021                 test(range, EntityType.cdata, 7, 3);
1022                 test(range, EntityType.elementEmpty, 8, 15);
1023                 test(range, EntityType.elementEnd, 8, 28);
1024                 test(range, EntityType.comment, 9, 2);
1025                 test(range, EntityType.pi, 10, 2);
1026             }
1027 
1028             auto range = parseXML!simpleXML(xml);
1029             test(range, EntityType.elementStart, 4, 2);
1030             test(range, EntityType.cdata, 7, 3);
1031             test(range, EntityType.elementStart, 8, 15);
1032             test(range, EntityType.elementEnd, 8, 15);
1033             test(range, EntityType.elementEnd, 8, 28);
1034         }
1035 
1036 
1037         /++
1038             Gives the name of this Entity.
1039 
1040             Note that this is the direct name in the XML for this entity and
1041             does not contain any of the names of any of the parent entities that
1042             this entity has. If an application wants the full "path" of the
1043             entity, then it will have to keep track of that itself. The parser
1044             does not do that as it would require allocating memory.
1045 
1046             $(TABLE
1047                 $(TR $(TH Supported $(LREF EntityType)s:))
1048                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1049                 $(TR $(TD $(LREF2 elementEnd, EntityType)))
1050                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1051                 $(TR $(TD $(LREF2 pi, EntityType)))
1052             )
1053           +/
1054         @property SliceOfR name()
1055         {
1056             import dxml.internal : checkedSave, stripBCU;
1057             with(EntityType)
1058             {
1059                 import std.format : format;
1060                 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type),
1061                        format("name cannot be called with %s", _type));
1062             }
1063             return stripBCU!R(checkedSave(_name));
1064         }
1065 
1066         ///
1067         static if(compileInTests) unittest
1068         {
1069             auto xml = "<root>\n" ~
1070                        "    <empty/>\n" ~
1071                        "    <?pi?>\n" ~
1072                        "</root>";
1073 
1074             auto range = parseXML(xml);
1075             assert(range.front.type == EntityType.elementStart);
1076             assert(range.front.name == "root");
1077             range.popFront();
1078 
1079             assert(range.front.type == EntityType.elementEmpty);
1080             assert(range.front.name == "empty");
1081             range.popFront();
1082 
1083             assert(range.front.type == EntityType.pi);
1084             assert(range.front.name == "pi");
1085             range.popFront();
1086 
1087             assert(range.front.type == EntityType.elementEnd);
1088             assert(range.front.name == "root");
1089             range.popFront();
1090 
1091             assert(range.empty);
1092         }
1093 
1094 
1095         /++
1096             Returns a lazy range of attributes for a start tag where each
1097             attribute is represented as a$(BR)
1098             $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
1099                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "name"),
1100                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "value"),
1101                       $(LREF TextPos), $(D_STRING "pos"))).
1102 
1103             $(TABLE
1104                 $(TR $(TH Supported $(LREF EntityType)s:))
1105                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1106                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1107             )
1108 
1109             See_Also: $(LREF2 Attribute, EntityRange.Entity)$(BR)
1110                       $(REF decodeXML, dxml, util)$(BR)
1111                       $(REF asDecodedXML, dxml, util)
1112           +/
1113         @property auto attributes()
1114         {
1115             with(EntityType)
1116             {
1117                 import std.format : format;
1118                 assert(_type == elementStart || _type == elementEmpty,
1119                        format("attributes cannot be called with %s", _type));
1120             }
1121 
1122             // STag         ::= '<' Name (S Attribute)* S? '>'
1123             // Attribute    ::= Name Eq AttValue
1124             // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1125 
1126             static struct AttributeRange
1127             {
1128                 @property Attribute front()
1129                 {
1130                     return _front;
1131                 }
1132 
1133                 void popFront()
1134                 {
1135                     import dxml.internal : stripBCU;
1136 
1137                     stripWS(_text);
1138                     if(_text.input.empty)
1139                     {
1140                         empty = true;
1141                         return;
1142                     }
1143 
1144                     immutable pos = _text.pos;
1145                     auto name = stripBCU!R(_text.takeName!'='());
1146                     stripWS(_text);
1147                     popFrontAndIncCol(_text);
1148                     stripWS(_text);
1149                     _front = Attribute(name, stripBCU!R(takeEnquotedText(_text)), pos);
1150                 }
1151 
1152                 @property auto save()
1153                 {
1154                     import dxml.internal : checkedSave;
1155                     auto retval = this;
1156                     retval._front = Attribute(_front[0].save, checkedSave(_front[1]), _front[2]);
1157                     retval._text.input = checkedSave(retval._text.input);
1158                     return retval;
1159                 }
1160 
1161                 this(typeof(_text) text)
1162                 {
1163                     _front = Attribute.init; // This is utterly stupid. https://issues.dlang.org/show_bug.cgi?id=13945
1164                     _text = text;
1165                     if(_text.input.empty)
1166                         empty = true;
1167                     else
1168                         popFront();
1169                 }
1170 
1171                 bool empty;
1172                 Attribute _front;
1173                 typeof(_savedText) _text;
1174             }
1175 
1176             return AttributeRange(_savedText.save);
1177         }
1178 
1179         ///
1180         static if(compileInTests) unittest
1181         {
1182             import std.algorithm.comparison : equal;
1183             import std.algorithm.iteration : filter;
1184             {
1185                 auto xml = "<root/>";
1186                 auto range = parseXML(xml);
1187                 assert(range.front.type == EntityType.elementEmpty);
1188                 assert(range.front.attributes.empty);
1189 
1190                 static assert(is(ElementType!(typeof(range.front.attributes)) ==
1191                                  typeof(range).Entity.Attribute));
1192             }
1193             {
1194                 auto xml = "<root a='42' q='29' w='hello'/>";
1195                 auto range = parseXML(xml);
1196                 assert(range.front.type == EntityType.elementEmpty);
1197 
1198                 auto attrs = range.front.attributes;
1199                 assert(attrs.front.name == "a");
1200                 assert(attrs.front.value == "42");
1201                 assert(attrs.front.pos == TextPos(1, 7));
1202                 attrs.popFront();
1203 
1204                 assert(attrs.front.name == "q");
1205                 assert(attrs.front.value == "29");
1206                 assert(attrs.front.pos == TextPos(1, 14));
1207                 attrs.popFront();
1208 
1209                 assert(attrs.front.name == "w");
1210                 assert(attrs.front.value == "hello");
1211                 assert(attrs.front.pos == TextPos(1, 21));
1212                 attrs.popFront();
1213 
1214                 assert(attrs.empty);
1215             }
1216             // Because the type of name and value is SliceOfR, == with a string
1217             // only works if the range passed to parseXML was string.
1218             {
1219                 auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>");
1220                 auto range = parseXML(xml);
1221                 assert(range.front.type == EntityType.elementEmpty);
1222 
1223                 auto attrs = range.front.attributes;
1224                 assert(equal(attrs.front.name, "a"));
1225                 assert(equal(attrs.front.value, "42"));
1226                 assert(attrs.front.pos == TextPos(1, 7));
1227                 attrs.popFront();
1228 
1229                 assert(equal(attrs.front.name, "q"));
1230                 assert(equal(attrs.front.value, "29"));
1231                 assert(attrs.front.pos == TextPos(1, 14));
1232                 attrs.popFront();
1233 
1234                 assert(equal(attrs.front.name, "w"));
1235                 assert(equal(attrs.front.value, "hello"));
1236                 assert(attrs.front.pos == TextPos(1, 21));
1237                 attrs.popFront();
1238 
1239                 assert(attrs.empty);
1240             }
1241         }
1242 
1243         static if(compileInTests) unittest
1244         {
1245             import core.exception : AssertError;
1246             import std.algorithm.comparison : equal;
1247             import std.exception : assertNotThrown, collectException, enforce;
1248             import std.typecons : Tuple, tuple;
1249             import dxml.internal : codeLen, testRangeFuncs;
1250 
1251             static bool cmpAttr(T, U)(T lhs, U rhs)
1252             {
1253                 return equal(lhs[0].save, rhs[0].save) &&
1254                        equal(lhs[1].save, rhs[1].save);
1255             }
1256 
1257             static void test(alias func, ThrowOnEntityRef toer)(string text, EntityType type,
1258                                                                 Tuple!(string, string)[] expected,
1259                                                                 int row, int col, size_t line = __LINE__)
1260             {
1261                 auto range = assertNotThrown!XMLParsingException(parseXML!(makeConfig(toer))(func(text)),
1262                                                                  "unittest 1", __FILE__, line);
1263                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
1264                 enforce!AssertError(equal!cmpAttr(range.front.attributes, expected),
1265                                     "unittest failure 3", __FILE__, line);
1266                 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 4", __FILE__, line);
1267             }
1268 
1269             static void testFail(alias func, ThrowOnEntityRef toer)(string text,
1270                                                                     int row, int col, size_t line = __LINE__)
1271             {
1272                 auto e = collectException!XMLParsingException(parseXML!(makeConfig(toer))(func(text)));
1273                 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
1274                 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1275             }
1276 
1277             static foreach(func; testRangeFuncs)
1278             {
1279                 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
1280                 {
1281                     test!(func, toer)("<root a='b'/>", EntityType.elementEmpty, [tuple("a", "b")], 1, 14);
1282                     test!(func, toer)("<root a = 'b' />", EntityType.elementEmpty, [tuple("a", "b")], 1, 17);
1283                     test!(func, toer)("<root \n\n a \n\n = \n\n 'b' \n\n />", EntityType.elementEmpty,
1284                                       [tuple("a", "b")], 9, 4);
1285                     test!(func, toer)("<root a='b'></root>", EntityType.elementStart, [tuple("a", "b")], 1, 13);
1286                     test!(func, toer)("<root a = 'b' ></root>", EntityType.elementStart, [tuple("a", "b")], 1, 16);
1287                     test!(func, toer)("<root \n a \n = \n 'b' \n ></root>", EntityType.elementStart,
1288                                       [tuple("a", "b")], 5, 3);
1289 
1290                     test!(func, toer)("<root foo='\n\n\n'/>", EntityType.elementEmpty, [tuple("foo", "\n\n\n")], 4, 4);
1291                     test!(func, toer)(`<root foo='"""'/>`, EntityType.elementEmpty, [tuple("foo", `"""`)], 1, 18);
1292                     test!(func, toer)(`<root foo="'''"/>`, EntityType.elementEmpty, [tuple("foo", `'''`)], 1, 18);
1293                     test!(func, toer)(`<root foo.=""/>`, EntityType.elementEmpty, [tuple("foo.", "")], 1, 16);
1294                     test!(func, toer)(`<root foo="bar="/>`, EntityType.elementEmpty, [tuple("foo", "bar=")], 1, 19);
1295 
1296                     test!(func, toer)("<root foo='bar' a='b' hello='world'/>", EntityType.elementEmpty,
1297                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1298                     test!(func, toer)(`<root foo="bar" a='b' hello="world"/>`, EntityType.elementEmpty,
1299                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1300 
1301                     test!(func, toer)(`<root foo="&#42;" a='&#x42;' hello="%foo"/>`, EntityType.elementEmpty,
1302                               [tuple("foo", "&#42;"), tuple("a", "&#x42;"), tuple("hello", "%foo")], 1, 44);
1303 
1304                     test!(func, toer)(`<root foo="&amp;" a='vector&lt;int&gt;'></root>`, EntityType.elementStart,
1305                               [tuple("foo", "&amp;"), tuple("a", "vector&lt;int&gt;"),], 1, 41);
1306 
1307                     test!(func, toer)(`<foo 京都市="ディラン"/>`, EntityType.elementEmpty,
1308                               [tuple("京都市", "ディラン")], 1, codeLen!(func, `<foo 京都市="ディラン"/>`) + 1);
1309 
1310                     test!(func, toer)(`<root foo=">"/>`, EntityType.elementEmpty, [tuple("foo", ">")], 1, 16);
1311                     test!(func, toer)(`<root foo=">>>>>>"/>`, EntityType.elementEmpty, [tuple("foo", ">>>>>>")], 1, 21);
1312                     test!(func, toer)(`<root foo=">"></root>`, EntityType.elementStart, [tuple("foo", ">")], 1, 15);
1313                     test!(func, toer)(`<root foo=">>>>>>"></root>`, EntityType.elementStart, [tuple("foo", ">>>>>>")], 1, 20);
1314 
1315                     test!(func, toer)(`<root foo="bar" foos="ball"/>`, EntityType.elementEmpty,
1316                               [tuple("foo", "bar"), tuple("foos", "ball")], 1, 30);
1317 
1318                     testFail!(func, toer)(`<root a="""/>`, 1, 11);
1319                     testFail!(func, toer)(`<root a='''/>`, 1, 11);
1320                     testFail!(func, toer)("<root a=/>", 1, 9);
1321                     testFail!(func, toer)("<root a='/>", 1, 9);
1322                     testFail!(func, toer)("<root a='/>", 1, 9);
1323                     testFail!(func, toer)("<root =''/>", 1, 7);
1324                     testFail!(func, toer)(`<root a ""/>`, 1, 9);
1325                     testFail!(func, toer)(`<root a""/>`, 1, 8);
1326                     testFail!(func, toer)(`<root a/>`, 1, 8);
1327                     testFail!(func, toer)("<root foo='bar' a=/>", 1, 19);
1328                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1329                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1330                     testFail!(func, toer)("<root foo='bar' =''/>", 1, 17);
1331                     testFail!(func, toer)("<root foo='bar' a= hello='world'/>", 1, 20);
1332                     // It's 33 rather than 28, because it throws when processing the start tag and not when processing
1333                     // the attributes. So, the mismatched quotes are detected before the attributes are checked.
1334                     testFail!(func, toer)("<root foo='bar' a=' hello='world'/>", 1, 33);
1335                     testFail!(func, toer)("<root foo='bar' ='' hello='world'/>", 1, 17);
1336                     testFail!(func, toer)("<root foo='bar'a='b'/>", 1, 16);
1337                     testFail!(func, toer)(`<root .foo="bar"/>`, 1, 7);
1338 
1339                     testFail!(func, toer)(`<root foo="<"/>`, 1, 12);
1340                     testFail!(func, toer)(`<root foo="<world"/>`, 1, 12);
1341                     testFail!(func, toer)(`<root foo="hello<world"/>`, 1, 17);
1342                     testFail!(func, toer)(`<root foo="&"/>`, 1, 12);
1343                     testFail!(func, toer)(`<root foo="hello&"/>`, 1, 17);
1344                     testFail!(func, toer)(`<root foo="hello&world"/>`, 1, 17);
1345                     testFail!(func, toer)(`<root foo="&;"/>`, 1, 12);
1346                     testFail!(func, toer)(`<root foo="&#;"/>`, 1, 12);
1347                     testFail!(func, toer)(`<root foo="&#x;"/>`, 1, 12);
1348                     testFail!(func, toer)(`<root foo="&#A;"/>`, 1, 12);
1349                     testFail!(func, toer)(`<root foo="&#xG;"/>`, 1, 12);
1350                     testFail!(func, toer)(`<root foo="&#42"/>`, 1, 12);
1351                     testFail!(func, toer)(`<root foo="&#x42"/>`, 1, 12);
1352                     testFail!(func, toer)(`<root foo="&#x12;"/>`, 1, 12);
1353 
1354                     testFail!(func, toer)("<root\n\nfoo='\nbar&#x42'></root>", 4, 4);
1355 
1356                     testFail!(func, toer)(`<root a="""></root>`, 1, 11);
1357                     testFail!(func, toer)(`<root a='''></root>`, 1, 11);
1358                     testFail!(func, toer)("<root a=></root>", 1, 9);
1359                     testFail!(func, toer)("<root a='></root>", 1, 9);
1360                     testFail!(func, toer)("<root a='></root>", 1, 9);
1361                     testFail!(func, toer)("<root =''></root>", 1, 7);
1362                     testFail!(func, toer)(`<root a ""></root>`, 1, 9);
1363                     testFail!(func, toer)(`<root a""></root>`, 1, 8);
1364                     testFail!(func, toer)(`<root a></root>`, 1, 8);
1365                     testFail!(func, toer)("<root foo='bar' a=></root>", 1, 19);
1366                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1367                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1368                     testFail!(func, toer)("<root foo='bar' =''></root>", 1, 17);
1369                     testFail!(func, toer)("<root foo='bar' a= hello='world'></root>", 1, 20);
1370                     testFail!(func, toer)("<root foo='bar' a=' hello='world'></root>", 1, 33);
1371                     testFail!(func, toer)("<root foo='bar' ='' hello='world'></root>", 1, 17);
1372                     testFail!(func, toer)("<root foo='bar'a='b'></root>", 1, 16);
1373                     testFail!(func, toer)(`<root .foo='bar'></root>`, 1, 7);
1374 
1375                     testFail!(func, toer)(`<root foo="<"></root>`, 1, 12);
1376                     testFail!(func, toer)(`<root foo="<world"></root>`, 1, 12);
1377                     testFail!(func, toer)(`<root foo="hello<world"></root>`, 1, 17);
1378                     testFail!(func, toer)(`<root foo="&"></root>`, 1, 12);
1379                     testFail!(func, toer)(`<root foo="hello&"></root>`, 1, 17);
1380                     testFail!(func, toer)(`<root foo="hello&world"></root>`, 1, 17);
1381                     testFail!(func, toer)(`<root foo="&;"></root>`, 1, 12);
1382                     testFail!(func, toer)(`<root foo="&#;"></root>`, 1, 12);
1383                     testFail!(func, toer)(`<root foo="&#x;"></root>`, 1, 12);
1384                     testFail!(func, toer)(`<root foo="&#A;"></root>`, 1, 12);
1385                     testFail!(func, toer)(`<root foo="&#xG;"></root>`, 1, 12);
1386                     testFail!(func, toer)(`<root foo="&#42"></root>`, 1, 12);
1387                     testFail!(func, toer)(`<root foo="&#x42"></root>`, 1, 12);
1388                     testFail!(func, toer)(`<root foo="&#x12;"></root>`, 1, 12);
1389 
1390                     testFail!(func, toer)(`<root a='42' a='19'/>`, 1, 14);
1391                     testFail!(func, toer)(`<root a='42' b='hello' a='19'/>`, 1, 24);
1392                     testFail!(func, toer)(`<root a='42' b='hello' a='19' c=''/>`, 1, 24);
1393                     testFail!(func, toer)(`<root a='' b='' c='' d='' e='' f='' g='' e='' h=''/>`, 1, 42);
1394                     testFail!(func, toer)(`<root foo='bar' foo='bar'/>`, 1, 17);
1395 
1396                     test!(func, toer)(`<root foo="&amp;"></root>`, EntityType.elementStart,
1397                                       [tuple("foo", "&amp;")], 1, 19);
1398                     test!(func, toer)(`<root foo="foo&amp;&lt;&gt;&apos;&quot;bar"></root>`, EntityType.elementStart,
1399                                       [tuple("foo", "foo&amp;&lt;&gt;&apos;&quot;bar")], 1, 45);
1400                     testFail!(func, toer)("<root foo='&;'></root>", 1, 12);
1401                     testFail!(func, toer)("<root foo='&.;'></root>", 1, 12);
1402                     testFail!(func, toer)("<root foo='\n &amp ule'></root>", 2, 2);
1403                     testFail!(func, toer)("<root foo='\n &foo bar'></root>", 2, 2);
1404                 }
1405                 {
1406                     alias toer = ThrowOnEntityRef.yes;
1407                     testFail!(func, toer)(`<root foo="&foo;"/>`, 1, 12);
1408                     testFail!(func, toer)(`<root foo="&foo;"></root>`, 1, 12);
1409                     testFail!(func, toer)("<root foo='foo&bar.;'></root>", 1, 15);
1410                     testFail!(func, toer)(`<root foo="hello &a; world"></root>`, 1, 18);
1411                     testFail!(func, toer)("<root foo='hello \n &a; \n world'></root>", 2, 2);
1412                 }
1413                 {
1414                     alias toer = ThrowOnEntityRef.no;
1415                     test!(func, toer)(`<root foo="&foo;"/>`, EntityType.elementEmpty,
1416                                       [tuple("foo", "&foo;")], 1, 20);
1417                     test!(func, toer)(`<root foo="&foo;"></root>`, EntityType.elementStart,
1418                                       [tuple("foo", "&foo;")], 1, 19);
1419                     test!(func, toer)("<root foo='foo&bar.;'></root>", EntityType.elementStart,
1420                                       [tuple("foo", "foo&bar.;")], 1, 23);
1421                     test!(func, toer)(`<root foo="hello &a; world"></root>`, EntityType.elementStart,
1422                                         [tuple("foo", "hello &a; world")], 1, 29);
1423                     test!(func, toer)("<root foo='hello \n &a; \n world'></root>", EntityType.elementStart,
1424                                         [tuple("foo", "hello \n &a; \n world")], 3, 9);
1425                 }
1426             }
1427         }
1428 
1429 
1430         /++
1431             Returns the textual value of this Entity.
1432 
1433             In the case of $(LREF EntityType.pi), this is the
1434             text that follows the name, whereas in the other cases, the text is
1435             the entire contents of the entity (save for the delimiters on the
1436             ends if that entity has them).
1437 
1438             $(TABLE
1439                 $(TR $(TH Supported $(LREF EntityType)s:))
1440                 $(TR $(TD $(LREF2 cdata, EntityType)))
1441                 $(TR $(TD $(LREF2 comment, EntityType)))
1442                 $(TR $(TD $(LREF2 pi, EntityType)))
1443                 $(TR $(TD $(LREF2 _text, EntityType)))
1444             )
1445 
1446             See_Also: $(REF decodeXML, dxml, util)$(BR)
1447                       $(REF asDecodedXML, dxml, util)$(BR)
1448                       $(REF stripIndent, dxml, util)$(BR)
1449                       $(REF withoutIndent, dxml, util)
1450           +/
1451         @property SliceOfR text()
1452         {
1453             import dxml.internal : checkedSave, stripBCU;
1454             import std.format : format;
1455 
1456             // Only the entity types listed here may be asked for their text.
1457             alias ET = EntityType;
1458             assert(only(ET.cdata, ET.comment, ET.pi, ET.text).canFind(_type),
1459                    format("text cannot be called with %s", _type));
1460             return stripBCU!R(checkedSave(_savedText.input));
1461         }
1462 
1463         ///
1464         static if(compileInTests) unittest
1465         {
1466             import std.range.primitives : empty;
1467 
                 // Walks a small document entity by entity and checks the text
                 // property on each kind of entity that supports it (pi, cdata,
                 // comment and text).
1468             auto xml = "<?xml version='1.0'?>\n" ~
1469                        "<?instructionName?>\n" ~
1470                        "<?foo here is something to say?>\n" ~
1471                        "<root>\n" ~
1472                        "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1473                        "    <!-- some random comment -->\n" ~
1474                        "    <p>something here</p>\n" ~
1475                        "    <p>\n" ~
1476                        "       something else\n" ~
1477                        "       here</p>\n" ~
1478                        "</root>";
1479             auto range = parseXML(xml);
1480 
                 // The <?xml ...?> declaration is not reported as an entity, so the
                 // first entity is the first processing instruction.
1481             // "<?instructionName?>\n" ~
1482             assert(range.front.type == EntityType.pi);
1483             assert(range.front.name == "instructionName");
1484             assert(range.front.text.empty);
1485 
1486             // "<?foo here is something to say?>\n" ~
1487             range.popFront();
1488             assert(range.front.type == EntityType.pi);
1489             assert(range.front.name == "foo");
1490             assert(range.front.text == "here is something to say");
1491 
1492             // "<root>\n" ~
1493             range.popFront();
1494             assert(range.front.type == EntityType.elementStart);
1495 
1496             // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1497             range.popFront();
1498             assert(range.front.type == EntityType.cdata);
1499             assert(range.front.text == " Yay! random text >> << ");
1500 
1501             // "    <!-- some random comment -->\n" ~
1502             range.popFront();
1503             assert(range.front.type == EntityType.comment);
1504             assert(range.front.text == " some random comment ");
1505 
1506             // "    <p>something here</p>\n" ~
1507             range.popFront();
1508             assert(range.front.type == EntityType.elementStart);
1509             assert(range.front.name == "p");
1510 
1511             range.popFront();
1512             assert(range.front.type == EntityType.text);
1513             assert(range.front.text == "something here");
1514 
1515             range.popFront();
1516             assert(range.front.type == EntityType.elementEnd);
1517             assert(range.front.name == "p");
1518 
1519             // "    <p>\n" ~
1520             // "       something else\n" ~
1521             // "       here</p>\n" ~
1522             range.popFront();
1523             assert(range.front.type == EntityType.elementStart);
1524 
                 // The text is reported as-is, including its newlines and indentation.
1525             range.popFront();
1526             assert(range.front.type == EntityType.text);
1527             assert(range.front.text == "\n       something else\n       here");
1528 
1529             range.popFront();
1530             assert(range.front.type == EntityType.elementEnd);
1531 
1532             // "</root>"
1533             range.popFront();
1534             assert(range.front.type == EntityType.elementEnd);
1535 
1536             range.popFront();
1537             assert(range.empty);
1538         }
1539 
1540 
1541         // Postblit: re-save the ranges held by a copied Entity so that copies
1542         // do not share state when R is a reference-type range.
1543         static if(!isDynamicArray!R) this(this)
1544         {
1545             with(EntityType) final switch(_type)
1546             {
1547                 // The name is duplicated for every entity type listed here.
1548                 case elementStart, elementEnd, elementEmpty, pi:
1549                     _name = _name.save;
1550                     break;
1551 
1552                 // The name is left untouched for these.
1553                 case cdata, comment, text:
1554                     break;
1555             }
1556 
1557             // The saved text is duplicated for everything except end tags.
1558             immutable isEndTag = _type == EntityType.elementEnd;
1559             if(!isEndTag)
1560                 _savedText = _savedText.save;
1561         }
1562 
1563         static if(compileInTests) unittest
1564         {
                 // Checks that a copy of an Entity yields the same position, name,
                 // attributes and text as the original, and that reading them (even
                 // repeatedly) or advancing the range does not affect the copies.
1565             import std.algorithm.comparison : equal;
1566             import dxml.internal : testRangeFuncs;
1567 
                 // Two attributes are equal when both their names and values are equal.
1568             static bool cmpAttr(T)(T lhs, T rhs)
1569             {
1570                 return equal(lhs.name.save, rhs.name.save) &&
1571                        equal(lhs.value.save, rhs.value.save);
1572             }
1573 
1574             {
1575                 auto xml = "<root>\n" ~
1576                            "    <foo a='42'/>\n" ~
1577                            "    <foo b='42'/>\n" ~
1578                            "    <nocomment>nothing to say</nocomment>\n" ~
1579                            "</root>";
1580 
1581                 // The duplicate lines aren't typos. We want to ensure that the
1582                 // values are independent and that nothing was consumed.
1583                 static foreach(func; testRangeFuncs)
1584                 {{
1585                      auto range = parseXML(func(xml));
1586                      range.popFront();
                          // First <foo .../>: compare name and attributes of the copies.
1587                      {
1588                          auto entity = range.front;
1589                          auto entity2 = entity;
1590                          assert(entity.pos == entity2.pos);
1591                          assert(equal(entity.name, entity2.name));
1592                          assert(equal(entity.name, entity2.name));
1593                          assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1594                          assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1595                          range.popFront();
1596                          assert(entity.pos == entity2.pos);
1597                          assert(entity.pos != range.front.pos);
1598                      }
1599                      range.popFront();
1600                      range.popFront();
                          // Text inside <nocomment>: compare the text of the copies.
1601                      {
1602                          auto entity = range.front;
1603                          auto entity2 = entity;
1604                          assert(entity.pos == entity2.pos);
1605                          assert(equal(entity.text, entity2.text));
1606                          assert(equal(entity.text, entity2.text));
1607                          range.popFront();
1608                          assert(entity.pos == entity2.pos);
1609                          assert(entity.pos != range.front.pos);
1610                      }
1611                 }}
1612             }
1613             {
1614                 auto xml = "<root>\n" ~
1615                            "    <![CDATA[whatever]]>\n" ~
1616                            "    <?pi?>\n" ~
1617                            "    <!--comment-->\n" ~
1618                            "    <empty/>\n" ~
1619                            "    <noend a='foo' b='bar'/>\n" ~
1620                            "    <foo baz='42'></foo>\n" ~
1621                            "</root>";
1622 
                     // For every entity in the document, compare a fresh range.front
                     // with a stored entity and with a copy of that stored entity.
1623                 static foreach(func; testRangeFuncs)
1624                 {
1625                     for(auto range = parseXML(func(xml)); !range.empty; range.popFront())
1626                     {
1627                         auto entity = range.front;
1628                         auto entity2 = entity;
1629 
1630                         assert(entity.pos == range.front.pos);
1631                         assert(entity.pos == entity2.pos);
1632                         assert(entity.type == range.front.type);
1633                         assert(entity.type == entity2.type);
1634 
                             // Each case uses goto to reach the comparisons that
                             // apply to its entity type (attributes, name, text).
1635                         with(EntityType) final switch(entity.type)
1636                         {
1637                             case cdata: goto case text;
1638                             case comment: goto case text;
1639                             case elementStart:
1640                             {
1641                                 assert(equal!cmpAttr(entity.attributes, range.front.attributes));
1642                                 assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1643                                 goto case elementEnd;
1644                             }
1645                             case elementEnd:
1646                             {
1647                                 assert(equal(entity.name, range.front.name));
1648                                 assert(equal(entity.name, entity2.name));
1649                                 break;
1650                             }
1651                             case elementEmpty: goto case elementStart;
1652                             case text:
1653                             {
1654                                 assert(equal(entity.text, range.front.text));
1655                                 assert(equal(entity.text, entity2.text));
1656                                 break;
1657                             }
1658                             case pi:
1659                             {
1660                                 assert(equal(entity.name, range.front.name));
1661                                 assert(equal(entity.name, entity2.name));
1662                                 goto case text;
1663                             }
1664                         }
1665                     }
1666                 }
1667             }
1668         }
1669 
1670 
1671     private:
1672 
1673         this(EntityType type)
1674         {
1675             // Resetting these two members explicitly should not be necessary; it
1676             // is done because of https://issues.dlang.org/show_bug.cgi?id=13945
1677             _name = typeof(_name).init;
1678             _savedText = typeof(_savedText).init;
1679             _type = type;
1680         }
1681 
1682         EntityType _type; // kind of entity; set by the constructor
1683         TextPos _pos; // filled in by EntityRange.front from the range's _entityPos
1684         Taken _name; // filled in by EntityRange.front for element tags and PIs
1685         typeof(EntityRange._savedText) _savedText; // filled in by EntityRange.front for everything except end tags
1686     }
1687 
1688 
1689     /++
1690         Returns the $(LREF Entity) representing the entity in the XML document
1691         which was most recently parsed.
1692       +/
1693     @property Entity front()
1694     {
1695         auto entity = Entity(_type);
1696         entity._pos = _entityPos;
1697         with(EntityType) final switch(_type)
1698         {
1699             // Named entities: copy the name, then share the text handling below.
1700             case elementStart, elementEmpty, pi: entity._name = _name.save; goto case text;
1701             // End tags get a name and nothing else.
1702             case elementEnd: entity._name = _name.save; break;
1703             // Everything that reaches this point gets a copy of the saved text.
1704             case cdata, comment, text: entity._savedText = _savedText.save; break;
1705         }
1706 
1707         return entity;
1708     }
1709 
1710 
1711     /++
1712         Move to the next entity.
1713 
1714         The next entity is the next one that is linearly in the XML document.
1715         So, if the current entity has child entities, the next entity will be
1716         the first child entity, whereas if it has no child entities, it will be
1717         the next entity at the same level.
1718 
1719         Throws: $(LREF XMLParsingException) on invalid XML.
1720       +/
1721     void popFront()
1722     {
1723         with(GrammarPos) final switch(_grammarPos)
1724         {
1725             case documentStart: _parseDocumentStart(); break;
1726             case prologMisc1: _parseAtPrologMisc!1(); break;
1727             case prologMisc2: _parseAtPrologMisc!2(); break;
1728             case splittingEmpty:
1729                 // Nothing is parsed here: the current entity simply becomes an
1730                 // end tag, and the next position depends on the tag depth.
1731                 _type = EntityType.elementEnd;
1732                 _tagStack.sawEntity();
1733                 if(_tagStack.depth == 0)
1734                     _grammarPos = GrammarPos.endMisc;
1735                 else
1736                     _grammarPos = GrammarPos.contentCharData2;
1737                 break;
1738             case contentCharData1:
1739                 assert(_type == EntityType.elementStart);
1740                 _tagStack.pushTag(_name.save);
1741                 goto case contentCharData2;
1742             case contentMid: _parseAtContentMid(); break;
1743             case contentCharData2: _parseAtContentCharData(); break;
1744             case endTag: _parseElementEnd(); break;
1745             case endMisc: _parseAtEndMisc(); break;
1746             case documentEnd: assert(0, "It's illegal to call popFront() on an empty EntityRange.");
1747         }
1748     }
1749 
1750 
1751     /++
1752         Whether the end of the XML document has been reached.
1753 
1754         Note that because an $(LREF XMLParsingException) will be thrown on
1755         invalid XML, it's actually possible to call
1756         $(LREF2 front, EntityRange) and $(LREF2 popFront, EntityRange) without
1757         checking empty if the only way that empty would be true is if the XML
1758         were invalid (e.g. if at a start tag, it's a given that there's at
1759         least one end tag left in the document unless it's invalid XML).
1760 
1761         However, of course, caution should be used to ensure that incorrect
1762         assumptions are not made that allow the document to reach its end
1763         earlier than predicted without throwing an $(LREF XMLParsingException),
1764         since it's still an error to call $(LREF2 front, EntityRange) or
1765         $(LREF2 popFront, EntityRange) if empty would return true.
1766       +/
1767     @property bool empty() @safe const pure nothrow @nogc
1768     {
1769         return _grammarPos == GrammarPos.documentEnd;
1770     }
1771 
1772 
1773     /++
1774         Forward range function for obtaining a copy of the range which can then
1775         be iterated independently of the original.
1776       +/
1777     @property auto save()
1778     {
1779         // Members that still hold their init value are left alone, because
1780         // calling save on some init values blows up (e.g. a range that's a class).
1781         auto result = this;
1782         if(result._name !is typeof(result._name).init)
1783             result._name = _name.save;
1784         if(result._text.input !is typeof(result._text.input).init)
1785             result._text.input = _text.input.save;
1786         if(result._savedText.input !is typeof(result._savedText.input).init)
1787             result._savedText.input = _savedText.input.save;
1788         return result;
1789     }
1790 
1791     static if(compileInTests) unittest
1792     {
             // Checks that independently created ranges over the same text, and
             // ranges obtained through save, yield identical entities.
1793         import std.algorithm.comparison : equal;
1794         import std.exception : assertNotThrown;
1795         import dxml.internal : testRangeFuncs;
1796 
             // Two attributes are equal when both their names and values are equal.
1797         static bool cmpAttr(T)(T lhs, T rhs)
1798         {
1799             return equal(lhs.name.save, rhs.name.save) &&
1800                    equal(lhs.value.save, rhs.value.save);
1801         }
1802 
             // Walks both ranges in lockstep, asserting that each pair of entities
             // matches and that both ranges become empty at the same time.
1803         static void testEqual(ER)(ER one, ER two)
1804         {
1805              while(!one.empty && !two.empty)
1806              {
1807                  auto left = one.front;
1808                  auto right = two.front;
1809 
1810                  assert(left.pos == right.pos);
1811                  assert(left.type == right.type);
1812 
                      // Each case uses goto to reach the comparisons that apply
                      // to its entity type (attributes, name, text).
1813                  with(EntityType) final switch(left.type)
1814                  {
1815                      case cdata: goto case text;
1816                      case comment: goto case text;
1817                      case elementStart:
1818                      {
1819                          assert(equal!cmpAttr(left.attributes, right.attributes));
1820                          goto case elementEnd;
1821                      }
1822                      case elementEnd: assert(equal(left.name, right.name)); break;
1823                      case elementEmpty: goto case elementStart;
1824                      case text: assert(equal(left.text, right.text)); break;
1825                      case pi: assert(equal(left.name, right.name)); goto case text;
1826                  }
1827 
1828                  one.popFront();
1829                  two.popFront();
1830              }
1831 
1832              assert(one.empty);
1833              assert(two.empty);
1834         }
1835 
1836          auto xml = "<root>\n" ~
1837                     "    <!-- comment -->\n" ~
1838                     "    <something>\n" ~
1839                     "         <else/>\n" ~
1840                     "         somet text <i>goes</i> here\n" ~
1841                     "    </something>\n" ~
1842                     "</root>";
1843 
1844         static foreach(i, func; testRangeFuncs)
1845         {{
1846              auto text = func(xml);
                  // Two ranges parsed separately from saved copies of the text.
1847              testEqual(parseXML(text.save), parseXML(text.save));
                  // Two saved copies of one and the same range.
1848              auto range = parseXML(text.save);
1849              testEqual(range.save, range.save);
1850         }}
1851     }
1852 
1853 
1854     /++
1855         Returns an empty range. This corresponds to
1856         $(PHOBOS_REF _takeNone, std, range) except that it doesn't create a
1857         wrapper type.
1858       +/
1859     EntityRange takeNone()
1860     {
1861         auto exhausted = this.save;
1862         exhausted._grammarPos = GrammarPos.documentEnd;
1863         return exhausted;
1864     }
1865 
1866 
1867 private:
1868 
1869     void _parseDocumentStart()
1870     {
             // Handles the very start of the document: skips the optional
             // <?xml ...?> declaration and then parses the first entity of the
             // prolog.
             //
             // Throws: XMLParsingException if whitespace precedes the XML
             //         declaration, or if parsing the prolog fails.
1871         auto orig = _text.save;
1872         immutable wasWS = _text.stripWS();
1873         if(_text.stripStartsWith("<?xml"))
1874         {
1875             checkNotEmpty(_text);
                 // Only "<?xml" followed by '?' or whitespace is the XML declaration.
                 // Anything else (e.g. <?xml-stylesheet ...?>) is an ordinary
                 // processing instruction, which may legally follow whitespace.
1876             if(_text.input.front == '?' || isSpace(_text.input.front))
                 {
1877                 if(wasWS)
1878                     throw new XMLParsingException("Cannot have whitespace before the <?xml...?> declaration", TextPos.init);
1879                 _text.skipUntilAndDrop!"?>"();
                 }
1880             else
1881                 _text = orig; // not the declaration: rewind so that it gets parsed as a PI
1882         }
1883         _grammarPos = GrammarPos.prologMisc1;
1884         _parseAtPrologMisc!1();
1885     }
1886 
1887     static if(compileInTests) unittest
1888     {
             // Checks where the parser's text position ends up after the first
             // entity has been parsed, with and without an XML declaration.
1889         import core.exception : AssertError;
1890         import std.exception : assertNotThrown, enforce;
1891         import dxml.internal : testRangeFuncs;
1892 
             // Parses xml, which must not throw, and expects the first entity to be
             // an empty element and the text position to then be (row, col).
             // line is the caller's line so that failures point at the test case.
1893         static void test(alias func)(string xml, int row, int col, size_t line = __LINE__)
1894         {
1895             auto range = assertNotThrown!XMLParsingException(parseXML(func(xml)));
1896             enforce!AssertError(range._type == EntityType.elementEmpty, "unittest failure 1", __FILE__, line);
1897             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1898         }
1899 
1900         static foreach(func; testRangeFuncs)
1901         {
1902             test!func("<root/>", 1, 8);
1903             test!func("\n\t\n <root/>   \n", 3, 9);
1904             test!func("<?xml\n\n\nversion='1.8'\n\n\n\nencoding='UTF-8'\n\n\nstandalone='yes'\n?><root/>", 12, 10);
1905             test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?><root/>", 6, 23);
1906             test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?>\n     <root/>", 7, 13);
                 // The next two lines repeat the first two cases.
1907             test!func("<root/>", 1, 8);
1908             test!func("\n\t\n <root/>   \n", 3, 9);
1909         }
1910     }
1911 
1912 
1913     // Parse at GrammarPos.prologMisc1 or GrammarPos.prologMisc2.
         //
         // miscNum selects which of the two is being parsed: with miscNum == 1 a
         // <!DOCTYPE ...> declaration is accepted, whereas with miscNum == 2
         // encountering one is an error.
         //
         // Throws: XMLParsingException if the next non-whitespace character is not
         //         '<', or if "<!" is followed by neither a comment nor an
         //         acceptable DOCTYPE declaration. The helpers called from here
         //         presumably throw it as well for what they reject.
1914     void _parseAtPrologMisc(int miscNum)()
1915     {
1916         static assert(miscNum == 1 || miscNum == 2);
1917 
1918         // document ::= prolog element Misc*
1919         // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
1920         // Misc ::= Comment | PI | S
1921 
1922         stripWS(_text);
1923         checkNotEmpty(_text);
1924         if(_text.input.front != '<')
1925             throw new XMLParsingException("Expected <", _text.pos);
1926         popFrontAndIncCol(_text);
1927         checkNotEmpty(_text);
1928 
             // The character after '<' decides what kind of entity this is.
1929         switch(_text.input.front)
1930         {
1931             // Comment     ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1932             // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
1933             case '!':
1934             {
                     // Remembered so that the errors below point at the '!'.
1935                 immutable bangPos = _text.pos;
1936                 popFrontAndIncCol(_text);
1937                 if(_text.stripStartsWith("--"))
1938                 {
1939                     _parseComment();
                         // A skipped comment is not reported, so keep parsing at the
                         // same grammar position.
1940                     static if(config.skipComments == SkipComments.yes)
1941                         _parseAtPrologMisc!miscNum();
1942                     break;
1943                 }
1944                 static if(miscNum == 1)
1945                 {
1946                     if(_text.stripStartsWith("DOCTYPE"))
1947                     {
1948                         if(!_text.stripWS())
1949                             throw new XMLParsingException("Whitespace must follow <!DOCTYPE", _text.pos);
1950                         _parseDoctypeDecl();
1951                         break;
1952                     }
1953                     throw new XMLParsingException("Expected Comment or DOCTYPE section", bangPos);
1954                 }
1955                 else
1956                 {
                         // DOCTYPE is only recognized here to give a more specific error.
1957                     if(_text.stripStartsWith("DOCTYPE"))
1958                     {
1959                         throw new XMLParsingException("Only one <!DOCTYPE ...> declaration allowed per XML document",
1960                                                       bangPos);
1961                     }
1962                     throw new XMLParsingException("Expected Comment", bangPos);
1963                 }
1964             }
1965             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1966             case '?':
1967             {
1968                 _parsePI();
                     // A skipped PI is not reported, so move on to the next entity.
1969                 static if(config.skipPI == SkipPI.yes)
1970                     popFront();
1971                 break;
1972             }
1973             // element ::= EmptyElemTag | STag content ETag
                 // Anything else after '<' is handed to the element start parser.
1974             default:
1975             {
1976                 _parseElementStart();
1977                 break;
1978             }
1979         }
1980     }
1981 
1982 
1983     // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1984     // Parses a comment. <!-- was already removed from the front of the input.
         //
         // With SkipComments.yes the comment is only skipped over; otherwise the
         // range's current entity becomes the comment (type, position and text).
         //
         // Throws: XMLParsingException if the "--" that ends the comment's text
         //         is not immediately followed by '>'. checkText presumably
         //         throws it as well for text that it rejects.
1985     void _parseComment()
1986     {
1987         static if(config.skipComments == SkipComments.yes)
1988             _text.skipUntilAndDrop!"--"();
1989         else
1990         {
                 // Back up over the 4 characters of "<!--" so that the entity's
                 // position is where the comment starts.
1991             _entityPos = TextPos(_text.pos.line, _text.pos.col - 4);
1992             _type = EntityType.comment;
1993             _tagStack.sawEntity();
1994             _savedText.pos = _text.pos;
1995             _savedText.input = _text.takeUntilAndDrop!"--"();
1996         }
             // Only '>' may follow the "--" that was just dropped.
1997         if(_text.input.empty || _text.input.front != '>')
1998             throw new XMLParsingException("Comments cannot contain -- and cannot be terminated by --->", _text.pos);
1999         // This is here rather than at the end of the previous static if block
2000         // so that the error message for improperly terminating a comment takes
2001         // precedence over the one involving invalid characters in the comment.
2002         static if(config.skipComments == SkipComments.no)
2003             checkText!true(_savedText);
             // Consume the closing '>'.
2004         popFrontAndIncCol(_text);
2005     }
2006 
2007     static if(compileInTests) unittest
2008     {
2009         import core.exception : AssertError;
2010         import std.algorithm.comparison : equal;
2011         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2012         import dxml.internal : codeLen, testRangeFuncs;
2013 
2014         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2015         {
2016             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")));
2017             enforce!AssertError(range.front.type == EntityType.comment, "unittest failure 1", __FILE__, line);
2018             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2019             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2020         }
2021 
2022         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2023         {
2024             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2025             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2026             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2027         }
2028 
2029         static foreach(func; testRangeFuncs)
2030         {
2031             test!func("<!--foo-->", "foo", 1, 11);
2032             test!func("<!-- foo -->", " foo ", 1, 13);
2033             test!func("<!-- -->", " ", 1, 9);
2034             test!func("<!---->", "", 1, 8);
2035             test!func("<!--- comment -->", "- comment ", 1, 18);
2036             test!func("<!-- \n foo \n -->", " \n foo \n ", 3, 5);
2037             test!func("<!--京都市 ディラン-->", "京都市 ディラン", 1, codeLen!(func, "<!--京都市 ディラン-->") + 1);
2038             test!func("<!--&-->", "&", 1, 9);
2039             test!func("<!--<-->", "<", 1, 9);
2040             test!func("<!-->-->", ">", 1, 9);
2041             test!func("<!--->-->", "->", 1, 10);
2042 
2043             testFail!func("<!", 1, 2);
2044             testFail!func("<!- comment -->", 1, 2);
2045             testFail!func("<!-- comment ->", 1, 5);
2046             testFail!func("<!-- comment --->", 1, 16);
2047             testFail!func("<!---- comment -->", 1, 7);
2048             testFail!func("<!-- comment -- comment -->", 1, 16);
2049             testFail!func("<!->", 1, 2);
2050             testFail!func("<!-->", 1, 5);
2051             testFail!func("<!--->", 1, 5);
2052             testFail!func("<!----->", 1, 7);
2053             testFail!func("<!blah>", 1, 2);
2054             testFail!func("<! blah>", 1, 2);
2055             testFail!func("<!-- \n\n   \v \n -->", 3, 4);
2056             testFail!func("<!--京都市 ディラン\v-->", 1, codeLen!(func, "<!--京都市 ディラン\v"));
2057 
2058             {
2059                 auto xml = func("<!DOCTYPE foo><!-- comment --><root/>");
2060                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2061                 assert(range.front.type == EntityType.comment);
2062                 assert(equal(range.front.text, " comment "));
2063             }
2064             {
2065                 auto xml = func("<root><!-- comment --></root>");
2066                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2067                 assertNotThrown!XMLParsingException(range.popFront());
2068                 assert(range.front.type == EntityType.comment);
2069                 assert(equal(range.front.text, " comment "));
2070             }
2071             {
2072                 auto xml = func("<root/><!-- comment -->");
2073                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2074                 assertNotThrown!XMLParsingException(range.popFront());
2075                 assert(range.front.type == EntityType.comment);
2076                 assert(equal(range.front.text, " comment "));
2077             }
2078 
2079             static foreach(comment; ["<!foo>", "<! foo>", "<!->", "<!-->", "<!--->"])
2080             {
2081                 {
2082                     auto xml = func("<!DOCTYPE foo>" ~ comment ~ "<root/>");
2083                     assertThrown!XMLParsingException(parseXML(xml));
2084                 }
2085                 {
2086                     auto xml = func("<root>" ~ comment ~ "<root>");
2087                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2088                     assertThrown!XMLParsingException(range.popFront());
2089                 }
2090                 {
2091                     auto xml = func("<root/>" ~ comment);
2092                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2093                     assertThrown!XMLParsingException(range.popFront());
2094                 }
2095             }
2096 
2097             {
2098                 auto xml = "<!--one-->\n" ~
2099                            "<!--two-->\n" ~
2100                            "<root>\n" ~
2101                            "    <!--three-->\n" ~
2102                            "    <!--four-->\n" ~
2103                            "</root>\n" ~
2104                            "<!--five-->\n" ~
2105                            "<!--six-->";
2106 
2107                 auto text = func(xml);
2108                 {
2109                     auto range = parseXML(text.save);
2110                     assert(range.front.type == EntityType.comment);
2111                     assert(equal(range.front.text, "one"));
2112                     assertNotThrown!XMLParsingException(range.popFront());
2113                     assert(range.front.type == EntityType.comment);
2114                     assert(equal(range.front.text, "two"));
2115                     assertNotThrown!XMLParsingException(range.popFront());
2116                     assert(range.front.type == EntityType.elementStart);
2117                     assert(equal(range.front.name, "root"));
2118                     assertNotThrown!XMLParsingException(range.popFront());
2119                     assert(range.front.type == EntityType.comment);
2120                     assert(equal(range.front.text, "three"));
2121                     assertNotThrown!XMLParsingException(range.popFront());
2122                     assert(range.front.type == EntityType.comment);
2123                     assert(equal(range.front.text, "four"));
2124                     assertNotThrown!XMLParsingException(range.popFront());
2125                     assert(range.front.type == EntityType.elementEnd);
2126                     assert(equal(range.front.name, "root"));
2127                     assertNotThrown!XMLParsingException(range.popFront());
2128                     assert(range.front.type == EntityType.comment);
2129                     assert(equal(range.front.text, "five"));
2130                     assertNotThrown!XMLParsingException(range.popFront());
2131                     assert(range.front.type == EntityType.comment);
2132                     assert(equal(range.front.text, "six"));
2133                     assertNotThrown!XMLParsingException(range.popFront());
2134                     assert(range.empty);
2135                 }
2136                 {
2137                     auto range = parseXML!simpleXML(text.save);
2138                     assert(range.front.type == EntityType.elementStart);
2139                     assert(equal(range.front.name, "root"));
2140                     assertNotThrown!XMLParsingException(range.popFront());
2141                     assert(range.front.type == EntityType.elementEnd);
2142                     assert(equal(range.front.name, "root"));
2143                     assertNotThrown!XMLParsingException(range.popFront());
2144                     assert(range.empty);
2145                 }
2146             }
2147         }
2148     }
2149 
2150 
2151     // PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2152     // PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2153     // Parses a processing instruction. < was already removed from the input.
2154     void _parsePI()
2155     {
2156         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2157         assert(_text.input.front == '?');
2158         popFrontAndIncCol(_text);
2159         static if(config.skipPI == SkipPI.yes)
2160             _text.skipUntilAndDrop!"?>"();
2161         else
2162         {
2163             immutable posAtName = _text.pos;
2164             if(_text.input.empty)
2165                 throw new XMLParsingException("Unterminated processing instruction", posAtName);
2166             _type = EntityType.pi;
2167             _tagStack.sawEntity();
2168             _name = takeName!'?'(_text);
2169             immutable posAtWS = _text.pos;
2170             stripWS(_text);
2171             checkNotEmpty(_text);
2172             _savedText.pos = _text.pos;
2173             _savedText.input = _text.takeUntilAndDrop!"?>"();
2174             checkText!true(_savedText);
2175             if(walkLength(_name.save) == 3)
2176             {
2177                 // FIXME icmp doesn't compile right now due to an issue with
2178                 // byUTF that needs to be looked into.
2179                 /+
2180                 import std.uni : icmp;
2181                 if(icmp(_name.save, "xml") == 0)
2182                     throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2183                 +/
2184                 auto temp = _name.save;
2185                 if(temp.front == 'x' || temp.front == 'X')
2186                 {
2187                     temp.popFront();
2188                     if(temp.front == 'm' || temp.front == 'M')
2189                     {
2190                         temp.popFront();
2191                         if(temp.front == 'l' || temp.front == 'L')
2192                             throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2193                     }
2194                 }
2195             }
2196         }
2197     }
2198 
2199     static if(compileInTests) unittest
2200     {
2201         import core.exception : AssertError;
2202         import std.algorithm.comparison : equal;
2203         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2204         import std.utf : byUTF;
2205         import dxml.internal : codeLen, testRangeFuncs;
2206 
2207         static void test(alias func)(string text, string name, string expected,
2208                                      int row, int col, size_t line = __LINE__)
2209         {
2210             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2211                                                              "unittest failure 1", __FILE__, line);
2212             enforce!AssertError(range.front.type == EntityType.pi, "unittest failure 2", __FILE__, line);
2213             enforce!AssertError(equal(range.front.name, name), "unittest failure 3", __FILE__, line);
2214             enforce!AssertError(equal(range.front.text, expected), "unittest failure 4", __FILE__, line);
2215             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 5", __FILE__, line);
2216         }
2217 
2218         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2219         {
2220             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2221             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2222             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2223         }
2224 
2225         static foreach(func; testRangeFuncs)
2226         {
2227             test!func("<?a?>", "a", "", 1, 6);
2228             test!func("<?foo?>", "foo", "", 1, 8);
2229             test!func("<?foo.?>", "foo.", "", 1, 9);
2230             test!func("<?foo bar?>", "foo", "bar", 1, 12);
2231             test!func("<?xmf bar?>", "xmf", "bar", 1, 12);
2232             test!func("<?xmlfoo bar?>", "xmlfoo", "bar", 1, 15);
2233             test!func("<?foo bar baz?>", "foo", "bar baz", 1, 16);
2234             test!func("<?foo\nbar baz?>", "foo", "bar baz", 2, 10);
2235             test!func("<?foo \n bar baz?>", "foo", "bar baz", 2, 11);
2236             test!func("<?foo bar\nbaz?>", "foo", "bar\nbaz", 2, 6);
2237             test!func("<?dlang is awesome?>", "dlang", "is awesome", 1, 21);
2238             test!func("<?dlang is awesome! ?>", "dlang", "is awesome! ", 1, 23);
2239             test!func("<?dlang\n\nis\n\nawesome\n\n?>", "dlang", "is\n\nawesome\n\n", 7, 3);
2240             test!func("<?京都市 ディラン?>", "京都市", "ディラン", 1, codeLen!(func, "<?京都市 ディラン?>") + 1);
2241             test!func("<?foo bar&baz?>", "foo", "bar&baz", 1, 16);
2242             test!func("<?foo bar<baz?>", "foo", "bar<baz", 1, 16);
2243             test!func("<?pi ?>", "pi", "", 1, 8);
2244             test!func("<?pi\n?>", "pi", "", 2, 3);
2245             test!func("<?foo ??>", "foo", "?", 1, 10);
2246             test!func("<?pi some data ? > <??>", "pi", "some data ? > <?", 1, 24);
2247 
2248             testFail!func("<?", 1, 3);
2249             testFail!func("<??>", 1, 3);
2250             testFail!func("<? ?>", 1, 3);
2251             testFail!func("<?xml?><?xml?>", 1, 10);
2252             testFail!func("<?XML?>", 1, 3);
2253             testFail!func("<?xMl?>", 1, 3);
2254             testFail!func("<?foo>", 1, 6);
2255             testFail!func("<? foo?>", 1, 3);
2256             testFail!func("<?\nfoo?>", 1, 3);
2257             testFail!func("<??foo?>", 1, 3);
2258             testFail!func("<?.foo?>", 1, 3);
2259             testFail!func("<?foo bar\vbaz?>", 1, 10);
2260 
2261             {
2262                 auto xml = func("<!DOCTYPE foo><?foo bar?><root/>");
2263                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2264                 assert(range.front.type == EntityType.pi);
2265                 assert(equal(range.front.name, "foo"));
2266                 assert(equal(range.front.text, "bar"));
2267             }
2268             {
2269                 auto xml = func("<root><?foo bar?></root>");
2270                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2271                 assertNotThrown!XMLParsingException(range.popFront());
2272                 assert(equal(range.front.name, "foo"));
2273                 assert(equal(range.front.text, "bar"));
2274             }
2275             {
2276                 auto xml = func("<root/><?foo bar?>");
2277                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2278                 assertNotThrown!XMLParsingException(range.popFront());
2279                 assert(equal(range.front.name, "foo"));
2280                 assert(equal(range.front.text, "bar"));
2281             }
2282 
2283             static foreach(pi; ["<?foo>", "<foo?>", "<? foo>"])
2284             {
2285                 {
2286                     auto xml = func("<!DOCTYPE foo>" ~ pi ~ "<root/>");
2287                     assertThrown!XMLParsingException(parseXML(xml));
2288                 }
2289                 {
2290                     auto xml = func("<root>" ~ pi ~ "<root>");
2291                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2292                     assertThrown!XMLParsingException(range.popFront());
2293                 }
2294                 {
2295                     auto xml = func("<root/>" ~ pi);
2296                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2297                     assertThrown!XMLParsingException(range.popFront());
2298                 }
2299             }
2300 
2301             {
2302                 auto xml = "<?one?>\n" ~
2303                            "<?two?>\n" ~
2304                            "<root>\n" ~
2305                            "    <?three?>\n" ~
2306                            "    <?four?>\n" ~
2307                            "</root>\n" ~
2308                            "<?five?>\n" ~
2309                            "<?six?>";
2310 
2311                 auto text = func(xml);
2312                 {
2313                     auto range = parseXML(text.save);
2314                     assert(range.front.type == EntityType.pi);
2315                     assert(equal(range.front.name, "one"));
2316                     assertNotThrown!XMLParsingException(range.popFront());
2317                     assert(range.front.type == EntityType.pi);
2318                     assert(equal(range.front.name, "two"));
2319                     assertNotThrown!XMLParsingException(range.popFront());
2320                     assert(range.front.type == EntityType.elementStart);
2321                     assert(equal(range.front.name, "root"));
2322                     assertNotThrown!XMLParsingException(range.popFront());
2323                     assert(range.front.type == EntityType.pi);
2324                     assert(equal(range.front.name, "three"));
2325                     assertNotThrown!XMLParsingException(range.popFront());
2326                     assert(range.front.type == EntityType.pi);
2327                     assert(equal(range.front.name, "four"));
2328                     assertNotThrown!XMLParsingException(range.popFront());
2329                     assert(range.front.type == EntityType.elementEnd);
2330                     assert(equal(range.front.name, "root"));
2331                     assertNotThrown!XMLParsingException(range.popFront());
2332                     assert(range.front.type == EntityType.pi);
2333                     assert(equal(range.front.name, "five"));
2334                     assertNotThrown!XMLParsingException(range.popFront());
2335                     assert(range.front.type == EntityType.pi);
2336                     assert(equal(range.front.name, "six"));
2337                     assertNotThrown!XMLParsingException(range.popFront());
2338                     assert(range.empty);
2339                 }
2340                 {
2341                     auto range = parseXML!simpleXML(text.save);
2342                     assert(range.front.type == EntityType.elementStart);
2343                     assert(equal(range.front.name, "root"));
2344                     assertNotThrown!XMLParsingException(range.popFront());
2345                     assert(range.front.type == EntityType.elementEnd);
2346                     assert(equal(range.front.name, "root"));
2347                     assertNotThrown!XMLParsingException(range.popFront());
2348                     assert(range.empty);
2349                 }
2350             }
2351         }
2352     }
2353 
2354 
2355     // CDSect  ::= CDStart CData CDEnd
2356     // CDStart ::= '<![CDATA['
2357     // CData   ::= (Char* - (Char* ']]>' Char*))
2358     // CDEnd   ::= ']]>'
2359     // Parses a CDATA. <![CDATA[ was already removed from the front of the input.
2360     void _parseCDATA()
2361     {
2362         _entityPos = TextPos(_text.pos.line, _text.pos.col - cast(int)"<![CDATA[".length);
2363         _type = EntityType.cdata;
2364         _tagStack.sawEntity();
2365         _savedText.pos = _text.pos;
2366         _savedText.input = _text.takeUntilAndDrop!"]]>";
2367         checkText!true(_savedText);
2368         _grammarPos = GrammarPos.contentCharData2;
2369     }
2370 
2371     static if(compileInTests) unittest
2372     {
2373         import core.exception : AssertError;
2374         import std.algorithm.comparison : equal;
2375         import std.exception : assertNotThrown, collectException, enforce;
2376         import dxml.internal : codeLen, testRangeFuncs;
2377 
2378         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2379         {
2380             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2381             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2382             assertNotThrown!XMLParsingException(range.popFront());
2383             enforce!AssertError(range.front.type == EntityType.cdata, "unittest failure 1", __FILE__, line);
2384             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2385             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2386         }
2387 
2388         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2389         {
2390             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2391             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2392             auto e = collectException!XMLParsingException(range.popFront());
2393             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2394             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
2395         }
2396 
2397         static foreach(func; testRangeFuncs)
2398         {
2399             test!func("<![CDATA[]]>", "", 1, 13);
2400             test!func("<![CDATA[hello world]]>", "hello world", 1, 24);
2401             test!func("<![CDATA[\nhello\n\nworld\n]]>", "\nhello\n\nworld\n", 5, 4);
2402             test!func("<![CDATA[京都市]]>", "京都市", 1, codeLen!(func, "<![CDATA[京都市]>") + 2);
2403             test!func("<![CDATA[<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ]]>",
2404                       "<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ", 1, 57);
2405             test!func("<![CDATA[&]]>", "&", 1, 14);
2406 
2407             testFail!func("<[CDATA[]>", 1, 2);
2408             testFail!func("<![CDAT[]>", 1, 2);
2409             testFail!func("<![CDATA]>", 1, 2);
2410             testFail!func("<![CDATA[>", 1, 10);
2411             testFail!func("<![CDATA[]", 1, 10);
2412             testFail!func("<![CDATA[]>", 1, 10);
2413             testFail!func("<![CDATA[ \v ]]>", 1, 11);
2414             testFail!func("<![CDATA[ \n\n \v \n ]]>", 3, 2);
2415         }
2416     }
2417 
2418 
2419     // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
2420     // DeclSep     ::= PEReference | S
2421     // intSubset   ::= (markupdecl | DeclSep)*
2422     // markupdecl  ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
2423     // Parse doctypedecl after GrammarPos.prologMisc1.
2424     // <!DOCTYPE and any whitespace after it should have already been removed
2425     // from the input.
2426     void _parseDoctypeDecl()
2427     {
2428         outer: while(true)
2429         {
2430             _text.skipToOneOf!('"', '\'', '[', '>')();
2431             switch(_text.input.front)
2432             {
2433                 static foreach(quote; ['"', '\''])
2434                 {
2435                     case quote:
2436                     {
2437                         popFrontAndIncCol(_text);
2438                         _text.skipUntilAndDrop!([quote])();
2439                         continue outer;
2440                     }
2441                 }
2442                 case '[':
2443                 {
2444                     popFrontAndIncCol(_text);
2445                     while(true)
2446                     {
2447                         checkNotEmpty(_text);
2448                         _text.skipToOneOf!('"', '\'', ']')();
2449                         switch(_text.input.front)
2450                         {
2451                             case '"':
2452                             {
2453                                 popFrontAndIncCol(_text);
2454                                 _text.skipUntilAndDrop!`"`();
2455                                 continue;
2456                             }
2457                             case '\'':
2458                             {
2459                                 popFrontAndIncCol(_text);
2460                                 _text.skipUntilAndDrop!`'`();
2461                                 continue;
2462                             }
2463                             case ']':
2464                             {
2465                                 popFrontAndIncCol(_text);
2466                                 stripWS(_text);
2467                                 if(_text.input.empty || _text.input.front != '>')
2468                                     throw new XMLParsingException("Incorrectly terminated <!DOCTYPE> section.", _text.pos);
2469                                 popFrontAndIncCol(_text);
2470                                 _parseAtPrologMisc!2();
2471                                 return;
2472                             }
2473                             default: assert(0);
2474                         }
2475                     }
2476                 }
2477                 case '>':
2478                 {
2479                     popFrontAndIncCol(_text);
2480                     _parseAtPrologMisc!2();
2481                     break;
2482                 }
2483                 default: assert(0);
2484             }
2485             break;
2486         }
2487     }
2488 
2489     static if(compileInTests) unittest
2490     {
2491         import core.exception : AssertError;
2492         import std.exception : assertNotThrown, collectException, enforce;
2493         import dxml.internal : testRangeFuncs;
2494 
2495         static void test(alias func)(string text, int row, int col, size_t line = __LINE__)
2496         {
2497             auto pos = TextPos(row, col + cast(int)"<root/>".length);
2498             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2499                                                              "unittest failure 1", __FILE__, line);
2500             enforce!AssertError(range.front.type == EntityType.elementEmpty, "unittest failure 2", __FILE__, line);
2501             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2502         }
2503 
2504         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2505         {
2506             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2507             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2508             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2509         }
2510 
2511         static foreach(func; testRangeFuncs)
2512         {
2513             test!func("<!DOCTYPE name>", 1, 16);
2514             test!func("<!DOCTYPE \n\n\n name>", 4, 7);
2515             test!func("<!DOCTYPE name \n\n\n >", 4, 3);
2516 
2517             test!func("<!DOCTYPE name []>", 1, 19);
2518             test!func("<!DOCTYPE \n\n\n name []>", 4, 10);
2519             test!func("<!DOCTYPE name \n\n\n []>", 4, 5);
2520 
2521             test!func(`<!DOCTYPE name PUBLIC "'''" '"""'>`, 1, 35);
2522             test!func(`<!DOCTYPE name PUBLIC "'''" '"""' []>`, 1, 38);
2523             test!func(`<!DOCTYPE name PUBLIC 'foo' "'''">`, 1, 35);
2524             test!func(`<!DOCTYPE name PUBLIC 'foo' '"""' []>`, 1, 38);
2525 
2526             test!func("<!DOCTYPE name [ <!ELEMENT foo EMPTY > ]>", 1, 42);
2527             test!func("<!DOCTYPE name [ <!ELEMENT bar ANY > ]>", 1, 40);
2528             test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA) > ]>", 1, 48);
2529             test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA | foo)> ]>", 1, 53);
2530             test!func("<!DOCTYPE name [ <!ELEMENT kids (foo) > ]>", 1, 43);
2531             test!func("<!DOCTYPE name [ <!ELEMENT kids (foo | bar)> ]>", 1, 48);
2532 
2533             test!func("<!DOCTYPE name [ <!ATTLIST foo> ]>", 1, 35);
2534             test!func("<!DOCTYPE name [ <!ATTLIST foo def CDATA #REQUIRED> ]>", 1, 55);
2535 
2536             test!func(`<!DOCTYPE name [ <!ENTITY foo "bar"> ]>`, 1, 40);
2537             test!func(`<!DOCTYPE name [ <!ENTITY foo 'bar'> ]>`, 1, 40);
2538             test!func(`<!DOCTYPE name [ <!ENTITY foo SYSTEM 'sys'> ]>`, 1, 47);
2539             test!func(`<!DOCTYPE name [ <!ENTITY foo PUBLIC "'''" 'sys'> ]>`, 1, 53);
2540 
2541             test!func(`<!DOCTYPE name [ <!NOTATION note PUBLIC 'blah'> ]>`, 1, 51);
2542 
2543             test!func("<!DOCTYPE name [ <?pi> ]>", 1, 26);
2544 
2545             test!func("<!DOCTYPE name [ <!-- coment --> ]>", 1, 36);
2546 
2547             test!func("<!DOCTYPE name [ <?pi> <!----> <!ELEMENT blah EMPTY> ]>", 1, 56);
2548             test!func("<!DOCTYPE \nname\n[\n<?pi> \n <!---->\n<!ENTITY foo '\n\n'\n>\n]>", 10, 3);
2549 
2550             test!func("<!DOCTYPE doc [\n" ~
2551                       "<!ENTITY e '<![CDATA[Tim Michael]]>'>\n" ~
2552                       "]>\n", 4, 1);
2553 
2554             testFail!func("<!DOCTYP name>", 1, 2);
2555             testFail!func("<!DOCTYPEname>", 1, 10);
2556             testFail!func("<!DOCTYPE name1><!DOCTYPE name2>", 1, 18);
2557             testFail!func("<!DOCTYPE\n\nname1><!DOCTYPE name2>", 3, 8);
2558             testFail!func("<!DOCTYPE name [ ]<!--comment-->", 1, 19);
2559 
2560             // FIXME This really should have the exception point at the quote and
2561             // say that it couldn't find the matching quote rather than point at
2562             // the character after it and say that it couldn't find a quote, but
2563             // that requires reworking some helper functions with better error
2564             // messages in mind.
2565             testFail!func(`<!DOCTYPE student SYSTEM "student".dtd"[` ~
2566                           "\n<!ELEMENT student (#PCDATA)>\n" ~
2567                           "]>", 1, 40);
2568         }
2569     }
2570 
2571 
2572     // Parse a start tag or empty element tag. It could be the root element, or
2573     // it could be a sub-element.
2574     // < was already removed from the front of the input.
2575     void _parseElementStart()
2576     {
2577         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2578         _savedText.pos = _text.pos;
2579         _savedText.input = _text.takeUntilAndDrop!(">", true)();
2580 
2581         if(_savedText.input.empty)
2582             throw new XMLParsingException("Tag missing name", _savedText.pos);
2583         if(_savedText.input.front == '/')
2584             throw new XMLParsingException("Invalid end tag", _savedText.pos);
2585 
2586         if(_savedText.input.length > 1)
2587         {
2588             auto temp = _savedText.input.save;
2589             temp.popFrontN(temp.length - 1);
2590             if(temp.front == '/')
2591             {
2592                 _savedText.input = _savedText.input.takeExactly(_savedText.input.length - 1);
2593 
2594                 static if(config.splitEmpty == SplitEmpty.no)
2595                 {
2596                     _type = EntityType.elementEmpty;
2597                     _tagStack.sawEntity();
2598                     _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2599                 }
2600                 else
2601                 {
2602                     _type = EntityType.elementStart;
2603                     _tagStack.sawEntity();
2604                     _grammarPos = GrammarPos.splittingEmpty;
2605                 }
2606             }
2607             else
2608             {
2609                 _type = EntityType.elementStart;
2610                 _tagStack.sawEntity();
2611                 _grammarPos = GrammarPos.contentCharData1;
2612             }
2613         }
2614         else
2615         {
2616             _type = EntityType.elementStart;
2617             _tagStack.sawEntity();
2618             _grammarPos = GrammarPos.contentCharData1;
2619         }
2620 
2621         _name = _savedText.takeName();
2622         // The attributes should be all that's left in savedText.
2623         if(_tagStack.atMax)
2624         {
2625             auto temp = _savedText.save;
2626             auto attrChecker = _tagStack.attrChecker;
2627 
2628             while(true)
2629             {
2630                 immutable wasWS = stripWS(temp);
2631                 if(temp.input.empty)
2632                     break;
2633                 if(!wasWS)
2634                     throw new XMLParsingException("Whitespace missing before attribute name", temp.pos);
2635 
2636                 immutable attrPos = temp.pos;
2637                 attrChecker.pushAttr(temp.takeName!'='(), attrPos);
2638                 stripWS(temp);
2639 
2640                 checkNotEmpty(temp);
2641                 if(temp.input.front != '=')
2642                     throw new XMLParsingException("= missing", temp.pos);
2643                 popFrontAndIncCol(temp);
2644 
2645                 stripWS(temp);
2646                 temp.takeAttValue();
2647             }
2648 
2649             attrChecker.checkAttrs();
2650         }
2651     }
2652 
2653     static if(compileInTests) unittest
2654     {
2655         import core.exception : AssertError;
2656         import std.algorithm.comparison : equal;
2657         import std.exception : assertNotThrown, collectException, enforce;
2658         import dxml.internal : codeLen, testRangeFuncs;
2659 
2660         static void test(alias func)(string text, EntityType type, string name,
2661                                      int row, int col, size_t line = __LINE__)
2662         {
2663             auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
2664             enforce!AssertError(range.front.type == type, "unittest failure 1", __FILE__, line);
2665             enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
2666             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2667         }
2668 
2669         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2670         {
2671             auto xml = func(text);
2672             auto e = collectException!XMLParsingException(parseXML(func(text)));
2673             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2674             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2675         }
2676 
2677         static foreach(func; testRangeFuncs)
2678         {
2679             test!func("<a/>", EntityType.elementEmpty, "a", 1, 5);
2680             test!func("<a></a>", EntityType.elementStart, "a", 1, 4);
2681             test!func("<root/>", EntityType.elementEmpty, "root", 1, 8);
2682             test!func("<root></root>", EntityType.elementStart, "root", 1, 7);
2683             test!func("<foo/>", EntityType.elementEmpty, "foo", 1, 7);
2684             test!func("<foo></foo>", EntityType.elementStart, "foo", 1, 6);
2685             test!func("<foo       />", EntityType.elementEmpty, "foo", 1, 14);
2686             test!func("<foo       ></foo>", EntityType.elementStart, "foo", 1, 13);
2687             test!func("<foo  \n\n\n />", EntityType.elementEmpty, "foo", 4, 4);
2688             test!func("<foo  \n\n\n ></foo>", EntityType.elementStart, "foo", 4, 3);
2689             test!func("<foo.></foo.>", EntityType.elementStart, "foo.", 1, 7);
2690             test!func(`<京都市></京都市>`, EntityType.elementStart, "京都市", 1, codeLen!(func, `<京都市>`) + 1);
2691 
2692             testFail!func(`<.foo/>`, 1, 2);
2693             testFail!func(`<>`, 1, 2);
2694             testFail!func(`</>`, 1, 2);
2695             testFail!func(`</foo>`, 1, 2);
2696 
2697             {
2698                 auto range = assertNotThrown!XMLParsingException(parseXML!simpleXML(func("<root/>")));
2699                 assert(range.front.type == EntityType.elementStart);
2700                 assert(equal(range.front.name, "root"));
2701                 assert(range._text.pos == TextPos(1, 8));
2702                 assertNotThrown!XMLParsingException(range.popFront());
2703                 assert(range.front.type == EntityType.elementEnd);
2704                 assert(equal(range.front.name, "root"));
2705                 assert(range._text.pos == TextPos(1, 8));
2706             }
2707         }
2708     }
2709 
2710 
2711     // Parse an end tag. It could be the root element, or it could be a
2712     // sub-element.
2713     // </ was already removed from the front of the input.
2714     void _parseElementEnd()
2715     {
2716         if(_text.input.empty)
2717             throw new XMLParsingException("Unterminated end tag", _text.pos);
2718         _entityPos = TextPos(_text.pos.line, _text.pos.col - 2);
2719         _type = EntityType.elementEnd;
2720         _tagStack.sawEntity();
2721         immutable namePos = _text.pos;
2722         _name = _text.takeName!'>'();
2723         stripWS(_text);
2724         if(_text.input.empty || _text.input.front != '>')
2725         {
2726             throw new XMLParsingException("There can only be whitespace between an end tag's name and the >",
2727                                           _text.pos);
2728         }
2729         popFrontAndIncCol(_text);
2730         _tagStack.popTag(_name.save, namePos);
2731         _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2732     }
2733 
2734     static if(compileInTests) unittest
2735     {
2736         import core.exception : AssertError;
2737         import std.algorithm.comparison : equal;
2738         import std.exception : assertNotThrown, collectException, enforce;
2739         import dxml.internal : codeLen, testRangeFuncs;
2740 
2741         static void test(alias func)(string text, string name, int row, int col, size_t line = __LINE__)
2742         {
2743             auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
2744             range.popFront();
2745             enforce!AssertError(range.front.type == EntityType.elementEnd, "unittest failure 1", __FILE__, line);
2746             enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
2747             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2748         }
2749 
2750         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2751         {
2752             auto range = parseXML(func(text));
2753             auto e = collectException!XMLParsingException(range.popFront());
2754             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2755             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2756         }
2757 
2758         static foreach(func; testRangeFuncs)
2759         {
2760             test!func("<a></a>", "a", 1, 8);
2761             test!func("<foo></foo>", "foo", 1, 12);
2762             test!func("<foo    ></foo    >", "foo", 1, 20);
2763             test!func("<foo \n ></foo \n >", "foo", 3, 3);
2764             test!func("<foo>\n\n\n</foo>", "foo", 4, 7);
2765             test!func("<foo.></foo.>", "foo.", 1, 14);
2766             test!func(`<京都市></京都市>`, "京都市", 1, codeLen!(func, `<京都市></京都市>`) + 1);
2767 
2768             testFail!func(`<foo></ foo>`, 1, 8);
2769             testFail!func(`<foo></bar>`, 1, 8);
2770             testFail!func(`<foo></fo>`, 1, 8);
2771             testFail!func(`<foo></food>`, 1, 8);
2772             testFail!func(`<a></>`, 1, 6);
2773             testFail!func(`<a></`, 1, 6);
2774             testFail!func(`<a><`, 1, 5);
2775             testFail!func(`<a></a b='42'>`, 1, 8);
2776         }
2777     }
2778 
2779 
2780     // GrammarPos.contentCharData1
2781     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2782     // Parses at either CharData?. Nothing from the CharData? (or what's after it
2783     // if it's not there) has been consumed.
2784     void _parseAtContentCharData()
2785     {
2786         checkNotEmpty(_text);
2787         auto orig = _text.save;
2788         stripWS(_text);
2789         checkNotEmpty(_text);
2790         if(_text.input.front != '<')
2791         {
2792             _text = orig;
2793             _entityPos = _text.pos;
2794             _type = EntityType.text;
2795             _tagStack.sawEntity();
2796             _savedText.pos = _text.pos;
2797             _savedText.input = _text.takeUntilAndDrop!"<"();
2798             checkText!false(_savedText);
2799             checkNotEmpty(_text);
2800             if(_text.input.front == '/')
2801             {
2802                 popFrontAndIncCol(_text);
2803                 _grammarPos = GrammarPos.endTag;
2804             }
2805             else
2806                 _grammarPos = GrammarPos.contentMid;
2807         }
2808         else
2809         {
2810             popFrontAndIncCol(_text);
2811             checkNotEmpty(_text);
2812             if(_text.input.front == '/')
2813             {
2814                 popFrontAndIncCol(_text);
2815                 _parseElementEnd();
2816             }
2817             else
2818                 _parseAtContentMid();
2819         }
2820     }
2821 
2822     static if(compileInTests) unittest
2823     {
2824         import core.exception : AssertError;
2825         import std.algorithm.comparison : equal;
2826         import std.exception : assertNotThrown, collectException, enforce;
2827         import dxml.internal : codeLen, testRangeFuncs;
2828 
2829         static void test(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
2830         {
2831             auto pos = TextPos(row, col + (cast(int)(row == 1 ? "<root></" : "</").length));
2832             auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
2833             assertNotThrown!XMLParsingException(range.popFront());
2834             enforce!AssertError(range.front.type == EntityType.text, "unittest failure 1", __FILE__, line);
2835             enforce!AssertError(equal(range.front.text, text), "unittest failure 2", __FILE__, line);
2836             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2837         }
2838 
2839         static void testFail(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
2840         {
2841             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2842             auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
2843             auto e = collectException!XMLParsingException(range.popFront());
2844             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2845             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
2846         }
2847 
2848         static foreach(func; testRangeFuncs)
2849         {
2850             static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
2851             {
2852                 test!(func, toer)("hello world", 1, 12);
2853                 test!(func, toer)("\nhello\n\nworld", 4, 6);
2854                 test!(func, toer)("京都市", 1, codeLen!(func, "京都市") + 1);
2855                 test!(func, toer)("&#x42;", 1, 7);
2856                 test!(func, toer)("]", 1, 2);
2857                 test!(func, toer)("]]", 1, 3);
2858                 test!(func, toer)("]>", 1, 3);
2859                 test!(func, toer)("foo \n\n &lt; \n bar", 4, 5);
2860 
2861                 testFail!(func, toer)("&", 1, 1);
2862                 testFail!(func, toer)("&;", 1, 1);
2863                 testFail!(func, toer)("&f", 1, 1);
2864                 testFail!(func, toer)("\v", 1, 1);
2865                 testFail!(func, toer)("hello&world", 1, 6);
2866                 testFail!(func, toer)("hello\vworld", 1, 6);
2867                 testFail!(func, toer)("hello&;world", 1, 6);
2868                 testFail!(func, toer)("hello&#;world", 1, 6);
2869                 testFail!(func, toer)("hello&#x;world", 1, 6);
2870                 testFail!(func, toer)("hello&.;world", 1, 6);
2871                 testFail!(func, toer)("\n\nfoo\nbar&.;", 4, 4);
2872 
2873                 testFail!(func, toer)("]]>", 1, 1);
2874                 testFail!(func, toer)("foo]]>bar", 1, 4);
2875 
2876                 static if(toer == ThrowOnEntityRef.yes)
2877                 {
2878                     testFail!(func, toer)("&foo; &bar baz", 1, 1);
2879                     testFail!(func, toer)("foo \n\n &ampe; \n bar", 3, 2);
2880                 }
2881                 else
2882                 {
2883                     testFail!(func, toer)("&foo; &bar baz", 1, 7);
2884                     test!(func, toer)("foo \n\n &ampe; \n bar", 4, 5);
2885                 }
2886             }
2887         }
2888     }
2889 
2890 
2891     // GrammarPos.contentMid
2892     // content     ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2893     // The text right after the start tag was what was parsed previously. So,
2894     // that first CharData? was what was parsed last, and this parses starting
2895     // right after. The < should have already been removed from the input.
2896     void _parseAtContentMid()
2897     {
2898         // Note that References are treated as part of the CharData and not
2899         // parsed out by the EntityRange (see EntityRange.text).
2900 
2901         switch(_text.input.front)
2902         {
2903             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2904             // CDSect  ::= CDStart CData CDEnd
2905             // CDStart ::= '<![CDATA['
2906             // CData   ::= (Char* - (Char* ']]>' Char*))
2907             // CDEnd   ::= ']]>'
2908             case '!':
2909             {
2910                 popFrontAndIncCol(_text);
2911                 if(_text.stripStartsWith("--"))
2912                 {
2913                     _parseComment();
2914                     static if(config.skipComments == SkipComments.yes)
2915                         _parseAtContentCharData();
2916                     else
2917                         _grammarPos = GrammarPos.contentCharData2;
2918                 }
2919                 else if(_text.stripStartsWith("[CDATA["))
2920                     _parseCDATA();
2921                 else
2922                 {
2923                     immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2924                     throw new XMLParsingException("Expected Comment or CDATA section", bangPos);
2925                 }
2926                 break;
2927             }
2928             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2929             case '?':
2930             {
2931                 _parsePI();
2932                 _grammarPos = GrammarPos.contentCharData2;
2933                 static if(config.skipPI == SkipPI.yes)
2934                     popFront();
2935                 break;
2936             }
2937             // element ::= EmptyElemTag | STag content ETag
2938             default:
2939             {
2940                 _parseElementStart();
2941                 break;
2942             }
2943         }
2944     }
2945 
2946 
2947     // This parses the Misc* that come after the root element.
2948     void _parseAtEndMisc()
2949     {
2950         // Misc ::= Comment | PI | S
2951 
2952         stripWS(_text);
2953 
2954         if(_text.input.empty)
2955         {
2956             _grammarPos = GrammarPos.documentEnd;
2957             return;
2958         }
2959 
2960         if(_text.input.front != '<')
2961             throw new XMLParsingException("Expected <", _text.pos);
2962         popFrontAndIncCol(_text);
2963         checkNotEmpty(_text);
2964 
2965         switch(_text.input.front)
2966         {
2967             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2968             case '!':
2969             {
2970                 popFrontAndIncCol(_text);
2971                 if(_text.stripStartsWith("--"))
2972                 {
2973                     _parseComment();
2974                     static if(config.skipComments == SkipComments.yes)
2975                         _parseAtEndMisc();
2976                     break;
2977                 }
2978                 immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2979                 throw new XMLParsingException("Expected Comment", bangPos);
2980             }
2981             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2982             case '?':
2983             {
2984                 _parsePI();
2985                 static if(config.skipPI == SkipPI.yes)
2986                     popFront();
2987                 break;
2988             }
2989             default: throw new XMLParsingException("Must be a comment or PI", _text.pos);
2990         }
2991     }
2992 
2993     // Used for keeping track of the names of start tags so that end tags can be
2994     // verified as well as making it possible to avoid redoing other validation.
2995     // We keep track of the total number of entities which have been parsed thus
2996     // far so that only whichever EntityRange is farthest along in parsing
2997     // actually adds or removes tags from the TagStack, and the parser can skip
2998     // some of the validation for ranges that are farther behind. That way, the
2999     // end tags get verified, but we only have one stack. If the stack were
3000     // duplicated with every call to save, then there would be a lot more
3001     // allocations, which we don't want. But because we only need to verify the
3002     // end tags once, we can get away with having a shared tag stack. The cost
3003     // is that we have to keep track of how many tags we've parsed so that we
3004     // know if an EntityRange should actually be pushing or popping tags from
3005     // the stack, but that's a lot cheaper than duplicating the stack, and it's
    // a lot less annoying than making EntityRange an input range and not a
3007     // forward range or making it a cursor rather than a range.
3008     struct TagStack
3009     {
3010         void pushTag(Taken tagName)
3011         {
3012             if(entityCount++ == state.maxEntities)
3013             {
3014                 ++state.maxEntities;
3015                 put(state.tags, tagName);
3016             }
3017             ++depth;
3018         }
3019 
3020         void popTag(Taken tagName, TextPos pos)
3021         {
3022             import std.algorithm : equal;
3023             import std.format : format;
3024             if(entityCount++ == state.maxEntities)
3025             {
3026                 assert(!state.tags.data.empty);
3027                 if(!equal(state.tags.data.back.save, tagName.save))
3028                 {
3029                     enum fmt = "Name of end tag </%s> does not match corresponding start tag <%s>";
3030                     throw new XMLParsingException(format!fmt(tagName, state.tags.data.back), pos);
3031                 }
3032                 ++state.maxEntities;
3033                 state.tags.shrinkTo(state.tags.data.length - 1);
3034             }
3035             --depth;
3036         }
3037 
3038         @property auto attrChecker()
3039         {
3040             assert(atMax);
3041 
3042             static struct AttrChecker
3043             {
3044                 void pushAttr(Taken attrName, TextPos attrPos)
3045                 {
3046                     import std.typecons : tuple;
3047                     put(state.attrs, tuple(attrName, attrPos));
3048                 }
3049 
3050                 void checkAttrs()
3051                 {
3052                     import std.algorithm.comparison : cmp, equal;
3053                     import std.algorithm.sorting : sort;
3054                     import std.conv : to;
3055 
3056                     if(state.attrs.data.length < 2)
3057                         return;
3058 
3059                     sort!((a,b) => cmp(a[0].save, b[0].save) < 0)(state.attrs.data);
3060                     auto prev = state.attrs.data.front;
3061                     foreach(attr; state.attrs.data[1 .. $])
3062                     {
3063                         if(equal(prev[0], attr[0]))
3064                             throw new XMLParsingException("Duplicate attribute name", attr[1]);
3065                         prev = attr;
3066                     }
3067                 }
3068 
3069                 ~this()
3070                 {
3071                     state.attrs.clear();
3072                 }
3073 
3074                 SharedState* state;
3075             }
3076 
3077             return AttrChecker(state);
3078         }
3079 
3080         void sawEntity()
3081         {
3082             if(entityCount++ == state.maxEntities)
3083                 ++state.maxEntities;
3084         }
3085 
3086         @property bool atMax()
3087         {
3088             return entityCount == state.maxEntities;
3089         }
3090 
3091         struct SharedState
3092         {
3093             import std.array : Appender;
3094             import std.typecons : Tuple;
3095 
3096             Appender!(Taken[]) tags;
3097             Appender!(Tuple!(Taken, TextPos)[]) attrs;
3098             size_t maxEntities;
3099         }
3100 
3101         static create()
3102         {
3103             TagStack tagStack;
3104             tagStack.state = new SharedState;
3105             tagStack.state.tags.reserve(10);
3106             tagStack.state.attrs.reserve(10);
3107             return tagStack;
3108         }
3109 
3110         SharedState* state;
3111         size_t entityCount;
3112         int depth;
3113     }
3114 
3115     static if(compileInTests) unittest
3116     {
3117         import core.exception : AssertError;
3118         import std.algorithm.comparison : equal;
3119         import std.exception : assertNotThrown, collectException, enforce;
3120         import dxml.internal : testRangeFuncs;
3121 
3122         static void test(alias func)(string text, size_t line = __LINE__)
3123         {
3124             auto xml = func(text);
3125             static foreach(config; someTestConfigs)
3126             {{
3127                 auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
3128                                                                  __FILE__, line);
3129                 assertNotThrown!XMLParsingException(walkLength(range), "unittest failure 2", __FILE__, line);
3130             }}
3131         }
3132 
3133         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
3134         {
3135             auto xml = func(text);
3136             static foreach(config; someTestConfigs)
3137             {{
3138                 auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
3139                                                                  __FILE__, line);
3140                 auto e = collectException!XMLParsingException(walkLength(range));
3141                 enforce!AssertError(e !is null, "unittest failure 2", __FILE__, line);
3142                 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
3143             }}
3144         }
3145 
3146         static foreach(func; testRangeFuncs)
3147         {
3148             test!func("<root></root>");
3149             test!func("<root><a></a></root>");
3150             test!func("<root><a><b></b></a></root>");
3151             test!func("<root><a><b></b></a></root>");
3152             test!func("<root><a><b></b></a><foo><bar></bar></foo></root>");
3153             test!func("<a>\n" ~
3154                       "    <b>\n" ~
3155                       "        <c>\n" ~
3156                       "            <d>\n" ~
3157                       "                <e>\n" ~
3158                       "                    <f>\n" ~
3159                       "                        <g>\n" ~
3160                       "                            <h>\n" ~
3161                       "                                 <i><i><i><i>\n" ~
3162                       "                                 </i></i></i></i>\n" ~
3163                       "                                 <i>\n" ~
3164                       "                                     <j>\n" ~
3165                       "                                         <k>\n" ~
3166                       "                                             <l>\n" ~
3167                       "                                                 <m>\n" ~
3168                       "                                                     <n>\n" ~
3169                       "                                                         <o>\n" ~
3170                       "                                                             <p>\n" ~
3171                       "                                                                 <q>\n" ~
3172                       "                                                                     <r>\n" ~
3173                       "                                                                         <s>\n" ~
3174                       "          <!-- comment --> <?pi?> <t><u><v></v></u></t>\n" ~
3175                       "                                                                         </s>\n" ~
3176                       "                                                                     </r>\n" ~
3177                       "                                                                 </q>\n" ~
3178                       "                                                </p></o></n></m>\n" ~
3179                       "                                                               </l>\n" ~
3180                       "                    </k>\n" ~
3181                       "           </j>\n" ~
3182                       "</i></h>" ~
3183                       "                        </g>\n" ~
3184                       "                    </f>\n" ~
3185                       "                </e>\n" ~
3186                       "            </d>\n" ~
3187                       "        </c>\n" ~
3188                       "    </b>\n" ~
3189                       "</a>");
3190             test!func(`<京都市></京都市>`);
3191 
3192             testFail!func(`<a>`, 1, 4);
3193             testFail!func(`<foo></foobar>`, 1, 8);
3194             testFail!func(`<foobar></foo>`, 1, 11);
3195             testFail!func(`<a><\a>`, 1, 5);
3196             testFail!func(`<a><a/>`, 1, 8);
3197             testFail!func(`<a><b>`, 1, 7);
3198             testFail!func(`<a><b><c>`, 1, 10);
3199             testFail!func(`<a></a><b>`, 1, 9);
3200             testFail!func(`<a></a><b></b>`, 1, 9);
3201             testFail!func(`<a><b></a></b>`, 1, 9);
3202             testFail!func(`<a><b><c></c><b></a>`, 1, 19);
3203             testFail!func(`<a><b></c><c></b></a>`, 1, 9);
3204             testFail!func(`<a><b></c></b></a>`, 1, 9);
3205             testFail!func("<a>\n" ~
3206                           "    <b>\n" ~
3207                           "        <c>\n" ~
3208                           "            <d>\n" ~
3209                           "                <e>\n" ~
3210                           "                    <f>\n" ~
3211                           "                    </f>\n" ~
3212                           "                </e>\n" ~
3213                           "            </d>\n" ~
3214                           "        </c>\n" ~
3215                           "    </b>\n" ~
3216                           "<a>", 12, 4);
3217             testFail!func("<a>\n" ~
3218                           "    <b>\n" ~
3219                           "        <c>\n" ~
3220                           "            <d>\n" ~
3221                           "                <e>\n" ~
3222                           "                    <f>\n" ~
3223                           "                    </f>\n" ~
3224                           "                </e>\n" ~
3225                           "            </d>\n" ~
3226                           "        </c>\n" ~
3227                           "    </b>\n" ~
3228                           "</q>", 12, 3);
3229         }
3230     }
3231 
3232 
3233     struct Text(R)
3234     {
3235         alias config = cfg;
3236         alias Input = R;
3237 
3238         Input input;
3239         TextPos pos;
3240 
3241         @property save() { return typeof(this)(input.save, pos); }
3242     }
3243 
3244 
3245     alias Taken = typeof(takeExactly(byCodeUnit(R.init), 42));
3246 
3247 
3248     EntityType _type;
3249     TextPos _entityPos;
3250     auto _grammarPos = GrammarPos.documentStart;
3251 
3252     Taken _name;
3253     TagStack _tagStack;
3254 
3255     Text!(typeof(byCodeUnit(R.init))) _text;
3256     Text!Taken _savedText;
3257 
3258 
3259     this(R xmlText)
3260     {
3261         _tagStack = TagStack.create();
3262         _text.input = byCodeUnit(xmlText);
3263 
3264         // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
3265         _savedText = typeof(_savedText).init;
3266         _name = typeof(_name).init;
3267 
3268         popFront();
3269     }
3270 }
3271 
3272 /// Ditto
EntityRange!(config, R) parseXML(Config config = Config.init, R)(R xmlText)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    // Constructing the range does the initial parsing, so the result already
    // has its first entity available.
    return typeof(return)(xmlText);
}
3278 
3279 ///
version(dxmlTests) unittest
{
    import std.range.primitives : walkLength;

    auto document = "<?xml version='1.0'?>\n" ~
                    "<?instruction start?>\n" ~
                    "<foo attr='42'>\n" ~
                    "    <bar/>\n" ~
                    "    <!-- no comment -->\n" ~
                    "    <baz hello='world'>\n" ~
                    "    nothing to say.\n" ~
                    "    nothing at all...\n" ~
                    "    </baz>\n" ~
                    "</foo>\n" ~
                    "<?some foo?>";

    // With the default configuration, every entity is reported.
    {
        auto entities = parseXML(document);

        // The XML declaration is skipped, so the first entity is the
        // processing instruction which follows it.
        assert(entities.front.type == EntityType.pi);
        assert(entities.front.name == "instruction");
        assert(entities.front.text == "start");

        // <foo attr='42'>
        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "foo");

        {
            auto fooAttrs = entities.front.attributes;
            assert(walkLength(fooAttrs.save) == 1);
            assert(fooAttrs.front.name == "attr");
            assert(fooAttrs.front.value == "42");
        }

        // <bar/>
        entities.popFront();
        assert(entities.front.type == EntityType.elementEmpty);
        assert(entities.front.name == "bar");

        // <!-- no comment -->
        entities.popFront();
        assert(entities.front.type == EntityType.comment);
        assert(entities.front.text == " no comment ");

        // <baz hello='world'>
        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "baz");

        {
            auto bazAttrs = entities.front.attributes;
            assert(walkLength(bazAttrs.save) == 1);
            assert(bazAttrs.front.name == "hello");
            assert(bazAttrs.front.value == "world");
        }

        // The text inside of <baz>, whitespace included.
        entities.popFront();
        assert(entities.front.type == EntityType.text);
        assert(entities.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </baz>
        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </foo>

        // <?some foo?>
        entities.popFront();
        assert(entities.front.type == EntityType.pi);
        assert(entities.front.name == "some");
        assert(entities.front.text == "foo");

        entities.popFront();
        assert(entities.empty);
    }

    // The same document parsed with simpleXML.
    {
        auto entities = parseXML!simpleXML(document);

        // simpleXML is set to skip processing instructions, so the first
        // entity is <foo attr='42'>.

        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "foo");

        {
            auto fooAttrs = entities.front.attributes;
            assert(walkLength(fooAttrs.save) == 1);
            assert(fooAttrs.front.name == "attr");
            assert(fooAttrs.front.value == "42");
        }

        // simpleXML is set to split empty tags so that <bar/> is treated
        // as the same as <bar></bar> so that code does not have to
        // explicitly handle empty tags.
        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "bar");
        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd);
        assert(entities.front.name == "bar");

        // simpleXML is set to skip comments, so <baz hello='world'> is next.

        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "baz");

        {
            auto bazAttrs = entities.front.attributes;
            assert(walkLength(bazAttrs.save) == 1);
            assert(bazAttrs.front.name == "hello");
            assert(bazAttrs.front.value == "world");
        }

        entities.popFront();
        assert(entities.front.type == EntityType.text);
        assert(entities.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </baz>
        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </foo>

        // The trailing processing instruction is skipped as well.
        entities.popFront();
        assert(entities.empty);
    }
}
3401 
3402 // Test the state of the range immediately after parseXML returns.
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    // Every input is passed through func so that each of the range types in
    // testRangeFuncs is actually exercised. Names and text are compared with
    // equal, since they are not necessarily strings.
    static foreach(func; testRangeFuncs)
    {
        // The XML declaration is never reported as an entity.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<?xml?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading comment is the first entity unless comments are skipped.
        static foreach(config; [Config.init, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, "no comment"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading processing instruction is the first entity unless
        // processing instructions are skipped.
        static foreach(config; [Config.init, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "private"));
            assert(equal(range.front.text, "eye"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // With nothing before it, the root element is the first entity.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}
    }
}
3458 
3459 // Invalid documents that didn't seem to fit well into tests elsewhere; each must throw at the given position.
3460 version(dxmlTests) unittest
3461 {
3462     import core.exception : AssertError;
3463     import std.exception : collectException, enforce;
3464     import dxml.internal : testRangeFuncs;
3465 
3466     static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
3467     {
3468         auto input = func(text);
3469         static foreach(config; someTestConfigs)
3470         {{
3471             void drain() // parse the whole document so that errors anywhere in it are reached
3472             {
3473                 for(auto entities = parseXML!config(input.save); !entities.empty;)
3474                     entities.popFront();
3475             }
3476             auto thrown = collectException!XMLParsingException(drain());
3477             enforce!AssertError(thrown !is null, "unittest failure 1", __FILE__, line);
3478             enforce!AssertError(thrown.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
3479         }}
3480     }
3481 
3482     static foreach(func; testRangeFuncs)
3483     {{  // A second element after the root element.
3484         testFail!func("<root></root><invalid></invalid>", 1, 15);
3485         testFail!func("<root></root><invalid/>", 1, 15);
3486         testFail!func("<root/><invalid></invalid>", 1, 9);
3487         testFail!func("<root/><invalid/>", 1, 9);
3488         // Text after the root element.
3489         testFail!func("<root></root>invalid", 1, 14);
3490         testFail!func("<root/>invalid", 1, 8);
3491         // Text or an element after a PI which follows the root element.
3492         testFail!func("<root/><?pi?>invalid", 1, 14);
3493         testFail!func("<root/><?pi?><invalid/>", 1, 15);
3494         // A DOCTYPE or an end tag after the root element.
3495         testFail!func("<root/><!DOCTYPE foo>", 1, 9);
3496         testFail!func("<root/></root>", 1, 9);
3497         // Text at the very start of the document.
3498         testFail!func("invalid<root></root>", 1, 1);
3499         testFail!func("invalid<?xml?><root></root>", 1, 1);
3500         testFail!func("invalid<!DOCTYPE foo><root></root>", 1, 1);
3501         testFail!func("invalid<!--comment--><root></root>", 1, 1);
3502         testFail!func("invalid<?Poirot?><root></root>", 1, 1);
3503         // Text between the prolog and the root element.
3504         testFail!func("<?xml?>invalid<root></root>", 1, 8);
3505         testFail!func("<!DOCTYPE foo>invalid<root></root>", 1, 15);
3506         testFail!func("<!--comment-->invalid<root></root>", 1, 15);
3507         testFail!func("<?Poirot?>invalid<root></root>", 1, 11);
3508         // Documents without a root start tag, including truncated ones.
3509         testFail!func("<?xml?>", 1, 8);
3510         testFail!func("<!DOCTYPE name>", 1, 16);
3511         testFail!func("<?Sherlock?>", 1, 13);
3512         testFail!func("<?Poirot?><?Sherlock?><?Holmes?>", 1, 33);
3513         testFail!func("<?Poirot?></Poirot>", 1, 12);
3514         testFail!func("</Poirot>", 1, 2);
3515         testFail!func("<", 1, 2);
3516         testFail!func(`</`, 1, 2);
3517         testFail!func(`</a`, 1, 2);
3518         testFail!func(`</a>`, 1, 2);
3519 
3520         // "]]>" in element text.
3521         testFail!func("<doc>]]></doc>", 1, 6);
3522         // Whitespace before the XML declaration.
3523         testFail!func(" <?xml?><root/>", 1, 1);
3524         testFail!func("\n<?xml?><root/>", 1, 1);
3525     }}
3526 }
3527 
3528 // Test that parseXML and EntityRange's properties work with @safe.
3529 // pure would be nice too, but at minimum, the use of format for exception
3530 // messages, and the use of assumeSafeAppend prevent it. It may or may not be
3531 // worth trying to fix that.
3532 version(dxmlTests) @safe unittest
3533 {
3534     import std.algorithm.comparison : equal;
3535     import dxml.internal : testRangeFuncs;
3536 
3537     auto xml = "<root>\n" ~
3538                "    <![CDATA[nothing]]>\n" ~
3539                "    <foo a='42'/>\n" ~
3540                "</root>";
3541     // NOTE(review): func is never used in the loop below, so every iteration parses the same string - confirm whether parseXML(func(xml)) was intended.
3542     static foreach(func; testRangeFuncs)
3543     {{
3544         auto range = parseXML(xml);
3545         assert(range.front.type == EntityType.elementStart);
3546         assert(equal(range.front.name, "root"));
3547         range.popFront();
3548         assert(!range.empty);
3549         assert(range.front.type == EntityType.cdata);
3550         assert(equal(range.front.text, "nothing"));
3551         range.popFront();
3552         assert(!range.empty);
3553         assert(range.front.type == EntityType.elementEmpty);
3554         assert(equal(range.front.name, "foo"));
3555         {
3556             auto attrs = range.front.attributes;
3557             auto saved = attrs.save; // never read; presumably only here so that save is checked under @safe
3558             auto attr = attrs.front;
3559             assert(attr.name == "a");
3560             assert(attr.value == "42");
3561             attrs.popFront();
3562             assert(attrs.empty);
3563         }
3564         auto saved = range.save; // never read; presumably the same @safe check for the entity range's save
3565     }}
3566 }
3567 
3568 // A stub with the members of a forward range of char, used purely to instantiate EntityRange in test builds.
3569 // That triggers the unittest blocks in EntityRange without compiling them in normally.
3570 private struct EntityRangeCompileTests
3571 {
3572     @property bool empty() @safe pure nothrow @nogc { assert(0); } // every member just asserts
3573     @property char front() @safe pure nothrow @nogc { assert(0); }
3574     void popFront() @safe pure nothrow @nogc { assert(0); }
3575     @property typeof(this) save() @safe pure nothrow @nogc { assert(0); }
3576 }
3577 
3578 version(dxmlTests) // the instantiation only exists when dxmlTests is set
3579     EntityRange!(Config.init, EntityRangeCompileTests) _entityRangeTests;
3580 
3581 
3582 /++
3583     Tests whether $(D R) is a forward range of attributes.
3584 
3585     To qualify, $(D R) must be a forward range, and
3586 
3587     $(UL
3588         $(LI its elements must have the members $(D name), $(D value), and $(D pos))
3589         $(LI $(D name) and $(D value) must be forward ranges of characters)
3590         $(LI $(D name) and $(D value) must be of the same type)
3591         $(LI $(D pos) must be a $(LREF TextPos)))
3592 
3593     Attribute ranges normally come from
3594     $(LREF EntityRange.Entity.attributes) or
3595     $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom), but
3596     any range with the right API counts as an attribute range.
3597 
3598     See_Also: $(LREF EntityRange.Entity.Attribute)$(BR)
3599               $(LREF EntityRange.Entity.attributes)$(BR)
3600               $(REF_ALTTEXT DOMEntity.Attribute, DOMEntity.Attribute, dxml, dom)$(BR)
3601               $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
3602   +/
3603 template isAttrRange(R)
3604 {
3605     // The checks run in order; the first one that fails settles the answer as false.
3606     static if(!isForwardRange!R)
3607         enum isAttrRange = false;
3608     else static if(!is(typeof(R.init.front.name)) || !is(typeof(R.init.front.value)))
3609         enum isAttrRange = false;
3610     else static if(!is(ReturnType!((R r) => r.front.pos) == TextPos))
3611         enum isAttrRange = false;
3612     else
3613     {
3614         alias CharRange = ReturnType!((R r) => r.front.name);
3615         enum sameType = is(CharRange == ReturnType!((R r) => r.front.value));
3616 
3617         enum isAttrRange = sameType && isForwardRange!CharRange && isSomeChar!(ElementType!CharRange);
3618     }
3619 }
3620 
3621 ///
3622 version(dxmlTests) unittest
3623 {
3624     import std.typecons : Tuple;
3625     import dxml.dom : parseDOM;
3626     // The attributes of an entity from parseXML are an attribute range.
3627     alias R1 = typeof(parseXML("<root/>").front.attributes);
3628     static assert(isAttrRange!R1);
3629     // So are the attributes of an entity from parseDOM.
3630     alias R2 = typeof(parseDOM("<root/>").children[0].attributes);
3631     static assert(isAttrRange!R2);
3632     // Any forward range whose elements have suitable name, value, and pos members qualifies.
3633     alias T = Tuple!(string, "name", string, "value", TextPos, "pos");
3634     static assert(isAttrRange!(T[]));
3635     // A string is a forward range, but it is not an attribute range.
3636     static assert(!isAttrRange!string);
3637 }
3638 
3639 version(dxmlTests) unittest // isAttrRange must reject element types which deviate from the expected API.
3640 {
3641     import std.typecons : Tuple;
3642 
3643     // The name member is misspelled.
3644     alias MisspelledName = Tuple!(string, "nam", string, "value", TextPos, "pos");
3645     static assert(!isAttrRange!(MisspelledName[]));
3646 
3647     // The value member is misspelled.
3648     alias MisspelledValue = Tuple!(string, "name", string, "valu", TextPos, "pos");
3649     static assert(!isAttrRange!(MisspelledValue[]));
3650 
3651     // The pos member is misspelled.
3652     alias MisspelledPos = Tuple!(string, "name", string, "value", TextPos, "po");
3653     static assert(!isAttrRange!(MisspelledPos[]));
3654 
3655     // name and value have different types.
3656     alias MixedStrings = Tuple!(string, "name", wstring, "value", TextPos, "pos");
3657     static assert(!isAttrRange!(MixedStrings[]));
3658 
3659     // There is no pos member at all.
3660     alias MissingPos = Tuple!(string, "name", string, "value");
3661     static assert(!isAttrRange!(MissingPos[]));
3662 
3663     // name is not a range of characters.
3664     alias IntName = Tuple!(int, "name", string, "value", TextPos, "pos");
3665     static assert(!isAttrRange!(IntName[]));
3666 
3667     // value is not a range of characters.
3668     alias IntValue = Tuple!(string, "name", int, "value", TextPos, "pos");
3669     static assert(!isAttrRange!(IntValue[]));
3670 
3671     // pos is not a TextPos.
3672     alias IntPos = Tuple!(string, "name", string, "value", int, "pos");
3673     static assert(!isAttrRange!(IntPos[]));
3674 }
3675 
3676 
3677 /++
3678     A helper function for processing start tag attributes.
3679 
3680     It functions similarly to $(PHOBOS_REF getopt, std, getopt). It takes a
3681     range of attributes and a list of alternating strings and pointers where
3682     each string represents the name of the attribute to parse and the pointer
3683     immediately after it is assigned the value that corresponds to the attribute
3684     name (if present). If the given pointer does not point to the same type as
3685     the range of characters used in the attributes, then
3686     $(PHOBOS_REF to, std, conv) is used to convert the value to the type the
3687     pointer points to.
3688 
3689     If a $(D Nullable!T*) is given rather than a $(D T*), then it will be
3690     treated the same as if it had been $(D T*). So, $(D to!T) will be used to
3691     convert the attribute value if the matching attribute name is present. The
3692     advantage of passing $(D Nullable!T*) instead of $(D T*) is that it's
3693     possible to distinguish between an attribute that wasn't present and one
3694     where it was present but was equivalent to $(D T.init).
3695 
3696     Unlike $(PHOBOS_REF getopt, std, getopt), the given range is consumed
3697     rather than taking it by $(K_REF) and leaving the attributes that weren't
3698     matched in the range (since that really doesn't work with an arbitrary
3699     range as opposed to a dynamic array). However, if the second argument of
3700     getAttrs is not a $(K_STRING) but is instead an output range that accepts
3701     the element type of the range, then any attributes which aren't matched are
3702     put into the output range.
3703 
3704     Params:
3705         attrRange = A range of attributes (see $(LREF isAttrRange)).
3706         unmatched = An output range that any _unmatched attributes from the
3707                     range are put into (optional argument).
3708         args = An alternating list of attribute names (strings) and pointers.
3709                When an attribute with a given name is found, its value is
3710                assigned to what the pointer following that name points to.
3711 
3712     Throws: $(LREF XMLParsingException) if $(PHOBOS_REF to, std, conv) fails to
3713             convert an attribute value.
3714 
3715     See_Also: $(LREF isAttrRange)$(BR)
3716               $(LREF EntityRange.Entity.attributes)$(BR)
3717               $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
3718   +/
3719 void getAttrs(R, Args...)(R attrRange, Args args)
3720     if(isAttrRange!R && Args.length % 2 == 0)
3721 {
3722     mixin(_genGetAttrs(false)); // generated body; false leaves out the handling of unmatched attributes
3723 }
3724 
3725 /// Ditto
3726 void getAttrs(R, OR, Args...)(R attrRange, ref OR unmatched, Args args)
3727     if(isAttrRange!R && isOutputRange!(OR, ElementType!R) && Args.length % 2 == 0)
3728 {
3729     mixin(_genGetAttrs(true)); // generated body; true adds put(unmatched, attr) for attributes that match no name
3730 }
3731 
3732 // Builds, as a string to be mixed in, the body shared by both getAttrs overloads.
3733 // When includeUnmatched is true, the generated loop also puts every attribute
3734 // which matched none of the given names into the output range named unmatched.
3735 private string _genGetAttrs(bool includeUnmatched)
3736 {
3737     auto retval =
3738 `    import std.algorithm.comparison : equal;
3739     import std.conv : ConvException, to;
3740     import std.format : format;
3741     import std.typecons : Nullable;
3742     import std.utf : byChar;
3743 
3744     outer: foreach(attr; attrRange)
3745     {
3746         static foreach(i, arg; args)
3747         {
3748             static if(i % 2 == 0)
3749                 static assert(is(Args[i] == string), format!"Expected string for args[%s]"(i));
3750             else
3751             {
3752                 static assert(isPointer!(Args[i]), format!"Expected pointer for args[%s]"(i));
3753 
3754                 if(equal(attr.name, args[i - 1].byChar()))
3755                 {
3756                     alias ArgType = typeof(*arg);
3757 
3758                     static if(isInstanceOf!(Nullable, ArgType))
3759                         alias TargetType = TemplateArgsOf!ArgType;
3760                     else
3761                         alias TargetType = typeof(*arg);
3762 
3763                     try
3764                         *arg = to!TargetType(attr.value);
3765                     catch(ConvException ce)
3766                     {
3767                         enum fmt = "Failed to convert %s: %s";
3768                         throw new XMLParsingException(format!fmt(attr.name, ce.msg), attr.pos);
3769                     }
3770 
3771                     continue outer;
3772                 }
3773             }
3774         }`;
3775     // In the generated loop, this point is only reached by attributes that matched no name.
3776     if(includeUnmatched)
3777         retval ~= "\n        put(unmatched, attr);";
3778     retval ~= "\n    }";
3779 
3780     return retval;
3781 }
3782 
3783 version(dxmlTests) unittest // getAttrs: plain pointers, Nullable pointers, unmatched output, failed conversions
3784 {
3785     import std.array : appender;
3786     import std.exception : collectException;
3787     import std.typecons : Nullable;
3788 
3789     {
3790         auto xml = `<root a="foo" b="19" c="true" d="rocks"/>`;
3791         auto range = parseXML(xml);
3792         assert(range.front.type == EntityType.elementEmpty);
3793 
3794         string a;
3795         int b;
3796         bool c;
3797 
3798         getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
3799         assert(a == "foo");
3800         assert(b == 19);
3801         assert(c == true);
3802     }
3803 
3804     // Nullable!T* accepts the same as T*.
3805     {
3806         auto xml = `<root a="foo" c="true" d="rocks"/>`;
3807         auto range = parseXML(xml);
3808         assert(range.front.type == EntityType.elementEmpty);
3809 
3810         Nullable!string a;
3811         Nullable!int b;
3812         bool c;
3813 
3814         getAttrs(range.front.attributes, "c", &c, "b", &b, "a", &a);
3815         assert(a == "foo");
3816         assert(b.isNull);
3817         assert(c == true);
3818     }
3819 
3820     // If an output range of attributes is provided, then the ones that
3821     // weren't matched are put in it.
3822     {
3823         auto xml = `<root foo="42" bar="silly" d="rocks" q="t"/>`;
3824         auto range = parseXML(xml);
3825         assert(range.front.type == EntityType.elementEmpty);
3826 
3827         alias Attribute = typeof(range).Entity.Attribute;
3828         auto unmatched = appender!(Attribute[])();
3829         int i;
3830         string s;
3831 
3832         getAttrs(range.front.attributes, unmatched, "foo", &i, "bar", &s);
3833         assert(i == 42);
3834         assert(s == "silly");
3835         assert(unmatched.data.length == 2);
3836         assert(unmatched.data[0] == Attribute("d", "rocks", TextPos(1, 28)));
3837         assert(unmatched.data[1] == Attribute("q", "t", TextPos(1, 38)));
3838     }
3839 
3840     // An XMLParsingException gets thrown if a conversion fails.
3841     {
3842         auto xml = `<root foo="bar" false="true" d="rocks"/>`;
3843         auto range = parseXML(xml);
3844         assert(range.front.type == EntityType.elementEmpty);
3845 
3846         int i;
3847 
3848         auto xpe = collectException!XMLParsingException(getAttrs(range.front.attributes, "d", &i));
3849         assert(xpe !is null, "expected an XMLParsingException"); // fail cleanly rather than dereference null below
3850         assert(xpe.pos == TextPos(1, 30));
3851     }
3852 }
3853 
3854 version(dxmlTests) unittest // Malformed name/pointer argument lists must not compile.
3855 {
3856     auto range = parseXML("<root/>");
3857     auto attrs = range.front.attributes;
3858     int i;
3859     static assert(!__traits(compiles, getAttrs(attrs, "foo"))); // a name without a pointer
3860     static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar"))); // a name where a pointer belongs
3861     static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i))); // two names before one pointer
3862     static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i, &i))); // names and pointers not alternating
3863     static assert(!__traits(compiles, getAttrs(attrs, &i, "foo"))); // pointer and name swapped
3864     static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i))); // starts with a pointer
3865     static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i, "bar"))); // every pair swapped
3866 }
3867 
3868 version(dxmlTests) @safe pure unittest // getAttrs must be callable from @safe pure code.
3869 {
3870     import std.typecons : Nullable;
3871 
3872     static test(R)(R range, int* i, Nullable!int* j) @safe pure
3873     {
3874         getAttrs(range.front.attributes, "foo", i, "bar", j);
3875     }
3876     // <root/> has no attributes, so nothing should be assigned through the null pointers.
3877     test(parseXML("<root/>"), null, null);
3878 }
3879 
3880 
3881 /++
3882     Advances an $(LREF EntityRange) whose $(D front) is a start tag until
3883     $(D front) is the matching end tag. Calling skipContents when the current
3884     entity is not $(LREF EntityType.elementStart) is an error.
3885 
3886     $(TABLE
3887         $(TR $(TH Supported $(LREF EntityType)s:))
3888         $(TR $(TD $(LREF2 elementStart, EntityType)))
3889     )
3890 
3891     Returns: The range, with its $(D front) at the end tag that closes the
3892              start tag which was $(D front) when skipContents was called.
3893 
3894     Throws: $(LREF XMLParsingException) on invalid XML.
3895   +/
3896 R skipContents(R)(R entityRange)
3897     if(isInstanceOf!(EntityRange, R))
3898 {
3899     assert(entityRange._type == EntityType.elementStart);
3900 
3901     // empty is never checked: the range could only become empty by reaching
3902     // the end of the document, and an XMLParsingException would be thrown if
3903     // that happened before the corresponding end tag was reached.
3904     int openTags = 1; // counts the start tag that front is on right now
3905     do
3906     {
3907         entityRange.popFront();
3908         switch(entityRange._type)
3909         {
3910             case EntityType.elementStart: ++openTags; break;
3911             case EntityType.elementEnd: --openTags; break;
3912             default: break;
3913         }
3914     } while(openTags != 0);
3915     return entityRange;
3916 }
3917 
3918 ///
3919 version(dxmlTests) unittest
3920 {
3921     auto xml = "<root>\n" ~
3922                "    <foo>\n" ~
3923                "        <bar>\n" ~
3924                "        Some text\n" ~
3925                "        </bar>\n" ~
3926                "    </foo>\n" ~
3927                "    <!-- no comment -->\n" ~
3928                "</root>";
3929 
3930     auto range = parseXML(xml);
3931     assert(range.front.type == EntityType.elementStart);
3932     assert(range.front.name == "root");
3933     // Move on to <foo>.
3934     range.popFront();
3935     assert(range.front.type == EntityType.elementStart);
3936     assert(range.front.name == "foo");
3937     // Everything inside <foo> is skipped, leaving front on </foo>.
3938     range = range.skipContents();
3939     assert(range.front.type == EntityType.elementEnd);
3940     assert(range.front.name == "foo");
3941     // Iteration then continues as normal with what follows </foo>.
3942     range.popFront();
3943     assert(range.front.type == EntityType.comment);
3944     assert(range.front.text == " no comment ");
3945 
3946     range.popFront();
3947     assert(range.front.type == EntityType.elementEnd);
3948     assert(range.front.name == "root");
3949 
3950     range.popFront();
3951     assert(range.empty);
3952 }
3953 
3954 
3955 /++
3956     Advances the range until an entity of the given $(LREF EntityType) is
3957     $(D front).
3958     If more than one $(LREF EntityType) is given, then an entity of any of
3959     those types counts as a match.
3960 
3961     The current entity is always skipped, even if it has one of the given
3962     $(LREF EntityType)s.
3963 
3964     In effect, this is a slightly optimized version of
3965 
3966     ---
3967     if(!range.empty())
3968     {
3969         range.popFront();
3970         range = range.find!((a, b) => a.type == b.type)(entityTypes);
3971     }
3972     ---
3973 
3974     Returns: The given range with its $(D front) at the first entity after
3975              the current one which has one of the given $(LREF EntityType)s,
3976              or an empty range if there is no such entity.
3977 
3978     Throws: $(LREF XMLParsingException) on invalid XML.
3979   +/
3980 R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...)
3981     if(isInstanceOf!(EntityRange, R))
3982 {
3983     // The current entity never counts as a match, so step past it first.
3984     if(!entityRange.empty)
3985         entityRange.popFront();
3986     search: while(!entityRange.empty)
3987     {
3988         immutable current = entityRange._type;
3989         foreach(wanted; entityTypes)
3990             if(current == wanted)
3991                 break search;
3992         entityRange.popFront();
3993     }
3994 
3995     return entityRange;
3996 }
3997 
3998 ///
3999 version(dxmlTests) unittest
4000 {
4001     auto xml = "<root>\n" ~
4002                "    <!-- blah blah blah -->\n" ~
4003                "    <foo>nothing to say</foo>\n" ~
4004                "</root>";
4005 
4006     auto range = parseXML(xml);
4007     assert(range.front.type == EntityType.elementStart);
4008     assert(range.front.name == "root");
4009     // <root> itself is skipped even though it is a start tag, and the comment is passed over.
4010     range = range.skipToEntityType(EntityType.elementStart,
4011                                    EntityType.elementEmpty);
4012     assert(range.front.type == EntityType.elementStart);
4013     assert(range.front.name == "foo");
4014     // No comment follows <foo>, so the result is an empty range.
4015     assert(range.skipToEntityType(EntityType.comment).empty);
4016 
4017     // skipToEntityType will work on an empty range but will always
4018     // return an empty range.
4019     assert(range.takeNone().skipToEntityType(EntityType.comment).empty);
4020 }
4021 
4022 
4023 /++
4024     Advances the range to the end tag which matches the start tag that is the
4025     parent of the current entity.
4026 
4027     Returns: The given range with its $(D front) at the end tag which
4028              corresponds to the parent start tag of the entity that was
4029              $(D front) when skipToParentEndTag was called. If the current
4030              entity has no parent start tag (i.e. it is the root element or
4031              a comment or PI outside of the root element), then an empty
4032              range is returned.
4033 
4034     Throws: $(LREF XMLParsingException) on invalid XML.
4035   +/
4036 R skipToParentEndTag(R)(R entityRange)
4037     if(isInstanceOf!(EntityRange, R))
4038 {
4039     // At the top of every iteration, front is either the entity that we were
4040     // called with or an entity on the same level as it, so the first end tag
4041     // that we stop on is the parent's end tag.
4042     while(true)
4043     {
4044         if(entityRange._type == EntityType.elementStart)
4045         {
4046             // A start tag has contents of its own. Go to its end tag and then
4047             // past it so that none of the end tags belonging to it or to its
4048             // children get mistaken for the parent's end tag.
4049             entityRange = entityRange.skipContents();
4050             entityRange.popFront();
4051         }
4052         else
4053         {
4054             // Every other kind of entity has no contents, so the next start
4055             // tag or end tag is on the same level.
4056             entityRange = entityRange.skipToEntityType(EntityType.elementStart,
4057                                                        EntityType.elementEnd);
4058         }
4059 
4060         // Running out of entities means that there was no parent start tag.
4061         if(entityRange.empty)
4062             return entityRange;
4063         // An end tag on this level is the parent's end tag.
4064         if(entityRange._type == EntityType.elementEnd)
4065             return entityRange;
4066         // Otherwise, front is another entity on this level, so go around again.
4067     }
4068 }
4069 
4070 ///
4071 version(dxmlTests) unittest
4072 {
4073     auto xml = "<root>\n" ~
4074                "    <foo>\n" ~
4075                "        <!-- comment -->\n" ~
4076                "        <bar>exam</bar>\n" ~
4077                "    </foo>\n" ~
4078                "    <!-- another comment -->\n" ~
4079                "</root>";
4080     {
4081         auto range = parseXML(xml);
4082         assert(range.front.type == EntityType.elementStart);
4083         assert(range.front.name == "root");
4084 
4085         range.popFront();
4086         assert(range.front.type == EntityType.elementStart);
4087         assert(range.front.name == "foo");
4088 
4089         range.popFront();
4090         assert(range.front.type == EntityType.comment);
4091         assert(range.front.text == " comment ");
4092         // From the comment inside <foo>, the parent end tag is </foo>; <bar> and its contents are skipped.
4093         range = range.skipToParentEndTag();
4094         assert(range.front.type == EntityType.elementEnd);
4095         assert(range.front.name == "foo");
4096         // Calling it on an end tag goes up another level.
4097         range = range.skipToParentEndTag();
4098         assert(range.front.type == EntityType.elementEnd);
4099         assert(range.front.name == "root");
4100         // </root> has no parent, so the result is empty.
4101         range = range.skipToParentEndTag();
4102         assert(range.empty);
4103     }
4104     {
4105         auto range = parseXML(xml);
4106         assert(range.front.type == EntityType.elementStart);
4107         assert(range.front.name == "root");
4108 
4109         range.popFront();
4110         assert(range.front.type == EntityType.elementStart);
4111         assert(range.front.name == "foo");
4112 
4113         range.popFront();
4114         assert(range.front.type == EntityType.comment);
4115         assert(range.front.text == " comment ");
4116 
4117         range.popFront();
4118         assert(range.front.type == EntityType.elementStart);
4119         assert(range.front.name == "bar");
4120 
4121         range.popFront();
4122         assert(range.front.type == EntityType.text);
4123         assert(range.front.text == "exam");
4124         // From the text inside <bar>, the parent end tag is </bar>.
4125         range = range.skipToParentEndTag();
4126         assert(range.front.type == EntityType.elementEnd);
4127         assert(range.front.name == "bar");
4128 
4129         range = range.skipToParentEndTag();
4130         assert(range.front.type == EntityType.elementEnd);
4131         assert(range.front.name == "foo");
4132 
4133         range.popFront();
4134         assert(range.front.type == EntityType.comment);
4135         assert(range.front.text == " another comment ");
4136         // The second comment is a direct child of <root>.
4137         range = range.skipToParentEndTag();
4138         assert(range.front.type == EntityType.elementEnd);
4139         assert(range.front.name == "root");
4140 
4141         assert(range.skipToParentEndTag().empty);
4142     }
4143     {   // The root start tag has no parent, so the result is empty.
4144         auto range = parseXML("<root><foo>bar</foo></root>");
4145         assert(range.front.type == EntityType.elementStart);
4146         assert(range.front.name == "root");
4147         assert(range.skipToParentEndTag().empty);
4148     }
4149 }
4150 
4151 version(dxmlTests) unittest
4152 {
4153     import core.exception : AssertError;
4154     import std.algorithm.comparison : equal;
4155     import std.exception : enforce;
4156     import dxml.internal : testRangeFuncs;
4157 
4158     static void popAndCheck(R)(ref R range, EntityType type, size_t line = __LINE__)
4159     {
4160         range.popFront();
4161         enforce!AssertError(!range.empty, "unittest 1", __FILE__, line);
4162         enforce!AssertError(range.front.type == type, "unittest 2", __FILE__, line);
4163     }
4164 
4165     static foreach(func; testRangeFuncs)
4166     {{
4167         // cdata
4168         {
4169             auto xml = "<root>\n" ~
4170                        "    <![CDATA[ cdata run ]]>\n" ~
4171                        "    <nothing/>\n" ~
4172                        "    <![CDATA[ cdata have its bits flipped ]]>\n" ~
4173                        "    <foo></foo>\n" ~
4174                        "    <![CDATA[ cdata play violin ]]>\n" ~
4175                        "</root>";
4176 
4177             auto range = parseXML(func(xml));
4178             assert(range.front.type == EntityType.elementStart);
4179             popAndCheck(range, EntityType.cdata);
4180             assert(equal(range.front.text, " cdata run "));
4181             {
4182                 auto temp = range.save.skipToParentEndTag();
4183                 assert(temp._type == EntityType.elementEnd);
4184                 assert(equal(temp.front.name, "root"));
4185             }
4186             popAndCheck(range, EntityType.elementEmpty);
4187             popAndCheck(range, EntityType.cdata);
4188             assert(equal(range.front.text, " cdata have its bits flipped "));
4189             {
4190                 auto temp = range.save.skipToParentEndTag();
4191                 assert(temp._type == EntityType.elementEnd);
4192                 assert(equal(temp.front.name, "root"));
4193             }
4194             popAndCheck(range, EntityType.elementStart);
4195             range = range.skipContents();
4196             popAndCheck(range, EntityType.cdata);
4197             assert(equal(range.front.text, " cdata play violin "));
4198             range = range.skipToParentEndTag();
4199             assert(range._type == EntityType.elementEnd);
4200             assert(equal(range.front.name, "root"));
4201         }
4202         // comment
4203         {
4204             auto xml = "<!-- before -->\n" ~
4205                        "<root>\n" ~
4206                        "    <!-- comment 1 -->\n" ~
4207                        "    <nothing/>\n" ~
4208                        "    <!-- comment 2 -->\n" ~
4209                        "    <foo></foo>\n" ~
4210                        "    <!-- comment 3 -->\n" ~
4211                        "</root>\n" ~
4212                        "<!-- after -->" ~
4213                        "<!-- end -->";
4214 
4215             auto text = func(xml);
4216             assert(parseXML(text.save).skipToParentEndTag().empty);
4217             {
4218                 auto range = parseXML(text.save);
4219                 assert(range.front.type == EntityType.comment);
4220                 popAndCheck(range, EntityType.elementStart);
4221                 popAndCheck(range, EntityType.comment);
4222                 assert(equal(range.front.text, " comment 1 "));
4223                 {
4224                     auto temp = range.save.skipToParentEndTag();
4225                     assert(temp._type == EntityType.elementEnd);
4226                     assert(equal(temp.front.name, "root"));
4227                 }
4228                 popAndCheck(range, EntityType.elementEmpty);
4229                 popAndCheck(range, EntityType.comment);
4230                 assert(equal(range.front.text, " comment 2 "));
4231                 {
4232                     auto temp = range.save.skipToParentEndTag();
4233                     assert(temp._type == EntityType.elementEnd);
4234                     assert(equal(temp.front.name, "root"));
4235                 }
4236                 popAndCheck(range, EntityType.elementStart);
4237                 range = range.skipContents();
4238                 popAndCheck(range, EntityType.comment);
4239                 assert(equal(range.front.text, " comment 3 "));
4240                 range = range.skipToParentEndTag();
4241                 assert(range._type == EntityType.elementEnd);
4242                 assert(equal(range.front.name, "root"));
4243             }
4244             {
4245                 auto range = parseXML(text.save);
4246                 assert(range.front.type == EntityType.comment);
4247                 popAndCheck(range, EntityType.elementStart);
4248                 range = range.skipContents();
4249                 popAndCheck(range, EntityType.comment);
4250                 assert(equal(range.front.text, " after "));
4251                 assert(range.save.skipToParentEndTag().empty);
4252                 popAndCheck(range, EntityType.comment);
4253                 assert(equal(range.front.text, " end "));
4254                 assert(range.skipToParentEndTag().empty);
4255             }
4256         }
4257         // elementStart
4258         {
4259             auto xml = "<root>\n" ~
4260                        "    <a><b>foo</b></a>\n" ~
4261                        "    <nothing/>\n" ~
4262                        "    <c></c>\n" ~
4263                        "    <d>\n" ~
4264                        "        <e>\n" ~
4265                        "        </e>\n" ~
4266                        "        <f>\n" ~
4267                        "            <g>\n" ~
4268                        "            </g>\n" ~
4269                        "        </f>\n" ~
4270                        "    </d>\n" ~
4271                        "</root>";
4272 
4273             auto range = parseXML(func(xml));
4274             assert(range.front.type == EntityType.elementStart);
4275             assert(equal(range.front.name, "root"));
4276             assert(range.save.skipToParentEndTag().empty);
4277             popAndCheck(range, EntityType.elementStart);
4278             assert(equal(range.front.name, "a"));
4279             {
4280                 auto temp = range.save.skipToParentEndTag();
4281                 assert(temp._type == EntityType.elementEnd);
4282                 assert(equal(temp.front.name, "root"));
4283             }
4284             popAndCheck(range, EntityType.elementStart);
4285             assert(equal(range.front.name, "b"));
4286             {
4287                 auto temp = range.save.skipToParentEndTag();
4288                 assert(temp._type == EntityType.elementEnd);
4289                 assert(equal(temp.front.name, "a"));
4290             }
4291             popAndCheck(range, EntityType.text);
4292             popAndCheck(range, EntityType.elementEnd);
4293             popAndCheck(range, EntityType.elementEnd);
4294             popAndCheck(range, EntityType.elementEmpty);
4295             popAndCheck(range, EntityType.elementStart);
4296             assert(equal(range.front.name, "c"));
4297             {
4298                 auto temp = range.save.skipToParentEndTag();
4299                 assert(temp._type == EntityType.elementEnd);
4300                 assert(equal(temp.front.name, "root"));
4301             }
4302             popAndCheck(range, EntityType.elementEnd);
4303             popAndCheck(range, EntityType.elementStart);
4304             assert(equal(range.front.name, "d"));
4305             popAndCheck(range, EntityType.elementStart);
4306             assert(equal(range.front.name, "e"));
4307             range = range.skipToParentEndTag();
4308             assert(range._type == EntityType.elementEnd);
4309             assert(equal(range.front.name, "d"));
4310             range = range.skipToParentEndTag();
4311             assert(range._type == EntityType.elementEnd);
4312             assert(equal(range.front.name, "root"));
4313         }
4314         // elementEnd
4315         {
4316             auto xml = "<root>\n" ~
4317                        "    <a><b>foo</b></a>\n" ~
4318                        "    <nothing/>\n" ~
4319                        "    <c></c>\n" ~
4320                        "</root>";
4321 
4322             auto range = parseXML(func(xml));
4323             assert(range.front.type == EntityType.elementStart);
4324             popAndCheck(range, EntityType.elementStart);
4325             popAndCheck(range, EntityType.elementStart);
4326             popAndCheck(range, EntityType.text);
4327             popAndCheck(range, EntityType.elementEnd);
4328             assert(equal(range.front.name, "b"));
4329             {
4330                 auto temp = range.save.skipToParentEndTag();
4331                 assert(temp._type == EntityType.elementEnd);
4332                 assert(equal(temp.front.name, "a"));
4333             }
4334             popAndCheck(range, EntityType.elementEnd);
4335             assert(equal(range.front.name, "a"));
4336             {
4337                 auto temp = range.save.skipToParentEndTag();
4338                 assert(temp._type == EntityType.elementEnd);
4339                 assert(equal(temp.front.name, "root"));
4340             }
4341             popAndCheck(range, EntityType.elementEmpty);
4342             popAndCheck(range, EntityType.elementStart);
4343             popAndCheck(range, EntityType.elementEnd);
4344             assert(equal(range.front.name, "c"));
4345             {
4346                 auto temp = range.save.skipToParentEndTag();
4347                 assert(temp._type == EntityType.elementEnd);
4348                 assert(equal(temp.front.name, "root"));
4349             }
4350             popAndCheck(range, EntityType.elementEnd);
4351             assert(range.skipToParentEndTag().empty);
4352         }
4353         // elementEmpty
4354         {
4355             auto range = parseXML(func("<root/>"));
4356             assert(range.front.type == EntityType.elementEmpty);
4357             assert(range.skipToParentEndTag().empty);
4358         }
4359         {
4360             auto xml = "<root>\n" ~
4361                        "    <a><b>foo</b></a>\n" ~
4362                        "    <nothing/>\n" ~
4363                        "    <c></c>\n" ~
4364                        "    <whatever/>\n" ~
4365                        "</root>";
4366 
4367             auto range = parseXML(func(xml));
4368             popAndCheck(range, EntityType.elementStart);
4369             assert(range.front.type == EntityType.elementStart);
4370             range = range.skipContents();
4371             popAndCheck(range, EntityType.elementEmpty);
4372             assert(equal(range.front.name, "nothing"));
4373             {
4374                 auto temp = range.save;
4375                 popAndCheck(temp, EntityType.elementStart);
4376                 popAndCheck(temp, EntityType.elementEnd);
4377                 popAndCheck(temp, EntityType.elementEmpty);
4378                 assert(equal(temp.front.name, "whatever"));
4379             }
4380             range = range.skipToParentEndTag();
4381             assert(range._type == EntityType.elementEnd);
4382             assert(equal(range.front.name, "root"));
4383         }
4384         // pi
4385         {
4386             auto xml = "<?Sherlock?>\n" ~
4387                        "<root>\n" ~
4388                        "    <?Foo?>\n" ~
4389                        "    <nothing/>\n" ~
4390                        "    <?Bar?>\n" ~
4391                        "    <foo></foo>\n" ~
4392                        "    <?Baz?>\n" ~
4393                        "</root>\n" ~
4394                        "<?Poirot?>\n" ~
4395                        "<?Conan?>";
4396 
4397             auto range = parseXML(func(xml));
4398             assert(range.front.type == EntityType.pi);
4399             assert(equal(range.front.name, "Sherlock"));
4400             assert(range.save.skipToParentEndTag().empty);
4401             popAndCheck(range, EntityType.elementStart);
4402             popAndCheck(range, EntityType.pi);
4403             assert(equal(range.front.name, "Foo"));
4404             {
4405                 auto temp = range.save.skipToParentEndTag();
4406                 assert(temp._type == EntityType.elementEnd);
4407                 assert(equal(temp.front.name, "root"));
4408             }
4409             popAndCheck(range, EntityType.elementEmpty);
4410             popAndCheck(range, EntityType.pi);
4411             assert(equal(range.front.name, "Bar"));
4412             {
4413                 auto temp = range.save.skipToParentEndTag();
4414                 assert(temp._type == EntityType.elementEnd);
4415                 assert(equal(temp.front.name, "root"));
4416             }
4417             popAndCheck(range, EntityType.elementStart);
4418             popAndCheck(range, EntityType.elementEnd);
4419             popAndCheck(range, EntityType.pi);
4420             assert(equal(range.front.name, "Baz"));
4421             range = range.skipToParentEndTag();
4422             assert(range._type == EntityType.elementEnd);
4423             assert(equal(range.front.name, "root"));
4424             popAndCheck(range, EntityType.pi);
4425             assert(equal(range.front.name, "Poirot"));
4426             assert(range.save.skipToParentEndTag().empty);
4427             popAndCheck(range, EntityType.pi);
4428             assert(equal(range.front.name, "Conan"));
4429             assert(range.skipToParentEndTag().empty);
4430         }
4431         // text
4432         {
4433             auto xml = "<root>\n" ~
4434                        "    nothing to say\n" ~
4435                        "    <nothing/>\n" ~
4436                        "    nothing whatsoever\n" ~
4437                        "    <foo></foo>\n" ~
4438                        "    but he keeps talking\n" ~
4439                        "</root>";
4440 
4441             auto range = parseXML(func(xml));
4442             assert(range.front.type == EntityType.elementStart);
4443             popAndCheck(range, EntityType.text);
4444             assert(equal(range.front.text, "\n    nothing to say\n    "));
4445             {
4446                 auto temp = range.save.skipToParentEndTag();
4447                 assert(temp._type == EntityType.elementEnd);
4448                 assert(equal(temp.front.name, "root"));
4449             }
4450             popAndCheck(range, EntityType.elementEmpty);
4451             popAndCheck(range, EntityType.text);
4452             assert(equal(range.front.text, "\n    nothing whatsoever\n    "));
4453             {
4454                 auto temp = range.save.skipToParentEndTag();
4455                 assert(temp._type == EntityType.elementEnd);
4456                 assert(equal(temp.front.name, "root"));
4457             }
4458             popAndCheck(range, EntityType.elementStart);
4459             range = range.skipContents();
4460             popAndCheck(range, EntityType.text);
4461             assert(equal(range.front.text, "\n    but he keeps talking\n"));
4462             range = range.skipToParentEndTag();
4463             assert(range._type == EntityType.elementEnd);
4464             assert(equal(range.front.name, "root"));
4465         }
4466     }}
4467 }
4468 
4469 
4470 /++
4471     Treats the given string like a file path except that each directory
4472     corresponds to the name of a start tag. Note that this does $(I not) try to
4473     implement XPath as that would be quite complicated, and it really doesn't
4474     fit with a StAX parser.
4475 
4476     A start tag should be thought of as a directory, with its child start tags
4477     as the directories it contains.
4478 
4479     All paths should be relative. $(LREF EntityRange) can only move forward
4480     through the document, so using an absolute path would only make sense at
4481     the beginning of the document. As such, absolute paths are treated as
4482     invalid paths.
4483 
4484     $(D_CODE_STRING "./") and $(D_CODE_STRING "../") are supported. Repeated
4485     slashes such as in $(D_CODE_STRING "foo//bar") are not supported and are
4486     treated as an invalid path.
4487 
4488     If $(D range.front.type == EntityType.elementStart), then
    $(D range._skipToPath($(D_STRING "foo"))) will search for the first child
4490     start tag (be it $(LREF EntityType.elementStart) or
4491     $(LREF EntityType.elementEmpty)) with the $(LREF2 name, EntityRange.Entity)
4492     $(D_CODE_STRING "foo"). That start tag must be a direct child of the current
4493     start tag.
4494 
4495     If $(D range.front.type) is any other $(LREF EntityType), then
4496     $(D range._skipToPath($(D_STRING "foo"))) will return an empty range,
4497     because no other $(LREF EntityType)s have child start tags.
4498 
4499     For any $(LREF EntityType), $(D range._skipToPath($(D_STRING "../foo")))
4500     will search for the first start tag with the
4501     $(LREF2 name, EntityRange.Entity) $(D_CODE_STRING "foo") at the same level
4502     as the current entity. If the current entity is a start tag with the name
4503     $(D_CODE_STRING "foo"), it will not be considered a match.
4504 
4505     $(D range._skipToPath($(D_STRING "./"))) is a no-op. However,
4506     $(D range._skipToPath($(D_STRING "../"))) will result in the empty range
4507     (since it doesn't target a specific start tag).
4508 
4509     $(D range._skipToPath($(D_STRING "foo/bar"))) is equivalent to
4510     $(D range._skipToPath($(D_STRING "foo"))._skipToPath($(D_STRING "bar"))),
4511     and $(D range._skipToPath($(D_STRING "../foo/bar"))) is equivalent to
4512     $(D range._skipToPath($(D_STRING "../foo"))._skipToPath($(D_STRING "bar"))).
4513 
4514     Returns: The given range with its $(D front) now at the requested entity if
4515              the path is valid; otherwise, an empty range is returned.
4516 
4517     Throws: $(LREF XMLParsingException) on invalid XML.
4518   +/
R skipToPath(R)(R entityRange, string path)
    if(isInstanceOf!(EntityRange, R))
{
    import std.algorithm.comparison : equal;
    import std.path : pathSplitter;

    // There is nothing to search in an empty range.
    if(entityRange.empty)
        return entityRange;

    // Empty paths and absolute paths are both treated as invalid.
    if(path.empty || path[0] == '/')
        return entityRange.takeNone();

    // The entity types that a search stops on. elementEmpty is only listed
    // for SplitEmpty.no, matching the static if in searchLevel below.
    static if(R.config.splitEmpty == SplitEmpty.yes)
        EntityType[2] tagTypes = [EntityType.elementStart, EntityType.elementEnd];
    else
        EntityType[3] tagTypes = [EntityType.elementStart, EntityType.elementEnd, EntityType.elementEmpty];

    // Moves entityRange forward to the next start tag named tagName without
    // descending into child elements. The result is an empty range if an end
    // tag or the end of the range is reached before a match is found.
    R searchLevel(string tagName)
    {
        // The contents of the current start tag are not searched, and the
        // start tag itself is never treated as a match.
        if(entityRange._type == EntityType.elementStart)
            entityRange = entityRange.skipContents();

        for(;;)
        {
            entityRange = entityRange.skipToEntityType(tagTypes[]);
            if(entityRange.empty)
                return entityRange;

            // Hitting an end tag here means that the search failed.
            if(entityRange._type == EntityType.elementEnd)
                return entityRange.takeNone();

            if(equal(tagName, entityRange._name.save))
                return entityRange;

            // Not a match, so skip the contents of this start tag before
            // looking at the next entity. An elementEmpty has no contents.
            static if(R.config.splitEmpty == SplitEmpty.no)
            {
                if(entityRange._type != EntityType.elementEmpty)
                    entityRange = entityRange.skipContents();
            }
            else
                entityRange = entityRange.skipContents();
        }
    }

    for(auto parts = pathSplitter(path); !parts.empty; parts.popFront())
    {
        // "." refers to the current entity, so there is nothing to do.
        if(parts.front == ".")
            continue;

        if(parts.front != "..")
        {
            // A plain name refers to a child, and only start tags have
            // children.
            if(entityRange._type != EntityType.elementStart)
                return entityRange.takeNone();

            entityRange = entityRange.skipToEntityType(tagTypes[]);
            assert(!entityRange.empty);

            // The start tag had no child start tags at all.
            if(entityRange._type == EntityType.elementEnd)
                return entityRange.takeNone();

            // The first child is the one we want.
            if(equal(parts.front, entityRange._name.save))
                continue;

            entityRange = searchLevel(parts.front);
            if(entityRange.empty)
                return entityRange;
            continue;
        }

        // The first ".." selects the level that the current entity is on.
        // It must be followed by something, or there is no start tag to
        // target.
        parts.popFront();
        if(parts.empty)
            return entityRange.takeNone();

        // Each additional ".." moves up one more level.
        while(parts.front == "..")
        {
            parts.popFront();
            if(parts.empty)
                return entityRange.takeNone();
            entityRange = entityRange.skipToParentEndTag();
            if(entityRange.empty)
                return entityRange;
        }

        // parts.front is now the name to look for on the selected level. The
        // loop increment pops it off afterwards.
        entityRange = searchLevel(parts.front);
        if(entityRange.empty)
            return entityRange;
    }

    return entityRange;
}
4607 
4608 ///
version(dxmlTests) unittest
{
    // Descending with a multi-part path, then moving between siblings with
    // "../".
    {
        auto xml = "<carrot>\n" ~
                   "    <foo>\n" ~
                   "        <bar>\n" ~
                   "            <baz/>\n" ~
                   "            <other/>\n" ~
                   "        </bar>\n" ~
                   "    </foo>\n" ~
                   "</carrot>";

        auto range = parseXML(xml);
        // "<carrot>"
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "carrot");

        range = range.skipToPath("foo/bar");
        // "        <bar>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("baz");
        // "            <baz/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);

        // other is not a child element of baz
        assert(range.skipToPath("other").empty);

        range = range.skipToPath("../other");
        // "            <other/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    // "./" is a no-op, and "../" works from both start tags and end tags.
    {
        auto xml = "<potato>\n" ~
                   "    <foo>\n" ~
                   "        <bar>\n "~
                   "        </bar>\n" ~
                   "        <crazy>\n" ~
                   "        </crazy>\n" ~
                   "        <fou/>\n" ~
                   "    </foo>\n" ~
                   "    <buzz/>\n" ~
                   "</potato>";

        auto range = parseXML(xml);
        // "<potato>"
        assert(range.front.type == EntityType.elementStart);

        range = range.skipToPath("./");
        // "<potato>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "potato");

        range = range.skipToPath("./foo/bar");
        // "        <bar>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("../crazy");
        // "        <crazy>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "crazy");

        // Whether popFront is called here before the call to
        // range.skipToPath("../fou") below, the result is the same, because
        // both <crazy> and </crazy> are at the same level.
        range.popFront();
        // "        </crazy>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "crazy");

        range = range.skipToPath("../fou");
        // "        <fou/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    // Searching stops at the first matching start tag.
    {
        auto xml = "<beet>\n" ~
                   "    <foo a='42'>\n" ~
                   "    </foo>\n" ~
                   "    <foo b='451'>\n" ~
                   "    </foo>\n" ~
                   "</beet>";

        auto range = parseXML(xml);
        range = range.skipToPath("foo");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        // The attributes show that this is the first <foo>.
        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "a");
            assert(attrs.front.value == "42");
        }

        // The current <foo> is not considered a match for "../foo", so this
        // moves on to the second <foo>.
        range = range.skipToPath("../foo");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "b");
            assert(attrs.front.value == "451");
        }
    }
    // skipToPath will work on an empty range but will always return an
    // empty range.
    {
        auto range = parseXML("<root/>");
        assert(range.takeNone().skipToPath("nowhere").empty);
    }
    // Empty and absolute paths will also result in an empty range as will
    // "../" without any actual tag name on the end.
    {
        auto range = parseXML("<root/>");
        assert(range.skipToPath("").empty);
        assert(range.skipToPath("/").empty);
        assert(range.skipToPath("../").empty);
    }
    // Only non-empty start tags have children; all other EntityTypes result
    // in an empty range unless "../" is used.
    {
        auto xml = "<!-- comment -->\n" ~
                   "<root>\n" ~
                   "    <foo/>\n" ~
                   "</root>";
        auto range = parseXML(xml);
        // The range starts on the comment, which has no children.
        assert(range.skipToPath("root").empty);
        assert(range.skipToPath("foo").empty);

        // <root> is on the same level as the comment.
        range = range.skipToPath("../root");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
    }
}
4756 
// Walks through a whole document and checks, at each position, which paths
// skipToPath can and cannot reach. This is repeated for every range type in
// testRangeFuncs and every configuration in someTestConfigs.
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, enforce;
    import dxml.internal : testRangeFuncs;

    // Checks that range.skipToPath(path) does not throw an
    // XMLParsingException and that the result is a non-empty range whose
    // front has the given type and name. line defaults to the caller's line
    // so that failures are reported at the call site.
    static void testPath(R)(R range, string path, EntityType type, string name, size_t line = __LINE__)
    {
        auto result = assertNotThrown!XMLParsingException(range.skipToPath(path), "unittest 1", __FILE__, line);
        enforce!AssertError(!result.empty, "unittest 2", __FILE__, line);
        enforce!AssertError(result.front.type == type, "unittest 3", __FILE__, line);
        enforce!AssertError(equal(result.front.name, name), "unittest 4", __FILE__, line);
    }

    // Pops an empty element tag off the front of range. With SplitEmpty.yes,
    // two pops are needed, because the tag is reported as a start tag
    // followed by an end tag.
    static void popEmpty(R)(ref R range)
    {
        range.popFront();
        static if(range.config.splitEmpty == SplitEmpty.yes)
            range.popFront();
    }

    auto xml = "<superuser>\n" ~
               "    <!-- comment -->\n" ~
               "    <?pi?>\n" ~
               "    <![CDATA[cdata]]>\n" ~
               "    <foo/>\n" ~
               "    <bar/>\n" ~
               "    <!-- comment -->\n" ~
               "    <!-- comment -->\n" ~
               "    <baz/>\n" ~
               "    <frobozz>\n" ~
               "        <!-- comment -->\n" ~
               "        <!-- comment -->\n" ~
               "        <whatever/>\n" ~
               "        <!-- comment -->\n" ~
               "        <!-- comment -->\n" ~
               "    </frobozz>\n" ~
               "    <!-- comment -->\n" ~
               "    <!-- comment -->\n" ~
               "    <xyzzy/>\n" ~
               "</superuser>";

    static foreach(func; testRangeFuncs)
    {{
        auto text = func(xml);

        static foreach(config; someTestConfigs)
        {{
            // The EntityType that an empty element tag is first reported as
            // under this configuration.
            static if(config.splitEmpty == SplitEmpty.yes)
                enum empty = EntityType.elementStart;
            else
                enum empty = EntityType.elementEmpty;

            auto range = parseXML!config(text.save);

            // At <superuser>: only direct children can be reached by a
            // single name, and names must match exactly.
            assert(range.save.skipToPath("whatever").empty);
            assert(range.save.skipToPath("frobozz/whateve").empty);

            testPath(range.save, "foo", empty, "foo");
            testPath(range.save, "bar", empty, "bar");
            testPath(range.save, "baz", empty, "baz");
            testPath(range.save, "frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "frobozz/whatever", empty, "whatever");
            testPath(range.save, "xyzzy", empty, "xyzzy");

            // The entities between <superuser> and <foo/>: they have no
            // children, but their siblings can be reached via "../".
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("foo").empty);
                testPath(range.save, "../foo", empty, "foo");
                testPath(range.save, "../bar", empty, "bar");
                testPath(range.save, "../baz", empty, "baz");
                testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(range.save, "../frobozz/whatever", empty, "whatever");
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            // At <foo/>: it has no children, the current tag does not match
            // "../foo", and paths ending in ".." target nothing.
            assert(equal(range.front.name, "foo"));
            assert(range.save.skipToPath("foo").empty);
            assert(range.save.skipToPath("./foo").empty);
            assert(range.save.skipToPath("../foo").empty);
            assert(range.save.skipToPath("bar").empty);
            assert(range.save.skipToPath("baz").empty);
            assert(range.save.skipToPath("frobozz").empty);
            assert(range.save.skipToPath("whatever").empty);
            assert(range.save.skipToPath("../").empty);
            assert(range.save.skipToPath("../../").empty);

            testPath(range.save, "../bar", empty, "bar");
            testPath(range.save, "../baz", empty, "baz");
            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // At <bar/>.
            popEmpty(range);
            assert(range.save.skipToPath("bar").empty);
            testPath(range.save, "../baz", empty, "baz");
            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // The comments between <bar/> and <baz/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("baz").empty);
                testPath(range.save, "../baz", empty, "baz");
                testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(range.save, "../frobozz/whatever", empty, "whatever");
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            // At <baz/>.
            assert(equal(range.front.name, "baz"));

            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // At <frobozz>: its child can be reached directly.
            popEmpty(range);
            assert(equal(range.front.name, "frobozz"));
            assert(range.save.skipToPath("wizard").empty);
            testPath(range.save, "whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // The comments inside <frobozz> before <whatever/>: "../../" is
            // needed to reach the siblings of <frobozz>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("whatever").empty);
                testPath(range.save, "../whatever", empty, "whatever");
                testPath(range.save, "../../xyzzy", empty, "xyzzy");
            }
            // At <whatever/>: tags that come earlier in the document cannot
            // be reached.
            assert(equal(range.front.name, "whatever"));
            assert(range.save.skipToPath("frobozz").empty);
            assert(range.save.skipToPath("../frobozz").empty);
            assert(range.save.skipToPath("../xyzzy").empty);
            assert(range.save.skipToPath("../../frobozz").empty);

            testPath(range.save, "../../xyzzy", empty, "xyzzy");

            // The comments inside <frobozz> after <whatever/>.
            popEmpty(range);
            for(; range.front.type != EntityType.elementEnd; range.popFront())
            {
                assert(range.save.skipToPath("xyzzy").empty);
                assert(range.save.skipToPath("../xyzzy").empty);
                testPath(range.save, "../../xyzzy", empty, "xyzzy");
            }
            // At </frobozz>.
            assert(equal(range.front.name, "frobozz"));

            // The comments between </frobozz> and <xyzzy/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("xyzzy").empty);
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "xyzzy"));

            // At </superuser>: nothing can be reached from here.
            popEmpty(range);
            assert(equal(range.front.name, "superuser"));
            assert(range.save.skipToPath("superuser").empty);
            assert(range.save.skipToPath("foo").empty);
            assert(range.save.skipToPath("../foo").empty);
            assert(range.save.skipToPath("../../foo").empty);
        }}
    }}
}
4920 
4921 
4922 //------------------------------------------------------------------------------
4923 // Private Section
4924 //------------------------------------------------------------------------------
4925 private:
4926 
4927 
// Test helper which creates the same type as EntityRange's _text member for
// the given config and range type, with its input set to the code units of
// xmlText. All other fields are left at their default values.
version(dxmlTests) auto testParser(Config config = Config.init, R)(R xmlText) @trusted pure nothrow @nogc
{
    import std.utf : byCodeUnit;

    alias Text = typeof(EntityRange!(config, R)._text);

    Text parserText;
    parserText.input = xmlText.byCodeUnit();
    return parserText;
}
4935 
4936 
// Used to indicate where in the grammar we're currently parsing. Each member
// describes what has been parsed so far and what is expected next. The
// productions quoted below come from the XML grammar.
enum GrammarPos
{
    // Nothing has been parsed yet.
    documentStart,

    // document ::= prolog element Misc*
    // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
    // This is that first Misc*. The next entity to parse is either a Misc, the
    // doctypedecl, or the root element which follows the prolog.
    prologMisc1,

    // document ::= prolog element Misc*
    // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
    // This is that second Misc*. The next entity to parse is either a Misc or
    // the root element which follows the prolog.
    prologMisc2,

    // Used with SplitEmpty.yes to tell the parser that we're currently at an
    // empty element tag that we're treating as a start tag, so the next entity
    // will be an end tag even though we didn't actually parse one.
    splittingEmpty,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is at the beginning of content at the first CharData?. The next
    // thing to parse will be a CharData, element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentCharData1,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is after the first CharData?. The next thing to parse will be an
    // element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentMid,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is at the second CharData?. The next thing to parse will be a
    // CharData, element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentCharData2,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is after the second CharData?. The next thing to parse is an ETag.
    endTag,

    // document ::= prolog element Misc*
    // This is the Misc* at the end of the document. The next thing to parse is
    // either another Misc, or we will hit the end of the document.
    endMisc,

    // The end of the document (and the grammar) has been reached.
    documentEnd
}
4997 
4998 
// Thin wrapper around skipOver for the parser's text state: if text.input
// begins with needle, the needle is removed from the front of the input and
// text.pos.col is advanced by needle.length; otherwise text is left untouched.
//
// Returns whether the needle was stripped.
//
// The needle must not contain newlines, since only the column is adjusted.
bool stripStartsWith(Text)(ref Text text, string needle)
{
    import std.algorithm.searching : skipOver;
    import std.utf : byCodeUnit;

    //TODO When parsing an array of char, it would be ideal to cleanly strip
    // off any byCodeUnit and takeExactly wrappers so that skipOver can compare
    // the string being parsed and the needle with ==. That may already happen
    // in some cases when text.input is a byCodeUnit result, but it won't
    // happen in every case where it ideally would. It may also be worth
    // looking into using byUTF on the needle so that it matches the encoding
    // of text.input, or even making the needle match the encoding when it's
    // passed in instead of always being string.
    immutable matched = text.input.skipOver(needle.byCodeUnit());

    if(matched)
        text.pos.col += needle.length;

    return matched;
}
5023 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import dxml.internal : equalCU, testRangeFuncs;
    import std.exception : enforce;

    // Checks stripStartsWith against origHaystack (converted with func): the
    // return value, the input that is left over, and the resulting position.
    // The check is made once from the default position and once after the
    // starting position was moved 3 lines down and 7 columns over.
    static void test(alias func)(string origHaystack, string needle, string remainder, bool startsWith,
                                 int row, int col, size_t line = __LINE__)
    {
        auto source = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(source.save);
            enforce!AssertError(parser.stripStartsWith(needle) == startsWith, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(parser.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Shifted starting position. The 7 extra columns are only expected
        // when the result is still on the first line.
        {
            auto parser = testParser(source);
            parser.pos.line += 3;
            parser.pos.col += 7;

            const shiftedCol = row == 1 ? col + 7 : col;
            auto shiftedPos = TextPos(row + 3, shiftedCol);

            enforce!AssertError(parser.stripStartsWith(needle) == startsWith, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(parser.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == shiftedPos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Needle is a prefix (or the whole) of the haystack.
        test!func("hello world", "hello", " world", true, 1, "hello".length + 1);
        test!func("hello world", "hello world", "", true, 1, "hello world".length + 1);

        // Needle does not match, partially matches, or is longer than the haystack.
        test!func("hello world", "foo", "hello world", false, 1, 1);
        test!func("hello world", "hello sally", "hello world", false, 1, 1);
        test!func("hello world", "hello world ", "hello world", false, 1, 1);
    }
}
5060 
// Verifies that stripStartsWith can be called from @safe pure code for every
// range type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto xml = func(`foo`);
        auto text = testParser!simpleXML(xml);
        assert(text.stripStartsWith("fo"));
    }}
}
5073 
5074 
// Removes leading whitespace (' ', '\t', '\r', and '\n') from text.input and
// keeps text.pos in sync. Newlines are not ignored: each '\n' moves the
// position to the next line via nextLine.
//
// Returns whether any whitespace was stripped.
bool stripWS(Text)(ref Text text)
{
    // When the input knows its length, the column is fixed up once at the end
    // from the difference in lengths rather than being incremented per
    // character.
    enum trackByLength = hasLength!(Text.Input);

    static if(trackByLength)
        size_t lenAtLineStart = text.input.length;

    bool sawWS = false;

    while(!text.input.empty)
    {
        immutable unit = text.input.front;

        if(unit == '\n')
        {
            text.input.popFront();
            // Everything stripped so far belonged to a previous line, so the
            // column count restarts from here.
            static if(trackByLength)
                lenAtLineStart = text.input.length;
            nextLine!(Text.config)(text.pos);
        }
        else if(unit == ' ' || unit == '\t' || unit == '\r')
        {
            text.input.popFront();
            static if(!trackByLength)
                ++text.pos.col;
        }
        else
            break;

        sawWS = true;
    }

    static if(trackByLength)
        text.pos.col += lenAtLineStart - text.input.length;

    return sawWS;
}
5117 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import dxml.internal : equalCU, testRangeFuncs;
    import std.exception : enforce;

    // Checks stripWS against origHaystack (converted with func): the return
    // value, the input that is left over, and the resulting position. The
    // check is made once from the default position and once after the
    // starting position was moved 3 lines down and 7 columns over.
    static void test(alias func)(string origHaystack, string remainder, bool stripped,
                                 int row, int col, size_t line = __LINE__)
    {
        auto source = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(source.save);
            enforce!AssertError(parser.stripWS() == stripped, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(parser.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Shifted starting position. The 7 extra columns are only expected
        // when the result is still on the first line.
        {
            auto parser = testParser(source);
            parser.pos.line += 3;
            parser.pos.col += 7;

            const shiftedCol = row == 1 ? col + 7 : col;
            auto shiftedPos = TextPos(row + 3, shiftedCol);

            enforce!AssertError(parser.stripWS() == stripped, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(parser.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == shiftedPos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Whitespace on a single line.
        test!func("  \t\rhello world", "hello world", true, 1, 5);

        // Whitespace spanning several lines.
        test!func("  \n \n \n  \nhello world", "hello world", true, 5, 1);
        test!func("  \n \n \n  \n  hello world", "hello world", true, 5, 3);

        // No leading whitespace at all.
        test!func("hello world", "hello world", false, 1, 1);
    }
}
5154 
// Verifies that stripWS can be called from @safe pure code for every range
// type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto source = func(`foo`);
        auto parser = testParser!simpleXML(source);
        immutable stripped = parser.stripWS();
        assert(!stripped);
    }}
}
5166 
5167 
// Removes everything up to and including the first occurrence of needle from
// text.input and returns the part which preceded the needle as a slice (or
// takeExactly) of the original input. The needle itself is not part of the
// result. An XMLParsingException is thrown if the needle is not found.
//
// skipQuotedText is forwarded unchanged to _takeUntil.
auto takeUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    enum wantSlice = true;
    alias impl = _takeUntil!(wantSlice, needle, skipQuotedText, Text);
    return impl(text);
}
5176 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Checks that takeUntilAndDrop!(needle, sqt) returns expected, leaves
    // remainder in the input, and ends up at the given position. The check is
    // made once from the default position and once after the starting position
    // was moved 3 lines down and 7 columns over.
    static void test(alias func, string needle, bool sqt )(string origHaystack, string expected, string remainder,
                                                           int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), expected),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The 7 extra columns are only expected when the result is still
            // on the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), expected),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeUntilAndDrop!(needle, sqt) throws an XMLParsingException
    // whose position is the given one, both from the default position and from
    // the shifted one.
    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // These cases contain no quotes, so skipQuotedText makes no difference.
        static foreach(sqt; [false, true])
        {
            {
                auto haystack = "hello world";
                enum needle = "world";

                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)(haystack, "hello ", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "le", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "l", "llo world", 1, 4);
            {
                enum needle = "great";
                enum expected = "プログラミング in D is ";
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", expected,
                                                     "great indeed"[i .. $], 1, codeLen!(func, expected) + i + 1);
                }
            }
            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without skipQuotedText, quotes are ordinary characters.
        test!(func, "*", false)(`hello '*' "*" * world`, `hello '`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `hello '"`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `hello "'`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, `hello ''' `, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, `hello """ `, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With skipQuotedText, needles inside quotes are not matched, and an
        // unmatched quote is reported at the position of that quote.
        test!(func, "*", true)(`hello '*' "*" * world`, `hello '*' "*" `, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, `hello '"*' `, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, `hello "'*" `, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        test!(func, "*", true)(`hello '' "" * world`, `hello '' "" `, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "foo '\n \n \n' bar", "", 4, 7);
    }
}
5281 
// Verifies that takeUntilAndDrop can be called from @safe pure code for every
// range type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;
    import std.algorithm.comparison : equal;

    static foreach(func; testRangeFuncs)
    {{
        auto source = func(`foo`);
        auto parser = testParser!simpleXML(source);
        auto taken = parser.takeUntilAndDrop!"o"();
        assert(equal(taken, "f"));
    }}
}
5294 
// Same as takeUntilAndDrop except that nothing is returned: the text up to
// and including the needle is simply removed from text.input. It's meant for
// the cases where the config says that something should be skipped.
void skipUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    enum wantSlice = false;
    alias impl = _takeUntil!(wantSlice, needle, skipQuotedText, Text);
    impl(text);
}
5301 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import dxml.internal : codeLen, testRangeFuncs;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;

    // Checks that skipUntilAndDrop!(needle, sqt) does not throw, leaves
    // remainder in the input, and ends up at the given position. The check is
    // made once from the default position and once after the starting position
    // was moved 3 lines down and 7 columns over.
    static void test(alias func, string needle, bool sqt)(string origHaystack, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        auto source = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(source.save);
            assertNotThrown!XMLParsingException(parser.skipUntilAndDrop!(needle, sqt)(), "unittest failure 1",
                                                __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Shifted starting position. The 7 extra columns are only expected
        // when the result is still on the first line.
        {
            auto parser = testParser(source);
            parser.pos.line += 3;
            parser.pos.col += 7;

            const shiftedCol = row == 1 ? col + 7 : col;
            auto shiftedPos = TextPos(row + 3, shiftedCol);

            assertNotThrown!XMLParsingException(parser.skipUntilAndDrop!(needle, sqt)(), "unittest failure 4",
                                                __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == shiftedPos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that skipUntilAndDrop!(needle, sqt) throws an XMLParsingException
    // whose position is the given one, both from the default position and from
    // the shifted one.
    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto source = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(source.save);
            auto thrown = collectException!XMLParsingException(parser.skipUntilAndDrop!(needle, sqt)());
            enforce!AssertError(thrown !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(thrown.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Shifted starting position.
        {
            auto parser = testParser(source);
            parser.pos.line += 3;
            parser.pos.col += 7;

            const shiftedCol = row == 1 ? col + 7 : col;
            auto shiftedPos = TextPos(row + 3, shiftedCol);

            auto thrown = collectException!XMLParsingException(parser.skipUntilAndDrop!(needle, sqt)());
            enforce!AssertError(thrown !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(thrown.pos == shiftedPos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // These cases contain no quotes, so skipQuotedText makes no difference.
        static foreach(sqt; [false, true])
        {
            {
                enum needle = "world";
                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)("hello world", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "llo world", 1, 4);

            {
                enum needle = "great";
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", "great indeed"[i .. $],
                                                     1, codeLen!(func, "プログラミング in D is ") + i + 1);
                }
            }

            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without skipQuotedText, quotes are ordinary characters.
        test!(func, "*", false)(`hello '*' "*" * world`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With skipQuotedText, needles inside quotes are not matched, and an
        // unmatched quote is reported at the position of that quote.
        test!(func, "*", true)(`hello '*' "*" * world`, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        test!(func, "*", true)(`hello '' "" * world`, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "", 4, 7);
    }
}
5404 
// Verifies that skipUntilAndDrop can be called from @safe pure code for every
// range type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;
    import std.algorithm.comparison : equal;

    static foreach(func; testRangeFuncs)
    {{
        auto source = func(`foo`);
        auto parser = testParser!simpleXML(source);
        parser.skipUntilAndDrop!"o"();
        // Only the first "o" is dropped, so one is left over.
        assert(equal(parser.input, "o"));
    }}
}
5418 
// Shared implementation of takeUntilAndDrop and skipUntilAndDrop.
//
// Scans text.input for the first occurrence of needle, removes everything up
// to and including it from text.input, and updates text.pos accordingly.
//
// Template parameters:
//   retSlice       = if true, the code units which preceded the needle are
//                    returned as takeExactly over the original input;
//                    otherwise, nothing is returned.
//   needle         = the text to search for. It may not contain whitespace
//                    (enforced by the static assert below), so the needle never
//                    spans a line break.
//   skipQuotedText = if true, anything between a pair of matching single or
//                    double quotes is skipped over without being searched.
//
// Throws: XMLParsingException with the position that text had on entry if the
// needle is not found, or (with skipQuotedText) with the position of the
// opening quote if a quote has no matching closing quote.
auto _takeUntil(bool retSlice, string needle, bool skipQuotedText, Text)(ref Text text)
{
    import std.algorithm : find;
    import std.ascii : isWhite;
    import std.range : takeExactly;

    static assert(needle.find!isWhite().empty);

    // Saved so that the slice can be taken from the start and so that the
    // original position can be reported if the needle isn't found.
    auto orig = text.save;
    bool found = false;
    // Number of code units consumed which are not part of the needle - i.e.
    // the length of the slice to return.
    size_t takeLen = 0;
    // Value that takeLen had at the start of the current line, so
    // takeLen - lineStart is how far into the current line we are.
    size_t lineStart = 0;

    // Accounts for a newline code unit: counts it, moves text.pos on via
    // nextLine, and restarts the per-line column count.
    void processNewline()
    {
        ++takeLen;
        nextLine!(Text.config)(text.pos);
        lineStart = takeLen;
    }

    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            // Possible start of the needle.
            case cast(ElementType!(Text.Input))needle[0]:
            {
                static if(needle.length == 1)
                {
                    found = true;
                    text.input.popFront();
                    break loop;
                }
                else static if(needle.length == 2)
                {
                    text.input.popFront();
                    if(!text.input.empty && text.input.front == needle[1])
                    {
                        found = true;
                        text.input.popFront();
                        break loop;
                    }
                    // Not the needle after all. The first code unit has
                    // already been popped, so skip the popFront at the bottom
                    // of the loop and examine the new front normally.
                    ++takeLen;
                    continue;
                }
                else
                {
                    text.input.popFront();
                    // Where to resume from if the rest of the needle doesn't
                    // match.
                    auto saved = text.input.save;
                    foreach(i, c; needle[1 .. $])
                    {
                        if(text.input.empty)
                        {
                            // Ran out of input partway through the needle.
                            // Count the first code unit plus the i that
                            // matched; found stays false.
                            takeLen += i + 1;
                            break loop;
                        }
                        if(text.input.front != c)
                        {
                            // Mismatch: only the first code unit is consumed,
                            // and scanning resumes right after it.
                            text.input = saved;
                            ++takeLen;
                            continue loop;
                        }
                        text.input.popFront();
                    }
                    found = true;
                    break loop;
                }
            }
            static if(skipQuotedText)
            {
                static foreach(quote; ['\'', '"'])
                {
                    case quote:
                    {
                        // Position of the opening quote, for the error message
                        // if it turns out to be unmatched.
                        auto quotePos = text.pos;
                        quotePos.col += takeLen - lineStart;
                        ++takeLen;
                        while(true)
                        {
                            // On the first iteration, this pops the opening
                            // quote. After that, it pops the code unit that
                            // was just counted.
                            text.input.popFront();
                            if(text.input.empty)
                                throw new XMLParsingException("Failed to find matching quote", quotePos);
                            switch(text.input.front)
                            {
                                case quote:
                                {
                                    // Closing quote: consume it and go back to
                                    // searching for the needle.
                                    ++takeLen;
                                    text.input.popFront();
                                    continue loop;
                                }
                                case '\n':
                                {
                                    processNewline();
                                    break;
                                }
                                default:
                                {
                                    ++takeLen;
                                    break;
                                }
                            }
                        }
                        assert(0); // the compiler isn't smart enough to see that this is unreachable.
                    }
                }
            }
            case '\n':
            {
                processNewline();
                break;
            }
            default:
            {
                ++takeLen;
                break;
            }
        }

        text.input.popFront();
    }

    // Advance the column by what was consumed on the final line plus the
    // needle itself (which cannot contain a newline).
    text.pos.col += takeLen - lineStart + needle.length;

    if(!found)
        throw new XMLParsingException("Failed to find: " ~ needle, orig.pos);

    static if(retSlice)
        return takeExactly(orig.input, takeLen);
}
5547 
5548 
// Admittedly, this name is uncomfortably close to skipUntilAndDrop, but that
// seems preferable to choosing between behaviors with template arguments -
// especially since the logic is so different.
//
// Advances text.input (keeping text.pos in sync) until its front is one of
// the given delimiter characters. The delimiter that was found is left at the
// front of the input. If the input is exhausted without finding any of the
// delimiters, an XMLParsingException is thrown.
template skipToOneOf(delims...)
{
    // Every delimiter must be a non-whitespace char.
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char));
        static assert(!isSpace(delim));
    }

    void skipToOneOf(Text)(ref Text text)
    {
        for(;;)
        {
            if(text.input.empty)
                throw new XMLParsingException("Prematurely reached end of document", text.pos);

            immutable unit = text.input.front;

            static foreach(delim; delims)
            {
                if(unit == delim)
                    return;
            }

            if(unit == '\n')
            {
                nextLine!(Text.config)(text.pos);
                text.input.popFront();
            }
            else
                popFrontAndIncCol(text);
        }
    }
}
5587 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import dxml.internal : codeLen, testRangeFuncs;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;

    // Checks that skipToOneOf!delims does not throw, leaves remainder in the
    // input, and ends up at the given position. The check is made once from
    // the default position and once after the starting position was moved 3
    // lines down and 7 columns over.
    static void test(alias func, delims...)(string origHaystack, string remainder,
                                            int row, int col, size_t line = __LINE__)
    {
        auto source = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(source.save);
            assertNotThrown!XMLParsingException(parser.skipToOneOf!delims(), "unittest 1", __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Shifted starting position. The 7 extra columns are only expected
        // when the result is still on the first line.
        {
            auto parser = testParser(source);
            parser.pos.line += 3;
            parser.pos.col += 7;

            const shiftedCol = row == 1 ? col + 7 : col;
            auto shiftedPos = TextPos(row + 3, shiftedCol);

            assertNotThrown!XMLParsingException(parser.skipToOneOf!delims(), "unittest 4", __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == shiftedPos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that skipToOneOf!delims throws an XMLParsingException whose
    // position is the given one, both from the default position and from the
    // shifted one.
    static void testFail(alias func, delims...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto source = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(source.save);
            auto thrown = collectException!XMLParsingException(parser.skipToOneOf!delims());
            enforce!AssertError(thrown !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(thrown.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Shifted starting position.
        {
            auto parser = testParser(source);
            parser.pos.line += 3;
            parser.pos.col += 7;

            const shiftedCol = row == 1 ? col + 7 : col;
            auto shiftedPos = TextPos(row + 3, shiftedCol);

            auto thrown = collectException!XMLParsingException(parser.skipToOneOf!delims());
            enforce!AssertError(thrown !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(thrown.pos == shiftedPos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // A delimiter is present somewhere in the input.
        test!(func, 'o', 'w')("hello world", "o world", 1, 5);
        test!(func, 'r', 'w', '1', '+', '*')("hello world", "world", 1, 7);
        test!(func, 'z', 'y')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "yzzy \nf\ng", 6, 6);
        test!(func, 'o', 'g')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "g", 8, 1);
        test!(func, 'g', 'x')("プログラミング in D is great indeed", "great indeed",
                              1, codeLen!(func, "プログラミング in D is ") + 1);

        // No delimiter is present, so the end of the input is reached.
        testFail!(func, 'a', 'b')("hello world", 1, 12);
        testFail!(func, 'a', 'b')("hello\n\nworld", 3, 6);
        testFail!(func, 'a', 'b')("プログラミング",  1, codeLen!(func, "プログラミング") + 1);
    }
}
5650 
// Verifies that skipToOneOf can be called from @safe pure code for every range
// type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;
    import std.algorithm.comparison : equal;

    static foreach(func; testRangeFuncs)
    {{
        auto source = func(`foo`);
        auto parser = testParser!simpleXML(source);
        parser.skipToOneOf!('o')();
        // The delimiter which was found stays at the front of the input.
        assert(equal(parser.input, "oo"));
    }}
}
5664 
5665 
// Expects the front of the input to be text enclosed in single or double
// quotes. Returns a slice of the input containing the text between the quotes
// and leaves the input one code unit past the closing quote.
//
// Throws an XMLParsingException if the front of the input is not a quote.
// checkNotEmpty is called first to deal with empty input.
auto takeEnquotedText(Text)(ref Text text)
{
    checkNotEmpty(text);

    switch(text.input.front)
    {
        // takeUntilAndDrop takes its needle at compile time (which makes more
        // sense everywhere else it's used), so a case is generated for each
        // kind of quote rather than passing the quote along at runtime.
        static foreach(quoteChar; [`"`, `'`])
        {
            case quoteChar[0]:
            {
                popFrontAndIncCol(text);
                return takeUntilAndDrop!quoteChar(text);
            }
        }
        default:
            throw new XMLParsingException("Expected quoted text", text.pos);
    }
}
5686 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown, enforce;
    import std.range : only;
    import dxml.internal : codeLen, testRangeFuncs;

    // Runs takeEnquotedText on origHaystack (after converting it with func) and
    // checks the returned text, the input that is left over, and the resulting
    // position. row and col are the position expected when the parser starts
    // at its default position. The checks are run twice: once from the default
    // position and once with the starting position shifted by 3 lines and 7
    // columns. Every check has its own failure number so that a failure
    // identifies exactly which check tripped, and line is the caller's line so
    // that the AssertError points at the failing test case.
    static void test(alias func)(string origHaystack, string expected, string remainder,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(takeEnquotedText(text), expected), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The extra 7 columns are only expected in the result when the
            // expected position is still on the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(takeEnquotedText(text), expected), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeEnquotedText throws an XMLParsingException when given
    // origHaystack (after converting it with func).
    static void testFail(alias func)(string origHaystack, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto text = testParser(haystack);
        assertThrown!XMLParsingException(text.takeEnquotedText(), "unittest failure", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        foreach(quote; only("\"", "'"))
        {
            test!func(quote ~ quote, "", "", 1, 3);
            test!func(quote ~ "hello world" ~ quote, "hello world", "", 1, 14);
            test!func(quote ~ "hello world" ~ quote ~ " foo", "hello world", " foo", 1, 14);
            {
                // The expected column is counted in code units of the range
                // that func produces, so it is computed with codeLen (as the
                // neighboring tests do) rather than hard-coded. The + 3
                // accounts for the two quotes and the 1-based column.
                auto haystack = quote ~ "プログラミング " ~ quote ~ "in D";
                enum len = codeLen!(func, "プログラミング ");
                test!func(haystack, "プログラミング ", "in D", 1, len + 3);
            }
        }

        // Inputs which are empty, which do not start with a quote, or which
        // have no closing quote matching the opening one.
        foreach(str; only(`hello`, `"hello'`, `"hello`, `'hello"`, `'hello`, ``, `"'`, `"`, `'"`, `'`))
            testFail!func(str);
    }
}
5742 
5743 
// Takes a name (per the Name grammar rule) off of the front of the input and
// returns it.
// The name ends at the first XML whitespace character, at the first of the
// given delimiters, or at the end of the input - whichever comes first. The
// whitespace/delimiter is not part of the returned name and stays at the front
// of the input.
// An XMLParsingException is thrown if a character which is not legal in a name
// is encountered before the name ends.
template takeName(delims...)
{
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char), delim);
        static assert(!isSpace(delim));
    }

    auto takeName(Text)(ref Text text)
    {
        import dxml.internal : isNameChar, isNameStartChar;
        import std.format : format;
        import std.range : takeExactly;
        import std.utf : decodeFront, UseReplacementDchar;

        assert(!text.input.empty);

        // Saved so that the name can be returned as the first nameLen code
        // units of the input as it was on entry.
        auto start = text.input.save;

        // The number of code units which have been accepted as part of the
        // name so far.
        size_t nameLen;

        // The first character is checked with isNameStartChar, whereas the
        // rest of the characters are checked with isNameChar.
        immutable firstC = text.input.decodeFront!(UseReplacementDchar.yes)(nameLen);
        if(!isNameStartChar(firstC))
            throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(firstC), text.pos);

        scan: while(!text.input.empty)
        {
            immutable unit = text.input.front;
            if(isSpace(unit))
                break;
            static foreach(delim; delims)
            {
                if(unit == delim)
                    break scan;
            }

            size_t width;
            immutable nextC = text.input.decodeFront!(UseReplacementDchar.yes)(width);
            if(!isNameChar(nextC))
            {
                // Point the exception at the offending character rather than
                // at the start of the name.
                text.pos.col += nameLen;
                throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(nextC), text.pos);
            }
            nameLen += width;
        }

        text.pos.col += nameLen;

        return takeExactly(start, nameLen);
    }
}
5809 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import dxml.internal : codeLen, testRangeFuncs;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.typecons : tuple;

    // Checks that takeName!delim succeeds on source (after converting it with
    // func), that it returns wantName, that wantRest is what is left in the
    // input, and that the position ends up where expected. row and col are the
    // position expected when the parser starts at its default position. line
    // is the caller's line so that a failure points at the test case.
    static void test(alias func, delim...)(string source, string wantName, string wantRest,
                                           int row, int col, size_t line = __LINE__)
    {
        auto range = func(source);

        // First pass: the parser starts at its default position.
        {
            auto parser = testParser(range.save);
            auto taken = parser.takeName!delim();
            enforce!AssertError(equal(taken, wantName), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(parser.input, wantRest), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Second pass: the starting position is shifted by 3 lines and 7
        // columns. The extra columns are only expected in the result when the
        // expected position is on the first line.
        {
            auto parser = testParser(range);
            parser.pos.line += 3;
            parser.pos.col += 7;
            auto shifted = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto taken = parser.takeName!delim();
            enforce!AssertError(equal(taken, wantName), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(parser.input, wantRest), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == shifted, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeName!delim throws an XMLParsingException on source
    // (after converting it with func) and that the exception's position is the
    // expected one, both from the default starting position and from the
    // shifted one.
    static void testFail(alias func, delim...)(string source, int row, int col, size_t line = __LINE__)
    {
        auto range = func(source);

        {
            auto parser = testParser(range.save);
            auto err = collectException!XMLParsingException(parser.takeName!delim());
            enforce!AssertError(err !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(err.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        {
            auto parser = testParser(range);
            parser.pos.line += 3;
            parser.pos.col += 7;
            auto shifted = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto err = collectException!XMLParsingException(parser.takeName!delim());
            enforce!AssertError(err !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(err.pos == shifted, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Names which should be accepted, each followed by a variety of
        // whitespace and/or delimiter-led remainders.
        static foreach(validName; ["hello", "プログラミング", "h_:llo-.42", "_.", "_-", "_42"])
        {{
            enum nameLen = codeLen!(func, validName);

            static foreach(tail; ["", " ", "\t", "\r", "\n", " foo", "\tfoo", "\rfoo", "\nfoo",  "  foo \n \r "])
            {{
                enum plain = validName ~ tail;
                enum delimTail = '>' ~ tail;
                enum delimited = validName ~ delimTail;
                test!func(plain, validName, tail, 1, nameLen + 1);
                test!(func, '=')(plain, validName, tail, 1, nameLen + 1);
                test!(func, '>', '|')(delimited, validName, delimTail, 1, nameLen + 1);
                test!(func, '|', '>')(delimited, validName, delimTail, 1, nameLen + 1);
            }}
        }}

        // Inputs which should be rejected, along with the position that the
        // exception is expected to report.
        static foreach(failCase; [tuple(" ", 1, 1), tuple("<", 1, 1), tuple("foo!", 1, 4), tuple("foo!<", 1, 4)])
        {{
            testFail!func(failCase[0], failCase[1], failCase[2]);
            testFail!func(failCase[0] ~ '>', failCase[1], failCase[2]);
            testFail!(func, '?')(failCase[0], failCase[1], failCase[2]);
            testFail!(func, '=')(failCase[0] ~ '=', failCase[1], failCase[2]);
        }}

        // A delimiter at the very front is rejected as well.
        testFail!(func, '>')(">", 1, 1);
        testFail!(func, '?')("?", 1, 1);
        testFail!(func, '?')("プログ&ラミング", 1, codeLen!(func, "プログ&"));

        // Inputs whose first character is rejected even though the same
        // characters are accepted later in a name ("_42" and "_." above).
        static foreach(badStart; [tuple("42", 1, 1), tuple(".", 1, 1), tuple(".a", 1, 1)])
        {
            testFail!func(badStart[0], badStart[1], badStart[2]);
            testFail!(func, '>')(badStart[0], badStart[1], badStart[2]);
        }
    }
}
5898 
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;
    import std.algorithm.comparison : equal;

    // Because this unittest is itself @safe pure, it only compiles if takeName
    // can be called from @safe pure code for each of the functions in
    // testRangeFuncs.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto parser = testParser!simpleXML(makeRange(`foo`));
        assert(equal(parser.takeName(), "foo"));
    }}
}
5911 
5912 
5913 // This removes an attribute value from the front of the input, partially
5914 // validates it, and returns it. The validation that is not done is whether
5915 // the value in a character reference is valid. It's checked for whether the
5916 // characters used in it are valid but not whether the number they form is a
5917 // valid Unicode character. Checking the number doesn't seem worth the extra
5918 // complication, and it's not required for the XML to be "well-formed."
5919 // dxml.util.parseCharRef will check that it is fully correct if it is used.
5920 auto takeAttValue(Text)(ref Text text)
5921 {
5922     // AttValue    ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
5923     // Reference   ::= EntityRef | CharRef
5924     // EntityRef   ::= '&' Name ';'
5925     // PEReference ::= '%' Name ';'
5926 
5927     import std.range : only;
5928 
5929     checkNotEmpty(text);
5930     immutable quote = text.input.front;
5931     immutable quotePos = text.pos;
5932     foreach(quoteChar; only('"', '\''))
5933     {
5934         // This would be a bit simpler if takeUntilAndDrop took a runtime
5935         // argument, but in all other cases, a compile-time argument makes more
5936         // sense, so this seemed like a reasonable way to handle this one case.
5937         if(quote == quoteChar)
5938         {
5939             popFrontAndIncCol(text);
5940             size_t lineStart = 0;
5941             auto orig = text.input.save;
5942             size_t takeLen;
5943             loop: while(true)
5944             {
5945                 if(text.input.empty)
5946                     throw new XMLParsingException("Unterminated attribute value", quotePos);
5947                 switch(text.input.front)
5948                 {
5949                     case '"':
5950                     {
5951                         if(quote == '"')
5952                         {
5953                             text.input.popFront();
5954                             goto done;
5955                         }
5956                         goto default;
5957                     }
5958                     case '\'':
5959                     {
5960                         if(quote == '\'')
5961                         {
5962                             text.input.popFront();
5963                             goto done;
5964                         }
5965                         goto default;
5966                     }
5967                     case '&':
5968                     {
5969                         {
5970                             import dxml.util : parseCharRef;
5971                             auto temp = text.input.save;
5972                             auto charRef = parseCharRef(temp);
5973                             if(!charRef.isNull)
5974                             {
5975                                 static if(hasLength!(Text.Input))
5976                                 {
5977                                     takeLen += text.input.length - temp.length;
5978                                     text.input = temp;
5979                                 }
5980                                 else
5981                                 {
5982                                     while(text.input.front != ';')
5983                                     {
5984                                         ++takeLen;
5985                                         text.input.popFront();
5986                                     }
5987                                     ++takeLen;
5988                                     text.input.popFront();
5989                                 }
5990                                 continue;
5991                             }
5992                         }
5993 
5994                         immutable ampLen = takeLen - lineStart;
5995                         ++takeLen;
5996                         text.input.popFront();
5997 
5998                         // Std Entity References
5999                         static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
6000                         {
6001                             import std.algorithm.searching : startsWith;
6002 
6003                             static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
6004                             {
6005                                 if(text.input.save.startsWith(entRef))
6006                                 {
6007                                     takeLen += entRef.length;
6008                                     text.input.popFrontN(entRef.length);
6009                                     continue loop;
6010                                 }
6011                             }
6012 
6013                             text.pos.col += ampLen;
6014                             throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
6015                                                           "reference, and this parser only supports entity " ~
6016                                                           "references if they're predefined by the spec. This is not " ~
6017                                                           "a valid character reference or one of the predefined " ~
6018                                                           "entity references.", text.pos);
6019                         }
6020                         // All Entity References
6021                         else
6022                         {
6023                             import std.utf : decodeFront, UseReplacementDchar;
6024                             import dxml.internal : isNameStartChar, isNameChar;
6025 
6026                             if(text.input.empty || text.input.front == quote)
6027                                 goto failedEntityRef;
6028 
6029                             {
6030                                 size_t numCodeUnits;
6031                                 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
6032                                 if(!isNameStartChar(decodedC))
6033                                     goto failedEntityRef;
6034                                 takeLen += numCodeUnits;
6035                             }
6036 
6037                             while(true)
6038                             {
6039                                 if(text.input.empty)
6040                                     goto failedEntityRef;
6041                                 immutable c = text.input.front;
6042                                 if(c == ';')
6043                                 {
6044                                     ++takeLen;
6045                                     break;
6046                                 }
6047                                 size_t numCodeUnits;
6048                                 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
6049                                 if(!isNameChar(decodedC))
6050                                     goto failedEntityRef;
6051                                 takeLen += numCodeUnits;
6052                             }
6053                             break;
6054 
6055                             failedEntityRef:
6056                             text.pos.col += ampLen;
6057                             throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
6058                                                           "character or entity reference, and this is not a valid " ~
6059                                                           "character or entity reference.", text.pos);
6060                         }
6061                     }
6062                     case '<':
6063                     {
6064                         text.pos.col += takeLen - lineStart;
6065                         throw new XMLParsingException("< is not legal in an attribute name", text.pos);
6066                     }
6067                     case '\n':
6068                     {
6069                         ++takeLen;
6070                         nextLine!(Text.config)(text.pos);
6071                         lineStart = takeLen;
6072                         break;
6073                     }
6074                     default:
6075                     {
6076                         import std.ascii : isASCII;
6077                         import std.format : format;
6078                         import dxml.internal : isXMLChar;
6079 
6080                         immutable c = text.input.front;
6081                         if(isASCII(c))
6082                         {
6083                             if(!isXMLChar(c))
6084                             {
6085                                 throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
6086                                                               text.pos);
6087                             }
6088                             ++takeLen;
6089                             break;
6090                         }
6091                         import std.utf : decodeFront, UseReplacementDchar, UTFException;
6092                         // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
6093                         // replacement character is considered valid XML, and if we decoded using it, then
6094                         // all of the invalid Unicode characters would come out as the replacement character
6095                         // and then be treated as valid instead of being caught, which isn't all bad, but
6096                         // the spec requires that they be treated as invalid instead of playing nice and
6097                         // using the replacement character.
6098                         try
6099                         {
6100                             size_t numCodeUnits;
6101                             immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
6102                             if(!isXMLChar(decodedC))
6103                             {
6104                                 enum fmt = "Character is not legal in an XML File: 0x%0x";
6105                                 throw new XMLParsingException(format!fmt(decodedC), text.pos);
6106                             }
6107                             takeLen += numCodeUnits;
6108                         }
6109                         catch(UTFException e)
6110                             throw new XMLParsingException("Invalid Unicode character", text.pos);
6111                         continue;
6112                     }
6113                 }
6114                 text.input.popFront();
6115             }
6116             done