1 // Written in the D programming language
2 
3 /++
4     This implements a range-based
5     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX _parser) for XML 1.0 (which
6     will work with XML 1.1 documents assuming that they don't use any
7     1.1-specific features). For the sake of simplicity, sanity, and efficiency,
8     the $(LINK2 https://en.wikipedia.org/wiki/Document_type_definition, DTD)
9     section is not supported beyond what is required to parse past it.
10 
11     Start tags, end tags, comments, cdata sections, and processing instructions
12     are all supported and reported to the application. Anything in the DTD is
13     skipped (though it's parsed enough to parse past it correctly, and that
14     $(I can) result in an $(LREF XMLParsingException) if that XML isn't valid
15     enough to be correctly skipped), and the
16     $(LINK2 http://www.w3.org/TR/REC-xml/#NT-XMLDecl, XML declaration) at the
17     top is skipped if present (XML 1.1 requires that it be there, but XML 1.0
18     does not).
19 
20     Regardless of what the XML declaration says (if present), any range of
21     $(K_CHAR) will be treated as being encoded in UTF-8, any range of $(K_WCHAR)
22     will be treated as being encoded in UTF-16, and any range of $(K_DCHAR) will
23     be treated as having been encoded in UTF-32. Strings will be treated as
24     ranges of their code units, not code points.
25 
26     Since the DTD is skipped, entity references other than the five which are
27     predefined by the XML spec cannot be fully processed (since wherever they
28     were used in the document would be replaced by what they referred to, which
29     could be arbitrarily complex XML). As such, by default, if any entity
30     references which are not predefined are encountered outside of the DTD, an
31     $(LREF XMLParsingException) will be thrown (see
32     $(LREF Config.throwOnEntityRef) for how that can be configured). The
33     predefined entity references and any character references encountered will
34     be checked to verify that they're valid, but they will not be replaced
35     (since that does not work with returning slices of the original input).
36 
37     However, $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
38     $(REF_ALTTEXT parseStdEntityRef, parseStdEntityRef, dxml, util) from
39     $(MREF dxml, util) can be used to convert the predefined entity references
    to what they refer to, and $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
41     $(REF_ALTTEXT parseCharRef, parseCharRef, dxml, util) from
42     $(MREF dxml, util) can be used to convert character references to what they
43     refer to.
44 
45     $(H3 Primary Symbols)
46     $(TABLE
47         $(TR $(TH Symbol) $(TH Description))
48         $(TR $(TD $(LREF parseXML))
49              $(TD The function used to initiate the parsing of an XML
50                   document.))
51         $(TR $(TD $(LREF EntityRange))
52              $(TD The range returned by $(LREF parseXML).))
53         $(TR $(TD $(LREF EntityRange.Entity))
54              $(TD The element type of $(LREF EntityRange).))
55     )
56 
57     $(H3 Parser Configuration Helpers)
58     $(TABLE
59         $(TR $(TH Symbol) $(TH Description))
60         $(TR $(TD $(LREF Config))
61              $(TD Used to configure how $(LREF EntityRange) parses the XML.))
62         $(TR $(TD $(LREF simpleXML))
63              $(TD A user-friendly configuration for when the application just
64                   wants the element tags and the data in between them.))
65         $(TR $(TD $(LREF makeConfig))
66              $(TD A convenience function for constructing a custom
67                   $(LREF Config).))
68         $(TR $(TD $(LREF SkipComments))
69              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
70                   to tell the parser to skip comments.))
71         $(TR $(TD $(LREF SkipPI))
72              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
73                   to tell the parser to skip processing instructions.))
74         $(TR $(TD $(LREF SplitEmpty))
75              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
76                   to configure how the parser deals with empty element tags.))
77     )
78 
79     $(H3 Helper Types Used When Parsing)
80     $(TABLE
81         $(TR $(TH Symbol) $(TH Description))
82         $(TR $(TD $(LREF EntityType))
83              $(TD The type of an entity in the XML (e.g. a
84                   $(LREF_ALTTEXT start tag, EntityType.elementStart) or a
85                   $(LREF_ALTTEXT comment, EntityType.comment)).))
86         $(TR $(TD $(LREF TextPos))
87              $(TD Gives the line and column number in the XML document.))
88         $(TR $(TD $(LREF XMLParsingException))
89              $(TD Thrown by $(LREF EntityRange) when it encounters invalid
90                   XML.))
91     )
92 
93     $(H3 Helper Functions Used When Parsing)
94     $(TABLE
95         $(TR $(TH Symbol) $(TH Description))
96         $(TR $(TD $(LREF getAttrs))
97              $(TD A function similar to $(PHOBOS_REF getopt, std, getopt) which
98                   allows for the easy processing of start tag attributes.))
99         $(TR $(TD $(LREF skipContents))
100              $(TD Iterates an $(LREF EntityRange) from a start tag to its
101                   matching end tag.))
102         $(TR $(TD $(LREF skipToPath))
103              $(TD Used to navigate from one start tag to another as if the start
104                   tag names formed a file path.))
105         $(TR $(TD $(LREF skipToEntityType))
106              $(TD Skips to the next entity of the given type in the range.))
107         $(TR $(TD $(LREF skipToParentEndTag))
108              $(TD Iterates an $(LREF EntityRange) until it reaches the end tag
109                   that matches the start tag which is the parent of the
110                   current entity.))
111     )
112 
113     $(H3 Helper Traits)
114     $(TABLE
115         $(TR $(TH Symbol) $(TH Description))
116         $(TR $(TD $(LREF isAttrRange))
117              $(TD Whether the given range is a range of attributes.)))
118 
119     Copyright: Copyright 2017 - 2020
120     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
121     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
122     Source:    $(LINK_TO_SRC dxml/_parser.d)
123 
124     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
125   +/
126 module dxml.parser;
127 
///
unittest
{
    // Verifies that the front of the range is a tag of the given type with
    // the given name and then moves the range on to the next entity.
    static void popTag(R)(ref R range, EntityType type, string name)
    {
        assert(range.front.type == type);
        assert(range.front.name == name);
        range.popFront();
    }

    // Verifies that the front of the range is an entity of the given type
    // with the given text and then moves the range on to the next entity.
    static void popText(R)(ref R range, EntityType type, string text)
    {
        assert(range.front.type == type);
        assert(range.front.text == text);
        range.popFront();
    }

    auto xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // With the default configuration, every entity is reported as written.
    {
        auto range = parseXML(xml);

        popText(range, EntityType.comment, " comment ");
        popTag(range, EntityType.elementStart, "root");
        popTag(range, EntityType.elementStart, "foo");
        popText(range, EntityType.text, "some text");
        popTag(range, EntityType.elementEmpty, "whatever");
        popTag(range, EntityType.elementEnd, "foo");
        popTag(range, EntityType.elementEmpty, "bar");
        popTag(range, EntityType.elementStart, "baz");
        popTag(range, EntityType.elementEnd, "baz");
        popTag(range, EntityType.elementEnd, "root");

        assert(range.empty);
    }

    // With simpleXML, the comment is skipped, so the first entity is the
    // start tag for root.
    {
        auto range = parseXML!simpleXML(xml);

        popTag(range, EntityType.elementStart, "root");
        popTag(range, EntityType.elementStart, "foo");
        popText(range, EntityType.text, "some text");

        // simpleXML splits empty element tags into a start tag and end tag
        // so that the code doesn't have to care whether a start tag with no
        // content is an empty tag or a start tag and end tag with nothing but
        // whitespace in between.
        popTag(range, EntityType.elementStart, "whatever");
        popTag(range, EntityType.elementEnd, "whatever");

        popTag(range, EntityType.elementEnd, "foo");

        popTag(range, EntityType.elementStart, "bar");
        popTag(range, EntityType.elementEnd, "bar");

        popTag(range, EntityType.elementStart, "baz");
        popTag(range, EntityType.elementEnd, "baz");

        popTag(range, EntityType.elementEnd, "root");

        assert(range.empty);
    }
}
237 
238 
239 import std.range.primitives;
240 import std.traits;
241 import std.typecons : Flag;
242 
243 
/++
    The type of exception that the XML parser throws when the XML that it is
    given is invalid.
  +/
class XMLParsingException : Exception
{
    /++
        Where in the XML input the problem was found.
      +/
    TextPos pos;

package:

    // Builds the exception message by prefixing msg with the line and column
    // of textPos in the form "[line:col]: " and records textPos in pos.
    // file and line are forwarded to Exception unchanged.
    this(string msg, TextPos textPos, string file = __FILE__, size_t line = __LINE__) @safe pure
    {
        import std.format : format;

        immutable positionedMsg = format!"[%s:%s]: %s"(textPos.line, textPos.col, msg);
        super(positionedMsg, file, line);
        this.pos = textPos;
    }
}
263 
264 
/++
    The location of an entity within the XML document.

    Both the line number and the column number start at 1.

    TextPos exists primarily for the sake of $(LREF XMLParsingException),
    though applications are free to use it for their own purposes. The
    TextPos of an $(LREF2 Entity, EntityRange) is available via
    $(LREF2 Entity.pos, EntityRange).

    See_Also: $(LREF XMLParsingException.pos)$(BR)
              $(LREF EntityRange.Entity.pos)
  +/
struct TextPos
{
    /// The number of a line in the XML file.
    int line = 1;

    /++
        The number of a column within a line of the XML file.

        A column corresponds to a single code unit. So, a program which wants
        to display some other kind of position (e.g. one counted in graphemes)
        may have to look at the text of the line itself and compute the
        number that it actually wants to show.
      +/
    int col = 1;
}
293 
294 
/++
    The settings which control the parser's behavior.

    See_Also:
        $(LREF makeConfig)$(BR)
        $(LREF parseXML)$(BR)
        $(LREF simpleXML)
  +/
struct Config
{
    /++
        Controls whether comments are skipped during parsing.

        When $(D skipComments == SkipComments.yes), entities of type
        $(LREF EntityType.comment) are left out of the parsing results, and
        they are only validated as far as is necessary to parse past them.

        Defaults to $(D SkipComments.no).
      +/
    SkipComments skipComments = SkipComments.no;

    /++
        Controls whether processing instructions are skipped during parsing.

        When $(D skipPI == SkipPI.yes), entities of type
        $(LREF EntityType.pi) are skipped, and they are only validated as far
        as is necessary to parse past them.

        Defaults to $(D SkipPI.no).
      +/
    SkipPI skipPI = SkipPI.no;

    /++
        Controls whether empty element tags are reported as though they had
        been written as a start tag immediately followed by an end tag.

        When $(D splitEmpty == SplitEmpty.yes), each
        $(LREF EntityType.elementEmpty) that the parser encounters is
        reported as an $(LREF EntityType.elementStart), and an
        $(LREF EntityType.elementEnd) is then provided as the next entity,
        ahead of the entity which actually comes next in the document.

        This exists to make code that uses the parser simpler, because most
        code has no interest in whether an element was written as an empty
        tag or as a start tag and end tag with nothing in between. Some code
        may care about the difference though, which is why the behavior is
        configurable.

        Defaults to $(D SplitEmpty.no).
      +/
    SplitEmpty splitEmpty = SplitEmpty.no;

    ///
    unittest
    {
        // Verifies that the range consists of exactly a start tag named
        // "root" followed by an end tag named "root".
        static void checkStartThenEnd(R)(R range)
        {
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.empty);
        }

        enum configSplitYes = makeConfig(SplitEmpty.yes);

        // Separate start and end tags are reported as they are written.
        checkStartThenEnd(parseXML("<root></root>"));

        // No difference if the tags are already split.
        checkStartThenEnd(parseXML!configSplitYes("<root></root>"));

        {
            // This treats <root></root> and <root/> as distinct.
            auto range = parseXML("<root/>");
            assert(range.front.type == EntityType.elementEmpty);
            assert(range.front.name == "root");
            range.popFront();
            assert(range.empty);
        }

        // This is parsed as if it were <root></root> instead of <root/>.
        checkStartThenEnd(parseXML!configSplitYes("<root/>"));
    }

    /++
        Controls whether the parser throws upon encountering an entity
        reference other than the five entity references which the XML
        standard defines.

        All other entity references are only valid if the DTD defines them.
        And finding out what XML they stand for (which could be arbitrarily
        complex, even effectively inserting entire XML documents into the
        middle of the XML) would require parsing the DTD. However, dxml does
        not support parsing the DTD beyond what is required to correctly
        parse past it, and substituting what an entity reference represents
        for the reference itself would not work with the slicing semantics
        that $(LREF EntityRange) provides. So, dxml cannot correctly handle
        any entity references other than the five which the XML standard
        defines, and even those are only parsed by using
        $(REF decodeXML, dxml, util) or $(REF parseStdEntityRef, dxml, util).
        $(LREF EntityRange) always validates that entity references are one
        of the five, predefined entity references, but beyond that, they are
        passed through as normal text. They are not replaced with what they
        represent.

        For that reason, $(LREF EntityRange) defaults to throwing an
        $(LREF XMLParsingException) when it encounters an entity reference
        which is not one of the five defined by the XML standard. That way,
        there is no risk of an XML document being processed as if it had no
        entity references, yielding results which the program using the
        parser would probably consider incorrect. However, some programs may
        find it acceptable to treat entity references as normal text and
        ignore them, and a program which wishes to do that can set
        throwOnEntityRef to $(D ThrowOnEntityRef.no).

        When $(D throwOnEntityRef == ThrowOnEntityRef.no), every entity
        reference that is encountered is still validated to ensure that it is
        syntactically valid (i.e. that the characters it contains form what
        could be a valid entity reference assuming that the DTD declared it
        properly), but other than that, $(LREF EntityRange) treats it as
        normal text, just like it treats the five, predefined entity
        references as normal text.

        Note that any valid XML entity reference which contains start or end
        tags must contain matching start or end tags, and entity references
        cannot contain incomplete fragments of XML (e.g. the start or end of a
        comment). So, missing entity references should only affect the data in
        the XML document and not its overall structure (if that were not _true,
        attempting to ignore entity references such as $(D ThrowOnEntityRef.no)
        does would be a disaster in the making). However, whether missing
        that data is reasonable depends entirely on the application and on
        what the XML documents that it parses contain - hence, the behavior
        is configurable.

        See_Also: $(REF StdEntityRef, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(REF encodeCharRef, dxml, util)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)
      +/
    ThrowOnEntityRef throwOnEntityRef = ThrowOnEntityRef.yes;

    ///
    unittest
    {
        import std.exception : assertThrown;
        import dxml.util : decodeXML;

        // Walks the range from the start tag for root up to and including
        // the start tag for other, checking each entity on the way. This part
        // of the document is parsed the same way regardless of
        // throwOnEntityRef, because it only contains the predefined entity
        // references.
        static void checkThroughOtherStart(R)(ref R range)
        {
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "std");

            range.popFront();
            assert(range.front.type == EntityType.text);
            assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;");
            assert(range.front.text.decodeXML() == `&'><"`);

            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "std");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "other");
        }

        auto xml = "<root>\n" ~
                   "    <std>&amp;&apos;&gt;&lt;&quot;</std>\n" ~
                   "    <other>&foobar;</other>\n" ~
                   "    <invalid>&--;</invalid>\n" ~
                   "</root>";

        // ThrowOnEntityRef.yes
        {
            auto range = parseXML(xml);
            checkThroughOtherStart(range);

            // Attempted to parse past "&foobar;", which is syntactically
            // valid, but it's not one of the five predefined entity references.
            assertThrown!XMLParsingException(range.popFront());
        }

        // ThrowOnEntityRef.no
        {
            auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml);
            checkThroughOtherStart(range);

            // Doesn't throw, because "&foobar;" is syntactically valid.
            range.popFront();
            assert(range.front.type == EntityType.text);
            assert(range.front.text == "&foobar;");

            // decodeXML has no effect on non-standard entity references.
            assert(range.front.text.decodeXML() == "&foobar;");

            range.popFront();
            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "other");

            range.popFront();
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "invalid");

            // Attempted to parse past "&--;", which is not syntactically valid,
            // because -- is not a valid name for an entity reference.
            assertThrown!XMLParsingException(range.popFront());
        }
    }
}
536 
537 
/++
    The flag type of $(LREF2 skipComments, Config).

    See_Also: $(LREF2 skipComments, Config)
  +/
alias SkipComments = Flag!"SkipComments";

/++
    The flag type of $(LREF2 skipPI, Config).

    See_Also: $(LREF2 skipPI, Config)
  +/
alias SkipPI = Flag!"SkipPI";

/++
    The flag type of $(LREF2 splitEmpty, Config).

    See_Also: $(LREF2 splitEmpty, Config)
  +/
alias SplitEmpty = Flag!"SplitEmpty";

/++
    The flag type of $(LREF2 throwOnEntityRef, Config).

    See_Also: $(LREF2 throwOnEntityRef, Config)
  +/
alias ThrowOnEntityRef = Flag!"ThrowOnEntityRef";
549 
550 
/++
    Helper function for creating a custom config. It makes it easy to set one
    or more of the member variables to something other than the default without
    having to worry about explicitly setting them individually or setting them
    all at once via a constructor.

    The order of the arguments does not matter. The types of each of the members
    of Config are unique, so that information alone is sufficient to determine
    which argument should be assigned to which member.

    It is a compile-time error to pass an argument whose type is not the type
    of any member of $(LREF Config) or to pass two arguments of the same type.

    Params:
        args = The values to assign to the members of $(LREF Config) which
               have the same types.

    Returns: A $(LREF Config) where each member whose type matches the type of
             an argument has been set to that argument and where all other
             members have their default values.
  +/
Config makeConfig(Args...)(Args args)
{
    import std.format : format;
    import std.meta : AliasSeq, staticIndexOf, staticMap;

    Config config;

    // The types of all of the members of Config, in declaration order.
    alias TypeOfMember(string memberName) = typeof(__traits(getMember, config, memberName));
    alias MemberTypes = staticMap!(TypeOfMember, AliasSeq!(__traits(allMembers, Config)));

    foreach(i, arg; args)
    {
        // staticIndexOf yields -1 when the type is not in the list, so this
        // rejects any argument which could not be assigned to a member.
        static assert(staticIndexOf!(typeof(arg), MemberTypes) != -1,
                      format!"Argument %s does not match the type of any members of Config"(i));

        // Each member type is unique, so two arguments of the same type would
        // both target the same member.
        static foreach(j, Other; Args)
        {
            static if(i != j)
                static assert(!is(typeof(arg) == Other), format!"Argument %s and %s have the same type"(i, j));
        }

        // Assign the argument to the member with the matching type.
        foreach(memberName; __traits(allMembers, Config))
        {
            static if(is(typeof(__traits(getMember, config, memberName)) == typeof(arg)))
                mixin("config." ~ memberName ~ " = arg;");
        }
    }

    return config;
}
601 
///
@safe pure nothrow @nogc unittest
{
    // Members which are not passed to makeConfig keep their default values.
    enum defaults = Config.init;

    // A single member set.
    {
        auto cfg = makeConfig(SkipComments.yes);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == defaults.skipPI);
        assert(cfg.splitEmpty == defaults.splitEmpty);
        assert(cfg.throwOnEntityRef == defaults.throwOnEntityRef);
    }

    // Two members set.
    {
        auto cfg = makeConfig(SkipComments.yes, SkipPI.yes);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == SkipPI.yes);
        assert(cfg.splitEmpty == defaults.splitEmpty);
        assert(cfg.throwOnEntityRef == defaults.throwOnEntityRef);
    }

    // The arguments do not have to be in the order that the members are
    // declared in.
    {
        auto cfg = makeConfig(SplitEmpty.yes, SkipComments.yes, ThrowOnEntityRef.no);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == defaults.skipPI);
        assert(cfg.splitEmpty == SplitEmpty.yes);
        assert(cfg.throwOnEntityRef == ThrowOnEntityRef.no);
    }
}
627 
// Verifies that makeConfig rejects invalid arguments at compile time.
unittest
{
    import std.typecons : Flag;

    // Arguments whose types are not the type of any member of Config.
    alias UnrelatedFlag = Flag!"SomeOtherFlag";
    static assert(!__traits(compiles, makeConfig(42)));
    static assert(!__traits(compiles, makeConfig("hello")));
    static assert(!__traits(compiles, makeConfig(UnrelatedFlag.yes)));

    // Two arguments with the same type.
    static assert(!__traits(compiles, makeConfig(SplitEmpty.yes, SplitEmpty.no)));
}
636 
637 
/++
    A $(LREF Config) meant to make parsing XML easy. It skips everything that
    isn't the actual data (comments and processing instructions), and it
    simplifies the handling of empty element tags by treating them the same as
    a start tag and end tag with nothing but whitespace between them.
  +/
enum Config simpleXML = makeConfig(SkipComments.yes, SkipPI.yes, SplitEmpty.yes);
645 
///
@safe pure nothrow @nogc unittest
{
    enum cfg = simpleXML;

    // Comments and processing instructions are skipped, and empty element
    // tags are split.
    static assert(cfg.skipComments == SkipComments.yes);
    static assert(cfg.skipPI == SkipPI.yes);
    static assert(cfg.splitEmpty == SplitEmpty.yes);

    // Non-standard entity references still result in an exception.
    static assert(cfg.throwOnEntityRef == ThrowOnEntityRef.yes);
}
654 
655 
/++
    The kind of entity that a piece of the XML document is. It is used by
    $(LREF EntityRange.Entity).
  +/
enum EntityType
{
    /++
        A cdata section, i.e. `<![CDATA[ ... ]]>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-cdata-sect)
      +/
    cdata,

    /++
        An XML comment, i.e. `<!-- ... -->`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-comments)
      +/
    comment,

    /++
        An element's start tag. e.g. `<foo name="value">`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementStart,

    /++
        An element's end tag. e.g. `</foo>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEnd,

    /++
        The tag for an element which has neither contents nor a matching end
        tag. e.g. `<foo name="value"/>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEmpty,

    /++
        A processing instruction. e.g. `<?foo?>`. Note that the
        `<?xml ... ?>` is skipped rather than being treated as an
        $(LREF EntityType._pi).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-pi)
      +/
    pi,

    /++
        Simple text which is the content of an element tag.

        If the text is followed by an entity other than the end tag, then
        the text goes up to that entity.

        Note however that character references (e.g.
        $(D_CODE_STRING "$(AMP)#42;")) and the predefined entity references
        (e.g. $(D_CODE_STRING "$(AMP)apos;")) are left unprocessed in the
        text. To have them processed, pass the text to either
        $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
        $(REF_ALTTEXT asDecodedXML, asDecodedXML, dxml, util). Entity references
        which are not predefined are considered invalid XML, because the DTD
        section is skipped, and thus they cannot be processed properly.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(LREF EntityRange.Entity._text)
      +/
    text,
}
729 
730 
731 /++
732     Lazily parses the given range of characters as an XML document.
733 
734     EntityRange is essentially a
735     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX) parser, though it evolved
736     into that rather than being based on what Java did, and it's range-based
737     rather than iterator-based, so its API is likely to differ from other
738     implementations. The basic concept should be the same though.
739 
740     One of the core design goals of this parser is to slice the original input
741     rather than having to allocate strings for the output or wrap it in a lazy
742     range that produces a mutated version of the data. So, all of the text that
743     the parser provides is either a slice or
744     $(PHOBOS_REF takeExactly, std, range) of the input. However, in some cases,
745     for the parser to be fully compliant with the XML spec,
746     $(REF decodeXML, dxml, util) must be called on the text to mutate certain
747     constructs (e.g. removing any $(D_CODE_STRING '\r') in the text or
748     converting $(D_CODE_STRING "$(AMP)lt;") to $(D_CODE_STRING '<')). But
749     that's left up to the application.
750 
751     The parser is not $(K_NOGC), but it allocates memory very minimally. It
752     allocates some of its state on the heap so it can validate attributes and
753     end tags. However, that state is shared among all the ranges that came from
754     the same call to parseXML (only the range farthest along in parsing
755     validates attributes or end tags), so $(LREF2 save, _EntityRange) does not
756     allocate memory unless $(D save) on the underlying range allocates memory.
757     The shared state currently uses a couple of dynamic arrays to validate the
758     tags and attributes, and if the document has a particularly deep tag depth
759     or has a lot of attributes on a start tag, then some reallocations may
760     occur until the maximum is reached, but enough is reserved that for most
761     documents, no reallocations will occur. The only other times that the
762     parser would allocate would be if an exception were thrown or if the range
763     that was passed to parseXML allocates for any reason when calling any of the
764     range primitives.
765 
766     If invalid XML is encountered at any point during the parsing process, an
767     $(LREF XMLParsingException) will be thrown. If an exception has been thrown,
768     then the parser is in an invalid state, and it is an error to call any
769     functions on it.
770 
771     However, note that XML validation is reduced for any entities that are
772     skipped (e.g. for anything in the DTD, validation is reduced to what is
773     required to correctly parse past it, and when
774     $(D Config.skipPI == SkipPI.yes), processing instructions are only validated
775     enough to correctly skip past them).
776 
777     As the module documentation says, this parser does not provide any DTD
778     support. It is not possible to properly support the DTD while returning
779     slices of the original input, and the DTD portion of the spec makes parsing
780     XML far, far more complicated.
781 
782     A quick note about carriage returns$(COLON) per the XML spec, they are all
783     supposed to either be stripped out or replaced with newlines or spaces
784     before the XML parser even processes the text. That doesn't work when the
785     parser is slicing the original text and not mutating it at all. So, for the
786     purposes of parsing, this parser treats all carriage returns as if they
787     were newlines or spaces (though they won't count as newlines when counting
788     the lines for $(LREF TextPos)). However, they $(I will) appear in any text
789     fields or attribute values if they are in the document (since the text
790     fields and attribute values are slices of the original text).
791     $(REF decodeXML, dxml, util) can be used to strip them along with
792     converting any character references in the text. Alternatively, the
793     application can remove them all before calling parseXML, but it's not
794     necessary.
795   +/
796 struct EntityRange(Config cfg, R)
797     if(isForwardRange!R && isSomeChar!(ElementType!R))
798 {
799     import std.algorithm : canFind;
800     import std.range : only, takeExactly;
801     import std.typecons : Nullable;
802     import std.utf : byCodeUnit;
803 
804     enum compileInTests = is(R == EntityRangeCompileTests);
805 
806 public:
807 
    /// The Config used when parsing the XML.
809     alias config = cfg;
810 
811     /// The type of the range that EntityRange is parsing.
812     alias Input = R;
813 
814     /++
815         The type used when any slice of the original input is used. If $(D R)
816         is a string or supports slicing, then SliceOfR is the same as $(D R);
817         otherwise, it's the result of calling
818         $(PHOBOS_REF takeExactly, std, range) on the input.
819 
820         ---
821         import std.algorithm : filter;
822         import std.range : takeExactly;
823 
824         static assert(is(EntityRange!(Config.init, string).SliceOfR == string));
825 
826         auto range = filter!(a => true)("some xml");
827 
828         static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
829                          typeof(takeExactly(range, 42))));
830         ---
831       +/
832     static if(isDynamicArray!R || hasSlicing!R)
833         alias SliceOfR = R;
834     else
835         alias SliceOfR = typeof(takeExactly(R.init, 42));
836 
    // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
    // a ddoc-ed unit test, so the example from the documentation of SliceOfR
    // is duplicated here to make sure that it compiles. Keep the two in sync.
    static if(compileInTests) @safe unittest
    {
        import std.algorithm : filter;
        import std.range : takeExactly;

        // A string is a dynamic array, so SliceOfR is the string type itself.
        static assert(is(EntityRange!(Config.init, string).SliceOfR == string));

        // The result of filter is neither a dynamic array nor sliceable, so
        // SliceOfR falls back to the type that takeExactly returns for it.
        auto range = filter!(a => true)("some xml");

        static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
                         typeof(takeExactly(range, 42))));
    }
851 
852 
853     /++
854         Represents an entity in the XML document.
855 
856         Note that the $(LREF2 type, EntityRange._Entity) determines which
857         properties can be used, and it can determine whether functions which
858         an Entity or $(LREF EntityRange) is passed to are allowed to be called.
859         Each function lists which $(LREF EntityType)s are allowed, and it is an
860         error to call them with any other $(LREF EntityType).
861       +/
862     struct Entity
863     {
864     public:
865 
866         import std.typecons : Tuple;
867 
868         /++
869             The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
            $(LREF2 attributes, EntityRange.Entity) returns a range of.
871 
872             See_Also: $(LREF2 attributes, EntityRange.Entity)
873           +/
874         alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos,  "pos");
875 
876 
877         /++
878             The $(LREF EntityType) for this Entity.
879           +/
880         @property EntityType type() @safe const pure nothrow @nogc
881         {
882             return _type;
883         }
884 
885         ///
886         static if(compileInTests) unittest
887         {
888             auto xml = "<root>\n" ~
889                        "    <!--no comment-->\n" ~
890                        "    <![CDATA[cdata run]]>\n" ~
891                        "    <text>I am text!</text>\n" ~
892                        "    <empty/>\n" ~
893                        "    <?pi?>\n" ~
894                        "</root>";
895 
896             auto range = parseXML(xml);
897             assert(range.front.type == EntityType.elementStart);
898             assert(range.front.name == "root");
899             range.popFront();
900 
901             assert(range.front.type == EntityType.comment);
902             assert(range.front.text == "no comment");
903             range.popFront();
904 
905             assert(range.front.type == EntityType.cdata);
906             assert(range.front.text == "cdata run");
907             range.popFront();
908 
909             assert(range.front.type == EntityType.elementStart);
910             assert(range.front.name == "text");
911             range.popFront();
912 
913             assert(range.front.type == EntityType.text);
914             assert(range.front.text == "I am text!");
915             range.popFront();
916 
917             assert(range.front.type == EntityType.elementEnd);
918             assert(range.front.name == "text");
919             range.popFront();
920 
921             assert(range.front.type == EntityType.elementEmpty);
922             assert(range.front.name == "empty");
923             range.popFront();
924 
925             assert(range.front.type == EntityType.pi);
926             assert(range.front.name == "pi");
927             range.popFront();
928 
929             assert(range.front.type == EntityType.elementEnd);
930             assert(range.front.name == "root");
931             range.popFront();
932 
933             assert(range.empty);
934         }
935 
936 
937         /++
938             The position in the the original text where the entity starts.
939 
940             See_Also: $(LREF TextPos)$(BR)
941                       $(LREF XMLParsingException._pos)
942           +/
943         @property TextPos pos() @safe const pure nothrow @nogc
944         {
945             return _pos;
946         }
947 
948         ///
949         static if(compileInTests) unittest
950         {
951             auto xml = "<root>\n" ~
952                        "    <foo>\n" ~
953                        "        Foo and bar. Always foo and bar...\n" ~
954                        "    </foo>\n" ~
955                        "</root>";
956 
957             auto range = parseXML(xml);
958             assert(range.front.type == EntityType.elementStart);
959             assert(range.front.name == "root");
960             assert(range.front.pos == TextPos(1, 1));
961             range.popFront();
962 
963             assert(range.front.type == EntityType.elementStart);
964             assert(range.front.name == "foo");
965             assert(range.front.pos == TextPos(2, 5));
966             range.popFront();
967 
968             assert(range.front.type == EntityType.text);
969             assert(range.front.text ==
970                    "\n" ~
971                    "        Foo and bar. Always foo and bar...\n" ~
972                    "    ");
973             assert(range.front.pos == TextPos(2, 10));
974             range.popFront();
975 
976             assert(range.front.type == EntityType.elementEnd);
977             assert(range.front.name == "foo");
978             assert(range.front.pos == TextPos(4, 5));
979             range.popFront();
980 
981             assert(range.front.type == EntityType.elementEnd);
982             assert(range.front.name == "root");
983             assert(range.front.pos == TextPos(5, 1));
984             range.popFront();
985 
986             assert(range.empty);
987         }
988 
989         static if(compileInTests) unittest
990         {
991             import core.exception : AssertError;
992             import std.exception : enforce;
993 
994             static void test(ER)(ref ER range, EntityType type, int row, int col, size_t line = __LINE__)
995             {
996                 enforce!AssertError(!range.empty, "unittest failure 1", __FILE__, line);
997                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
998                 enforce!AssertError(range.front.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
999                 range.popFront();
1000             }
1001 
1002             auto xml = "<?xml?>\n" ~
1003                        "   <!--comment-->\n" ~
1004                        "   <?pi?>\n" ~
1005                        " <root>\n" ~
1006                        "          <!--comment--><!--comment-->\n" ~
1007                        "       <?pi?>\n" ~
1008                        "  <![CDATA[]]>\n" ~
1009                        "              <empty/>     </root>\n" ~
1010                        " <!--comment-->\n" ~
1011                        " <?pi?>\n";
1012 
1013             {
1014                 auto range = parseXML(xml);
1015                 test(range, EntityType.comment, 2, 4);
1016                 test(range, EntityType.pi, 3, 4);
1017                 test(range, EntityType.elementStart, 4, 2);
1018                 test(range, EntityType.comment, 5, 11);
1019                 test(range, EntityType.comment, 5, 25);
1020                 test(range, EntityType.pi, 6, 8);
1021                 test(range, EntityType.cdata, 7, 3);
1022                 test(range, EntityType.elementEmpty, 8, 15);
1023                 test(range, EntityType.elementEnd, 8, 28);
1024                 test(range, EntityType.comment, 9, 2);
1025                 test(range, EntityType.pi, 10, 2);
1026             }
1027 
1028             auto range = parseXML!simpleXML(xml);
1029             test(range, EntityType.elementStart, 4, 2);
1030             test(range, EntityType.cdata, 7, 3);
1031             test(range, EntityType.elementStart, 8, 15);
1032             test(range, EntityType.elementEnd, 8, 15);
1033             test(range, EntityType.elementEnd, 8, 28);
1034         }
1035 
1036 
1037         /++
1038             Gives the name of this Entity.
1039 
1040             Note that this is the direct name in the XML for this entity and
1041             does not contain any of the names of any of the parent entities that
1042             this entity has. If an application wants the full "path" of the
1043             entity, then it will have to keep track of that itself. The parser
1044             does not do that as it would require allocating memory.
1045 
1046             $(TABLE
1047                 $(TR $(TH Supported $(LREF EntityType)s:))
1048                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1049                 $(TR $(TD $(LREF2 elementEnd, EntityType)))
1050                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1051                 $(TR $(TD $(LREF2 pi, EntityType)))
1052             )
1053           +/
1054         @property SliceOfR name()
1055         {
1056             import dxml.internal : checkedSave, stripBCU;
1057             with(EntityType)
1058             {
1059                 import std.format : format;
1060                 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type),
1061                        format("name cannot be called with %s", _type));
1062             }
1063             return stripBCU!R(checkedSave(_name));
1064         }
1065 
1066         ///
1067         static if(compileInTests) unittest
1068         {
1069             auto xml = "<root>\n" ~
1070                        "    <empty/>\n" ~
1071                        "    <?pi?>\n" ~
1072                        "</root>";
1073 
1074             auto range = parseXML(xml);
1075             assert(range.front.type == EntityType.elementStart);
1076             assert(range.front.name == "root");
1077             range.popFront();
1078 
1079             assert(range.front.type == EntityType.elementEmpty);
1080             assert(range.front.name == "empty");
1081             range.popFront();
1082 
1083             assert(range.front.type == EntityType.pi);
1084             assert(range.front.name == "pi");
1085             range.popFront();
1086 
1087             assert(range.front.type == EntityType.elementEnd);
1088             assert(range.front.name == "root");
1089             range.popFront();
1090 
1091             assert(range.empty);
1092         }
1093 
1094 
1095         /++
1096             Returns a lazy range of attributes for a start tag where each
1097             attribute is represented as a$(BR)
1098             $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
1099                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "name"),
1100                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "value"),
1101                       $(LREF TextPos), $(D_STRING "pos"))).
1102 
1103             $(TABLE
1104                 $(TR $(TH Supported $(LREF EntityType)s:))
1105                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1106                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1107             )
1108 
1109             See_Also: $(LREF2 Attribute, EntityRange.Entity)$(BR)
1110                       $(REF decodeXML, dxml, util)$(BR)
1111                       $(REF asDecodedXML, dxml, util)
1112           +/
1113         @property auto attributes()
1114         {
1115             with(EntityType)
1116             {
1117                 import std.format : format;
1118                 assert(_type == elementStart || _type == elementEmpty,
1119                        format("attributes cannot be called with %s", _type));
1120             }
1121 
1122             // STag         ::= '<' Name (S Attribute)* S? '>'
1123             // Attribute    ::= Name Eq AttValue
1124             // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1125 
1126             static struct AttributeRange
1127             {
1128                 @property Attribute front()
1129                 {
1130                     return _front;
1131                 }
1132 
1133                 void popFront()
1134                 {
1135                     import dxml.internal : stripBCU;
1136 
1137                     stripWS(_text);
1138                     if(_text.input.empty)
1139                     {
1140                         empty = true;
1141                         return;
1142                     }
1143 
1144                     immutable pos = _text.pos;
1145                     auto name = stripBCU!R(_text.takeName!'='());
1146                     stripWS(_text);
1147                     popFrontAndIncCol(_text);
1148                     stripWS(_text);
1149                     _front = Attribute(name, stripBCU!R(takeEnquotedText(_text)), pos);
1150                 }
1151 
1152                 @property auto save()
1153                 {
1154                     import dxml.internal : checkedSave;
1155                     auto retval = this;
1156                     retval._front = Attribute(_front[0].save, checkedSave(_front[1]), _front[2]);
1157                     retval._text.input = checkedSave(retval._text.input);
1158                     return retval;
1159                 }
1160 
1161                 this(typeof(_text) text)
1162                 {
1163                     _front = Attribute.init; // This is utterly stupid. https://issues.dlang.org/show_bug.cgi?id=13945
1164                     _text = text;
1165                     if(_text.input.empty)
1166                         empty = true;
1167                     else
1168                         popFront();
1169                 }
1170 
1171                 bool empty;
1172                 Attribute _front;
1173                 typeof(_savedText) _text;
1174             }
1175 
1176             return AttributeRange(_savedText.save);
1177         }
1178 
1179         ///
1180         static if(compileInTests) unittest
1181         {
1182             import std.algorithm.comparison : equal;
1183             import std.algorithm.iteration : filter;
1184             {
1185                 auto xml = "<root/>";
1186                 auto range = parseXML(xml);
1187                 assert(range.front.type == EntityType.elementEmpty);
1188                 assert(range.front.attributes.empty);
1189 
1190                 static assert(is(ElementType!(typeof(range.front.attributes)) ==
1191                                  typeof(range).Entity.Attribute));
1192             }
1193             {
1194                 auto xml = "<root a='42' q='29' w='hello'/>";
1195                 auto range = parseXML(xml);
1196                 assert(range.front.type == EntityType.elementEmpty);
1197 
1198                 auto attrs = range.front.attributes;
1199                 assert(attrs.front.name == "a");
1200                 assert(attrs.front.value == "42");
1201                 assert(attrs.front.pos == TextPos(1, 7));
1202                 attrs.popFront();
1203 
1204                 assert(attrs.front.name == "q");
1205                 assert(attrs.front.value == "29");
1206                 assert(attrs.front.pos == TextPos(1, 14));
1207                 attrs.popFront();
1208 
1209                 assert(attrs.front.name == "w");
1210                 assert(attrs.front.value == "hello");
1211                 assert(attrs.front.pos == TextPos(1, 21));
1212                 attrs.popFront();
1213 
1214                 assert(attrs.empty);
1215             }
1216             // Because the type of name and value is SliceOfR, == with a string
1217             // only works if the range passed to parseXML was string.
1218             {
1219                 auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>");
1220                 auto range = parseXML(xml);
1221                 assert(range.front.type == EntityType.elementEmpty);
1222 
1223                 auto attrs = range.front.attributes;
1224                 assert(equal(attrs.front.name, "a"));
1225                 assert(equal(attrs.front.value, "42"));
1226                 assert(attrs.front.pos == TextPos(1, 7));
1227                 attrs.popFront();
1228 
1229                 assert(equal(attrs.front.name, "q"));
1230                 assert(equal(attrs.front.value, "29"));
1231                 assert(attrs.front.pos == TextPos(1, 14));
1232                 attrs.popFront();
1233 
1234                 assert(equal(attrs.front.name, "w"));
1235                 assert(equal(attrs.front.value, "hello"));
1236                 assert(attrs.front.pos == TextPos(1, 21));
1237                 attrs.popFront();
1238 
1239                 assert(attrs.empty);
1240             }
1241         }
1242 
1243         static if(compileInTests) unittest
1244         {
1245             import core.exception : AssertError;
1246             import std.algorithm.comparison : equal;
1247             import std.exception : assertNotThrown, collectException, enforce;
1248             import std.typecons : Tuple, tuple;
1249             import dxml.internal : codeLen, testRangeFuncs;
1250 
1251             static bool cmpAttr(T, U)(T lhs, U rhs)
1252             {
1253                 return equal(lhs[0].save, rhs[0].save) &&
1254                        equal(lhs[1].save, rhs[1].save);
1255             }
1256 
1257             static void test(alias func, ThrowOnEntityRef toer)(string text, EntityType type,
1258                                                                 Tuple!(string, string)[] expected,
1259                                                                 int row, int col, size_t line = __LINE__)
1260             {
1261                 auto range = assertNotThrown!XMLParsingException(parseXML!(makeConfig(toer))(func(text)),
1262                                                                  "unittest 1", __FILE__, line);
1263                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
1264                 enforce!AssertError(equal!cmpAttr(range.front.attributes, expected),
1265                                     "unittest failure 3", __FILE__, line);
1266                 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 4", __FILE__, line);
1267             }
1268 
1269             static void testFail(alias func, ThrowOnEntityRef toer)(string text,
1270                                                                     int row, int col, size_t line = __LINE__)
1271             {
1272                 auto e = collectException!XMLParsingException(parseXML!(makeConfig(toer))(func(text)));
1273                 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
1274                 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1275             }
1276 
1277             static foreach(func; testRangeFuncs)
1278             {
1279                 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
1280                 {
1281                     test!(func, toer)("<root a='b'/>", EntityType.elementEmpty, [tuple("a", "b")], 1, 14);
1282                     test!(func, toer)("<root a = 'b' />", EntityType.elementEmpty, [tuple("a", "b")], 1, 17);
1283                     test!(func, toer)("<root \n\n a \n\n = \n\n 'b' \n\n />", EntityType.elementEmpty,
1284                                       [tuple("a", "b")], 9, 4);
1285                     test!(func, toer)("<root a='b'></root>", EntityType.elementStart, [tuple("a", "b")], 1, 13);
1286                     test!(func, toer)("<root a = 'b' ></root>", EntityType.elementStart, [tuple("a", "b")], 1, 16);
1287                     test!(func, toer)("<root \n a \n = \n 'b' \n ></root>", EntityType.elementStart,
1288                                       [tuple("a", "b")], 5, 3);
1289 
1290                     test!(func, toer)("<root foo='\n\n\n'/>", EntityType.elementEmpty, [tuple("foo", "\n\n\n")], 4, 4);
1291                     test!(func, toer)(`<root foo='"""'/>`, EntityType.elementEmpty, [tuple("foo", `"""`)], 1, 18);
1292                     test!(func, toer)(`<root foo="'''"/>`, EntityType.elementEmpty, [tuple("foo", `'''`)], 1, 18);
1293                     test!(func, toer)(`<root foo.=""/>`, EntityType.elementEmpty, [tuple("foo.", "")], 1, 16);
1294                     test!(func, toer)(`<root foo="bar="/>`, EntityType.elementEmpty, [tuple("foo", "bar=")], 1, 19);
1295 
1296                     test!(func, toer)("<root foo='bar' a='b' hello='world'/>", EntityType.elementEmpty,
1297                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1298                     test!(func, toer)(`<root foo="bar" a='b' hello="world"/>`, EntityType.elementEmpty,
1299                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1300 
1301                     test!(func, toer)(`<root foo="&#42;" a='&#x42;' hello="%foo"/>`, EntityType.elementEmpty,
1302                               [tuple("foo", "&#42;"), tuple("a", "&#x42;"), tuple("hello", "%foo")], 1, 44);
1303 
1304                     test!(func, toer)(`<root foo="&amp;" a='vector&lt;int&gt;'></root>`, EntityType.elementStart,
1305                               [tuple("foo", "&amp;"), tuple("a", "vector&lt;int&gt;"),], 1, 41);
1306 
1307                     test!(func, toer)(`<foo 京都市="ディラン"/>`, EntityType.elementEmpty,
1308                               [tuple("京都市", "ディラン")], 1, codeLen!(func, `<foo 京都市="ディラン"/>`) + 1);
1309 
1310                     test!(func, toer)(`<root foo=">"/>`, EntityType.elementEmpty, [tuple("foo", ">")], 1, 16);
1311                     test!(func, toer)(`<root foo=">>>>>>"/>`, EntityType.elementEmpty, [tuple("foo", ">>>>>>")], 1, 21);
1312                     test!(func, toer)(`<root foo=">"></root>`, EntityType.elementStart, [tuple("foo", ">")], 1, 15);
1313                     test!(func, toer)(`<root foo=">>>>>>"></root>`, EntityType.elementStart, [tuple("foo", ">>>>>>")], 1, 20);
1314 
1315                     test!(func, toer)(`<root foo="bar" foos="ball"/>`, EntityType.elementEmpty,
1316                               [tuple("foo", "bar"), tuple("foos", "ball")], 1, 30);
1317 
1318                     testFail!(func, toer)(`<root a="""/>`, 1, 11);
1319                     testFail!(func, toer)(`<root a='''/>`, 1, 11);
1320                     testFail!(func, toer)("<root a=/>", 1, 9);
1321                     testFail!(func, toer)("<root a='/>", 1, 9);
1322                     testFail!(func, toer)("<root a='/>", 1, 9);
1323                     testFail!(func, toer)("<root =''/>", 1, 7);
1324                     testFail!(func, toer)(`<root a ""/>`, 1, 9);
1325                     testFail!(func, toer)(`<root a""/>`, 1, 8);
1326                     testFail!(func, toer)(`<root a/>`, 1, 8);
1327                     testFail!(func, toer)("<root foo='bar' a=/>", 1, 19);
1328                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1329                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1330                     testFail!(func, toer)("<root foo='bar' =''/>", 1, 17);
1331                     testFail!(func, toer)("<root foo='bar' a= hello='world'/>", 1, 20);
1332                     // It's 33 rather than 28, because it throws when processing the start tag and not when processing
1333                     // the attributes. So, the mismatched quotes are detected before the attributes are checked.
1334                     testFail!(func, toer)("<root foo='bar' a=' hello='world'/>", 1, 33);
1335                     testFail!(func, toer)("<root foo='bar' ='' hello='world'/>", 1, 17);
1336                     testFail!(func, toer)("<root foo='bar'a='b'/>", 1, 16);
1337                     testFail!(func, toer)(`<root .foo="bar"/>`, 1, 7);
1338 
1339                     testFail!(func, toer)(`<root foo="<"/>`, 1, 12);
1340                     testFail!(func, toer)(`<root foo="<world"/>`, 1, 12);
1341                     testFail!(func, toer)(`<root foo="hello<world"/>`, 1, 17);
1342                     testFail!(func, toer)(`<root foo="&"/>`, 1, 12);
1343                     testFail!(func, toer)(`<root foo="hello&"/>`, 1, 17);
1344                     testFail!(func, toer)(`<root foo="hello&world"/>`, 1, 17);
1345                     testFail!(func, toer)(`<root foo="&;"/>`, 1, 12);
1346                     testFail!(func, toer)(`<root foo="&#;"/>`, 1, 12);
1347                     testFail!(func, toer)(`<root foo="&#x;"/>`, 1, 12);
1348                     testFail!(func, toer)(`<root foo="&#A;"/>`, 1, 12);
1349                     testFail!(func, toer)(`<root foo="&#xG;"/>`, 1, 12);
1350                     testFail!(func, toer)(`<root foo="&#42"/>`, 1, 12);
1351                     testFail!(func, toer)(`<root foo="&#x42"/>`, 1, 12);
1352                     testFail!(func, toer)(`<root foo="&#x12;"/>`, 1, 12);
1353 
1354                     testFail!(func, toer)("<root\n\nfoo='\nbar&#x42'></root>", 4, 4);
1355 
1356                     testFail!(func, toer)(`<root a="""></root>`, 1, 11);
1357                     testFail!(func, toer)(`<root a='''></root>`, 1, 11);
1358                     testFail!(func, toer)("<root a=></root>", 1, 9);
1359                     testFail!(func, toer)("<root a='></root>", 1, 9);
1360                     testFail!(func, toer)("<root a='></root>", 1, 9);
1361                     testFail!(func, toer)("<root =''></root>", 1, 7);
1362                     testFail!(func, toer)(`<root a ""></root>`, 1, 9);
1363                     testFail!(func, toer)(`<root a""></root>`, 1, 8);
1364                     testFail!(func, toer)(`<root a></root>`, 1, 8);
1365                     testFail!(func, toer)("<root foo='bar' a=></root>", 1, 19);
1366                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1367                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1368                     testFail!(func, toer)("<root foo='bar' =''></root>", 1, 17);
1369                     testFail!(func, toer)("<root foo='bar' a= hello='world'></root>", 1, 20);
1370                     testFail!(func, toer)("<root foo='bar' a=' hello='world'></root>", 1, 33);
1371                     testFail!(func, toer)("<root foo='bar' ='' hello='world'></root>", 1, 17);
1372                     testFail!(func, toer)("<root foo='bar'a='b'></root>", 1, 16);
1373                     testFail!(func, toer)(`<root .foo='bar'></root>`, 1, 7);
1374 
1375                     testFail!(func, toer)(`<root foo="<"></root>`, 1, 12);
1376                     testFail!(func, toer)(`<root foo="<world"></root>`, 1, 12);
1377                     testFail!(func, toer)(`<root foo="hello<world"></root>`, 1, 17);
1378                     testFail!(func, toer)(`<root foo="&"></root>`, 1, 12);
1379                     testFail!(func, toer)(`<root foo="hello&"></root>`, 1, 17);
1380                     testFail!(func, toer)(`<root foo="hello&world"></root>`, 1, 17);
1381                     testFail!(func, toer)(`<root foo="&;"></root>`, 1, 12);
1382                     testFail!(func, toer)(`<root foo="&#;"></root>`, 1, 12);
1383                     testFail!(func, toer)(`<root foo="&#x;"></root>`, 1, 12);
1384                     testFail!(func, toer)(`<root foo="&#A;"></root>`, 1, 12);
1385                     testFail!(func, toer)(`<root foo="&#xG;"></root>`, 1, 12);
1386                     testFail!(func, toer)(`<root foo="&#42"></root>`, 1, 12);
1387                     testFail!(func, toer)(`<root foo="&#x42"></root>`, 1, 12);
1388                     testFail!(func, toer)(`<root foo="&#x12;"></root>`, 1, 12);
1389 
1390                     testFail!(func, toer)(`<root a='42' a='19'/>`, 1, 14);
1391                     testFail!(func, toer)(`<root a='42' b='hello' a='19'/>`, 1, 24);
1392                     testFail!(func, toer)(`<root a='42' b='hello' a='19' c=''/>`, 1, 24);
1393                     testFail!(func, toer)(`<root a='' b='' c='' d='' e='' f='' g='' e='' h=''/>`, 1, 42);
1394                     testFail!(func, toer)(`<root foo='bar' foo='bar'/>`, 1, 17);
1395 
1396                     test!(func, toer)(`<root foo="&amp;"></root>`, EntityType.elementStart,
1397                                       [tuple("foo", "&amp;")], 1, 19);
1398                     test!(func, toer)(`<root foo="foo&amp;&lt;&gt;&apos;&quot;bar"></root>`, EntityType.elementStart,
1399                                       [tuple("foo", "foo&amp;&lt;&gt;&apos;&quot;bar")], 1, 45);
1400                     testFail!(func, toer)("<root foo='&;'></root>", 1, 12);
1401                     testFail!(func, toer)("<root foo='&.;'></root>", 1, 12);
1402                     testFail!(func, toer)("<root foo='\n &amp ule'></root>", 2, 2);
1403                     testFail!(func, toer)("<root foo='\n &foo bar'></root>", 2, 2);
1404                 }
1405                 {
1406                     alias toer = ThrowOnEntityRef.yes;
1407                     testFail!(func, toer)(`<root foo="&foo;"/>`, 1, 12);
1408                     testFail!(func, toer)(`<root foo="&foo;"></root>`, 1, 12);
1409                     testFail!(func, toer)("<root foo='foo&bar.;'></root>", 1, 15);
1410                     testFail!(func, toer)(`<root foo="hello &a; world"></root>`, 1, 18);
1411                     testFail!(func, toer)("<root foo='hello \n &a; \n world'></root>", 2, 2);
1412                 }
1413                 {
1414                     alias toer = ThrowOnEntityRef.no;
1415                     test!(func, toer)(`<root foo="&foo;"/>`, EntityType.elementEmpty,
1416                                       [tuple("foo", "&foo;")], 1, 20);
1417                     test!(func, toer)(`<root foo="&foo;"></root>`, EntityType.elementStart,
1418                                       [tuple("foo", "&foo;")], 1, 19);
1419                     test!(func, toer)("<root foo='foo&bar.;'></root>", EntityType.elementStart,
1420                                       [tuple("foo", "foo&bar.;")], 1, 23);
1421                     test!(func, toer)(`<root foo="hello &a; world"></root>`, EntityType.elementStart,
1422                                         [tuple("foo", "hello &a; world")], 1, 29);
1423                     test!(func, toer)("<root foo='hello \n &a; \n world'></root>", EntityType.elementStart,
1424                                         [tuple("foo", "hello \n &a; \n world")], 3, 9);
1425                 }
1426             }
1427         }
1428 
1429 
1430         /++
1431             Returns the textual value of this Entity.
1432 
1433             In the case of $(LREF EntityType.pi), this is the
1434             text that follows the name, whereas in the other cases, the text is
1435             the entire contents of the entity (save for the delimeters on the
1436             ends if that entity has them).
1437 
1438             $(TABLE
1439                 $(TR $(TH Supported $(LREF EntityType)s:))
1440                 $(TR $(TD $(LREF2 cdata, EntityType)))
1441                 $(TR $(TD $(LREF2 comment, EntityType)))
1442                 $(TR $(TD $(LREF2 pi, EntityType)))
1443                 $(TR $(TD $(LREF2 _text, EntityType)))
1444             )
1445 
1446             See_Also: $(REF decodeXML, dxml, util)$(BR)
1447                       $(REF asDecodedXML, dxml, util)$(BR)
1448                       $(REF stripIndent, dxml, util)$(BR)
1449                       $(REF withoutIndent, dxml, util)
1450           +/
1451         @property SliceOfR text()
1452         {
1453             import dxml.internal : checkedSave, stripBCU;
1454             with(EntityType)
1455             {
1456                 import std.format : format;
1457                 assert(only(cdata, comment, pi, text).canFind(_type),
1458                        format("text cannot be called with %s", _type));
1459             }
1460             return stripBCU!R(checkedSave(_savedText.input));
1461         }
1462 
1463         ///
1464         static if(compileInTests) unittest
1465         {
1466             import std.range.primitives : empty;
1467 
1468             auto xml = "<?xml version='1.0'?>\n" ~
1469                        "<?instructionName?>\n" ~
1470                        "<?foo here is something to say?>\n" ~
1471                        "<root>\n" ~
1472                        "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1473                        "    <!-- some random comment -->\n" ~
1474                        "    <p>something here</p>\n" ~
1475                        "    <p>\n" ~
1476                        "       something else\n" ~
1477                        "       here</p>\n" ~
1478                        "</root>";
1479             auto range = parseXML(xml);
1480 
1481             // "<?instructionName?>\n" ~
1482             assert(range.front.type == EntityType.pi);
1483             assert(range.front.name == "instructionName");
1484             assert(range.front.text.empty);
1485 
1486             // "<?foo here is something to say?>\n" ~
1487             range.popFront();
1488             assert(range.front.type == EntityType.pi);
1489             assert(range.front.name == "foo");
1490             assert(range.front.text == "here is something to say");
1491 
1492             // "<root>\n" ~
1493             range.popFront();
1494             assert(range.front.type == EntityType.elementStart);
1495 
1496             // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1497             range.popFront();
1498             assert(range.front.type == EntityType.cdata);
1499             assert(range.front.text == " Yay! random text >> << ");
1500 
1501             // "    <!-- some random comment -->\n" ~
1502             range.popFront();
1503             assert(range.front.type == EntityType.comment);
1504             assert(range.front.text == " some random comment ");
1505 
1506             // "    <p>something here</p>\n" ~
1507             range.popFront();
1508             assert(range.front.type == EntityType.elementStart);
1509             assert(range.front.name == "p");
1510 
1511             range.popFront();
1512             assert(range.front.type == EntityType.text);
1513             assert(range.front.text == "something here");
1514 
1515             range.popFront();
1516             assert(range.front.type == EntityType.elementEnd);
1517             assert(range.front.name == "p");
1518 
1519             // "    <p>\n" ~
1520             // "       something else\n" ~
1521             // "       here</p>\n" ~
1522             range.popFront();
1523             assert(range.front.type == EntityType.elementStart);
1524 
1525             range.popFront();
1526             assert(range.front.type == EntityType.text);
1527             assert(range.front.text == "\n       something else\n       here");
1528 
1529             range.popFront();
1530             assert(range.front.type == EntityType.elementEnd);
1531 
1532             // "</root>"
1533             range.popFront();
1534             assert(range.front.type == EntityType.elementEnd);
1535 
1536             range.popFront();
1537             assert(range.empty);
1538         }
1539 
1540 
1541         // Reduce the chance of bugs if reference-type ranges are involved.
1542         static if(!isDynamicArray!R) this(this)
1543         {
1544             with(EntityType) final switch(_type)
1545             {
1546                 case cdata: break;
1547                 case comment: break;
1548                 case elementStart:
1549                 {
1550                     _name = _name.save;
1551                     break;
1552                 }
1553                 case elementEnd: goto case elementStart;
1554                 case elementEmpty: goto case elementStart;
1555                 case text: break;
1556                 case pi: goto case elementStart;
1557             }
1558 
1559             if(_type != EntityType.elementEnd)
1560                 _savedText = _savedText.save;
1561         }
1562 
        // Verifies that a copy of an Entity is independent of the original:
        // reading from one must not consume the other, and neither is affected
        // by advancing the range that produced them.
        static if(compileInTests) unittest
        {
            import std.algorithm.comparison : equal;
            import dxml.internal : testRangeFuncs;

            // Attributes are compared by name and value; both are saved first
            // so that the comparison itself does not consume them.
            static bool cmpAttr(T)(T lhs, T rhs)
            {
                return equal(lhs.name.save, rhs.name.save) &&
                       equal(lhs.value.save, rhs.value.save);
            }

            {
                auto xml = "<root>\n" ~
                           "    <foo a='42'/>\n" ~
                           "    <foo b='42'/>\n" ~
                           "    <nocomment>nothing to say</nocomment>\n" ~
                           "</root>";

                // The duplicate lines aren't typos. We want to ensure that the
                // values are independent and that nothing was consumed.
                static foreach(func; testRangeFuncs)
                {{
                     auto range = parseXML(func(xml));
                     // Move from <root> to <foo a='42'/>.
                     range.popFront();
                     {
                         auto entity = range.front;
                         auto entity2 = entity;
                         assert(entity.pos == entity2.pos);
                         assert(equal(entity.name, entity2.name));
                         assert(equal(entity.name, entity2.name));
                         assert(equal!cmpAttr(entity.attributes, entity2.attributes));
                         assert(equal!cmpAttr(entity.attributes, entity2.attributes));
                         // Advancing the range must not change entities that
                         // were already retrieved.
                         range.popFront();
                         assert(entity.pos == entity2.pos);
                         assert(entity.pos != range.front.pos);
                     }
                     // Move past <foo b='42'/> and <nocomment> to the text
                     // inside <nocomment>.
                     range.popFront();
                     range.popFront();
                     {
                         auto entity = range.front;
                         auto entity2 = entity;
                         assert(entity.pos == entity2.pos);
                         assert(equal(entity.text, entity2.text));
                         assert(equal(entity.text, entity2.text));
                         range.popFront();
                         assert(entity.pos == entity2.pos);
                         assert(entity.pos != range.front.pos);
                     }
                }}
            }
            {
                auto xml = "<root>\n" ~
                           "    <![CDATA[whatever]]>\n" ~
                           "    <?pi?>\n" ~
                           "    <!--comment-->\n" ~
                           "    <empty/>\n" ~
                           "    <noend a='foo' b='bar'/>\n" ~
                           "    <foo baz='42'></foo>\n" ~
                           "</root>";

                // Walk every entity in the document and check that a retrieved
                // entity, a copy of it, and a fresh range.front all agree.
                static foreach(func; testRangeFuncs)
                {
                    for(auto range = parseXML(func(xml)); !range.empty; range.popFront())
                    {
                        auto entity = range.front;
                        auto entity2 = entity;

                        assert(entity.pos == range.front.pos);
                        assert(entity.pos == entity2.pos);
                        assert(entity.type == range.front.type);
                        assert(entity.type == entity2.type);

                        // Only the properties that are exercised for each
                        // entity type are compared; the cases chain into one
                        // another via goto case.
                        with(EntityType) final switch(entity.type)
                        {
                            case cdata: goto case text;
                            case comment: goto case text;
                            case elementStart:
                            {
                                assert(equal!cmpAttr(entity.attributes, range.front.attributes));
                                assert(equal!cmpAttr(entity.attributes, entity2.attributes));
                                goto case elementEnd;
                            }
                            case elementEnd:
                            {
                                assert(equal(entity.name, range.front.name));
                                assert(equal(entity.name, entity2.name));
                                break;
                            }
                            case elementEmpty: goto case elementStart;
                            case text:
                            {
                                assert(equal(entity.text, range.front.text));
                                assert(equal(entity.text, entity2.text));
                                break;
                            }
                            case pi:
                            {
                                assert(equal(entity.name, range.front.name));
                                assert(equal(entity.name, entity2.name));
                                goto case text;
                            }
                        }
                    }
                }
            }
        }
1669 
1670 
1671     private:
1672 
1673         this(EntityType type)
1674         {
1675             _type = type;
1676 
1677             // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
1678             _name = typeof(_name).init;
1679             _savedText = typeof(_savedText).init;
1680         }
1681 
1682         EntityType _type;
1683         TextPos _pos;
1684         Taken _name;
1685         typeof(EntityRange._savedText) _savedText;
1686     }
1687 
1688 
1689     /++
1690         Returns the $(LREF Entity) representing the entity in the XML document
1691         which was most recently parsed.
1692       +/
1693     @property Entity front()
1694     {
1695         auto retval = Entity(_type);
1696         with(EntityType) final switch(_type)
1697         {
1698             case cdata: retval._savedText = _savedText.save; break;
1699             case comment: goto case cdata;
1700             case elementStart: retval._name = _name.save; retval._savedText = _savedText.save; break;
1701             case elementEnd: retval._name = _name.save; break;
1702             case elementEmpty: goto case elementStart;
1703             case text: goto case cdata;
1704             case pi: goto case elementStart;
1705         }
1706         retval._pos = _entityPos;
1707         return retval;
1708     }
1709 
1710 
1711     /++
1712         Move to the next entity.
1713 
1714         The next entity is the next one that is linearly in the XML document.
1715         So, if the current entity has child entities, the next entity will be
1716         the first child entity, whereas if it has no child entities, it will be
1717         the next entity at the same level.
1718 
1719         Throws: $(LREF XMLParsingException) on invalid XML.
1720       +/
1721     void popFront()
1722     {
1723         final switch(_grammarPos) with(GrammarPos)
1724         {
1725             case documentStart: _parseDocumentStart(); break;
1726             case prologMisc1: _parseAtPrologMisc!1(); break;
1727             case prologMisc2: _parseAtPrologMisc!2(); break;
1728             case splittingEmpty:
1729             {
1730                 _type = EntityType.elementEnd;
1731                 _tagStack.sawEntity();
1732                 _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
1733                 break;
1734             }
1735             case contentCharData1:
1736             {
1737                 assert(_type == EntityType.elementStart);
1738                 _tagStack.pushTag(_name.save);
1739                 _parseAtContentCharData();
1740                 break;
1741             }
1742             case contentMid: _parseAtContentMid(); break;
1743             case contentCharData2: _parseAtContentCharData(); break;
1744             case endTag: _parseElementEnd(); break;
1745             case endMisc: _parseAtEndMisc(); break;
1746             case documentEnd: assert(0, "It's illegal to call popFront() on an empty EntityRange.");
1747         }
1748     }
1749 
1750 
1751     /++
1752         Whether the end of the XML document has been reached.
1753 
1754         Note that because an $(LREF XMLParsingException) will be thrown an
1755         invalid XML, it's actually possible to call
1756         $(LREF2 front, EntityRange) and $(LREF2 popFront, EntityRange) without
1757         checking empty if the only way that empty would be true is if the XML
1758         were invalid (e.g. if at a start tag, it's a given that there's at
1759         least one end tag left in the document unless it's invalid XML).
1760 
1761         However, of course, caution should be used to ensure that incorrect
1762         assumptions are not made that allow the document to reach its end
1763         earlier than predicted without throwing an $(LREF XMLParsingException),
1764         since it's still an error to call $(LREF2 front, EntityRange) or
1765         $(LREF2 popFront, EntityRange) if empty would return false.
1766       +/
1767     @property bool empty() @safe const pure nothrow @nogc
1768     {
1769         return _grammarPos == GrammarPos.documentEnd;
1770     }
1771 
1772 
1773     /++
1774         Forward range function for obtaining a copy of the range which can then
1775         be iterated independently of the original.
1776       +/
1777     @property auto save()
1778     {
1779         // The init check nonsense is because of ranges whose init values blow
1780         // up when save is called (e.g. a range that's a class).
1781         auto retval = this;
1782         if(retval._name !is typeof(retval._name).init)
1783             retval._name = _name.save;
1784         if(retval._text.input !is typeof(retval._text.input).init)
1785             retval._text.input = _text.input.save;
1786         if(retval._savedText.input !is typeof(retval._savedText.input).init)
1787             retval._savedText.input = _savedText.input.save;
1788         return retval;
1789     }
1790 
    // Verifies that ranges obtained from save, and separately parsed ranges
    // over the same text, produce identical sequences of entities.
    static if(compileInTests) unittest
    {
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown;
        import dxml.internal : testRangeFuncs;

        // Attributes are compared by name and value; both are saved first so
        // that the comparison itself does not consume them.
        static bool cmpAttr(T)(T lhs, T rhs)
        {
            return equal(lhs.name.save, rhs.name.save) &&
                   equal(lhs.value.save, rhs.value.save);
        }

        // Walks both ranges in lockstep, comparing each pair of entities, and
        // requires that both ranges run out at the same time.
        static void testEqual(ER)(ER one, ER two)
        {
             while(!one.empty && !two.empty)
             {
                 auto left = one.front;
                 auto right = two.front;

                 assert(left.pos == right.pos);
                 assert(left.type == right.type);

                 // Only the properties that are exercised for each entity
                 // type are compared.
                 with(EntityType) final switch(left.type)
                 {
                     case cdata: goto case text;
                     case comment: goto case text;
                     case elementStart:
                     {
                         assert(equal!cmpAttr(left.attributes, right.attributes));
                         goto case elementEnd;
                     }
                     case elementEnd: assert(equal(left.name, right.name)); break;
                     case elementEmpty: goto case elementStart;
                     case text: assert(equal(left.text, right.text)); break;
                     case pi: assert(equal(left.name, right.name)); goto case text;
                 }

                 one.popFront();
                 two.popFront();
             }

             assert(one.empty);
             assert(two.empty);
        }

         auto xml = "<root>\n" ~
                    "    <!-- comment -->\n" ~
                    "    <something>\n" ~
                    "         <else/>\n" ~
                    "         somet text <i>goes</i> here\n" ~
                    "    </something>\n" ~
                    "</root>";

        static foreach(i, func; testRangeFuncs)
        {{
             auto text = func(xml);
             // Two separately parsed ranges over saved copies of the same text.
             testEqual(parseXML(text.save), parseXML(text.save));
             // Two saved copies of one already-constructed range.
             auto range = parseXML(text.save);
             testEqual(range.save, range.save);
        }}
    }
1852 
1853 
1854     /++
1855         Returns an empty range. This corresponds to
1856         $(PHOBOS_REF _takeNone, std, range) except that it doesn't create a
1857         wrapper type.
1858       +/
1859     EntityRange takeNone()
1860     {
1861         auto retval = save;
1862         retval._grammarPos = GrammarPos.documentEnd;
1863         return retval;
1864     }
1865 
1866 
1867 private:
1868 
1869     void _parseDocumentStart()
1870     {
1871         auto orig = _text.save;
1872         immutable wasWS = _text.stripWS();
1873         if(_text.stripStartsWith("<?xml"))
1874         {
1875             if(wasWS)
1876                 throw new XMLParsingException("Cannot have whitespace before the <?xml...?> declaration", TextPos.init);
1877             checkNotEmpty(_text);
1878             if(_text.input.front == '?' || isSpace(_text.input.front))
1879                 _text.skipUntilAndDrop!"?>"();
1880             else
1881                 _text = orig;
1882         }
1883         _grammarPos = GrammarPos.prologMisc1;
1884         _parseAtPrologMisc!1();
1885     }
1886 
    // Tests the handling of leading whitespace and the XML declaration at the
    // start of the document.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.exception : assertNotThrown, enforce;
        import dxml.internal : testRangeFuncs;

        // Parses xml, which must not throw, and checks that the first entity is
        // an empty element tag and that the parser's text position afterwards
        // is (row, col). line is the caller's line so that failures are
        // reported at the call site.
        static void test(alias func)(string xml, int row, int col, size_t line = __LINE__)
        {
            auto range = assertNotThrown!XMLParsingException(parseXML(func(xml)));
            enforce!AssertError(range._type == EntityType.elementEmpty, "unittest failure 1", __FILE__, line);
            enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        static foreach(func; testRangeFuncs)
        {
            test!func("<root/>", 1, 8);
            test!func("\n\t\n <root/>   \n", 3, 9);
            // Newlines inside the XML declaration still count towards the position.
            test!func("<?xml\n\n\nversion='1.8'\n\n\n\nencoding='UTF-8'\n\n\nstandalone='yes'\n?><root/>", 12, 10);
            test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?><root/>", 6, 23);
            test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?>\n     <root/>", 7, 13);
            // These two repeat the first two cases.
            test!func("<root/>", 1, 8);
            test!func("\n\t\n <root/>   \n", 3, 9);
        }
    }
1911 
1912 
    // Parse at GrammarPos.prologMisc1 or GrammarPos.prologMisc2.
    //
    // Parses the next thing in the prolog: a comment, a processing instruction,
    // the DOCTYPE declaration, or the start tag of the root element.
    //
    // miscNum selects how a DOCTYPE is treated: with 1, a <!DOCTYPE ...>
    // declaration is accepted and parsed, whereas with 2, encountering one is
    // an error (so 2 is presumably used once a DOCTYPE has already been seen).
    //
    // Throws XMLParsingException if the input does not start with '<' (after
    // whitespace) or if what follows "<!" is not allowed at this point.
    void _parseAtPrologMisc(int miscNum)()
    {
        static assert(miscNum == 1 || miscNum == 2);

        // document ::= prolog element Misc*
        // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
        // Misc ::= Comment | PI | S

        stripWS(_text);
        checkNotEmpty(_text);
        if(_text.input.front != '<')
            throw new XMLParsingException("Expected <", _text.pos);
        popFrontAndIncCol(_text);
        checkNotEmpty(_text);

        // The character after '<' determines what kind of construct this is.
        switch(_text.input.front)
        {
            // Comment     ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
            // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
            case '!':
            {
                // Position of the '!', used for the errors thrown below.
                immutable bangPos = _text.pos;
                popFrontAndIncCol(_text);
                if(_text.stripStartsWith("--"))
                {
                    _parseComment();
                    // When comments are skipped, nothing was produced for the
                    // caller, so keep parsing at the same grammar position.
                    static if(config.skipComments == SkipComments.yes)
                        _parseAtPrologMisc!miscNum();
                    break;
                }
                // Both branches below end in a throw, so control never falls
                // out of this case into the next one.
                static if(miscNum == 1)
                {
                    if(_text.stripStartsWith("DOCTYPE"))
                    {
                        if(!_text.stripWS())
                            throw new XMLParsingException("Whitespace must follow <!DOCTYPE", _text.pos);
                        _parseDoctypeDecl();
                        break;
                    }
                    throw new XMLParsingException("Expected Comment or DOCTYPE section", bangPos);
                }
                else
                {
                    if(_text.stripStartsWith("DOCTYPE"))
                    {
                        throw new XMLParsingException("Only one <!DOCTYPE ...> declaration allowed per XML document",
                                                      bangPos);
                    }
                    throw new XMLParsingException("Expected Comment", bangPos);
                }
            }
            // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
            case '?':
            {
                _parsePI();
                // When processing instructions are skipped, move straight on
                // to the next entity.
                static if(config.skipPI == SkipPI.yes)
                    popFront();
                break;
            }
            // element ::= EmptyElemTag | STag content ETag
            // Anything else after '<' is parsed as the start of an element.
            default:
            {
                _parseElementStart();
                break;
            }
        }
    }
1981 
1982 
    // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
    // Parses a comment. <!-- was already removed from the front of the input.
    //
    // When comments are skipped (SkipComments.yes), the input is only advanced
    // past the comment. Otherwise, the comment becomes the current entity and
    // its contents are stored in _savedText.
    //
    // Throws XMLParsingException if the first "--" in the comment is not
    // directly followed by '>'.
    void _parseComment()
    {
        static if(config.skipComments == SkipComments.yes)
            _text.skipUntilAndDrop!"--"();
        else
        {
            // The four characters of "<!--" were already consumed, so step the
            // column back by 4 to get the position where the comment starts.
            _entityPos = TextPos(_text.pos.line, _text.pos.col - 4);
            _type = EntityType.comment;
            _tagStack.sawEntity();
            _savedText.pos = _text.pos;
            _savedText.input = _text.takeUntilAndDrop!"--"();
        }
        // The "--" which was just dropped has to be the one in "-->".
        if(_text.input.empty || _text.input.front != '>')
            throw new XMLParsingException("Comments cannot contain -- and cannot be terminated by --->", _text.pos);
        // This is here rather than at the end of the previous static if block
        // so that the error message for improperly terminating a comment takes
        // precedence over the one involving invalid characters in the comment.
        static if(config.skipComments == SkipComments.no)
            checkText!true(_savedText);
        // Consume the closing '>'.
        popFrontAndIncCol(_text);
    }
2006 
2007     static if(compileInTests) unittest
2008     {
2009         import core.exception : AssertError;
2010         import std.algorithm.comparison : equal;
2011         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2012         import dxml.internal : codeLen, testRangeFuncs;
2013 
2014         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2015         {
2016             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")));
2017             enforce!AssertError(range.front.type == EntityType.comment, "unittest failure 1", __FILE__, line);
2018             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2019             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2020         }
2021 
2022         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2023         {
2024             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2025             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2026             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2027         }
2028 
2029         static foreach(func; testRangeFuncs)
2030         {
2031             test!func("<!--foo-->", "foo", 1, 11);
2032             test!func("<!-- foo -->", " foo ", 1, 13);
2033             test!func("<!-- -->", " ", 1, 9);
2034             test!func("<!---->", "", 1, 8);
2035             test!func("<!--- comment -->", "- comment ", 1, 18);
2036             test!func("<!-- \n foo \n -->", " \n foo \n ", 3, 5);
2037             test!func("<!--京都市 ディラン-->", "京都市 ディラン", 1, codeLen!(func, "<!--京都市 ディラン-->") + 1);
2038             test!func("<!--&-->", "&", 1, 9);
2039             test!func("<!--<-->", "<", 1, 9);
2040             test!func("<!-->-->", ">", 1, 9);
2041             test!func("<!--->-->", "->", 1, 10);
2042 
2043             testFail!func("<!", 1, 2);
2044             testFail!func("<!- comment -->", 1, 2);
2045             testFail!func("<!-- comment ->", 1, 5);
2046             testFail!func("<!-- comment --->", 1, 16);
2047             testFail!func("<!---- comment -->", 1, 7);
2048             testFail!func("<!-- comment -- comment -->", 1, 16);
2049             testFail!func("<!->", 1, 2);
2050             testFail!func("<!-->", 1, 5);
2051             testFail!func("<!--->", 1, 5);
2052             testFail!func("<!----->", 1, 7);
2053             testFail!func("<!blah>", 1, 2);
2054             testFail!func("<! blah>", 1, 2);
2055             testFail!func("<!-- \n\n   \v \n -->", 3, 4);
2056             testFail!func("<!--京都市 ディラン\v-->", 1, codeLen!(func, "<!--京都市 ディラン\v"));
2057 
2058             {
2059                 auto xml = func("<!DOCTYPE foo><!-- comment --><root/>");
2060                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2061                 assert(range.front.type == EntityType.comment);
2062                 assert(equal(range.front.text, " comment "));
2063             }
2064             {
2065                 auto xml = func("<root><!-- comment --></root>");
2066                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2067                 assertNotThrown!XMLParsingException(range.popFront());
2068                 assert(range.front.type == EntityType.comment);
2069                 assert(equal(range.front.text, " comment "));
2070             }
2071             {
2072                 auto xml = func("<root/><!-- comment -->");
2073                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2074                 assertNotThrown!XMLParsingException(range.popFront());
2075                 assert(range.front.type == EntityType.comment);
2076                 assert(equal(range.front.text, " comment "));
2077             }
2078 
2079             static foreach(comment; ["<!foo>", "<! foo>", "<!->", "<!-->", "<!--->"])
2080             {
2081                 {
2082                     auto xml = func("<!DOCTYPE foo>" ~ comment ~ "<root/>");
2083                     assertThrown!XMLParsingException(parseXML(xml));
2084                 }
2085                 {
2086                     auto xml = func("<root>" ~ comment ~ "<root>");
2087                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2088                     assertThrown!XMLParsingException(range.popFront());
2089                 }
2090                 {
2091                     auto xml = func("<root/>" ~ comment);
2092                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2093                     assertThrown!XMLParsingException(range.popFront());
2094                 }
2095             }
2096 
2097             {
2098                 auto xml = "<!--one-->\n" ~
2099                            "<!--two-->\n" ~
2100                            "<root>\n" ~
2101                            "    <!--three-->\n" ~
2102                            "    <!--four-->\n" ~
2103                            "</root>\n" ~
2104                            "<!--five-->\n" ~
2105                            "<!--six-->";
2106 
2107                 auto text = func(xml);
2108                 {
2109                     auto range = parseXML(text.save);
2110                     assert(range.front.type == EntityType.comment);
2111                     assert(equal(range.front.text, "one"));
2112                     assertNotThrown!XMLParsingException(range.popFront());
2113                     assert(range.front.type == EntityType.comment);
2114                     assert(equal(range.front.text, "two"));
2115                     assertNotThrown!XMLParsingException(range.popFront());
2116                     assert(range.front.type == EntityType.elementStart);
2117                     assert(equal(range.front.name, "root"));
2118                     assertNotThrown!XMLParsingException(range.popFront());
2119                     assert(range.front.type == EntityType.comment);
2120                     assert(equal(range.front.text, "three"));
2121                     assertNotThrown!XMLParsingException(range.popFront());
2122                     assert(range.front.type == EntityType.comment);
2123                     assert(equal(range.front.text, "four"));
2124                     assertNotThrown!XMLParsingException(range.popFront());
2125                     assert(range.front.type == EntityType.elementEnd);
2126                     assert(equal(range.front.name, "root"));
2127                     assertNotThrown!XMLParsingException(range.popFront());
2128                     assert(range.front.type == EntityType.comment);
2129                     assert(equal(range.front.text, "five"));
2130                     assertNotThrown!XMLParsingException(range.popFront());
2131                     assert(range.front.type == EntityType.comment);
2132                     assert(equal(range.front.text, "six"));
2133                     assertNotThrown!XMLParsingException(range.popFront());
2134                     assert(range.empty);
2135                 }
2136                 {
2137                     auto range = parseXML!simpleXML(text.save);
2138                     assert(range.front.type == EntityType.elementStart);
2139                     assert(equal(range.front.name, "root"));
2140                     assertNotThrown!XMLParsingException(range.popFront());
2141                     assert(range.front.type == EntityType.elementEnd);
2142                     assert(equal(range.front.name, "root"));
2143                     assertNotThrown!XMLParsingException(range.popFront());
2144                     assert(range.empty);
2145                 }
2146             }
2147         }
2148     }
2149 
2150 
2151     // PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2152     // PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2153     // Parses a processing instruction. < was already removed from the input.
2154     void _parsePI()
2155     {
2156         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2157         assert(_text.input.front == '?');
2158         popFrontAndIncCol(_text);
2159         static if(config.skipPI == SkipPI.yes)
2160             _text.skipUntilAndDrop!"?>"();
2161         else
2162         {
2163             immutable posAtName = _text.pos;
2164             if(_text.input.empty)
2165                 throw new XMLParsingException("Unterminated processing instruction", posAtName);
2166             _type = EntityType.pi;
2167             _tagStack.sawEntity();
2168             _name = takeName!'?'(_text);
2169             immutable posAtWS = _text.pos;
2170             stripWS(_text);
2171             checkNotEmpty(_text);
2172             _savedText.pos = _text.pos;
2173             _savedText.input = _text.takeUntilAndDrop!"?>"();
2174             checkText!true(_savedText);
2175             if(walkLength(_name.save) == 3)
2176             {
2177                 // FIXME icmp doesn't compile right now due to an issue with
2178                 // byUTF that needs to be looked into.
2179                 /+
2180                 import std.uni : icmp;
2181                 if(icmp(_name.save, "xml") == 0)
2182                     throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2183                 +/
2184                 auto temp = _name.save;
2185                 if(temp.front == 'x' || temp.front == 'X')
2186                 {
2187                     temp.popFront();
2188                     if(temp.front == 'm' || temp.front == 'M')
2189                     {
2190                         temp.popFront();
2191                         if(temp.front == 'l' || temp.front == 'L')
2192                             throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2193                     }
2194                 }
2195             }
2196         }
2197     }
2198 
2199     static if(compileInTests) unittest
2200     {
2201         import core.exception : AssertError;
2202         import std.algorithm.comparison : equal;
2203         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2204         import std.utf : byUTF;
2205         import dxml.internal : codeLen, testRangeFuncs;
2206 
2207         static void test(alias func)(string text, string name, string expected,
2208                                      int row, int col, size_t line = __LINE__)
2209         {
2210             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2211                                                              "unittest failure 1", __FILE__, line);
2212             enforce!AssertError(range.front.type == EntityType.pi, "unittest failure 2", __FILE__, line);
2213             enforce!AssertError(equal(range.front.name, name), "unittest failure 3", __FILE__, line);
2214             enforce!AssertError(equal(range.front.text, expected), "unittest failure 4", __FILE__, line);
2215             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 5", __FILE__, line);
2216         }
2217 
2218         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2219         {
2220             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2221             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2222             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2223         }
2224 
2225         static foreach(func; testRangeFuncs)
2226         {
2227             test!func("<?a?>", "a", "", 1, 6);
2228             test!func("<?foo?>", "foo", "", 1, 8);
2229             test!func("<?foo.?>", "foo.", "", 1, 9);
2230             test!func("<?foo bar?>", "foo", "bar", 1, 12);
2231             test!func("<?xmf bar?>", "xmf", "bar", 1, 12);
2232             test!func("<?xmlfoo bar?>", "xmlfoo", "bar", 1, 15);
2233             test!func("<?foo bar baz?>", "foo", "bar baz", 1, 16);
2234             test!func("<?foo\nbar baz?>", "foo", "bar baz", 2, 10);
2235             test!func("<?foo \n bar baz?>", "foo", "bar baz", 2, 11);
2236             test!func("<?foo bar\nbaz?>", "foo", "bar\nbaz", 2, 6);
2237             test!func("<?dlang is awesome?>", "dlang", "is awesome", 1, 21);
2238             test!func("<?dlang is awesome! ?>", "dlang", "is awesome! ", 1, 23);
2239             test!func("<?dlang\n\nis\n\nawesome\n\n?>", "dlang", "is\n\nawesome\n\n", 7, 3);
2240             test!func("<?京都市 ディラン?>", "京都市", "ディラン", 1, codeLen!(func, "<?京都市 ディラン?>") + 1);
2241             test!func("<?foo bar&baz?>", "foo", "bar&baz", 1, 16);
2242             test!func("<?foo bar<baz?>", "foo", "bar<baz", 1, 16);
2243             test!func("<?pi ?>", "pi", "", 1, 8);
2244             test!func("<?pi\n?>", "pi", "", 2, 3);
2245             test!func("<?foo ??>", "foo", "?", 1, 10);
2246             test!func("<?pi some data ? > <??>", "pi", "some data ? > <?", 1, 24);
2247 
2248             testFail!func("<?", 1, 3);
2249             testFail!func("<??>", 1, 3);
2250             testFail!func("<? ?>", 1, 3);
2251             testFail!func("<?xml?><?xml?>", 1, 10);
2252             testFail!func("<?XML?>", 1, 3);
2253             testFail!func("<?xMl?>", 1, 3);
2254             testFail!func("<?foo>", 1, 6);
2255             testFail!func("<? foo?>", 1, 3);
2256             testFail!func("<?\nfoo?>", 1, 3);
2257             testFail!func("<??foo?>", 1, 3);
2258             testFail!func("<?.foo?>", 1, 3);
2259             testFail!func("<?foo bar\vbaz?>", 1, 10);
2260 
2261             {
2262                 auto xml = func("<!DOCTYPE foo><?foo bar?><root/>");
2263                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2264                 assert(range.front.type == EntityType.pi);
2265                 assert(equal(range.front.name, "foo"));
2266                 assert(equal(range.front.text, "bar"));
2267             }
2268             {
2269                 auto xml = func("<root><?foo bar?></root>");
2270                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2271                 assertNotThrown!XMLParsingException(range.popFront());
2272                 assert(equal(range.front.name, "foo"));
2273                 assert(equal(range.front.text, "bar"));
2274             }
2275             {
2276                 auto xml = func("<root/><?foo bar?>");
2277                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2278                 assertNotThrown!XMLParsingException(range.popFront());
2279                 assert(equal(range.front.name, "foo"));
2280                 assert(equal(range.front.text, "bar"));
2281             }
2282 
2283             static foreach(pi; ["<?foo>", "<foo?>", "<? foo>"])
2284             {
2285                 {
2286                     auto xml = func("<!DOCTYPE foo>" ~ pi ~ "<root/>");
2287                     assertThrown!XMLParsingException(parseXML(xml));
2288                 }
2289                 {
2290                     auto xml = func("<root>" ~ pi ~ "<root>");
2291                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2292                     assertThrown!XMLParsingException(range.popFront());
2293                 }
2294                 {
2295                     auto xml = func("<root/>" ~ pi);
2296                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2297                     assertThrown!XMLParsingException(range.popFront());
2298                 }
2299             }
2300 
2301             {
2302                 auto xml = "<?one?>\n" ~
2303                            "<?two?>\n" ~
2304                            "<root>\n" ~
2305                            "    <?three?>\n" ~
2306                            "    <?four?>\n" ~
2307                            "</root>\n" ~
2308                            "<?five?>\n" ~
2309                            "<?six?>";
2310 
2311                 auto text = func(xml);
2312                 {
2313                     auto range = parseXML(text.save);
2314                     assert(range.front.type == EntityType.pi);
2315                     assert(equal(range.front.name, "one"));
2316                     assertNotThrown!XMLParsingException(range.popFront());
2317                     assert(range.front.type == EntityType.pi);
2318                     assert(equal(range.front.name, "two"));
2319                     assertNotThrown!XMLParsingException(range.popFront());
2320                     assert(range.front.type == EntityType.elementStart);
2321                     assert(equal(range.front.name, "root"));
2322                     assertNotThrown!XMLParsingException(range.popFront());
2323                     assert(range.front.type == EntityType.pi);
2324                     assert(equal(range.front.name, "three"));
2325                     assertNotThrown!XMLParsingException(range.popFront());
2326                     assert(range.front.type == EntityType.pi);
2327                     assert(equal(range.front.name, "four"));
2328                     assertNotThrown!XMLParsingException(range.popFront());
2329                     assert(range.front.type == EntityType.elementEnd);
2330                     assert(equal(range.front.name, "root"));
2331                     assertNotThrown!XMLParsingException(range.popFront());
2332                     assert(range.front.type == EntityType.pi);
2333                     assert(equal(range.front.name, "five"));
2334                     assertNotThrown!XMLParsingException(range.popFront());
2335                     assert(range.front.type == EntityType.pi);
2336                     assert(equal(range.front.name, "six"));
2337                     assertNotThrown!XMLParsingException(range.popFront());
2338                     assert(range.empty);
2339                 }
2340                 {
2341                     auto range = parseXML!simpleXML(text.save);
2342                     assert(range.front.type == EntityType.elementStart);
2343                     assert(equal(range.front.name, "root"));
2344                     assertNotThrown!XMLParsingException(range.popFront());
2345                     assert(range.front.type == EntityType.elementEnd);
2346                     assert(equal(range.front.name, "root"));
2347                     assertNotThrown!XMLParsingException(range.popFront());
2348                     assert(range.empty);
2349                 }
2350             }
2351         }
2352     }
2353 
2354 
2355     // CDSect  ::= CDStart CData CDEnd
2356     // CDStart ::= '<![CDATA['
2357     // CData   ::= (Char* - (Char* ']]>' Char*))
2358     // CDEnd   ::= ']]>'
2359     // Parses a CDATA. <![CDATA[ was already removed from the front of the input.
2360     void _parseCDATA()
2361     {
2362         _entityPos = TextPos(_text.pos.line, _text.pos.col - cast(int)"<![CDATA[".length);
2363         _type = EntityType.cdata;
2364         _tagStack.sawEntity();
2365         _savedText.pos = _text.pos;
2366         _savedText.input = _text.takeUntilAndDrop!"]]>";
2367         checkText!true(_savedText);
2368         _grammarPos = GrammarPos.contentCharData2;
2369     }
2370 
2371     static if(compileInTests) unittest
2372     {
2373         import core.exception : AssertError;
2374         import std.algorithm.comparison : equal;
2375         import std.exception : assertNotThrown, collectException, enforce;
2376         import dxml.internal : codeLen, testRangeFuncs;
2377 
2378         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2379         {
2380             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2381             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2382             assertNotThrown!XMLParsingException(range.popFront());
2383             enforce!AssertError(range.front.type == EntityType.cdata, "unittest failure 1", __FILE__, line);
2384             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2385             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2386         }
2387 
2388         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2389         {
2390             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2391             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2392             auto e = collectException!XMLParsingException(range.popFront());
2393             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2394             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
2395         }
2396 
2397         static foreach(func; testRangeFuncs)
2398         {
2399             test!func("<![CDATA[]]>", "", 1, 13);
2400             test!func("<![CDATA[hello world]]>", "hello world", 1, 24);
2401             test!func("<![CDATA[\nhello\n\nworld\n]]>", "\nhello\n\nworld\n", 5, 4);
2402             test!func("<![CDATA[京都市]]>", "京都市", 1, codeLen!(func, "<![CDATA[京都市]>") + 2);
2403             test!func("<![CDATA[<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ]]>",
2404                       "<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ", 1, 57);
2405             test!func("<![CDATA[&]]>", "&", 1, 14);
2406 
2407             testFail!func("<[CDATA[]>", 1, 2);
2408             testFail!func("<![CDAT[]>", 1, 2);
2409             testFail!func("<![CDATA]>", 1, 2);
2410             testFail!func("<![CDATA[>", 1, 10);
2411             testFail!func("<![CDATA[]", 1, 10);
2412             testFail!func("<![CDATA[]>", 1, 10);
2413             testFail!func("<![CDATA[ \v ]]>", 1, 11);
2414             testFail!func("<![CDATA[ \n\n \v \n ]]>", 3, 2);
2415         }
2416     }
2417 
2418 
2419     // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
2420     // DeclSep     ::= PEReference | S
2421     // intSubset   ::= (markupdecl | DeclSep)*
2422     // markupdecl  ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
2423     // Parse doctypedecl after GrammarPos.prologMisc1.
2424     // <!DOCTYPE and any whitespace after it should have already been removed
2425     // from the input.
2426     void _parseDoctypeDecl()
2427     {
2428         outer: while(true)
2429         {
2430             _text.skipToOneOf!('"', '\'', '[', '>')();
2431             switch(_text.input.front)
2432             {
2433                 static foreach(quote; ['"', '\''])
2434                 {
2435                     case quote:
2436                     {
2437                         popFrontAndIncCol(_text);
2438                         _text.skipUntilAndDrop!([quote])();
2439                         continue outer;
2440                     }
2441                 }
2442                 case '[':
2443                 {
2444                     popFrontAndIncCol(_text);
2445                     while(true)
2446                     {
2447                         checkNotEmpty(_text);
2448                         _text.skipToOneOf!('"', '\'', ']')();
2449                         switch(_text.input.front)
2450                         {
2451                             case '"':
2452                             {
2453                                 popFrontAndIncCol(_text);
2454                                 _text.skipUntilAndDrop!`"`();
2455                                 continue;
2456                             }
2457                             case '\'':
2458                             {
2459                                 popFrontAndIncCol(_text);
2460                                 _text.skipUntilAndDrop!`'`();
2461                                 continue;
2462                             }
2463                             case ']':
2464                             {
2465                                 popFrontAndIncCol(_text);
2466                                 stripWS(_text);
2467                                 if(_text.input.empty || _text.input.front != '>')
2468                                     throw new XMLParsingException("Incorrectly terminated <!DOCTYPE> section.", _text.pos);
2469                                 popFrontAndIncCol(_text);
2470                                 _parseAtPrologMisc!2();
2471                                 return;
2472                             }
2473                             default: assert(0);
2474                         }
2475                     }
2476                 }
2477                 case '>':
2478                 {
2479                     popFrontAndIncCol(_text);
2480                     _parseAtPrologMisc!2();
2481                     break;
2482                 }
2483                 default: assert(0);
2484             }
2485             break;
2486         }
2487     }
2488 
2489     static if(compileInTests) unittest
2490     {
2491         import core.exception : AssertError;
2492         import std.exception : assertNotThrown, collectException, enforce;
2493         import dxml.internal : testRangeFuncs;
2494 
2495         static void test(alias func)(string text, int row, int col, size_t line = __LINE__)
2496         {
2497             auto pos = TextPos(row, col + cast(int)"<root/>".length);
2498             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2499                                                              "unittest failure 1", __FILE__, line);
2500             enforce!AssertError(range.front.type == EntityType.elementEmpty, "unittest failure 2", __FILE__, line);
2501             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2502         }
2503 
2504         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2505         {
2506             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2507             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2508             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2509         }
2510 
2511         static foreach(func; testRangeFuncs)
2512         {
2513             test!func("<!DOCTYPE name>", 1, 16);
2514             test!func("<!DOCTYPE \n\n\n name>", 4, 7);
2515             test!func("<!DOCTYPE name \n\n\n >", 4, 3);
2516 
2517             test!func("<!DOCTYPE name []>", 1, 19);
2518             test!func("<!DOCTYPE \n\n\n name []>", 4, 10);
2519             test!func("<!DOCTYPE name \n\n\n []>", 4, 5);
2520 
2521             test!func(`<!DOCTYPE name PUBLIC "'''" '"""'>`, 1, 35);
2522             test!func(`<!DOCTYPE name PUBLIC "'''" '"""' []>`, 1, 38);
2523             test!func(`<!DOCTYPE name PUBLIC 'foo' "'''">`, 1, 35);
2524             test!func(`<!DOCTYPE name PUBLIC 'foo' '"""' []>`, 1, 38);
2525 
2526             test!func("<!DOCTYPE name [ <!ELEMENT foo EMPTY > ]>", 1, 42);
2527             test!func("<!DOCTYPE name [ <!ELEMENT bar ANY > ]>", 1, 40);
2528             test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA) > ]>", 1, 48);
2529             test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA | foo)> ]>", 1, 53);
2530             test!func("<!DOCTYPE name [ <!ELEMENT kids (foo) > ]>", 1, 43);
2531             test!func("<!DOCTYPE name [ <!ELEMENT kids (foo | bar)> ]>", 1, 48);
2532 
2533             test!func("<!DOCTYPE name [ <!ATTLIST foo> ]>", 1, 35);
2534             test!func("<!DOCTYPE name [ <!ATTLIST foo def CDATA #REQUIRED> ]>", 1, 55);
2535 
2536             test!func(`<!DOCTYPE name [ <!ENTITY foo "bar"> ]>`, 1, 40);
2537             test!func(`<!DOCTYPE name [ <!ENTITY foo 'bar'> ]>`, 1, 40);
2538             test!func(`<!DOCTYPE name [ <!ENTITY foo SYSTEM 'sys'> ]>`, 1, 47);
2539             test!func(`<!DOCTYPE name [ <!ENTITY foo PUBLIC "'''" 'sys'> ]>`, 1, 53);
2540 
2541             test!func(`<!DOCTYPE name [ <!NOTATION note PUBLIC 'blah'> ]>`, 1, 51);
2542 
2543             test!func("<!DOCTYPE name [ <?pi> ]>", 1, 26);
2544 
2545             test!func("<!DOCTYPE name [ <!-- coment --> ]>", 1, 36);
2546 
2547             test!func("<!DOCTYPE name [ <?pi> <!----> <!ELEMENT blah EMPTY> ]>", 1, 56);
2548             test!func("<!DOCTYPE \nname\n[\n<?pi> \n <!---->\n<!ENTITY foo '\n\n'\n>\n]>", 10, 3);
2549 
2550             test!func("<!DOCTYPE doc [\n" ~
2551                       "<!ENTITY e '<![CDATA[Tim Michael]]>'>\n" ~
2552                       "]>\n", 4, 1);
2553 
2554             testFail!func("<!DOCTYP name>", 1, 2);
2555             testFail!func("<!DOCTYPEname>", 1, 10);
2556             testFail!func("<!DOCTYPE name1><!DOCTYPE name2>", 1, 18);
2557             testFail!func("<!DOCTYPE\n\nname1><!DOCTYPE name2>", 3, 8);
2558             testFail!func("<!DOCTYPE name [ ]<!--comment-->", 1, 19);
2559 
2560             // FIXME This really should have the exception point at the quote and
2561             // say that it couldn't find the matching quote rather than point at
2562             // the character after it and say that it couldn't find a quote, but
2563             // that requires reworking some helper functions with better error
2564             // messages in mind.
2565             testFail!func(`<!DOCTYPE student SYSTEM "student".dtd"[` ~
2566                           "\n<!ELEMENT student (#PCDATA)>\n" ~
2567                           "]>", 1, 40);
2568         }
2569     }
2570 
2571 
2572     // Parse a start tag or empty element tag. It could be the root element, or
2573     // it could be a sub-element.
2574     // < was already removed from the front of the input.
2575     void _parseElementStart()
2576     {
2577         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2578         _savedText.pos = _text.pos;
2579         _savedText.input = _text.takeUntilAndDrop!(">", true)();
2580 
2581         if(_savedText.input.empty)
2582             throw new XMLParsingException("Tag missing name", _savedText.pos);
2583         if(_savedText.input.front == '/')
2584             throw new XMLParsingException("Invalid end tag", _savedText.pos);
2585 
2586         if(_savedText.input.length > 1)
2587         {
2588             auto temp = _savedText.input.save;
2589             temp.popFrontN(temp.length - 1);
2590             if(temp.front == '/')
2591             {
2592                 _savedText.input = _savedText.input.takeExactly(_savedText.input.length - 1);
2593 
2594                 static if(config.splitEmpty == SplitEmpty.no)
2595                 {
2596                     _type = EntityType.elementEmpty;
2597                     _tagStack.sawEntity();
2598                     _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2599                 }
2600                 else
2601                 {
2602                     _type = EntityType.elementStart;
2603                     _tagStack.sawEntity();
2604                     _grammarPos = GrammarPos.splittingEmpty;
2605                 }
2606             }
2607             else
2608             {
2609                 _type = EntityType.elementStart;
2610                 _tagStack.sawEntity();
2611                 _grammarPos = GrammarPos.contentCharData1;
2612             }
2613         }
2614         else
2615         {
2616             _type = EntityType.elementStart;
2617             _tagStack.sawEntity();
2618             _grammarPos = GrammarPos.contentCharData1;
2619         }
2620 
2621         _name = _savedText.takeName();
2622         // The attributes should be all that's left in savedText.
2623         if(_tagStack.atMax)
2624         {
2625             auto temp = _savedText.save;
2626             auto attrChecker = _tagStack.attrChecker;
2627 
2628             while(true)
2629             {
2630                 immutable wasWS = stripWS(temp);
2631                 if(temp.input.empty)
2632                     break;
2633                 if(!wasWS)
2634                     throw new XMLParsingException("Whitespace missing before attribute name", temp.pos);
2635 
2636                 immutable attrPos = temp.pos;
2637                 attrChecker.pushAttr(temp.takeName!'='(), attrPos);
2638                 stripWS(temp);
2639 
2640                 checkNotEmpty(temp);
2641                 if(temp.input.front != '=')
2642                     throw new XMLParsingException("= missing", temp.pos);
2643                 popFrontAndIncCol(temp);
2644 
2645                 stripWS(temp);
2646                 temp.takeAttValue();
2647             }
2648 
2649             attrChecker.checkAttrs();
2650         }
2651     }
2652 
2653     static if(compileInTests) unittest
2654     {
2655         import core.exception : AssertError;
2656         import std.algorithm.comparison : equal;
2657         import std.exception : assertNotThrown, collectException, enforce;
2658         import dxml.internal : codeLen, testRangeFuncs;
2659 
2660         static void test(alias func)(string text, EntityType type, string name,
2661                                      int row, int col, size_t line = __LINE__)
2662         {
2663             auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
2664             enforce!AssertError(range.front.type == type, "unittest failure 1", __FILE__, line);
2665             enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
2666             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2667         }
2668 
2669         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2670         {
2671             auto xml = func(text);
2672             auto e = collectException!XMLParsingException(parseXML(func(text)));
2673             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2674             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2675         }
2676 
2677         static foreach(func; testRangeFuncs)
2678         {
2679             test!func("<a/>", EntityType.elementEmpty, "a", 1, 5);
2680             test!func("<a></a>", EntityType.elementStart, "a", 1, 4);
2681             test!func("<root/>", EntityType.elementEmpty, "root", 1, 8);
2682             test!func("<root></root>", EntityType.elementStart, "root", 1, 7);
2683             test!func("<foo/>", EntityType.elementEmpty, "foo", 1, 7);
2684             test!func("<foo></foo>", EntityType.elementStart, "foo", 1, 6);
2685             test!func("<foo       />", EntityType.elementEmpty, "foo", 1, 14);
2686             test!func("<foo       ></foo>", EntityType.elementStart, "foo", 1, 13);
2687             test!func("<foo  \n\n\n />", EntityType.elementEmpty, "foo", 4, 4);
2688             test!func("<foo  \n\n\n ></foo>", EntityType.elementStart, "foo", 4, 3);
2689             test!func("<foo.></foo.>", EntityType.elementStart, "foo.", 1, 7);
2690             test!func(`<京都市></京都市>`, EntityType.elementStart, "京都市", 1, codeLen!(func, `<京都市>`) + 1);
2691 
2692             testFail!func(`<.foo/>`, 1, 2);
2693             testFail!func(`<>`, 1, 2);
2694             testFail!func(`</>`, 1, 2);
2695             testFail!func(`</foo>`, 1, 2);
2696 
2697             {
2698                 auto range = assertNotThrown!XMLParsingException(parseXML!simpleXML(func("<root/>")));
2699                 assert(range.front.type == EntityType.elementStart);
2700                 assert(equal(range.front.name, "root"));
2701                 assert(range._text.pos == TextPos(1, 8));
2702                 assertNotThrown!XMLParsingException(range.popFront());
2703                 assert(range.front.type == EntityType.elementEnd);
2704                 assert(equal(range.front.name, "root"));
2705                 assert(range._text.pos == TextPos(1, 8));
2706             }
2707         }
2708     }
2709 
2710 
2711     // Parse an end tag. It could be the root element, or it could be a
2712     // sub-element.
2713     // </ was already removed from the front of the input.
2714     void _parseElementEnd()
2715     {
2716         if(_text.input.empty)
2717             throw new XMLParsingException("Unterminated end tag", _text.pos);
2718         _entityPos = TextPos(_text.pos.line, _text.pos.col - 2);
2719         _type = EntityType.elementEnd;
2720         _tagStack.sawEntity();
2721         immutable namePos = _text.pos;
2722         _name = _text.takeName!'>'();
2723         stripWS(_text);
2724         if(_text.input.empty || _text.input.front != '>')
2725         {
2726             throw new XMLParsingException("There can only be whitespace between an end tag's name and the >",
2727                                           _text.pos);
2728         }
2729         popFrontAndIncCol(_text);
2730         _tagStack.popTag(_name.save, namePos);
2731         _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2732     }
2733 
    // Tests for _parseElementEnd: every input is run through each range
    // wrapper in testRangeFuncs.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : codeLen, testRangeFuncs;

        // Parses text, pops the first entity (the start tag in every case
        // below) and verifies that the next entity is an end tag with the
        // given name and that the parser's position afterwards is (row, col).
        // line defaults to the caller's line so that failures are reported
        // against the table entry rather than against this helper.
        static void test(alias func)(string text, string name, int row, int col, size_t line = __LINE__)
        {
            auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
            range.popFront();
            enforce!AssertError(range.front.type == EntityType.elementEnd, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
            enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Verifies that popping past the first entity throws an
        // XMLParsingException whose position is (row, col).
        static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
        {
            auto range = parseXML(func(text));
            auto e = collectException!XMLParsingException(range.popFront());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        static foreach(func; testRangeFuncs)
        {
            // Well-formed end tags, including whitespace and newlines before
            // the closing > and a multi-code-unit name.
            test!func("<a></a>", "a", 1, 8);
            test!func("<foo></foo>", "foo", 1, 12);
            test!func("<foo    ></foo    >", "foo", 1, 20);
            test!func("<foo \n ></foo \n >", "foo", 3, 3);
            test!func("<foo>\n\n\n</foo>", "foo", 4, 7);
            test!func("<foo.></foo.>", "foo.", 1, 14);
            test!func(`<京都市></京都市>`, "京都市", 1, codeLen!(func, `<京都市></京都市>`) + 1);

            // Malformed or mismatched end tags and truncated input.
            testFail!func(`<foo></ foo>`, 1, 8);
            testFail!func(`<foo></bar>`, 1, 8);
            testFail!func(`<foo></fo>`, 1, 8);
            testFail!func(`<foo></food>`, 1, 8);
            testFail!func(`<a></>`, 1, 6);
            testFail!func(`<a></`, 1, 6);
            testFail!func(`<a><`, 1, 5);
            testFail!func(`<a></a b='42'>`, 1, 8);
        }
    }
2778 
2779 
2780     // GrammarPos.contentCharData1
2781     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2782     // Parses at either CharData?. Nothing from the CharData? (or what's after it
2783     // if it's not there) has been consumed.
2784     void _parseAtContentCharData()
2785     {
2786         checkNotEmpty(_text);
2787         auto orig = _text.save;
2788         stripWS(_text);
2789         checkNotEmpty(_text);
2790         if(_text.input.front != '<')
2791         {
2792             _text = orig;
2793             _entityPos = _text.pos;
2794             _type = EntityType.text;
2795             _tagStack.sawEntity();
2796             _savedText.pos = _text.pos;
2797             _savedText.input = _text.takeUntilAndDrop!"<"();
2798             checkText!false(_savedText);
2799             checkNotEmpty(_text);
2800             if(_text.input.front == '/')
2801             {
2802                 popFrontAndIncCol(_text);
2803                 _grammarPos = GrammarPos.endTag;
2804             }
2805             else
2806                 _grammarPos = GrammarPos.contentMid;
2807         }
2808         else
2809         {
2810             popFrontAndIncCol(_text);
2811             checkNotEmpty(_text);
2812             if(_text.input.front == '/')
2813             {
2814                 popFrontAndIncCol(_text);
2815                 _parseElementEnd();
2816             }
2817             else
2818                 _parseAtContentMid();
2819         }
2820     }
2821 
    // Tests for _parseAtContentCharData: each snippet is wrapped in
    // <root>...</root> and parsed with both ThrowOnEntityRef settings.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : codeLen, testRangeFuncs;

        // Verifies that text is reported as a single text entity. (row, col)
        // is the position just past text relative to text itself; the expected
        // parser position additionally accounts for the "</" consumed after
        // the text and, when still on the first line, for the "<root>" prefix.
        static void test(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
        {
            auto pos = TextPos(row, col + (cast(int)(row == 1 ? "<root></" : "</").length));
            auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
            assertNotThrown!XMLParsingException(range.popFront());
            enforce!AssertError(range.front.type == EntityType.text, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(range.front.text, text), "unittest failure 2", __FILE__, line);
            enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
        }

        // Verifies that parsing text throws an XMLParsingException. (row, col)
        // is relative to text; on the first line the column is shifted by the
        // length of the "<root>" prefix.
        static void testFail(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
        {
            auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
            auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
            auto e = collectException!XMLParsingException(range.popFront());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
        }

        static foreach(func; testRangeFuncs)
        {
            static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
            {
                // Valid character data.
                test!(func, toer)("hello world", 1, 12);
                test!(func, toer)("\nhello\n\nworld", 4, 6);
                test!(func, toer)("京都市", 1, codeLen!(func, "京都市") + 1);
                test!(func, toer)("&#x42;", 1, 7);
                test!(func, toer)("]", 1, 2);
                test!(func, toer)("]]", 1, 3);
                test!(func, toer)("]>", 1, 3);
                test!(func, toer)("foo \n\n &lt; \n bar", 4, 5);

                // Malformed references and invalid characters.
                testFail!(func, toer)("&", 1, 1);
                testFail!(func, toer)("&;", 1, 1);
                testFail!(func, toer)("&f", 1, 1);
                testFail!(func, toer)("\v", 1, 1);
                testFail!(func, toer)("hello&world", 1, 6);
                testFail!(func, toer)("hello\vworld", 1, 6);
                testFail!(func, toer)("hello&;world", 1, 6);
                testFail!(func, toer)("hello&#;world", 1, 6);
                testFail!(func, toer)("hello&#x;world", 1, 6);
                testFail!(func, toer)("hello&.;world", 1, 6);
                testFail!(func, toer)("\n\nfoo\nbar&.;", 4, 4);

                // "]]>" is rejected inside character data.
                testFail!(func, toer)("]]>", 1, 1);
                testFail!(func, toer)("foo]]>bar", 1, 4);

                // Well-formed references to entities that are not predefined:
                // rejected with ThrowOnEntityRef.yes, accepted with
                // ThrowOnEntityRef.no (where only the malformed "&bar" fails).
                static if(toer == ThrowOnEntityRef.yes)
                {
                    testFail!(func, toer)("&foo; &bar baz", 1, 1);
                    testFail!(func, toer)("foo \n\n &ampe; \n bar", 3, 2);
                }
                else
                {
                    testFail!(func, toer)("&foo; &bar baz", 1, 7);
                    test!(func, toer)("foo \n\n &ampe; \n bar", 4, 5);
                }
            }
        }
    }
2889 
2890 
2891     // GrammarPos.contentMid
2892     // content     ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2893     // The text right after the start tag was what was parsed previously. So,
2894     // that first CharData? was what was parsed last, and this parses starting
2895     // right after. The < should have already been removed from the input.
2896     void _parseAtContentMid()
2897     {
2898         // Note that References are treated as part of the CharData and not
2899         // parsed out by the EntityRange (see EntityRange.text).
2900 
2901         switch(_text.input.front)
2902         {
2903             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2904             // CDSect  ::= CDStart CData CDEnd
2905             // CDStart ::= '<![CDATA['
2906             // CData   ::= (Char* - (Char* ']]>' Char*))
2907             // CDEnd   ::= ']]>'
2908             case '!':
2909             {
2910                 popFrontAndIncCol(_text);
2911                 if(_text.stripStartsWith("--"))
2912                 {
2913                     _parseComment();
2914                     static if(config.skipComments == SkipComments.yes)
2915                         _parseAtContentCharData();
2916                     else
2917                         _grammarPos = GrammarPos.contentCharData2;
2918                 }
2919                 else if(_text.stripStartsWith("[CDATA["))
2920                     _parseCDATA();
2921                 else
2922                 {
2923                     immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2924                     throw new XMLParsingException("Expected Comment or CDATA section", bangPos);
2925                 }
2926                 break;
2927             }
2928             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2929             case '?':
2930             {
2931                 _parsePI();
2932                 _grammarPos = GrammarPos.contentCharData2;
2933                 static if(config.skipPI == SkipPI.yes)
2934                     popFront();
2935                 break;
2936             }
2937             // element ::= EmptyElemTag | STag content ETag
2938             default:
2939             {
2940                 _parseElementStart();
2941                 break;
2942             }
2943         }
2944     }
2945 
2946 
2947     // This parses the Misc* that come after the root element.
2948     void _parseAtEndMisc()
2949     {
2950         // Misc ::= Comment | PI | S
2951 
2952         stripWS(_text);
2953 
2954         if(_text.input.empty)
2955         {
2956             _grammarPos = GrammarPos.documentEnd;
2957             return;
2958         }
2959 
2960         if(_text.input.front != '<')
2961             throw new XMLParsingException("Expected <", _text.pos);
2962         popFrontAndIncCol(_text);
2963         checkNotEmpty(_text);
2964 
2965         switch(_text.input.front)
2966         {
2967             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2968             case '!':
2969             {
2970                 popFrontAndIncCol(_text);
2971                 if(_text.stripStartsWith("--"))
2972                 {
2973                     _parseComment();
2974                     static if(config.skipComments == SkipComments.yes)
2975                         _parseAtEndMisc();
2976                     break;
2977                 }
2978                 immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2979                 throw new XMLParsingException("Expected Comment", bangPos);
2980             }
2981             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2982             case '?':
2983             {
2984                 _parsePI();
2985                 static if(config.skipPI == SkipPI.yes)
2986                     popFront();
2987                 break;
2988             }
2989             default: throw new XMLParsingException("Must be a comment or PI", _text.pos);
2990         }
2991     }
2992 
2993     // Used for keeping track of the names of start tags so that end tags can be
2994     // verified as well as making it possible to avoid redoing other validation.
2995     // We keep track of the total number of entities which have been parsed thus
2996     // far so that only whichever EntityRange is farthest along in parsing
2997     // actually adds or removes tags from the TagStack, and the parser can skip
2998     // some of the validation for ranges that are farther behind. That way, the
2999     // end tags get verified, but we only have one stack. If the stack were
3000     // duplicated with every call to save, then there would be a lot more
3001     // allocations, which we don't want. But because we only need to verify the
3002     // end tags once, we can get away with having a shared tag stack. The cost
3003     // is that we have to keep track of how many tags we've parsed so that we
3004     // know if an EntityRange should actually be pushing or popping tags from
3005     // the stack, but that's a lot cheaper than duplicating the stack, and it's
3006     // a lot less annoying then making EntityRange an input range and not a
3007     // forward range or making it a cursor rather than a range.
3008     struct TagStack
3009     {
3010         void pushTag(Taken tagName)
3011         {
3012             if(entityCount++ == state.maxEntities)
3013             {
3014                 ++state.maxEntities;
3015                 put(state.tags, tagName);
3016             }
3017             ++depth;
3018         }
3019 
3020         void popTag(Taken tagName, TextPos pos)
3021         {
3022             import std.algorithm : equal;
3023             import std.format : format;
3024             if(entityCount++ == state.maxEntities)
3025             {
3026                 assert(!state.tags.data.empty);
3027                 if(!equal(state.tags.data.back.save, tagName.save))
3028                 {
3029                     enum fmt = "Name of end tag </%s> does not match corresponding start tag <%s>";
3030                     throw new XMLParsingException(format!fmt(tagName, state.tags.data.back), pos);
3031                 }
3032                 ++state.maxEntities;
3033                 state.tags.shrinkTo(state.tags.data.length - 1);
3034             }
3035             --depth;
3036         }
3037 
3038         @property auto attrChecker()
3039         {
3040             assert(atMax);
3041 
3042             static struct AttrChecker
3043             {
3044                 void pushAttr(Taken attrName, TextPos attrPos)
3045                 {
3046                     import std.typecons : tuple;
3047                     put(state.attrs, tuple(attrName, attrPos));
3048                 }
3049 
3050                 void checkAttrs()
3051                 {
3052                     import std.algorithm.comparison : cmp, equal;
3053                     import std.algorithm.sorting : sort;
3054                     import std.conv : to;
3055 
3056                     if(state.attrs.data.length < 2)
3057                         return;
3058 
3059                     sort!((a,b) => cmp(a[0].save, b[0].save) < 0)(state.attrs.data);
3060                     auto prev = state.attrs.data.front;
3061                     foreach(attr; state.attrs.data[1 .. $])
3062                     {
3063                         if(equal(prev[0], attr[0]))
3064                             throw new XMLParsingException("Duplicate attribute name", attr[1]);
3065                         prev = attr;
3066                     }
3067                 }
3068 
3069                 ~this()
3070                 {
3071                     state.attrs.clear();
3072                 }
3073 
3074                 SharedState* state;
3075             }
3076 
3077             return AttrChecker(state);
3078         }
3079 
3080         void sawEntity()
3081         {
3082             if(entityCount++ == state.maxEntities)
3083                 ++state.maxEntities;
3084         }
3085 
3086         @property bool atMax()
3087         {
3088             return entityCount == state.maxEntities;
3089         }
3090 
3091         struct SharedState
3092         {
3093             import std.array : Appender;
3094             import std.typecons : Tuple;
3095 
3096             Appender!(Taken[]) tags;
3097             Appender!(Tuple!(Taken, TextPos)[]) attrs;
3098             size_t maxEntities;
3099         }
3100 
3101         static create()
3102         {
3103             TagStack tagStack;
3104             tagStack.state = new SharedState;
3105             tagStack.state.tags.reserve(10);
3106             tagStack.state.attrs.reserve(10);
3107             return tagStack;
3108         }
3109 
3110         SharedState* state;
3111         size_t entityCount;
3112         int depth;
3113     }
3114 
    // Tests that start and end tags are matched up correctly across whole
    // documents, for every range wrapper in testRangeFuncs and every
    // configuration in someTestConfigs.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : testRangeFuncs;

        // Verifies that text can be parsed from start to finish without an
        // XMLParsingException being thrown.
        static void test(alias func)(string text, size_t line = __LINE__)
        {
            auto xml = func(text);
            static foreach(config; someTestConfigs)
            {{
                auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
                                                                 __FILE__, line);
                assertNotThrown!XMLParsingException(walkLength(range), "unittest failure 2", __FILE__, line);
            }}
        }

        // Verifies that constructing the range succeeds but that walking it
        // throws an XMLParsingException positioned at (row, col).
        static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
        {
            auto xml = func(text);
            static foreach(config; someTestConfigs)
            {{
                auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
                                                                 __FILE__, line);
                auto e = collectException!XMLParsingException(walkLength(range));
                enforce!AssertError(e !is null, "unittest failure 2", __FILE__, line);
                enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
            }}
        }

        static foreach(func; testRangeFuncs)
        {
            test!func("<root></root>");
            test!func("<root><a></a></root>");
            test!func("<root><a><b></b></a></root>");
            // NOTE(review): identical to the previous case.
            test!func("<root><a><b></b></a></root>");
            test!func("<root><a><b></b></a><foo><bar></bar></foo></root>");
            // Deeply nested elements (more than the 10 names reserved by
            // TagStack.create).
            test!func("<a>\n" ~
                      "    <b>\n" ~
                      "        <c>\n" ~
                      "            <d>\n" ~
                      "                <e>\n" ~
                      "                    <f>\n" ~
                      "                        <g>\n" ~
                      "                            <h>\n" ~
                      "                                 <i><i><i><i>\n" ~
                      "                                 </i></i></i></i>\n" ~
                      "                                 <i>\n" ~
                      "                                     <j>\n" ~
                      "                                         <k>\n" ~
                      "                                             <l>\n" ~
                      "                                                 <m>\n" ~
                      "                                                     <n>\n" ~
                      "                                                         <o>\n" ~
                      "                                                             <p>\n" ~
                      "                                                                 <q>\n" ~
                      "                                                                     <r>\n" ~
                      "                                                                         <s>\n" ~
                      "          <!-- comment --> <?pi?> <t><u><v></v></u></t>\n" ~
                      "                                                                         </s>\n" ~
                      "                                                                     </r>\n" ~
                      "                                                                 </q>\n" ~
                      "                                                </p></o></n></m>\n" ~
                      "                                                               </l>\n" ~
                      "                    </k>\n" ~
                      "           </j>\n" ~
                      "</i></h>" ~
                      "                        </g>\n" ~
                      "                    </f>\n" ~
                      "                </e>\n" ~
                      "            </d>\n" ~
                      "        </c>\n" ~
                      "    </b>\n" ~
                      "</a>");
            test!func(`<京都市></京都市>`);

            // Unclosed, mismatched and misnested tags.
            testFail!func(`<a>`, 1, 4);
            testFail!func(`<foo></foobar>`, 1, 8);
            testFail!func(`<foobar></foo>`, 1, 11);
            testFail!func(`<a><\a>`, 1, 5);
            testFail!func(`<a><a/>`, 1, 8);
            testFail!func(`<a><b>`, 1, 7);
            testFail!func(`<a><b><c>`, 1, 10);
            testFail!func(`<a></a><b>`, 1, 9);
            testFail!func(`<a></a><b></b>`, 1, 9);
            testFail!func(`<a><b></a></b>`, 1, 9);
            testFail!func(`<a><b><c></c><b></a>`, 1, 19);
            testFail!func(`<a><b></c><c></b></a>`, 1, 9);
            testFail!func(`<a><b></c></b></a>`, 1, 9);
            // The root is reopened instead of being closed.
            testFail!func("<a>\n" ~
                          "    <b>\n" ~
                          "        <c>\n" ~
                          "            <d>\n" ~
                          "                <e>\n" ~
                          "                    <f>\n" ~
                          "                    </f>\n" ~
                          "                </e>\n" ~
                          "            </d>\n" ~
                          "        </c>\n" ~
                          "    </b>\n" ~
                          "<a>", 12, 4);
            // The root is closed with the wrong name.
            testFail!func("<a>\n" ~
                          "    <b>\n" ~
                          "        <c>\n" ~
                          "            <d>\n" ~
                          "                <e>\n" ~
                          "                    <f>\n" ~
                          "                    </f>\n" ~
                          "                </e>\n" ~
                          "            </d>\n" ~
                          "        </c>\n" ~
                          "    </b>\n" ~
                          "</q>", 12, 3);
        }
    }
3231 
3232 
3233     struct Text(R)
3234     {
3235         alias config = cfg;
3236         alias Input = R;
3237 
3238         Input input;
3239         TextPos pos;
3240 
3241         @property save() { return typeof(this)(input.save, pos); }
3242     }
3243 
3244 
    // The type produced by calling takeExactly on the code-unit view of the
    // input range; names and saved text are stored as this type. The 42 is
    // just an arbitrary length so that typeof has a call to inspect.
    alias Taken = typeof(takeExactly(byCodeUnit(R.init), 42));


    // Kind of the entity which was parsed most recently.
    EntityType _type;
    // Position at which the most recently parsed entity starts.
    TextPos _entityPos;
    // Where in the grammar parsing resumes on the next popFront.
    auto _grammarPos = GrammarPos.documentStart;

    // Name of the current entity, for the entity types which have one.
    Taken _name;
    // Bookkeeping for matching end tags against start tags.
    TagStack _tagStack;

    // The input which has not been consumed yet, along with its position.
    Text!(typeof(byCodeUnit(R.init))) _text;
    // The piece of input (and its position) saved for the current entity,
    // e.g. the character data of a text entity.
    Text!Taken _savedText;
3257 
3258 
3259     this(R xmlText)
3260     {
3261         _tagStack = TagStack.create();
3262         _text.input = byCodeUnit(xmlText);
3263 
3264         // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
3265         _savedText = typeof(_savedText).init;
3266         _name = typeof(_name).init;
3267 
3268         popFront();
3269     }
3270 }
3271 
/// Ditto
EntityRange!(config, R) parseXML(Config config = Config.init, R)(R xmlText)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    // Convenience wrapper so that R can be inferred from the argument; all of
    // the work happens in EntityRange's constructor.
    alias Parser = EntityRange!(config, R);
    return Parser(xmlText);
}
3278 
///
unittest
{
    import std.range.primitives : walkLength;

    auto xml = "<?xml version='1.0'?>\n" ~
               "<?instruction start?>\n" ~
               "<foo attr='42'>\n" ~
               "    <bar/>\n" ~
               "    <!-- no comment -->\n" ~
               "    <baz hello='world'>\n" ~
               "    nothing to say.\n" ~
               "    nothing at all...\n" ~
               "    </baz>\n" ~
               "</foo>\n" ~
               "<?some foo?>";

    // With the default configuration, processing instructions, comments and
    // empty element tags are all reported as they appear in the document.
    {
        auto range = parseXML(xml);

        // The XML declaration is not reported, so the first entity is the
        // processing instruction which follows it.
        assert(range.front.type == EntityType.pi);
        assert(range.front.name == "instruction");
        assert(range.front.text == "start");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "attr");
            assert(attrs.front.value == "42");
        }

        range.popFront();
        assert(range.front.type == EntityType.elementEmpty);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " no comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "baz");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "hello");
            assert(attrs.front.value == "world");
        }

        // The text is reported exactly as written, surrounding whitespace
        // included.
        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </baz>
        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </foo>

        // Processing instructions after the root element are reported too.
        range.popFront();
        assert(range.front.type == EntityType.pi);
        assert(range.front.name == "some");
        assert(range.front.text == "foo");

        range.popFront();
        assert(range.empty);
    }
    {
        auto range = parseXML!simpleXML(xml);

        // simpleXML is set to skip processing instructions.

        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "attr");
            assert(attrs.front.value == "42");
        }

        // simpleXML is set to split empty tags so that <bar/> is treated
        // as the same as <bar></bar> so that code does not have to
        // explicitly handle empty tags.
        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");
        range.popFront();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        // simpleXML is set to skip comments.

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "baz");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "hello");
            assert(attrs.front.value == "world");
        }

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </baz>
        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </foo>

        // The trailing <?some foo?> is skipped as well, so the range is
        // empty once the root element has been closed.
        range.popFront();
        assert(range.empty);
    }
}
3401 
// Test the state of the range immediately after parseXML returns.
// Each document is passed through func so that every range type produced by
// testRangeFuncs is exercised (and not just string).
unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        // The XML declaration is never reported.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<?xml?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading comment is the first entity unless comments are skipped.
        static foreach(config; [Config.init, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, "no comment"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading processing instruction is the first entity unless
        // processing instructions are skipped.
        static foreach(config; [Config.init, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "private"));
            assert(equal(range.front.text, "eye"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // With nothing before it, the root element's start tag comes first.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}
    }
}
3458 
3459 // Test various invalid states that didn't seem to fit well into tests elsewhere.
unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : testRangeFuncs;

    // Runs the parser over func(text) to the end of the document once per
    // config in someTestConfigs. Every run must throw an XMLParsingException
    // whose pos is TextPos(row, col); otherwise an AssertError is raised which
    // points at the line of the testFail call rather than at this helper.
    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        static foreach(config; someTestConfigs)
        {{
            XMLParsingException caught = null;
            try
            {
                auto entities = parseXML!config(xml.save);
                while(!entities.empty)
                    entities.popFront();
            }
            catch(XMLParsingException xpe)
            {
                caught = xpe;
            }
            enforce!AssertError(caught !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(caught.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }}
    }

    static foreach(func; testRangeFuncs)
    {{
        // A second element after the root element.
        testFail!func("<root></root><invalid></invalid>", 1, 15);
        testFail!func("<root></root><invalid/>", 1, 15);
        testFail!func("<root/><invalid></invalid>", 1, 9);
        testFail!func("<root/><invalid/>", 1, 9);

        // Text after the root element.
        testFail!func("<root></root>invalid", 1, 14);
        testFail!func("<root/>invalid", 1, 8);

        // Text or an element after a PI which follows the root element.
        testFail!func("<root/><?pi?>invalid", 1, 14);
        testFail!func("<root/><?pi?><invalid/>", 1, 15);

        // A DOCTYPE or a stray end tag after the root element.
        testFail!func("<root/><!DOCTYPE foo>", 1, 9);
        testFail!func("<root/></root>", 1, 9);

        // Text at the very start of the document.
        testFail!func("invalid<root></root>", 1, 1);
        testFail!func("invalid<?xml?><root></root>", 1, 1);
        testFail!func("invalid<!DOCTYPE foo><root></root>", 1, 1);
        testFail!func("invalid<!--comment--><root></root>", 1, 1);
        testFail!func("invalid<?Poirot?><root></root>", 1, 1);

        // Text between the prolog and the root element.
        testFail!func("<?xml?>invalid<root></root>", 1, 8);
        testFail!func("<!DOCTYPE foo>invalid<root></root>", 1, 15);
        testFail!func("<!--comment-->invalid<root></root>", 1, 15);
        testFail!func("<?Poirot?>invalid<root></root>", 1, 11);

        // Documents without a (complete) root element.
        testFail!func("<?xml?>", 1, 8);
        testFail!func("<!DOCTYPE name>", 1, 16);
        testFail!func("<?Sherlock?>", 1, 13);
        testFail!func("<?Poirot?><?Sherlock?><?Holmes?>", 1, 33);
        testFail!func("<?Poirot?></Poirot>", 1, 12);
        testFail!func("</Poirot>", 1, 2);
        testFail!func("<", 1, 2);
        testFail!func(`</`, 1, 2);
        testFail!func(`</a`, 1, 2);
        testFail!func(`</a>`, 1, 2);


        // "]]>" inside of character data.
        testFail!func("<doc>]]></doc>", 1, 6);

        // Whitespace in front of the XML declaration.
        testFail!func(" <?xml?><root/>", 1, 1);
        testFail!func("\n<?xml?><root/>", 1, 1);
    }}
}
3527 
3528 // Test that parseXML and EntityRange's properties work with @safe.
3529 // pure would be nice too, but at minimum, the use of format for exception
3530 // messages, and the use of assumeSafeAppend prevent it. It may or may not be
3531 // worth trying to fix that.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    auto xml = "<root>\n" ~
               "    <![CDATA[nothing]]>\n" ~
               "    <foo a='42'/>\n" ~
               "</root>";

    // NOTE(review): func is never applied to xml inside this loop, so every
    // iteration parses the same string - confirm whether that is intended.
    static foreach(func; testRangeFuncs)
    {{
        // Each call below has to compile as @safe; the values are checked
        // along the way.
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(equal(range.front.name, "root"));
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.cdata);
        assert(equal(range.front.text, "nothing"));
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
        assert(equal(range.front.name, "foo"));
        {
            // Exercise the attribute range's API (save, front, popFront,
            // empty) as well.
            auto attrs = range.front.attributes;
            auto saved = attrs.save;
            auto attr = attrs.front;
            assert(attr.name == "a");
            assert(attr.value == "42");
            attrs.popFront();
            assert(attrs.empty);
        }
        // save on the entity range itself must be callable from @safe code too.
        auto saved = range.save;
    }}
}
3567 
3568 
3569 // This is purely to provide a way to trigger the unittest blocks in EntityRange
3570 // without compiling them in normally.
struct EntityRangeCompileTests
{
// Every member carries the same attribute set.
@safe pure nothrow @nogc:

    // The members only need to exist with the right signatures so that the
    // type is a forward range of char; none of them may actually be called.
    @property bool empty() { assert(0); }
    @property char front() { assert(0); }
    void popFront() { assert(0); }
    @property typeof(this) save() { assert(0); }
}
3578 
unittest
{
    // Declaring a variable of this type instantiates EntityRange with the
    // compile-test range; nothing is done with the variable at runtime.
    alias CompileTestRange = EntityRange!(Config.init, EntityRangeCompileTests);
    CompileTestRange _entityRangeTests;
}
3583 
3584 
3585 /++
3586     Whether the given type is a forward range of attributes.
3587 
3588     Essentially, an attribute range must be a forward range where
3589 
3590     $(UL
3591         $(LI each element has the members $(D name), $(D value), and $(D pos))
3592         $(LI $(D name) and $(D value) are forward ranges of characters)
3593         $(LI $(D name) and $(D value) have the same type)
3594         $(LI $(D pos) is a $(LREF TextPos)))
3595 
3596     Normally, an attribute range would come from
3597     $(LREF EntityRange.Entity.attributes) or
3598     $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom), but
3599     as long as a range has the correct API, it qualifies as an attribute range.
3600 
3601     See_Also: $(LREF EntityRange.Entity.Attribute)$(BR)
3602               $(LREF EntityRange.Entity.attributes)$(BR)
3603               $(REF_ALTTEXT DOMEntity.Attribute, DOMEntity.Attribute, dxml, dom)$(BR)
3604               $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
3605   +/
template isAttrRange(R)
{
    // The requirements are checked one after the other so that a later check
    // is only attempted once everything before it has been satisfied.
    static if(!isForwardRange!R)
        enum isAttrRange = false;
    else static if(!is(typeof(R.init.front.name)) || !is(typeof(R.init.front.value)))
        enum isAttrRange = false; // an element lacks name or value
    else static if(!is(ReturnType!((R r) => r.front.pos) == TextPos))
        enum isAttrRange = false; // pos is missing or is not a TextPos
    else
    {
        alias NameType  = ReturnType!((R r) => r.front.name);
        alias ValueType = ReturnType!((R r) => r.front.value);

        // name and value must share a single type, and that type must be a
        // forward range of characters.
        enum isAttrRange = is(NameType == ValueType) &&
                           isForwardRange!NameType &&
                           isSomeChar!(ElementType!NameType);
    }
}
3623 
3624 ///
unittest
{
    import std.typecons : Tuple;
    import dxml.dom : parseDOM;

    // The attribute range of an entity from parseXML qualifies.
    alias R1 = typeof(parseXML("<root/>").front.attributes);
    static assert(isAttrRange!R1);

    // So does the attribute range of an entity from parseDOM.
    alias R2 = typeof(parseDOM("<root/>").children[0].attributes);
    static assert(isAttrRange!R2);

    // A dynamic array of tuples with name, value, and pos members of the
    // right types also has the required API.
    alias T = Tuple!(string, "name", string, "value", TextPos, "pos");
    static assert(isAttrRange!(T[]));

    // A string is not an attribute range.
    static assert(!isAttrRange!string);
}
3641 
unittest
{
    import std.typecons : Tuple;

    // Each alias below breaks exactly one requirement of isAttrRange.

    // Misspelled or missing members.
    alias WrongNameMember = Tuple!(string, "nam", string, "value", TextPos, "pos");
    static assert(!isAttrRange!(WrongNameMember[]));

    alias WrongValueMember = Tuple!(string, "name", string, "valu", TextPos, "pos");
    static assert(!isAttrRange!(WrongValueMember[]));

    alias WrongPosMember = Tuple!(string, "name", string, "value", TextPos, "po");
    static assert(!isAttrRange!(WrongPosMember[]));

    // name and value have different types.
    alias MixedCharTypes = Tuple!(string, "name", wstring, "value", TextPos, "pos");
    static assert(!isAttrRange!(MixedCharTypes[]));

    // No pos member at all.
    alias NoPos = Tuple!(string, "name", string, "value");
    static assert(!isAttrRange!(NoPos[]));

    // Members with the right names but the wrong types.
    alias IntName = Tuple!(int, "name", string, "value", TextPos, "pos");
    static assert(!isAttrRange!(IntName[]));

    alias IntValue = Tuple!(string, "name", int, "value", TextPos, "pos");
    static assert(!isAttrRange!(IntValue[]));

    alias IntPos = Tuple!(string, "name", string, "value", int, "pos");
    static assert(!isAttrRange!(IntPos[]));
}
3678 
3679 
3680 /++
3681     A helper function for processing start tag attributes.
3682 
3683     It functions similarly to $(PHOBOS_REF getopt, std, getopt). It takes a
3684     range of attributes and a list of alternating strings and pointers where
3685     each string represents the name of the attribute to parse and the pointer
3686     immediately after it is assigned the value that corresponds to the attribute
3687     name (if present). If the given pointer does not point to the same type as
3688     the range of characters used in the attributes, then
3689     $(PHOBOS_REF to, std, conv) is used to convert the value to the type the
3690     pointer points to.
3691 
3692     If a $(D Nullable!T*) is given rather than a $(D T*), then it will be
3693     treated the same as if it had been $(D T*). So, $(D to!T) will be used to
3694     convert the attribute value if the matching attribute name is present. The
3695     advantage of passing $(D Nullable!T*) instead of $(D T*) is that it's
3696     possible to distinguish between an attribute that wasn't present and one
3697     where it was present but was equivalent to $(D T.init).
3698 
3699     Unlike $(PHOBOS_REF getopt, std, getopt), the given range is consumed
3700     rather than taking it by $(K_REF) and leaving the attributes that weren't
3701     matched in the range (since that really doesn't work with an arbitrary
3702     range as opposed to a dynamic array). However, if the second argument of
3703     getAttrs is not a $(K_STRING) but is instead an output range that accepts
3704     the element type of the range, then any attributes which aren't matched are
3705     put into the output range.
3706 
3707     Params:
3708         attrRange = A range of attributes (see $(LREF isAttrRange)).
3709         unmatched = An output range that any _unmatched attributes from the
3710                     range are put into (optional argument).
3711         args = An alternating list of strings and pointers where the names
3712                represent the attribute names to get the value of, and the
3713                corresponding values get assigned to what the pointers point to.
3714 
3715     Throws: $(LREF XMLParsingException) if $(PHOBOS_REF to, std, conv) fails to
3716             convert an attribute value.
3717 
3718     See_Also: $(LREF isAttrRange)$(BR)
3719               $(LREF EntityRange.Entity.attributes)$(BR)
3720               $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
3721   +/
void getAttrs(R, Args...)(R attrRange, Args args)
    if(isAttrRange!R && Args.length % 2 == 0)
{
    // The body is shared with the overload that takes an output range, so it
    // is generated as a string and mixed in. This variant is generated
    // without the code that stores unmatched attributes.
    enum generatedBody = _genGetAttrs(false);
    mixin(generatedBody);
}
3727 
3728 /// Ditto
void getAttrs(R, OR, Args...)(R attrRange, ref OR unmatched, Args args)
    if(isAttrRange!R && isOutputRange!(OR, ElementType!R) && Args.length % 2 == 0)
{
    // Same generated body as the other overload, plus the statement which
    // puts every attribute that matched none of the names into unmatched.
    enum generatedBody = _genGetAttrs(true);
    mixin(generatedBody);
}
3734 
// Generates the source code for the body of getAttrs so that both overloads
// can mix in the same implementation.
//
// The generated code expects the names attrRange, args, and Args to be in
// scope at the mixin site (and unmatched when includeUnmatched is true). It
// loops over attrRange, and for each attribute, it walks args as
// (name, pointer) pairs. The first pair whose name equals the attribute's
// name has the converted attribute value assigned through its pointer.
//
// Params:
//     includeUnmatched = Whether to generate the statement which puts each
//                        attribute that matched no name into unmatched.
//
// Returns: The D source code for the function body.
private string _genGetAttrs(bool includeUnmatched)
{
    auto retval =
`    import std.algorithm.comparison : equal;
    import std.conv : ConvException, to;
    import std.format : format;
    import std.typecons : Nullable;
    import std.utf : byDchar;

    outer: foreach(attr; attrRange)
    {
        static foreach(i, arg; args)
        {
            static if(i % 2 == 0)
                static assert(is(Args[i] == string), format!"Expected string for args[%s]"(i));
            else
            {
                static assert(isPointer!(Args[i]), format!"Expected pointer for args[%s]"(i));

                // The names are compared code point by code point. Comparing
                // the UTF-8 code units of the requested name against the
                // elements of attr.name would never match a non-ASCII name
                // when the attribute range is not made of UTF-8 code units.
                if(equal(attr.name.byDchar(), args[i - 1].byDchar()))
                {
                    alias ArgType = typeof(*arg);

                    // A Nullable!T* is treated like a T*.
                    static if(isInstanceOf!(Nullable, ArgType))
                        alias TargetType = TemplateArgsOf!ArgType;
                    else
                        alias TargetType = typeof(*arg);

                    try
                        *arg = to!TargetType(attr.value);
                    catch(ConvException ce)
                    {
                        enum fmt = "Failed to convert %s: %s";
                        throw new XMLParsingException(format!fmt(attr.name, ce.msg), attr.pos);
                    }

                    continue outer;
                }
            }
        }`;

    // Only reached in the generated loop when no name matched, since a match
    // ends the iteration with "continue outer".
    if(includeUnmatched)
        retval ~= "\n        put(unmatched, attr);";
    retval ~= "\n    }";

    return retval;
}
3785 
// Tests getAttrs: plain pointers, Nullable pointers, collecting unmatched
// attributes in an output range, and the exception thrown when a conversion
// fails.
unittest
{
    import std.array : appender;
    import std.exception : collectException;
    import std.typecons : Nullable;

    // Values are converted to the type that each pointer points to, and
    // attributes without a matching name (d) are ignored.
    {
        auto xml = `<root a="foo" b="19" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
    }

    // Nullable!T* accepts the same as T*.
    {
        auto xml = `<root a="foo" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        Nullable!string a;
        Nullable!int b;
        bool c;

        // The order of the names does not have to match the order of the
        // attributes, and b stays null, because there is no b attribute.
        getAttrs(range.front.attributes, "c", &c, "b", &b, "a", &a);
        assert(a == "foo");
        assert(b.isNull);
        assert(c == true);
    }

    // If an output range of attributes is provided, then the ones that
    // weren't matched are put in it.
    {
        auto xml = `<root foo="42" bar="silly" d="rocks" q="t"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        alias Attribute = typeof(range).Entity.Attribute;
        auto unmatched = appender!(Attribute[])();
        int i;
        string s;

        getAttrs(range.front.attributes, unmatched, "foo", &i, "bar", &s);
        assert(i == 42);
        assert(s == "silly");
        assert(unmatched.data.length == 2);
        assert(unmatched.data[0] == Attribute("d", "rocks", TextPos(1, 28)));
        assert(unmatched.data[1] == Attribute("q", "t", TextPos(1, 38)));
    }

    // An XMLParsingException gets thrown if a conversion fails.
    {
        auto xml = `<root foo="bar" false="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        int i;

        auto xpe = collectException!XMLParsingException(
            getAttrs(range.front.attributes, "d", &i));
        // collectException returns null if nothing was thrown, so check for
        // that first in order to get an assertion failure rather than a null
        // dereference.
        assert(xpe !is null);
        assert(xpe.pos == TextPos(1, 30));
    }
}
3856 
// Tests that getAttrs rejects argument lists which are not alternating
// (string, pointer) pairs.
unittest
{
    auto range = parseXML("<root/>");
    auto attrs = range.front.attributes;
    int i;
    // A name without a pointer.
    static assert(!__traits(compiles, getAttrs(attrs, "foo")));
    // A string where a pointer is expected.
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar")));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i)));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i, &i)));
    // A pointer where a string is expected.
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo")));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i)));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i, "bar")));
}
3870 
// Tests that getAttrs can be called from @safe pure code.
@safe pure unittest
{
    import std.typecons : Nullable;

    // The explicit attributes make the compiler verify that getAttrs is
    // usable from @safe pure code with both a T* and a Nullable!T*.
    static void check(R)(R entities, int* plain, Nullable!int* nullable) @safe pure
    {
        auto attrs = entities.front.attributes;
        getAttrs(attrs, "foo", plain, "bar", nullable);
    }

    // <root/> has no attributes, so the null pointers are never written to.
    auto entities = parseXML("<root/>");
    check(entities, null, null);
}
3882 
3883 
3884 /++
3885     Takes an $(LREF EntityRange) which is at a start tag and iterates it until
3886     it is at its corresponding end tag. It is an error to call skipContents when
3887     the current entity is not $(LREF EntityType.elementStart).
3888 
3889     $(TABLE
3890         $(TR $(TH Supported $(LREF EntityType)s:))
3891         $(TR $(TD $(LREF2 elementStart, EntityType)))
3892     )
3893 
3894     Returns: The range with its $(D front) now at the end tag corresponding to
3895              the start tag that was $(D front) when the function was called.
3896 
3897     Throws: $(LREF XMLParsingException) on invalid XML.
3898   +/
R skipContents(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    assert(entityRange._type == EntityType.elementStart);

    // empty is never checked here. The range could only become empty by
    // reaching the end of the document, and popFront throws an
    // XMLParsingException if the document ends before the matching end tag
    // has been reached.
    int unclosedTags = 1; // the start tag that front is on when called
    do
    {
        entityRange.popFront();
        switch(entityRange._type)
        {
            case EntityType.elementStart:
                ++unclosedTags;
                break;
            case EntityType.elementEnd:
                --unclosedTags;
                break;
            default:
                break; // nothing else changes the nesting depth
        }
    }
    while(unclosedTags != 0);

    return entityRange;
}
3920 
3921 ///
unittest
{
    auto xml = "<root>\n" ~
               "    <foo>\n" ~
               "        <bar>\n" ~
               "        Some text\n" ~
               "        </bar>\n" ~
               "    </foo>\n" ~
               "    <!-- no comment -->\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    // Everything inside of <foo> (including <bar> and its text) is skipped,
    // leaving front on </foo>.
    range = range.skipContents();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "foo");

    // Normal iteration continues after the end tag.
    range.popFront();
    assert(range.front.type == EntityType.comment);
    assert(range.front.text == " no comment ");

    range.popFront();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.empty);
}
3956 
3957 
3958 /++
3959     Skips entities until the given $(LREF EntityType) is reached.
3960 
3961     If multiple $(LREF EntityType)s are given, then any one of them counts as
3962     a match.
3963 
3964     The current entity is skipped regardless of whether it is the given
3965     $(LREF EntityType).
3966 
3967     This is essentially a slightly optimized equivalent to
3968 
3969     ---
3970     if(!range.empty())
3971     {
3972         range.popFront();
        range = range.find!((a, b) => a.type == b)(entityTypes);
3974     }
3975     ---
3976 
3977     Returns: The given range with its $(D front) now at the first entity which
3978              matched one of the given $(LREF EntityType)s or an empty range if
3979              none were found.
3980 
3981     Throws: $(LREF XMLParsingException) on invalid XML.
3982   +/
R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...)
    if(isInstanceOf!(EntityRange, R))
{
    if(!entityRange.empty)
    {
        // The entity that is currently front is skipped unconditionally.
        entityRange.popFront();

        search: while(!entityRange.empty)
        {
            immutable current = entityRange._type;
            foreach(wanted; entityTypes)
            {
                if(wanted == current)
                    break search; // front is on the first matching entity
            }
            entityRange.popFront();
        }
    }

    // Either front is on a match, or the range is empty.
    return entityRange;
}
4000 
4001 ///
unittest
{
    auto xml = "<root>\n" ~
               "    <!-- blah blah blah -->\n" ~
               "    <foo>nothing to say</foo>\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    // <root> itself is skipped even though it is an elementStart, and the
    // comment is passed over, because it is neither of the given types.
    range = range.skipToEntityType(EntityType.elementStart,
                                   EntityType.elementEmpty);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    // There are no comments after <foo>, so the result is an empty range.
    assert(range.skipToEntityType(EntityType.comment).empty);

    // skipToEntityType will work on an empty range but will always
    // return an empty range.
    assert(range.takeNone().skipToEntityType(EntityType.comment).empty);
}
4024 
4025 
4026 /++
4027     Skips entities until the end tag is reached that corresponds to the start
4028     tag that is the parent of the current entity.
4029 
4030     Returns: The given range with its $(D front) now at the end tag which
4031              corresponds to the parent start tag of the entity that was
4032              $(D front) when skipToParentEndTag was called. If the current
4033              entity does not have a parent start tag (which means that it's
4034              either the root element or a comment or PI outside of the root
4035              element), then an empty range is returned.
4036 
4037     Throws: $(LREF XMLParsingException) on invalid XML.
4038   +/
R skipToParentEndTag(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    // Each iteration moves the range forward by one step at the current
    // nesting level. If front is a start tag, the whole element is jumped
    // over; otherwise, entities are skipped until the next start tag or end
    // tag. The first end tag that is landed on this way is the parent's end
    // tag. If the range becomes empty first, then there was no parent.
    bool onStartTag = entityRange._type == EntityType.elementStart;

    while(true)
    {
        if(onStartTag)
        {
            // Move to the matching end tag and then past it.
            entityRange = entityRange.skipContents();
            entityRange.popFront();
        }
        else
        {
            entityRange = entityRange.skipToEntityType(EntityType.elementStart,
                                                       EntityType.elementEnd);
        }

        if(entityRange.empty || entityRange._type == EntityType.elementEnd)
            return entityRange;

        // A sibling which is not a start tag is handled by the skip in the
        // other branch on the next iteration.
        onStartTag = entityRange._type == EntityType.elementStart;
    }
}
4072 
4073 ///
unittest
{
    auto xml = "<root>\n" ~
               "    <foo>\n" ~
               "        <!-- comment -->\n" ~
               "        <bar>exam</bar>\n" ~
               "    </foo>\n" ~
               "    <!-- another comment -->\n" ~
               "</root>";
    // Starting from a comment inside of <foo> and then climbing up one
    // parent at a time.
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        // The sibling <bar> element is skipped over as a whole.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        // From an end tag, the result is the end tag of its parent.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        // </root> has no parent, so the result is an empty range.
        range = range.skipToParentEndTag();
        assert(range.empty);
    }
    // Starting from the text inside of <bar>.
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text == "exam");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " another comment ");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        assert(range.skipToParentEndTag().empty);
    }
    // The root start tag has no parent, so the result is an empty range.
    {
        auto range = parseXML("<root><foo>bar</foo></root>");
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
        assert(range.skipToParentEndTag().empty);
    }
}
4153 
4154 unittest
4155 {
4156     import core.exception : AssertError;
4157     import std.algorithm.comparison : equal;
4158     import std.exception : enforce;
4159     import dxml.internal : testRangeFuncs;
4160 
4161     static void popAndCheck(R)(ref R range, EntityType type, size_t line = __LINE__)
4162     {
4163         range.popFront();
4164         enforce!AssertError(!range.empty, "unittest 1", __FILE__, line);
4165         enforce!AssertError(range.front.type == type, "unittest 2", __FILE__, line);
4166     }
4167 
4168     static foreach(func; testRangeFuncs)
4169     {{
4170         // cdata
4171         {
4172             auto xml = "<root>\n" ~
4173                        "    <![CDATA[ cdata run ]]>\n" ~
4174                        "    <nothing/>\n" ~
4175                        "    <![CDATA[ cdata have its bits flipped ]]>\n" ~
4176                        "    <foo></foo>\n" ~
4177                        "    <![CDATA[ cdata play violin ]]>\n" ~
4178                        "</root>";
4179 
4180             auto range = parseXML(func(xml));
4181             assert(range.front.type == EntityType.elementStart);
4182             popAndCheck(range, EntityType.cdata);
4183             assert(equal(range.front.text, " cdata run "));
4184             {
4185                 auto temp = range.save.skipToParentEndTag();
4186                 assert(temp._type == EntityType.elementEnd);
4187                 assert(equal(temp.front.name, "root"));
4188             }
4189             popAndCheck(range, EntityType.elementEmpty);
4190             popAndCheck(range, EntityType.cdata);
4191             assert(equal(range.front.text, " cdata have its bits flipped "));
4192             {
4193                 auto temp = range.save.skipToParentEndTag();
4194                 assert(temp._type == EntityType.elementEnd);
4195                 assert(equal(temp.front.name, "root"));
4196             }
4197             popAndCheck(range, EntityType.elementStart);
4198             range = range.skipContents();
4199             popAndCheck(range, EntityType.cdata);
4200             assert(equal(range.front.text, " cdata play violin "));
4201             range = range.skipToParentEndTag();
4202             assert(range._type == EntityType.elementEnd);
4203             assert(equal(range.front.name, "root"));
4204         }
4205         // comment
4206         {
4207             auto xml = "<!-- before -->\n" ~
4208                        "<root>\n" ~
4209                        "    <!-- comment 1 -->\n" ~
4210                        "    <nothing/>\n" ~
4211                        "    <!-- comment 2 -->\n" ~
4212                        "    <foo></foo>\n" ~
4213                        "    <!-- comment 3 -->\n" ~
4214                        "</root>\n" ~
4215                        "<!-- after -->" ~
4216                        "<!-- end -->";
4217 
4218             auto text = func(xml);
4219             assert(parseXML(text.save).skipToParentEndTag().empty);
4220             {
4221                 auto range = parseXML(text.save);
4222                 assert(range.front.type == EntityType.comment);
4223                 popAndCheck(range, EntityType.elementStart);
4224                 popAndCheck(range, EntityType.comment);
4225                 assert(equal(range.front.text, " comment 1 "));
4226                 {
4227                     auto temp = range.save.skipToParentEndTag();
4228                     assert(temp._type == EntityType.elementEnd);
4229                     assert(equal(temp.front.name, "root"));
4230                 }
4231                 popAndCheck(range, EntityType.elementEmpty);
4232                 popAndCheck(range, EntityType.comment);
4233                 assert(equal(range.front.text, " comment 2 "));
4234                 {
4235                     auto temp = range.save.skipToParentEndTag();
4236                     assert(temp._type == EntityType.elementEnd);
4237                     assert(equal(temp.front.name, "root"));
4238                 }
4239                 popAndCheck(range, EntityType.elementStart);
4240                 range = range.skipContents();
4241                 popAndCheck(range, EntityType.comment);
4242                 assert(equal(range.front.text, " comment 3 "));
4243                 range = range.skipToParentEndTag();
4244                 assert(range._type == EntityType.elementEnd);
4245                 assert(equal(range.front.name, "root"));
4246             }
4247             {
4248                 auto range = parseXML(text.save);
4249                 assert(range.front.type == EntityType.comment);
4250                 popAndCheck(range, EntityType.elementStart);
4251                 range = range.skipContents();
4252                 popAndCheck(range, EntityType.comment);
4253                 assert(equal(range.front.text, " after "));
4254                 assert(range.save.skipToParentEndTag().empty);
4255                 popAndCheck(range, EntityType.comment);
4256                 assert(equal(range.front.text, " end "));
4257                 assert(range.skipToParentEndTag().empty);
4258             }
4259         }
4260         // elementStart
4261         {
4262             auto xml = "<root>\n" ~
4263                        "    <a><b>foo</b></a>\n" ~
4264                        "    <nothing/>\n" ~
4265                        "    <c></c>\n" ~
4266                        "    <d>\n" ~
4267                        "        <e>\n" ~
4268                        "        </e>\n" ~
4269                        "        <f>\n" ~
4270                        "            <g>\n" ~
4271                        "            </g>\n" ~
4272                        "        </f>\n" ~
4273                        "    </d>\n" ~
4274                        "</root>";
4275 
4276             auto range = parseXML(func(xml));
4277             assert(range.front.type == EntityType.elementStart);
4278             assert(equal(range.front.name, "root"));
4279             assert(range.save.skipToParentEndTag().empty);
4280             popAndCheck(range, EntityType.elementStart);
4281             assert(equal(range.front.name, "a"));
4282             {
4283                 auto temp = range.save.skipToParentEndTag();
4284                 assert(temp._type == EntityType.elementEnd);
4285                 assert(equal(temp.front.name, "root"));
4286             }
4287             popAndCheck(range, EntityType.elementStart);
4288             assert(equal(range.front.name, "b"));
4289             {
4290                 auto temp = range.save.skipToParentEndTag();
4291                 assert(temp._type == EntityType.elementEnd);
4292                 assert(equal(temp.front.name, "a"));
4293             }
4294             popAndCheck(range, EntityType.text);
4295             popAndCheck(range, EntityType.elementEnd);
4296             popAndCheck(range, EntityType.elementEnd);
4297             popAndCheck(range, EntityType.elementEmpty);
4298             popAndCheck(range, EntityType.elementStart);
4299             assert(equal(range.front.name, "c"));
4300             {
4301                 auto temp = range.save.skipToParentEndTag();
4302                 assert(temp._type == EntityType.elementEnd);
4303                 assert(equal(temp.front.name, "root"));
4304             }
4305             popAndCheck(range, EntityType.elementEnd);
4306             popAndCheck(range, EntityType.elementStart);
4307             assert(equal(range.front.name, "d"));
4308             popAndCheck(range, EntityType.elementStart);
4309             assert(equal(range.front.name, "e"));
4310             range = range.skipToParentEndTag();
4311             assert(range._type == EntityType.elementEnd);
4312             assert(equal(range.front.name, "d"));
4313             range = range.skipToParentEndTag();
4314             assert(range._type == EntityType.elementEnd);
4315             assert(equal(range.front.name, "root"));
4316         }
4317         // elementEnd
4318         {
4319             auto xml = "<root>\n" ~
4320                        "    <a><b>foo</b></a>\n" ~
4321                        "    <nothing/>\n" ~
4322                        "    <c></c>\n" ~
4323                        "</root>";
4324 
4325             auto range = parseXML(func(xml));
4326             assert(range.front.type == EntityType.elementStart);
4327             popAndCheck(range, EntityType.elementStart);
4328             popAndCheck(range, EntityType.elementStart);
4329             popAndCheck(range, EntityType.text);
4330             popAndCheck(range, EntityType.elementEnd);
4331             assert(equal(range.front.name, "b"));
4332             {
4333                 auto temp = range.save.skipToParentEndTag();
4334                 assert(temp._type == EntityType.elementEnd);
4335                 assert(equal(temp.front.name, "a"));
4336             }
4337             popAndCheck(range, EntityType.elementEnd);
4338             assert(equal(range.front.name, "a"));
4339             {
4340                 auto temp = range.save.skipToParentEndTag();
4341                 assert(temp._type == EntityType.elementEnd);
4342                 assert(equal(temp.front.name, "root"));
4343             }
4344             popAndCheck(range, EntityType.elementEmpty);
4345             popAndCheck(range, EntityType.elementStart);
4346             popAndCheck(range, EntityType.elementEnd);
4347             assert(equal(range.front.name, "c"));
4348             {
4349                 auto temp = range.save.skipToParentEndTag();
4350                 assert(temp._type == EntityType.elementEnd);
4351                 assert(equal(temp.front.name, "root"));
4352             }
4353             popAndCheck(range, EntityType.elementEnd);
4354             assert(range.skipToParentEndTag().empty);
4355         }
4356         // elementEmpty
4357         {
4358             auto range = parseXML(func("<root/>"));
4359             assert(range.front.type == EntityType.elementEmpty);
4360             assert(range.skipToParentEndTag().empty);
4361         }
4362         {
4363             auto xml = "<root>\n" ~
4364                        "    <a><b>foo</b></a>\n" ~
4365                        "    <nothing/>\n" ~
4366                        "    <c></c>\n" ~
4367                        "    <whatever/>\n" ~
4368                        "</root>";
4369 
4370             auto range = parseXML(func(xml));
4371             popAndCheck(range, EntityType.elementStart);
4372             assert(range.front.type == EntityType.elementStart);
4373             range = range.skipContents();
4374             popAndCheck(range, EntityType.elementEmpty);
4375             assert(equal(range.front.name, "nothing"));
4376             {
4377                 auto temp = range.save;
4378                 popAndCheck(temp, EntityType.elementStart);
4379                 popAndCheck(temp, EntityType.elementEnd);
4380                 popAndCheck(temp, EntityType.elementEmpty);
4381                 assert(equal(temp.front.name, "whatever"));
4382             }
4383             range = range.skipToParentEndTag();
4384             assert(range._type == EntityType.elementEnd);
4385             assert(equal(range.front.name, "root"));
4386         }
4387         // pi
4388         {
4389             auto xml = "<?Sherlock?>\n" ~
4390                        "<root>\n" ~
4391                        "    <?Foo?>\n" ~
4392                        "    <nothing/>\n" ~
4393                        "    <?Bar?>\n" ~
4394                        "    <foo></foo>\n" ~
4395                        "    <?Baz?>\n" ~
4396                        "</root>\n" ~
4397                        "<?Poirot?>\n" ~
4398                        "<?Conan?>";
4399 
4400             auto range = parseXML(func(xml));
4401             assert(range.front.type == EntityType.pi);
4402             assert(equal(range.front.name, "Sherlock"));
4403             assert(range.save.skipToParentEndTag().empty);
4404             popAndCheck(range, EntityType.elementStart);
4405             popAndCheck(range, EntityType.pi);
4406             assert(equal(range.front.name, "Foo"));
4407             {
4408                 auto temp = range.save.skipToParentEndTag();
4409                 assert(temp._type == EntityType.elementEnd);
4410                 assert(equal(temp.front.name, "root"));
4411             }
4412             popAndCheck(range, EntityType.elementEmpty);
4413             popAndCheck(range, EntityType.pi);
4414             assert(equal(range.front.name, "Bar"));
4415             {
4416                 auto temp = range.save.skipToParentEndTag();
4417                 assert(temp._type == EntityType.elementEnd);
4418                 assert(equal(temp.front.name, "root"));
4419             }
4420             popAndCheck(range, EntityType.elementStart);
4421             popAndCheck(range, EntityType.elementEnd);
4422             popAndCheck(range, EntityType.pi);
4423             assert(equal(range.front.name, "Baz"));
4424             range = range.skipToParentEndTag();
4425             assert(range._type == EntityType.elementEnd);
4426             assert(equal(range.front.name, "root"));
4427             popAndCheck(range, EntityType.pi);
4428             assert(equal(range.front.name, "Poirot"));
4429             assert(range.save.skipToParentEndTag().empty);
4430             popAndCheck(range, EntityType.pi);
4431             assert(equal(range.front.name, "Conan"));
4432             assert(range.skipToParentEndTag().empty);
4433         }
4434         // text
4435         {
4436             auto xml = "<root>\n" ~
4437                        "    nothing to say\n" ~
4438                        "    <nothing/>\n" ~
4439                        "    nothing whatsoever\n" ~
4440                        "    <foo></foo>\n" ~
4441                        "    but he keeps talking\n" ~
4442                        "</root>";
4443 
4444             auto range = parseXML(func(xml));
4445             assert(range.front.type == EntityType.elementStart);
4446             popAndCheck(range, EntityType.text);
4447             assert(equal(range.front.text, "\n    nothing to say\n    "));
4448             {
4449                 auto temp = range.save.skipToParentEndTag();
4450                 assert(temp._type == EntityType.elementEnd);
4451                 assert(equal(temp.front.name, "root"));
4452             }
4453             popAndCheck(range, EntityType.elementEmpty);
4454             popAndCheck(range, EntityType.text);
4455             assert(equal(range.front.text, "\n    nothing whatsoever\n    "));
4456             {
4457                 auto temp = range.save.skipToParentEndTag();
4458                 assert(temp._type == EntityType.elementEnd);
4459                 assert(equal(temp.front.name, "root"));
4460             }
4461             popAndCheck(range, EntityType.elementStart);
4462             range = range.skipContents();
4463             popAndCheck(range, EntityType.text);
4464             assert(equal(range.front.text, "\n    but he keeps talking\n"));
4465             range = range.skipToParentEndTag();
4466             assert(range._type == EntityType.elementEnd);
4467             assert(equal(range.front.name, "root"));
4468         }
4469     }}
4470 }
4471 
4472 
4473 /++
4474     Treats the given string like a file path except that each directory
4475     corresponds to the name of a start tag. Note that this does $(I not) try to
4476     implement XPath as that would be quite complicated, and it really doesn't
4477     fit with a StAX parser.
4478 
4479     A start tag should be thought of as a directory, with its child start tags
4480     as the directories it contains.
4481 
4482     All paths should be relative. $(LREF EntityRange) can only move forward
4483     through the document, so using an absolute path would only make sense at
4484     the beginning of the document. As such, absolute paths are treated as
4485     invalid paths.
4486 
4487     $(D_CODE_STRING "./") and $(D_CODE_STRING "../") are supported. Repeated
4488     slashes such as in $(D_CODE_STRING "foo//bar") are not supported and are
4489     treated as an invalid path.
4490 
4491     If $(D range.front.type == EntityType.elementStart), then
    $(D range._skipToPath($(D_STRING "foo"))) will search for the first child
4493     start tag (be it $(LREF EntityType.elementStart) or
4494     $(LREF EntityType.elementEmpty)) with the $(LREF2 name, EntityRange.Entity)
4495     $(D_CODE_STRING "foo"). That start tag must be a direct child of the current
4496     start tag.
4497 
4498     If $(D range.front.type) is any other $(LREF EntityType), then
4499     $(D range._skipToPath($(D_STRING "foo"))) will return an empty range,
4500     because no other $(LREF EntityType)s have child start tags.
4501 
4502     For any $(LREF EntityType), $(D range._skipToPath($(D_STRING "../foo")))
4503     will search for the first start tag with the
4504     $(LREF2 name, EntityRange.Entity) $(D_CODE_STRING "foo") at the same level
4505     as the current entity. If the current entity is a start tag with the name
4506     $(D_CODE_STRING "foo"), it will not be considered a match.
4507 
4508     $(D range._skipToPath($(D_STRING "./"))) is a no-op. However,
4509     $(D range._skipToPath($(D_STRING "../"))) will result in the empty range
4510     (since it doesn't target a specific start tag).
4511 
4512     $(D range._skipToPath($(D_STRING "foo/bar"))) is equivalent to
4513     $(D range._skipToPath($(D_STRING "foo"))._skipToPath($(D_STRING "bar"))),
4514     and $(D range._skipToPath($(D_STRING "../foo/bar"))) is equivalent to
4515     $(D range._skipToPath($(D_STRING "../foo"))._skipToPath($(D_STRING "bar"))).
4516 
4517     Returns: The given range with its $(D front) now at the requested entity if
4518              the path is valid; otherwise, an empty range is returned.
4519 
4520     Throws: $(LREF XMLParsingException) on invalid XML.
4521   +/
R skipToPath(R)(R entityRange, string path)
    if(isInstanceOf!(EntityRange, R))
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : canFind;
    import std.path : pathSplitter;

    // An empty range has nowhere to skip to, so it is handed back untouched.
    if(entityRange.empty)
        return entityRange;

    // Empty paths, absolute paths, and paths containing repeated slashes are
    // all invalid. pathSplitter silently collapses repeated separators (so
    // "foo//bar" would otherwise be treated as "foo/bar"), which means that
    // the repeated slashes have to be rejected explicitly here.
    if(path.empty || path[0] == '/' || path.canFind("//"))
        return entityRange.takeNone();

    with(EntityType)
    {
        // The entity types which matter when looking for a tag at a given
        // level. With SplitEmpty.yes, elementEmpty is not searched for
        // (the tests treat empty element tags as elementStart in that case).
        static if(R.config.splitEmpty == SplitEmpty.yes)
            EntityType[2] startOrEnd = [elementStart, elementEnd];
        else
            EntityType[3] startOrEnd = [elementStart, elementEnd, elementEmpty];

        // Searches forward for a start tag called name at the same level as
        // the current entity (the current entity itself is never a match).
        // Returns an empty range if the parent's end tag or the end of the
        // document is reached first.
        R findOnCurrLevel(string name)
        {
            // The children of the current start tag are not on the current
            // level, so they are skipped rather than searched.
            if(entityRange._type == elementStart)
                entityRange = entityRange.skipContents();
            while(true)
            {
                entityRange = entityRange.skipToEntityType(startOrEnd[]);
                if(entityRange.empty)
                    return entityRange;
                // An end tag at this point belongs to the parent, so there
                // are no more siblings to look at.
                if(entityRange._type == elementEnd)
                    return entityRange.takeNone();

                if(equal(name, entityRange._name.save))
                    return entityRange;

                // An empty element tag has no contents to skip.
                static if(R.config.splitEmpty == SplitEmpty.no)
                {
                    if(entityRange._type == elementEmpty)
                        continue;
                }
                entityRange = entityRange.skipContents();
            }
        }

        for(auto pieces = path.pathSplitter(); !pieces.empty; pieces.popFront())
        {
            // "." refers to the current entity, so there is nothing to do.
            if(pieces.front == ".")
                continue;
            else if(pieces.front == "..")
            {
                // ".." must be followed by a tag name; on its own it does not
                // target any start tag.
                pieces.popFront();
                if(pieces.empty)
                    return entityRange.takeNone();

                // The first ".." only means "search the current level". Each
                // additional ".." moves up to the end tag of the parent.
                while(pieces.front == "..")
                {
                    pieces.popFront();
                    if(pieces.empty)
                        return entityRange.takeNone();
                    entityRange = entityRange.skipToParentEndTag();
                    if(entityRange.empty)
                        return entityRange;
                }

                entityRange = findOnCurrLevel(pieces.front);
                if(entityRange.empty)
                    return entityRange;
            }
            else
            {
                // Only a non-empty start tag can have child start tags.
                if(entityRange._type != elementStart)
                    return entityRange.takeNone();

                // Move to the first child tag. A start tag is always followed
                // by at least its own end tag in valid XML, so the range
                // cannot be empty here.
                entityRange = entityRange.skipToEntityType(startOrEnd[]);
                assert(!entityRange.empty);
                if(entityRange._type == elementEnd)
                    return entityRange.takeNone();

                // If the first child is not the one being looked for, then
                // the rest of the children are searched.
                if(!equal(pieces.front, entityRange._name.save))
                {
                    entityRange = findOnCurrLevel(pieces.front);
                    if(entityRange.empty)
                        return entityRange;
                }
            }
        }

        return entityRange;
    }
}
4610 
///
unittest
{
    {
        auto xml = "<carrot>\n" ~
                   "    <foo>\n" ~
                   "        <bar>\n" ~
                   "            <baz/>\n" ~
                   "            <other/>\n" ~
                   "        </bar>\n" ~
                   "    </foo>\n" ~
                   "</carrot>";

        auto range = parseXML(xml);
        // "<carrot>"
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "carrot");

        range = range.skipToPath("foo/bar");
        // "        <bar>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("baz");
        // "            <baz/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);

        // other is not a child element of baz
        assert(range.skipToPath("other").empty);

        range = range.skipToPath("../other");
        // "            <other/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    {
        auto xml = "<potato>\n" ~
                   "    <foo>\n" ~
                   "        <bar>\n" ~
                   "        </bar>\n" ~
                   "        <crazy>\n" ~
                   "        </crazy>\n" ~
                   "        <fou/>\n" ~
                   "    </foo>\n" ~
                   "    <buzz/>\n" ~
                   "</potato>";

        auto range = parseXML(xml);
        // "<potato>"
        assert(range.front.type == EntityType.elementStart);

        range = range.skipToPath("./");
        // "<potato>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "potato");

        range = range.skipToPath("./foo/bar");
        // "        <bar>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("../crazy");
        // "        <crazy>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "crazy");

        // Whether or not popFront is called here before the call to
        // range.skipToPath("../fou") below, the result is the same, because
        // both <crazy> and </crazy> are at the same level.
        range.popFront();
        // "        </crazy>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "crazy");

        range = range.skipToPath("../fou");
        // "        <fou/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    // Searching stops at the first matching start tag.
    {
        auto xml = "<beet>\n" ~
                   "    <foo a='42'>\n" ~
                   "    </foo>\n" ~
                   "    <foo b='451'>\n" ~
                   "    </foo>\n" ~
                   "</beet>";

        auto range = parseXML(xml);
        range = range.skipToPath("foo");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "a");
            assert(attrs.front.value == "42");
        }

        range = range.skipToPath("../foo");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "b");
            assert(attrs.front.value == "451");
        }
    }
    // skipToPath will work on an empty range but will always return an
    // empty range.
    {
        auto range = parseXML("<root/>");
        assert(range.takeNone().skipToPath("nowhere").empty);
    }
    // Empty and absolute paths will also result in an empty range as will
    // "../" without any actual tag name on the end.
    {
        auto range = parseXML("<root/>");
        assert(range.skipToPath("").empty);
        assert(range.skipToPath("/").empty);
        assert(range.skipToPath("../").empty);
    }
    // Only non-empty start tags have children; all other EntityTypes result
    // in an empty range unless "../" is used.
    {
        auto xml = "<!-- comment -->\n" ~
                   "<root>\n" ~
                   "    <foo/>\n" ~
                   "</root>";
        auto range = parseXML(xml);
        assert(range.skipToPath("root").empty);
        assert(range.skipToPath("foo").empty);

        range = range.skipToPath("../root");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
    }
}
4759 
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, enforce;
    import dxml.internal : testRangeFuncs;

    // Checks that skipping to path from range does not throw and ends up on
    // an entity with the given type and name. Failures are reported against
    // the line of the caller.
    static void testPath(R)(R range, string path, EntityType type, string name, size_t line = __LINE__)
    {
        auto found = assertNotThrown!XMLParsingException(range.skipToPath(path), "unittest 1", __FILE__, line);
        enforce!AssertError(!found.empty, "unittest 2", __FILE__, line);
        enforce!AssertError(found.front.type == type, "unittest 3", __FILE__, line);
        enforce!AssertError(equal(found.front.name, name), "unittest 4", __FILE__, line);
    }

    // Pops an empty element tag off of the range. With SplitEmpty.yes, that
    // takes two calls to popFront rather than one.
    static void popEmpty(R)(ref R range)
    {
        static if(range.config.splitEmpty == SplitEmpty.yes)
        {
            range.popFront();
            range.popFront();
        }
        else
            range.popFront();
    }

    auto doc = "<superuser>\n" ~
               "    <!-- comment -->\n" ~
               "    <?pi?>\n" ~
               "    <![CDATA[cdata]]>\n" ~
               "    <foo/>\n" ~
               "    <bar/>\n" ~
               "    <!-- comment -->\n" ~
               "    <!-- comment -->\n" ~
               "    <baz/>\n" ~
               "    <frobozz>\n" ~
               "        <!-- comment -->\n" ~
               "        <!-- comment -->\n" ~
               "        <whatever/>\n" ~
               "        <!-- comment -->\n" ~
               "        <!-- comment -->\n" ~
               "    </frobozz>\n" ~
               "    <!-- comment -->\n" ~
               "    <!-- comment -->\n" ~
               "    <xyzzy/>\n" ~
               "</superuser>";

    static foreach(func; testRangeFuncs)
    {{
        auto input = func(doc);

        static foreach(config; someTestConfigs)
        {{
            // The type reported for the front of an empty element tag
            // depends on whether empty element tags are being split.
            static if(config.splitEmpty == SplitEmpty.yes)
                enum emptyTag = EntityType.elementStart;
            else
                enum emptyTag = EntityType.elementEmpty;

            auto cursor = parseXML!config(input.save);

            // At <superuser>
            assert(cursor.save.skipToPath("whatever").empty);
            assert(cursor.save.skipToPath("frobozz/whateve").empty);

            testPath(cursor.save, "foo", emptyTag, "foo");
            testPath(cursor.save, "bar", emptyTag, "bar");
            testPath(cursor.save, "baz", emptyTag, "baz");
            testPath(cursor.save, "frobozz", EntityType.elementStart, "frobozz");
            testPath(cursor.save, "frobozz/whatever", emptyTag, "whatever");
            testPath(cursor.save, "xyzzy", emptyTag, "xyzzy");

            // Everything between <superuser> and <foo/>
            cursor.popFront();
            while(cursor.front.type != emptyTag)
            {
                assert(cursor.save.skipToPath("foo").empty);
                testPath(cursor.save, "../foo", emptyTag, "foo");
                testPath(cursor.save, "../bar", emptyTag, "bar");
                testPath(cursor.save, "../baz", emptyTag, "baz");
                testPath(cursor.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(cursor.save, "../frobozz/whatever", emptyTag, "whatever");
                testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");
                cursor.popFront();
            }

            // At <foo/>
            assert(equal(cursor.front.name, "foo"));
            assert(cursor.save.skipToPath("foo").empty);
            assert(cursor.save.skipToPath("./foo").empty);
            assert(cursor.save.skipToPath("../foo").empty);
            assert(cursor.save.skipToPath("bar").empty);
            assert(cursor.save.skipToPath("baz").empty);
            assert(cursor.save.skipToPath("frobozz").empty);
            assert(cursor.save.skipToPath("whatever").empty);
            assert(cursor.save.skipToPath("../").empty);
            assert(cursor.save.skipToPath("../../").empty);

            testPath(cursor.save, "../bar", emptyTag, "bar");
            testPath(cursor.save, "../baz", emptyTag, "baz");
            testPath(cursor.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(cursor.save, "../frobozz/whatever", emptyTag, "whatever");
            testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");

            // At <bar/>
            popEmpty(cursor);
            assert(cursor.save.skipToPath("bar").empty);
            testPath(cursor.save, "../baz", emptyTag, "baz");
            testPath(cursor.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(cursor.save, "../frobozz/whatever", emptyTag, "whatever");
            testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");

            // Everything between <bar/> and <baz/>
            cursor.popFront();
            while(cursor.front.type != emptyTag)
            {
                assert(cursor.save.skipToPath("baz").empty);
                testPath(cursor.save, "../baz", emptyTag, "baz");
                testPath(cursor.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(cursor.save, "../frobozz/whatever", emptyTag, "whatever");
                testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");
                cursor.popFront();
            }

            // At <baz/>
            assert(equal(cursor.front.name, "baz"));

            testPath(cursor.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(cursor.save, "../frobozz/whatever", emptyTag, "whatever");
            testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");

            // At <frobozz>
            popEmpty(cursor);
            assert(equal(cursor.front.name, "frobozz"));
            assert(cursor.save.skipToPath("wizard").empty);
            testPath(cursor.save, "whatever", emptyTag, "whatever");
            testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");

            // Everything between <frobozz> and <whatever/>
            cursor.popFront();
            while(cursor.front.type != emptyTag)
            {
                assert(cursor.save.skipToPath("whatever").empty);
                testPath(cursor.save, "../whatever", emptyTag, "whatever");
                testPath(cursor.save, "../../xyzzy", emptyTag, "xyzzy");
                cursor.popFront();
            }

            // At <whatever/>
            assert(equal(cursor.front.name, "whatever"));
            assert(cursor.save.skipToPath("frobozz").empty);
            assert(cursor.save.skipToPath("../frobozz").empty);
            assert(cursor.save.skipToPath("../xyzzy").empty);
            assert(cursor.save.skipToPath("../../frobozz").empty);

            testPath(cursor.save, "../../xyzzy", emptyTag, "xyzzy");

            // Everything between <whatever/> and </frobozz>
            popEmpty(cursor);
            while(cursor.front.type != EntityType.elementEnd)
            {
                assert(cursor.save.skipToPath("xyzzy").empty);
                assert(cursor.save.skipToPath("../xyzzy").empty);
                testPath(cursor.save, "../../xyzzy", emptyTag, "xyzzy");
                cursor.popFront();
            }

            // At </frobozz>
            assert(equal(cursor.front.name, "frobozz"));

            // Everything between </frobozz> and <xyzzy/>
            cursor.popFront();
            while(cursor.front.type != emptyTag)
            {
                assert(cursor.save.skipToPath("xyzzy").empty);
                testPath(cursor.save, "../xyzzy", emptyTag, "xyzzy");
                cursor.popFront();
            }

            // At <xyzzy/>
            assert(equal(cursor.front.name, "xyzzy"));

            // At </superuser>
            popEmpty(cursor);
            assert(equal(cursor.front.name, "superuser"));
            assert(cursor.save.skipToPath("superuser").empty);
            assert(cursor.save.skipToPath("foo").empty);
            assert(cursor.save.skipToPath("../foo").empty);
            assert(cursor.save.skipToPath("../../foo").empty);
        }}
    }}
}
4923 
4924 
4925 //------------------------------------------------------------------------------
4926 // Private Section
4927 //------------------------------------------------------------------------------
4928 private:
4929 
4930 
// Test helper which builds the parser state that EntityRange keeps in its
// _text member for the given config and range type, with its input set to the
// code units of xmlText.
auto testParser(Config config = Config.init, R)(R xmlText) @trusted pure nothrow @nogc
{
    import std.utf : byCodeUnit;

    alias ParserState = typeof(EntityRange!(config, R)._text);

    ParserState state;
    state.input = xmlText.byCodeUnit();
    return state;
}
4938 
4939 
4940 // toCmpType is to make it easy for tests to convert the expected result to a
4941 // range with the correct element type, since comparing with equal won't do
4942 // the right thing if the result doesn't have dchar as its element type.
auto toCmpType(alias func)(string str)
{
    import std.range : takeExactly;
    import std.utf : byUTF;

    // The type of a slice of the parser's input when the text comes from func.
    alias Slice = typeof(testParser(func(str)).input.takeExactly(1));
    // The expected result needs to use the same character type as that slice.
    alias CmpChar = immutable ElementType!Slice;

    return str.byUTF!CmpChar();
}
4950 
// Overload of toCmpType for tests which parse with a config built from the
// given ThrowOnEntityRef.
auto toCmpType(alias func, ThrowOnEntityRef toer)(string str)
{
    import std.range : takeExactly;
    import std.utf : byUTF;

    // The type of a slice of the parser's input when the text comes from func
    // and the parser is configured via makeConfig(toer).
    alias Slice = typeof(testParser!(makeConfig(toer))(func(str)).input.takeExactly(1));
    // The expected result needs to use the same character type as that slice.
    alias CmpChar = immutable ElementType!Slice;

    return str.byUTF!CmpChar();
}
4958 
4959 
4960 // Used to indicate where in the grammar we're currently parsing.
4961 enum GrammarPos
4962 {
4963     // Nothing has been parsed yet.
4964     documentStart,
4965 
4966     // document ::= prolog element Misc*
4967     // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
4968     // This is that first Misc*. The next entity to parse is either a Misc, the
4969     // doctypedecl, or the root element which follows the prolog.
4970     prologMisc1,
4971 
4972     // document ::= prolog element Misc*
    // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
4974     // This is that second Misc*. The next entity to parse is either a Misc or
4975     // the root element which follows the prolog.
4976     prologMisc2,
4977 
4978     // Used with SplitEmpty.yes to tell the parser that we're currently at an
4979     // empty element tag that we're treating as a start tag, so the next entity
4980     // will be an end tag even though we didn't actually parse one.
4981     splittingEmpty,
4982 
4983     // element  ::= EmptyElemTag | STag content ETag
4984     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
4985     // This is at the beginning of content at the first CharData?. The next
4986     // thing to parse will be a CharData, element, CDSect, PI, Comment, or ETag.
4987     // References are treated as part of the CharData and not parsed out by the
4988     // EntityRange (see EntityRange.Entity.text).
4989     contentCharData1,
4990 
4991     // element  ::= EmptyElemTag | STag content ETag
4992     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
4993     // This is after the first CharData?. The next thing to parse will be a
4994     // element, CDSect, PI, Comment, or ETag.
4995     // References are treated as part of the CharData and not parsed out by the
4996     // EntityRange (see EntityRange.Entity.text).
4997     contentMid,
4998 
4999     // element  ::= EmptyElemTag | STag content ETag
5000     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
5001     // This is at the second CharData?. The next thing to parse will be a
5002     // CharData, element, CDSect, PI, Comment, or ETag.
5003     // References are treated as part of the CharData and not parsed out by the
5004     // EntityRange (see EntityRange.Entity.text).
5005     contentCharData2,
5006 
5007     // element  ::= EmptyElemTag | STag content ETag
5008     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
5009     // This is after the second CharData?. The next thing to parse is an ETag.
5010     endTag,
5011 
5012     // document ::= prolog element Misc*
5013     // This is the Misc* at the end of the document. The next thing to parse is
5014     // either another Misc, or we will hit the end of the document.
5015     endMisc,
5016 
5017     // The end of the document (and the grammar) has been reached.
5018     documentEnd
5019 }
5020 
5021 
// Removes needle from the front of text.input if text.input starts with it
// and reports whether it did so. This is a wrapper around skipOver which also
// takes care of text.pos: on a match, the column is advanced by needle.length;
// without a match, text.pos is not changed.
//
// It is assumed that there are no newlines in needle (only the column is
// adjusted, never the line).
bool stripStartsWith(Text)(ref Text text, string needle)
{
    import std.algorithm.searching : skipOver;
    import std.utf : byCodeUnit;

    //TODO In the case where we're parsing an array of char, if we can cleanly
    // strip off any byCodeUnit and takeExactly wrappers, then we should be able
    // to have skipOver compare the string being parsed and the needle with ==.
    // It may happen in some cases right now when text.input is a byCodeUnit
    // result, but it won't happen in all cases where it ideally would. We may
    // also want to look into using byUTF on the needle so that it matches the
    // encoding of text.input or even make needle match the encoding when it's
    // passed in instead of always being string.
    immutable matched = text.input.skipOver(needle.byCodeUnit());

    if(matched)
        text.pos.col += needle.length;

    return matched;
}
5046 
// Tests stripStartsWith with every range type from testRangeFuncs, checking
// the returned flag, the remaining input, and the resulting position.
unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    static void test(alias func)(string origHaystack, string needle, string remainder, bool startsWith,
                                 int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);

        // Starting from the default position.
        {
            auto state = testParser(input.save);
            immutable result = state.stripStartsWith(needle);
            enforce!AssertError(result == startsWith, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(state.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(state.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Starting from a position which has been shifted by 3 lines and 7
        // columns. The column shift only carries over to the expected
        // position when the expected row is 1.
        {
            auto state = testParser(input);
            state.pos.line += 3;
            state.pos.col += 7;
            auto expectedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
            immutable result = state.stripStartsWith(needle);
            enforce!AssertError(result == startsWith, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(state.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(state.pos == expectedPos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Matches: the needle is removed and the column advances past it.
        test!func("hello world", "hello", " world", true, 1, "hello".length + 1);
        test!func("hello world", "hello world", "", true, 1, "hello world".length + 1);

        // Mismatches: the input and the position are left alone.
        test!func("hello world", "foo", "hello world", false, 1, 1);
        test!func("hello world", "hello sally", "hello world", false, 1, 1);
        test!func("hello world", "hello world ", "hello world", false, 1, 1);
    }
}
5083 
// Verifies that stripStartsWith can be called from @safe pure code for every
// range type from testRangeFuncs.
@safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto xml = func(`foo`);
        auto text = testParser!simpleXML(xml);
        assert(text.stripStartsWith("fo"));
    }}
}
5096 
5097 
// Removes leading whitespace (' ', '\t', '\r', and '\n') from text.input and
// keeps text.pos in sync while doing so. Newlines are not ignored: each '\n'
// goes through nextLine, and the column is only advanced for what follows the
// last newline that was stripped.
//
// Returns whether any whitespace was stripped.
bool stripWS(Text)(ref Text text)
{
    // When the input knows its length, the column is computed once at the end
    // from the difference in length rather than being bumped per character.
    enum trackByLength = hasLength!(Text.Input);

    bool sawWhitespace = false;

    static if(trackByLength)
        size_t lengthAtLineStart = text.input.length;

    while(!text.input.empty)
    {
        immutable c = text.input.front;

        if(c == '\n')
        {
            text.input.popFront();
            static if(trackByLength)
                lengthAtLineStart = text.input.length;
            nextLine!(Text.config)(text.pos);
        }
        else if(c == ' ' || c == '\t' || c == '\r')
        {
            text.input.popFront();
            static if(!trackByLength)
                ++text.pos.col;
        }
        else
            break;

        sawWhitespace = true;
    }

    static if(trackByLength)
        text.pos.col += lengthAtLineStart - text.input.length;

    return sawWhitespace;
}
5140 
// Tests stripWS with every range type from testRangeFuncs, checking the
// returned flag, the remaining input, and the resulting position.
unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    static void test(alias func)(string origHaystack, string remainder, bool stripped,
                                 int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);

        // Starting from the default position.
        {
            auto state = testParser(input.save);
            immutable result = state.stripWS();
            enforce!AssertError(result == stripped, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(state.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(state.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Starting from a position which has been shifted by 3 lines and 7
        // columns. The column shift only carries over to the expected
        // position when the expected row is 1.
        {
            auto state = testParser(input);
            state.pos.line += 3;
            state.pos.col += 7;
            auto expectedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
            immutable result = state.stripWS();
            enforce!AssertError(result == stripped, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(state.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(state.pos == expectedPos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Whitespace on a single line.
        test!func("  \t\rhello world", "hello world", true, 1, 5);

        // Whitespace spanning several lines.
        test!func("  \n \n \n  \nhello world", "hello world", true, 5, 1);
        test!func("  \n \n \n  \n  hello world", "hello world", true, 5, 3);

        // Nothing to strip.
        test!func("hello world", "hello world", false, 1, 1);
    }
}
5177 
// Verifies that stripWS can be called from @safe pure code for every range
// type from testRangeFuncs.
@safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        immutable stripped = text.stripWS();
        assert(!stripped);
    }}
}
5189 
5190 
// Searches text.input for needle and returns everything in front of it as a
// slice (or takeExactly) of text.input. Both the returned portion and the
// needle itself are removed from text.input. An XMLParsingException is thrown
// if the needle is not found.
//
// This forwards to _takeUntil with retSlice set to true; skipQuotedText is
// passed through unchanged.
auto takeUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    enum retSlice = true;
    return text._takeUntil!(retSlice, needle, skipQuotedText, Text)();
}
5199 
// Tests takeUntilAndDrop with every range type from testRangeFuncs, both with
// and without skipQuotedText. Successful cases check the returned slice, the
// remaining input, and the resulting position; failing cases check the
// position stored in the XMLParsingException.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func, string needle, bool sqt)(string origHaystack, string expected, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();

        // Starting from the default position.
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Starting from a position which has been shifted by 3 lines and 7
        // columns. The column shift only carries over to the expected
        // position when the expected row is 1.
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);

        // Starting from the default position.
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Starting from the shifted position (see test above).
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Cases without any quotes behave the same regardless of sqt.
        static foreach(sqt; [false, true])
        {
            {
                auto haystack = "hello world";
                enum needle = "world";

                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)(haystack, "hello ", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "le", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "l", "llo world", 1, 4);
            {
                enum needle = "great";
                enum expected = "プログラミング in D is ";
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", expected,
                                                     "great indeed"[i .. $], 1, codeLen!(func, expected) + i + 1);
                }
            }
            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without skipQuotedText, quotes are ordinary characters.
        test!(func, "*", false)(`hello '*' "*" * world`, `hello '`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `hello '"`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `hello "'`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, `hello ''' `, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, `hello """ `, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With skipQuotedText, a needle inside quotes is not a match, and an
        // unterminated quote fails at the position of that quote.
        test!(func, "*", true)(`hello '*' "*" * world`, `hello '*' "*" `, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, `hello '"*' `, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, `hello "'*" `, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        test!(func, "*", true)(`hello '' "" * world`, `hello '' "" `, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "foo '\n \n \n' bar", "", 4, 7);
    }
}
5305 
// Verifies that takeUntilAndDrop can be called from @safe pure code for every
// range type from testRangeFuncs.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        auto taken = text.takeUntilAndDrop!"o"();
        assert(equal(taken, "f"));
    }}
}
5318 
// Same as takeUntilAndDrop except that nothing is returned. It's intended for
// when the config indicates that something should be skipped.
//
// This forwards to _takeUntil with retSlice set to false; skipQuotedText is
// passed through unchanged.
void skipUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    enum retSlice = false;
    text._takeUntil!(retSlice, needle, skipQuotedText, Text)();
}
5325 
// Tests skipUntilAndDrop with every range type from testRangeFuncs, both with
// and without skipQuotedText. Successful cases check the remaining input and
// the resulting position; failing cases check the position stored in the
// XMLParsingException.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func, string needle, bool sqt)(string origHaystack, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);

        // Starting from the default position.
        {
            auto state = testParser(input.save);
            assertNotThrown!XMLParsingException(state.skipUntilAndDrop!(needle, sqt)(), "unittest failure 1",
                                                __FILE__, line);
            enforce!AssertError(equal(state.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(state.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Starting from a position which has been shifted by 3 lines and 7
        // columns. The column shift only carries over to the expected
        // position when the expected row is 1.
        {
            auto state = testParser(input);
            state.pos.line += 3;
            state.pos.col += 7;
            auto expectedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
            assertNotThrown!XMLParsingException(state.skipUntilAndDrop!(needle, sqt)(), "unittest failure 4",
                                                __FILE__, line);
            enforce!AssertError(equal(state.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(state.pos == expectedPos, "unittest failure 6", __FILE__, line);
        }
    }

    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);

        // Starting from the default position.
        {
            auto state = testParser(input.save);
            auto thrown = collectException!XMLParsingException(state.skipUntilAndDrop!(needle, sqt)());
            enforce!AssertError(thrown !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(thrown.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Starting from the shifted position (see test above).
        {
            auto state = testParser(input);
            state.pos.line += 3;
            state.pos.col += 7;
            auto expectedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto thrown = collectException!XMLParsingException(state.skipUntilAndDrop!(needle, sqt)());
            enforce!AssertError(thrown !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(thrown.pos == expectedPos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Cases without any quotes behave the same regardless of sqt.
        static foreach(sqt; [false, true])
        {
            {
                enum needle = "world";
                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)("hello world", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "llo world", 1, 4);

            {
                enum needle = "great";
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", "great indeed"[i .. $],
                                                     1, codeLen!(func, "プログラミング in D is ") + i + 1);
                }
            }

            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without skipQuotedText, quotes are ordinary characters.
        test!(func, "*", false)(`hello '*' "*" * world`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With skipQuotedText, a needle inside quotes is not a match, and an
        // unterminated quote fails at the position of that quote.
        test!(func, "*", true)(`hello '*' "*" * world`, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        test!(func, "*", true)(`hello '' "" * world`, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "", 4, 7);
    }
}
5428 
// Verifies that skipUntilAndDrop can be called from @safe pure code for every
// range type from testRangeFuncs.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        text.skipUntilAndDrop!"o"();
        // Only the first 'o' is dropped, so the second one remains.
        assert(equal(text.input, "o"));
    }}
}
5442 
// Shared implementation of takeUntilAndDrop and skipUntilAndDrop.
//
// Advances text.input past the first occurrence of needle and updates text.pos
// to match. Every '\n' that is passed goes through nextLine, and the column is
// then advanced by the number of elements consumed since the last newline
// plus needle.length.
//
// Template parameters:
//     retSlice       = If true, takeExactly(orig.input, takeLen) is returned,
//                      i.e. the part of the original input in front of the
//                      needle. If false, nothing is returned.
//     needle         = The text to search for. It may not contain whitespace
//                      (enforced by the static assert below).
//     skipQuotedText = If true, anything between a ' or " and the next
//                      occurrence of that same quote character is skipped
//                      without being searched for the needle.
//
// Throws: XMLParsingException if the needle is not found (with the position
//         that text had on entry) or, when skipQuotedText is true, if a quote
//         has no matching closing quote (with the position of the opening
//         quote).
auto _takeUntil(bool retSlice, string needle, bool skipQuotedText, Text)(ref Text text)
{
    import std.algorithm : find;
    import std.ascii : isWhite;
    import std.range : takeExactly;

    // Newlines are handled separately from the needle below, so the needle
    // itself must not contain any whitespace.
    static assert(needle.find!isWhite().empty);

    // orig is what the returned slice is taken from and what supplies the
    // position for the "Failed to find" exception.
    auto orig = text.save;
    bool found = false;
    // Number of elements of the input which precede the needle.
    size_t takeLen = 0;
    // Value of takeLen just after the most recent newline; used to work out
    // the column.
    size_t lineStart = 0;

    // Accounts for a '\n' at the front of the input. It does not pop it.
    void processNewline()
    {
        ++takeLen;
        nextLine!(Text.config)(text.pos);
        lineStart = takeLen;
    }

    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            // Possible start of the needle. How the rest of the needle is
            // checked is selected at compile time based on its length.
            case cast(ElementType!(Text.Input))needle[0]:
            {
                static if(needle.length == 1)
                {
                    found = true;
                    text.input.popFront();
                    break loop;
                }
                else static if(needle.length == 2)
                {
                    text.input.popFront();
                    if(!text.input.empty && text.input.front == needle[1])
                    {
                        found = true;
                        text.input.popFront();
                        break loop;
                    }
                    // Not the needle, so the popped element is part of the
                    // result. The element which is now at the front has not
                    // been examined yet, so go around again without popping.
                    ++takeLen;
                    continue;
                }
                else
                {
                    text.input.popFront();
                    // Where to resume from if the rest of the needle doesn't
                    // match.
                    auto saved = text.input.save;
                    foreach(i, c; needle[1 .. $])
                    {
                        if(text.input.empty)
                        {
                            // The input ended in the middle of a potential
                            // match. found is still false, so this ends up
                            // throwing below.
                            takeLen += i + 1;
                            break loop;
                        }
                        if(text.input.front != c)
                        {
                            // Mismatch: rewind to just after the first
                            // element and count that element as part of the
                            // result.
                            text.input = saved;
                            ++takeLen;
                            continue loop;
                        }
                        text.input.popFront();
                    }
                    found = true;
                    break loop;
                }
            }
            static if(skipQuotedText)
            {
                static foreach(quote; ['\'', '"'])
                {
                    case quote:
                    {
                        // Position of the opening quote, for the exception
                        // thrown if it is never closed.
                        auto quotePos = text.pos;
                        quotePos.col += takeLen - lineStart;
                        ++takeLen;
                        while(true)
                        {
                            text.input.popFront();
                            if(text.input.empty)
                                throw new XMLParsingException("Failed to find matching quote", quotePos);
                            switch(text.input.front)
                            {
                                case quote:
                                {
                                    // Closing quote: count it, pop it, and go
                                    // back to searching for the needle.
                                    ++takeLen;
                                    text.input.popFront();
                                    continue loop;
                                }
                                case '\n':
                                {
                                    processNewline();
                                    break;
                                }
                                default:
                                {
                                    ++takeLen;
                                    break;
                                }
                            }
                        }
                        assert(0); // the compiler isn't smart enough to see that this is unreachable.
                    }
                }
            }
            case '\n':
            {
                processNewline();
                break;
            }
            default:
            {
                ++takeLen;
                break;
            }
        }

        // Only reached via the '\n' and default cases; every other path
        // leaves the switch with break loop, continue, or a throw.
        text.input.popFront();
    }

    // Note that this runs before the found check, so text.pos has already
    // been adjusted when the exception below is thrown.
    text.pos.col += takeLen - lineStart + needle.length;

    if(!found)
        throw new XMLParsingException("Failed to find: " ~ needle, orig.pos);

    static if(retSlice)
        return takeExactly(orig.input, takeLen);
}
5571 
5572 
// Skips forward in text.input until the front is one of the given delimiter
// characters, keeping text.pos in sync along the way. On return, the delimiter
// which was found is still at the front of the input. If the end of the input
// is reached without finding any of the delimiters, an XMLParsingException is
// thrown.
//
// The name is admittedly close to skipUntilAndDrop, but the logic is different
// enough that a separate function was preferable to choosing between behaviors
// via template arguments.
//
// Each delimiter must be a char and may not be a whitespace character.
template skipToOneOf(delims...)
{
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char));
        static assert(!isSpace(delim));
    }

    void skipToOneOf(Text)(ref Text text)
    {
        for(;;)
        {
            if(text.input.empty)
                throw new XMLParsingException("Prematurely reached end of document", text.pos);

            switch(text.input.front)
            {
                // Found a delimiter; it is left at the front of the input.
                static foreach(delim; delims)
                    case delim: return;
                case '\n':
                    nextLine!(Text.config)(text.pos);
                    text.input.popFront();
                    break;
                default:
                    popFrontAndIncCol(text);
                    break;
            }
        }
    }
}
5611 
// Tests skipToOneOf with every range type from testRangeFuncs. Successful
// cases check the remaining input and the resulting position; failing cases
// check the position stored in the XMLParsingException.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func, delims...)(string origHaystack, string remainder,
                                            int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);

        // Starting from the default position.
        {
            auto text = testParser(haystack.save);
            assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Starting from a position which has been shifted by 3 lines and 7
        // columns. The column shift only carries over to the expected
        // position when the expected row is 1.
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    static void testFail(alias func, delims...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);

        // Starting from the default position.
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.skipToOneOf!delims());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Starting from the shifted position (see test above).
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.skipToOneOf!delims());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // A delimiter is found; it stays at the front of the remaining input.
        test!(func, 'o', 'w')("hello world", "o world", 1, 5);
        test!(func, 'r', 'w', '1', '+', '*')("hello world", "world", 1, 7);
        test!(func, 'z', 'y')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "yzzy \nf\ng", 6, 6);
        test!(func, 'o', 'g')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "g", 8, 1);
        test!(func, 'g', 'x')("プログラミング in D is great indeed", "great indeed",
                              1, codeLen!(func, "プログラミング in D is ") + 1);

        // No delimiter is found; the exception carries the position of the
        // end of the input.
        testFail!(func, 'a', 'b')("hello world", 1, 12);
        testFail!(func, 'a', 'b')("hello\n\nworld", 3, 6);
        testFail!(func, 'a', 'b')("プログラミング",  1, codeLen!(func, "プログラミング") + 1);
    }
}
5674 
// Verifies that skipToOneOf can be called from @safe pure code for every range
// type from testRangeFuncs.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        text.skipToOneOf!('o')();
        // The delimiter which was found is not consumed.
        assert(equal(text.input, "oo"));
    }}
}
5688 
5689 
// Expects the front of the input to be text enclosed in single or double
// quotes. The text between the quotes is returned as a slice of the input, and
// the input is advanced to one code unit beyond the closing quote.
//
// An XMLParsingException is thrown if the front of the input is not a quote
// character.
auto takeEnquotedText(Text)(ref Text text)
{
    checkNotEmpty(text);

    immutable opening = text.input.front;

    // takeUntilAndDrop takes its needle at compile time (which makes more
    // sense everywhere else it's used), so rather than passing the quote as a
    // runtime argument, one branch is generated per quote character.
    static foreach(delim; [`"`, `'`])
    {
        if(opening == delim[0])
        {
            popFrontAndIncCol(text);
            return text.takeUntilAndDrop!delim();
        }
    }

    throw new XMLParsingException("Expected quoted text", text.pos);
}
5710 
// Exercises takeEnquotedText with every range type from testRangeFuncs. For
// each successful case, the returned text, the remaining input, and the
// resulting position are checked twice: once from the parser's default
// position and once from a position shifted by 3 lines and 7 columns.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown, enforce;
    import std.range : only;
    import dxml.internal : testRangeFuncs;

    // Each enforce carries a distinct failure number so that a failing check
    // can be identified from the message alone (the reported line is that of
    // the caller, not of the enforce).
    static void test(alias func)(string origHaystack, string expected, string remainder,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected.save), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if the parser is still on
            // the line that it started on.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // The input must make takeEnquotedText throw an XMLParsingException.
    static void testFail(alias func)(string origHaystack, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto text = testParser(haystack);
        assertThrown!XMLParsingException(text.takeEnquotedText(), "unittest failure", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        foreach(quote; only("\"", "'"))
        {
            test!func(quote ~ quote, "", "", 1, 3);
            test!func(quote ~ "hello world" ~ quote, "hello world", "", 1, 14);
            test!func(quote ~ "hello world" ~ quote ~ " foo", "hello world", " foo", 1, 14);
            {
                import std.utf : codeLength;
                auto haystack = quote ~ "プログラミング " ~ quote ~ "in D";
                // The expected column depends on how many code units the text
                // occupies in the encoding of the range being tested.
                enum len = cast(int)codeLength!(ElementEncodingType!(typeof(func(haystack))))("プログラミング ");
                test!func(haystack, "プログラミング ", "in D", 1, len + 3);
            }
        }

        // Missing, mismatched, or unterminated quotes.
        foreach(str; only(`hello`, `"hello'`, `"hello`, `'hello"`, `'hello`, ``, `"'`, `"`, `'"`, `'`))
            testFail!func(str);
    }
}
5767 
5768 
// Removes a name (per the Name grammar rule) from the front of the input and
// returns it as a slice of the original input.
//
// The first character must satisfy isNameStartChar, and every character after
// it must satisfy isNameChar. Scanning stops at the end of the input, at an
// XML whitespace character, or at one of the given delimiters. Whatever
// stopped the scan is not part of the returned name and remains at the front
// of the input.
//
// The delimiters must be non-whitespace chars. The input must not be empty
// when this is called (that is only asserted). An XMLParsingException is
// thrown if a character which is not legal in a name is encountered.
template takeName(delims...)
{
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char), delim);
        static assert(!isSpace(delim));
    }

    auto takeName(Text)(ref Text text)
    {
        import dxml.internal : isNameChar, isNameStartChar;
        import std.format : format;
        import std.range : takeExactly;
        import std.utf : decodeFront, UseReplacementDchar;

        assert(!text.input.empty);

        auto start = text.input.save;

        // Number of code units that make up the name so far; decodeFront
        // sets it to the size of the first character.
        size_t nameLen;
        immutable first = text.input.decodeFront!(UseReplacementDchar.yes)(nameLen);
        if(!isNameStartChar(first))
            throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(first), text.pos);

        scan: while(!text.input.empty)
        {
            // Whitespace and delimiters are checked on the raw code unit and
            // are left in the input.
            immutable unit = text.input.front;
            if(isSpace(unit))
                break;
            static foreach(delim; delims)
            {
                if(unit == delim)
                    break scan;
            }

            size_t width;
            immutable decoded = text.input.decodeFront!(UseReplacementDchar.yes)(width);
            if(!isNameChar(decoded))
            {
                // Report the position of the offending character rather than
                // the position of the start of the name.
                text.pos.col += nameLen;
                throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(decoded), text.pos);
            }
            nameLen += width;
        }

        text.pos.col += nameLen;

        return takeExactly(start, nameLen);
    }
}
5834 
// Exercises takeName with every range type from testRangeFuncs, with and
// without delimiters. Successful cases check the returned name, the remaining
// input, and the resulting position; failing cases check the position carried
// by the XMLParsingException. Everything is checked both from the parser's
// default position and from a position shifted by 3 lines and 7 columns.
unittest
{
    import core.exception : AssertError;
    import dxml.internal : codeLen, testRangeFuncs;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.typecons : tuple;

    static void test(alias func, delim...)(string origHaystack, string expected, string remainder,
                                           int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);
        auto wanted = expected.toCmpType!func();

        // Default starting position.
        {
            auto parser = testParser(input.save);
            enforce!AssertError(equal(parser.takeName!delim(), wanted.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Shifted starting position. The column offset only carries over if
        // the parser is still on the line that it started on.
        {
            auto parser = testParser(input);
            parser.pos.line += 3;
            parser.pos.col += 7;
            immutable shiftedCol = row == 1 ? col + 7 : col;
            auto wantedPos = TextPos(row + 3, shiftedCol);
            enforce!AssertError(equal(parser.takeName!delim(), wanted),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == wantedPos, "unittest failure 6", __FILE__, line);
        }
    }

    static void testFail(alias func, delim...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);

        // Default starting position.
        {
            auto parser = testParser(input.save);
            auto thrown = collectException!XMLParsingException(parser.takeName!delim());
            enforce!AssertError(thrown !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(thrown.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Shifted starting position.
        {
            auto parser = testParser(input);
            parser.pos.line += 3;
            parser.pos.col += 7;
            immutable shiftedCol = row == 1 ? col + 7 : col;
            auto wantedPos = TextPos(row + 3, shiftedCol);
            auto thrown = collectException!XMLParsingException(parser.takeName!delim());
            enforce!AssertError(thrown !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(thrown.pos == wantedPos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Valid names followed by nothing, by whitespace, or by a delimiter.
        static foreach(name; ["hello", "プログラミング", "h_:llo-.42", "_.", "_-", "_42"])
        {{
            enum nameLen = codeLen!(func, name);

            static foreach(tail; ["", " ", "\t", "\r", "\n", " foo", "\tfoo", "\rfoo", "\nfoo",  "  foo \n \r "])
            {{
                enum nameAndTail = name ~ tail;
                enum delimAndTail = '>' ~ tail;
                enum nameDelimTail = name ~ delimAndTail;
                test!func(nameAndTail, name, tail, 1, nameLen + 1);
                test!(func, '=')(nameAndTail, name, tail, 1, nameLen + 1);
                test!(func, '>', '|')(nameDelimTail, name, delimAndTail, 1, nameLen + 1);
                test!(func, '|', '>')(nameDelimTail, name, delimAndTail, 1, nameLen + 1);
            }}
        }}

        // Inputs containing a character which is not legal in a name.
        static foreach(bad; [tuple(" ", 1, 1), tuple("<", 1, 1), tuple("foo!", 1, 4), tuple("foo!<", 1, 4)])
        {{
            testFail!func(bad[0], bad[1], bad[2]);
            testFail!func(bad[0] ~ '>', bad[1], bad[2]);
            testFail!(func, '?')(bad[0], bad[1], bad[2]);
            testFail!(func, '=')(bad[0] ~ '=', bad[1], bad[2]);
        }}

        // A delimiter on its own is not a name.
        testFail!(func, '>')(">", 1, 1);
        testFail!(func, '?')("?", 1, 1);
        testFail!(func, '?')("プログ&ラミング", 1, codeLen!(func, "プログ&"));

        // Characters which are legal inside of a name but not at its start.
        static foreach(bad; [tuple("42", 1, 1), tuple(".", 1, 1), tuple(".a", 1, 1)])
        {
            testFail!func(bad[0], bad[1], bad[2]);
            testFail!(func, '>')(bad[0], bad[1], bad[2]);
        }
    }
}
5924 
// Attribute check: this unittest is @safe pure, so it only compiles if
// takeName (without delimiters) can be called from @safe pure code for every
// range type produced by testRangeFuncs.
@safe pure unittest
{
    import dxml.internal : testRangeFuncs;
    import std.algorithm.comparison : equal;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto parser = testParser!simpleXML(makeRange(`foo`));
        auto name = parser.takeName();
        assert(equal(name, "foo"));
    }}
}
5937 
5938 
// This removes an attribute value from the front of the input, partially
// validates it, and returns it. The validation that is not done is whether
// the value in a character reference is valid. It's checked for whether the
// characters used in it are valid but not whether the number they form is a
// valid Unicode character. Checking the number doesn't seem worth the extra
// complication, and it's not required for the XML to be "well-formed."
// dxml.util.parseCharRef will check that it is fully correct if it is used.
//
// The front of the input must be the opening quote (either ' or "). The
// returned range is the text between the quotes (references are left as-is),
// and the input is left just past the closing quote with text.pos updated to
// match.
//
// An XMLParsingException is thrown if the front of the input is not a quote,
// if the value is not terminated, or if the value contains <, a & which does
// not start an accepted reference, or a character which is not legal in XML.
// The position in the exception is that of the offending character, except
// for an unterminated value, where it is that of the opening quote.
auto takeAttValue(Text)(ref Text text)
{
    // AttValue    ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    // Reference   ::= EntityRef | CharRef
    // EntityRef   ::= '&' Name ';'
    // PEReference ::= '%' Name ';'

    checkNotEmpty(text);
    immutable quote = text.input.front;
    immutable quotePos = text.pos;
    if(quote != '"' && quote != '\'')
        throw new XMLParsingException("Expected quoted text", text.pos);

    popFrontAndIncCol(text);

    // text.pos is not updated as each code unit is consumed. takeLen counts
    // the code units consumed since the opening quote, and lineStart is the
    // value that takeLen had at the start of the current line, so
    // takeLen - lineStart is how far the front of the input is from the
    // column currently stored in text.pos. That difference is added to the
    // column whenever a position is needed (on error and at the end).
    size_t lineStart = 0;
    auto orig = text.input.save;
    size_t takeLen;
    loop: while(true)
    {
        if(text.input.empty)
            throw new XMLParsingException("Unterminated attribute value", quotePos);
        switch(text.input.front)
        {
            // A quote only terminates the value if it matches the opening
            // quote. Otherwise, it's an ordinary character.
            case '"':
            {
                if(quote == '"')
                {
                    text.input.popFront();
                    goto done;
                }
                goto default;
            }
            case '\'':
            {
                if(quote == '\'')
                {
                    text.input.popFront();
                    goto done;
                }
                goto default;
            }
            case '&':
            {
                // Character references are tried first on a copy of the
                // input so that nothing is consumed if it isn't one.
                {
                    import dxml.util : parseCharRef;
                    auto temp = text.input.save;
                    auto charRef = parseCharRef(temp);
                    if(!charRef.isNull)
                    {
                        static if(hasLength!(Text.Input))
                        {
                            takeLen += text.input.length - temp.length;
                            text.input = temp;
                        }
                        else
                        {
                            // Without length, the size of the reference has
                            // to be counted by walking to its semicolon.
                            while(text.input.front != ';')
                            {
                                ++takeLen;
                                text.input.popFront();
                            }
                            ++takeLen;
                            text.input.popFront();
                        }
                        continue;
                    }
                }

                // Column offset of the & for error reporting.
                immutable ampLen = takeLen - lineStart;
                ++takeLen;
                text.input.popFront();

                // Std Entity References
                static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                {
                    import std.algorithm.searching : startsWith;

                    static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                    {
                        if(text.input.save.startsWith(entRef))
                        {
                            takeLen += entRef.length;
                            text.input.popFrontN(entRef.length);
                            continue loop;
                        }
                    }

                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "reference, and this parser only supports entity " ~
                                                  "references if they're predefined by the spec. This is not " ~
                                                  "a valid character reference or one of the predefined " ~
                                                  "entity references.", text.pos);
                }
                // All Entity References
                else
                {
                    import std.utf : decodeFront, UseReplacementDchar;
                    import dxml.internal : isNameStartChar, isNameChar;

                    if(text.input.empty || text.input.front == quote)
                        goto failedEntityRef;

                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameStartChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }

                    while(true)
                    {
                        if(text.input.empty)
                            goto failedEntityRef;
                        immutable c = text.input.front;
                        if(c == ';')
                        {
                            // The semicolon is counted here but is popped
                            // by the popFront at the bottom of the outer
                            // loop.
                            ++takeLen;
                            break;
                        }
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }
                    break;

                    failedEntityRef:
                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "character or entity reference, and this is not a valid " ~
                                                  "character or entity reference.", text.pos);
                }
            }
            case '<':
            {
                text.pos.col += takeLen - lineStart;
                throw new XMLParsingException("< is not legal in an attribute value", text.pos);
            }
            case '\n':
            {
                ++takeLen;
                nextLine!(Text.config)(text.pos);
                lineStart = takeLen;
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import std.format : format;
                import dxml.internal : isXMLChar;

                immutable c = text.input.front;
                if(isASCII(c))
                {
                    if(!isXMLChar(c))
                    {
                        // text.pos has not been kept up-to-date, so it has
                        // to be moved to the offending character first.
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    ++takeLen;
                    break;
                }
                import std.utf : decodeFront, UseReplacementDchar, UTFException;
                // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                // replacement character is considered valid XML, and if we decoded using it, then
                // all of the invalid Unicode characters would come out as the replacement character
                // and then be treated as valid instead of being caught, which isn't all bad, but
                // the spec requires that they be treated as invalid instead of playing nice and
                // using the replacement character.
                try
                {
                    size_t numCodeUnits;
                    immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                    if(!isXMLChar(decodedC))
                    {
                        enum fmt = "Character is not legal in an XML File: 0x%0x";
                        // takeLen does not include this character yet, so
                        // this is the position of its first code unit.
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!fmt(decodedC), text.pos);
                    }
                    takeLen += numCodeUnits;
                }
                catch(UTFException e)
                {
                    // takeLen was not updated for the character that failed
                    // to decode, so this points at where it starts.
                    text.pos.col += takeLen - lineStart;
                    throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                // decodeFront already consumed the character, so the
                // popFront at the bottom of the loop must be skipped.
                continue;
            }
        }
        text.input.popFront();
    }
    done:
    {
        import std.range : takeExactly;
        // + 1 for the closing quote, which is not part of the value.
        text.pos.col += takeLen - lineStart + 1;
        return takeExactly(orig, takeLen);
    }
}
6152 
6153 unittest
6154 {
6155     import core.exception : AssertError;
6156     import std.algorithm.comparison : equal;
6157     import std.exception : collectException, enforce;
6158     import std.range : only;
6159     import dxml.internal : codeLen, testRangeFuncs;
6160 
6161     static void test(alias func, ThrowOnEntityRef toer)(string origHaystack, string expected, string remainder,
6162                                                         int row, int col, size_t line = __LINE__)
6163     {
6164         auto haystack = func(origHaystack);
6165         auto adjExpected = expected.toCmpType!(func, toer)();
6166         {
6167             auto text = testParser!(makeConfig(toer))(haystack.save);
6168             enforce!AssertError(equal(text.takeAttValue(), adjExpected.save),
6169                                 "unittest failure 1", __FILE__, line);
6170             enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
6171             enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
6172         }
6173         {
6174             auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
6175             auto text = testParser!(makeConfig(toer))(haystack);
6176             text.pos.line += 3;
6177             text.pos.col += 7;
6178             enforce!AssertError(equal(text.takeAttValue(), adjExpected),
6179                                 "unittest failure 4", __FILE__, line);
6180             enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
6181             enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
6182         }
6183     }
6184 
6185     static void testFail(alias func, ThrowOnEntityRef toer)(string origHaystack,
6186                                                             int row, int col, size_t line = __LINE__)
6187     {
6188         auto haystack = func(origHaystack);
6189         {
6190             auto text = testParser!(makeConfig(toer))(haystack.save);
6191             auto e = collectException!XMLParsingException(text.takeAttValue());
6192             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
6193             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
6194         }
6195         {
6196             auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
6197             auto text = testParser!(makeConfig(toer))(haystack);
6198             text.pos.line += 3;
6199             text.pos.col += 7;
6200             auto e = collectException!XMLParsingException(text.takeAttValue());
6201             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
6202             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
6203         }
6204     }
6205 
6206     static foreach(i, func; testRangeFuncs)
6207     {
6208         static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
6209         {
6210             test!(func, toer)(`""`, "", "", 1, 3);
6211             test!(func, toer)(`"J"`, "J", "", 1, 4);
6212             test!(func, toer)(`"foo"`, "foo", "", 1, 6);
6213             test!(func, toer)(`"プログラミング"`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3);
6214             test!(func, toer)(`"foo"bar`, "foo", "bar", 1, 6);
6215             test!(func, toer)(`"プログラミング" after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3);
6216 
6217             test!(func, toer)(`''`, "", "", 1, 3);
6218             test!(func, toer)(`'J'`, "J", "", 1, 4);
6219             test!(func, toer)(`'foo'`, "foo", "", 1, 6);
6220             test!(func, toer)(`'プログラミング'`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3);
6221             test!(func, toer)(`'foo'bar`, "foo", "bar", 1, 6);
6222             test!(func, toer)(`'プログラミング' after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3);
6223 
6224             test!(func, toer)(`"&amp;&gt;&lt;"`, "&amp;&gt;&lt;", "", 1, 16);
6225             test!(func, toer)(`"&apos;&quot;"`, "&apos;&quot;", "", 1, 15);
6226             test!(func, toer)(`"hello&amp;&gt;&lt;world"`, "hello&amp;&gt;&lt;world", "", 1, 26);
6227             test!(func, toer)(`".....&amp;&gt;&lt;....."`, ".....&amp;&gt;&lt;.....", "", 1, 26);
6228             test!(func, toer)(`"&#12487;&#12451;&#12521;&#12531;"`, "&#12487;&#12451;&#12521;&#12531;", "", 1, 35);
6229             test!(func, toer)(`"hello&#xAF;&#77;&amp;world"`, "hello&#xAF;&#77;&amp;world", "", 1, 29);
6230 
6231             test!(func, toer)(`'&amp;&gt;&lt;'`, "&amp;&gt;&lt;", "", 1, 16);
6232             test!(func, toer)(`'hello&amp;&gt;&lt;world'`, "hello&amp;&gt;&lt;world", "", 1, 26);
6233             test!(func, toer)(`'&apos;&quot;'`, "&apos;&quot;", "", 1, 15);
6234             test!(func, toer)(`'.....&amp;&gt;&lt;.....'`, ".....&amp;&gt;&lt;.....", "", 1, 26);
6235             test!(func, toer)(`'&#12487;&#12451;&#12521;&#12531;'`, "&#12487;&#12451;&#12521;&#12531;", "", 1, 35);
6236             test!(func, toer)(`'hello&#xAF;&#77;&amp;world'`, "hello&#xAF;&#77;&amp;world", "", 1, 29);
6237 
6238             test!(func, toer)("'hello\nworld'", "hello\nworld", "", 2, 7);
6239             test!(func, toer)("'hello\nworld\n'", "hello\nworld\n", "", 3, 2);
6240 
6241             test!(func, toer)(`"'''"whatever`, "'''", "whatever", 1, 6);
6242             test!(func, toer)(`'"""'whatever`, `"""`, "whatever", 1, 6);
6243 
6244             test!(func, toer)(`"&#42;"`, "&#42;", "", 1, 8);
6245             test!(func, toer)(`"&#x42;"`, "&#x42;", "", 1, 9);
6246             test!(func, toer)(`"%foo"`, "%foo", "", 1, 7);
6247 
6248             testFail!(func, toer)(`"`, 1, 1);
6249             testFail!(func, toer)(`"foo`, 1, 1);
6250             testFail!(func, toer)(`"foo'`, 1, 1);
6251             testFail!(func, toer)(`"<"`, 1, 2);
6252             testFail!(func, toer)(`"&`, 1, 2);
6253             testFail!(func, toer)(`"&"`, 1, 2);
6254             testFail!(func, toer)(`"&x"`, 1, 2);
6255             testFail!(func, toer)(`"&.;"`, 1, 2);
6256             testFail!(func, toer)(`"&&;"`, 1, 2);
6257             testFail!(func, toer)(`"&a"`, 1, 2);
6258             testFail!(func, toer)(`"&a`, 1, 2);
6259             testFail!(func, toer)(`"hello&;"`, 1, 7);
6260             testFail!(func, toer)(`"hello&;world"`,1, 7);
6261             testFail!(func, toer)(`"hello&<;world"`,1, 7);
6262             testFail!(func, toer)(`"hello&world"`,1, 7);
6263             testFail!(func, toer)(`"hello<world"`,1, 7);
6264             testFail!(func, toer)(`"hello world&"`, 1, 13);
6265             testFail!(func, toer)(`"hello world&;"`, 1, 13);
6266             testFail!(func, toer)(`"hello world&foo"`, 1, 13);
6267             testFail!(func, toer)(`"foo<"`, 1, 5);
6268             testFail!(func, toer)(`"&#`, 1, 2);
6269             testFail!(func, toer)(`"&#"`, 1, 2);
6270             testFail!(func, toer)(`"&#;"`, 1, 2);
6271             testFail!(func, toer)(`"&#x;"`, 1, 2);
6272             testFail!(func, toer)(`"&#AF;"`, 1, 2);
6273             testFail!(func, toer)(`"&#x`, 1, 2);
6274             testFail!(func, toer)(`"&#77`, 1, 2);
6275             testFail!(func, toer)(`"&#77;`, 1, 1);
6276             testFail!(func, toer)(`"&#x0`, 1, 2);
6277             testFail!(func, toer)(`"&#x0;`, 1, 2);
6278             testFail!(func, toer)(`"&#x0;"`, 1, 2);
6279 
6280             testFail!(func, toer)(`'`, 1, 1);
6281             testFail!(func, toer)(`'foo`, 1, 1);
6282             testFail!(func, toer)(`'foo"`, 1, 1);
6283             testFail!(func, toer)(`'<'`, 1, 2);
6284             testFail!(func, toer)("'\v'", 1, 2);
6285             testFail!(func, toer)("'\uFFFE'", 1, 2);
6286             testFail!(func, toer)(`'&`, 1, 2);
6287             testFail!(func, toer)(`'&'`, 1, 2);
6288             testFail!(func, toer)(`'&x'`, 1, 2);
6289             testFail!(func, toer)(`'&.;'`, 1, 2);
6290             testFail!(func, toer)(`'&&;'`, 1, 2);
6291             testFail!(func, toer)(`'&a'`, 1, 2);
6292             testFail!(func, toer)(`'&a`, 1, 2);
6293             testFail!(func, toer)(`'hello&;'`, 1, 7);
6294             testFail!(func, toer)(`'hello&;world'`, 1, 7);
6295             testFail!(func, toer)(`'hello&<;world'`, 1, 7);
6296             testFail!(func, toer)(`'hello&world'`, 1, 7);
6297             testFail!(func, toer)(`'hello<world'`, 1, 7);
6298             testFail!(func, toer)(`'hello world&'`, 1, 13);
6299             testFail!(func, toer)(`'hello world&;'`, 1, 13);
6300             testFail!(func, toer)(`'hello world&foo'`, 1, 13);
6301             testFail!(func, toer)(`'foo<'`, 1, 5);
6302             testFail!(func, toer)(`'&#`, 1, 2);
6303             testFail!(func, toer)(`'&#'`, 1, 2);
6304             testFail!(func, toer)(`'&#;'`, 1, 2);
6305             testFail!(func, toer)(`'&#x;'`, 1, 2);
6306             testFail!(func, toer)(`'&#AF;'`, 1, 2);
6307             testFail!(func, toer)(`'&#x`, 1, 2);
6308             testFail!(func, toer)(`'&#77`, 1, 2);
6309             testFail!(func, toer)(`'&#77;`, 1, 1);
6310             testFail!(func, toer)(`'&#x0`, 1, 2);
6311             testFail!(func, toer)(`'&#x0;`, 1, 2);
6312             testFail!(func, toer)(`'&#x0;'`, 1, 2);
6313             testFail!(func, toer)("'&#xA\nF;'", 1, 2);
6314             testFail!(func, toer)("'&amp\n;'", 1, 2);
6315             testFail!(func, toer)("'&\namp;'", 1, 2);
6316             testFail!(func, toer)("'\n&amp;&;'", 2, 6);
6317         }
6318         {
6319             alias toer = ThrowOnEntityRef.yes;
6320             testFail!(func, toer)(`"&foo;"`, 1, 2);
6321             testFail!(func, toer)(`"hello world&foo;"`, 1, 13);
6322             testFail!(func, toer)(`"hello &foo; world"`, 1, 8);
6323             testFail!(func, toer)(`"&am;"`, 1, 2);
6324             testFail!(func, toer)(`"&ampe;"`, 1, 2);
6325             testFail!(func, toer)(`"&l;"`, 1, 2);
6326             testFail!(func, toer)(`"&lte;"`, 1, 2);
6327             testFail!(func, toer)(`"&g;"`, 1, 2);
6328             testFail!(func, toer)(`"&gte;"`, 1, 2);
6329             testFail!(func, toer)(`"&apo;"`, 1, 2);
6330             testFail!(func, toer)(`"&aposs;"`, 1, 2);
6331             testFail!(func, toer)(`"&quo;"`, 1, 2);
6332             testFail!(func, toer)(`"&quote;"`, 1, 2);
6333 
6334             testFail!(func, toer)(`'&foo;'`, 1, 2);
6335             testFail!(func, toer)(`'hello world&foo;'`, 1, 13);
6336             testFail!(func, toer)(`'hello &foo; world'`, 1, 8);
6337             testFail!(func, toer)(`'&am;'`, 1, 2);
6338             testFail!(func, toer)(`'&ampe;'`, 1, 2);
6339             testFail!(func, toer)(`'&l;'`, 1, 2);
6340             testFail!(func, toer)(`'&lte;'`, 1, 2);
6341             testFail!(func, toer)(`'&g;'`, 1, 2);
6342             testFail!(func, toer)(`'&gte;'`, 1, 2);
6343             testFail!(func, toer)(`'&apo;'`, 1, 2);
6344             testFail!(func, toer)(`'&aposs;'`, 1, 2);
6345             testFail!(func, toer)(`'&quo;'`, 1, 2);
6346             testFail!(func, toer)(`'&quote;'`, 1, 2);
6347         }
6348         {
6349             alias toer = ThrowOnEntityRef.no;
6350             test!(func, toer)(`"&foo;"`, "&foo;", "", 1, 8);
6351             test!(func, toer)(`"hello world&foo;"`, "hello world&foo;", "", 1, 19);
6352             test!(func, toer)(`"hello &foo; world"`, "hello &foo; world", "", 1, 20);
6353             test!(func, toer)(`"&am;"`, "&am;", "", 1, 7);
6354             test!(func, toer)(`"&ampe;"`, "&ampe;", "", 1, 9);
6355             test!(func, toer)(`"&l;"`, "&l;", "", 1, 6);
6356             test!(func, toer)(`"&lte;"`, "&lte;", "", 1, 8);
6357             test!(func, toer)(`"&g;"`, "&g;", "", 1, 6);
6358             test!(func, toer)(`"&gte;"`, "&gte;", "", 1, 8);
6359             test!(func, toer)(`"&apo;"`, "&apo;", "", 1, 8);
6360             test!(func, toer)(`"&aposs;"`, "&aposs;", "", 1, 10);
6361             test!(func, toer)(`"&quo;"`, "&quo;", "", 1, 8);
6362             test!(func, toer)(`"&quote;"`, "&quote;", "", 1, 10);
6363 
6364             test!(func, toer)(`'&foo;'`, "&foo;", "", 1, 8);
6365             test!(func, toer)(`'hello world&foo;'`, "hello world&foo;", "", 1, 19);
6366             test!(func, toer)(`'hello &foo; world'`, "hello &foo; world", "", 1, 20);
6367             test!(func, toer)(`'&am;'`, "&am;", "", 1, 7);
6368             test!(func, toer)(`'&ampe;'`, "&ampe;", "", 1, 9);
6369             test!(func, toer)(`'&l;'`, "&l;", "", 1, 6);
6370             test!(func, toer)(`'&lte;'`, "&lte;", "", 1, 8);
6371             test!(func, toer)(`'&g;'`, "&g;", "", 1, 6);
6372             test!(func, toer)(`'&gte;'`, "&gte;", "", 1, 8);
6373             test!(func, toer)(`'&apo;'`, "&apo;", "", 1, 8);
6374             test!(func, toer)(`'&aposs;'`, "&aposs;", "", 1, 10);
6375             test!(func, toer)(`'&quo;'`, "&quo;", "", 1, 8);
6376             test!(func, toer)(`'&quote;'`, "&quote;", "", 1, 10);
6377         }
6378     }
6379 
6380     // These can't be tested with testFail, because attempting to convert
6381     // invalid Unicode results in UnicodeExceptions before parseXML even
6382     // gets called.
6383     import std.meta : AliasSeq;
6384     static foreach(str; AliasSeq!("'" ~ cast(string)[255] ~ "'",
6385                                   "'"w ~ cast(wstring)[0xD800] ~ "'",
6386                                   "'"d ~ cast(dstring)[0xD800] ~ "'"))
6387     {{
6388         auto text = testParser(str);
6389         auto e = collectException!XMLParsingException(text.takeAttValue());
6390         assert(e ! is null);
6391         assert(e.pos == TextPos(1, 2));
6392     }}
6393 }
6394 
// Compile-time attribute check: because this unittest is marked @safe pure,
// it only compiles if takeAttValue can be called from @safe pure code for
// every range type in testRangeFuncs and every config listed below.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
        {{
            auto xml = func(`'foo'`);
            // Instantiate the parser with the config under test. Hard-coding
            // simpleXML here would leave the other configs unchecked.
            auto text = testParser!config(xml);
            assert(equal(text.takeAttValue(), "foo"));
        }}
    }
}
6410 
6411 
// Validates an EntityType.text field to verify that it does not contain invalid
// characters.
//
// The validation walks a copy of the parser state obtained with orig.save, so
// orig itself is not advanced by this function; the only observable result is
// whether an exception is thrown.
//
// Params:
//     allowRestrictedChars = If false, '&' must start a valid character
//                            reference or entity reference, and '<' and "]]>"
//                            are rejected. If true, those characters get no
//                            special treatment and are only checked for being
//                            legal XML characters.
//     orig = The parser state whose remaining input is validated.
//
// Throws: XMLParsingException (positioned at the offending character) if the
//         text contains a character that is not legal XML, invalid Unicode,
//         or - when allowRestrictedChars is false - a malformed reference,
//         a '<', or "]]>".
void checkText(bool allowRestrictedChars, Text)(ref Text orig)
{
    import std.format : format;
    import std.utf : decodeFront, UseReplacementDchar;

    auto text = orig.save;
    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            static if(!allowRestrictedChars)
            {
                case '&':
                {
                    import dxml.util : parseCharRef;

                    // Character references (&#...; / &#x...;) are tried first on a
                    // scratch copy of the input so that a failed attempt consumes nothing.
                    {
                        auto temp = text.input.save;
                        auto charRef = parseCharRef(temp);
                        if(!charRef.isNull)
                        {
                            static if(hasLength!(Text.Input))
                            {
                                // The length difference is the number of code units
                                // that parseCharRef consumed.
                                text.pos.col += text.input.length - temp.length;
                                text.input = temp;
                            }
                            else
                            {
                                // Without length, re-walk the reference to keep the
                                // column in sync. parseCharRef succeeded, so a ';'
                                // is guaranteed to be present.
                                while(text.input.front != ';')
                                    popFrontAndIncCol(text);
                                popFrontAndIncCol(text);
                            }
                            continue;
                        }
                    }

                    // Remember where the '&' was so that errors point at it rather
                    // than at wherever parsing of the reference gave up.
                    immutable ampPos = text.pos;
                    popFrontAndIncCol(text);

                    // Std Entity References
                    static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                    {
                        static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                        {
                            if(text.stripStartsWith(entRef))
                                continue loop;
                        }

                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "reference, and this parser only supports entity references if " ~
                                                      "they're predefined by the spec. This is not a valid character " ~
                                                      "reference or one of the predefined entity references.", ampPos);
                    }
                    // All Entity References
                    else
                    {
                        import dxml.internal : isNameStartChar, isNameChar;

                        // An entity reference is '&' Name ';', so the first character
                        // must be a name-start character...
                        if(text.input.empty)
                            goto failedEntityRef;
                        {
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameStartChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        // ...followed by name characters up to the terminating ';'.
                        while(true)
                        {
                            if(text.input.empty)
                                goto failedEntityRef;
                            immutable c = text.input.front;
                            if(c == ';')
                                break;
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        assert(text.input.front == ';');
                        popFrontAndIncCol(text);
                        continue;

                        failedEntityRef:
                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "character or entity reference, and this is not a valid " ~
                                                      "character or entity reference.", ampPos);
                    }
                }
                case '<': throw new XMLParsingException("< is not legal in EntityType.text", text.pos);
                case ']':
                {
                    popFrontAndIncCol(text);
                    if(text.stripStartsWith("]>"))
                    {
                        // Three code units ("]]>") have been consumed at this point;
                        // step back so that the error points at the first ']'.
                        text.pos.col -= 3;
                        throw new XMLParsingException("]]> is not legal in EntityType.text", text.pos);
                    }
                    break;
                }
            }
            case '\n':
            {
                nextLine!(text.config)(text.pos);
                text.input.popFront();
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import dxml.internal : isXMLChar;
                immutable c = text.input.front;
                if(isASCII(c))
                {
                    // ASCII is a single code unit in every UTF encoding, so no decoding is needed.
                    if(!isXMLChar(c))
                    {
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    popFrontAndIncCol(text);
                }
                else
                {
                    import std.utf : UTFException;
                    // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                    // replacement character is considered valid XML, and if we decoded using it, then
                    // all of the invalid Unicode characters would come out as the replacement character
                    // and then be treated as valid instead of being caught, which isn't all bad, but
                    // the spec requires that they be treated as invalid instead of playing nice and
                    // using the replacement character.
                    try
                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                        if(!isXMLChar(decodedC))
                        {
                            enum fmt = "Character is not legal in an XML File: 0x%0x";
                            throw new XMLParsingException(format!fmt(decodedC), text.pos);
                        }
                        text.pos.col += numCodeUnits;
                    }
                    catch(UTFException)
                        throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                break;
            }
        }
    }
}
6565 
// Exercises checkText with every range type in testRangeFuncs, both
// ThrowOnEntityRef settings, and both values of allowRestrictedChars (arc).
unittest
{
    import core.exception : AssertError;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Asserts that checkText accepts the given text. The caller's line number
    // is forwarded so that a failure is reported at the call site.
    static void test(alias func, bool arc, ThrowOnEntityRef toer)(string text, size_t line = __LINE__)
    {
        auto xml = func(text);
        auto range = testParser!(makeConfig(toer))(xml);
        assertNotThrown(checkText!arc(range), "unittest failure", __FILE__, line);
    }

    // Asserts that checkText rejects the given text with an XMLParsingException
    // located at (row, col). The check is repeated with the starting position
    // shifted by 3 lines and 7 columns to verify that the reported position is
    // relative to where the parser state started (the column offset only
    // carries over when the error is still on the first line).
    static void testFail(alias func, bool arc, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        {
            auto range = testParser!(makeConfig(toer))(xml.save);
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto range = testParser!(makeConfig(toer))(xml);
            range.pos.line += 3;
            range.pos.col += 7;
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
        {
            // Inputs whose outcome does not depend on allowRestrictedChars.
            static foreach(arc; [false, true])
            {
                test!(func, arc, toer)("");
                test!(func, arc, toer)("J");
                test!(func, arc, toer)("foo");
                test!(func, arc, toer)("プログラミング");

                test!(func, arc, toer)("&amp;&gt;&lt;");
                test!(func, arc, toer)("hello&amp;&gt;&lt;world");
                test!(func, arc, toer)(".....&apos;&quot;&amp;.....");
                test!(func, arc, toer)("&#12487;&#12451;&#12521;&#12531;");
                test!(func, arc, toer)("hello&#xAF;&#42;&quot;world");

                test!(func, arc, toer)("]]");
                test!(func, arc, toer)("]>");
                test!(func, arc, toer)("foo]]bar");
                test!(func, arc, toer)("foo]>bar");
                test!(func, arc, toer)("]] >");

                testFail!(func, arc, toer)("\v", 1, 1);
                testFail!(func, arc, toer)("\uFFFE", 1, 1);
                testFail!(func, arc, toer)("hello\vworld", 1, 6);
                testFail!(func, arc, toer)("he\nllo\vwo\nrld", 2, 4);
            }

            // Restricted characters and malformed references must be rejected
            // when allowRestrictedChars is false.
            testFail!(func, false, toer)("<", 1, 1);
            testFail!(func, false, toer)("&", 1, 1);
            testFail!(func, false, toer)("&;", 1, 1);
            testFail!(func, false, toer)("&x", 1, 1);
            testFail!(func, false, toer)("&&;", 1, 1);
            testFail!(func, false, toer)("&a", 1, 1);
            testFail!(func, false, toer)("hello&;", 1, 6);
            testFail!(func, false, toer)("hello&;world", 1, 6);
            testFail!(func, false, toer)("hello&<;world", 1, 6);
            testFail!(func, false, toer)("hello&world", 1, 6);
            testFail!(func, false, toer)("hello world&", 1, 12);
            testFail!(func, false, toer)("hello world&;", 1, 12);
            testFail!(func, false, toer)("hello world&foo", 1, 12);
            testFail!(func, false, toer)("&#;", 1, 1);
            testFail!(func, false, toer)("&#x;", 1, 1);
            testFail!(func, false, toer)("&#AF;", 1, 1);
            testFail!(func, false, toer)("&#x", 1, 1);
            testFail!(func, false, toer)("&#42", 1, 1);
            testFail!(func, false, toer)("&#x42", 1, 1);
            testFail!(func, false, toer)("&#12;", 1, 1);
            testFail!(func, false, toer)("&#x12;", 1, 1);
            testFail!(func, false, toer)("&#42;foo\nbar&#;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#AF;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#42", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x42", 2, 4);
            testFail!(func, false, toer)("プログラミング&", 1, codeLen!(func, "プログラミング&"));

            // Well-formed but non-predefined entity references are only an
            // error when the config asks for them to be.
            static if(toer == ThrowOnEntityRef.yes)
            {
                testFail!(func, false, toer)("&a;", 1, 1);
                testFail!(func, false, toer)(`&am;`, 1, 1);
                testFail!(func, false, toer)(`&ampe;`, 1, 1);
                testFail!(func, false, toer)(`&l;`, 1, 1);
                testFail!(func, false, toer)(`&lte;`, 1, 1);
                testFail!(func, false, toer)(`&g;`, 1, 1);
                testFail!(func, false, toer)(`&gte;`, 1, 1);
                testFail!(func, false, toer)(`&apo;`, 1, 1);
                testFail!(func, false, toer)(`&aposs;`, 1, 1);
                testFail!(func, false, toer)(`&quo;`, 1, 1);
                testFail!(func, false, toer)(`&quote;`, 1, 1);
                testFail!(func, false, toer)(`hello &foo; world`, 1, 7);
                testFail!(func, false, toer)("hello\n &foo; \nworld", 2, 2);
            }
            else
            {
                test!(func, false, toer)("&a;");
                test!(func, false, toer)(`&am;`);
                test!(func, false, toer)(`&ampe;`);
                test!(func, false, toer)(`&l;`);
                test!(func, false, toer)(`&lte;`);
                test!(func, false, toer)(`&g;`);
                test!(func, false, toer)(`&gte;`);
                test!(func, false, toer)(`&apo;`);
                test!(func, false, toer)(`&aposs;`);
                test!(func, false, toer)(`&quo;`);
                test!(func, false, toer)(`&quote;`);
                test!(func, false, toer)(`hello &foo; world`);
                test!(func, false, toer)("hello\n &foo; \nworld");
            }

            testFail!(func, false, toer)("]]>", 1, 1);
            testFail!(func, false, toer)("foo]]>bar", 1, 4);

            // With allowRestrictedChars set, all of the above is accepted.
            test!(func, true, toer)("]]>");
            test!(func, true, toer)("foo]]>bar");

            test!(func, true, toer)("<");
            test!(func, true, toer)("&");
            test!(func, true, toer)("&x");
            test!(func, true, toer)("&&;");
            test!(func, true, toer)("&a");
            test!(func, true, toer)("&a;");
            test!(func, true, toer)(`&am;`);
            test!(func, true, toer)(`&ampe;`);
            test!(func, true, toer)(`&l;`);
            test!(func, true, toer)(`&lte;`);
            test!(func, true, toer)(`&g;`);
            test!(func, true, toer)(`&gte;`);
            test!(func, true, toer)(`&apo;`);
            test!(func, true, toer)(`&aposs;`);
            test!(func, true, toer)(`&quo;`);
            test!(func, true, toer)(`&quote;`);
            test!(func, true, toer)("hello&;");
            test!(func, true, toer)("hello&;world");
            test!(func, true, toer)("hello&<;world");
            test!(func, true, toer)("hello&world");
            test!(func, true, toer)("hello world&");
            test!(func, true, toer)("hello world&;");
            test!(func, true, toer)("hello world&foo");
            test!(func, true, toer)("&#;");
            test!(func, true, toer)("&#x;");
            test!(func, true, toer)("&#AF;");
            test!(func, true, toer)("&#x");
            test!(func, true, toer)("&#42");
            test!(func, true, toer)("&#x42");
            test!(func, true, toer)("&#12;");
            test!(func, true, toer)("&#x12;");
            test!(func, true, toer)("&#42;foo\nbar&#;");
            test!(func, true, toer)("&#42;foo\nbar&#x;");
            test!(func, true, toer)("&#42;foo\nbar&#AF;");
            test!(func, true, toer)("&#42;foo\nbar&#x");
            test!(func, true, toer)("&#42;foo\nbar&#42");
            test!(func, true, toer)("&#42;foo\nbar&#x42");
            test!(func, true, toer)("プログラミング&");
        }
    }

    // These can't be tested with testFail, because attempting to convert
    // invalid Unicode results in UnicodeExceptions before parseXML even
    // gets called.
    import std.meta : AliasSeq;
    static foreach(str; AliasSeq!(cast(string)[255], cast(wstring)[0xD800], cast(dstring)[0xD800]))
    {
        static foreach(arc; [false, true])
        {{
            auto text = testParser(str);
            auto e = collectException!XMLParsingException(text.checkText!arc());
            assert(e !is null);
            assert(e.pos == TextPos(1, 1));
        }}
    }
}
6752 
// Compile-time attribute check: this only builds if checkText is callable
// from @safe code for each range type, config and allowRestrictedChars value.
@safe unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(toRange; testRangeFuncs)
    {
        static foreach(cfg; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
        {
            static foreach(allowRestricted; [false, true])
            {{
                auto input = toRange("foo");
                auto state = testParser!cfg(input);
                state.checkText!allowRestricted();
            }}
        }
    }
}
6770 
6771 
// Whether the given code unit is XML whitespace, i.e. one of the characters
// allowed by the spec's production
//     S := (#x20 | #x9 | #xD | #xA)+
// (space, tab, carriage return or line feed).
bool isSpace(C)(C c) @safe pure nothrow @nogc
    if(isSomeChar!C)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
6785 
// Compares isSpace against a straightforward reference predicate over a range
// of char, wchar and dchar values.
pure nothrow @safe @nogc unittest
{
    // Reference definition of XML whitespace.
    static bool expected(dchar c) pure nothrow @safe @nogc
    {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    }

    foreach(char c; char.min .. char.max)
        assert(isSpace(c) == expected(c));

    // The wider types are only sampled over a prefix of their range.
    foreach(wchar c; wchar.min .. wchar.max / 100)
        assert(isSpace(c) == expected(c));

    foreach(dchar c; dchar.min .. dchar.max / 1000)
        assert(isSpace(c) == expected(c));
}
6810 
6811 
// Drops the front element of text.input and advances the tracked column by one.
pragma(inline, true) void popFrontAndIncCol(Text)(ref Text text)
{
    text.input.popFront();
    text.pos.col += 1;
}
6817 
// Moves pos to the start of the following line: the line number goes up by
// one and the column is reset to 1. The config parameter is not consulted here.
pragma(inline, true) void nextLine(Config config)(ref TextPos pos)
{
    pos.line += 1;
    pos.col = 1;
}
6823 
// Throws an XMLParsingException at the current position if the input has run
// out. The caller's line number is recorded in the exception.
// TODO create bug report, because this function cannot be inlined
/+pragma(inline, true)+/ void checkNotEmpty(Text)(ref Text text, size_t line = __LINE__)
{
    if(!text.input.empty)
        return;
    throw new XMLParsingException("Prematurely reached end of document", text.pos, __FILE__, line);
}
6830 
6831 
// A handful of parser configurations, only available in unittest builds.
version(unittest)
{
    enum someTestConfigs = [Config.init, simpleXML, makeConfig(SkipComments.yes), makeConfig(SkipPI.yes)];
}
6834 
6835 
// Fuzz-testing failures
//
// Regression inputs found by fuzzing. Each must make the parser throw an
// XMLParsingException rather than fail in some other way.
unittest
{
    // Iterates over the whole document and touches every property that is
    // valid for each entity type, so that any lazily-triggered validation runs.
    static void parseEverything(string xml)
    {
        with(EntityType) foreach(entity; parseXML(xml))
        {
            // First pass: entity types that have a name.
            final switch(entity.type)
            {
                case cdata: break;
                case comment: break;
                case elementStart: auto name = entity.name; break;
                case elementEnd: goto case elementStart;
                case elementEmpty: goto case elementStart;
                case pi: goto case elementStart;
                case text: break;
            }

            // Second pass: entity types that have text or attributes.
            final switch(entity.type)
            {
                case cdata: auto text = entity.text; break;
                case comment: goto case cdata;
                case elementStart:
                {
                    foreach(attr; entity.attributes)
                    {
                        auto name = attr.name;
                        auto value = attr.value;
                    }
                    break;
                }
                case elementEnd: break;
                case elementEmpty: goto case elementStart;
                case pi: goto case cdata;
                case text: goto case cdata;
            }
        }
    }

    // Asserts that fully parsing xml throws an XMLParsingException. The
    // caller's line is forwarded so that a failure is reported at the call
    // site rather than inside this helper.
    static void testFail(string xml, size_t line = __LINE__)
    {
        import std.exception : assertThrown;
        assertThrown!XMLParsingException(parseEverything(xml), "unittest failure", __FILE__, line);
    }

    testFail([0x3c, 0xff, 0x3e, 0x3e, 0x3a, 0x3c, 0x2f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x31, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xff, 0xff,
              0xff]);
}