1 // Written in the D programming language
2 
3 /++
4     This implements a range-based
5     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX _parser) for XML 1.0 (which
6     will work with XML 1.1 documents assuming that they don't use any
7     1.1-specific features). For the sake of simplicity, sanity, and efficiency,
8     the $(LINK2 https://en.wikipedia.org/wiki/Document_type_definition, DTD)
9     section is not supported beyond what is required to parse past it.
10 
11     Start tags, end tags, comments, cdata sections, and processing instructions
12     are all supported and reported to the application. Anything in the DTD is
13     skipped (though it's parsed enough to parse past it correctly, and that
14     $(I can) result in an $(LREF XMLParsingException) if that XML isn't valid
15     enough to be correctly skipped), and the
16     $(LINK2 http://www.w3.org/TR/REC-xml/#NT-XMLDecl, XML declaration) at the
17     top is skipped if present (XML 1.1 requires that it be there, but XML 1.0
18     does not).
19 
20     Regardless of what the XML declaration says (if present), any range of
21     $(K_CHAR) will be treated as being encoded in UTF-8, any range of
22     $(K_WCHAR) will be treated as being encoded in UTF-16, and any range of
23     $(K_DCHAR) will be treated as having been encoded in UTF-32. Strings will
24     be treated as ranges of their code units, not code points. Note that like
25     Phobos typically does when processing strings, the code assumes that BOMs
26     have already been removed, so if the range of characters comes from a file
27     that uses a BOM, the calling code needs to strip it out before calling
28     $(LREF parseXML), or parsing will fail due to invalid characters.
29 
30     Since the DTD is skipped, entity references other than the five which are
31     predefined by the XML spec cannot be fully processed (since wherever they
32     were used in the document would be replaced by what they referred to, which
33     could be arbitrarily complex XML). As such, by default, if any entity
34     references which are not predefined are encountered outside of the DTD, an
35     $(LREF XMLParsingException) will be thrown (see
36     $(LREF Config.throwOnEntityRef) for how that can be configured). The
37     predefined entity references and any character references encountered will
38     be checked to verify that they're valid, but they will not be replaced
39     (since that does not work with returning slices of the original input).
40 
41     However, $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
42     $(REF_ALTTEXT parseStdEntityRef, parseStdEntityRef, dxml, util) from
43     $(MREF dxml, util) can be used to convert the predefined entity references
    to what they refer to, and $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
45     $(REF_ALTTEXT parseCharRef, parseCharRef, dxml, util) from
46     $(MREF dxml, util) can be used to convert character references to what they
47     refer to.
48 
49     $(H3 Primary Symbols)
50     $(TABLE
51         $(TR $(TH Symbol) $(TH Description))
52         $(TR $(TD $(LREF parseXML))
53              $(TD The function used to initiate the parsing of an XML
54                   document.))
55         $(TR $(TD $(LREF EntityRange))
56              $(TD The range returned by $(LREF parseXML).))
57         $(TR $(TD $(LREF EntityRange.Entity))
58              $(TD The element type of $(LREF EntityRange).))
59     )
60 
61     $(H3 Parser Configuration Helpers)
62     $(TABLE
63         $(TR $(TH Symbol) $(TH Description))
64         $(TR $(TD $(LREF Config))
65              $(TD Used to configure how $(LREF EntityRange) parses the XML.))
66         $(TR $(TD $(LREF simpleXML))
67              $(TD A user-friendly configuration for when the application just
68                   wants the element tags and the data in between them.))
69         $(TR $(TD $(LREF makeConfig))
70              $(TD A convenience function for constructing a custom
71                   $(LREF Config).))
72         $(TR $(TD $(LREF SkipComments))
73              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
74                   to tell the parser to skip comments.))
75         $(TR $(TD $(LREF SkipPI))
76              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
77                   to tell the parser to skip processing instructions.))
78         $(TR $(TD $(LREF SplitEmpty))
79              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
80                   to configure how the parser deals with empty element tags.))
81     )
82 
83     $(H3 Helper Types Used When Parsing)
84     $(TABLE
85         $(TR $(TH Symbol) $(TH Description))
86         $(TR $(TD $(LREF EntityType))
87              $(TD The type of an entity in the XML (e.g. a
88                   $(LREF_ALTTEXT start tag, EntityType.elementStart) or a
89                   $(LREF_ALTTEXT comment, EntityType.comment)).))
90         $(TR $(TD $(LREF TextPos))
91              $(TD Gives the line and column number in the XML document.))
92         $(TR $(TD $(LREF XMLParsingException))
93              $(TD Thrown by $(LREF EntityRange) when it encounters invalid
94                   XML.))
95     )
96 
97     $(H3 Helper Functions Used When Parsing)
98     $(TABLE
99         $(TR $(TH Symbol) $(TH Description))
100         $(TR $(TD $(LREF getAttrs))
101              $(TD A function similar to $(PHOBOS_REF getopt, std, getopt) which
102                   allows for the easy processing of start tag attributes.))
103         $(TR $(TD $(LREF skipContents))
104              $(TD Iterates an $(LREF EntityRange) from a start tag to its
105                   matching end tag.))
106         $(TR $(TD $(LREF skipToPath))
107              $(TD Used to navigate from one start tag to another as if the start
108                   tag names formed a file path.))
109         $(TR $(TD $(LREF skipToEntityType))
110              $(TD Skips to the next entity of the given type in the range.))
111         $(TR $(TD $(LREF skipToParentEndTag))
112              $(TD Iterates an $(LREF EntityRange) until it reaches the end tag
113                   that matches the start tag which is the parent of the
114                   current entity.))
115     )
116 
117     $(H3 Helper Traits)
118     $(TABLE
119         $(TR $(TH Symbol) $(TH Description))
120         $(TR $(TD $(LREF isAttrRange))
121              $(TD Whether the given range is a range of attributes.)))
122 
123     Copyright: Copyright 2017 - 2025
124     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
125     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
126     Source:    $(LINK_TO_SRC dxml/_parser.d)
127 
128     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
129   +/
130 module dxml.parser;
131 
///
unittest
{
    // Verifies the type and name of the tag at the front of the range and
    // then advances the range past it.
    static void expectTag(R)(ref R range, EntityType type, string name)
    {
        assert(range.front.type == type);
        assert(range.front.name == name);
        range.popFront();
    }

    // Verifies the type and text of the entity at the front of the range and
    // then advances the range past it.
    static void expectText(R)(ref R range, EntityType type, string text)
    {
        assert(range.front.type == type);
        assert(range.front.text == text);
        range.popFront();
    }

    // The same document is walked twice below: once with the default
    // configuration and once with simpleXML.
    auto xml = "<!-- comment -->\n" ~
               "<root>\n" ~
               "    <foo>some text<whatever/></foo>\n" ~
               "    <bar/>\n" ~
               "    <baz></baz>\n" ~
               "</root>";

    // Default configuration: the comment and the empty element tags are
    // reported exactly as they appear in the document.
    {
        auto range = parseXML(xml);

        expectText(range, EntityType.comment, " comment ");

        expectTag(range, EntityType.elementStart, "root");

        expectTag(range, EntityType.elementStart, "foo");
        expectText(range, EntityType.text, "some text");
        expectTag(range, EntityType.elementEmpty, "whatever");
        expectTag(range, EntityType.elementEnd, "foo");

        expectTag(range, EntityType.elementEmpty, "bar");

        expectTag(range, EntityType.elementStart, "baz");
        expectTag(range, EntityType.elementEnd, "baz");

        expectTag(range, EntityType.elementEnd, "root");

        assert(range.empty);
    }

    // simpleXML
    {
        auto range = parseXML!simpleXML(xml);

        // simpleXML skips comments, so the first entity is the root start tag.
        expectTag(range, EntityType.elementStart, "root");

        expectTag(range, EntityType.elementStart, "foo");
        expectText(range, EntityType.text, "some text");

        // simpleXML splits empty element tags into a start tag and end tag
        // so that the code doesn't have to care whether a start tag with no
        // content is an empty tag or a start tag and end tag with nothing but
        // whitespace in between.
        expectTag(range, EntityType.elementStart, "whatever");
        expectTag(range, EntityType.elementEnd, "whatever");

        expectTag(range, EntityType.elementEnd, "foo");

        expectTag(range, EntityType.elementStart, "bar");
        expectTag(range, EntityType.elementEnd, "bar");

        expectTag(range, EntityType.elementStart, "baz");
        expectTag(range, EntityType.elementEnd, "baz");

        expectTag(range, EntityType.elementEnd, "root");

        assert(range.empty);
    }
}
241 
242 
243 import std.range.primitives;
244 import std.traits;
245 import std.typecons : Flag;
246 
247 
/++
    Thrown by the XML parser whenever the XML that it is given is not valid.

    The exception's message is prefixed with the line and column of the
    problem, and the same location is available via $(LREF pos).
  +/
class XMLParsingException : Exception
{
    /++
        Where in the XML input the problem was found.
      +/
    TextPos pos;

package:

    // Builds the exception for a problem at textPos. The resulting message
    // has the form "[line:col]: msg". file and line default to the source
    // location of the code constructing the exception.
    this(string msg, TextPos textPos, string file = __FILE__, size_t line = __LINE__) @safe pure
    {
        import std.format : format;

        string fullMsg = format!"[%s:%s]: %s"(textPos.line, textPos.col, msg);
        super(fullMsg, file, line);
        pos = textPos;
    }
}
267 
268 
/++
    A location (line and column) within the XML document.

    Both the line number and the column number start at 1.

    TextPos exists primarily so that $(LREF XMLParsingException) can report
    where a problem is, but applications are free to use it for their own
    purposes. The TextPos of an $(LREF2 Entity, EntityRange) is available
    through $(LREF2 Entity.pos, EntityRange).

    See_Also: $(LREF XMLParsingException.pos)$(BR)
              $(LREF EntityRange.Entity.pos)
  +/
struct TextPos
{
    /// The line number within the XML file.
    int line = 1;

    /++
        The column number within the line of the XML file.

        Columns are counted in code units. So, a program that wants to
        present something else (e.g. the number of graphemes) needs to look at
        the actual text of the line and compute that number itself.
      +/
    int col = 1;
}
297 
298 
/++
    The set of options that control how the parser behaves.

    See_Also:
        $(LREF makeConfig)$(BR)
        $(LREF parseXML)$(BR)
        $(LREF simpleXML)
  +/
struct Config
{
    /++
        Controls whether comments are skipped during parsing.

        With $(D skipComments == SkipComments.yes), entities of type
        $(LREF EntityType.comment) never appear in the parsing results, and
        comments are validated only as far as is necessary to parse past
        them.

        Defaults to $(D SkipComments.no).
      +/
    SkipComments skipComments = SkipComments.no;

    /++
        Controls whether processing instructions are skipped during parsing.

        With $(D skipPI == SkipPI.yes), entities of type
        $(LREF EntityType.pi) are skipped, and processing instructions are
        validated only as far as is necessary to parse past them.

        Defaults to $(D SkipPI.no).
      +/
    SkipPI skipPI = SkipPI.no;

    /++
        Controls whether an empty element tag is reported as though it were
        a start tag immediately followed by its end tag.

        With $(D splitEmpty == SplitEmpty.yes), each time that the parser
        encounters an $(LREF EntityType.elementEmpty), it reports it as an
        $(LREF EntityType.elementStart) and then reports an
        $(LREF EntityType.elementEnd) as the next entity, before moving on to
        the entity which actually follows the empty element tag.

        This exists to simplify code that uses the parser. Most code has no
        reason to distinguish an empty tag from a start tag and end tag with
        nothing in between, but some code does, so the behavior can be
        chosen.

        Defaults to $(D SplitEmpty.no).
      +/
    SplitEmpty splitEmpty = SplitEmpty.no;

    ///
    unittest
    {
        enum configSplitYes = makeConfig(SplitEmpty.yes);

        // Checks the type and name of the tag at the front of the range and
        // then pops it off.
        static void expectTag(R)(ref R range, EntityType type, string name)
        {
            assert(range.front.type == type);
            assert(range.front.name == name);
            range.popFront();
        }

        {
            auto range = parseXML("<root></root>");
            expectTag(range, EntityType.elementStart, "root");
            expectTag(range, EntityType.elementEnd, "root");
            assert(range.empty);
        }
        {
            // No difference if the tags are already split.
            auto range = parseXML!configSplitYes("<root></root>");
            expectTag(range, EntityType.elementStart, "root");
            expectTag(range, EntityType.elementEnd, "root");
            assert(range.empty);
        }
        {
            // This treats <root></root> and <root/> as distinct.
            auto range = parseXML("<root/>");
            expectTag(range, EntityType.elementEmpty, "root");
            assert(range.empty);
        }
        {
            // This is parsed as if it were <root></root> instead of <root/>.
            auto range = parseXML!configSplitYes("<root/>");
            expectTag(range, EntityType.elementStart, "root");
            expectTag(range, EntityType.elementEnd, "root");
            assert(range.empty);
        }
    }

    /++
        Controls whether the parser throws upon encountering an entity
        reference which is not one of the five entity references predefined
        by the XML standard.

        Every other entity reference has to be declared in the DTD to be
        valid, and knowing what XML such a reference stands for (which can be
        arbitrarily complex, up to effectively inserting whole XML documents
        into the middle of the XML) requires parsing the DTD. dxml only parses
        the DTD as far as is needed to get past it, and substituting entity
        references with what they stand for is incompatible with the slicing
        semantics of $(LREF EntityRange). Consequently, dxml cannot correctly
        handle any entity references besides the five predefined ones, and
        even those are only parsed by using $(REF decodeXML, dxml, util) or
        $(REF parseStdEntityRef, dxml, util). $(LREF EntityRange) always
        validates that entity references are one of the five, predefined
        entity references, but beyond that, it passes them through as normal
        text without replacing them with what they represent.

        For that reason, $(LREF EntityRange) throws an
        $(LREF XMLParsingException) by default when it encounters an entity
        reference other than the five defined by the XML standard. That way,
        a document cannot be silently processed as if it contained no entity
        references, which would likely produce results that the program would
        consider wrong. Some programs, however, are fine with treating entity
        references as normal text and ignoring them, and those programs can
        set throwOnEntityRef to $(D ThrowOnEntityRef.no).

        With $(D throwOnEntityRef == ThrowOnEntityRef.no), each entity
        reference that is encountered is still checked for syntactic validity
        (i.e. that its characters could form a valid entity reference if the
        DTD declared it properly), but apart from that, $(LREF EntityRange)
        treats it as normal text, the same way that it treats the five,
        predefined entity references.

        Note that any valid XML entity reference which contains start or end
        tags must contain matching start or end tags, and entity references
        cannot contain incomplete fragments of XML (e.g. the start or end of a
        comment). So, missing entity references should only affect the data in
        the XML document and not its overall structure (if that were not _true,
        ignoring entity references the way that $(D ThrowOnEntityRef.no) does
        would be a disaster in the making). Whether losing that data is
        reasonable depends entirely on the application and on the XML
        documents that it parses - hence, the behavior is configurable.

        Defaults to $(D ThrowOnEntityRef.yes).

        See_Also: $(REF StdEntityRef, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(REF encodeCharRef, dxml, util)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)
      +/
    ThrowOnEntityRef throwOnEntityRef = ThrowOnEntityRef.yes;

    ///
    unittest
    {
        import std.exception : assertThrown;
        import dxml.util : decodeXML;

        // Checks the type and name of the tag at the front of the range.
        static void checkTag(R)(ref R range, EntityType type, string name)
        {
            assert(range.front.type == type);
            assert(range.front.name == name);
        }

        // Checks that the front of the range is text, and checks that text
        // both as it appears in the document and after decodeXML.
        static void checkText(R)(ref R range, string raw, string decoded)
        {
            assert(range.front.type == EntityType.text);
            assert(range.front.text == raw);
            assert(range.front.text.decodeXML() == decoded);
        }

        auto xml = "<root>\n" ~
                   "    <std>&amp;&apos;&gt;&lt;&quot;</std>\n" ~
                   "    <other>&foobar;</other>\n" ~
                   "    <invalid>&--;</invalid>\n" ~
                   "</root>";

        // ThrowOnEntityRef.yes
        {
            auto range = parseXML(xml);
            checkTag(range, EntityType.elementStart, "root");

            range.popFront();
            checkTag(range, EntityType.elementStart, "std");

            // The predefined entity references are left as-is in the text.
            range.popFront();
            checkText(range, "&amp;&apos;&gt;&lt;&quot;", `&'><"`);

            range.popFront();
            checkTag(range, EntityType.elementEnd, "std");

            range.popFront();
            checkTag(range, EntityType.elementStart, "other");

            // Attempted to parse past "&foobar;", which is syntactically
            // valid, but it's not one of the five predefined entity references.
            assertThrown!XMLParsingException(range.popFront());
        }

        // ThrowOnEntityRef.no
        {
            auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml);
            checkTag(range, EntityType.elementStart, "root");

            range.popFront();
            checkTag(range, EntityType.elementStart, "std");

            range.popFront();
            checkText(range, "&amp;&apos;&gt;&lt;&quot;", `&'><"`);

            range.popFront();
            checkTag(range, EntityType.elementEnd, "std");

            range.popFront();
            checkTag(range, EntityType.elementStart, "other");

            // Doesn't throw, because "&foobar;" is syntactically valid.
            // decodeXML has no effect on non-standard entity references.
            range.popFront();
            checkText(range, "&foobar;", "&foobar;");

            range.popFront();
            checkTag(range, EntityType.elementEnd, "other");

            range.popFront();
            checkTag(range, EntityType.elementStart, "invalid");

            // Attempted to parse past "&--;", which is not syntactically valid,
            // because -- is not a valid name for an entity reference.
            assertThrown!XMLParsingException(range.popFront());
        }
    }
}
540 
541 
/++
    Flag for whether the parser skips comments.

    See_Also: $(LREF2 skipComments, Config)
  +/
alias SkipComments = Flag!"SkipComments";

/++
    Flag for whether the parser skips processing instructions.

    See_Also: $(LREF2 skipPI, Config)
  +/
alias SkipPI = Flag!"SkipPI";

/++
    Flag for whether the parser reports empty element tags as a start tag
    followed by an end tag.

    See_Also: $(LREF2 splitEmpty, Config)
  +/
alias SplitEmpty = Flag!"SplitEmpty";

/++
    Flag for whether the parser throws on entity references which are not
    predefined by the XML standard.

    See_Also: $(LREF2 throwOnEntityRef, Config)
  +/
alias ThrowOnEntityRef = Flag!"ThrowOnEntityRef";
553 
554 
/++
    Helper function for creating a custom config. It makes it easy to set one
    or more of the member variables to something other than the default without
    having to worry about explicitly setting them individually or setting them
    all at once via a constructor.

    The order of the arguments does not matter. The types of each of the members
    of Config are unique, so that information alone is sufficient to determine
    which argument should be assigned to which member.

    It is a compile-time error to pass an argument whose type does not match
    the type of any member of $(LREF Config) or to pass two arguments of the
    same type.

    Params:
        args = The values to assign to the members of $(LREF Config) with the
               corresponding types.

    Returns: A $(LREF Config) where each member whose type matches the type of
             an argument has that argument's value, and every other member has
             its default value.
  +/
Config makeConfig(Args...)(Args args)
{
    import std.format : format;
    import std.meta : AliasSeq, staticIndexOf, staticMap;

    Config config;

    // The types of all of the members of Config. Each argument is matched to
    // a member purely by its type.
    alias TypeOfMember(string memberName) = typeof(__traits(getMember, config, memberName));
    alias MemberTypes = staticMap!(TypeOfMember, AliasSeq!(__traits(allMembers, Config)));

    foreach(i, arg; args)
    {
        // staticIndexOf gives -1 when the type is not in the list.
        static assert(staticIndexOf!(typeof(arg), MemberTypes) != -1,
                      format!"Argument %s does not match the type of any members of Config"(i));

        // Two arguments of the same type would target the same member, and
        // one of them would be silently discarded, so that's rejected.
        static foreach(j, Other; Args)
        {
            static if(i != j)
                static assert(!is(typeof(arg) == Other), format!"Argument %s and %s have the same type"(i, j));
        }

        // Assign the argument to the member with the same type.
        foreach(memberName; __traits(allMembers, Config))
        {
            static if(is(typeof(__traits(getMember, config, memberName)) == typeof(arg)))
                mixin("config." ~ memberName ~ " = arg;");
        }
    }

    return config;
}
605 
///
@safe pure nothrow @nogc unittest
{
    // Any member without a corresponding argument keeps its default value.
    enum defaults = Config.init;

    // A single argument only changes the member with the matching type.
    {
        const cfg = makeConfig(SkipComments.yes);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == defaults.skipPI);
        assert(cfg.splitEmpty == defaults.splitEmpty);
        assert(cfg.throwOnEntityRef == defaults.throwOnEntityRef);
    }

    // Multiple arguments each set their own member.
    {
        const cfg = makeConfig(SkipComments.yes, SkipPI.yes);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == SkipPI.yes);
        assert(cfg.splitEmpty == defaults.splitEmpty);
        assert(cfg.throwOnEntityRef == defaults.throwOnEntityRef);
    }

    // The order of the arguments has no bearing on which members are set.
    {
        const cfg = makeConfig(SplitEmpty.yes, SkipComments.yes, ThrowOnEntityRef.no);
        assert(cfg.skipComments == SkipComments.yes);
        assert(cfg.skipPI == defaults.skipPI);
        assert(cfg.splitEmpty == SplitEmpty.yes);
        assert(cfg.throwOnEntityRef == ThrowOnEntityRef.no);
    }
}
631 
// makeConfig must refuse to compile when given arguments that it cannot map
// onto the members of Config.
unittest
{
    import std.typecons : Flag;

    // Types which are not the type of any member of Config.
    static assert(!is(typeof(makeConfig(42))));
    static assert(!is(typeof(makeConfig("hello"))));
    static assert(!is(typeof(makeConfig(Flag!"SomeOtherFlag".yes))));

    // Two arguments with the same type.
    static assert(!is(typeof(makeConfig(SplitEmpty.yes, SplitEmpty.no))));
}
640 
641 
/++
    A ready-made $(LREF Config) for applications which only care about the
    element tags and the data between them.

    It skips comments and processing instructions, and it splits empty element
    tags so that they are reported the same way as a start tag followed by an
    end tag. $(LREF2 throwOnEntityRef, Config) is left at its default.
  +/
enum Config simpleXML = makeConfig(SkipComments.yes, SkipPI.yes, SplitEmpty.yes);
649 
///
@safe pure nothrow @nogc unittest
{
    enum cfg = simpleXML;

    // The three settings that simpleXML changes.
    static assert(cfg.skipComments == SkipComments.yes);
    static assert(cfg.skipPI == SkipPI.yes);
    static assert(cfg.splitEmpty == SplitEmpty.yes);

    // throwOnEntityRef is left at its default.
    static assert(cfg.throwOnEntityRef == ThrowOnEntityRef.yes);
}
658 
659 
/++
    The kind of an entity in the XML document. Each
    $(LREF EntityRange.Entity) has one.
  +/
enum EntityType
{
    /++
        A cdata section, i.e. `<![CDATA[ ... ]]>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-cdata-sect)
      +/
    cdata,

    /++
        A comment, i.e. `<!-- ... -->`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-comments)
      +/
    comment,

    /++
        An element's start tag, e.g. `<foo name="value">`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementStart,

    /++
        An element's end tag, e.g. `</foo>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEnd,

    /++
        An empty element tag, i.e. the tag for an element which has neither
        contents nor a matching end tag, e.g. `<foo name="value"/>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEmpty,

    /++
        A processing instruction, e.g. `<?foo?>`. The `<?xml ... ?>`
        declaration is skipped rather than being reported as an
        $(LREF EntityType._pi).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-pi)
      +/
    pi,

    /++
        Simple text which is the content of an element tag.

        If the text is followed by an entity other than the end tag, then the
        text includes everything up to that entity.

        Character references (e.g. $(D_CODE_STRING "$(AMP)#42;")) and the
        predefined entity references (e.g. $(D_CODE_STRING "$(AMP)apos;"))
        are left unprocessed in the text. To have them processed, pass the
        text to either $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
        $(REF_ALTTEXT asDecodedXML, asDecodedXML, dxml, util). Entity
        references which are not predefined are considered invalid XML,
        because the DTD section is skipped, and thus they cannot be processed
        properly.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(LREF EntityRange.Entity._text)
      +/
    text,
}
733 
734 
735 /++
736     Lazily parses the given range of characters as an XML document.
737 
738     EntityRange is essentially a
739     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX) parser, though it evolved
740     into that rather than being based on what Java did, and it's range-based
741     rather than iterator-based, so its API is likely to differ from other
742     implementations. The basic concept should be the same though.
743 
744     One of the core design goals of this parser is to slice the original input
745     rather than having to allocate strings for the output or wrap it in a lazy
746     range that produces a mutated version of the data. So, all of the text that
747     the parser provides is either a slice or
748     $(PHOBOS_REF takeExactly, std, range) of the input. However, in some cases,
749     for the parser to be fully compliant with the XML spec,
750     $(REF decodeXML, dxml, util) must be called on the text to mutate certain
751     constructs (e.g. removing any $(D_CODE_STRING '\r') in the text or
752     converting $(D_CODE_STRING "$(AMP)lt;") to $(D_CODE_STRING '<')). But
753     that's left up to the application.
754 
755     The parser is not $(K_NOGC), but it allocates memory very minimally. It
756     allocates some of its state on the heap so it can validate attributes and
757     end tags. However, that state is shared among all the ranges that came from
758     the same call to parseXML (only the range farthest along in parsing
759     validates attributes or end tags), so $(LREF2 save, _EntityRange) does not
760     allocate memory unless $(D save) on the underlying range allocates memory.
761     The shared state currently uses a couple of dynamic arrays to validate the
762     tags and attributes, and if the document has a particularly deep tag depth
763     or has a lot of attributes on a start tag, then some reallocations may
764     occur until the maximum is reached, but enough is reserved that for most
765     documents, no reallocations will occur. The only other times that the
766     parser would allocate would be if an exception were thrown or if the range
767     that was passed to parseXML allocates for any reason when calling any of the
768     range primitives.
769 
770     If invalid XML is encountered at any point during the parsing process, an
771     $(LREF XMLParsingException) will be thrown. If an exception has been thrown,
772     then the parser is in an invalid state, and it is an error to call any
773     functions on it.
774 
775     However, note that XML validation is reduced for any entities that are
776     skipped (e.g. for anything in the DTD, validation is reduced to what is
777     required to correctly parse past it, and when
778     $(D Config.skipPI == SkipPI.yes), processing instructions are only validated
779     enough to correctly skip past them).
780 
781     As the module documentation says, this parser does not provide any DTD
782     support. It is not possible to properly support the DTD while returning
783     slices of the original input, and the DTD portion of the spec makes parsing
784     XML far, far more complicated.
785 
786     A quick note about carriage returns$(COLON) per the XML spec, they are all
787     supposed to either be stripped out or replaced with newlines or spaces
788     before the XML parser even processes the text. That doesn't work when the
789     parser is slicing the original text and not mutating it at all. So, for the
790     purposes of parsing, this parser treats all carriage returns as if they
791     were newlines or spaces (though they won't count as newlines when counting
792     the lines for $(LREF TextPos)). However, they $(I will) appear in any text
793     fields or attribute values if they are in the document (since the text
794     fields and attribute values are slices of the original text).
795     $(REF decodeXML, dxml, util) can be used to strip them along with
796     converting any character references in the text. Alternatively, the
797     application can remove them all before calling parseXML, but it's not
798     necessary.
799   +/
800 struct EntityRange(Config cfg, R)
801     if(isForwardRange!R && isSomeChar!(ElementType!R))
802 {
803     import std.algorithm : canFind;
804     import std.range : only, takeExactly;
805     import std.typecons : Nullable;
806     import std.utf : byCodeUnit;
807 
808     enum compileInTests = is(R == EntityRangeCompileTests);
809 
810 public:
811 
    /// The Config used when parsing the XML.
813     alias config = cfg;
814 
815     /// The type of the range that EntityRange is parsing.
816     alias Input = R;
817 
818     /++
819         The type used when any slice of the original input is used. If $(D R)
820         is a string or supports slicing, then SliceOfR is the same as $(D R);
821         otherwise, it's the result of calling
822         $(PHOBOS_REF takeExactly, std, range) on the input.
823 
824         ---
825         import std.algorithm : filter;
826         import std.range : takeExactly;
827 
828         static assert(is(EntityRange!(Config.init, string).SliceOfR == string));
829 
830         auto range = filter!(a => true)("some xml");
831 
832         static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
833                          typeof(takeExactly(range, 42))));
834         ---
835       +/
836     static if(isDynamicArray!R || hasSlicing!R)
837         alias SliceOfR = R;
838     else
839         alias SliceOfR = typeof(takeExactly(R.init, 42));
840 
841     // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
842     // a ddoc-ed unit test.
843     static if(compileInTests) @safe unittest
844     {
845         import std.algorithm : filter;
846         import std.range : takeExactly;
847 
848         static assert(is(EntityRange!(Config.init, string).SliceOfR == string));
849 
850         auto range = filter!(a => true)("some xml");
851 
852         static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
853                          typeof(takeExactly(range, 42))));
854     }
855 
856 
857     /++
858         Represents an entity in the XML document.
859 
860         Note that the $(LREF2 type, EntityRange._Entity) determines which
861         properties can be used, and it can determine whether functions which
862         an Entity or $(LREF EntityRange) is passed to are allowed to be called.
863         Each function lists which $(LREF EntityType)s are allowed, and it is an
864         error to call them with any other $(LREF EntityType).
865       +/
866     struct Entity
867     {
868     public:
869 
870         import std.typecons : Tuple;
871 
872         /++
873             The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
            $(LREF2 attributes, EntityRange.Entity) returns a range of.
875 
876             See_Also: $(LREF2 attributes, EntityRange.Entity)
877           +/
878         alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos,  "pos");
879 
880 
881         /++
882             The $(LREF EntityType) for this Entity.
883           +/
884         @property EntityType type() @safe const pure nothrow @nogc
885         {
886             return _type;
887         }
888 
889         ///
890         static if(compileInTests) unittest
891         {
892             auto xml = "<root>\n" ~
893                        "    <!--no comment-->\n" ~
894                        "    <![CDATA[cdata run]]>\n" ~
895                        "    <text>I am text!</text>\n" ~
896                        "    <empty/>\n" ~
897                        "    <?pi?>\n" ~
898                        "</root>";
899 
900             auto range = parseXML(xml);
901             assert(range.front.type == EntityType.elementStart);
902             assert(range.front.name == "root");
903             range.popFront();
904 
905             assert(range.front.type == EntityType.comment);
906             assert(range.front.text == "no comment");
907             range.popFront();
908 
909             assert(range.front.type == EntityType.cdata);
910             assert(range.front.text == "cdata run");
911             range.popFront();
912 
913             assert(range.front.type == EntityType.elementStart);
914             assert(range.front.name == "text");
915             range.popFront();
916 
917             assert(range.front.type == EntityType.text);
918             assert(range.front.text == "I am text!");
919             range.popFront();
920 
921             assert(range.front.type == EntityType.elementEnd);
922             assert(range.front.name == "text");
923             range.popFront();
924 
925             assert(range.front.type == EntityType.elementEmpty);
926             assert(range.front.name == "empty");
927             range.popFront();
928 
929             assert(range.front.type == EntityType.pi);
930             assert(range.front.name == "pi");
931             range.popFront();
932 
933             assert(range.front.type == EntityType.elementEnd);
934             assert(range.front.name == "root");
935             range.popFront();
936 
937             assert(range.empty);
938         }
939 
940 
941         /++
942             The position in the the original text where the entity starts.
943 
944             See_Also: $(LREF TextPos)$(BR)
945                       $(LREF XMLParsingException._pos)
946           +/
947         @property TextPos pos() @safe const pure nothrow @nogc
948         {
949             return _pos;
950         }
951 
952         ///
953         static if(compileInTests) unittest
954         {
955             auto xml = "<root>\n" ~
956                        "    <foo>\n" ~
957                        "        Foo and bar. Always foo and bar...\n" ~
958                        "    </foo>\n" ~
959                        "</root>";
960 
961             auto range = parseXML(xml);
962             assert(range.front.type == EntityType.elementStart);
963             assert(range.front.name == "root");
964             assert(range.front.pos == TextPos(1, 1));
965             range.popFront();
966 
967             assert(range.front.type == EntityType.elementStart);
968             assert(range.front.name == "foo");
969             assert(range.front.pos == TextPos(2, 5));
970             range.popFront();
971 
972             assert(range.front.type == EntityType.text);
973             assert(range.front.text ==
974                    "\n" ~
975                    "        Foo and bar. Always foo and bar...\n" ~
976                    "    ");
977             assert(range.front.pos == TextPos(2, 10));
978             range.popFront();
979 
980             assert(range.front.type == EntityType.elementEnd);
981             assert(range.front.name == "foo");
982             assert(range.front.pos == TextPos(4, 5));
983             range.popFront();
984 
985             assert(range.front.type == EntityType.elementEnd);
986             assert(range.front.name == "root");
987             assert(range.front.pos == TextPos(5, 1));
988             range.popFront();
989 
990             assert(range.empty);
991         }
992 
993         static if(compileInTests) unittest
994         {
995             import core.exception : AssertError;
996             import std.exception : enforce;
997 
998             static void test(ER)(ref ER range, EntityType type, int row, int col, size_t line = __LINE__)
999             {
1000                 enforce!AssertError(!range.empty, "unittest failure 1", __FILE__, line);
1001                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
1002                 enforce!AssertError(range.front.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
1003                 range.popFront();
1004             }
1005 
1006             auto xml = "<?xml?>\n" ~
1007                        "   <!--comment-->\n" ~
1008                        "   <?pi?>\n" ~
1009                        " <root>\n" ~
1010                        "          <!--comment--><!--comment-->\n" ~
1011                        "       <?pi?>\n" ~
1012                        "  <![CDATA[]]>\n" ~
1013                        "              <empty/>     </root>\n" ~
1014                        " <!--comment-->\n" ~
1015                        " <?pi?>\n";
1016 
1017             {
1018                 auto range = parseXML(xml);
1019                 test(range, EntityType.comment, 2, 4);
1020                 test(range, EntityType.pi, 3, 4);
1021                 test(range, EntityType.elementStart, 4, 2);
1022                 test(range, EntityType.comment, 5, 11);
1023                 test(range, EntityType.comment, 5, 25);
1024                 test(range, EntityType.pi, 6, 8);
1025                 test(range, EntityType.cdata, 7, 3);
1026                 test(range, EntityType.elementEmpty, 8, 15);
1027                 test(range, EntityType.elementEnd, 8, 28);
1028                 test(range, EntityType.comment, 9, 2);
1029                 test(range, EntityType.pi, 10, 2);
1030             }
1031 
1032             auto range = parseXML!simpleXML(xml);
1033             test(range, EntityType.elementStart, 4, 2);
1034             test(range, EntityType.cdata, 7, 3);
1035             test(range, EntityType.elementStart, 8, 15);
1036             test(range, EntityType.elementEnd, 8, 15);
1037             test(range, EntityType.elementEnd, 8, 28);
1038         }
1039 
1040 
1041         /++
1042             Gives the name of this Entity.
1043 
1044             Note that this is the direct name in the XML for this entity and
1045             does not contain any of the names of any of the parent entities that
1046             this entity has. If an application wants the full "path" of the
1047             entity, then it will have to keep track of that itself. The parser
1048             does not do that as it would require allocating memory.
1049 
1050             $(TABLE
1051                 $(TR $(TH Supported $(LREF EntityType)s:))
1052                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1053                 $(TR $(TD $(LREF2 elementEnd, EntityType)))
1054                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1055                 $(TR $(TD $(LREF2 pi, EntityType)))
1056             )
1057           +/
1058         @property SliceOfR name()
1059         {
1060             import dxml.internal : checkedSave, stripBCU;
1061             with(EntityType)
1062             {
1063                 import std.format : format;
1064                 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type),
1065                        format("name cannot be called with %s", _type));
1066             }
1067             return stripBCU!R(checkedSave(_name));
1068         }
1069 
1070         ///
1071         static if(compileInTests) unittest
1072         {
1073             auto xml = "<root>\n" ~
1074                        "    <empty/>\n" ~
1075                        "    <?pi?>\n" ~
1076                        "</root>";
1077 
1078             auto range = parseXML(xml);
1079             assert(range.front.type == EntityType.elementStart);
1080             assert(range.front.name == "root");
1081             range.popFront();
1082 
1083             assert(range.front.type == EntityType.elementEmpty);
1084             assert(range.front.name == "empty");
1085             range.popFront();
1086 
1087             assert(range.front.type == EntityType.pi);
1088             assert(range.front.name == "pi");
1089             range.popFront();
1090 
1091             assert(range.front.type == EntityType.elementEnd);
1092             assert(range.front.name == "root");
1093             range.popFront();
1094 
1095             assert(range.empty);
1096         }
1097 
1098 
1099         /++
1100             Returns a lazy range of attributes for a start tag where each
1101             attribute is represented as a$(BR)
1102             $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
1103                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "name"),
1104                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "value"),
1105                       $(LREF TextPos), $(D_STRING "pos"))).
1106 
1107             $(TABLE
1108                 $(TR $(TH Supported $(LREF EntityType)s:))
1109                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1110                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1111             )
1112 
1113             See_Also: $(LREF2 Attribute, EntityRange.Entity)$(BR)
1114                       $(REF decodeXML, dxml, util)$(BR)
1115                       $(REF asDecodedXML, dxml, util)
1116           +/
1117         @property auto attributes()
1118         {
1119             with(EntityType)
1120             {
1121                 import std.format : format;
1122                 assert(_type == elementStart || _type == elementEmpty,
1123                        format("attributes cannot be called with %s", _type));
1124             }
1125 
1126             // STag         ::= '<' Name (S Attribute)* S? '>'
1127             // Attribute    ::= Name Eq AttValue
1128             // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1129 
1130             static struct AttributeRange
1131             {
1132                 @property Attribute front()
1133                 {
1134                     return _front;
1135                 }
1136 
1137                 void popFront()
1138                 {
1139                     import dxml.internal : stripBCU;
1140 
1141                     stripWS(_text);
1142                     if(_text.input.empty)
1143                     {
1144                         empty = true;
1145                         return;
1146                     }
1147 
1148                     immutable pos = _text.pos;
1149                     auto name = stripBCU!R(_text.takeName!'='());
1150                     stripWS(_text);
1151                     popFrontAndIncCol(_text);
1152                     stripWS(_text);
1153                     _front = Attribute(name, stripBCU!R(takeEnquotedText(_text)), pos);
1154                 }
1155 
1156                 @property auto save()
1157                 {
1158                     import dxml.internal : checkedSave;
1159                     auto retval = this;
1160                     retval._front = Attribute(_front[0].save, checkedSave(_front[1]), _front[2]);
1161                     retval._text.input = checkedSave(retval._text.input);
1162                     return retval;
1163                 }
1164 
1165                 this(typeof(_text) text)
1166                 {
1167                     _front = Attribute.init; // This is utterly stupid. https://issues.dlang.org/show_bug.cgi?id=13945
1168                     _text = text;
1169                     if(_text.input.empty)
1170                         empty = true;
1171                     else
1172                         popFront();
1173                 }
1174 
1175                 bool empty;
1176                 Attribute _front;
1177                 typeof(_savedText) _text;
1178             }
1179 
1180             return AttributeRange(_savedText.save);
1181         }
1182 
1183         ///
1184         static if(compileInTests) unittest
1185         {
1186             import std.algorithm.comparison : equal;
1187             import std.algorithm.iteration : filter;
1188             {
1189                 auto xml = "<root/>";
1190                 auto range = parseXML(xml);
1191                 assert(range.front.type == EntityType.elementEmpty);
1192                 assert(range.front.attributes.empty);
1193 
1194                 static assert(is(ElementType!(typeof(range.front.attributes)) ==
1195                                  typeof(range).Entity.Attribute));
1196             }
1197             {
1198                 auto xml = "<root a='42' q='29' w='hello'/>";
1199                 auto range = parseXML(xml);
1200                 assert(range.front.type == EntityType.elementEmpty);
1201 
1202                 auto attrs = range.front.attributes;
1203                 assert(attrs.front.name == "a");
1204                 assert(attrs.front.value == "42");
1205                 assert(attrs.front.pos == TextPos(1, 7));
1206                 attrs.popFront();
1207 
1208                 assert(attrs.front.name == "q");
1209                 assert(attrs.front.value == "29");
1210                 assert(attrs.front.pos == TextPos(1, 14));
1211                 attrs.popFront();
1212 
1213                 assert(attrs.front.name == "w");
1214                 assert(attrs.front.value == "hello");
1215                 assert(attrs.front.pos == TextPos(1, 21));
1216                 attrs.popFront();
1217 
1218                 assert(attrs.empty);
1219             }
1220             // Because the type of name and value is SliceOfR, == with a string
1221             // only works if the range passed to parseXML was string.
1222             {
1223                 auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>");
1224                 auto range = parseXML(xml);
1225                 assert(range.front.type == EntityType.elementEmpty);
1226 
1227                 auto attrs = range.front.attributes;
1228                 assert(equal(attrs.front.name, "a"));
1229                 assert(equal(attrs.front.value, "42"));
1230                 assert(attrs.front.pos == TextPos(1, 7));
1231                 attrs.popFront();
1232 
1233                 assert(equal(attrs.front.name, "q"));
1234                 assert(equal(attrs.front.value, "29"));
1235                 assert(attrs.front.pos == TextPos(1, 14));
1236                 attrs.popFront();
1237 
1238                 assert(equal(attrs.front.name, "w"));
1239                 assert(equal(attrs.front.value, "hello"));
1240                 assert(attrs.front.pos == TextPos(1, 21));
1241                 attrs.popFront();
1242 
1243                 assert(attrs.empty);
1244             }
1245         }
1246 
1247         static if(compileInTests) unittest
1248         {
1249             import core.exception : AssertError;
1250             import std.algorithm.comparison : equal;
1251             import std.exception : assertNotThrown, collectException, enforce;
1252             import std.typecons : Tuple, tuple;
1253             import dxml.internal : codeLen, testRangeFuncs;
1254 
1255             static bool cmpAttr(T, U)(T lhs, U rhs)
1256             {
1257                 return equal(lhs[0].save, rhs[0].save) &&
1258                        equal(lhs[1].save, rhs[1].save);
1259             }
1260 
1261             static void test(alias func, ThrowOnEntityRef toer)(string text, EntityType type,
1262                                                                 Tuple!(string, string)[] expected,
1263                                                                 int row, int col, size_t line = __LINE__)
1264             {
1265                 auto range = assertNotThrown!XMLParsingException(parseXML!(makeConfig(toer))(func(text)),
1266                                                                  "unittest 1", __FILE__, line);
1267                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
1268                 enforce!AssertError(equal!cmpAttr(range.front.attributes, expected),
1269                                     "unittest failure 3", __FILE__, line);
1270                 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 4", __FILE__, line);
1271             }
1272 
1273             static void testFail(alias func, ThrowOnEntityRef toer)(string text,
1274                                                                     int row, int col, size_t line = __LINE__)
1275             {
1276                 auto e = collectException!XMLParsingException(parseXML!(makeConfig(toer))(func(text)));
1277                 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
1278                 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1279             }
1280 
1281             static foreach(func; testRangeFuncs)
1282             {
1283                 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
1284                 {
1285                     test!(func, toer)("<root a='b'/>", EntityType.elementEmpty, [tuple("a", "b")], 1, 14);
1286                     test!(func, toer)("<root a = 'b' />", EntityType.elementEmpty, [tuple("a", "b")], 1, 17);
1287                     test!(func, toer)("<root \n\n a \n\n = \n\n 'b' \n\n />", EntityType.elementEmpty,
1288                                       [tuple("a", "b")], 9, 4);
1289                     test!(func, toer)("<root a='b'></root>", EntityType.elementStart, [tuple("a", "b")], 1, 13);
1290                     test!(func, toer)("<root a = 'b' ></root>", EntityType.elementStart, [tuple("a", "b")], 1, 16);
1291                     test!(func, toer)("<root \n a \n = \n 'b' \n ></root>", EntityType.elementStart,
1292                                       [tuple("a", "b")], 5, 3);
1293 
1294                     test!(func, toer)("<root foo='\n\n\n'/>", EntityType.elementEmpty, [tuple("foo", "\n\n\n")], 4, 4);
1295                     test!(func, toer)(`<root foo='"""'/>`, EntityType.elementEmpty, [tuple("foo", `"""`)], 1, 18);
1296                     test!(func, toer)(`<root foo="'''"/>`, EntityType.elementEmpty, [tuple("foo", `'''`)], 1, 18);
1297                     test!(func, toer)(`<root foo.=""/>`, EntityType.elementEmpty, [tuple("foo.", "")], 1, 16);
1298                     test!(func, toer)(`<root foo="bar="/>`, EntityType.elementEmpty, [tuple("foo", "bar=")], 1, 19);
1299 
1300                     test!(func, toer)("<root foo='bar' a='b' hello='world'/>", EntityType.elementEmpty,
1301                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1302                     test!(func, toer)(`<root foo="bar" a='b' hello="world"/>`, EntityType.elementEmpty,
1303                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1304 
1305                     test!(func, toer)(`<root foo="&#42;" a='&#x42;' hello="%foo"/>`, EntityType.elementEmpty,
1306                               [tuple("foo", "&#42;"), tuple("a", "&#x42;"), tuple("hello", "%foo")], 1, 44);
1307 
1308                     test!(func, toer)(`<root foo="&amp;" a='vector&lt;int&gt;'></root>`, EntityType.elementStart,
1309                               [tuple("foo", "&amp;"), tuple("a", "vector&lt;int&gt;"),], 1, 41);
1310 
1311                     test!(func, toer)(`<foo 京都市="ディラン"/>`, EntityType.elementEmpty,
1312                               [tuple("京都市", "ディラン")], 1, codeLen!(func, `<foo 京都市="ディラン"/>`) + 1);
1313 
1314                     test!(func, toer)(`<root foo=">"/>`, EntityType.elementEmpty, [tuple("foo", ">")], 1, 16);
1315                     test!(func, toer)(`<root foo=">>>>>>"/>`, EntityType.elementEmpty, [tuple("foo", ">>>>>>")], 1, 21);
1316                     test!(func, toer)(`<root foo=">"></root>`, EntityType.elementStart, [tuple("foo", ">")], 1, 15);
1317                     test!(func, toer)(`<root foo=">>>>>>"></root>`, EntityType.elementStart, [tuple("foo", ">>>>>>")], 1, 20);
1318 
1319                     test!(func, toer)(`<root foo="bar" foos="ball"/>`, EntityType.elementEmpty,
1320                               [tuple("foo", "bar"), tuple("foos", "ball")], 1, 30);
1321 
1322                     testFail!(func, toer)(`<root a="""/>`, 1, 11);
1323                     testFail!(func, toer)(`<root a='''/>`, 1, 11);
1324                     testFail!(func, toer)("<root a=/>", 1, 9);
1325                     testFail!(func, toer)("<root a='/>", 1, 9);
1326                     testFail!(func, toer)("<root a='/>", 1, 9);
1327                     testFail!(func, toer)("<root =''/>", 1, 7);
1328                     testFail!(func, toer)(`<root a ""/>`, 1, 9);
1329                     testFail!(func, toer)(`<root a""/>`, 1, 8);
1330                     testFail!(func, toer)(`<root a/>`, 1, 8);
1331                     testFail!(func, toer)("<root foo='bar' a=/>", 1, 19);
1332                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1333                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1334                     testFail!(func, toer)("<root foo='bar' =''/>", 1, 17);
1335                     testFail!(func, toer)("<root foo='bar' a= hello='world'/>", 1, 20);
1336                     // It's 33 rather than 28, because it throws when processing the start tag and not when processing
1337                     // the attributes. So, the mismatched quotes are detected before the attributes are checked.
1338                     testFail!(func, toer)("<root foo='bar' a=' hello='world'/>", 1, 33);
1339                     testFail!(func, toer)("<root foo='bar' ='' hello='world'/>", 1, 17);
1340                     testFail!(func, toer)("<root foo='bar'a='b'/>", 1, 16);
1341                     testFail!(func, toer)(`<root .foo="bar"/>`, 1, 7);
1342 
1343                     testFail!(func, toer)(`<root foo="<"/>`, 1, 12);
1344                     testFail!(func, toer)(`<root foo="<world"/>`, 1, 12);
1345                     testFail!(func, toer)(`<root foo="hello<world"/>`, 1, 17);
1346                     testFail!(func, toer)(`<root foo="&"/>`, 1, 12);
1347                     testFail!(func, toer)(`<root foo="hello&"/>`, 1, 17);
1348                     testFail!(func, toer)(`<root foo="hello&world"/>`, 1, 17);
1349                     testFail!(func, toer)(`<root foo="&;"/>`, 1, 12);
1350                     testFail!(func, toer)(`<root foo="&#;"/>`, 1, 12);
1351                     testFail!(func, toer)(`<root foo="&#x;"/>`, 1, 12);
1352                     testFail!(func, toer)(`<root foo="&#A;"/>`, 1, 12);
1353                     testFail!(func, toer)(`<root foo="&#xG;"/>`, 1, 12);
1354                     testFail!(func, toer)(`<root foo="&#42"/>`, 1, 12);
1355                     testFail!(func, toer)(`<root foo="&#x42"/>`, 1, 12);
1356                     testFail!(func, toer)(`<root foo="&#x12;"/>`, 1, 12);
1357 
1358                     testFail!(func, toer)("<root\n\nfoo='\nbar&#x42'></root>", 4, 4);
1359 
1360                     testFail!(func, toer)(`<root a="""></root>`, 1, 11);
1361                     testFail!(func, toer)(`<root a='''></root>`, 1, 11);
1362                     testFail!(func, toer)("<root a=></root>", 1, 9);
1363                     testFail!(func, toer)("<root a='></root>", 1, 9);
1364                     testFail!(func, toer)("<root a='></root>", 1, 9);
1365                     testFail!(func, toer)("<root =''></root>", 1, 7);
1366                     testFail!(func, toer)(`<root a ""></root>`, 1, 9);
1367                     testFail!(func, toer)(`<root a""></root>`, 1, 8);
1368                     testFail!(func, toer)(`<root a></root>`, 1, 8);
1369                     testFail!(func, toer)("<root foo='bar' a=></root>", 1, 19);
1370                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1371                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1372                     testFail!(func, toer)("<root foo='bar' =''></root>", 1, 17);
1373                     testFail!(func, toer)("<root foo='bar' a= hello='world'></root>", 1, 20);
1374                     testFail!(func, toer)("<root foo='bar' a=' hello='world'></root>", 1, 33);
1375                     testFail!(func, toer)("<root foo='bar' ='' hello='world'></root>", 1, 17);
1376                     testFail!(func, toer)("<root foo='bar'a='b'></root>", 1, 16);
1377                     testFail!(func, toer)(`<root .foo='bar'></root>`, 1, 7);
1378 
1379                     testFail!(func, toer)(`<root foo="<"></root>`, 1, 12);
1380                     testFail!(func, toer)(`<root foo="<world"></root>`, 1, 12);
1381                     testFail!(func, toer)(`<root foo="hello<world"></root>`, 1, 17);
1382                     testFail!(func, toer)(`<root foo="&"></root>`, 1, 12);
1383                     testFail!(func, toer)(`<root foo="hello&"></root>`, 1, 17);
1384                     testFail!(func, toer)(`<root foo="hello&world"></root>`, 1, 17);
1385                     testFail!(func, toer)(`<root foo="&;"></root>`, 1, 12);
1386                     testFail!(func, toer)(`<root foo="&#;"></root>`, 1, 12);
1387                     testFail!(func, toer)(`<root foo="&#x;"></root>`, 1, 12);
1388                     testFail!(func, toer)(`<root foo="&#A;"></root>`, 1, 12);
1389                     testFail!(func, toer)(`<root foo="&#xG;"></root>`, 1, 12);
1390                     testFail!(func, toer)(`<root foo="&#42"></root>`, 1, 12);
1391                     testFail!(func, toer)(`<root foo="&#x42"></root>`, 1, 12);
1392                     testFail!(func, toer)(`<root foo="&#x12;"></root>`, 1, 12);
1393 
1394                     testFail!(func, toer)(`<root a='42' a='19'/>`, 1, 14);
1395                     testFail!(func, toer)(`<root a='42' b='hello' a='19'/>`, 1, 24);
1396                     testFail!(func, toer)(`<root a='42' b='hello' a='19' c=''/>`, 1, 24);
1397                     testFail!(func, toer)(`<root a='' b='' c='' d='' e='' f='' g='' e='' h=''/>`, 1, 42);
1398                     testFail!(func, toer)(`<root foo='bar' foo='bar'/>`, 1, 17);
1399 
1400                     test!(func, toer)(`<root foo="&amp;"></root>`, EntityType.elementStart,
1401                                       [tuple("foo", "&amp;")], 1, 19);
1402                     test!(func, toer)(`<root foo="foo&amp;&lt;&gt;&apos;&quot;bar"></root>`, EntityType.elementStart,
1403                                       [tuple("foo", "foo&amp;&lt;&gt;&apos;&quot;bar")], 1, 45);
1404                     testFail!(func, toer)("<root foo='&;'></root>", 1, 12);
1405                     testFail!(func, toer)("<root foo='&.;'></root>", 1, 12);
1406                     testFail!(func, toer)("<root foo='\n &amp ule'></root>", 2, 2);
1407                     testFail!(func, toer)("<root foo='\n &foo bar'></root>", 2, 2);
1408                 }
1409                 {
1410                     alias toer = ThrowOnEntityRef.yes;
1411                     testFail!(func, toer)(`<root foo="&foo;"/>`, 1, 12);
1412                     testFail!(func, toer)(`<root foo="&foo;"></root>`, 1, 12);
1413                     testFail!(func, toer)("<root foo='foo&bar.;'></root>", 1, 15);
1414                     testFail!(func, toer)(`<root foo="hello &a; world"></root>`, 1, 18);
1415                     testFail!(func, toer)("<root foo='hello \n &a; \n world'></root>", 2, 2);
1416                 }
1417                 {
1418                     alias toer = ThrowOnEntityRef.no;
1419                     test!(func, toer)(`<root foo="&foo;"/>`, EntityType.elementEmpty,
1420                                       [tuple("foo", "&foo;")], 1, 20);
1421                     test!(func, toer)(`<root foo="&foo;"></root>`, EntityType.elementStart,
1422                                       [tuple("foo", "&foo;")], 1, 19);
1423                     test!(func, toer)("<root foo='foo&bar.;'></root>", EntityType.elementStart,
1424                                       [tuple("foo", "foo&bar.;")], 1, 23);
1425                     test!(func, toer)(`<root foo="hello &a; world"></root>`, EntityType.elementStart,
1426                                         [tuple("foo", "hello &a; world")], 1, 29);
1427                     test!(func, toer)("<root foo='hello \n &a; \n world'></root>", EntityType.elementStart,
1428                                         [tuple("foo", "hello \n &a; \n world")], 3, 9);
1429                 }
1430             }
1431         }
1432 
1433 
1434         /++
1435             Gives the text contained in this Entity.
1436 
1437             For $(LREF EntityType.pi), that is whatever comes after the name
1438             of the processing instruction. For every other supported type, it
1439             is everything inside the entity, not counting the delimiters at
1440             either end (if the entity has any).
1441 
1442             $(TABLE
1443                 $(TR $(TH Supported $(LREF EntityType)s:))
1444                 $(TR $(TD $(LREF2 cdata, EntityType)))
1445                 $(TR $(TD $(LREF2 comment, EntityType)))
1446                 $(TR $(TD $(LREF2 pi, EntityType)))
1447                 $(TR $(TD $(LREF2 _text, EntityType)))
1448             )
1449 
1450             See_Also: $(REF decodeXML, dxml, util)$(BR)
1451                       $(REF asDecodedXML, dxml, util)$(BR)
1452                       $(REF stripIndent, dxml, util)$(BR)
1453                       $(REF withoutIndent, dxml, util)
1454           +/
1455         @property SliceOfR text()
1456         {
1457             import std.format : format;
1458             import dxml.internal : checkedSave, stripBCU;
1459 
1460             // Only the entity types listed in the table above may be queried.
1461             with(EntityType)
1462                 assert(only(cdata, comment, pi, text).canFind(_type), format("text cannot be called with %s", _type));
1463 
1464             return stripBCU!R(checkedSave(_savedText.input));
1465         }
1466 
1467         ///
1468         static if(compileInTests) unittest
1469         {
1470             import std.range.primitives : empty;
1471             // Walks a small document and checks text on each entity type which supports it.
1472             auto xml = "<?xml version='1.0'?>\n" ~
1473                        "<?instructionName?>\n" ~
1474                        "<?foo here is something to say?>\n" ~
1475                        "<root>\n" ~
1476                        "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1477                        "    <!-- some random comment -->\n" ~
1478                        "    <p>something here</p>\n" ~
1479                        "    <p>\n" ~
1480                        "       something else\n" ~
1481                        "       here</p>\n" ~
1482                        "</root>";
1483             auto range = parseXML(xml); // the <?xml ...?> declaration is skipped, so the first entity is the first PI
1484 
1485             // "<?instructionName?>\n" ~
1486             assert(range.front.type == EntityType.pi);
1487             assert(range.front.name == "instructionName");
1488             assert(range.front.text.empty); // nothing follows the name, so there is no text
1489 
1490             // "<?foo here is something to say?>\n" ~
1491             range.popFront();
1492             assert(range.front.type == EntityType.pi);
1493             assert(range.front.name == "foo");
1494             assert(range.front.text == "here is something to say"); // the whitespace after the name is not included
1495 
1496             // "<root>\n" ~
1497             range.popFront();
1498             assert(range.front.type == EntityType.elementStart);
1499 
1500             // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1501             range.popFront();
1502             assert(range.front.type == EntityType.cdata);
1503             assert(range.front.text == " Yay! random text >> << "); // whitespace inside the delimiters is kept
1504 
1505             // "    <!-- some random comment -->\n" ~
1506             range.popFront();
1507             assert(range.front.type == EntityType.comment);
1508             assert(range.front.text == " some random comment ");
1509 
1510             // "    <p>something here</p>\n" ~
1511             range.popFront();
1512             assert(range.front.type == EntityType.elementStart);
1513             assert(range.front.name == "p");
1514 
1515             range.popFront();
1516             assert(range.front.type == EntityType.text);
1517             assert(range.front.text == "something here");
1518 
1519             range.popFront();
1520             assert(range.front.type == EntityType.elementEnd);
1521             assert(range.front.name == "p");
1522 
1523             // "    <p>\n" ~
1524             // "       something else\n" ~
1525             // "       here</p>\n" ~
1526             range.popFront();
1527             assert(range.front.type == EntityType.elementStart);
1528 
1529             range.popFront();
1530             assert(range.front.type == EntityType.text);
1531             assert(range.front.text == "\n       something else\n       here"); // newlines and indentation are not stripped
1532 
1533             range.popFront();
1534             assert(range.front.type == EntityType.elementEnd);
1535 
1536             // "</root>"
1537             range.popFront();
1538             assert(range.front.type == EntityType.elementEnd);
1539 
1540             range.popFront();
1541             assert(range.empty);
1542         }
1543 
1544 
1545         // When R is not a dynamic array, it may be a reference-type range. To
1546         // reduce the chance of bugs in that case, copying an Entity saves the
1547         // ranges which it holds, so that the copy and the original do not
1548         // share them.
1549         static if(!isDynamicArray!R) this(this)
1550         {
1551             // The name is saved for the types which have one; the saved text is saved for all but elementEnd.
1552             with(EntityType) final switch(_type)
1553             {
1554                 case cdata, comment, text:
1555                     _savedText = _savedText.save;
1556                     break;
1557                 case elementStart, elementEmpty, pi:
1558                     _name = _name.save;
1559                     _savedText = _savedText.save;
1560                     break;
1561                 case elementEnd:
1562                     _name = _name.save;
1563                     break;
1564             }
1565         }
1566 
1567         static if(compileInTests) unittest
1568         {
1569             import std.algorithm.comparison : equal;
1570             import dxml.internal : testRangeFuncs;
1571             // Two attributes are considered equal if both their names and their values are equal.
1572             static bool cmpAttr(T)(T lhs, T rhs)
1573             {
1574                 return equal(lhs.name.save, rhs.name.save) &&
1575                        equal(lhs.value.save, rhs.value.save);
1576             }
1577 
1578             {
1579                 auto xml = "<root>\n" ~
1580                            "    <foo a='42'/>\n" ~
1581                            "    <foo b='42'/>\n" ~
1582                            "    <nocomment>nothing to say</nocomment>\n" ~
1583                            "</root>";
1584 
1585                 // The duplicate lines aren't typos. We want to ensure that the
1586                 // values are independent and that nothing was consumed.
1587                 static foreach(func; testRangeFuncs)
1588                 {{
1589                      auto range = parseXML(func(xml));
1590                      range.popFront();
1591                      {
1592                          auto entity = range.front;
1593                          auto entity2 = entity; // copying runs the postblit constructor when R is not a dynamic array
1594                          assert(entity.pos == entity2.pos);
1595                          assert(equal(entity.name, entity2.name));
1596                          assert(equal(entity.name, entity2.name));
1597                          assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1598                          assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1599                          range.popFront(); // advancing the range must not affect entities obtained earlier
1600                          assert(entity.pos == entity2.pos);
1601                          assert(entity.pos != range.front.pos);
1602                      }
1603                      range.popFront();
1604                      range.popFront();
1605                      {
1606                          auto entity = range.front;
1607                          auto entity2 = entity;
1608                          assert(entity.pos == entity2.pos);
1609                          assert(equal(entity.text, entity2.text));
1610                          assert(equal(entity.text, entity2.text));
1611                          range.popFront();
1612                          assert(entity.pos == entity2.pos);
1613                          assert(entity.pos != range.front.pos);
1614                      }
1615                 }}
1616             }
1617             {
1618                 auto xml = "<root>\n" ~
1619                            "    <![CDATA[whatever]]>\n" ~
1620                            "    <?pi?>\n" ~
1621                            "    <!--comment-->\n" ~
1622                            "    <empty/>\n" ~
1623                            "    <noend a='foo' b='bar'/>\n" ~
1624                            "    <foo baz='42'></foo>\n" ~
1625                            "</root>";
1626 
1627                 static foreach(func; testRangeFuncs)
1628                 {
1629                     for(auto range = parseXML(func(xml)); !range.empty; range.popFront())
1630                     {
1631                         auto entity = range.front;
1632                         auto entity2 = entity;
1633 
1634                         assert(entity.pos == range.front.pos);
1635                         assert(entity.pos == entity2.pos);
1636                         assert(entity.type == range.front.type);
1637                         assert(entity.type == entity2.type);
1638                         // Compare only the properties which the entity's type supports.
1639                         with(EntityType) final switch(entity.type)
1640                         {
1641                             case cdata: goto case text;
1642                             case comment: goto case text;
1643                             case elementStart:
1644                             {
1645                                 assert(equal!cmpAttr(entity.attributes, range.front.attributes));
1646                                 assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1647                                 goto case elementEnd;
1648                             }
1649                             case elementEnd:
1650                             {
1651                                 assert(equal(entity.name, range.front.name));
1652                                 assert(equal(entity.name, entity2.name));
1653                                 break;
1654                             }
1655                             case elementEmpty: goto case elementStart;
1656                             case text:
1657                             {
1658                                 assert(equal(entity.text, range.front.text));
1659                                 assert(equal(entity.text, entity2.text));
1660                                 break;
1661                             }
1662                             case pi:
1663                             {
1664                                 assert(equal(entity.name, range.front.name));
1665                                 assert(equal(entity.name, entity2.name));
1666                                 goto case text;
1667                             }
1668                         }
1669                     }
1670                 }
1671             }
1672         }
1673 
1674 
1675     private:
1676 
1677         this(EntityType type)
1678         {
1679             // Assigning .init explicitly should not be necessary; it is done
1680             // because of https://issues.dlang.org/show_bug.cgi?id=13945
1681             _savedText = typeof(_savedText).init;
1682             _name = typeof(_name).init;
1683             _type = type;
1684         }
1685 
1686         EntityType _type;
1687         TextPos _pos;
1688         Taken _name;
1689         typeof(EntityRange._savedText) _savedText;
1690     }
1691 
1692 
1693     /++
1694         Gives the $(LREF Entity) for the entity of the XML document that was
1695         parsed most recently.
1696       +/
1697     @property Entity front()
1698     {
1699         auto result = Entity(_type);
1700         result._pos = _entityPos;
1701 
1702         // Only the ranges which the entity type uses are handed over, and each
1703         // of them is saved first.
1704         with(EntityType) final switch(_type)
1705         {
1706             case cdata, comment, text: result._savedText = _savedText.save; break;
1707             case elementStart, elementEmpty, pi: result._name = _name.save; result._savedText = _savedText.save; break;
1708             case elementEnd: result._name = _name.save; break;
1709         }
1710 
1711         return result;
1712     }
1713 
1714 
1715     /++
1716         Advances the range to the next entity.
1717 
1718         Entities are visited in the order in which they appear in the XML
1719         document. So, an entity with child entities is followed by its first
1720         child, and an entity without any is followed by the next entity at
1721         the same level.
1722 
1723         Throws: $(LREF XMLParsingException) on invalid XML.
1724       +/
1725     void popFront()
1726     {
1727         // _grammarPos determines which parsing routine handles the next entity.
1728         with(GrammarPos) final switch(_grammarPos)
1729         {
1730             case documentStart: _parseDocumentStart(); break;
1731             case prologMisc1: _parseAtPrologMisc!1(); break;
1732             case prologMisc2: _parseAtPrologMisc!2(); break;
1733             case splittingEmpty:
1734                 _type = EntityType.elementEnd;
1735                 _tagStack.sawEntity();
1736                 if(_tagStack.depth == 0)
1737                     _grammarPos = endMisc;
1738                 else
1739                     _grammarPos = contentCharData2;
1740                 break;
1741             case contentCharData1:
1742                 assert(_type == EntityType.elementStart);
1743                 _tagStack.pushTag(_name.save);
1744                 _parseAtContentCharData();
1745                 break;
1746             case contentMid: _parseAtContentMid(); break;
1747             case contentCharData2: _parseAtContentCharData(); break;
1748             case endTag: _parseElementEnd(); break;
1749             case endMisc: _parseAtEndMisc(); break;
1750             case documentEnd: assert(0, "It's illegal to call popFront() on an empty EntityRange.");
1751         }
1752     }
1753 
1754 
1755     /++
1756         Whether the end of the XML document has been reached.
1757 
1758         Note that because an $(LREF XMLParsingException) will be thrown on
1759         invalid XML, it's actually possible to call
1760         $(LREF2 front, EntityRange) and $(LREF2 popFront, EntityRange) without
1761         checking empty if the only way that empty would be true is if the XML
1762         were invalid (e.g. if at a start tag, it's a given that there's at
1763         least one end tag left in the document unless it's invalid XML).
1764 
1765         However, of course, caution should be used to ensure that incorrect
1766         assumptions are not made that allow the document to reach its end
1767         earlier than predicted without throwing an $(LREF XMLParsingException),
1768         since it's still an error to call $(LREF2 front, EntityRange) or
1769         $(LREF2 popFront, EntityRange) if empty would return true.
1770       +/
1771     @property bool empty() @safe const pure nothrow @nogc
1772     {
1773         return _grammarPos == GrammarPos.documentEnd;
1774     }
1775 
1776 
1777     /++
1778         Forward range function which yields a copy of this range that can be
1779         iterated over without affecting the original.
1780       +/
1781     @property auto save()
1782     {
1783         // Ranges which still hold their init value are left alone, since some
1784         // ranges (e.g. a range that's a class) blow up if save is called on init.
1785         auto copy = this;
1786         alias NameRange = typeof(copy._name);
1787         alias TextInput = typeof(copy._text.input);
1788         alias SavedInput = typeof(copy._savedText.input);
1789         if(copy._name !is NameRange.init) copy._name = _name.save;
1790         if(copy._text.input !is TextInput.init) copy._text.input = _text.input.save;
1791         if(copy._savedText.input !is SavedInput.init) copy._savedText.input = _savedText.input.save;
1792         return copy;
1793     }
1794 
1795     static if(compileInTests) unittest
1796     {
1797         import std.algorithm.comparison : equal;
1798         import std.exception : assertNotThrown;
1799         import dxml.internal : testRangeFuncs;
1800         // Two attributes are considered equal if both their names and their values are equal.
1801         static bool cmpAttr(T)(T lhs, T rhs)
1802         {
1803             return equal(lhs.name.save, rhs.name.save) &&
1804                    equal(lhs.value.save, rhs.value.save);
1805         }
1806         // Walks both ranges in lockstep and asserts that they yield matching entities and end together.
1807         static void testEqual(ER)(ER one, ER two)
1808         {
1809              while(!one.empty && !two.empty)
1810              {
1811                  auto left = one.front;
1812                  auto right = two.front;
1813 
1814                  assert(left.pos == right.pos);
1815                  assert(left.type == right.type);
1816                  // Compare only the properties which the entity's type supports.
1817                  with(EntityType) final switch(left.type)
1818                  {
1819                      case cdata: goto case text;
1820                      case comment: goto case text;
1821                      case elementStart:
1822                      {
1823                          assert(equal!cmpAttr(left.attributes, right.attributes));
1824                          goto case elementEnd;
1825                      }
1826                      case elementEnd: assert(equal(left.name, right.name)); break;
1827                      case elementEmpty: goto case elementStart;
1828                      case text: assert(equal(left.text, right.text)); break;
1829                      case pi: assert(equal(left.name, right.name)); goto case text;
1830                  }
1831 
1832                  one.popFront();
1833                  two.popFront();
1834              }
1835 
1836              assert(one.empty);
1837              assert(two.empty);
1838         }
1839 
1840          auto xml = "<root>\n" ~
1841                     "    <!-- comment -->\n" ~
1842                     "    <something>\n" ~
1843                     "         <else/>\n" ~
1844                     "         somet text <i>goes</i> here\n" ~
1845                     "    </something>\n" ~
1846                     "</root>";
1847 
1848         static foreach(i, func; testRangeFuncs)
1849         {{
1850              auto text = func(xml);
1851              testEqual(parseXML(text.save), parseXML(text.save)); // two separate parses of the same input
1852              auto range = parseXML(text.save);
1853              testEqual(range.save, range.save); // two saves of the same range
1854         }}
1855     }
1856 
1857 
1858     /++
1859         Gives an empty range. It corresponds to
1860         $(PHOBOS_REF _takeNone, std, range), but it doesn't create a wrapper type.
1861       +/
1862     EntityRange takeNone()
1863     {
1864         // A range whose _grammarPos is documentEnd reports itself as empty.
1865         auto none = save;
1866         none._grammarPos = GrammarPos.documentEnd;
1867         return none;
1868     }
1869 
1870 
1871 private:
1872     // Skips the XML declaration if there is one, then continues parsing at GrammarPos.prologMisc1.
1873     void _parseDocumentStart()
1874     {
1875         auto orig = _text.save;
1876         immutable wasWS = _text.stripWS();
1877         if(_text.stripStartsWith("<?xml"))
1878         {
1879             checkNotEmpty(_text);
1880             if(_text.input.front != '?' && !isSpace(_text.input.front))
1881                 _text = orig; // a PI whose name only starts with xml (e.g. xml-stylesheet); whitespace may precede it
1882             else if(wasWS)
1883                 throw new XMLParsingException("Cannot have whitespace before the <?xml...?> declaration", TextPos.init);
1884             else
1885                 _text.skipUntilAndDrop!"?>"();
1886         }
1887         _grammarPos = GrammarPos.prologMisc1;
1888         _parseAtPrologMisc!1();
1889     }
1890 
1891     static if(compileInTests) unittest
1892     {
1893         import core.exception : AssertError;
1894         import std.exception : assertNotThrown, enforce;
1895         import dxml.internal : testRangeFuncs;
1896         // Checks that parsing succeeds, that the first entity is an empty element, and where the parser's position ends up.
1897         static void test(alias func)(string xml, int row, int col, size_t line = __LINE__)
1898         {
1899             auto range = assertNotThrown!XMLParsingException(parseXML(func(xml)));
1900             enforce!AssertError(range._type == EntityType.elementEmpty, "unittest failure 1", __FILE__, line);
1901             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1902         }
1903 
1904         static foreach(func; testRangeFuncs)
1905         {
1906             test!func("<root/>", 1, 8);
1907             test!func("\n\t\n <root/>   \n", 3, 9);
1908             test!func("<?xml\n\n\nversion='1.8'\n\n\n\nencoding='UTF-8'\n\n\nstandalone='yes'\n?><root/>", 12, 10);
1909             test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?><root/>", 6, 23);
1910             test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?>\n     <root/>", 7, 13);
1911             test!func("<root/>", 1, 8); // NOTE(review): this case and the next one repeat the first two cases above
1912             test!func("\n\t\n <root/>   \n", 3, 9);
1913         }
1914     }
1915 
1916 
1917     // Parses whatever comes next while in the prolog, i.e. while at
1918     // GrammarPos.prologMisc1 or GrammarPos.prologMisc2 (selected by miscNum).
1919     //
1920     // document    ::= prolog element Misc*
1921     // prolog      ::= XMLDecl? Misc* (doctypedecl Misc*)?
1922     // Misc        ::= Comment | PI | S
1923     // Comment     ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1924     // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
1925     // PI          ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1926     // element     ::= EmptyElemTag | STag content ETag
1927     void _parseAtPrologMisc(int miscNum)()
1928     {
1929         static assert(miscNum == 1 || miscNum == 2);
1930 
1931         // Whatever comes next has to start with <.
1932         stripWS(_text);
1933         checkNotEmpty(_text);
1934         if(_text.input.front != '<')
1935             throw new XMLParsingException("Expected <", _text.pos);
1936         popFrontAndIncCol(_text);
1937         checkNotEmpty(_text);
1938 
1939         immutable afterBracket = _text.input.front;
1940 
1941         // <? starts a processing instruction.
1942         if(afterBracket == '?')
1943         {
1944             _parsePI();
1945             static if(config.skipPI == SkipPI.yes)
1946                 popFront();
1947             return;
1948         }
1949 
1950         // Anything other than <? or <! is parsed as the start of an element.
1951         if(afterBracket != '!')
1952         {
1953             _parseElementStart();
1954             return;
1955         }
1956 
1957         // <! has to start either a comment or a DOCTYPE declaration.
1958         immutable bangPos = _text.pos;
1959         popFrontAndIncCol(_text);
1960 
1961         if(_text.stripStartsWith("--"))
1962         {
1963             _parseComment();
1964             static if(config.skipComments == SkipComments.yes)
1965                 _parseAtPrologMisc!miscNum();
1966             return;
1967         }
1968 
1969         immutable sawDoctype = _text.stripStartsWith("DOCTYPE");
1970 
1971         static if(miscNum == 1)
1972         {
1973             if(!sawDoctype)
1974                 throw new XMLParsingException("Expected Comment or DOCTYPE section", bangPos);
1975             if(!_text.stripWS())
1976                 throw new XMLParsingException("Whitespace must follow <!DOCTYPE", _text.pos);
1977             _parseDoctypeDecl();
1978         }
1979         else
1980         {
1981             // With miscNum == 2, a DOCTYPE declaration is rejected, so nothing starting with <! other than a comment is valid.
1982             throw new XMLParsingException(sawDoctype ? "Only one <!DOCTYPE ...> declaration allowed per XML document" : "Expected Comment", bangPos);
1983         }
1984     }
1985 
1986 
1987     // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1988     // Parses a comment whose leading <!-- has already been removed from the input.
1989     void _parseComment()
1990     {
1991         enum keepComment = config.skipComments == SkipComments.no;
1992         static if(keepComment)
1993         {
1994             _entityPos = TextPos(_text.pos.line, _text.pos.col - 4); // back up over the 4 characters of <!--
1995             _type = EntityType.comment;
1996             _tagStack.sawEntity();
1997             _savedText.pos = _text.pos;
1998             _savedText.input = _text.takeUntilAndDrop!"--"();
1999         }
2000         else
2001             _text.skipUntilAndDrop!"--"();
2002         immutable closed = !_text.input.empty && _text.input.front == '>';
2003         if(!closed)
2004             throw new XMLParsingException("Comments cannot contain -- and cannot be terminated by --->", _text.pos);
2005         // The text is only validated now so that a badly terminated comment is reported in preference to invalid characters inside it.
2006         static if(keepComment)
2007             checkText!true(_savedText);
2008         popFrontAndIncCol(_text);
2009     }
2010 
2011     static if(compileInTests) unittest
2012     {
2013         import core.exception : AssertError;
2014         import std.algorithm.comparison : equal;
2015         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2016         import dxml.internal : codeLen, testRangeFuncs;
2017 
2018         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2019         {
2020             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")));
2021             enforce!AssertError(range.front.type == EntityType.comment, "unittest failure 1", __FILE__, line);
2022             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2023             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2024         }
2025 
2026         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2027         {
2028             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2029             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2030             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2031         }
2032 
2033         static foreach(func; testRangeFuncs)
2034         {
2035             test!func("<!--foo-->", "foo", 1, 11);
2036             test!func("<!-- foo -->", " foo ", 1, 13);
2037             test!func("<!-- -->", " ", 1, 9);
2038             test!func("<!---->", "", 1, 8);
2039             test!func("<!--- comment -->", "- comment ", 1, 18);
2040             test!func("<!-- \n foo \n -->", " \n foo \n ", 3, 5);
2041             test!func("<!--京都市 ディラン-->", "京都市 ディラン", 1, codeLen!(func, "<!--京都市 ディラン-->") + 1);
2042             test!func("<!--&-->", "&", 1, 9);
2043             test!func("<!--<-->", "<", 1, 9);
2044             test!func("<!-->-->", ">", 1, 9);
2045             test!func("<!--->-->", "->", 1, 10);
2046 
2047             testFail!func("<!", 1, 2);
2048             testFail!func("<!- comment -->", 1, 2);
2049             testFail!func("<!-- comment ->", 1, 5);
2050             testFail!func("<!-- comment --->", 1, 16);
2051             testFail!func("<!---- comment -->", 1, 7);
2052             testFail!func("<!-- comment -- comment -->", 1, 16);
2053             testFail!func("<!->", 1, 2);
2054             testFail!func("<!-->", 1, 5);
2055             testFail!func("<!--->", 1, 5);
2056             testFail!func("<!----->", 1, 7);
2057             testFail!func("<!blah>", 1, 2);
2058             testFail!func("<! blah>", 1, 2);
2059             testFail!func("<!-- \n\n   \v \n -->", 3, 4);
2060             testFail!func("<!--京都市 ディラン\v-->", 1, codeLen!(func, "<!--京都市 ディラン\v"));
2061 
2062             {
2063                 auto xml = func("<!DOCTYPE foo><!-- comment --><root/>");
2064                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2065                 assert(range.front.type == EntityType.comment);
2066                 assert(equal(range.front.text, " comment "));
2067             }
2068             {
2069                 auto xml = func("<root><!-- comment --></root>");
2070                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2071                 assertNotThrown!XMLParsingException(range.popFront());
2072                 assert(range.front.type == EntityType.comment);
2073                 assert(equal(range.front.text, " comment "));
2074             }
2075             {
2076                 auto xml = func("<root/><!-- comment -->");
2077                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2078                 assertNotThrown!XMLParsingException(range.popFront());
2079                 assert(range.front.type == EntityType.comment);
2080                 assert(equal(range.front.text, " comment "));
2081             }
2082 
2083             static foreach(comment; ["<!foo>", "<! foo>", "<!->", "<!-->", "<!--->"])
2084             {
2085                 {
2086                     auto xml = func("<!DOCTYPE foo>" ~ comment ~ "<root/>");
2087                     assertThrown!XMLParsingException(parseXML(xml));
2088                 }
2089                 {
2090                     auto xml = func("<root>" ~ comment ~ "<root>");
2091                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2092                     assertThrown!XMLParsingException(range.popFront());
2093                 }
2094                 {
2095                     auto xml = func("<root/>" ~ comment);
2096                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2097                     assertThrown!XMLParsingException(range.popFront());
2098                 }
2099             }
2100 
2101             {
2102                 auto xml = "<!--one-->\n" ~
2103                            "<!--two-->\n" ~
2104                            "<root>\n" ~
2105                            "    <!--three-->\n" ~
2106                            "    <!--four-->\n" ~
2107                            "</root>\n" ~
2108                            "<!--five-->\n" ~
2109                            "<!--six-->";
2110 
2111                 auto text = func(xml);
2112                 {
2113                     auto range = parseXML(text.save);
2114                     assert(range.front.type == EntityType.comment);
2115                     assert(equal(range.front.text, "one"));
2116                     assertNotThrown!XMLParsingException(range.popFront());
2117                     assert(range.front.type == EntityType.comment);
2118                     assert(equal(range.front.text, "two"));
2119                     assertNotThrown!XMLParsingException(range.popFront());
2120                     assert(range.front.type == EntityType.elementStart);
2121                     assert(equal(range.front.name, "root"));
2122                     assertNotThrown!XMLParsingException(range.popFront());
2123                     assert(range.front.type == EntityType.comment);
2124                     assert(equal(range.front.text, "three"));
2125                     assertNotThrown!XMLParsingException(range.popFront());
2126                     assert(range.front.type == EntityType.comment);
2127                     assert(equal(range.front.text, "four"));
2128                     assertNotThrown!XMLParsingException(range.popFront());
2129                     assert(range.front.type == EntityType.elementEnd);
2130                     assert(equal(range.front.name, "root"));
2131                     assertNotThrown!XMLParsingException(range.popFront());
2132                     assert(range.front.type == EntityType.comment);
2133                     assert(equal(range.front.text, "five"));
2134                     assertNotThrown!XMLParsingException(range.popFront());
2135                     assert(range.front.type == EntityType.comment);
2136                     assert(equal(range.front.text, "six"));
2137                     assertNotThrown!XMLParsingException(range.popFront());
2138                     assert(range.empty);
2139                 }
2140                 {
2141                     auto range = parseXML!simpleXML(text.save);
2142                     assert(range.front.type == EntityType.elementStart);
2143                     assert(equal(range.front.name, "root"));
2144                     assertNotThrown!XMLParsingException(range.popFront());
2145                     assert(range.front.type == EntityType.elementEnd);
2146                     assert(equal(range.front.name, "root"));
2147                     assertNotThrown!XMLParsingException(range.popFront());
2148                     assert(range.empty);
2149                 }
2150             }
2151         }
2152     }
2153 
2154 
2155     // PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2156     // PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2157     // Parses a processing instruction. < was already removed from the input.
2158     void _parsePI()
2159     {
2160         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2161         assert(_text.input.front == '?');
2162         popFrontAndIncCol(_text);
2163         static if(config.skipPI == SkipPI.yes)
2164             _text.skipUntilAndDrop!"?>"();
2165         else
2166         {
2167             immutable posAtName = _text.pos;
2168             if(_text.input.empty)
2169                 throw new XMLParsingException("Unterminated processing instruction", posAtName);
2170             _type = EntityType.pi;
2171             _tagStack.sawEntity();
2172             _name = takeName!'?'(_text);
2173             immutable posAtWS = _text.pos;
2174             stripWS(_text);
2175             checkNotEmpty(_text);
2176             _savedText.pos = _text.pos;
2177             _savedText.input = _text.takeUntilAndDrop!"?>"();
2178             checkText!true(_savedText);
2179             if(walkLength(_name.save) == 3)
2180             {
2181                 // FIXME icmp doesn't compile right now due to an issue with
2182                 // byUTF that needs to be looked into.
2183                 /+
2184                 import std.uni : icmp;
2185                 if(icmp(_name.save, "xml") == 0)
2186                     throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2187                 +/
2188                 auto temp = _name.save;
2189                 if(temp.front == 'x' || temp.front == 'X')
2190                 {
2191                     temp.popFront();
2192                     if(temp.front == 'm' || temp.front == 'M')
2193                     {
2194                         temp.popFront();
2195                         if(temp.front == 'l' || temp.front == 'L')
2196                             throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2197                     }
2198                 }
2199             }
2200         }
2201     }
2202 
2203     static if(compileInTests) unittest
2204     {
2205         import core.exception : AssertError;
2206         import std.algorithm.comparison : equal;
2207         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2208         import std.utf : byUTF;
2209         import dxml.internal : codeLen, testRangeFuncs;
2210 
2211         static void test(alias func)(string text, string name, string expected,
2212                                      int row, int col, size_t line = __LINE__)
2213         {
2214             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2215                                                              "unittest failure 1", __FILE__, line);
2216             enforce!AssertError(range.front.type == EntityType.pi, "unittest failure 2", __FILE__, line);
2217             enforce!AssertError(equal(range.front.name, name), "unittest failure 3", __FILE__, line);
2218             enforce!AssertError(equal(range.front.text, expected), "unittest failure 4", __FILE__, line);
2219             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 5", __FILE__, line);
2220         }
2221 
2222         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2223         {
2224             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2225             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2226             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2227         }
2228 
2229         static foreach(func; testRangeFuncs)
2230         {
2231             test!func("<?a?>", "a", "", 1, 6);
2232             test!func("<?foo?>", "foo", "", 1, 8);
2233             test!func("<?foo.?>", "foo.", "", 1, 9);
2234             test!func("<?foo bar?>", "foo", "bar", 1, 12);
2235             test!func("<?xmf bar?>", "xmf", "bar", 1, 12);
2236             test!func("<?xmlfoo bar?>", "xmlfoo", "bar", 1, 15);
2237             test!func("<?foo bar baz?>", "foo", "bar baz", 1, 16);
2238             test!func("<?foo\nbar baz?>", "foo", "bar baz", 2, 10);
2239             test!func("<?foo \n bar baz?>", "foo", "bar baz", 2, 11);
2240             test!func("<?foo bar\nbaz?>", "foo", "bar\nbaz", 2, 6);
2241             test!func("<?dlang is awesome?>", "dlang", "is awesome", 1, 21);
2242             test!func("<?dlang is awesome! ?>", "dlang", "is awesome! ", 1, 23);
2243             test!func("<?dlang\n\nis\n\nawesome\n\n?>", "dlang", "is\n\nawesome\n\n", 7, 3);
2244             test!func("<?京都市 ディラン?>", "京都市", "ディラン", 1, codeLen!(func, "<?京都市 ディラン?>") + 1);
2245             test!func("<?foo bar&baz?>", "foo", "bar&baz", 1, 16);
2246             test!func("<?foo bar<baz?>", "foo", "bar<baz", 1, 16);
2247             test!func("<?pi ?>", "pi", "", 1, 8);
2248             test!func("<?pi\n?>", "pi", "", 2, 3);
2249             test!func("<?foo ??>", "foo", "?", 1, 10);
2250             test!func("<?pi some data ? > <??>", "pi", "some data ? > <?", 1, 24);
2251 
2252             testFail!func("<?", 1, 3);
2253             testFail!func("<??>", 1, 3);
2254             testFail!func("<? ?>", 1, 3);
2255             testFail!func("<?xml?><?xml?>", 1, 10);
2256             testFail!func("<?XML?>", 1, 3);
2257             testFail!func("<?xMl?>", 1, 3);
2258             testFail!func("<?foo>", 1, 6);
2259             testFail!func("<? foo?>", 1, 3);
2260             testFail!func("<?\nfoo?>", 1, 3);
2261             testFail!func("<??foo?>", 1, 3);
2262             testFail!func("<?.foo?>", 1, 3);
2263             testFail!func("<?foo bar\vbaz?>", 1, 10);
2264 
2265             {
2266                 auto xml = func("<!DOCTYPE foo><?foo bar?><root/>");
2267                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2268                 assert(range.front.type == EntityType.pi);
2269                 assert(equal(range.front.name, "foo"));
2270                 assert(equal(range.front.text, "bar"));
2271             }
2272             {
2273                 auto xml = func("<root><?foo bar?></root>");
2274                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2275                 assertNotThrown!XMLParsingException(range.popFront());
2276                 assert(equal(range.front.name, "foo"));
2277                 assert(equal(range.front.text, "bar"));
2278             }
2279             {
2280                 auto xml = func("<root/><?foo bar?>");
2281                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2282                 assertNotThrown!XMLParsingException(range.popFront());
2283                 assert(equal(range.front.name, "foo"));
2284                 assert(equal(range.front.text, "bar"));
2285             }
2286 
2287             static foreach(pi; ["<?foo>", "<foo?>", "<? foo>"])
2288             {
2289                 {
2290                     auto xml = func("<!DOCTYPE foo>" ~ pi ~ "<root/>");
2291                     assertThrown!XMLParsingException(parseXML(xml));
2292                 }
2293                 {
2294                     auto xml = func("<root>" ~ pi ~ "<root>");
2295                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2296                     assertThrown!XMLParsingException(range.popFront());
2297                 }
2298                 {
2299                     auto xml = func("<root/>" ~ pi);
2300                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2301                     assertThrown!XMLParsingException(range.popFront());
2302                 }
2303             }
2304 
2305             {
2306                 auto xml = "<?one?>\n" ~
2307                            "<?two?>\n" ~
2308                            "<root>\n" ~
2309                            "    <?three?>\n" ~
2310                            "    <?four?>\n" ~
2311                            "</root>\n" ~
2312                            "<?five?>\n" ~
2313                            "<?six?>";
2314 
2315                 auto text = func(xml);
2316                 {
2317                     auto range = parseXML(text.save);
2318                     assert(range.front.type == EntityType.pi);
2319                     assert(equal(range.front.name, "one"));
2320                     assertNotThrown!XMLParsingException(range.popFront());
2321                     assert(range.front.type == EntityType.pi);
2322                     assert(equal(range.front.name, "two"));
2323                     assertNotThrown!XMLParsingException(range.popFront());
2324                     assert(range.front.type == EntityType.elementStart);
2325                     assert(equal(range.front.name, "root"));
2326                     assertNotThrown!XMLParsingException(range.popFront());
2327                     assert(range.front.type == EntityType.pi);
2328                     assert(equal(range.front.name, "three"));
2329                     assertNotThrown!XMLParsingException(range.popFront());
2330                     assert(range.front.type == EntityType.pi);
2331                     assert(equal(range.front.name, "four"));
2332                     assertNotThrown!XMLParsingException(range.popFront());
2333                     assert(range.front.type == EntityType.elementEnd);
2334                     assert(equal(range.front.name, "root"));
2335                     assertNotThrown!XMLParsingException(range.popFront());
2336                     assert(range.front.type == EntityType.pi);
2337                     assert(equal(range.front.name, "five"));
2338                     assertNotThrown!XMLParsingException(range.popFront());
2339                     assert(range.front.type == EntityType.pi);
2340                     assert(equal(range.front.name, "six"));
2341                     assertNotThrown!XMLParsingException(range.popFront());
2342                     assert(range.empty);
2343                 }
2344                 {
2345                     auto range = parseXML!simpleXML(text.save);
2346                     assert(range.front.type == EntityType.elementStart);
2347                     assert(equal(range.front.name, "root"));
2348                     assertNotThrown!XMLParsingException(range.popFront());
2349                     assert(range.front.type == EntityType.elementEnd);
2350                     assert(equal(range.front.name, "root"));
2351                     assertNotThrown!XMLParsingException(range.popFront());
2352                     assert(range.empty);
2353                 }
2354             }
2355         }
2356     }
2357 
2358 
2359     // CDSect  ::= CDStart CData CDEnd
2360     // CDStart ::= '<![CDATA['
2361     // CData   ::= (Char* - (Char* ']]>' Char*))
2362     // CDEnd   ::= ']]>'
2363     // Parses a CDATA. <![CDATA[ was already removed from the front of the input.
2364     void _parseCDATA()
2365     {
2366         _entityPos = TextPos(_text.pos.line, _text.pos.col - cast(int)"<![CDATA[".length);
2367         _type = EntityType.cdata;
2368         _tagStack.sawEntity();
2369         _savedText.pos = _text.pos;
2370         _savedText.input = _text.takeUntilAndDrop!"]]>";
2371         checkText!true(_savedText);
2372         _grammarPos = GrammarPos.contentCharData2;
2373     }
2374 
2375     static if(compileInTests) unittest
2376     {
2377         import core.exception : AssertError;
2378         import std.algorithm.comparison : equal;
2379         import std.exception : assertNotThrown, collectException, enforce;
2380         import dxml.internal : codeLen, testRangeFuncs;
2381 
2382         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2383         {
2384             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2385             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2386             assertNotThrown!XMLParsingException(range.popFront());
2387             enforce!AssertError(range.front.type == EntityType.cdata, "unittest failure 1", __FILE__, line);
2388             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2389             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2390         }
2391 
2392         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2393         {
2394             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2395             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2396             auto e = collectException!XMLParsingException(range.popFront());
2397             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2398             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
2399         }
2400 
2401         static foreach(func; testRangeFuncs)
2402         {
2403             test!func("<![CDATA[]]>", "", 1, 13);
2404             test!func("<![CDATA[hello world]]>", "hello world", 1, 24);
2405             test!func("<![CDATA[\nhello\n\nworld\n]]>", "\nhello\n\nworld\n", 5, 4);
2406             test!func("<![CDATA[京都市]]>", "京都市", 1, codeLen!(func, "<![CDATA[京都市]>") + 2);
2407             test!func("<![CDATA[<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ]]>",
2408                       "<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ", 1, 57);
2409             test!func("<![CDATA[&]]>", "&", 1, 14);
2410 
2411             testFail!func("<[CDATA[]>", 1, 2);
2412             testFail!func("<![CDAT[]>", 1, 2);
2413             testFail!func("<![CDATA]>", 1, 2);
2414             testFail!func("<![CDATA[>", 1, 10);
2415             testFail!func("<![CDATA[]", 1, 10);
2416             testFail!func("<![CDATA[]>", 1, 10);
2417             testFail!func("<![CDATA[ \v ]]>", 1, 11);
2418             testFail!func("<![CDATA[ \n\n \v \n ]]>", 3, 2);
2419         }
2420     }
2421 
2422 
2423     // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
2424     // DeclSep     ::= PEReference | S
2425     // intSubset   ::= (markupdecl | DeclSep)*
2426     // markupdecl  ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
2427     // Parse doctypedecl after GrammarPos.prologMisc1.
2428     // <!DOCTYPE and any whitespace after it should have already been removed
2429     // from the input.
2430     void _parseDoctypeDecl()
2431     {
2432         outer: while(true)
2433         {
2434             _text.skipToOneOf!('"', '\'', '[', '>')();
2435             switch(_text.input.front)
2436             {
2437                 static foreach(quote; ['"', '\''])
2438                 {
2439                     case quote:
2440                     {
2441                         popFrontAndIncCol(_text);
2442                         _text.skipUntilAndDrop!([quote])();
2443                         continue outer;
2444                     }
2445                 }
2446                 case '[':
2447                 {
2448                     popFrontAndIncCol(_text);
2449                     while(true)
2450                     {
2451                         checkNotEmpty(_text);
2452                         _text.skipToOneOf!('"', '\'', ']')();
2453                         switch(_text.input.front)
2454                         {
2455                             case '"':
2456                             {
2457                                 popFrontAndIncCol(_text);
2458                                 _text.skipUntilAndDrop!`"`();
2459                                 continue;
2460                             }
2461                             case '\'':
2462                             {
2463                                 popFrontAndIncCol(_text);
2464                                 _text.skipUntilAndDrop!`'`();
2465                                 continue;
2466                             }
2467                             case ']':
2468                             {
2469                                 popFrontAndIncCol(_text);
2470                                 stripWS(_text);
2471                                 if(_text.input.empty || _text.input.front != '>')
2472                                     throw new XMLParsingException("Incorrectly terminated <!DOCTYPE> section.", _text.pos);
2473                                 popFrontAndIncCol(_text);
2474                                 _parseAtPrologMisc!2();
2475                                 return;
2476                             }
2477                             default: assert(0);
2478                         }
2479                     }
2480                 }
2481                 case '>':
2482                 {
2483                     popFrontAndIncCol(_text);
2484                     _parseAtPrologMisc!2();
2485                     break;
2486                 }
2487                 default: assert(0);
2488             }
2489             break;
2490         }
2491     }
2492 
    // Tests for _parseDoctypeDecl: DOCTYPE declarations that must be skipped
    // so that the following <root/> is the first entity, and malformed ones
    // that must fail at a specific position.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : testRangeFuncs;

        // Parses text followed by "<root/>" and checks that the first entity is
        // the empty root element. (row, col) is the position just after text;
        // the length of "<root/>" is added to col to get the expected parser
        // position.
        static void test(alias func)(string text, int row, int col, size_t line = __LINE__)
        {
            auto pos = TextPos(row, col + cast(int)"<root/>".length);
            auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
                                                             "unittest failure 1", __FILE__, line);
            enforce!AssertError(range.front.type == EntityType.elementEmpty, "unittest failure 2", __FILE__, line);
            enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
        }

        // Parses text followed by "<root/>" and checks that parseXML throws an
        // XMLParsingException positioned at (row, col).
        static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
        {
            auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        static foreach(func; testRangeFuncs)
        {
            // Name only, with varying whitespace.
            test!func("<!DOCTYPE name>", 1, 16);
            test!func("<!DOCTYPE \n\n\n name>", 4, 7);
            test!func("<!DOCTYPE name \n\n\n >", 4, 3);

            // Empty internal subset.
            test!func("<!DOCTYPE name []>", 1, 19);
            test!func("<!DOCTYPE \n\n\n name []>", 4, 10);
            test!func("<!DOCTYPE name \n\n\n []>", 4, 5);

            // Quoted literals containing the other kind of quote.
            test!func(`<!DOCTYPE name PUBLIC "'''" '"""'>`, 1, 35);
            test!func(`<!DOCTYPE name PUBLIC "'''" '"""' []>`, 1, 38);
            test!func(`<!DOCTYPE name PUBLIC 'foo' "'''">`, 1, 35);
            test!func(`<!DOCTYPE name PUBLIC 'foo' '"""' []>`, 1, 38);

            // Each kind of markupdecl inside the internal subset.
            test!func("<!DOCTYPE name [ <!ELEMENT foo EMPTY > ]>", 1, 42);
            test!func("<!DOCTYPE name [ <!ELEMENT bar ANY > ]>", 1, 40);
            test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA) > ]>", 1, 48);
            test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA | foo)> ]>", 1, 53);
            test!func("<!DOCTYPE name [ <!ELEMENT kids (foo) > ]>", 1, 43);
            test!func("<!DOCTYPE name [ <!ELEMENT kids (foo | bar)> ]>", 1, 48);

            test!func("<!DOCTYPE name [ <!ATTLIST foo> ]>", 1, 35);
            test!func("<!DOCTYPE name [ <!ATTLIST foo def CDATA #REQUIRED> ]>", 1, 55);

            test!func(`<!DOCTYPE name [ <!ENTITY foo "bar"> ]>`, 1, 40);
            test!func(`<!DOCTYPE name [ <!ENTITY foo 'bar'> ]>`, 1, 40);
            test!func(`<!DOCTYPE name [ <!ENTITY foo SYSTEM 'sys'> ]>`, 1, 47);
            test!func(`<!DOCTYPE name [ <!ENTITY foo PUBLIC "'''" 'sys'> ]>`, 1, 53);

            test!func(`<!DOCTYPE name [ <!NOTATION note PUBLIC 'blah'> ]>`, 1, 51);

            test!func("<!DOCTYPE name [ <?pi> ]>", 1, 26);

            test!func("<!DOCTYPE name [ <!-- coment --> ]>", 1, 36);

            // Several declarations together, on one line and across lines.
            test!func("<!DOCTYPE name [ <?pi> <!----> <!ELEMENT blah EMPTY> ]>", 1, 56);
            test!func("<!DOCTYPE \nname\n[\n<?pi> \n <!---->\n<!ENTITY foo '\n\n'\n>\n]>", 10, 3);

            // "]]>" inside a quoted entity value must not end the subset.
            test!func("<!DOCTYPE doc [\n" ~
                      "<!ENTITY e '<![CDATA[Tim Michael]]>'>\n" ~
                      "]>\n", 4, 1);

            testFail!func("<!DOCTYP name>", 1, 2);
            testFail!func("<!DOCTYPEname>", 1, 10);
            testFail!func("<!DOCTYPE name1><!DOCTYPE name2>", 1, 18);
            testFail!func("<!DOCTYPE\n\nname1><!DOCTYPE name2>", 3, 8);
            testFail!func("<!DOCTYPE name [ ]<!--comment-->", 1, 19);

            // FIXME This really should have the exception point at the quote and
            // say that it couldn't find the matching quote rather than point at
            // the character after it and say that it couldn't find a quote, but
            // that requires reworking some helper functions with better error
            // messages in mind.
            testFail!func(`<!DOCTYPE student SYSTEM "student".dtd"[` ~
                          "\n<!ELEMENT student (#PCDATA)>\n" ~
                          "]>", 1, 40);
        }
    }
2574 
2575 
2576     // Parse a start tag or empty element tag. It could be the root element, or
2577     // it could be a sub-element.
2578     // < was already removed from the front of the input.
2579     void _parseElementStart()
2580     {
2581         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2582         _savedText.pos = _text.pos;
2583         _savedText.input = _text.takeUntilAndDrop!(">", true)();
2584 
2585         if(_savedText.input.empty)
2586             throw new XMLParsingException("Tag missing name", _savedText.pos);
2587         if(_savedText.input.front == '/')
2588             throw new XMLParsingException("Invalid end tag", _savedText.pos);
2589 
2590         if(_savedText.input.length > 1)
2591         {
2592             auto temp = _savedText.input.save;
2593             temp.popFrontN(temp.length - 1);
2594             if(temp.front == '/')
2595             {
2596                 _savedText.input = _savedText.input.takeExactly(_savedText.input.length - 1);
2597 
2598                 static if(config.splitEmpty == SplitEmpty.no)
2599                 {
2600                     _type = EntityType.elementEmpty;
2601                     _tagStack.sawEntity();
2602                     _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2603                 }
2604                 else
2605                 {
2606                     _type = EntityType.elementStart;
2607                     _tagStack.sawEntity();
2608                     _grammarPos = GrammarPos.splittingEmpty;
2609                 }
2610             }
2611             else
2612             {
2613                 _type = EntityType.elementStart;
2614                 _tagStack.sawEntity();
2615                 _grammarPos = GrammarPos.contentCharData1;
2616             }
2617         }
2618         else
2619         {
2620             _type = EntityType.elementStart;
2621             _tagStack.sawEntity();
2622             _grammarPos = GrammarPos.contentCharData1;
2623         }
2624 
2625         _name = _savedText.takeName();
2626         // The attributes should be all that's left in savedText.
2627         if(_tagStack.atMax)
2628         {
2629             auto temp = _savedText.save;
2630             auto attrChecker = _tagStack.attrChecker;
2631 
2632             while(true)
2633             {
2634                 immutable wasWS = stripWS(temp);
2635                 if(temp.input.empty)
2636                     break;
2637                 if(!wasWS)
2638                     throw new XMLParsingException("Whitespace missing before attribute name", temp.pos);
2639 
2640                 immutable attrPos = temp.pos;
2641                 attrChecker.pushAttr(temp.takeName!'='(), attrPos);
2642                 stripWS(temp);
2643 
2644                 checkNotEmpty(temp);
2645                 if(temp.input.front != '=')
2646                     throw new XMLParsingException("= missing", temp.pos);
2647                 popFrontAndIncCol(temp);
2648 
2649                 stripWS(temp);
2650                 temp.takeAttValue();
2651             }
2652 
2653             attrChecker.checkAttrs();
2654         }
2655     }
2656 
2657     static if(compileInTests) unittest
2658     {
2659         import core.exception : AssertError;
2660         import std.algorithm.comparison : equal;
2661         import std.exception : assertNotThrown, collectException, enforce;
2662         import dxml.internal : codeLen, testRangeFuncs;
2663 
2664         static void test(alias func)(string text, EntityType type, string name,
2665                                      int row, int col, size_t line = __LINE__)
2666         {
2667             auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
2668             enforce!AssertError(range.front.type == type, "unittest failure 1", __FILE__, line);
2669             enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
2670             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2671         }
2672 
2673         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2674         {
2675             auto xml = func(text);
2676             auto e = collectException!XMLParsingException(parseXML(func(text)));
2677             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2678             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2679         }
2680 
2681         static foreach(func; testRangeFuncs)
2682         {
2683             test!func("<a/>", EntityType.elementEmpty, "a", 1, 5);
2684             test!func("<a></a>", EntityType.elementStart, "a", 1, 4);
2685             test!func("<root/>", EntityType.elementEmpty, "root", 1, 8);
2686             test!func("<root></root>", EntityType.elementStart, "root", 1, 7);
2687             test!func("<foo/>", EntityType.elementEmpty, "foo", 1, 7);
2688             test!func("<foo></foo>", EntityType.elementStart, "foo", 1, 6);
2689             test!func("<foo       />", EntityType.elementEmpty, "foo", 1, 14);
2690             test!func("<foo       ></foo>", EntityType.elementStart, "foo", 1, 13);
2691             test!func("<foo  \n\n\n />", EntityType.elementEmpty, "foo", 4, 4);
2692             test!func("<foo  \n\n\n ></foo>", EntityType.elementStart, "foo", 4, 3);
2693             test!func("<foo.></foo.>", EntityType.elementStart, "foo.", 1, 7);
2694             test!func(`<京都市></京都市>`, EntityType.elementStart, "京都市", 1, codeLen!(func, `<京都市>`) + 1);
2695 
2696             testFail!func(`<.foo/>`, 1, 2);
2697             testFail!func(`<>`, 1, 2);
2698             testFail!func(`</>`, 1, 2);
2699             testFail!func(`</foo>`, 1, 2);
2700 
2701             {
2702                 auto range = assertNotThrown!XMLParsingException(parseXML!simpleXML(func("<root/>")));
2703                 assert(range.front.type == EntityType.elementStart);
2704                 assert(equal(range.front.name, "root"));
2705                 assert(range._text.pos == TextPos(1, 8));
2706                 assertNotThrown!XMLParsingException(range.popFront());
2707                 assert(range.front.type == EntityType.elementEnd);
2708                 assert(equal(range.front.name, "root"));
2709                 assert(range._text.pos == TextPos(1, 8));
2710             }
2711         }
2712     }
2713 
2714 
2715     // Parse an end tag. It could be the root element, or it could be a
2716     // sub-element.
2717     // </ was already removed from the front of the input.
2718     void _parseElementEnd()
2719     {
2720         if(_text.input.empty)
2721             throw new XMLParsingException("Unterminated end tag", _text.pos);
2722         _entityPos = TextPos(_text.pos.line, _text.pos.col - 2);
2723         _type = EntityType.elementEnd;
2724         _tagStack.sawEntity();
2725         immutable namePos = _text.pos;
2726         _name = _text.takeName!'>'();
2727         stripWS(_text);
2728         if(_text.input.empty || _text.input.front != '>')
2729         {
2730             throw new XMLParsingException("There can only be whitespace between an end tag's name and the >",
2731                                           _text.pos);
2732         }
2733         popFrontAndIncCol(_text);
2734         _tagStack.popTag(_name.save, namePos);
2735         _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2736     }
2737 
2738     static if(compileInTests) unittest
2739     {
2740         import core.exception : AssertError;
2741         import std.algorithm.comparison : equal;
2742         import std.exception : assertNotThrown, collectException, enforce;
2743         import dxml.internal : codeLen, testRangeFuncs;
2744 
2745         static void test(alias func)(string text, string name, int row, int col, size_t line = __LINE__)
2746         {
2747             auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
2748             range.popFront();
2749             enforce!AssertError(range.front.type == EntityType.elementEnd, "unittest failure 1", __FILE__, line);
2750             enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
2751             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2752         }
2753 
2754         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2755         {
2756             auto range = parseXML(func(text));
2757             auto e = collectException!XMLParsingException(range.popFront());
2758             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2759             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2760         }
2761 
2762         static foreach(func; testRangeFuncs)
2763         {
2764             test!func("<a></a>", "a", 1, 8);
2765             test!func("<foo></foo>", "foo", 1, 12);
2766             test!func("<foo    ></foo    >", "foo", 1, 20);
2767             test!func("<foo \n ></foo \n >", "foo", 3, 3);
2768             test!func("<foo>\n\n\n</foo>", "foo", 4, 7);
2769             test!func("<foo.></foo.>", "foo.", 1, 14);
2770             test!func(`<京都市></京都市>`, "京都市", 1, codeLen!(func, `<京都市></京都市>`) + 1);
2771 
2772             testFail!func(`<foo></ foo>`, 1, 8);
2773             testFail!func(`<foo></bar>`, 1, 8);
2774             testFail!func(`<foo></fo>`, 1, 8);
2775             testFail!func(`<foo></food>`, 1, 8);
2776             testFail!func(`<a></>`, 1, 6);
2777             testFail!func(`<a></`, 1, 6);
2778             testFail!func(`<a><`, 1, 5);
2779             testFail!func(`<a></a b='42'>`, 1, 8);
2780         }
2781     }
2782 
2783 
2784     // GrammarPos.contentCharData1
2785     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2786     // Parses at either CharData?. Nothing from the CharData? (or what's after it
2787     // if it's not there) has been consumed.
2788     void _parseAtContentCharData()
2789     {
2790         checkNotEmpty(_text);
2791         auto orig = _text.save;
2792         stripWS(_text);
2793         checkNotEmpty(_text);
2794         if(_text.input.front != '<')
2795         {
2796             _text = orig;
2797             _entityPos = _text.pos;
2798             _type = EntityType.text;
2799             _tagStack.sawEntity();
2800             _savedText.pos = _text.pos;
2801             _savedText.input = _text.takeUntilAndDrop!"<"();
2802             checkText!false(_savedText);
2803             checkNotEmpty(_text);
2804             if(_text.input.front == '/')
2805             {
2806                 popFrontAndIncCol(_text);
2807                 _grammarPos = GrammarPos.endTag;
2808             }
2809             else
2810                 _grammarPos = GrammarPos.contentMid;
2811         }
2812         else
2813         {
2814             popFrontAndIncCol(_text);
2815             checkNotEmpty(_text);
2816             if(_text.input.front == '/')
2817             {
2818                 popFrontAndIncCol(_text);
2819                 _parseElementEnd();
2820             }
2821             else
2822                 _parseAtContentMid();
2823         }
2824     }
2825 
2826     static if(compileInTests) unittest
2827     {
2828         import core.exception : AssertError;
2829         import std.algorithm.comparison : equal;
2830         import std.exception : assertNotThrown, collectException, enforce;
2831         import dxml.internal : codeLen, testRangeFuncs;
2832 
2833         static void test(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
2834         {
2835             auto pos = TextPos(row, col + (cast(int)(row == 1 ? "<root></" : "</").length));
2836             auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
2837             assertNotThrown!XMLParsingException(range.popFront());
2838             enforce!AssertError(range.front.type == EntityType.text, "unittest failure 1", __FILE__, line);
2839             enforce!AssertError(equal(range.front.text, text), "unittest failure 2", __FILE__, line);
2840             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2841         }
2842 
2843         static void testFail(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
2844         {
2845             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2846             auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
2847             auto e = collectException!XMLParsingException(range.popFront());
2848             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2849             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
2850         }
2851 
2852         static foreach(func; testRangeFuncs)
2853         {
2854             static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
2855             {
2856                 test!(func, toer)("hello world", 1, 12);
2857                 test!(func, toer)("\nhello\n\nworld", 4, 6);
2858                 test!(func, toer)("京都市", 1, codeLen!(func, "京都市") + 1);
2859                 test!(func, toer)("&#x42;", 1, 7);
2860                 test!(func, toer)("]", 1, 2);
2861                 test!(func, toer)("]]", 1, 3);
2862                 test!(func, toer)("]>", 1, 3);
2863                 test!(func, toer)("foo \n\n &lt; \n bar", 4, 5);
2864 
2865                 testFail!(func, toer)("&", 1, 1);
2866                 testFail!(func, toer)("&;", 1, 1);
2867                 testFail!(func, toer)("&f", 1, 1);
2868                 testFail!(func, toer)("\v", 1, 1);
2869                 testFail!(func, toer)("hello&world", 1, 6);
2870                 testFail!(func, toer)("hello\vworld", 1, 6);
2871                 testFail!(func, toer)("hello&;world", 1, 6);
2872                 testFail!(func, toer)("hello&#;world", 1, 6);
2873                 testFail!(func, toer)("hello&#x;world", 1, 6);
2874                 testFail!(func, toer)("hello&.;world", 1, 6);
2875                 testFail!(func, toer)("\n\nfoo\nbar&.;", 4, 4);
2876 
2877                 testFail!(func, toer)("]]>", 1, 1);
2878                 testFail!(func, toer)("foo]]>bar", 1, 4);
2879 
2880                 static if(toer == ThrowOnEntityRef.yes)
2881                 {
2882                     testFail!(func, toer)("&foo; &bar baz", 1, 1);
2883                     testFail!(func, toer)("foo \n\n &ampe; \n bar", 3, 2);
2884                 }
2885                 else
2886                 {
2887                     testFail!(func, toer)("&foo; &bar baz", 1, 7);
2888                     test!(func, toer)("foo \n\n &ampe; \n bar", 4, 5);
2889                 }
2890             }
2891         }
2892     }
2893 
2894 
2895     // GrammarPos.contentMid
2896     // content     ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2897     // The text right after the start tag was what was parsed previously. So,
2898     // that first CharData? was what was parsed last, and this parses starting
2899     // right after. The < should have already been removed from the input.
2900     void _parseAtContentMid()
2901     {
2902         // Note that References are treated as part of the CharData and not
2903         // parsed out by the EntityRange (see EntityRange.text).
2904 
2905         switch(_text.input.front)
2906         {
2907             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2908             // CDSect  ::= CDStart CData CDEnd
2909             // CDStart ::= '<![CDATA['
2910             // CData   ::= (Char* - (Char* ']]>' Char*))
2911             // CDEnd   ::= ']]>'
2912             case '!':
2913             {
2914                 popFrontAndIncCol(_text);
2915                 if(_text.stripStartsWith("--"))
2916                 {
2917                     _parseComment();
2918                     static if(config.skipComments == SkipComments.yes)
2919                         _parseAtContentCharData();
2920                     else
2921                         _grammarPos = GrammarPos.contentCharData2;
2922                 }
2923                 else if(_text.stripStartsWith("[CDATA["))
2924                     _parseCDATA();
2925                 else
2926                 {
2927                     immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2928                     throw new XMLParsingException("Expected Comment or CDATA section", bangPos);
2929                 }
2930                 break;
2931             }
2932             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2933             case '?':
2934             {
2935                 _parsePI();
2936                 _grammarPos = GrammarPos.contentCharData2;
2937                 static if(config.skipPI == SkipPI.yes)
2938                     popFront();
2939                 break;
2940             }
2941             // element ::= EmptyElemTag | STag content ETag
2942             default:
2943             {
2944                 _parseElementStart();
2945                 break;
2946             }
2947         }
2948     }
2949 
2950 
2951     // This parses the Misc* that come after the root element.
2952     void _parseAtEndMisc()
2953     {
2954         // Misc ::= Comment | PI | S
2955 
2956         stripWS(_text);
2957 
2958         if(_text.input.empty)
2959         {
2960             _grammarPos = GrammarPos.documentEnd;
2961             return;
2962         }
2963 
2964         if(_text.input.front != '<')
2965             throw new XMLParsingException("Expected <", _text.pos);
2966         popFrontAndIncCol(_text);
2967         checkNotEmpty(_text);
2968 
2969         switch(_text.input.front)
2970         {
2971             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2972             case '!':
2973             {
2974                 popFrontAndIncCol(_text);
2975                 if(_text.stripStartsWith("--"))
2976                 {
2977                     _parseComment();
2978                     static if(config.skipComments == SkipComments.yes)
2979                         _parseAtEndMisc();
2980                     break;
2981                 }
2982                 immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2983                 throw new XMLParsingException("Expected Comment", bangPos);
2984             }
2985             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2986             case '?':
2987             {
2988                 _parsePI();
2989                 static if(config.skipPI == SkipPI.yes)
2990                     popFront();
2991                 break;
2992             }
2993             default: throw new XMLParsingException("Must be a comment or PI", _text.pos);
2994         }
2995     }
2996 
2997     // Used for keeping track of the names of start tags so that end tags can be
2998     // verified as well as making it possible to avoid redoing other validation.
2999     // We keep track of the total number of entities which have been parsed thus
3000     // far so that only whichever EntityRange is farthest along in parsing
3001     // actually adds or removes tags from the TagStack, and the parser can skip
3002     // some of the validation for ranges that are farther behind. That way, the
3003     // end tags get verified, but we only have one stack. If the stack were
3004     // duplicated with every call to save, then there would be a lot more
3005     // allocations, which we don't want. But because we only need to verify the
3006     // end tags once, we can get away with having a shared tag stack. The cost
3007     // is that we have to keep track of how many tags we've parsed so that we
3008     // know if an EntityRange should actually be pushing or popping tags from
3009     // the stack, but that's a lot cheaper than duplicating the stack, and it's
3010     // a lot less annoying then making EntityRange an input range and not a
3011     // forward range or making it a cursor rather than a range.
3012     struct TagStack
3013     {
3014         void pushTag(Taken tagName)
3015         {
3016             if(entityCount++ == state.maxEntities)
3017             {
3018                 ++state.maxEntities;
3019                 put(state.tags, tagName);
3020             }
3021             ++depth;
3022         }
3023 
3024         void popTag(Taken tagName, TextPos pos)
3025         {
3026             import std.algorithm : equal;
3027             import std.format : format;
3028             if(entityCount++ == state.maxEntities)
3029             {
3030                 assert(!state.tags.data.empty);
3031                 if(!equal(state.tags.data.back.save, tagName.save))
3032                 {
3033                     enum fmt = "Name of end tag </%s> does not match corresponding start tag <%s>";
3034                     throw new XMLParsingException(format!fmt(tagName, state.tags.data.back), pos);
3035                 }
3036                 ++state.maxEntities;
3037                 state.tags.shrinkTo(state.tags.data.length - 1);
3038             }
3039             --depth;
3040         }
3041 
3042         @property auto attrChecker()
3043         {
3044             assert(atMax);
3045 
3046             static struct AttrChecker
3047             {
3048                 void pushAttr(Taken attrName, TextPos attrPos)
3049                 {
3050                     put(state.attrs, Attribute(attrName, attrPos));
3051                 }
3052 
3053                 void checkAttrs()
3054                 {
3055                     import std.algorithm.comparison : cmp, equal;
3056                     import std.algorithm.sorting : sort;
3057                     import std.conv : to;
3058 
3059                     if(state.attrs.data.length < 2)
3060                         return;
3061 
3062                     sort!((a,b) => cmp(a.taken.save, b.taken.save) < 0)(state.attrs.data);
3063                     auto prev = state.attrs.data.front;
3064                     foreach(attr; state.attrs.data[1 .. $])
3065                     {
3066                         if(equal(prev.taken, attr.taken))
3067                             throw new XMLParsingException("Duplicate attribute name", attr.pos);
3068                         prev = attr;
3069                     }
3070                 }
3071 
3072                 ~this()
3073                 {
3074                     state.attrs.clear();
3075                 }
3076 
3077                 SharedState* state;
3078             }
3079 
3080             return AttrChecker(state);
3081         }
3082 
3083         void sawEntity()
3084         {
3085             if(entityCount++ == state.maxEntities)
3086                 ++state.maxEntities;
3087         }
3088 
3089         @property bool atMax()
3090         {
3091             return entityCount == state.maxEntities;
3092         }
3093 
3094         struct Attribute
3095         {
3096             Taken taken;
3097             TextPos pos;
3098         }
3099 
3100         struct SharedState
3101         {
3102             import std.array : Appender;
3103 
3104             Appender!(Taken[]) tags;
3105             Appender!(Attribute[]) attrs;
3106             size_t maxEntities;
3107         }
3108 
3109         static create()
3110         {
3111             TagStack tagStack;
3112             tagStack.state = new SharedState;
3113             tagStack.state.tags.reserve(10);
3114             tagStack.state.attrs.reserve(10);
3115             return tagStack;
3116         }
3117 
3118         SharedState* state;
3119         size_t entityCount;
3120         int depth;
3121     }
3122 
3123     static if(compileInTests) unittest
3124     {
3125         import core.exception : AssertError;
3126         import std.algorithm.comparison : equal;
3127         import std.exception : assertNotThrown, collectException, enforce;
3128         import dxml.internal : testRangeFuncs;
3129 
3130         static void test(alias func)(string text, size_t line = __LINE__)
3131         {
3132             auto xml = func(text);
3133             static foreach(config; someTestConfigs)
3134             {{
3135                 auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
3136                                                                  __FILE__, line);
3137                 assertNotThrown!XMLParsingException(walkLength(range), "unittest failure 2", __FILE__, line);
3138             }}
3139         }
3140 
3141         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
3142         {
3143             auto xml = func(text);
3144             static foreach(config; someTestConfigs)
3145             {{
3146                 auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
3147                                                                  __FILE__, line);
3148                 auto e = collectException!XMLParsingException(walkLength(range));
3149                 enforce!AssertError(e !is null, "unittest failure 2", __FILE__, line);
3150                 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
3151             }}
3152         }
3153 
3154         static foreach(func; testRangeFuncs)
3155         {
3156             test!func("<root></root>");
3157             test!func("<root><a></a></root>");
3158             test!func("<root><a><b></b></a></root>");
3159             test!func("<root><a><b></b></a></root>");
3160             test!func("<root><a><b></b></a><foo><bar></bar></foo></root>");
3161             test!func("<a>\n" ~
3162                       "    <b>\n" ~
3163                       "        <c>\n" ~
3164                       "            <d>\n" ~
3165                       "                <e>\n" ~
3166                       "                    <f>\n" ~
3167                       "                        <g>\n" ~
3168                       "                            <h>\n" ~
3169                       "                                 <i><i><i><i>\n" ~
3170                       "                                 </i></i></i></i>\n" ~
3171                       "                                 <i>\n" ~
3172                       "                                     <j>\n" ~
3173                       "                                         <k>\n" ~
3174                       "                                             <l>\n" ~
3175                       "                                                 <m>\n" ~
3176                       "                                                     <n>\n" ~
3177                       "                                                         <o>\n" ~
3178                       "                                                             <p>\n" ~
3179                       "                                                                 <q>\n" ~
3180                       "                                                                     <r>\n" ~
3181                       "                                                                         <s>\n" ~
3182                       "          <!-- comment --> <?pi?> <t><u><v></v></u></t>\n" ~
3183                       "                                                                         </s>\n" ~
3184                       "                                                                     </r>\n" ~
3185                       "                                                                 </q>\n" ~
3186                       "                                                </p></o></n></m>\n" ~
3187                       "                                                               </l>\n" ~
3188                       "                    </k>\n" ~
3189                       "           </j>\n" ~
3190                       "</i></h>" ~
3191                       "                        </g>\n" ~
3192                       "                    </f>\n" ~
3193                       "                </e>\n" ~
3194                       "            </d>\n" ~
3195                       "        </c>\n" ~
3196                       "    </b>\n" ~
3197                       "</a>");
3198             test!func(`<京都市></京都市>`);
3199 
3200             testFail!func(`<a>`, 1, 4);
3201             testFail!func(`<foo></foobar>`, 1, 8);
3202             testFail!func(`<foobar></foo>`, 1, 11);
3203             testFail!func(`<a><\a>`, 1, 5);
3204             testFail!func(`<a><a/>`, 1, 8);
3205             testFail!func(`<a><b>`, 1, 7);
3206             testFail!func(`<a><b><c>`, 1, 10);
3207             testFail!func(`<a></a><b>`, 1, 9);
3208             testFail!func(`<a></a><b></b>`, 1, 9);
3209             testFail!func(`<a><b></a></b>`, 1, 9);
3210             testFail!func(`<a><b><c></c><b></a>`, 1, 19);
3211             testFail!func(`<a><b></c><c></b></a>`, 1, 9);
3212             testFail!func(`<a><b></c></b></a>`, 1, 9);
3213             testFail!func("<a>\n" ~
3214                           "    <b>\n" ~
3215                           "        <c>\n" ~
3216                           "            <d>\n" ~
3217                           "                <e>\n" ~
3218                           "                    <f>\n" ~
3219                           "                    </f>\n" ~
3220                           "                </e>\n" ~
3221                           "            </d>\n" ~
3222                           "        </c>\n" ~
3223                           "    </b>\n" ~
3224                           "<a>", 12, 4);
3225             testFail!func("<a>\n" ~
3226                           "    <b>\n" ~
3227                           "        <c>\n" ~
3228                           "            <d>\n" ~
3229                           "                <e>\n" ~
3230                           "                    <f>\n" ~
3231                           "                    </f>\n" ~
3232                           "                </e>\n" ~
3233                           "            </d>\n" ~
3234                           "        </c>\n" ~
3235                           "    </b>\n" ~
3236                           "</q>", 12, 3);
3237         }
3238     }
3239 
3240 
3241     struct Text(R)
3242     {
3243         alias config = cfg;
3244         alias Input = R;
3245 
3246         Input input;
3247         TextPos pos;
3248 
3249         @property save() { return typeof(this)(input.save, pos); }
3250     }
3251 
3252 
3253     alias Taken = typeof(takeExactly(byCodeUnit(R.init), 42));
3254 
3255 
3256     EntityType _type;
3257     TextPos _entityPos;
3258     auto _grammarPos = GrammarPos.documentStart;
3259 
3260     Taken _name;
3261     TagStack _tagStack;
3262 
3263     Text!(typeof(byCodeUnit(R.init))) _text;
3264     Text!Taken _savedText;
3265 
3266 
3267     this(R xmlText)
3268     {
3269         _tagStack = TagStack.create();
3270         _text.input = byCodeUnit(xmlText);
3271 
3272         // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
3273         _savedText = typeof(_savedText).init;
3274         _name = typeof(_name).init;
3275 
3276         popFront();
3277     }
3278 }
3279 
/// Ditto
EntityRange!(config, R) parseXML(Config config = Config.init, R)(R xmlText)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    // The EntityRange constructor does all of the work, including parsing up
    // to the first entity.
    alias Result = typeof(return);
    return Result(xmlText);
}
3286 
///
unittest
{
    import std.range.primitives : walkLength;

    auto xml = "<?xml version='1.0'?>\n" ~
               "<?instruction start?>\n" ~
               "<foo attr='42'>\n" ~
               "    <bar/>\n" ~
               "    <!-- no comment -->\n" ~
               "    <baz hello='world'>\n" ~
               "    nothing to say.\n" ~
               "    nothing at all...\n" ~
               "    </baz>\n" ~
               "</foo>\n" ~
               "<?some foo?>";

    // With the default config, every kind of entity is reported.
    {
        auto entities = parseXML(xml);

        // The XML declaration is skipped, so the first entity is the
        // processing instruction which follows it.
        assert(entities.front.type == EntityType.pi);
        assert(entities.front.name == "instruction");
        assert(entities.front.text == "start");

        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "foo");

        {
            auto fooAttrs = entities.front.attributes;
            assert(walkLength(fooAttrs.save) == 1);
            assert(fooAttrs.front.name == "attr");
            assert(fooAttrs.front.value == "42");
        }

        entities.popFront();
        assert(entities.front.type == EntityType.elementEmpty);
        assert(entities.front.name == "bar");

        entities.popFront();
        assert(entities.front.type == EntityType.comment);
        assert(entities.front.text == " no comment ");

        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "baz");

        {
            auto bazAttrs = entities.front.attributes;
            assert(walkLength(bazAttrs.save) == 1);
            assert(bazAttrs.front.name == "hello");
            assert(bazAttrs.front.value == "world");
        }

        entities.popFront();
        assert(entities.front.type == EntityType.text);
        assert(entities.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </baz>
        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </foo>

        entities.popFront();
        assert(entities.front.type == EntityType.pi);
        assert(entities.front.name == "some");
        assert(entities.front.text == "foo");

        entities.popFront();
        assert(entities.empty);
    }

    // With simpleXML, processing instructions and comments are skipped, and
    // empty element tags are split into a start tag and an end tag.
    {
        auto entities = parseXML!simpleXML(xml);

        // <?instruction start?> was skipped.

        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "foo");

        {
            auto fooAttrs = entities.front.attributes;
            assert(walkLength(fooAttrs.save) == 1);
            assert(fooAttrs.front.name == "attr");
            assert(fooAttrs.front.value == "42");
        }

        // <bar/> is treated the same as <bar></bar> so that code does not
        // have to explicitly handle empty tags.
        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "bar");
        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd);
        assert(entities.front.name == "bar");

        // <!-- no comment --> was skipped.

        entities.popFront();
        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "baz");

        {
            auto bazAttrs = entities.front.attributes;
            assert(walkLength(bazAttrs.save) == 1);
            assert(bazAttrs.front.name == "hello");
            assert(bazAttrs.front.value == "world");
        }

        entities.popFront();
        assert(entities.front.type == EntityType.text);
        assert(entities.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </baz>
        entities.popFront();
        assert(entities.front.type == EntityType.elementEnd); // </foo>

        // <?some foo?> was skipped as well, so nothing is left.
        entities.popFront();
        assert(entities.empty);
    }
}
3409 
// Test the state of the range immediately after parseXML returns.
// Each input is passed through func so that every range type in
// testRangeFuncs is actually exercised rather than just string.
unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        // The XML declaration is never reported.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<?xml?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading comment is the first entity unless comments are skipped.
        static foreach(config; [Config.init, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, "no comment"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading processing instruction is the first entity unless
        // processing instructions are skipped.
        static foreach(config; [Config.init, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "private"));
            assert(equal(range.front.text, "eye"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // With nothing in front of it, the root element comes first.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}
    }
}
3466 
// Test various invalid states that didn't seem to fit well into tests elsewhere.
unittest
{
    import core.exception : AssertError;
    import std.exception : collectException, enforce;
    import dxml.internal : testRangeFuncs;

    // Parses text (converted to a range by func) all the way to the end under
    // every config in someTestConfigs and requires that an XMLParsingException
    // is thrown whose pos is TextPos(row, col). Failures are raised as
    // AssertErrors that carry the caller's line number.
    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        static foreach(config; someTestConfigs)
        {{
            // Both the call to parseXML and the iteration are inside the
            // delegate, so an exception from either one is collected.
            auto e = collectException!XMLParsingException(
                {
                    auto range = parseXML!config(xml.save);
                    while(!range.empty)
                        range.popFront();
                }());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }}
    }

    static foreach(func; testRangeFuncs)
    {{
        // A second element after the root element.
        testFail!func("<root></root><invalid></invalid>", 1, 15);
        testFail!func("<root></root><invalid/>", 1, 15);
        testFail!func("<root/><invalid></invalid>", 1, 9);
        testFail!func("<root/><invalid/>", 1, 9);

        // Text after the root element.
        testFail!func("<root></root>invalid", 1, 14);
        testFail!func("<root/>invalid", 1, 8);

        // Text or an element following a PI that comes after the root element.
        testFail!func("<root/><?pi?>invalid", 1, 14);
        testFail!func("<root/><?pi?><invalid/>", 1, 15);

        // A DOCTYPE or a stray end tag after the root element.
        testFail!func("<root/><!DOCTYPE foo>", 1, 9);
        testFail!func("<root/></root>", 1, 9);

        // Text at the very start of the document.
        testFail!func("invalid<root></root>", 1, 1);
        testFail!func("invalid<?xml?><root></root>", 1, 1);
        testFail!func("invalid<!DOCTYPE foo><root></root>", 1, 1);
        testFail!func("invalid<!--comment--><root></root>", 1, 1);
        testFail!func("invalid<?Poirot?><root></root>", 1, 1);

        // Text between the leading markup and the root element.
        testFail!func("<?xml?>invalid<root></root>", 1, 8);
        testFail!func("<!DOCTYPE foo>invalid<root></root>", 1, 15);
        testFail!func("<!--comment-->invalid<root></root>", 1, 15);
        testFail!func("<?Poirot?>invalid<root></root>", 1, 11);

        // Documents that never open a root element.
        testFail!func("<?xml?>", 1, 8);
        testFail!func("<!DOCTYPE name>", 1, 16);
        testFail!func("<?Sherlock?>", 1, 13);
        testFail!func("<?Poirot?><?Sherlock?><?Holmes?>", 1, 33);
        testFail!func("<?Poirot?></Poirot>", 1, 12);
        testFail!func("</Poirot>", 1, 2);
        testFail!func("<", 1, 2);
        testFail!func(`</`, 1, 2);
        testFail!func(`</a`, 1, 2);
        testFail!func(`</a>`, 1, 2);


        // "]]>" appearing in the text of an element.
        testFail!func("<doc>]]></doc>", 1, 6);

        // Whitespace in front of the XML declaration.
        testFail!func(" <?xml?><root/>", 1, 1);
        testFail!func("\n<?xml?><root/>", 1, 1);
    }}
}
3535 
// Test that parseXML and EntityRange's properties work with @safe.
// pure would be nice too, but at minimum, the use of format for exception
// messages, and the use of assumeSafeAppend prevent it. It may or may not be
// worth trying to fix that.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    auto xml = "<root>\n" ~
               "    <![CDATA[nothing]]>\n" ~
               "    <foo a='42'/>\n" ~
               "</root>";

    // NOTE(review): func is never used in the loop body - parseXML is called
    // on the string xml directly - so every iteration repeats the same
    // string-based check. Presumably parseXML(func(xml)) was intended; that
    // would also require equal() for the attr.name/attr.value comparisons
    // below. TODO confirm that every entry in testRangeFuncs is usable from
    // @safe code before changing it.
    static foreach(func; testRangeFuncs)
    {{
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(equal(range.front.name, "root"));
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.cdata);
        assert(equal(range.front.text, "nothing"));
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
        assert(equal(range.front.name, "foo"));
        {
            // Exercise the attribute range's primitives (save, front,
            // popFront, empty) from @safe code as well.
            auto attrs = range.front.attributes;
            auto saved = attrs.save;
            auto attr = attrs.front;
            assert(attr.name == "a");
            assert(attr.value == "42");
            attrs.popFront();
            assert(attrs.empty);
        }
        // Only checks that save can be called from @safe code.
        auto saved = range.save;
    }}
}
3575 
3576 
// A stand-in forward range of char whose only purpose is to give EntityRange
// something to be instantiated with so that the unittest blocks in EntityRange
// get triggered without compiling them in normally. None of its members is
// ever meant to run, which is why each one is just assert(0).
struct EntityRangeCompileTests
{
@safe pure nothrow @nogc:

    @property bool empty()
    {
        assert(0);
    }

    @property char front()
    {
        assert(0);
    }

    void popFront()
    {
        assert(0);
    }

    @property typeof(this) save()
    {
        assert(0);
    }
}
3586 
// Declaring a variable of this type forces EntityRange to be instantiated with
// the default config and the dummy range type; the variable itself is unused.
unittest
{
    EntityRange!(Config.init, EntityRangeCompileTests) _entityRangeTests;
}
3591 
3592 
3593 /++
3594     Whether the given type is a forward range of attributes.
3595 
3596     Essentially, an attribute range must be a forward range where
3597 
3598     $(UL
3599         $(LI each element has the members $(D name), $(D value), and $(D pos))
3600         $(LI $(D name) and $(D value) are forward ranges of characters)
3601         $(LI $(D name) and $(D value) have the same type)
3602         $(LI $(D pos) is a $(LREF TextPos)))
3603 
3604     Normally, an attribute range would come from
3605     $(LREF EntityRange.Entity.attributes) or
3606     $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom), but
3607     as long as a range has the correct API, it qualifies as an attribute range.
3608 
3609     See_Also: $(LREF EntityRange.Entity.Attribute)$(BR)
3610               $(LREF EntityRange.Entity.attributes)$(BR)
3611               $(REF_ALTTEXT DOMEntity.Attribute, DOMEntity.Attribute, dxml, dom)$(BR)
3612               $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
3613   +/
template isAttrRange(R)
{
    // Each requirement is checked in turn, and the first one that fails makes
    // the result false without the later ones being looked at.
    static if(!isForwardRange!R)
        enum isAttrRange = false;
    else static if(!is(typeof(R.init.front.name)) || !is(typeof(R.init.front.value)))
        enum isAttrRange = false; // an element lacks name and/or value
    else static if(!is(ReturnType!((R r) => r.front.pos) == TextPos))
        enum isAttrRange = false; // pos is missing or is not a TextPos
    else
    {
        alias NameType  = ReturnType!((R r) => r.front.name);
        alias ValueType = ReturnType!((R r) => r.front.value);

        // name and value must be the exact same type, and that type must be
        // a forward range of characters.
        static if(is(NameType == ValueType))
            enum isAttrRange = isForwardRange!NameType && isSomeChar!(ElementType!NameType);
        else
            enum isAttrRange = false;
    }
}
3631 
///
unittest
{
    import std.typecons : Tuple;
    import dxml.dom : parseDOM;

    // The attribute range of an entity from parseXML.
    alias R1 = typeof(parseXML("<root/>").front.attributes);
    static assert(isAttrRange!R1);

    // The attribute range of an entity from parseDOM.
    alias R2 = typeof(parseDOM("<root/>").children[0].attributes);
    static assert(isAttrRange!R2);

    // Any forward range whose elements have a matching name, value, and pos
    // qualifies - e.g. a dynamic array of tuples.
    alias T = Tuple!(string, "name", string, "value", TextPos, "pos");
    static assert(isAttrRange!(T[]));

    // The elements of a string have no name, value, or pos.
    static assert(!isAttrRange!string);
}
3649 
// Element types which each break exactly one requirement of isAttrRange.
unittest
{
    import std.typecons : Tuple;

    // Misspelled member names.
    alias WrongNameMember  = Tuple!(string, "nam", string, "value", TextPos, "pos");
    alias WrongValueMember = Tuple!(string, "name", string, "valu", TextPos, "pos");
    alias WrongPosMember   = Tuple!(string, "name", string, "value", TextPos, "po");
    static assert(!isAttrRange!(WrongNameMember[]));
    static assert(!isAttrRange!(WrongValueMember[]));
    static assert(!isAttrRange!(WrongPosMember[]));

    // name and value are both ranges of characters but not the same type.
    alias MixedStringTypes = Tuple!(string, "name", wstring, "value", TextPos, "pos");
    static assert(!isAttrRange!(MixedStringTypes[]));

    // No pos member at all.
    alias NoPos = Tuple!(string, "name", string, "value");
    static assert(!isAttrRange!(NoPos[]));

    // A member that has the right name but the wrong type.
    alias IntName  = Tuple!(int, "name", string, "value", TextPos, "pos");
    alias IntValue = Tuple!(string, "name", int, "value", TextPos, "pos");
    alias IntPos   = Tuple!(string, "name", string, "value", int, "pos");
    static assert(!isAttrRange!(IntName[]));
    static assert(!isAttrRange!(IntValue[]));
    static assert(!isAttrRange!(IntPos[]));
}
3686 
3687 
3688 /++
3689     A helper function for processing start tag attributes.
3690 
3691     It functions similarly to $(PHOBOS_REF getopt, std, getopt). It takes a
3692     range of attributes and a list of alternating strings and pointers where
3693     each string represents the name of the attribute to parse and the pointer
3694     immediately after it is assigned the value that corresponds to the attribute
3695     name (if present). If the given pointer does not point to the same type as
3696     the range of characters used in the attributes, then
3697     $(PHOBOS_REF to, std, conv) is used to convert the value to the type the
3698     pointer points to.
3699 
3700     If a $(D Nullable!T*) is given rather than a $(D T*), then it will be
3701     treated the same as if it had been $(D T*). So, $(D to!T) will be used to
3702     convert the attribute value if the matching attribute name is present. The
3703     advantage of passing $(D Nullable!T*) instead of $(D T*) is that it's
3704     possible to distinguish between an attribute that wasn't present and one
3705     where it was present but was equivalent to $(D T.init).
3706 
3707     Unlike $(PHOBOS_REF getopt, std, getopt), the given range is consumed
3708     rather than taking it by $(K_REF) and leaving the attributes that weren't
3709     matched in the range (since that really doesn't work with an arbitrary
3710     range as opposed to a dynamic array). However, if the second argument of
3711     getAttrs is not a $(K_STRING) but is instead an output range that accepts
3712     the element type of the range, then any attributes which aren't matched are
3713     put into the output range.
3714 
3715     Params:
3716         attrRange = A range of attributes (see $(LREF isAttrRange)).
3717         unmatched = An output range that any _unmatched attributes from the
3718                     range are put into (optional argument).
3719         args = An alternating list of strings and pointers where the names
3720                represent the attribute names to get the value of, and the
3721                corresponding values get assigned to what the pointers point to.
3722 
3723     Throws: $(LREF XMLParsingException) if $(PHOBOS_REF to, std, conv) fails to
3724             convert an attribute value.
3725 
3726     See_Also: $(LREF isAttrRange)$(BR)
3727               $(LREF EntityRange.Entity.attributes)$(BR)
3728               $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
3729   +/
void getAttrs(R, Args...)(R attrRange, Args args)
    if(isAttrRange!R && Args.length % 2 == 0)
{
    // The body is generated by _genGetAttrs and is shared with the overload
    // which takes an output range; false selects the variant that does
    // nothing with the attributes that aren't matched.
    mixin(_genGetAttrs(false));
}
3735 
/// Ditto
void getAttrs(R, OR, Args...)(R attrRange, ref OR unmatched, Args args)
    if(isAttrRange!R && isOutputRange!(OR, ElementType!R) && Args.length % 2 == 0)
{
    // Same generated body as the overload without an output range except that
    // true adds the statement which puts each attribute that isn't matched
    // into unmatched.
    mixin(_genGetAttrs(true));
}
3742 
// Generates the source code which is mixed in as the body of the getAttrs
// overloads.
//
// The generated code loops over attrRange, compares each attribute's name
// against the strings in args, and on a match assigns the converted value
// through the pointer that follows the matching string. If includeUnmatched
// is true, a statement is added at the end of the loop body which puts the
// attribute into unmatched when none of the names matched it.
private string _genGetAttrs(bool includeUnmatched)
{
    // Everything up to (but not including) the end of the outer loop's body.
    enum loopText =
`    import std.algorithm.comparison : equal;
    import std.conv : ConvException, to;
    import std.format : format;
    import std.typecons : Nullable;
    import std.utf : byChar;

    alias Attr = ElementType!R;
    alias SliceOfR = ElementType!(typeof(Attr.init.name));

    outer: foreach(attr; attrRange)
    {
        static foreach(i, arg; args)
        {
            static if(i % 2 == 0)
                static assert(is(Args[i] == string), format!"Expected string for args[%s]"(i));
            else
            {
                static assert(isPointer!(Args[i]), format!"Expected pointer for args[%s]"(i));

                if(equal(attr.name, args[i - 1].byChar()))
                {
                    alias ArgType = typeof(*arg);

                    static if(isInstanceOf!(Nullable, ArgType))
                        alias TargetType = TemplateArgsOf!ArgType;
                    else
                        alias TargetType = typeof(*arg);

                    try
                        *arg = to!TargetType(attr.value);
                    catch(ConvException ce)
                    {
                        enum fmt = "Failed to convert %s: %s";
                        throw new XMLParsingException(format!fmt(attr.name, ce.msg), attr.pos);
                    }

                    continue outer;
                }
            }
        }`;

    // Only reached by the generated code when no name matched, since a match
    // ends with "continue outer".
    enum putUnmatched = "\n        put(unmatched, attr);";

    // Closes the outer foreach.
    enum loopEnd = "\n    }";

    return includeUnmatched ? loopText ~ putUnmatched ~ loopEnd
                            : loopText ~ loopEnd;
}
3793 
// Tests for getAttrs: plain pointers, Nullable pointers, the output range for
// unmatched attributes, conversion failures, and CTFE.
unittest
{
    import std.array : appender;
    import std.exception : collectException;
    import std.typecons : Nullable;

    // Values are converted to the types that the pointers point to, and an
    // attribute which isn't asked for (d) is ignored.
    {
        auto xml = `<root a="foo" b="19" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
    }

    // Nullable!T* accepts the same as T*.
    {
        auto xml = `<root a="foo" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        Nullable!string a;
        Nullable!int b;
        bool c;

        // The order of the arguments does not need to match the order of the
        // attributes, and b stays null, because there is no b attribute.
        getAttrs(range.front.attributes, "c", &c, "b", &b, "a", &a);
        assert(a == "foo");
        assert(b.isNull);
        assert(c == true);
    }

    // If an output range of attributes is provided, then the ones that
    // weren't matched are put in it.
    {
        auto xml = `<root foo="42" bar="silly" d="rocks" q="t"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        alias Attribute = typeof(range).Entity.Attribute;
        auto unmatched = appender!(Attribute[])();
        int i;
        string s;

        getAttrs(range.front.attributes, unmatched, "foo", &i, "bar", &s);
        assert(i == 42);
        assert(s == "silly");
        assert(unmatched.data.length == 2);
        assert(unmatched.data[0] == Attribute("d", "rocks", TextPos(1, 28)));
        assert(unmatched.data[1] == Attribute("q", "t", TextPos(1, 38)));
    }

    // An XMLParsingException gets thrown if a conversion fails.
    {
        auto xml = `<root foo="bar" false="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        int i;

        auto xpe = collectException!XMLParsingException(
            getAttrs(range.front.attributes, "d", &i));
        // collectException returns null if nothing was thrown, so check for
        // that explicitly to get a test failure rather than a null dereference.
        assert(xpe !is null);
        // The position is that of the attribute which failed to convert.
        assert(xpe.pos == TextPos(1, 30));
    }

    // Test parsing attributes with CTFE.
    enum dummy = (){
        auto xml = `<root a="foo" d="rocks" c="true" b="19" />`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
        return 0;
    }();
}
3881 
// Argument lists which are not strictly alternating strings and pointers must
// not compile.
unittest
{
    auto range = parseXML("<root/>");
    auto attrs = range.front.attributes;
    int i;
    // A name without a pointer.
    static assert(!__traits(compiles, getAttrs(attrs, "foo")));
    // A string where a pointer is expected.
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar")));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i)));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i, &i)));
    // A pointer where a name is expected.
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo")));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i)));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i, "bar")));
}
3895 
// Test that getAttrs can be called from @safe pure code.
@safe pure unittest
{
    import std.typecons : Nullable;

    static test(R)(R range, int* i, Nullable!int* j) @safe pure
    {
        getAttrs(range.front.attributes, "foo", i, "bar", j);
    }

    // <root/> has no attributes, so nothing is ever assigned through the null
    // pointers.
    test(parseXML("<root/>"), null, null);
}
3907 
3908 
3909 /++
3910     Takes an $(LREF EntityRange) which is at a start tag and iterates it until
3911     it is at its corresponding end tag. It is an error to call skipContents when
3912     the current entity is not $(LREF EntityType.elementStart).
3913 
3914     $(TABLE
3915         $(TR $(TH Supported $(LREF EntityType)s:))
3916         $(TR $(TD $(LREF2 elementStart, EntityType)))
3917     )
3918 
3919     Returns: The range with its $(D front) now at the end tag corresponding to
3920              the start tag that was $(D front) when the function was called.
3921 
3922     Throws: $(LREF XMLParsingException) on invalid XML.
3923   +/
R skipContents(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    assert(entityRange._type == EntityType.elementStart);

    // The number of start tags which have been seen (including the one that
    // the range started on) but whose end tags have not been reached yet.
    int openTags = 1;

    // empty is never checked. The range can only become empty by reaching the
    // end of the document, and running out of document before the matching
    // end tag has been found results in an XMLParsingException.
    do
    {
        entityRange.popFront();
        switch(entityRange._type)
        {
            case EntityType.elementStart:
                ++openTags;
                break;
            case EntityType.elementEnd:
                --openTags;
                break;
            default:
                break;
        }
    }
    while(openTags != 0);

    return entityRange;
}
3945 
///
unittest
{
    auto xml = "<root>\n" ~
               "    <foo>\n" ~
               "        <bar>\n" ~
               "        Some text\n" ~
               "        </bar>\n" ~
               "    </foo>\n" ~
               "    <!-- no comment -->\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    // Everything inside of <foo> - including <bar> and its text - is skipped,
    // and the range ends up on </foo>.
    range = range.skipContents();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "foo");

    range.popFront();
    assert(range.front.type == EntityType.comment);
    assert(range.front.text == " no comment ");

    range.popFront();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.empty);
}
3981 
3982 
3983 /++
3984     Skips entities until the given $(LREF EntityType) is reached.
3985 
3986     If multiple $(LREF EntityType)s are given, then any one of them counts as
3987     a match.
3988 
3989     The current entity is skipped regardless of whether it is the given
3990     $(LREF EntityType).
3991 
3992     This is essentially a slightly optimized equivalent to
3993 
3994     ---
3995     if(!range.empty())
3996     {
3997         range.popFront();
3998         range = range.find!((a, b) => a.type == b.type)(entityTypes);
3999     }
4000     ---
4001 
4002     Returns: The given range with its $(D front) now at the first entity which
4003              matched one of the given $(LREF EntityType)s or an empty range if
4004              none were found.
4005 
4006     Throws: $(LREF XMLParsingException) on invalid XML.
4007   +/
R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...)
    if(isInstanceOf!(EntityRange, R))
{
    if(!entityRange.empty)
    {
        // The current entity is skipped unconditionally - even if its type is
        // one of the requested ones.
        entityRange.popFront();

        search: while(!entityRange.empty)
        {
            immutable current = entityRange._type;
            foreach(wanted; entityTypes)
            {
                if(current == wanted)
                    break search;
            }
            entityRange.popFront();
        }
    }

    // Either front is the first entity found with one of the requested types,
    // or the range is empty.
    return entityRange;
}
4025 
///
unittest
{
    auto xml = "<root>\n" ~
               "    <!-- blah blah blah -->\n" ~
               "    <foo>nothing to say</foo>\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    // The current entity (<root>) is skipped even though its type matches,
    // and the comment is passed over, because its type was not requested.
    range = range.skipToEntityType(EntityType.elementStart,
                                   EntityType.elementEmpty);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    // There are no comments after <foo>, so the result is empty.
    assert(range.skipToEntityType(EntityType.comment).empty);

    // skipToEntityType will work on an empty range but will always
    // return an empty range.
    assert(range.takeNone().skipToEntityType(EntityType.comment).empty);
}
4049 
4050 
4051 /++
4052     Skips entities until the end tag is reached that corresponds to the start
4053     tag that is the parent of the current entity.
4054 
4055     Returns: The given range with its $(D front) now at the end tag which
4056              corresponds to the parent start tag of the entity that was
4057              $(D front) when skipToParentEndTag was called. If the current
4058              entity does not have a parent start tag (which means that it's
4059              either the root element or a comment or PI outside of the root
4060              element), then an empty range is returned.
4061 
4062     Throws: $(LREF XMLParsingException) on invalid XML.
4063   +/
R skipToParentEndTag(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    // If the current entity is a start tag, then its own end tag is not the
    // one being looked for, so move past its contents first. That leaves the
    // range on the end tag which matches the start tag.
    if(entityRange._type == EntityType.elementStart)
        entityRange = entityRange.skipContents();

    while(true)
    {
        // Skip the current entity and anything after it which is neither a
        // start tag nor an end tag.
        entityRange = entityRange.skipToEntityType(EntityType.elementStart,
                                                   EntityType.elementEnd);

        // The first end tag that is reached without first being entered via
        // its start tag belongs to the parent. If the document ran out
        // instead, then there was no parent start tag.
        if(entityRange.empty || entityRange._type == EntityType.elementEnd)
            return entityRange;

        // It's the start tag of a sibling, so skip that whole element and
        // keep looking from its end tag.
        entityRange = entityRange.skipContents();
    }
}
4097 
///
unittest
{
    auto xml = "<root>\n" ~
               "    <foo>\n" ~
               "        <!-- comment -->\n" ~
               "        <bar>exam</bar>\n" ~
               "    </foo>\n" ~
               "    <!-- another comment -->\n" ~
               "</root>";
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        // From the comment inside of <foo>, the parent's end tag is </foo>,
        // and the sibling element <bar> is skipped along the way.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        // From an end tag, the result is the end tag of the enclosing element.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        // </root> has no parent, so the result is an empty range.
        range = range.skipToParentEndTag();
        assert(range.empty);
    }
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text == "exam");

        // From the text inside of <bar>, the parent's end tag is </bar>.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " another comment ");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        assert(range.skipToParentEndTag().empty);
    }
    {
        // The root start tag has no parent, so the result is an empty range.
        auto range = parseXML("<root><foo>bar</foo></root>");
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
        assert(range.skipToParentEndTag().empty);
    }
}
4178 
4179 unittest
4180 {
4181     import core.exception : AssertError;
4182     import std.algorithm.comparison : equal;
4183     import std.exception : enforce;
4184     import dxml.internal : testRangeFuncs;
4185 
4186     static void popAndCheck(R)(ref R range, EntityType type, size_t line = __LINE__)
4187     {
4188         range.popFront();
4189         enforce!AssertError(!range.empty, "unittest 1", __FILE__, line);
4190         enforce!AssertError(range.front.type == type, "unittest 2", __FILE__, line);
4191     }
4192 
4193     static foreach(func; testRangeFuncs)
4194     {{
4195         // cdata
4196         {
4197             auto xml = "<root>\n" ~
4198                        "    <![CDATA[ cdata run ]]>\n" ~
4199                        "    <nothing/>\n" ~
4200                        "    <![CDATA[ cdata have its bits flipped ]]>\n" ~
4201                        "    <foo></foo>\n" ~
4202                        "    <![CDATA[ cdata play violin ]]>\n" ~
4203                        "</root>";
4204 
4205             auto range = parseXML(func(xml));
4206             assert(range.front.type == EntityType.elementStart);
4207             popAndCheck(range, EntityType.cdata);
4208             assert(equal(range.front.text, " cdata run "));
4209             {
4210                 auto temp = range.save.skipToParentEndTag();
4211                 assert(temp._type == EntityType.elementEnd);
4212                 assert(equal(temp.front.name, "root"));
4213             }
4214             popAndCheck(range, EntityType.elementEmpty);
4215             popAndCheck(range, EntityType.cdata);
4216             assert(equal(range.front.text, " cdata have its bits flipped "));
4217             {
4218                 auto temp = range.save.skipToParentEndTag();
4219                 assert(temp._type == EntityType.elementEnd);
4220                 assert(equal(temp.front.name, "root"));
4221             }
4222             popAndCheck(range, EntityType.elementStart);
4223             range = range.skipContents();
4224             popAndCheck(range, EntityType.cdata);
4225             assert(equal(range.front.text, " cdata play violin "));
4226             range = range.skipToParentEndTag();
4227             assert(range._type == EntityType.elementEnd);
4228             assert(equal(range.front.name, "root"));
4229         }
4230         // comment
4231         {
4232             auto xml = "<!-- before -->\n" ~
4233                        "<root>\n" ~
4234                        "    <!-- comment 1 -->\n" ~
4235                        "    <nothing/>\n" ~
4236                        "    <!-- comment 2 -->\n" ~
4237                        "    <foo></foo>\n" ~
4238                        "    <!-- comment 3 -->\n" ~
4239                        "</root>\n" ~
4240                        "<!-- after -->" ~
4241                        "<!-- end -->";
4242 
4243             auto text = func(xml);
4244             assert(parseXML(text.save).skipToParentEndTag().empty);
4245             {
4246                 auto range = parseXML(text.save);
4247                 assert(range.front.type == EntityType.comment);
4248                 popAndCheck(range, EntityType.elementStart);
4249                 popAndCheck(range, EntityType.comment);
4250                 assert(equal(range.front.text, " comment 1 "));
4251                 {
4252                     auto temp = range.save.skipToParentEndTag();
4253                     assert(temp._type == EntityType.elementEnd);
4254                     assert(equal(temp.front.name, "root"));
4255                 }
4256                 popAndCheck(range, EntityType.elementEmpty);
4257                 popAndCheck(range, EntityType.comment);
4258                 assert(equal(range.front.text, " comment 2 "));
4259                 {
4260                     auto temp = range.save.skipToParentEndTag();
4261                     assert(temp._type == EntityType.elementEnd);
4262                     assert(equal(temp.front.name, "root"));
4263                 }
4264                 popAndCheck(range, EntityType.elementStart);
4265                 range = range.skipContents();
4266                 popAndCheck(range, EntityType.comment);
4267                 assert(equal(range.front.text, " comment 3 "));
4268                 range = range.skipToParentEndTag();
4269                 assert(range._type == EntityType.elementEnd);
4270                 assert(equal(range.front.name, "root"));
4271             }
4272             {
4273                 auto range = parseXML(text.save);
4274                 assert(range.front.type == EntityType.comment);
4275                 popAndCheck(range, EntityType.elementStart);
4276                 range = range.skipContents();
4277                 popAndCheck(range, EntityType.comment);
4278                 assert(equal(range.front.text, " after "));
4279                 assert(range.save.skipToParentEndTag().empty);
4280                 popAndCheck(range, EntityType.comment);
4281                 assert(equal(range.front.text, " end "));
4282                 assert(range.skipToParentEndTag().empty);
4283             }
4284         }
4285         // elementStart
4286         {
4287             auto xml = "<root>\n" ~
4288                        "    <a><b>foo</b></a>\n" ~
4289                        "    <nothing/>\n" ~
4290                        "    <c></c>\n" ~
4291                        "    <d>\n" ~
4292                        "        <e>\n" ~
4293                        "        </e>\n" ~
4294                        "        <f>\n" ~
4295                        "            <g>\n" ~
4296                        "            </g>\n" ~
4297                        "        </f>\n" ~
4298                        "    </d>\n" ~
4299                        "</root>";
4300 
4301             auto range = parseXML(func(xml));
4302             assert(range.front.type == EntityType.elementStart);
4303             assert(equal(range.front.name, "root"));
4304             assert(range.save.skipToParentEndTag().empty);
4305             popAndCheck(range, EntityType.elementStart);
4306             assert(equal(range.front.name, "a"));
4307             {
4308                 auto temp = range.save.skipToParentEndTag();
4309                 assert(temp._type == EntityType.elementEnd);
4310                 assert(equal(temp.front.name, "root"));
4311             }
4312             popAndCheck(range, EntityType.elementStart);
4313             assert(equal(range.front.name, "b"));
4314             {
4315                 auto temp = range.save.skipToParentEndTag();
4316                 assert(temp._type == EntityType.elementEnd);
4317                 assert(equal(temp.front.name, "a"));
4318             }
4319             popAndCheck(range, EntityType.text);
4320             popAndCheck(range, EntityType.elementEnd);
4321             popAndCheck(range, EntityType.elementEnd);
4322             popAndCheck(range, EntityType.elementEmpty);
4323             popAndCheck(range, EntityType.elementStart);
4324             assert(equal(range.front.name, "c"));
4325             {
4326                 auto temp = range.save.skipToParentEndTag();
4327                 assert(temp._type == EntityType.elementEnd);
4328                 assert(equal(temp.front.name, "root"));
4329             }
4330             popAndCheck(range, EntityType.elementEnd);
4331             popAndCheck(range, EntityType.elementStart);
4332             assert(equal(range.front.name, "d"));
4333             popAndCheck(range, EntityType.elementStart);
4334             assert(equal(range.front.name, "e"));
4335             range = range.skipToParentEndTag();
4336             assert(range._type == EntityType.elementEnd);
4337             assert(equal(range.front.name, "d"));
4338             range = range.skipToParentEndTag();
4339             assert(range._type == EntityType.elementEnd);
4340             assert(equal(range.front.name, "root"));
4341         }
4342         // elementEnd
4343         {
4344             auto xml = "<root>\n" ~
4345                        "    <a><b>foo</b></a>\n" ~
4346                        "    <nothing/>\n" ~
4347                        "    <c></c>\n" ~
4348                        "</root>";
4349 
4350             auto range = parseXML(func(xml));
4351             assert(range.front.type == EntityType.elementStart);
4352             popAndCheck(range, EntityType.elementStart);
4353             popAndCheck(range, EntityType.elementStart);
4354             popAndCheck(range, EntityType.text);
4355             popAndCheck(range, EntityType.elementEnd);
4356             assert(equal(range.front.name, "b"));
4357             {
4358                 auto temp = range.save.skipToParentEndTag();
4359                 assert(temp._type == EntityType.elementEnd);
4360                 assert(equal(temp.front.name, "a"));
4361             }
4362             popAndCheck(range, EntityType.elementEnd);
4363             assert(equal(range.front.name, "a"));
4364             {
4365                 auto temp = range.save.skipToParentEndTag();
4366                 assert(temp._type == EntityType.elementEnd);
4367                 assert(equal(temp.front.name, "root"));
4368             }
4369             popAndCheck(range, EntityType.elementEmpty);
4370             popAndCheck(range, EntityType.elementStart);
4371             popAndCheck(range, EntityType.elementEnd);
4372             assert(equal(range.front.name, "c"));
4373             {
4374                 auto temp = range.save.skipToParentEndTag();
4375                 assert(temp._type == EntityType.elementEnd);
4376                 assert(equal(temp.front.name, "root"));
4377             }
4378             popAndCheck(range, EntityType.elementEnd);
4379             assert(range.skipToParentEndTag().empty);
4380         }
4381         // elementEmpty
4382         {
4383             auto range = parseXML(func("<root/>"));
4384             assert(range.front.type == EntityType.elementEmpty);
4385             assert(range.skipToParentEndTag().empty);
4386         }
4387         {
4388             auto xml = "<root>\n" ~
4389                        "    <a><b>foo</b></a>\n" ~
4390                        "    <nothing/>\n" ~
4391                        "    <c></c>\n" ~
4392                        "    <whatever/>\n" ~
4393                        "</root>";
4394 
4395             auto range = parseXML(func(xml));
4396             popAndCheck(range, EntityType.elementStart);
4397             assert(range.front.type == EntityType.elementStart);
4398             range = range.skipContents();
4399             popAndCheck(range, EntityType.elementEmpty);
4400             assert(equal(range.front.name, "nothing"));
4401             {
4402                 auto temp = range.save;
4403                 popAndCheck(temp, EntityType.elementStart);
4404                 popAndCheck(temp, EntityType.elementEnd);
4405                 popAndCheck(temp, EntityType.elementEmpty);
4406                 assert(equal(temp.front.name, "whatever"));
4407             }
4408             range = range.skipToParentEndTag();
4409             assert(range._type == EntityType.elementEnd);
4410             assert(equal(range.front.name, "root"));
4411         }
4412         // pi
4413         {
4414             auto xml = "<?Sherlock?>\n" ~
4415                        "<root>\n" ~
4416                        "    <?Foo?>\n" ~
4417                        "    <nothing/>\n" ~
4418                        "    <?Bar?>\n" ~
4419                        "    <foo></foo>\n" ~
4420                        "    <?Baz?>\n" ~
4421                        "</root>\n" ~
4422                        "<?Poirot?>\n" ~
4423                        "<?Conan?>";
4424 
4425             auto range = parseXML(func(xml));
4426             assert(range.front.type == EntityType.pi);
4427             assert(equal(range.front.name, "Sherlock"));
4428             assert(range.save.skipToParentEndTag().empty);
4429             popAndCheck(range, EntityType.elementStart);
4430             popAndCheck(range, EntityType.pi);
4431             assert(equal(range.front.name, "Foo"));
4432             {
4433                 auto temp = range.save.skipToParentEndTag();
4434                 assert(temp._type == EntityType.elementEnd);
4435                 assert(equal(temp.front.name, "root"));
4436             }
4437             popAndCheck(range, EntityType.elementEmpty);
4438             popAndCheck(range, EntityType.pi);
4439             assert(equal(range.front.name, "Bar"));
4440             {
4441                 auto temp = range.save.skipToParentEndTag();
4442                 assert(temp._type == EntityType.elementEnd);
4443                 assert(equal(temp.front.name, "root"));
4444             }
4445             popAndCheck(range, EntityType.elementStart);
4446             popAndCheck(range, EntityType.elementEnd);
4447             popAndCheck(range, EntityType.pi);
4448             assert(equal(range.front.name, "Baz"));
4449             range = range.skipToParentEndTag();
4450             assert(range._type == EntityType.elementEnd);
4451             assert(equal(range.front.name, "root"));
4452             popAndCheck(range, EntityType.pi);
4453             assert(equal(range.front.name, "Poirot"));
4454             assert(range.save.skipToParentEndTag().empty);
4455             popAndCheck(range, EntityType.pi);
4456             assert(equal(range.front.name, "Conan"));
4457             assert(range.skipToParentEndTag().empty);
4458         }
4459         // text
4460         {
4461             auto xml = "<root>\n" ~
4462                        "    nothing to say\n" ~
4463                        "    <nothing/>\n" ~
4464                        "    nothing whatsoever\n" ~
4465                        "    <foo></foo>\n" ~
4466                        "    but he keeps talking\n" ~
4467                        "</root>";
4468 
4469             auto range = parseXML(func(xml));
4470             assert(range.front.type == EntityType.elementStart);
4471             popAndCheck(range, EntityType.text);
4472             assert(equal(range.front.text, "\n    nothing to say\n    "));
4473             {
4474                 auto temp = range.save.skipToParentEndTag();
4475                 assert(temp._type == EntityType.elementEnd);
4476                 assert(equal(temp.front.name, "root"));
4477             }
4478             popAndCheck(range, EntityType.elementEmpty);
4479             popAndCheck(range, EntityType.text);
4480             assert(equal(range.front.text, "\n    nothing whatsoever\n    "));
4481             {
4482                 auto temp = range.save.skipToParentEndTag();
4483                 assert(temp._type == EntityType.elementEnd);
4484                 assert(equal(temp.front.name, "root"));
4485             }
4486             popAndCheck(range, EntityType.elementStart);
4487             range = range.skipContents();
4488             popAndCheck(range, EntityType.text);
4489             assert(equal(range.front.text, "\n    but he keeps talking\n"));
4490             range = range.skipToParentEndTag();
4491             assert(range._type == EntityType.elementEnd);
4492             assert(equal(range.front.name, "root"));
4493         }
4494     }}
4495 }
4496 
4497 
4498 /++
4499     Treats the given string like a file path except that each directory
4500     corresponds to the name of a start tag. Note that this does $(I not) try to
4501     implement XPath as that would be quite complicated, and it really doesn't
4502     fit with a StAX parser.
4503 
4504     A start tag should be thought of as a directory, with its child start tags
4505     as the directories it contains.
4506 
4507     All paths should be relative. $(LREF EntityRange) can only move forward
4508     through the document, so using an absolute path would only make sense at
4509     the beginning of the document. As such, absolute paths are treated as
4510     invalid paths.
4511 
4512     $(D_CODE_STRING "./") and $(D_CODE_STRING "../") are supported. Repeated
4513     slashes such as in $(D_CODE_STRING "foo//bar") are not supported and are
4514     treated as an invalid path.
4515 
4516     If $(D range.front.type == EntityType.elementStart), then
    $(D range._skipToPath($(D_STRING "foo"))) will search for the first child
4518     start tag (be it $(LREF EntityType.elementStart) or
4519     $(LREF EntityType.elementEmpty)) with the $(LREF2 name, EntityRange.Entity)
4520     $(D_CODE_STRING "foo"). That start tag must be a direct child of the current
4521     start tag.
4522 
4523     If $(D range.front.type) is any other $(LREF EntityType), then
4524     $(D range._skipToPath($(D_STRING "foo"))) will return an empty range,
4525     because no other $(LREF EntityType)s have child start tags.
4526 
4527     For any $(LREF EntityType), $(D range._skipToPath($(D_STRING "../foo")))
4528     will search for the first start tag with the
4529     $(LREF2 name, EntityRange.Entity) $(D_CODE_STRING "foo") at the same level
4530     as the current entity. If the current entity is a start tag with the name
4531     $(D_CODE_STRING "foo"), it will not be considered a match.
4532 
4533     $(D range._skipToPath($(D_STRING "./"))) is a no-op. However,
4534     $(D range._skipToPath($(D_STRING "../"))) will result in the empty range
4535     (since it doesn't target a specific start tag).
4536 
4537     $(D range._skipToPath($(D_STRING "foo/bar"))) is equivalent to
4538     $(D range._skipToPath($(D_STRING "foo"))._skipToPath($(D_STRING "bar"))),
4539     and $(D range._skipToPath($(D_STRING "../foo/bar"))) is equivalent to
4540     $(D range._skipToPath($(D_STRING "../foo"))._skipToPath($(D_STRING "bar"))).
4541 
4542     Returns: The given range with its $(D front) now at the requested entity if
4543              the path is valid; otherwise, an empty range is returned.
4544 
4545     Throws: $(LREF XMLParsingException) on invalid XML.
4546   +/
R skipToPath(R)(R entityRange, string path)
    if(isInstanceOf!(EntityRange, R))
{
    import std.algorithm.comparison : equal;
    import std.path : pathSplitter;

    // There is nothing to search in an empty range.
    if(entityRange.empty)
        return entityRange;

    // Empty paths and absolute paths are invalid.
    if(path.empty || path[0] == '/')
        return entityRange.takeNone();

    // Repeated slashes are documented as being an invalid path, but
    // pathSplitter silently collapses them ("foo//bar" is split into "foo"
    // and "bar"), so they have to be rejected before the path is split.
    // Comparing individual code units is fine here, because '/' is ASCII and
    // therefore never part of a multi-code-unit UTF-8 sequence.
    foreach(i; 1 .. path.length)
    {
        if(path[i] == '/' && path[i - 1] == '/')
            return entityRange.takeNone();
    }

    with(EntityType)
    {
        // The entity types which are relevant when looking for a tag. With
        // SplitEmpty.yes, elementEmpty is never searched for, so it's left
        // out of the list.
        static if(R.config.splitEmpty == SplitEmpty.yes)
            EntityType[2] startOrEnd = [elementStart, elementEnd];
        else
            EntityType[3] startOrEnd = [elementStart, elementEnd, elementEmpty];

        // Searches the level that entityRange is currently on for a start tag
        // (or empty element tag) with the given name. The entity that
        // entityRange is on when this is called is never itself a match.
        // Returns an empty range if an end tag is reached first or if the
        // range runs out of entities.
        R findOnCurrLevel(string name)
        {
            // If we're on a start tag, its children are one level too deep,
            // so skip them before searching.
            if(entityRange._type == elementStart)
                entityRange = entityRange.skipContents();
            while(true)
            {
                entityRange = entityRange.skipToEntityType(startOrEnd[]);
                if(entityRange.empty)
                    return entityRange;

                // Reaching an end tag here means that we ran out of entities
                // on this level without finding a match.
                if(entityRange._type == elementEnd)
                    return entityRange.takeNone();

                if(equal(name, entityRange._name.save))
                    return entityRange;

                // An empty element tag has no contents to skip.
                static if(R.config.splitEmpty == SplitEmpty.no)
                {
                    if(entityRange._type == elementEmpty)
                        continue;
                }

                // It's a start tag with the wrong name, so skip its contents
                // in order to stay on the same level.
                entityRange = entityRange.skipContents();
            }
        }

        for(auto pieces = path.pathSplitter(); !pieces.empty; pieces.popFront())
        {
            // "." refers to the current entity, so there's nothing to do.
            if(pieces.front == ".")
                continue;
            else if(pieces.front == "..")
            {
                // ".." has to be followed by the name of a tag to look for;
                // otherwise, the path doesn't target anything.
                pieces.popFront();
                if(pieces.empty)
                    return entityRange.takeNone();

                // The first ".." means "search the current level", so only
                // each additional ".." moves up to the parent's end tag.
                while(pieces.front == "..")
                {
                    pieces.popFront();
                    if(pieces.empty)
                        return entityRange.takeNone();
                    entityRange = entityRange.skipToParentEndTag();
                    if(entityRange.empty)
                        return entityRange;
                }

                entityRange = findOnCurrLevel(pieces.front);
                if(entityRange.empty)
                    return entityRange;
            }
            else
            {
                // Only a start tag can have child tags.
                if(entityRange._type != elementStart)
                    return entityRange.takeNone();

                // Move to the first tag after the start tag. If that's the
                // end tag, then the element has no child elements.
                entityRange = entityRange.skipToEntityType(startOrEnd[]);
                assert(!entityRange.empty);
                if(entityRange._type == elementEnd)
                    return entityRange.takeNone();

                // If the first child isn't the one we want, then search the
                // rest of the children.
                if(!equal(pieces.front, entityRange._name.save))
                {
                    entityRange = findOnCurrLevel(pieces.front);
                    if(entityRange.empty)
                        return entityRange;
                }
            }
        }

        return entityRange;
    }
}
4635 
4636 ///
unittest
{
    // Descending with a multi-part path, then with a single name, and then
    // using "../" to reach a sibling.
    {
        auto xml = "<carrot>\n" ~
                   "    <foo>\n" ~
                   "        <bar>\n" ~
                   "            <baz/>\n" ~
                   "            <other/>\n" ~
                   "        </bar>\n" ~
                   "    </foo>\n" ~
                   "</carrot>";

        auto range = parseXML(xml);
        // "<carrot>"
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "carrot");

        range = range.skipToPath("foo/bar");
        // "        <bar>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("baz");
        // "            <baz/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);

        // other is not a child element of baz
        assert(range.skipToPath("other").empty);

        range = range.skipToPath("../other");
        // "            <other/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    // "./" is a no-op, and "../" works from both a start tag and an end tag.
    {
        auto xml = "<potato>\n" ~
                   "    <foo>\n" ~
                   "        <bar>\n "~
                   "        </bar>\n" ~
                   "        <crazy>\n" ~
                   "        </crazy>\n" ~
                   "        <fou/>\n" ~
                   "    </foo>\n" ~
                   "    <buzz/>\n" ~
                   "</potato>";

        auto range = parseXML(xml);
        // "<potato>"
        assert(range.front.type == EntityType.elementStart);

        range = range.skipToPath("./");
        // "<potato>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "potato");

        range = range.skipToPath("./foo/bar");
        // "        <bar>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("../crazy");
        // "        <crazy>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "crazy");

        // Whether popFront is called here before the call to
        // range.skipToPath("../fou") below, the result is the same, because
        // both <crazy> and </crazy> are at the same level.
        range.popFront();
        // "        </crazy>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "crazy");

        range = range.skipToPath("../fou");
        // "        <fou/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    // Searching stops at the first matching start tag.
    {
        auto xml = "<beet>\n" ~
                   "    <foo a='42'>\n" ~
                   "    </foo>\n" ~
                   "    <foo b='451'>\n" ~
                   "    </foo>\n" ~
                   "</beet>";

        auto range = parseXML(xml);
        range = range.skipToPath("foo");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        // The attributes show that this is the first <foo>.
        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "a");
            assert(attrs.front.value == "42");
        }

        // The current <foo> is not considered a match for "../foo", so this
        // moves on to the second <foo>.
        range = range.skipToPath("../foo");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "b");
            assert(attrs.front.value == "451");
        }
    }
    // skipToPath will work on an empty range but will always return an
    // empty range.
    {
        auto range = parseXML("<root/>");
        assert(range.takeNone().skipToPath("nowhere").empty);
    }
    // Empty and absolute paths will also result in an empty range as will
    // "../" without any actual tag name on the end.
    {
        auto range = parseXML("<root/>");
        assert(range.skipToPath("").empty);
        assert(range.skipToPath("/").empty);
        assert(range.skipToPath("../").empty);
    }
    // Only non-empty start tags have children; all other EntityTypes result
    // in an empty range unless "../" is used.
    {
        auto xml = "<!-- comment -->\n" ~
                   "<root>\n" ~
                   "    <foo/>\n" ~
                   "</root>";
        auto range = parseXML(xml);
        // The front is the comment, which has no child tags.
        assert(range.skipToPath("root").empty);
        assert(range.skipToPath("foo").empty);

        range = range.skipToPath("../root");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
    }
}
4784 
// Walks through a document one entity at a time and checks, at each position,
// which paths skipToPath can and cannot reach. This is done for every
// combination of testRangeFuncs and someTestConfigs.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, enforce;
    import dxml.internal : testRangeFuncs;

    // Checks that range.skipToPath(path) does not throw an
    // XMLParsingException and that it results in a non-empty range whose
    // front has the given type and name. line defaults to the line of the
    // caller so that a failure is reported at the call site rather than in
    // this helper.
    static void testPath(R)(R range, string path, EntityType type, string name, size_t line = __LINE__)
    {
        auto result = assertNotThrown!XMLParsingException(range.skipToPath(path), "unittest 1", __FILE__, line);
        enforce!AssertError(!result.empty, "unittest 2", __FILE__, line);
        enforce!AssertError(result.front.type == type, "unittest 3", __FILE__, line);
        enforce!AssertError(equal(result.front.name, name), "unittest 4", __FILE__, line);
    }

    // Pops an empty element tag off the front of the range. With
    // SplitEmpty.yes, an empty element tag is reported as a start tag
    // followed by an end tag, so two entities have to be popped.
    static void popEmpty(R)(ref R range)
    {
        range.popFront();
        static if(range.config.splitEmpty == SplitEmpty.yes)
            range.popFront();
    }

    auto xml = "<superuser>\n" ~
               "    <!-- comment -->\n" ~
               "    <?pi?>\n" ~
               "    <![CDATA[cdata]]>\n" ~
               "    <foo/>\n" ~
               "    <bar/>\n" ~
               "    <!-- comment -->\n" ~
               "    <!-- comment -->\n" ~
               "    <baz/>\n" ~
               "    <frobozz>\n" ~
               "        <!-- comment -->\n" ~
               "        <!-- comment -->\n" ~
               "        <whatever/>\n" ~
               "        <!-- comment -->\n" ~
               "        <!-- comment -->\n" ~
               "    </frobozz>\n" ~
               "    <!-- comment -->\n" ~
               "    <!-- comment -->\n" ~
               "    <xyzzy/>\n" ~
               "</superuser>";

    static foreach(func; testRangeFuncs)
    {{
        auto text = func(xml);

        static foreach(config; someTestConfigs)
        {{
            // The EntityType that an empty element tag is reported as with
            // this config.
            static if(config.splitEmpty == SplitEmpty.yes)
                enum empty = EntityType.elementStart;
            else
                enum empty = EntityType.elementEmpty;

            auto range = parseXML!config(text.save);

            // At <superuser>: only direct children can be reached by name,
            // and a misspelled name does not match.
            assert(range.save.skipToPath("whatever").empty);
            assert(range.save.skipToPath("frobozz/whateve").empty);

            testPath(range.save, "foo", empty, "foo");
            testPath(range.save, "bar", empty, "bar");
            testPath(range.save, "baz", empty, "baz");
            testPath(range.save, "frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "frobozz/whatever", empty, "whatever");
            testPath(range.save, "xyzzy", empty, "xyzzy");

            // Each entity between <superuser> and <foo/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("foo").empty);
                testPath(range.save, "../foo", empty, "foo");
                testPath(range.save, "../bar", empty, "bar");
                testPath(range.save, "../baz", empty, "baz");
                testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(range.save, "../frobozz/whatever", empty, "whatever");
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            // At <foo/>: it can't be found from itself, and it has no
            // children.
            assert(equal(range.front.name, "foo"));
            assert(range.save.skipToPath("foo").empty);
            assert(range.save.skipToPath("./foo").empty);
            assert(range.save.skipToPath("../foo").empty);
            assert(range.save.skipToPath("bar").empty);
            assert(range.save.skipToPath("baz").empty);
            assert(range.save.skipToPath("frobozz").empty);
            assert(range.save.skipToPath("whatever").empty);
            assert(range.save.skipToPath("../").empty);
            assert(range.save.skipToPath("../../").empty);

            testPath(range.save, "../bar", empty, "bar");
            testPath(range.save, "../baz", empty, "baz");
            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // At <bar/>.
            popEmpty(range);
            assert(range.save.skipToPath("bar").empty);
            testPath(range.save, "../baz", empty, "baz");
            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // Each entity between <bar/> and <baz/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("baz").empty);
                testPath(range.save, "../baz", empty, "baz");
                testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(range.save, "../frobozz/whatever", empty, "whatever");
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "baz"));

            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // At <frobozz>.
            popEmpty(range);
            assert(equal(range.front.name, "frobozz"));
            assert(range.save.skipToPath("wizard").empty);
            testPath(range.save, "whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // Each entity inside <frobozz> before <whatever/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("whatever").empty);
                testPath(range.save, "../whatever", empty, "whatever");
                testPath(range.save, "../../xyzzy", empty, "xyzzy");
            }
            // At <whatever/>: tags which come earlier in the document (such
            // as <frobozz>) cannot be reached, and <xyzzy/> is one level up.
            assert(equal(range.front.name, "whatever"));
            assert(range.save.skipToPath("frobozz").empty);
            assert(range.save.skipToPath("../frobozz").empty);
            assert(range.save.skipToPath("../xyzzy").empty);
            assert(range.save.skipToPath("../../frobozz").empty);

            testPath(range.save, "../../xyzzy", empty, "xyzzy");

            // Each entity between <whatever/> and </frobozz>.
            popEmpty(range);
            for(; range.front.type != EntityType.elementEnd; range.popFront())
            {
                assert(range.save.skipToPath("xyzzy").empty);
                assert(range.save.skipToPath("../xyzzy").empty);
                testPath(range.save, "../../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "frobozz"));

            // Each entity between </frobozz> and <xyzzy/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("xyzzy").empty);
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "xyzzy"));

            // At </superuser>: nothing can be reached from here.
            popEmpty(range);
            assert(equal(range.front.name, "superuser"));
            assert(range.save.skipToPath("superuser").empty);
            assert(range.save.skipToPath("foo").empty);
            assert(range.save.skipToPath("../foo").empty);
            assert(range.save.skipToPath("../../foo").empty);
        }}
    }}
}
4948 
4949 
4950 //------------------------------------------------------------------------------
4951 // Private Section
4952 //------------------------------------------------------------------------------
4953 private:
4954 
4955 
// Test helper which creates a default-initialized value of the type of
// EntityRange!(config, R)._text and sets its input to the given XML text as a
// range of code units.
auto testParser(Config config = Config.init, R)(R xmlText) @trusted pure nothrow @nogc
{
    import std.utf : byCodeUnit;

    alias Text = typeof(EntityRange!(config, R)._text);

    Text result;
    result.input = xmlText.byCodeUnit();
    return result;
}
4963 
4964 
// toCmpType lets tests convert the expected result into a range which has the
// same element type as what the parser produces for the given range function.
// That's needed, because comparing with equal won't do the right thing if the
// result doesn't have dchar as its element type.
auto toCmpType(alias func)(string str)
{
    import std.range : takeExactly;
    import std.utf : byUTF;

    // The type of a slice of the parser's input when the text comes from func.
    alias Taken = typeof(testParser(func(str)).input.takeExactly(1));
    alias CodeUnit = immutable ElementType!Taken;

    return str.byUTF!CodeUnit();
}
4975 
// Overload of toCmpType for when the parser is configured with
// makeConfig(toer) rather than with the default Config.
auto toCmpType(alias func, ThrowOnEntityRef toer)(string str)
{
    import std.range : takeExactly;
    import std.utf : byUTF;

    // The type of a slice of the parser's input when the text comes from func
    // and the parser uses the config for toer.
    alias Taken = typeof(testParser!(makeConfig(toer))(func(str)).input.takeExactly(1));
    alias CodeUnit = immutable ElementType!Taken;

    return str.byUTF!CodeUnit();
}
4983 
4984 
// Used to indicate where in the grammar we're currently parsing.
enum GrammarPos
{
    // Nothing has been parsed yet.
    documentStart,

    // document ::= prolog element Misc*
    // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
    // This is that first Misc*. The next entity to parse is either a Misc, the
    // doctypedecl, or the root element which follows the prolog.
    prologMisc1,

    // document ::= prolog element Misc*
    // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
    // This is that second Misc*. The next entity to parse is either a Misc or
    // the root element which follows the prolog.
    prologMisc2,

    // Used with SplitEmpty.yes to tell the parser that we're currently at an
    // empty element tag that we're treating as a start tag, so the next entity
    // will be an end tag even though we didn't actually parse one.
    splittingEmpty,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is at the beginning of content at the first CharData?. The next
    // thing to parse will be a CharData, element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentCharData1,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is after the first CharData?. The next thing to parse will be an
    // element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentMid,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is at the second CharData?. The next thing to parse will be a
    // CharData, element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentCharData2,

    // element  ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is after the second CharData?. The next thing to parse is an ETag.
    endTag,

    // document ::= prolog element Misc*
    // This is the Misc* at the end of the document. The next thing to parse is
    // either another Misc, or we will hit the end of the document.
    endMisc,

    // The end of the document (and the grammar) has been reached.
    documentEnd
}
5045 
5046 
// Wrapper around skipOver which operates on the parser's text state (an object
// with input and pos members, such as the one returned by testParser).
//
// If text.input starts with needle, then needle is removed from the front of
// text.input, text.pos.col is advanced by needle.length, and true is returned.
// Otherwise, false is returned, and text.pos is left alone.
//
// It is assumed that there are no newlines in needle, since only the column is
// adjusted.
bool stripStartsWith(Text)(ref Text text, string needle)
{
    import std.algorithm.searching : skipOver;
    import std.utf : byCodeUnit;

    //TODO In the case where we're parsing an array of char, if we can cleanly
    // strip off any byCodeUnit and takeExactly wrappers, then we should be able
    // to have skipOver compare the string being parsed and the needle with ==.
    // It may happen in some cases right now when text.input is a byCodeUnit
    // result, but it won't happen in all cases where it ideally would. We may
    // also want to look into using byUTF on the needle so that it matches the
    // encoding of text.input or even make needle match the encoding when it's
    // passed in instead of always being string.
    immutable matched = text.input.skipOver(needle.byCodeUnit());

    if(matched)
        text.pos.col += needle.length;

    return matched;
}
5071 
// Tests stripStartsWith with every range type in testRangeFuncs, checking the
// return value, the remaining input, and the resulting text.pos.
unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    // Calls stripStartsWith on func(origHaystack) twice: once with the initial
    // position and once after the starting position has been moved by 3 lines
    // and 7 columns. line is passed on to enforce so that a failure is
    // reported at the line of the calling test!func(...) expression.
    static void test(alias func)(string origHaystack, string needle, string remainder, bool startsWith,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(text.stripStartsWith(needle) == startsWith, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The expected line is always shifted by 3, whereas the expected
            // column is only shifted by 7 when row == 1.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(text.stripStartsWith(needle) == startsWith, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Successful matches: the needle is removed and col moves past it.
        test!func("hello world", "hello", " world", true, 1, "hello".length + 1);
        test!func("hello world", "hello world", "", true, 1, "hello world".length + 1);
        // Failed matches: the input and position are expected to be unchanged.
        test!func("hello world", "foo", "hello world", false, 1, 1);
        test!func("hello world", "hello sally", "hello world", false, 1, 1);
        test!func("hello world", "hello world ", "hello world", false, 1, 1);
    }
}
5108 
// Makes sure that stripStartsWith can be called from @safe pure code with
// every range type in testRangeFuncs.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        immutable stripped = text.stripStartsWith("fo");
        assert(stripped);
    }}
}
5121 
5122 
// Removes spaces, tabs, carriage returns, and newlines from the front of
// text.input while keeping text.pos up-to-date. Newlines are not ignored with
// regards to the position: each one results in a call to nextLine.
//
// Returns whether any whitespace was stripped.
bool stripWS(Text)(ref Text text)
{
    enum byLength = hasLength!(Text.Input);
    bool sawWS = false;

    // When the input has a length, the column is not incremented for each
    // character. Instead, it's adjusted once at the end based on how much
    // shorter the input is than it was right after the last newline (or at the
    // start if there was no newline).
    static if(byLength)
        size_t lenAtLineStart = text.input.length;

    while(!text.input.empty)
    {
        immutable c = text.input.front;

        if(c == '\n')
        {
            text.input.popFront();
            static if(byLength)
                lenAtLineStart = text.input.length;
            nextLine!(Text.config)(text.pos);
        }
        else if(c == ' ' || c == '\t' || c == '\r')
        {
            text.input.popFront();
            static if(!byLength)
                ++text.pos.col;
        }
        else
            break;

        sawWS = true;
    }

    static if(byLength)
        text.pos.col += lenAtLineStart - text.input.length;

    return sawWS;
}
5165 
// Tests stripWS with every range type in testRangeFuncs, checking the return
// value, the remaining input, and the resulting text.pos.
unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU;
    import dxml.internal : testRangeFuncs;

    // Calls stripWS on func(origHaystack) twice: once with the initial
    // position and once after the starting position has been moved by 3 lines
    // and 7 columns. line is passed on to enforce so that a failure is
    // reported at the line of the calling test!func(...) expression.
    static void test(alias func)(string origHaystack, string remainder, bool stripped,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(text.stripWS() == stripped, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The expected line is always shifted by 3, whereas the expected
            // column is only shifted by 7 when row == 1.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(text.stripWS() == stripped, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Whitespace without newlines only moves the column.
        test!func("  \t\rhello world", "hello world", true, 1, 5);
        // Each newline moves to the next line; the column then reflects only
        // the whitespace after the last newline.
        test!func("  \n \n \n  \nhello world", "hello world", true, 5, 1);
        test!func("  \n \n \n  \n  hello world", "hello world", true, 5, 3);
        // No leading whitespace: nothing is stripped.
        test!func("hello world", "hello world", false, 1, 1);
    }
}
5202 
// Makes sure that stripWS can be called from @safe pure code with every range
// type in testRangeFuncs.
@safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        immutable stripped = text.stripWS();
        assert(!stripped);
    }}
}
5214 
5215 
// Finds the first occurrence of needle in text.input and returns the portion
// of text.input which precedes it (as a slice or takeExactly result). Both
// that portion and needle itself are removed from text.input. An
// XMLParsingException is thrown if needle is not found.
//
// This simply forwards to _takeUntil with retSlice set to true.
auto takeUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    enum wantSlice = true;
    return _takeUntil!(wantSlice, needle, skipQuotedText, Text)(text);
}
5224 
// Tests takeUntilAndDrop with every range type in testRangeFuncs, both with
// and without skipQuotedText, checking the returned slice, the remaining
// input, and the resulting text.pos (or the position in the exception when the
// call is expected to fail).
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Expects takeUntilAndDrop!(needle, sqt) to succeed on func(origHaystack).
    // It is run twice: once with the initial position and once after the
    // starting position has been moved by 3 lines and 7 columns. line is
    // passed on to enforce so that a failure is reported at the caller's line.
    static void test(alias func, string needle, bool sqt)(string origHaystack, string expected, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        // expected converted to the element type that the parser yields.
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            auto temp = text.save;
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The expected line is always shifted by 3, whereas the expected
            // column is only shifted by 7 when row == 1.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Expects takeUntilAndDrop!(needle, sqt) to throw an XMLParsingException
    // whose pos is (row, col), again both with the initial position and with
    // the shifted starting position.
    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // These cases contain no quotes, so the expected results are the same
        // regardless of skipQuotedText.
        static foreach(sqt; [false, true])
        {
            {
                auto haystack = "hello world";
                enum needle = "world";

                // Every proper prefix of "world" is used as the needle.
                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)(haystack, "hello ", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "le", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "l", "llo world", 1, 4);
            {
                enum needle = "great";
                enum expected = "プログラミング in D is ";
                // The expected column is computed with codeLen for func, since
                // the non-ASCII text precedes the needle.
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", expected,
                                                     "great indeed"[i .. $], 1, codeLen!(func, expected) + i + 1);
                }
            }
            // Needle not present: the exception reports the starting position.
            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without skipQuotedText, quotes are ordinary characters, so a needle
        // inside quotes is matched.
        test!(func, "*", false)(`hello '*' "*" * world`, `hello '`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `hello '"`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `hello "'`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, `hello ''' `, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, `hello """ `, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With skipQuotedText, a needle inside quotes is skipped, and an
        // unmatched quote results in an exception at that quote's position.
        test!(func, "*", true)(`hello '*' "*" * world`, `hello '*' "*" `, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, `hello '"*' `, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, `hello "'*" `, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        // Empty quoted text and quoted text containing newlines.
        test!(func, "*", true)(`hello '' "" * world`, `hello '' "" `, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "foo '\n \n \n' bar", "", 4, 7);
    }
}
5330 
// Makes sure that takeUntilAndDrop can be called from @safe pure code with
// every range type in testRangeFuncs.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        auto taken = text.takeUntilAndDrop!"o"();
        assert(equal(taken, "f"));
    }}
}
5343 
// Same as takeUntilAndDrop except that nothing is returned. It's meant for the
// cases where the config indicates that something should be skipped, so no
// slice of the skipped text is needed.
//
// This simply forwards to _takeUntil with retSlice set to false.
void skipUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    enum wantSlice = false;
    _takeUntil!(wantSlice, needle, skipQuotedText, Text)(text);
}
5350 
// Tests skipUntilAndDrop with every range type in testRangeFuncs, both with
// and without skipQuotedText, checking the remaining input and the resulting
// text.pos (or the position in the exception when the call is expected to
// fail).
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Expects skipUntilAndDrop!(needle, sqt) to succeed on func(origHaystack).
    // It is run twice: once with the initial position and once after the
    // starting position has been moved by 3 lines and 7 columns. line is
    // passed on so that a failure is reported at the caller's line.
    static void test(alias func, string needle, bool sqt)(string origHaystack, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            assertNotThrown!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)(), "unittest failure 1",
                                                __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The expected line is always shifted by 3, whereas the expected
            // column is only shifted by 7 when row == 1.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            assertNotThrown!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)(), "unittest failure 4",
                                                __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Expects skipUntilAndDrop!(needle, sqt) to throw an XMLParsingException
    // whose pos is (row, col), again both with the initial position and with
    // the shifted starting position.
    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // These cases contain no quotes, so the expected results are the same
        // regardless of skipQuotedText.
        static foreach(sqt; [false, true])
        {
            {
                enum needle = "world";
                // Every proper prefix of "world" is used as the needle.
                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)("hello world", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "llo world", 1, 4);

            {
                enum needle = "great";
                // The expected column is computed with codeLen for func, since
                // the non-ASCII text precedes the needle.
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", "great indeed"[i .. $],
                                                     1, codeLen!(func, "プログラミング in D is ") + i + 1);
                }
            }

            // Needle not present: the exception reports the starting position.
            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without skipQuotedText, quotes are ordinary characters, so a needle
        // inside quotes is matched.
        test!(func, "*", false)(`hello '*' "*" * world`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With skipQuotedText, a needle inside quotes is skipped, and an
        // unmatched quote results in an exception at that quote's position.
        test!(func, "*", true)(`hello '*' "*" * world`, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        // Empty quoted text and quoted text containing newlines.
        test!(func, "*", true)(`hello '' "" * world`, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "", 4, 7);
    }
}
5453 
// Makes sure that skipUntilAndDrop can be called from @safe pure code with
// every range type in testRangeFuncs.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto text = testParser!simpleXML(func(`foo`));
        text.skipUntilAndDrop!"o"();
        auto rest = text.input;
        assert(equal(rest, "o"));
    }}
}
5467 
// Shared implementation of takeUntilAndDrop and skipUntilAndDrop.
//
// Scans text.input for the first occurrence of needle, removing everything up
// to and including needle from text.input and updating text.pos as it goes.
//
// Template parameters:
//     retSlice       = If true, the text which preceded needle is returned as
//                      takeExactly of the original input. If false, nothing is
//                      returned.
//     needle         = The text to search for. It must not contain whitespace
//                      (enforced by the static assert).
//     skipQuotedText = If true, text between matching single or double quotes
//                      is skipped rather than searched, and an unmatched quote
//                      is an error.
//
// Throws: XMLParsingException if needle is not found (with the position that
// text had on entry) or, when skipQuotedText is true, if a quote has no
// matching closing quote (with the position of the opening quote).
auto _takeUntil(bool retSlice, string needle, bool skipQuotedText, Text)(ref Text text)
{
    import std.algorithm : find;
    import std.ascii : isWhite;
    import std.range : takeExactly;

    static assert(needle.find!isWhite().empty);

    // Kept so that the slice can be taken from the unconsumed input and so that
    // the "not found" exception can report the starting position.
    auto orig = text.save;
    bool found = false;
    // Number of code units consumed ahead of the needle (i.e. the length of
    // the slice to return).
    size_t takeLen = 0;
    // Value that takeLen had right after the most recent newline, so that
    // takeLen - lineStart is the number of code units consumed on the current
    // line.
    size_t lineStart = 0;

    void processNewline()
    {
        ++takeLen;
        nextLine!(Text.config)(text.pos);
        lineStart = takeLen;
    }

    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            case cast(ElementType!(Text.Input))needle[0]:
            {
                static if(needle.length == 1)
                {
                    found = true;
                    text.input.popFront();
                    break loop;
                }
                else static if(needle.length == 2)
                {
                    text.input.popFront();
                    if(!text.input.empty && text.input.front == needle[1])
                    {
                        found = true;
                        text.input.popFront();
                        break loop;
                    }
                    // Only the first character is counted. The second one is
                    // not popped, so it gets examined by the next iteration.
                    ++takeLen;
                    continue;
                }
                else
                {
                    text.input.popFront();
                    // Where to resume scanning if the rest of needle doesn't
                    // match.
                    auto saved = text.input.save;
                    foreach(i, c; needle[1 .. $])
                    {
                        if(text.input.empty)
                        {
                            // Ran out of input part-way through needle; found
                            // stays false, so the exception below is thrown.
                            takeLen += i + 1;
                            break loop;
                        }
                        if(text.input.front != c)
                        {
                            text.input = saved;
                            ++takeLen;
                            continue loop;
                        }
                        text.input.popFront();
                    }
                    found = true;
                    break loop;
                }
            }
            static if(skipQuotedText)
            {
                static foreach(quote; ['\'', '"'])
                {
                    case quote:
                    {
                        // Position of the opening quote, for the error message.
                        auto quotePos = text.pos;
                        quotePos.col += takeLen - lineStart;
                        ++takeLen;
                        while(true)
                        {
                            text.input.popFront();
                            if(text.input.empty)
                                throw new XMLParsingException("Failed to find matching quote", quotePos);
                            switch(text.input.front)
                            {
                                case quote:
                                {
                                    // The closing quote is popped here, so the
                                    // popFront at the bottom of the outer loop
                                    // is skipped via continue.
                                    ++takeLen;
                                    text.input.popFront();
                                    continue loop;
                                }
                                case '\n':
                                {
                                    processNewline();
                                    break;
                                }
                                default:
                                {
                                    ++takeLen;
                                    break;
                                }
                            }
                        }
                        assert(0); // the compiler isn't smart enough to see that this is unreachable.
                    }
                }
            }
            case '\n':
            {
                processNewline();
                break;
            }
            default:
            {
                ++takeLen;
                break;
            }
        }

        text.input.popFront();
    }

    // Account for what was consumed on the current line plus the needle.
    text.pos.col += takeLen - lineStart + needle.length;

    if(!found)
        throw new XMLParsingException("Failed to find: " ~ needle, orig.pos);

    static if(retSlice)
        return takeExactly(orig.input, takeLen);
}
5596 
5597 
// The name is unfortunately close to skipUntilAndDrop, but the logic is
// different enough that a separate function is preferable to selecting the
// behavior with template arguments.
//
// Skips forward in text.input (updating text.pos along the way) until the
// front of the input is one of the given delimiter characters. The delimiter
// is not removed, so on return, text.input.front is the delimiter which was
// found. If the end of the input is reached without finding any of the
// delimiters, then an XMLParsingException is thrown.
//
// Each delimiter must be a char and must not be whitespace.
template skipToOneOf(delims...)
{
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char));
        static assert(!isSpace(delim));
    }

    void skipToOneOf(Text)(ref Text text)
    {
        for(; !text.input.empty; )
        {
            immutable c = text.input.front;

            // Stop as soon as the front is any of the delimiters.
            static foreach(delim; delims)
            {
                if(c == delim)
                    return;
            }

            if(c == '\n')
            {
                nextLine!(Text.config)(text.pos);
                text.input.popFront();
            }
            else
                popFrontAndIncCol(text);
        }

        throw new XMLParsingException("Prematurely reached end of document", text.pos);
    }
}
5636 
// Tests skipToOneOf with every range type in testRangeFuncs, checking the
// remaining input and the resulting text.pos (or the position in the exception
// when none of the delimiters is present).
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Expects skipToOneOf!delims to succeed on func(origHaystack). It is run
    // twice: once with the initial position and once after the starting
    // position has been moved by 3 lines and 7 columns. line is passed on so
    // that a failure is reported at the caller's line.
    static void test(alias func, delims...)(string origHaystack, string remainder,
                                            int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The expected line is always shifted by 3, whereas the expected
            // column is only shifted by 7 when row == 1.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Expects skipToOneOf!delims to throw an XMLParsingException whose pos is
    // (row, col), again both with the initial position and with the shifted
    // starting position.
    static void testFail(alias func, delims...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.skipToOneOf!delims());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.skipToOneOf!delims());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // The found delimiter stays at the front of the remaining input.
        test!(func, 'o', 'w')("hello world", "o world", 1, 5);
        test!(func, 'r', 'w', '1', '+', '*')("hello world", "world", 1, 7);
        test!(func, 'z', 'y')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "yzzy \nf\ng", 6, 6);
        test!(func, 'o', 'g')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "g", 8, 1);
        test!(func, 'g', 'x')("プログラミング in D is great indeed", "great indeed",
                              1, codeLen!(func, "プログラミング in D is ") + 1);

        // No delimiter present: the exception reports the end of the input.
        testFail!(func, 'a', 'b')("hello world", 1, 12);
        testFail!(func, 'a', 'b')("hello\n\nworld", 3, 6);
        testFail!(func, 'a', 'b')("プログラミング",  1, codeLen!(func, "プログラミング") + 1);
    }
}
5699 
// Calling skipToOneOf from an @safe pure unittest only compiles if the
// instantiation is itself usable from @safe pure code, so this guards the
// function's inferred attributes for every test range type.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto parser = testParser!simpleXML(makeRange(`foo`));
        skipToOneOf!('o')(parser);

        // Everything before the first 'o' has been skipped.
        assert(equal(parser.input, "oo"));
    }}
}
5713 
5714 
// Expects the input to start with text enclosed in either single or double
// quotes. Returns a slice of the input holding the text between the quotes and
// leaves the input positioned one code unit past the closing quote.
//
// Throws an XMLParsingException if the input does not start with a quote
// character (emptiness is checked up front by checkNotEmpty).
auto takeEnquotedText(Text)(ref Text text)
{
    checkNotEmpty(text);

    // takeUntilAndDrop takes its needle at compile time, so each kind of quote
    // needs its own instantiation; dispatch on the opening quote to pick it.
    switch(text.input.front)
    {
        case '"':
            popFrontAndIncCol(text);
            return takeUntilAndDrop!("\"")(text);
        case '\'':
            popFrontAndIncCol(text);
            return takeUntilAndDrop!("'")(text);
        default:
            break;
    }

    throw new XMLParsingException("Expected quoted text", text.pos);
}
5735 
// Tests for takeEnquotedText.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown, enforce;
    import std.range : only;
    import dxml.internal : testRangeFuncs;

    // Verifies the returned text, the remaining input, and the resulting
    // position - first starting from the parser's initial position and then
    // starting from a position that has been shifted by 3 lines and 7 columns.
    // Each check has its own failure number so that a failing assertion can be
    // traced to exactly one check.
    static void test(alias func)(string origHaystack, string expected, string remainder,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected.save), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if parsing ends on the line
            // that it started on.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Verifies that takeEnquotedText throws an XMLParsingException.
    static void testFail(alias func)(string origHaystack, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto text = testParser(haystack);
        assertThrown!XMLParsingException(text.takeEnquotedText(), "unittest failure", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        foreach(quote; only("\"", "'"))
        {
            test!func(quote ~ quote, "", "", 1, 3);
            test!func(quote ~ "hello world" ~ quote, "hello world", "", 1, 14);
            test!func(quote ~ "hello world" ~ quote ~ " foo", "hello world", " foo", 1, 14);
            {
                import std.utf : codeLength;
                auto haystack = quote ~ "プログラミング " ~ quote ~ "in D";
                // The expected column depends on how many code units the text
                // occupies in the encoding of the range under test.
                enum len = cast(int)codeLength!(ElementEncodingType!(typeof(func(haystack))))("プログラミング ");
                test!func(haystack, "プログラミング ", "in D", 1, len + 3);
            }
        }

        // Missing, mismatched, and unterminated quotes must all be rejected.
        foreach(str; only(`hello`, `"hello'`, `"hello`, `'hello"`, `'hello`, ``, `"'`, `"`, `'"`, `'`))
            testFail!func(str);
    }
}
5792 
5793 
// Strips a name (per the Name grammar rule) off of the front of the input and
// returns it as a slice of the original input.
// Scanning stops at the first XML whitespace character, at the first of the
// given delimiters, or at the end of the input - whichever comes first. The
// whitespace/delimiter is not part of the returned name and is left at the
// front of the input.
// An XMLParsingException is thrown if a character that is not legal in a name
// is encountered before then.
template takeName(delims...)
{
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char), delim);
        static assert(!isSpace(delim));
    }

    auto takeName(Text)(ref Text text)
    {
        import std.format : format;
        import std.range : takeExactly;
        import std.utf : decodeFront, UseReplacementDchar;
        import dxml.internal : formatInvalidCharMsg, isNameStartChar, isNameChar;

        assert(!text.input.empty);

        auto nameStart = text.input.save;

        // Number of code units that make up the name so far.
        size_t nameLen;

        // The first character has a stricter rule than the rest of the name.
        // It sits exactly at text.pos, so no column adjustment is needed for
        // the error position.
        immutable firstC = text.input.decodeFront!(UseReplacementDchar.yes)(nameLen);
        if(!isNameStartChar(firstC))
            throw new XMLParsingException(formatInvalidCharMsg!"Name contains invalid character: %s"(firstC), text.pos);

        scan: while(!text.input.empty)
        {
            immutable unit = text.input.front;
            if(isSpace(unit))
                break;
            static foreach(delim; delims)
            {
                if(unit == delim)
                    break scan;
            }

            size_t width;
            immutable nextC = text.input.decodeFront!(UseReplacementDchar.yes)(width);
            if(!isNameChar(nextC))
            {
                // Point the error at the offending character rather than at
                // the start of the name.
                text.pos.col += nameLen;
                throw new XMLParsingException(formatInvalidCharMsg!"Name contains invalid character: %s"(nextC), text.pos);
            }
            nameLen += width;
        }

        text.pos.col += nameLen;

        return takeExactly(nameStart, nameLen);
    }
}
5859 
// Tests for takeName.
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.typecons : tuple;
    import dxml.internal : codeLen, testRangeFuncs;

    // Verifies the name returned by takeName!delim, the remaining input, and
    // the resulting position - first starting from the parser's initial
    // position and then starting from a position that has been shifted by
    // 3 lines and 7 columns.
    static void test(alias func, delim...)(string origHaystack, string expected, string remainder,
                                           int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(text.takeName!delim(), adjExpected.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset is only expected in the result when the
            // expected position is still on the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(text.takeName!delim(), adjExpected),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Verifies that takeName!delim throws an XMLParsingException and that the
    // exception carries the expected position, with and without the starting
    // position having been shifted.
    static void testFail(alias func, delim...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.takeName!delim());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.takeName!delim());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Names that must be accepted, each followed by a variety of
        // whitespace/remainder combinations and by a delimiter.
        static foreach(str; ["hello", "プログラミング", "h_:llo-.42", "_.", "_-", "_42"])
        {{
            enum len = codeLen!(func, str);

            static foreach(remainder; ["", " ", "\t", "\r", "\n", " foo", "\tfoo", "\rfoo", "\nfoo",  "  foo \n \r "])
            {{
                enum strRem = str ~ remainder;
                enum delimRem = '>' ~ remainder;
                enum hay = str ~ delimRem;
                test!func(strRem, str, remainder, 1, len + 1);
                test!(func, '=')(strRem, str, remainder, 1, len + 1);
                test!(func, '>', '|')(hay, str, delimRem, 1, len + 1);
                test!(func, '|', '>')(hay, str, delimRem, 1, len + 1);
            }}
        }}

        // Inputs that must be rejected, along with the position that the
        // exception is expected to report.
        static foreach(t; [tuple(" ", 1, 1), tuple("<", 1, 1), tuple("foo!", 1, 4), tuple("foo!<", 1, 4)])
        {{
            testFail!func(t[0], t[1], t[2]);
            testFail!func(t[0] ~ '>', t[1], t[2]);
            testFail!(func, '?')(t[0], t[1], t[2]);
            testFail!(func, '=')(t[0] ~ '=', t[1], t[2]);
        }}

        // A delimiter at the very front means that there is no name at all.
        testFail!(func, '>')(">", 1, 1);
        testFail!(func, '?')("?", 1, 1);
        testFail!(func, '?')("プログ&ラミング", 1, codeLen!(func, "プログ&"));

        // Characters which are legal within a name but not as its first
        // character.
        static foreach(t; [tuple("42", 1, 1), tuple(".", 1, 1), tuple(".a", 1, 1)])
        {
            testFail!func(t[0], t[1], t[2]);
            testFail!(func, '>')(t[0], t[1], t[2]);
        }
    }
}
5949 
// Calling takeName from an @safe pure unittest only compiles if the
// instantiation is itself usable from @safe pure code, so this guards the
// function's inferred attributes for every test range type.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto parser = testParser!simpleXML(makeRange(`foo`));
        auto name = parser.takeName();
        assert(equal(name, "foo"));
    }}
}
5962 
5963 
// This removes an attribute value from the front of the input, partially
// validates it, and returns it. The validation that is not done is whether
// the value in a character reference is valid. It's checked for whether the
// characters used in it are valid but not whether the number they form is a
// valid Unicode character. Checking the number doesn't seem worth the extra
// complication, and it's not required for the XML to be "well-formed."
// dxml.util.parseCharRef will check that it is fully correct if it is used.
//
// The front of the input must be the opening quote (single or double). The
// returned slice contains the text between the quotes, and the input is left
// one code unit beyond the closing quote.
//
// An XMLParsingException is thrown if the value is not quoted, is
// unterminated, or contains a character or reference which is not legal in an
// attribute value. Except for the unterminated case (which reports the
// position of the opening quote), the reported position is that of the
// offending character.
auto takeAttValue(Text)(ref Text text)
{
    // AttValue    ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    // Reference   ::= EntityRef | CharRef
    // EntityRef   ::= '&' Name ';'
    // PEReference ::= '%' Name ';'

    checkNotEmpty(text);
    immutable quote = text.input.front;
    if(quote != '"' && quote != '\'')
        throw new XMLParsingException("Expected quoted text", text.pos);

    immutable quotePos = text.pos;
    popFrontAndIncCol(text);

    // text.pos is not updated per character. Instead, takeLen counts the code
    // units consumed so far, and lineStart is the value that takeLen had at
    // the start of the current line, so takeLen - lineStart is the column
    // offset of the current character relative to text.pos.
    size_t lineStart = 0;
    auto orig = text.input.save;
    size_t takeLen;
    loop: while(true)
    {
        if(text.input.empty)
            throw new XMLParsingException("Unterminated attribute value", quotePos);
        switch(text.input.front)
        {
            case '"':
            {
                if(quote == '"')
                {
                    text.input.popFront();
                    goto done;
                }
                // The other kind of quote is just a normal character.
                goto default;
            }
            case '\'':
            {
                if(quote == '\'')
                {
                    text.input.popFront();
                    goto done;
                }
                // The other kind of quote is just a normal character.
                goto default;
            }
            case '&':
            {
                // Character References
                {
                    import dxml.util : parseCharRef;
                    auto temp = text.input.save;
                    auto charRef = parseCharRef(temp);
                    if(!charRef.isNull)
                    {
                        static if(hasLength!(Text.Input))
                        {
                            takeLen += text.input.length - temp.length;
                            text.input = temp;
                        }
                        else
                        {
                            // Without length, the reference has to be walked
                            // to find out how many code units it took up.
                            while(text.input.front != ';')
                            {
                                ++takeLen;
                                text.input.popFront();
                            }
                            ++takeLen;
                            text.input.popFront();
                        }
                        continue;
                    }
                }

                // Column offset of the & in case the reference is invalid.
                immutable ampLen = takeLen - lineStart;
                ++takeLen;
                text.input.popFront();

                // Std Entity References
                static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                {
                    import std.algorithm.searching : startsWith;

                    static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                    {
                        if(text.input.save.startsWith(entRef))
                        {
                            takeLen += entRef.length;
                            text.input.popFrontN(entRef.length);
                            continue loop;
                        }
                    }

                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "reference, and this parser only supports entity " ~
                                                  "references if they're predefined by the spec. This is not " ~
                                                  "a valid character reference or one of the predefined " ~
                                                  "entity references.", text.pos);
                }
                // All Entity References
                else
                {
                    import std.utf : decodeFront, UseReplacementDchar;
                    import dxml.internal : isNameStartChar, isNameChar;

                    if(text.input.empty || text.input.front == quote)
                        goto failedEntityRef;

                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameStartChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }

                    while(true)
                    {
                        if(text.input.empty)
                            goto failedEntityRef;
                        immutable c = text.input.front;
                        if(c == ';')
                        {
                            // The ; itself is popped off at the end of the
                            // outer loop.
                            ++takeLen;
                            break;
                        }
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }
                    break;

                    failedEntityRef:
                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "character or entity reference, and this is not a valid " ~
                                                  "character or entity reference.", text.pos);
                }
            }
            case '<':
            {
                text.pos.col += takeLen - lineStart;
                throw new XMLParsingException("< is not legal in an attribute value", text.pos);
            }
            case '\n':
            {
                ++takeLen;
                nextLine!(Text.config)(text.pos);
                lineStart = takeLen;
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import std.format : format;
                import dxml.internal : isXMLChar;

                immutable c = text.input.front;
                if(isASCII(c))
                {
                    if(!isXMLChar(c))
                    {
                        // Report the position of the illegal character, not
                        // the start of the value/line.
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    ++takeLen;
                    break;
                }
                import std.utf : decodeFront, UseReplacementDchar, UTFException;
                // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                // replacement character is considered valid XML, and if we decoded using it, then
                // all of the invalid Unicode characters would come out as the replacement character
                // and then be treated as valid instead of being caught, which isn't all bad, but
                // the spec requires that they be treated as invalid instead of playing nice and
                // using the replacement character.
                try
                {
                    size_t numCodeUnits;
                    immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                    if(!isXMLChar(decodedC))
                    {
                        enum fmt = "Character is not legal in an XML File: 0x%0x";
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!fmt(decodedC), text.pos);
                    }
                    takeLen += numCodeUnits;
                }
                catch(UTFException e)
                {
                    // takeLen has not been incremented for the bad sequence
                    // yet, so this is the column of its first code unit.
                    text.pos.col += takeLen - lineStart;
                    throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                // decodeFront already popped the character off.
                continue;
            }
        }
        text.input.popFront();
    }
    done:
    {
        import std.range : takeExactly;
        // + 1 for the closing quote.
        text.pos.col += takeLen - lineStart + 1;
        return takeExactly(orig, takeLen);
    }
}
6177 
6178 unittest
6179 {
6180     import core.exception : AssertError;
6181     import std.algorithm.comparison : equal;
6182     import std.exception : collectException, enforce;
6183     import std.range : only;
6184     import dxml.internal : codeLen, testRangeFuncs;
6185 
6186     static void test(alias func, ThrowOnEntityRef toer)(string origHaystack, string expected, string remainder,
6187                                                         int row, int col, size_t line = __LINE__)
6188     {
6189         auto haystack = func(origHaystack);
6190         auto adjExpected = expected.toCmpType!(func, toer)();
6191         {
6192             auto text = testParser!(makeConfig(toer))(haystack.save);
6193             enforce!AssertError(equal(text.takeAttValue(), adjExpected.save),
6194                                 "unittest failure 1", __FILE__, line);
6195             enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
6196             enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
6197         }
6198         {
6199             auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
6200             auto text = testParser!(makeConfig(toer))(haystack);
6201             text.pos.line += 3;
6202             text.pos.col += 7;
6203             enforce!AssertError(equal(text.takeAttValue(), adjExpected),
6204                                 "unittest failure 4", __FILE__, line);
6205             enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
6206             enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
6207         }
6208     }
6209 
6210     static void testFail(alias func, ThrowOnEntityRef toer)(string origHaystack,
6211                                                             int row, int col, size_t line = __LINE__)
6212     {
6213         auto haystack = func(origHaystack);
6214         {
6215             auto text = testParser!(makeConfig(toer))(haystack.save);
6216             auto e = collectException!XMLParsingException(text.takeAttValue());
6217             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
6218             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
6219         }
6220         {
6221             auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
6222             auto text = testParser!(makeConfig(toer))(haystack);
6223             text.pos.line += 3;
6224             text.pos.col += 7;
6225             auto e = collectException!XMLParsingException(text.takeAttValue());
6226             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
6227             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
6228         }
6229     }
6230 
6231     static foreach(i, func; testRangeFuncs)
6232     {
6233         static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
6234         {
6235             test!(func, toer)(`""`, "", "", 1, 3);
6236             test!(func, toer)(`"J"`, "J", "", 1, 4);
6237             test!(func, toer)(`"foo"`, "foo", "", 1, 6);
6238             test!(func, toer)(`"プログラミング"`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3);
6239             test!(func, toer)(`"foo"bar`, "foo", "bar", 1, 6);
6240             test!(func, toer)(`"プログラミング" after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3);
6241 
6242             test!(func, toer)(`''`, "", "", 1, 3);
6243             test!(func, toer)(`'J'`, "J", "", 1, 4);
6244             test!(func, toer)(`'foo'`, "foo", "", 1, 6);
6245             test!(func, toer)(`'プログラミング'`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3);
6246             test!(func, toer)(`'foo'bar`, "foo", "bar", 1, 6);
6247             test!(func, toer)(`'プログラミング' after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3);
6248 
6249             test!(func, toer)(`"&amp;&gt;&lt;"`, "&amp;&gt;&lt;", "", 1, 16);
6250             test!(func, toer)(`"&apos;&quot;"`, "&apos;&quot;", "", 1, 15);
6251             test!(func, toer)(`"hello&amp;&gt;&lt;world"`, "hello&amp;&gt;&lt;world", "", 1, 26);
6252             test!(func, toer)(`".....&amp;&gt;&lt;....."`, ".....&amp;&gt;&lt;.....", "", 1, 26);
6253             test!(func, toer)(`"&#12487;&#12451;&#12521;&#12531;"`, "&#12487;&#12451;&#12521;&#12531;", "", 1, 35);
6254             test!(func, toer)(`"hello&#xAF;&#77;&amp;world"`, "hello&#xAF;&#77;&amp;world", "", 1, 29);
6255 
6256             test!(func, toer)(`'&amp;&gt;&lt;'`, "&amp;&gt;&lt;", "", 1, 16);
6257             test!(func, toer)(`'hello&amp;&gt;&lt;world'`, "hello&amp;&gt;&lt;world", "", 1, 26);
6258             test!(func, toer)(`'&apos;&quot;'`, "&apos;&quot;", "", 1, 15);
6259             test!(func, toer)(`'.....&amp;&gt;&lt;.....'`, ".....&amp;&gt;&lt;.....", "", 1, 26);
6260             test!(func, toer)(`'&#12487;&#12451;&#12521;&#12531;'`, "&#12487;&#12451;&#12521;&#12531;", "", 1, 35);
6261             test!(func, toer)(`'hello&#xAF;&#77;&amp;world'`, "hello&#xAF;&#77;&amp;world", "", 1, 29);
6262 
6263             test!(func, toer)("'hello\nworld'", "hello\nworld", "", 2, 7);
6264             test!(func, toer)("'hello\nworld\n'", "hello\nworld\n", "", 3, 2);
6265 
6266             test!(func, toer)(`"'''"whatever`, "'''", "whatever", 1, 6);
6267             test!(func, toer)(`'"""'whatever`, `"""`, "whatever", 1, 6);
6268 
6269             test!(func, toer)(`"&#42;"`, "&#42;", "", 1, 8);
6270             test!(func, toer)(`"&#x42;"`, "&#x42;", "", 1, 9);
6271             test!(func, toer)(`"%foo"`, "%foo", "", 1, 7);
6272 
6273             testFail!(func, toer)(`"`, 1, 1);
6274             testFail!(func, toer)(`"foo`, 1, 1);
6275             testFail!(func, toer)(`"foo'`, 1, 1);
6276             testFail!(func, toer)(`"<"`, 1, 2);
6277             testFail!(func, toer)(`"&`, 1, 2);
6278             testFail!(func, toer)(`"&"`, 1, 2);
6279             testFail!(func, toer)(`"&x"`, 1, 2);
6280             testFail!(func, toer)(`"&.;"`, 1, 2);
6281             testFail!(func, toer)(`"&&;"`, 1, 2);
6282             testFail!(func, toer)(`"&a"`, 1, 2);
6283             testFail!(func, toer)(`"&a`, 1, 2);
6284             testFail!(func, toer)(`"hello&;"`, 1, 7);
6285             testFail!(func, toer)(`"hello&;world"`,1, 7);
6286             testFail!(func, toer)(`"hello&<;world"`,1, 7);
6287             testFail!(func, toer)(`"hello&world"`,1, 7);
6288             testFail!(func, toer)(`"hello<world"`,1, 7);
6289             testFail!(func, toer)(`"hello world&"`, 1, 13);
6290             testFail!(func, toer)(`"hello world&;"`, 1, 13);
6291             testFail!(func, toer)(`"hello world&foo"`, 1, 13);
6292             testFail!(func, toer)(`"foo<"`, 1, 5);
6293             testFail!(func, toer)(`"&#`, 1, 2);
6294             testFail!(func, toer)(`"&#"`, 1, 2);
6295             testFail!(func, toer)(`"&#;"`, 1, 2);
6296             testFail!(func, toer)(`"&#x;"`, 1, 2);
6297             testFail!(func, toer)(`"&#AF;"`, 1, 2);
6298             testFail!(func, toer)(`"&#x`, 1, 2);
6299             testFail!(func, toer)(`"&#77`, 1, 2);
6300             testFail!(func, toer)(`"&#77;`, 1, 1);
6301             testFail!(func, toer)(`"&#x0`, 1, 2);
6302             testFail!(func, toer)(`"&#x0;`, 1, 2);
6303             testFail!(func, toer)(`"&#x0;"`, 1, 2);
6304 
6305             testFail!(func, toer)(`'`, 1, 1);
6306             testFail!(func, toer)(`'foo`, 1, 1);
6307             testFail!(func, toer)(`'foo"`, 1, 1);
6308             testFail!(func, toer)(`'<'`, 1, 2);
6309             testFail!(func, toer)("'\v'", 1, 2);
6310             testFail!(func, toer)("'\uFFFE'", 1, 2);
6311             testFail!(func, toer)(`'&`, 1, 2);
6312             testFail!(func, toer)(`'&'`, 1, 2);
6313             testFail!(func, toer)(`'&x'`, 1, 2);
6314             testFail!(func, toer)(`'&.;'`, 1, 2);
6315             testFail!(func, toer)(`'&&;'`, 1, 2);
6316             testFail!(func, toer)(`'&a'`, 1, 2);
6317             testFail!(func, toer)(`'&a`, 1, 2);
6318             testFail!(func, toer)(`'hello&;'`, 1, 7);
6319             testFail!(func, toer)(`'hello&;world'`, 1, 7);
6320             testFail!(func, toer)(`'hello&<;world'`, 1, 7);
6321             testFail!(func, toer)(`'hello&world'`, 1, 7);
6322             testFail!(func, toer)(`'hello<world'`, 1, 7);
6323             testFail!(func, toer)(`'hello world&'`, 1, 13);
6324             testFail!(func, toer)(`'hello world&;'`, 1, 13);
6325             testFail!(func, toer)(`'hello world&foo'`, 1, 13);
6326             testFail!(func, toer)(`'foo<'`, 1, 5);
6327             testFail!(func, toer)(`'&#`, 1, 2);
6328             testFail!(func, toer)(`'&#'`, 1, 2);
6329             testFail!(func, toer)(`'&#;'`, 1, 2);
6330             testFail!(func, toer)(`'&#x;'`, 1, 2);
6331             testFail!(func, toer)(`'&#AF;'`, 1, 2);
6332             testFail!(func, toer)(`'&#x`, 1, 2);
6333             testFail!(func, toer)(`'&#77`, 1, 2);
6334             testFail!(func, toer)(`'&#77;`, 1, 1);
6335             testFail!(func, toer)(`'&#x0`, 1, 2);
6336             testFail!(func, toer)(`'&#x0;`, 1, 2);
6337             testFail!(func, toer)(`'&#x0;'`, 1, 2);
6338             testFail!(func, toer)("'&#xA\nF;'", 1, 2);
6339             testFail!(func, toer)("'&amp\n;'", 1, 2);
6340             testFail!(func, toer)("'&\namp;'", 1, 2);
6341             testFail!(func, toer)("'\n&amp;&;'", 2, 6);
6342         }
6343         {
6344             alias toer = ThrowOnEntityRef.yes;
6345             testFail!(func, toer)(`"&foo;"`, 1, 2);
6346             testFail!(func, toer)(`"hello world&foo;"`, 1, 13);
6347             testFail!(func, toer)(`"hello &foo; world"`, 1, 8);
6348             testFail!(func, toer)(`"&am;"`, 1, 2);
6349             testFail!(func, toer)(`"&ampe;"`, 1, 2);
6350             testFail!(func, toer)(`"&l;"`, 1, 2);
6351             testFail!(func, toer)(`"&lte;"`, 1, 2);
6352             testFail!(func, toer)(`"&g;"`, 1, 2);
6353             testFail!(func, toer)(`"&gte;"`, 1, 2);
6354             testFail!(func, toer)(`"&apo;"`, 1, 2);
6355             testFail!(func, toer)(`"&aposs;"`, 1, 2);
6356             testFail!(func, toer)(`"&quo;"`, 1, 2);
6357             testFail!(func, toer)(`"&quote;"`, 1, 2);
6358 
6359             testFail!(func, toer)(`'&foo;'`, 1, 2);
6360             testFail!(func, toer)(`'hello world&foo;'`, 1, 13);
6361             testFail!(func, toer)(`'hello &foo; world'`, 1, 8);
6362             testFail!(func, toer)(`'&am;'`, 1, 2);
6363             testFail!(func, toer)(`'&ampe;'`, 1, 2);
6364             testFail!(func, toer)(`'&l;'`, 1, 2);
6365             testFail!(func, toer)(`'&lte;'`, 1, 2);
6366             testFail!(func, toer)(`'&g;'`, 1, 2);
6367             testFail!(func, toer)(`'&gte;'`, 1, 2);
6368             testFail!(func, toer)(`'&apo;'`, 1, 2);
6369             testFail!(func, toer)(`'&aposs;'`, 1, 2);
6370             testFail!(func, toer)(`'&quo;'`, 1, 2);
6371             testFail!(func, toer)(`'&quote;'`, 1, 2);
6372         }
6373         {
6374             alias toer = ThrowOnEntityRef.no;
6375             test!(func, toer)(`"&foo;"`, "&foo;", "", 1, 8);
6376             test!(func, toer)(`"hello world&foo;"`, "hello world&foo;", "", 1, 19);
6377             test!(func, toer)(`"hello &foo; world"`, "hello &foo; world", "", 1, 20);
6378             test!(func, toer)(`"&am;"`, "&am;", "", 1, 7);
6379             test!(func, toer)(`"&ampe;"`, "&ampe;", "", 1, 9);
6380             test!(func, toer)(`"&l;"`, "&l;", "", 1, 6);
6381             test!(func, toer)(`"&lte;"`, "&lte;", "", 1, 8);
6382             test!(func, toer)(`"&g;"`, "&g;", "", 1, 6);
6383             test!(func, toer)(`"&gte;"`, "&gte;", "", 1, 8);
6384             test!(func, toer)(`"&apo;"`, "&apo;", "", 1, 8);
6385             test!(func, toer)(`"&aposs;"`, "&aposs;", "", 1, 10);
6386             test!(func, toer)(`"&quo;"`, "&quo;", "", 1, 8);
6387             test!(func, toer)(`"&quote;"`, "&quote;", "", 1, 10);
6388 
6389             test!(func, toer)(`'&foo;'`, "&foo;", "", 1, 8);
6390             test!(func, toer)(`'hello world&foo;'`, "hello world&foo;", "", 1, 19);
6391             test!(func, toer)(`'hello &foo; world'`, "hello &foo; world", "", 1, 20);
6392             test!(func, toer)(`'&am;'`, "&am;", "", 1, 7);
6393             test!(func, toer)(`'&ampe;'`, "&ampe;", "", 1, 9);
6394             test!(func, toer)(`'&l;'`, "&l;", "", 1, 6);
6395             test!(func, toer)(`'&lte;'`, "&lte;", "", 1, 8);
6396             test!(func, toer)(`'&g;'`, "&g;", "", 1, 6);
6397             test!(func, toer)(`'&gte;'`, "&gte;", "", 1, 8);
6398             test!(func, toer)(`'&apo;'`, "&apo;", "", 1, 8);
6399             test!(func, toer)(`'&aposs;'`, "&aposs;", "", 1, 10);
6400             test!(func, toer)(`'&quo;'`, "&quo;", "", 1, 8);
6401             test!(func, toer)(`'&quote;'`, "&quote;", "", 1, 10);
6402         }
6403     }
6404 
6405     // These can't be tested with testFail, because attempting to convert
6406     // invalid Unicode results in UnicodeExceptions before parseXML even
6407     // gets called.
6408     import std.meta : AliasSeq;
6409     static foreach(str; AliasSeq!("'" ~ cast(string)[255] ~ "'",
6410                                   "'"w ~ cast(wstring)[0xD800] ~ "'",
6411                                   "'"d ~ cast(dstring)[0xD800] ~ "'"))
6412     {{
6413         auto text = testParser(str);
6414         auto e = collectException!XMLParsingException(text.takeAttValue());
6415         assert(e ! is null);
6416         assert(e.pos == TextPos(1, 2));
6417     }}
6418 }
6419 
// Attribute check: takeAttValue must be usable from @safe pure code for every
// kind of test range and for each of the listed configurations.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
        {{
            auto xml = func(`'foo'`);
            // Instantiate with the config being iterated over; using a fixed
            // config here would make every iteration test the same thing.
            auto text = testParser!config(xml);
            assert(equal(text.takeAttValue(), "foo"));
        }}
    }
}
6435 
6436 
// Validates an EntityType.text field to verify that it does not contain invalid
// characters.
//
// The validation runs on a copy obtained from orig.save, so orig itself is not
// advanced. The function returns normally if the whole input is acceptable and
// throws otherwise.
//
// Params:
//     allowRestrictedChars = If false, '&' is only accepted as the start of a
//                            character reference or an entity reference (only
//                            the five predefined entity references when
//                            Text.config.throwOnEntityRef is
//                            ThrowOnEntityRef.yes), and '<' and "]]>" are
//                            rejected. If true, those checks are compiled out,
//                            and each character is only checked for being a
//                            legal XML character.
//     orig = The parser state whose remaining input is validated.
//
// Throws: XMLParsingException whose position is that of the offending '&',
//         '<', "]]>", illegal character, or invalid Unicode sequence.
void checkText(bool allowRestrictedChars, Text)(ref Text orig)
{
    import std.format : format;
    import std.utf : decodeFront, UseReplacementDchar;

    auto text = orig.save;
    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            static if(!allowRestrictedChars)
            {
                case '&':
                {
                    import dxml.util : parseCharRef;

                    // First try to treat it as a character reference. The
                    // attempt is made on a copy so that a failed attempt
                    // leaves text.input untouched.
                    {
                        auto temp = text.input.save;
                        auto charRef = parseCharRef(temp);
                        if(!charRef.isNull)
                        {
                            static if(hasLength!(Text.Input))
                            {
                                // The difference in length is the number of
                                // code units which the reference occupied.
                                text.pos.col += text.input.length - temp.length;
                                text.input = temp;
                            }
                            else
                            {
                                // Without length, walk to the terminating ';'
                                // (which exists, since parsing succeeded) and
                                // then step past it.
                                while(text.input.front != ';')
                                    popFrontAndIncCol(text);
                                popFrontAndIncCol(text);
                            }
                            continue;
                        }
                    }

                    // Errors for a bad reference are reported at the '&'.
                    immutable ampPos = text.pos;
                    popFrontAndIncCol(text);

                    // Std Entity References
                    static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                    {
                        static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                        {
                            if(text.stripStartsWith(entRef))
                                continue loop;
                        }

                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "reference, and this parser only supports entity references if " ~
                                                      "they're predefined by the spec. This is not a valid character " ~
                                                      "reference or one of the predefined entity references.", ampPos);
                    }
                    // All Entity References
                    else
                    {
                        import dxml.internal : isNameStartChar, isNameChar;

                        // An entity reference is '&', a name, and then ';'.
                        // The name must begin with a name start character.
                        if(text.input.empty)
                            goto failedEntityRef;
                        {
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameStartChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        // The rest of the name is name characters up to the
                        // ';'. Running out of input first is an error.
                        while(true)
                        {
                            if(text.input.empty)
                                goto failedEntityRef;
                            immutable c = text.input.front;
                            if(c == ';')
                                break;
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        assert(text.input.front == ';');
                        popFrontAndIncCol(text);
                        continue;

                        failedEntityRef:
                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "character or entity reference, and this is not a valid " ~
                                                      "character or entity reference.", ampPos);
                    }
                }
                case '<': throw new XMLParsingException("< is not legal in EntityType.text", text.pos);
                case ']':
                {
                    popFrontAndIncCol(text);
                    if(text.stripStartsWith("]>"))
                    {
                        // Move the column back so that the error is reported at
                        // the first ']' (one column for the pop above, and
                        // presumably two for what stripStartsWith consumed).
                        text.pos.col -= 3;
                        throw new XMLParsingException("]]> is not legal in EntityType.text", text.pos);
                    }
                    break;
                }
            }
            case '\n':
            {
                nextLine!(text.config)(text.pos);
                text.input.popFront();
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import dxml.internal : isXMLChar;
                immutable c = text.input.front;
                if(isASCII(c))
                {
                    // ASCII needs no decoding; the code unit is the character.
                    if(!isXMLChar(c))
                    {
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    popFrontAndIncCol(text);
                }
                else
                {
                    import std.utf : UTFException;
                    // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                    // replacement character is considered valid XML, and if we decoded using it, then
                    // all of the invalid Unicode characters would come out as the replacement character
                    // and then be treated as valid instead of being caught, which isn't all bad, but
                    // the spec requires that they be treated as invalid instead of playing nice and
                    // using the replacement character.
                    try
                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                        if(!isXMLChar(decodedC))
                        {
                            enum fmt = "Character is not legal in an XML File: 0x%0x";
                            throw new XMLParsingException(format!fmt(decodedC), text.pos);
                        }
                        text.pos.col += numCodeUnits;
                    }
                    catch(UTFException)
                        throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                break;
            }
        }
    }
}
6590 
// Tests checkText with every kind of test range, with both settings of
// ThrowOnEntityRef, and with restricted characters both allowed and disallowed.
unittest
{
    import core.exception : AssertError;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Asserts that checkText accepts the given text. line is the line of the
    // caller so that a failure is reported where the test case is written.
    static void test(alias func, bool arc, ThrowOnEntityRef toer)(string text, size_t line = __LINE__)
    {
        auto xml = func(text);
        auto range = testParser!(makeConfig(toer))(xml);
        assertNotThrown(checkText!arc(range), "unittest failure", __FILE__, line);
    }

    // Asserts that checkText rejects the given text with an
    // XMLParsingException whose position is TextPos(row, col).
    static void testFail(alias func, bool arc, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        {
            auto range = testParser!(makeConfig(toer))(xml.save);
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        // Repeat with a starting position which is not the beginning of the
        // document. The line offset always carries over to the reported
        // position, whereas the column offset only does so when the error is
        // on the first line of the text.
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto range = testParser!(makeConfig(toer))(xml);
            range.pos.line += 3;
            range.pos.col += 7;
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
        {
            // Cases whose result does not depend on allowRestrictedChars.
            static foreach(arc; [false, true])
            {
                test!(func, arc, toer)("");
                test!(func, arc, toer)("J");
                test!(func, arc, toer)("foo");
                test!(func, arc, toer)("プログラミング");

                test!(func, arc, toer)("&amp;&gt;&lt;");
                test!(func, arc, toer)("hello&amp;&gt;&lt;world");
                test!(func, arc, toer)(".....&apos;&quot;&amp;.....");
                test!(func, arc, toer)("&#12487;&#12451;&#12521;&#12531;");
                test!(func, arc, toer)("hello&#xAF;&#42;&quot;world");

                test!(func, arc, toer)("]]");
                test!(func, arc, toer)("]>");
                test!(func, arc, toer)("foo]]bar");
                test!(func, arc, toer)("foo]>bar");
                test!(func, arc, toer)("]] >");

                testFail!(func, arc, toer)("\v", 1, 1);
                testFail!(func, arc, toer)("\uFFFE", 1, 1);
                testFail!(func, arc, toer)("hello\vworld", 1, 6);
                testFail!(func, arc, toer)("he\nllo\vwo\nrld", 2, 4);
            }

            // With restricted characters disallowed, '<' and malformed
            // references are rejected regardless of ThrowOnEntityRef.
            testFail!(func, false, toer)("<", 1, 1);
            testFail!(func, false, toer)("&", 1, 1);
            testFail!(func, false, toer)("&.;", 1, 1);
            testFail!(func, false, toer)("&x", 1, 1);
            testFail!(func, false, toer)("&&;", 1, 1);
            testFail!(func, false, toer)("&a", 1, 1);
            testFail!(func, false, toer)("hello&;", 1, 6);
            testFail!(func, false, toer)("hello&;world", 1, 6);
            testFail!(func, false, toer)("hello&<;world", 1, 6);
            testFail!(func, false, toer)("hello&world", 1, 6);
            testFail!(func, false, toer)("hello world&", 1, 12);
            testFail!(func, false, toer)("hello world&;", 1, 12);
            testFail!(func, false, toer)("hello world&foo", 1, 12);
            testFail!(func, false, toer)("&#;", 1, 1);
            testFail!(func, false, toer)("&#x;", 1, 1);
            testFail!(func, false, toer)("&#AF;", 1, 1);
            testFail!(func, false, toer)("&#x", 1, 1);
            testFail!(func, false, toer)("&#42", 1, 1);
            testFail!(func, false, toer)("&#x42", 1, 1);
            testFail!(func, false, toer)("&#12;", 1, 1);
            testFail!(func, false, toer)("&#x12;", 1, 1);
            testFail!(func, false, toer)("&#42;foo\nbar&#;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#AF;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#42", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x42", 2, 4);
            testFail!(func, false, toer)("プログラミング&", 1, codeLen!(func, "プログラミング&"));

            // Well-formed entity references which are not predefined are only
            // rejected when ThrowOnEntityRef.yes is used.
            static if(toer == ThrowOnEntityRef.yes)
            {
                testFail!(func, false, toer)("&a;", 1, 1);
                testFail!(func, false, toer)(`&am;`, 1, 1);
                testFail!(func, false, toer)(`&ampe;`, 1, 1);
                testFail!(func, false, toer)(`&l;`, 1, 1);
                testFail!(func, false, toer)(`&lte;`, 1, 1);
                testFail!(func, false, toer)(`&g;`, 1, 1);
                testFail!(func, false, toer)(`&gte;`, 1, 1);
                testFail!(func, false, toer)(`&apo;`, 1, 1);
                testFail!(func, false, toer)(`&aposs;`, 1, 1);
                testFail!(func, false, toer)(`&quo;`, 1, 1);
                testFail!(func, false, toer)(`&quote;`, 1, 1);
                testFail!(func, false, toer)(`hello &foo; world`, 1, 7);
                testFail!(func, false, toer)("hello\n &foo; \nworld", 2, 2);
            }
            else
            {
                test!(func, false, toer)("&a;");
                test!(func, false, toer)(`&am;`);
                test!(func, false, toer)(`&ampe;`);
                test!(func, false, toer)(`&l;`);
                test!(func, false, toer)(`&lte;`);
                test!(func, false, toer)(`&g;`);
                test!(func, false, toer)(`&gte;`);
                test!(func, false, toer)(`&apo;`);
                test!(func, false, toer)(`&aposs;`);
                test!(func, false, toer)(`&quo;`);
                test!(func, false, toer)(`&quote;`);
                test!(func, false, toer)(`hello &foo; world`);
                test!(func, false, toer)("hello\n &foo; \nworld");
            }

            testFail!(func, false, toer)("]]>", 1, 1);
            testFail!(func, false, toer)("foo]]>bar", 1, 4);

            // With restricted characters allowed, everything which was
            // rejected above solely because of '&', '<', or "]]>" is accepted.
            test!(func, true, toer)("]]>");
            test!(func, true, toer)("foo]]>bar");

            test!(func, true, toer)("<");
            test!(func, true, toer)("&");
            test!(func, true, toer)("&x");
            test!(func, true, toer)("&&;");
            test!(func, true, toer)("&a");
            test!(func, true, toer)("&a;");
            test!(func, true, toer)(`&am;`);
            test!(func, true, toer)(`&ampe;`);
            test!(func, true, toer)(`&l;`);
            test!(func, true, toer)(`&lte;`);
            test!(func, true, toer)(`&g;`);
            test!(func, true, toer)(`&gte;`);
            test!(func, true, toer)(`&apo;`);
            test!(func, true, toer)(`&aposs;`);
            test!(func, true, toer)(`&quo;`);
            test!(func, true, toer)(`&quote;`);
            test!(func, true, toer)("hello&;");
            test!(func, true, toer)("hello&;world");
            test!(func, true, toer)("hello&<;world");
            test!(func, true, toer)("hello&world");
            test!(func, true, toer)("hello world&");
            test!(func, true, toer)("hello world&;");
            test!(func, true, toer)("hello world&foo");
            test!(func, true, toer)("&#;");
            test!(func, true, toer)("&#x;");
            test!(func, true, toer)("&#AF;");
            test!(func, true, toer)("&#x");
            test!(func, true, toer)("&#42");
            test!(func, true, toer)("&#x42");
            test!(func, true, toer)("&#12;");
            test!(func, true, toer)("&#x12;");
            test!(func, true, toer)("&#42;foo\nbar&#;");
            test!(func, true, toer)("&#42;foo\nbar&#x;");
            test!(func, true, toer)("&#42;foo\nbar&#AF;");
            test!(func, true, toer)("&#42;foo\nbar&#x");
            test!(func, true, toer)("&#42;foo\nbar&#42");
            test!(func, true, toer)("&#42;foo\nbar&#x42");
            test!(func, true, toer)("プログラミング&");
        }
    }

    // These can't be tested with testFail, because attempting to convert
    // invalid Unicode results in UnicodeExceptions before checkText even
    // gets called.
    import std.meta : AliasSeq;
    static foreach(str; AliasSeq!(cast(string)[255], cast(wstring)[0xD800], cast(dstring)[0xD800]))
    {
        static foreach(arc; [false, true])
        {{
            auto text = testParser(str);
            auto e = collectException!XMLParsingException(text.checkText!arc());
            assert(e !is null);
            assert(e.pos == TextPos(1, 1));
        }}
    }
}
6777 
// Attribute check: checkText must be callable from @safe code for every kind
// of test range, for both settings of allowRestrictedChars, and for each of
// the listed configurations.
@safe unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(toRange; testRangeFuncs)
        static foreach(allowRestricted; [false, true])
            static foreach(cfg; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
            {{
                auto input = toRange("foo");
                auto parser = testParser!cfg(input);
                parser.checkText!allowRestricted();
            }}
}
6795 
6796 
// Whether the given character is one of the whitespace characters which make
// up the S production of the XML grammar:
//
// S := (#x20 | #x9 | #xD | #xA)+
bool isSpace(C)(C c) @safe pure nothrow @nogc
    if(isSomeChar!C)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
6810 
// Checks isSpace against the definition of S written out directly, for every
// char and for the low end of the wchar and dchar ranges.
pure nothrow @safe @nogc unittest
{
    static void check(C)(C c)
    {
        immutable expected = c == ' ' || c == '\t' || c == '\r' || c == '\n';
        assert(isSpace(c) == expected);
    }

    // The upper bound of a foreach range is exclusive, so iterate over an int
    // in order to be able to include char.max itself.
    foreach(i; 0 .. char.max + 1)
        check(cast(char) i);

    // Only a fraction of the wider types is covered to keep the test fast.
    foreach(wchar c; wchar.min .. wchar.max / 100)
        check(c);
    foreach(dchar c; dchar.min .. dchar.max / 1000)
        check(c);
}
6835 
6836 
// Pops a single element off of the front of text.input and advances the
// column of text.pos by one to match.
pragma(inline, true) void popFrontAndIncCol(Text)(ref Text text)
{
    text.input.popFront();
    text.pos.col += 1;
}
6842 
// Moves pos to the first column of the following line. The config template
// argument is not used by the body.
pragma(inline, true) void nextLine(Config config)(ref TextPos pos)
{
    pos.line += 1;
    pos.col = 1;
}
6848 
// Throws an XMLParsingException at the current position if there is no input
// left. line defaults to the line of the caller and is passed on to the
// exception along with __FILE__.
//
// TODO create bug report, because this function cannot be inlined
/+pragma(inline, true)+/ void checkNotEmpty(Text)(ref Text text, size_t line = __LINE__)
{
    if(!text.input.empty)
        return;
    throw new XMLParsingException("Prematurely reached end of document", text.pos, __FILE__, line);
}
6855 
6856 
// A selection of parser configurations, only compiled in for unittest builds.
version(unittest)
{
    enum someTestConfigs = [
        Config.init,
        simpleXML,
        makeConfig(SkipComments.yes),
        makeConfig(SkipPI.yes),
    ];
}
6859 
6860 
// Fuzz-testing failures
//
// Each input is fully parsed, and every property which is valid for the type
// of each entity is accessed. The test passes if an XMLParsingException is
// thrown somewhere along the way.
unittest
{
    // Walks the whole document, touching name, text, and attributes wherever
    // the entity type has them.
    static void parseEverything(string xml)
    {
        with(EntityType) foreach(entity; parseXML(xml))
        {
            // Entity types with a name.
            final switch(entity.type)
            {
                case cdata: break;
                case comment: break;
                case elementStart: auto name = entity.name; break;
                case elementEnd: goto case elementStart;
                case elementEmpty: goto case elementStart;
                case pi: goto case elementStart;
                case text: break;
            }

            // Entity types with text or with attributes.
            final switch(entity.type)
            {
                case cdata: auto text = entity.text; break;
                case comment: goto case cdata;
                case elementStart:
                {
                    foreach(attr; entity.attributes)
                    {
                        auto name = attr.name;
                        auto value = attr.value;
                    }
                    break;
                }
                case elementEnd: break;
                case elementEmpty: goto case elementStart;
                case pi: goto case cdata;
                case text: goto case cdata;
            }
        }
    }

    // Asserts that parsing the given XML throws an XMLParsingException. line
    // is forwarded so that a failure is reported at the line of the test case
    // rather than at the line of this helper.
    static void testFail(string xml, size_t line = __LINE__)
    {
        import std.exception : assertThrown;
        assertThrown!XMLParsingException(parseEverything(xml), "unittest failure", __FILE__, line);
    }

    testFail([0x3c, 0xff, 0x3e, 0x3e, 0x3a, 0x3c, 0x2f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x31, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xff, 0xff,
              0xff]);
}