1 // Written in the D programming language
3 /++
4     This implements a range-based
5     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX _parser) for XML 1.0 (which
6     will work with XML 1.1 documents assuming that they don't use any
7     1.1-specific features). For the sake of simplicity, sanity, and efficiency,
8     the $(LINK2 https://en.wikipedia.org/wiki/Document_type_definition, DTD)
9     section is not supported beyond what is required to parse past it.
11     Start tags, end tags, comments, cdata sections, and processing instructions
12     are all supported and reported to the application. Anything in the DTD is
13     skipped (though it's parsed enough to parse past it correctly, and that
14     $(I can) result in an $(LREF XMLParsingException) if that XML isn't valid
15     enough to be correctly skipped), and the
16     $(LINK2 http://www.w3.org/TR/REC-xml/#NT-XMLDecl, XML declaration) at the
17     top is skipped if present (XML 1.1 requires that it be there, but XML 1.0
18     does not).
20     Regardless of what the XML declaration says (if present), any range of
21     $(K_CHAR) will be treated as being encoded in UTF-8, any range of
22     $(K_WCHAR) will be treated as being encoded in UTF-16, and any range of
23     $(K_DCHAR) will be treated as having been encoded in UTF-32. Strings will
24     be treated as ranges of their code units, not code points. Note that like
25     Phobos typically does when processing strings, the code assumes that BOMs
26     have already been removed, so if the range of characters comes from a file
27     that uses a BOM, the calling code needs to strip it out before calling
28     $(LREF parseXML), or parsing will fail due to invalid characters.
30     Since the DTD is skipped, entity references other than the five which are
31     predefined by the XML spec cannot be fully processed (since wherever they
32     were used in the document would be replaced by what they referred to, which
33     could be arbitrarily complex XML). As such, by default, if any entity
34     references which are not predefined are encountered outside of the DTD, an
35     $(LREF XMLParsingException) will be thrown (see
36     $(LREF Config.throwOnEntityRef) for how that can be configured). The
37     predefined entity references and any character references encountered will
38     be checked to verify that they're valid, but they will not be replaced
39     (since that does not work with returning slices of the original input).
41     However, $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
42     $(REF_ALTTEXT parseStdEntityRef, parseStdEntityRef, dxml, util) from
43     $(MREF dxml, util) can be used to convert the predefined entity references
44     to what they refer to, and $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
45     $(REF_ALTTEXT parseCharRef, parseCharRef, dxml, util) from
46     $(MREF dxml, util) can be used to convert character references to what they
47     refer to.
49     $(H3 Primary Symbols)
50     $(TABLE
51         $(TR $(TH Symbol) $(TH Description))
52         $(TR $(TD $(LREF parseXML))
53              $(TD The function used to initiate the parsing of an XML
54                   document.))
55         $(TR $(TD $(LREF EntityRange))
56              $(TD The range returned by $(LREF parseXML).))
57         $(TR $(TD $(LREF EntityRange.Entity))
58              $(TD The element type of $(LREF EntityRange).))
59     )
61     $(H3 Parser Configuration Helpers)
62     $(TABLE
63         $(TR $(TH Symbol) $(TH Description))
64         $(TR $(TD $(LREF Config))
65              $(TD Used to configure how $(LREF EntityRange) parses the XML.))
66         $(TR $(TD $(LREF simpleXML))
67              $(TD A user-friendly configuration for when the application just
68                   wants the element tags and the data in between them.))
69         $(TR $(TD $(LREF makeConfig))
70              $(TD A convenience function for constructing a custom
71                   $(LREF Config).))
72         $(TR $(TD $(LREF SkipComments))
73              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
74                   to tell the parser to skip comments.))
75         $(TR $(TD $(LREF SkipPI))
76              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
77                   to tell the parser to skip processing instructions.))
78         $(TR $(TD $(LREF SplitEmpty))
79              $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config)
80                   to configure how the parser deals with empty element tags.))
81     )
83     $(H3 Helper Types Used When Parsing)
84     $(TABLE
85         $(TR $(TH Symbol) $(TH Description))
86         $(TR $(TD $(LREF EntityType))
87              $(TD The type of an entity in the XML (e.g. a
88                   $(LREF_ALTTEXT start tag, EntityType.elementStart) or a
89                   $(LREF_ALTTEXT comment, EntityType.comment)).))
90         $(TR $(TD $(LREF TextPos))
91              $(TD Gives the line and column number in the XML document.))
92         $(TR $(TD $(LREF XMLParsingException))
93              $(TD Thrown by $(LREF EntityRange) when it encounters invalid
94                   XML.))
95     )
97     $(H3 Helper Functions Used When Parsing)
98     $(TABLE
99         $(TR $(TH Symbol) $(TH Description))
100         $(TR $(TD $(LREF getAttrs))
101              $(TD A function similar to $(PHOBOS_REF getopt, std, getopt) which
102                   allows for the easy processing of start tag attributes.))
103         $(TR $(TD $(LREF skipContents))
104              $(TD Iterates an $(LREF EntityRange) from a start tag to its
105                   matching end tag.))
106         $(TR $(TD $(LREF skipToPath))
107              $(TD Used to navigate from one start tag to another as if the start
108                   tag names formed a file path.))
109         $(TR $(TD $(LREF skipToEntityType))
110              $(TD Skips to the next entity of the given type in the range.))
111         $(TR $(TD $(LREF skipToParentEndTag))
112              $(TD Iterates an $(LREF EntityRange) until it reaches the end tag
113                   that matches the start tag which is the parent of the
114                   current entity.))
115     )
117     $(H3 Helper Traits)
118     $(TABLE
119         $(TR $(TH Symbol) $(TH Description))
120         $(TR $(TD $(LREF isAttrRange))
121              $(TD Whether the given range is a range of attributes.)))
123     Copyright: Copyright 2017 - 2023
124     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
125     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
126     Source:    $(LINK_TO_SRC dxml/_parser.d)
128     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
129   +/
130 module dxml.parser;
132 /// Iterates over a small document, first without an explicit config and then with $(LREF simpleXML).
133 version(dxmlTests) unittest
134 {
135     auto xml = "<!-- comment -->\n" ~
136                "<root>\n" ~
137                "    <foo>some text<whatever/></foo>\n" ~
138                "    <bar/>\n" ~
139                "    <baz></baz>\n" ~
140                "</root>";
141     {
142         auto range = parseXML(xml); // no config argument is passed here
143         assert(range.front.type == EntityType.comment);
144         assert(range.front.text == " comment "); // the text excludes the <!-- and --> delimiters
145         range.popFront();
147         assert(range.front.type == EntityType.elementStart);
148         assert(range.front.name == "root");
149         range.popFront();
151         assert(range.front.type == EntityType.elementStart);
152         assert(range.front.name == "foo");
153         range.popFront();
155         assert(range.front.type == EntityType.text);
156         assert(range.front.text == "some text");
157         range.popFront();
159         assert(range.front.type == EntityType.elementEmpty); // <whatever/> is a single entity here
160         assert(range.front.name == "whatever");
161         range.popFront();
163         assert(range.front.type == EntityType.elementEnd);
164         assert(range.front.name == "foo");
165         range.popFront();
167         assert(range.front.type == EntityType.elementEmpty); // <bar/>
168         assert(range.front.name == "bar");
169         range.popFront();
171         assert(range.front.type == EntityType.elementStart); // <baz></baz> is a start tag and an end tag
172         assert(range.front.name == "baz");
173         range.popFront();
175         assert(range.front.type == EntityType.elementEnd);
176         assert(range.front.name == "baz");
177         range.popFront();
179         assert(range.front.type == EntityType.elementEnd);
180         assert(range.front.name == "root");
181         range.popFront();
183         assert(range.empty);
184     }
185     {
186         auto range = parseXML!simpleXML(xml);
188         // simpleXML skips comments
190         assert(range.front.type == EntityType.elementStart);
191         assert(range.front.name == "root");
192         range.popFront();
194         assert(range.front.type == EntityType.elementStart);
195         assert(range.front.name == "foo");
196         range.popFront();
198         assert(range.front.type == EntityType.text);
199         assert(range.front.text == "some text");
200         range.popFront();
202         // simpleXML splits empty element tags into a start tag and end tag
203         // so that the code doesn't have to care whether a start tag with no
204         // content is an empty tag or a start tag and end tag with nothing but
205         // whitespace in between.
206         assert(range.front.type == EntityType.elementStart);
207         assert(range.front.name == "whatever");
208         range.popFront();
210         assert(range.front.type == EntityType.elementEnd);
211         assert(range.front.name == "whatever");
212         range.popFront();
214         assert(range.front.type == EntityType.elementEnd);
215         assert(range.front.name == "foo");
216         range.popFront();
218         assert(range.front.type == EntityType.elementStart); // <bar/> is split as well
219         assert(range.front.name == "bar");
220         range.popFront();
222         assert(range.front.type == EntityType.elementEnd);
223         assert(range.front.name == "bar");
224         range.popFront();
226         assert(range.front.type == EntityType.elementStart);
227         assert(range.front.name == "baz");
228         range.popFront();
230         assert(range.front.type == EntityType.elementEnd);
231         assert(range.front.name == "baz");
232         range.popFront();
234         assert(range.front.type == EntityType.elementEnd);
235         assert(range.front.name == "root");
236         range.popFront();
238         assert(range.empty);
239     }
240 }
243 import std.range.primitives;
244 import std.traits;
245 import std.typecons : Flag;
248 /++
249     The exception type thrown when the XML parser encounters invalid XML.
250   +/
251 class XMLParsingException : Exception
252 {
253     /++
254         The position in the XML input where the problem is.
255       +/
256     TextPos pos;
258 package: // the constructor below is only accessible within the package
260     this(string msg, TextPos textPos, string file = __FILE__, size_t line = __LINE__) @safe pure
261     {
262         import std.format : format;
263         super(format!"[%s:%s]: %s"(textPos.line, textPos.col, msg), file, line); // message is prefixed with "[line:col]: "
264         pos = textPos; // keep the position available to code which catches the exception
265     }
266 }
269 /++
270     Where in the XML document an entity is.
272     The line and column numbers are 1-based.
274     The primary use case for TextPos is $(LREF XMLParsingException), but an
275     application may have other uses for it. The TextPos for an
276     $(LREF2 Entity, EntityRange) can be obtained from
277     $(LREF2 Entity.pos, EntityRange).
279     See_Also: $(LREF XMLParsingException.pos)$(BR)
280               $(LREF EntityRange.Entity.pos)
281   +/
282 struct TextPos
283 {
284     /// A line number in the XML file (1-based, so it defaults to 1).
285     int line = 1;
287     /++
288         A column number in a line of the XML file.
290         Each code unit is considered a column, so depending on what a program
291         is looking to do with the column number, it may need to examine the
292         actual text on that line and calculate the number that represents
293         what the program wants to display (e.g. the number of graphemes).
294       +/
295     int col = 1; // 1-based, like line
296 }
299 /++
300     Used to configure how the parser works.
302     See_Also:
303         $(LREF makeConfig)$(BR)
304         $(LREF parseXML)$(BR)
305         $(LREF simpleXML)
306   +/
307 struct Config
308 {
309     /++
310         Whether the comments should be skipped while parsing.
312         If $(D skipComments == SkipComments.yes), any entities of type
313         $(LREF EntityType.comment) will be omitted from the parsing results,
314         and they will not be validated beyond what is required to parse past
315         them.
317         Defaults to $(D SkipComments.no).
318       +/
319     auto skipComments = SkipComments.no;
321     /++
322         Whether processing instructions should be skipped.
324         If $(D skipPI == SkipPI.yes), any entities of type
325         $(LREF EntityType.pi) will be skipped, and they will not be validated
326         beyond what is required to parse past them.
328         Defaults to $(D SkipPI.no).
329       +/
330     auto skipPI = SkipPI.no;
332     /++
333         Whether the parser should report empty element tags as if they were a
334         start tag followed by an end tag with nothing in between.
336         If $(D splitEmpty == SplitEmpty.yes), then whenever an
337         $(LREF EntityType.elementEmpty) is encountered, the parser will claim
338         that that entity is an $(LREF EntityType.elementStart), and then it
339         will provide an $(LREF EntityType.elementEnd) as the next entity before
340         the entity that actually follows it.
342         The purpose of this is to simplify the code using the parser, since most
343         code does not care about the difference between an empty tag and a start
344         and end tag with nothing in between. But since some code may care about
345         the difference, the behavior is configurable.
347         Defaults to $(D SplitEmpty.no).
348       +/
349     auto splitEmpty = SplitEmpty.no;
351     /// Shows how $(D splitEmpty) affects the entities reported for `<root></root>` and `<root/>`.
352     version(dxmlTests) unittest
353     {
354         enum configSplitYes = makeConfig(SplitEmpty.yes);
356         {
357             auto range = parseXML("<root></root>");
358             assert(range.front.type == EntityType.elementStart);
359             assert(range.front.name == "root");
360             range.popFront();
361             assert(range.front.type == EntityType.elementEnd);
362             assert(range.front.name == "root");
363             range.popFront();
364             assert(range.empty);
365         }
366         {
367             // No difference if the tags are already split.
368             auto range = parseXML!configSplitYes("<root></root>");
369             assert(range.front.type == EntityType.elementStart);
370             assert(range.front.name == "root");
371             range.popFront();
372             assert(range.front.type == EntityType.elementEnd);
373             assert(range.front.name == "root");
374             range.popFront();
375             assert(range.empty);
376         }
377         {
378             // This treats <root></root> and <root/> as distinct.
379             auto range = parseXML("<root/>");
380             assert(range.front.type == EntityType.elementEmpty);
381             assert(range.front.name == "root");
382             range.popFront();
383             assert(range.empty);
384         }
385         {
386             // This is parsed as if it were <root></root> instead of <root/>.
387             auto range = parseXML!configSplitYes("<root/>");
388             assert(range.front.type == EntityType.elementStart);
389             assert(range.front.name == "root");
390             range.popFront();
391             assert(range.front.type == EntityType.elementEnd);
392             assert(range.front.name == "root");
393             range.popFront();
394             assert(range.empty);
395         }
396     }
398     /++
399         Whether the parser should throw when it encounters any entity references
400         other than the five entity references defined in the XML standard.
402         Any other entity references would have to be defined in the DTD in
403         order to be valid. And in order to know what XML they represent (which
404         could be arbitrarily complex, even effectively inserting entire XML
405         documents into the middle of the XML), the DTD would have to be parsed.
406         However, dxml does not support parsing the DTD beyond what is required
407         to correctly parse past it, and replacing entity references with what
408         they represent would not work with the slicing semantics that
409         $(LREF EntityRange) provides. As such, it is not possible for dxml to
410         correctly handle any entity references other than the five which are
411         defined in the XML standard, and even those are only parsed by using
412         $(REF decodeXML, dxml, util) or $(REF parseStdEntityRef, dxml, util).
413         $(LREF EntityRange) always validates that entity references are one
414         of the five, predefined entity references, but otherwise, it lets them
415         pass through as normal text. It does not replace them with what they
416         represent.
418         As such, the default behavior of $(LREF EntityRange) is to throw an
419         $(LREF XMLParsingException) when it encounters an entity reference
420         which is not one of the five defined by the XML standard. With that
421         behavior, there is no risk of processing an XML document as if it had
422         no entity references and ending up with what the program using the
423         parser would probably consider incorrect results. However, there are
424         cases where a program may find it acceptable to treat entity references
425         as normal text and ignore them. As such, if a program wishes to take
426         that approach, it can set throwOnEntityRef to $(D ThrowOnEntityRef.no).
428         If $(D throwOnEntityRef == ThrowOnEntityRef.no), then any entity
429         reference that it encounters will be validated to ensure that it is
430         syntactically valid (i.e. that the characters it contains form what
431         could be a valid entity reference assuming that the DTD declared it
432         properly), but otherwise, $(LREF EntityRange) will treat it as normal
433         text, just like it treats the five, predefined entity references as
434         normal text.
436         Note that any valid XML entity reference which contains start or end
437         tags must contain matching start or end tags, and entity references
438         cannot contain incomplete fragments of XML (e.g. the start or end of a
439         comment). So, missing entity references should only affect the data in
440         the XML document and not its overall structure (if that were not _true,
441         attempting to ignore entity references the way $(D ThrowOnEntityRef.no)
442         does would be a disaster in the making). However, how reasonable it is
443         to miss that data depends entirely on the application and what the XML
444         documents it's parsing contain - hence, the behavior is configurable.
446         See_Also: $(REF StdEntityRef, dxml, util)$(BR)
447                   $(REF parseStdEntityRef, dxml, util)$(BR)
448                   $(REF parseCharRef, dxml, util)$(BR)
449                   $(REF encodeCharRef, dxml, util)$(BR)
450                   $(REF decodeXML, dxml, util)$(BR)
451                   $(REF asDecodedXML, dxml, util)
452       +/
453     auto throwOnEntityRef = ThrowOnEntityRef.yes;
455     /// Contrasts $(D ThrowOnEntityRef.yes) with $(D ThrowOnEntityRef.no) on the same document.
456     version(dxmlTests) unittest
457     {
458         import std.exception : assertThrown;
459         import dxml.util : decodeXML;
461         auto xml = "<root>\n" ~
462                    "    <std>&amp;&apos;&gt;&lt;&quot;</std>\n" ~
463                    "    <other>&foobar;</other>\n" ~
464                    "    <invalid>&--;</invalid>\n" ~
465                    "</root>";
467         // ThrowOnEntityRef.yes
468         {
469             auto range = parseXML(xml);
470             assert(range.front.type == EntityType.elementStart);
471             assert(range.front.name == "root");
473             range.popFront();
474             assert(range.front.type == EntityType.elementStart);
475             assert(range.front.name == "std");
477             range.popFront();
478             assert(range.front.type == EntityType.text);
479             assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;");
480             assert(range.front.text.decodeXML() == `&'><"`);
482             range.popFront();
483             assert(range.front.type == EntityType.elementEnd);
484             assert(range.front.name == "std");
486             range.popFront();
487             assert(range.front.type == EntityType.elementStart);
488             assert(range.front.name == "other");
490             // Attempted to parse past "&foobar;", which is syntactically
491             // valid, but it's not one of the five predefined entity references.
492             assertThrown!XMLParsingException(range.popFront());
493         }
495         // ThrowOnEntityRef.no
496         {
497             auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml);
498             assert(range.front.type == EntityType.elementStart);
499             assert(range.front.name == "root");
501             range.popFront();
502             assert(range.front.type == EntityType.elementStart);
503             assert(range.front.name == "std");
505             range.popFront();
506             assert(range.front.type == EntityType.text);
507             assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;");
508             assert(range.front.text.decodeXML() == `&'><"`);
510             range.popFront();
511             assert(range.front.type == EntityType.elementEnd);
512             assert(range.front.name == "std");
514             range.popFront();
515             assert(range.front.type == EntityType.elementStart);
516             assert(range.front.name == "other");
518             // Doesn't throw, because "&foobar;" is syntactically valid.
519             range.popFront();
520             assert(range.front.type == EntityType.text);
521             assert(range.front.text == "&foobar;");
523             // decodeXML has no effect on non-standard entity references.
524             assert(range.front.text.decodeXML() == "&foobar;");
526             range.popFront();
527             assert(range.front.type == EntityType.elementEnd);
528             assert(range.front.name == "other");
530             range.popFront();
531             assert(range.front.type == EntityType.elementStart);
532             assert(range.front.name == "invalid");
534             // Attempted to parse past "&--;", which is not syntactically valid,
535             // because -- is not a valid name for an entity reference.
536             assertThrown!XMLParsingException(range.popFront());
537         }
538     }
539 }
542 /// See_Also: $(LREF2 skipComments, Config)
543 alias SkipComments = Flag!"SkipComments"; // type of Config.skipComments (default: no)
545 /// See_Also: $(LREF2 skipPI, Config)
546 alias SkipPI = Flag!"SkipPI"; // type of Config.skipPI (default: no)
548 /// See_Also: $(LREF2 splitEmpty, Config)
549 alias SplitEmpty = Flag!"SplitEmpty"; // type of Config.splitEmpty (default: no)
551 /// See_Also: $(LREF2 throwOnEntityRef, Config)
552 alias ThrowOnEntityRef = Flag!"ThrowOnEntityRef"; // type of Config.throwOnEntityRef (default: yes)
555 /++
556     Helper function for creating a custom config. It makes it easy to set one
557     or more of the member variables to something other than the default without
558     having to worry about explicitly setting them individually or setting them
559     all at once via a constructor.
561     The order of the arguments does not matter. The types of each of the members
562     of Config are unique, so that information alone is sufficient to determine
563     which argument should be assigned to which member.
564   +/
565 Config makeConfig(Args...)(Args args)
566 {
567     import std.format : format;
568     import std.meta : AliasSeq, staticMap;
570     template isValid(T, Types...) // true if T is exactly one of Types
571     {
572         static if(Types.length == 0)
573             enum isValid = false;
574         else static if(is(T == Types[0]))
575             enum isValid = true;
576         else
577             enum isValid = isValid!(T, Types[1 .. $]);
578     }
580     Config config; // starts out with the default value of every member
582     alias TypeOfMember(string memberName) = typeof(__traits(getMember, config, memberName));
583     alias MemberTypes = staticMap!(TypeOfMember, AliasSeq!(__traits(allMembers, Config)));
585     foreach(i, arg; args)
586     {
587         static assert(isValid!(typeof(arg), MemberTypes), // each argument must have the type of some Config member
588                       format!"Argument %s does not match the type of any members of Config"(i));
590         static foreach(j, Other; Args)
591         {
592             static if(i != j) // no two arguments may share a type
593                 static assert(!is(typeof(arg) == Other), format!"Argument %s and %s have the same type"(i, j));
594         }
596         foreach(memberName; __traits(allMembers, Config))
597         {
598             static if(is(typeof(__traits(getMember, config, memberName)) == typeof(arg))) // pick the member by type
599                 mixin("config." ~ memberName ~ " = arg;");
600         }
601     }
603     return config;
604 }
606 /// Members which aren't passed to makeConfig keep their $(D Config.init) values.
607 version(dxmlTests) @safe pure nothrow @nogc unittest
608 {
609     {
610         auto config = makeConfig(SkipComments.yes);
611         assert(config.skipComments == SkipComments.yes);
612         assert(config.skipPI == Config.init.skipPI);
613         assert(config.splitEmpty == Config.init.splitEmpty);
614         assert(config.throwOnEntityRef == Config.init.throwOnEntityRef);
615     }
616     {
617         auto config = makeConfig(SkipComments.yes, SkipPI.yes);
618         assert(config.skipComments == SkipComments.yes);
619         assert(config.skipPI == SkipPI.yes);
620         assert(config.splitEmpty == Config.init.splitEmpty);
621         assert(config.throwOnEntityRef == Config.init.throwOnEntityRef);
622     }
623     {
624         auto config = makeConfig(SplitEmpty.yes, SkipComments.yes, ThrowOnEntityRef.no); // not in member order
625         assert(config.skipComments == SkipComments.yes);
626         assert(config.skipPI == Config.init.skipPI);
627         assert(config.splitEmpty == SplitEmpty.yes);
628         assert(config.throwOnEntityRef == ThrowOnEntityRef.no);
629     }
630 }
632 version(dxmlTests) unittest
633 {
634     import std.typecons : Flag;
635     static assert(!__traits(compiles, makeConfig(42))); // int is not the type of any Config member
636     static assert(!__traits(compiles, makeConfig("hello"))); // neither is string
637     static assert(!__traits(compiles, makeConfig(Flag!"SomeOtherFlag".yes))); // a Flag, but not one Config uses
638     static assert(!__traits(compiles, makeConfig(SplitEmpty.yes, SplitEmpty.no))); // two arguments of the same type
639 }
642 /++
643     This $(LREF Config) is intended for making it easy to parse XML by skipping
644     everything that isn't the actual data as well as making it simpler to deal
645     with empty element tags by treating them the same as a start tag and end
646     tag with nothing but whitespace between them.
647   +/
648 enum simpleXML = makeConfig(SkipComments.yes, SkipPI.yes, SplitEmpty.yes); // throwOnEntityRef isn't passed
650 /// The values of each member of simpleXML.
651 version(dxmlTests) @safe pure nothrow @nogc unittest
652 {
653     static assert(simpleXML.skipComments == SkipComments.yes);
654     static assert(simpleXML.skipPI == SkipPI.yes);
655     static assert(simpleXML.splitEmpty == SplitEmpty.yes);
656     static assert(simpleXML.throwOnEntityRef == ThrowOnEntityRef.yes); // not set by simpleXML
657 }
660 /++
661     Represents the type of an XML entity. Used by $(LREF EntityRange.Entity).
662   +/
663 enum EntityType
664 {
665     /++
666         A cdata section: `<![CDATA[ ... ]]>`.
668         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-cdata-sect)
669       +/
670     cdata,
672     /++
673         An XML comment: `<!-- ... -->`.
675         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-comments)
676       +/
677     comment,
679     /++
680         The start tag for an element. e.g. `<foo name="value">`.
682         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
683       +/
684     elementStart,
686     /++
687         The end tag for an element. e.g. `</foo>`.
689         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
690       +/
691     elementEnd,
693     /++
694         The tag for an element with no contents or matching end tag. e.g.
695         `<foo name="value"/>`.
697         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
698       +/
699     elementEmpty,
701     /++
702         A processing instruction such as `<?foo?>`. Note that the
703         `<?xml ... ?>` is skipped and not treated as an $(LREF EntityType._pi).
705         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-pi)
706       +/
707     pi,
709     /++
710         The content of an element tag that is simple text.
712         If there is an entity other than the end tag following the text, then
713         the text includes up to that entity.
715         Note however that character references (e.g.
716         $(D_CODE_STRING "$(AMP)#42;")) and the predefined entity references (e.g.
717         $(D_CODE_STRING "$(AMP)apos;")) are left unprocessed in the text. In
718         order for them to be processed, the text should be passed to either
719         $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
720         $(REF_ALTTEXT asDecodedXML, asDecodedXML, dxml, util). Entity references
721         which are not predefined are considered invalid XML, because the DTD
722         section is skipped, and thus they cannot be processed properly.
724         See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)$(BR)
725                   $(REF decodeXML, dxml, util)$(BR)
726                   $(REF asDecodedXML, dxml, util)$(BR)
727                   $(REF parseStdEntityRef, dxml, util)$(BR)
728                   $(REF parseCharRef, dxml, util)$(BR)
729                   $(LREF EntityRange.Entity._text)
730       +/
731     text,
732 }
735 /++
736     Lazily parses the given range of characters as an XML document.
738     EntityRange is essentially a
739     $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX) parser, though it evolved
740     into that rather than being based on what Java did, and it's range-based
741     rather than iterator-based, so its API is likely to differ from other
742     implementations. The basic concept should be the same though.
744     One of the core design goals of this parser is to slice the original input
745     rather than having to allocate strings for the output or wrap it in a lazy
746     range that produces a mutated version of the data. So, all of the text that
747     the parser provides is either a slice or
748     $(PHOBOS_REF takeExactly, std, range) of the input. However, in some cases,
749     for the parser to be fully compliant with the XML spec,
750     $(REF decodeXML, dxml, util) must be called on the text to mutate certain
751     constructs (e.g. removing any $(D_CODE_STRING '\r') in the text or
752     converting $(D_CODE_STRING "$(AMP)lt;") to $(D_CODE_STRING '<')). But
753     that's left up to the application.
755     The parser is not $(K_NOGC), but it allocates memory very minimally. It
756     allocates some of its state on the heap so it can validate attributes and
757     end tags. However, that state is shared among all the ranges that came from
758     the same call to parseXML (only the range farthest along in parsing
759     validates attributes or end tags), so $(LREF2 save, _EntityRange) does not
760     allocate memory unless $(D save) on the underlying range allocates memory.
761     The shared state currently uses a couple of dynamic arrays to validate the
762     tags and attributes, and if the document has a particularly deep tag depth
763     or has a lot of attributes on a start tag, then some reallocations may
764     occur until the maximum is reached, but enough is reserved that for most
765     documents, no reallocations will occur. The only other times that the
766     parser would allocate would be if an exception were thrown or if the range
767     that was passed to parseXML allocates for any reason when calling any of the
768     range primitives.
770     If invalid XML is encountered at any point during the parsing process, an
771     $(LREF XMLParsingException) will be thrown. If an exception has been thrown,
772     then the parser is in an invalid state, and it is an error to call any
773     functions on it.
775     However, note that XML validation is reduced for any entities that are
776     skipped (e.g. for anything in the DTD, validation is reduced to what is
777     required to correctly parse past it, and when
778     $(D Config.skipPI == SkipPI.yes), processing instructions are only validated
779     enough to correctly skip past them).
781     As the module documentation says, this parser does not provide any DTD
782     support. It is not possible to properly support the DTD while returning
783     slices of the original input, and the DTD portion of the spec makes parsing
784     XML far, far more complicated.
786     A quick note about carriage returns$(COLON) per the XML spec, they are all
787     supposed to either be stripped out or replaced with newlines or spaces
788     before the XML parser even processes the text. That doesn't work when the
789     parser is slicing the original text and not mutating it at all. So, for the
790     purposes of parsing, this parser treats all carriage returns as if they
791     were newlines or spaces (though they won't count as newlines when counting
792     the lines for $(LREF TextPos)). However, they $(I will) appear in any text
793     fields or attribute values if they are in the document (since the text
794     fields and attribute values are slices of the original text).
795     $(REF decodeXML, dxml, util) can be used to strip them along with
796     converting any character references in the text. Alternatively, the
797     application can remove them all before calling parseXML, but it's not
798     necessary.
799   +/
800 struct EntityRange(Config cfg, R)
801     if(isForwardRange!R && isSomeChar!(ElementType!R))
802 {
803     import std.algorithm : canFind;
804     import std.range : only, takeExactly;
805     import std.typecons : Nullable;
806     import std.utf : byCodeUnit;
808     enum compileInTests = is(R == EntityRangeCompileTests);
810 public:
812     /// The Config used when parsing the XML.
813     alias config = cfg;
815     /// The type of the range that EntityRange is parsing.
816     alias Input = R;
818     /++
819         The type used when any slice of the original input is used. If $(D R)
820         is a string or supports slicing, then SliceOfR is the same as $(D R);
821         otherwise, it's the result of calling
822         $(PHOBOS_REF takeExactly, std, range) on the input.
824         ---
825         import std.algorithm : filter;
826         import std.range : takeExactly;
828         static assert(is(EntityRange!(Config.init, string).SliceOfR == string));
830         auto range = filter!(a => true)("some xml");
832         static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
833                          typeof(takeExactly(range, 42))));
834         ---
835       +/
836     static if(isDynamicArray!R || hasSlicing!R)
837         alias SliceOfR = R;
838     else
839         alias SliceOfR = typeof(takeExactly(R.init, 42));
841     // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being
842     // a ddoc-ed unit test.
843     static if(compileInTests) @safe unittest
844     {
845         import std.algorithm : filter;
846         import std.range : takeExactly;
848         static assert(is(EntityRange!(Config.init, string).SliceOfR == string));
850         auto range = filter!(a => true)("some xml");
852         static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR ==
853                          typeof(takeExactly(range, 42))));
854     }
857     /++
858         Represents an entity in the XML document.
860         Note that the $(LREF2 type, EntityRange._Entity) determines which
861         properties can be used, and it can determine whether functions which
862         an Entity or $(LREF EntityRange) is passed to are allowed to be called.
863         Each function lists which $(LREF EntityType)s are allowed, and it is an
864         error to call them with any other $(LREF EntityType).
865       +/
866     struct Entity
867     {
868     public:
870         import std.typecons : Tuple;
872         /++
873             The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that
874             $(LREF2 attributes, EntityRange.Entity) returns a range of.
876             See_Also: $(LREF2 attributes, EntityRange.Entity)
877           +/
878         alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos,  "pos");
881         /++
882             The $(LREF EntityType) for this Entity.
883           +/
884         @property EntityType type() @safe const pure nothrow @nogc
885         {
886             return _type;
887         }
889         ///
890         static if(compileInTests) unittest
891         {
892             auto xml = "<root>\n" ~
893                        "    <!--no comment-->\n" ~
894                        "    <![CDATA[cdata run]]>\n" ~
895                        "    <text>I am text!</text>\n" ~
896                        "    <empty/>\n" ~
897                        "    <?pi?>\n" ~
898                        "</root>";
900             auto range = parseXML(xml);
901             assert(range.front.type == EntityType.elementStart);
902             assert(range.front.name == "root");
903             range.popFront();
905             assert(range.front.type == EntityType.comment);
906             assert(range.front.text == "no comment");
907             range.popFront();
909             assert(range.front.type == EntityType.cdata);
910             assert(range.front.text == "cdata run");
911             range.popFront();
913             assert(range.front.type == EntityType.elementStart);
914             assert(range.front.name == "text");
915             range.popFront();
917             assert(range.front.type == EntityType.text);
918             assert(range.front.text == "I am text!");
919             range.popFront();
921             assert(range.front.type == EntityType.elementEnd);
922             assert(range.front.name == "text");
923             range.popFront();
925             assert(range.front.type == EntityType.elementEmpty);
926             assert(range.front.name == "empty");
927             range.popFront();
929             assert(range.front.type == EntityType.pi);
930             assert(range.front.name == "pi");
931             range.popFront();
933             assert(range.front.type == EntityType.elementEnd);
934             assert(range.front.name == "root");
935             range.popFront();
937             assert(range.empty);
938         }
941         /++
942             The position in the original text where the entity starts.
944             See_Also: $(LREF TextPos)$(BR)
945                       $(LREF XMLParsingException._pos)
946           +/
947         @property TextPos pos() @safe const pure nothrow @nogc
948         {
949             return _pos;
950         }
952         ///
953         static if(compileInTests) unittest
954         {
955             auto xml = "<root>\n" ~
956                        "    <foo>\n" ~
957                        "        Foo and bar. Always foo and bar...\n" ~
958                        "    </foo>\n" ~
959                        "</root>";
961             auto range = parseXML(xml);
962             assert(range.front.type == EntityType.elementStart);
963             assert(range.front.name == "root");
964             assert(range.front.pos == TextPos(1, 1));
965             range.popFront();
967             assert(range.front.type == EntityType.elementStart);
968             assert(range.front.name == "foo");
969             assert(range.front.pos == TextPos(2, 5));
970             range.popFront();
972             assert(range.front.type == EntityType.text);
973             assert(range.front.text ==
974                    "\n" ~
975                    "        Foo and bar. Always foo and bar...\n" ~
976                    "    ");
977             assert(range.front.pos == TextPos(2, 10));
978             range.popFront();
980             assert(range.front.type == EntityType.elementEnd);
981             assert(range.front.name == "foo");
982             assert(range.front.pos == TextPos(4, 5));
983             range.popFront();
985             assert(range.front.type == EntityType.elementEnd);
986             assert(range.front.name == "root");
987             assert(range.front.pos == TextPos(5, 1));
988             range.popFront();
990             assert(range.empty);
991         }
993         static if(compileInTests) unittest
994         {
995             import core.exception : AssertError;
996             import std.exception : enforce;
998             static void test(ER)(ref ER range, EntityType type, int row, int col, size_t line = __LINE__)
999             {
1000                 enforce!AssertError(!range.empty, "unittest failure 1", __FILE__, line);
1001                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
1002                 enforce!AssertError(range.front.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
1003                 range.popFront();
1004             }
1006             auto xml = "<?xml?>\n" ~
1007                        "   <!--comment-->\n" ~
1008                        "   <?pi?>\n" ~
1009                        " <root>\n" ~
1010                        "          <!--comment--><!--comment-->\n" ~
1011                        "       <?pi?>\n" ~
1012                        "  <![CDATA[]]>\n" ~
1013                        "              <empty/>     </root>\n" ~
1014                        " <!--comment-->\n" ~
1015                        " <?pi?>\n";
1017             {
1018                 auto range = parseXML(xml);
1019                 test(range, EntityType.comment, 2, 4);
1020                 test(range, EntityType.pi, 3, 4);
1021                 test(range, EntityType.elementStart, 4, 2);
1022                 test(range, EntityType.comment, 5, 11);
1023                 test(range, EntityType.comment, 5, 25);
1024                 test(range, EntityType.pi, 6, 8);
1025                 test(range, EntityType.cdata, 7, 3);
1026                 test(range, EntityType.elementEmpty, 8, 15);
1027                 test(range, EntityType.elementEnd, 8, 28);
1028                 test(range, EntityType.comment, 9, 2);
1029                 test(range, EntityType.pi, 10, 2);
1030             }
1032             auto range = parseXML!simpleXML(xml);
1033             test(range, EntityType.elementStart, 4, 2);
1034             test(range, EntityType.cdata, 7, 3);
1035             test(range, EntityType.elementStart, 8, 15);
1036             test(range, EntityType.elementEnd, 8, 15);
1037             test(range, EntityType.elementEnd, 8, 28);
1038         }
1041         /++
1042             Gives the name of this Entity.
1044             Note that this is the direct name in the XML for this entity and
1045             does not contain any of the names of any of the parent entities that
1046             this entity has. If an application wants the full "path" of the
1047             entity, then it will have to keep track of that itself. The parser
1048             does not do that as it would require allocating memory.
1050             $(TABLE
1051                 $(TR $(TH Supported $(LREF EntityType)s:))
1052                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1053                 $(TR $(TD $(LREF2 elementEnd, EntityType)))
1054                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1055                 $(TR $(TD $(LREF2 pi, EntityType)))
1056             )
1057           +/
1058         @property SliceOfR name()
1059         {
1060             import dxml.internal : checkedSave, stripBCU;
1061             with(EntityType)
1062             {
1063                 import std.format : format;
1064                 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type),
1065                        format("name cannot be called with %s", _type));
1066             }
1067             return stripBCU!R(checkedSave(_name));
1068         }
1070         ///
1071         static if(compileInTests) unittest
1072         {
1073             auto xml = "<root>\n" ~
1074                        "    <empty/>\n" ~
1075                        "    <?pi?>\n" ~
1076                        "</root>";
1078             auto range = parseXML(xml);
1079             assert(range.front.type == EntityType.elementStart);
1080             assert(range.front.name == "root");
1081             range.popFront();
1083             assert(range.front.type == EntityType.elementEmpty);
1084             assert(range.front.name == "empty");
1085             range.popFront();
1087             assert(range.front.type == EntityType.pi);
1088             assert(range.front.name == "pi");
1089             range.popFront();
1091             assert(range.front.type == EntityType.elementEnd);
1092             assert(range.front.name == "root");
1093             range.popFront();
1095             assert(range.empty);
1096         }
1099         /++
1100             Returns a lazy range of attributes for a start tag where each
1101             attribute is represented as a$(BR)
1102             $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
1103                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "name"),
1104                       $(LREF2 SliceOfR, EntityRange), $(D_STRING "value"),
1105                       $(LREF TextPos), $(D_STRING "pos"))).
1107             $(TABLE
1108                 $(TR $(TH Supported $(LREF EntityType)s:))
1109                 $(TR $(TD $(LREF2 elementStart, EntityType)))
1110                 $(TR $(TD $(LREF2 elementEmpty, EntityType)))
1111             )
1113             See_Also: $(LREF2 Attribute, EntityRange.Entity)$(BR)
1114                       $(REF decodeXML, dxml, util)$(BR)
1115                       $(REF asDecodedXML, dxml, util)
1116           +/
1117         @property auto attributes()
1118         {
1119             with(EntityType)
1120             {
1121                 import std.format : format;
1122                 assert(_type == elementStart || _type == elementEmpty,
1123                        format("attributes cannot be called with %s", _type));
1124             }
1126             // STag         ::= '<' Name (S Attribute)* S? '>'
1127             // Attribute    ::= Name Eq AttValue
1128             // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1130             static struct AttributeRange
1131             {
1132                 @property Attribute front()
1133                 {
1134                     return _front;
1135                 }
1137                 void popFront()
1138                 {
1139                     import dxml.internal : stripBCU;
1141                     stripWS(_text);
1142                     if(_text.input.empty)
1143                     {
1144                         empty = true;
1145                         return;
1146                     }
1148                     immutable pos = _text.pos;
1149                     auto name = stripBCU!R(_text.takeName!'='());
1150                     stripWS(_text);
1151                     popFrontAndIncCol(_text);
1152                     stripWS(_text);
1153                     _front = Attribute(name, stripBCU!R(takeEnquotedText(_text)), pos);
1154                 }
1156                 @property auto save()
1157                 {
1158                     import dxml.internal : checkedSave;
1159                     auto retval = this;
1160                     retval._front = Attribute(_front[0].save, checkedSave(_front[1]), _front[2]);
1161                     retval._text.input = checkedSave(retval._text.input);
1162                     return retval;
1163                 }
1165                 this(typeof(_text) text)
1166                 {
1167                     _front = Attribute.init; // This is utterly stupid. https://issues.dlang.org/show_bug.cgi?id=13945
1168                     _text = text;
1169                     if(_text.input.empty)
1170                         empty = true;
1171                     else
1172                         popFront();
1173                 }
1175                 bool empty;
1176                 Attribute _front;
1177                 typeof(_savedText) _text;
1178             }
1180             return AttributeRange(_savedText.save);
1181         }
1183         ///
1184         static if(compileInTests) unittest
1185         {
1186             import std.algorithm.comparison : equal;
1187             import std.algorithm.iteration : filter;
1188             {
1189                 auto xml = "<root/>";
1190                 auto range = parseXML(xml);
1191                 assert(range.front.type == EntityType.elementEmpty);
1192                 assert(range.front.attributes.empty);
1194                 static assert(is(ElementType!(typeof(range.front.attributes)) ==
1195                                  typeof(range).Entity.Attribute));
1196             }
1197             {
1198                 auto xml = "<root a='42' q='29' w='hello'/>";
1199                 auto range = parseXML(xml);
1200                 assert(range.front.type == EntityType.elementEmpty);
1202                 auto attrs = range.front.attributes;
1203                 assert(attrs.front.name == "a");
1204                 assert(attrs.front.value == "42");
1205                 assert(attrs.front.pos == TextPos(1, 7));
1206                 attrs.popFront();
1208                 assert(attrs.front.name == "q");
1209                 assert(attrs.front.value == "29");
1210                 assert(attrs.front.pos == TextPos(1, 14));
1211                 attrs.popFront();
1213                 assert(attrs.front.name == "w");
1214                 assert(attrs.front.value == "hello");
1215                 assert(attrs.front.pos == TextPos(1, 21));
1216                 attrs.popFront();
1218                 assert(attrs.empty);
1219             }
1220             // Because the type of name and value is SliceOfR, == with a string
1221             // only works if the range passed to parseXML was string.
1222             {
1223                 auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>");
1224                 auto range = parseXML(xml);
1225                 assert(range.front.type == EntityType.elementEmpty);
1227                 auto attrs = range.front.attributes;
1228                 assert(equal(attrs.front.name, "a"));
1229                 assert(equal(attrs.front.value, "42"));
1230                 assert(attrs.front.pos == TextPos(1, 7));
1231                 attrs.popFront();
1233                 assert(equal(attrs.front.name, "q"));
1234                 assert(equal(attrs.front.value, "29"));
1235                 assert(attrs.front.pos == TextPos(1, 14));
1236                 attrs.popFront();
1238                 assert(equal(attrs.front.name, "w"));
1239                 assert(equal(attrs.front.value, "hello"));
1240                 assert(attrs.front.pos == TextPos(1, 21));
1241                 attrs.popFront();
1243                 assert(attrs.empty);
1244             }
1245         }
1247         static if(compileInTests) unittest
1248         {
1249             import core.exception : AssertError;
1250             import std.algorithm.comparison : equal;
1251             import std.exception : assertNotThrown, collectException, enforce;
1252             import std.typecons : Tuple, tuple;
1253             import dxml.internal : codeLen, testRangeFuncs;
1255             static bool cmpAttr(T, U)(T lhs, U rhs)
1256             {
1257                 return equal(lhs[0].save, rhs[0].save) &&
1258                        equal(lhs[1].save, rhs[1].save);
1259             }
1261             static void test(alias func, ThrowOnEntityRef toer)(string text, EntityType type,
1262                                                                 Tuple!(string, string)[] expected,
1263                                                                 int row, int col, size_t line = __LINE__)
1264             {
1265                 auto range = assertNotThrown!XMLParsingException(parseXML!(makeConfig(toer))(func(text)),
1266                                                                  "unittest 1", __FILE__, line);
1267                 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line);
1268                 enforce!AssertError(equal!cmpAttr(range.front.attributes, expected),
1269                                     "unittest failure 3", __FILE__, line);
1270                 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 4", __FILE__, line);
1271             }
1273             static void testFail(alias func, ThrowOnEntityRef toer)(string text,
1274                                                                     int row, int col, size_t line = __LINE__)
1275             {
1276                 auto e = collectException!XMLParsingException(parseXML!(makeConfig(toer))(func(text)));
1277                 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
1278                 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1279             }
1281             static foreach(func; testRangeFuncs)
1282             {
1283                 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
1284                 {
1285                     test!(func, toer)("<root a='b'/>", EntityType.elementEmpty, [tuple("a", "b")], 1, 14);
1286                     test!(func, toer)("<root a = 'b' />", EntityType.elementEmpty, [tuple("a", "b")], 1, 17);
1287                     test!(func, toer)("<root \n\n a \n\n = \n\n 'b' \n\n />", EntityType.elementEmpty,
1288                                       [tuple("a", "b")], 9, 4);
1289                     test!(func, toer)("<root a='b'></root>", EntityType.elementStart, [tuple("a", "b")], 1, 13);
1290                     test!(func, toer)("<root a = 'b' ></root>", EntityType.elementStart, [tuple("a", "b")], 1, 16);
1291                     test!(func, toer)("<root \n a \n = \n 'b' \n ></root>", EntityType.elementStart,
1292                                       [tuple("a", "b")], 5, 3);
1294                     test!(func, toer)("<root foo='\n\n\n'/>", EntityType.elementEmpty, [tuple("foo", "\n\n\n")], 4, 4);
1295                     test!(func, toer)(`<root foo='"""'/>`, EntityType.elementEmpty, [tuple("foo", `"""`)], 1, 18);
1296                     test!(func, toer)(`<root foo="'''"/>`, EntityType.elementEmpty, [tuple("foo", `'''`)], 1, 18);
1297                     test!(func, toer)(`<root foo.=""/>`, EntityType.elementEmpty, [tuple("foo.", "")], 1, 16);
1298                     test!(func, toer)(`<root foo="bar="/>`, EntityType.elementEmpty, [tuple("foo", "bar=")], 1, 19);
1300                     test!(func, toer)("<root foo='bar' a='b' hello='world'/>", EntityType.elementEmpty,
1301                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1302                     test!(func, toer)(`<root foo="bar" a='b' hello="world"/>`, EntityType.elementEmpty,
1303                               [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38);
1305                     test!(func, toer)(`<root foo="&#42;" a='&#x42;' hello="%foo"/>`, EntityType.elementEmpty,
1306                               [tuple("foo", "&#42;"), tuple("a", "&#x42;"), tuple("hello", "%foo")], 1, 44);
1308                     test!(func, toer)(`<root foo="&amp;" a='vector&lt;int&gt;'></root>`, EntityType.elementStart,
1309                               [tuple("foo", "&amp;"), tuple("a", "vector&lt;int&gt;"),], 1, 41);
1311                     test!(func, toer)(`<foo 京都市="ディラン"/>`, EntityType.elementEmpty,
1312                               [tuple("京都市", "ディラン")], 1, codeLen!(func, `<foo 京都市="ディラン"/>`) + 1);
1314                     test!(func, toer)(`<root foo=">"/>`, EntityType.elementEmpty, [tuple("foo", ">")], 1, 16);
1315                     test!(func, toer)(`<root foo=">>>>>>"/>`, EntityType.elementEmpty, [tuple("foo", ">>>>>>")], 1, 21);
1316                     test!(func, toer)(`<root foo=">"></root>`, EntityType.elementStart, [tuple("foo", ">")], 1, 15);
1317                     test!(func, toer)(`<root foo=">>>>>>"></root>`, EntityType.elementStart, [tuple("foo", ">>>>>>")], 1, 20);
1319                     test!(func, toer)(`<root foo="bar" foos="ball"/>`, EntityType.elementEmpty,
1320                               [tuple("foo", "bar"), tuple("foos", "ball")], 1, 30);
1322                     testFail!(func, toer)(`<root a="""/>`, 1, 11);
1323                     testFail!(func, toer)(`<root a='''/>`, 1, 11);
1324                     testFail!(func, toer)("<root a=/>", 1, 9);
1325                     testFail!(func, toer)("<root a='/>", 1, 9);
1326                     testFail!(func, toer)("<root a='/>", 1, 9);
1327                     testFail!(func, toer)("<root =''/>", 1, 7);
1328                     testFail!(func, toer)(`<root a ""/>`, 1, 9);
1329                     testFail!(func, toer)(`<root a""/>`, 1, 8);
1330                     testFail!(func, toer)(`<root a/>`, 1, 8);
1331                     testFail!(func, toer)("<root foo='bar' a=/>", 1, 19);
1332                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1333                     testFail!(func, toer)("<root foo='bar' a='/>", 1, 19);
1334                     testFail!(func, toer)("<root foo='bar' =''/>", 1, 17);
1335                     testFail!(func, toer)("<root foo='bar' a= hello='world'/>", 1, 20);
1336                     // It's 33 rather than 28, because it throws when processing the start tag and not when processing
1337                     // the attributes. So, the mismatched quotes are detected before the attributes are checked.
1338                     testFail!(func, toer)("<root foo='bar' a=' hello='world'/>", 1, 33);
1339                     testFail!(func, toer)("<root foo='bar' ='' hello='world'/>", 1, 17);
1340                     testFail!(func, toer)("<root foo='bar'a='b'/>", 1, 16);
1341                     testFail!(func, toer)(`<root .foo="bar"/>`, 1, 7);
1343                     testFail!(func, toer)(`<root foo="<"/>`, 1, 12);
1344                     testFail!(func, toer)(`<root foo="<world"/>`, 1, 12);
1345                     testFail!(func, toer)(`<root foo="hello<world"/>`, 1, 17);
1346                     testFail!(func, toer)(`<root foo="&"/>`, 1, 12);
1347                     testFail!(func, toer)(`<root foo="hello&"/>`, 1, 17);
1348                     testFail!(func, toer)(`<root foo="hello&world"/>`, 1, 17);
1349                     testFail!(func, toer)(`<root foo="&;"/>`, 1, 12);
1350                     testFail!(func, toer)(`<root foo="&#;"/>`, 1, 12);
1351                     testFail!(func, toer)(`<root foo="&#x;"/>`, 1, 12);
1352                     testFail!(func, toer)(`<root foo="&#A;"/>`, 1, 12);
1353                     testFail!(func, toer)(`<root foo="&#xG;"/>`, 1, 12);
1354                     testFail!(func, toer)(`<root foo="&#42"/>`, 1, 12);
1355                     testFail!(func, toer)(`<root foo="&#x42"/>`, 1, 12);
1356                     testFail!(func, toer)(`<root foo="&#x12;"/>`, 1, 12);
1358                     testFail!(func, toer)("<root\n\nfoo='\nbar&#x42'></root>", 4, 4);
1360                     testFail!(func, toer)(`<root a="""></root>`, 1, 11);
1361                     testFail!(func, toer)(`<root a='''></root>`, 1, 11);
1362                     testFail!(func, toer)("<root a=></root>", 1, 9);
1363                     testFail!(func, toer)("<root a='></root>", 1, 9);
1364                     testFail!(func, toer)("<root a='></root>", 1, 9);
1365                     testFail!(func, toer)("<root =''></root>", 1, 7);
1366                     testFail!(func, toer)(`<root a ""></root>`, 1, 9);
1367                     testFail!(func, toer)(`<root a""></root>`, 1, 8);
1368                     testFail!(func, toer)(`<root a></root>`, 1, 8);
1369                     testFail!(func, toer)("<root foo='bar' a=></root>", 1, 19);
1370                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1371                     testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19);
1372                     testFail!(func, toer)("<root foo='bar' =''></root>", 1, 17);
1373                     testFail!(func, toer)("<root foo='bar' a= hello='world'></root>", 1, 20);
1374                     testFail!(func, toer)("<root foo='bar' a=' hello='world'></root>", 1, 33);
1375                     testFail!(func, toer)("<root foo='bar' ='' hello='world'></root>", 1, 17);
1376                     testFail!(func, toer)("<root foo='bar'a='b'></root>", 1, 16);
1377                     testFail!(func, toer)(`<root .foo='bar'></root>`, 1, 7);
1379                     testFail!(func, toer)(`<root foo="<"></root>`, 1, 12);
1380                     testFail!(func, toer)(`<root foo="<world"></root>`, 1, 12);
1381                     testFail!(func, toer)(`<root foo="hello<world"></root>`, 1, 17);
1382                     testFail!(func, toer)(`<root foo="&"></root>`, 1, 12);
1383                     testFail!(func, toer)(`<root foo="hello&"></root>`, 1, 17);
1384                     testFail!(func, toer)(`<root foo="hello&world"></root>`, 1, 17);
1385                     testFail!(func, toer)(`<root foo="&;"></root>`, 1, 12);
1386                     testFail!(func, toer)(`<root foo="&#;"></root>`, 1, 12);
1387                     testFail!(func, toer)(`<root foo="&#x;"></root>`, 1, 12);
1388                     testFail!(func, toer)(`<root foo="&#A;"></root>`, 1, 12);
1389                     testFail!(func, toer)(`<root foo="&#xG;"></root>`, 1, 12);
1390                     testFail!(func, toer)(`<root foo="&#42"></root>`, 1, 12);
1391                     testFail!(func, toer)(`<root foo="&#x42"></root>`, 1, 12);
1392                     testFail!(func, toer)(`<root foo="&#x12;"></root>`, 1, 12);
1394                     testFail!(func, toer)(`<root a='42' a='19'/>`, 1, 14);
1395                     testFail!(func, toer)(`<root a='42' b='hello' a='19'/>`, 1, 24);
1396                     testFail!(func, toer)(`<root a='42' b='hello' a='19' c=''/>`, 1, 24);
1397                     testFail!(func, toer)(`<root a='' b='' c='' d='' e='' f='' g='' e='' h=''/>`, 1, 42);
1398                     testFail!(func, toer)(`<root foo='bar' foo='bar'/>`, 1, 17);
1400                     test!(func, toer)(`<root foo="&amp;"></root>`, EntityType.elementStart,
1401                                       [tuple("foo", "&amp;")], 1, 19);
1402                     test!(func, toer)(`<root foo="foo&amp;&lt;&gt;&apos;&quot;bar"></root>`, EntityType.elementStart,
1403                                       [tuple("foo", "foo&amp;&lt;&gt;&apos;&quot;bar")], 1, 45);
1404                     testFail!(func, toer)("<root foo='&;'></root>", 1, 12);
1405                     testFail!(func, toer)("<root foo='&.;'></root>", 1, 12);
1406                     testFail!(func, toer)("<root foo='\n &amp ule'></root>", 2, 2);
1407                     testFail!(func, toer)("<root foo='\n &foo bar'></root>", 2, 2);
1408                 }
1409                 {
1410                     alias toer = ThrowOnEntityRef.yes;
1411                     testFail!(func, toer)(`<root foo="&foo;"/>`, 1, 12);
1412                     testFail!(func, toer)(`<root foo="&foo;"></root>`, 1, 12);
1413                     testFail!(func, toer)("<root foo='foo&bar.;'></root>", 1, 15);
1414                     testFail!(func, toer)(`<root foo="hello &a; world"></root>`, 1, 18);
1415                     testFail!(func, toer)("<root foo='hello \n &a; \n world'></root>", 2, 2);
1416                 }
1417                 {
1418                     alias toer = ThrowOnEntityRef.no;
1419                     test!(func, toer)(`<root foo="&foo;"/>`, EntityType.elementEmpty,
1420                                       [tuple("foo", "&foo;")], 1, 20);
1421                     test!(func, toer)(`<root foo="&foo;"></root>`, EntityType.elementStart,
1422                                       [tuple("foo", "&foo;")], 1, 19);
1423                     test!(func, toer)("<root foo='foo&bar.;'></root>", EntityType.elementStart,
1424                                       [tuple("foo", "foo&bar.;")], 1, 23);
1425                     test!(func, toer)(`<root foo="hello &a; world"></root>`, EntityType.elementStart,
1426                                         [tuple("foo", "hello &a; world")], 1, 29);
1427                     test!(func, toer)("<root foo='hello \n &a; \n world'></root>", EntityType.elementStart,
1428                                         [tuple("foo", "hello \n &a; \n world")], 3, 9);
1429                 }
1430             }
1431         }
1434         /++
1435             Returns the textual value of this Entity.
1437             In the case of $(LREF EntityType.pi), this is the
1438             text that follows the name, whereas in the other cases, the text is
1439             the entire contents of the entity (save for the delimiters on the
1440             ends if that entity has them).
1442             $(TABLE
1443                 $(TR $(TH Supported $(LREF EntityType)s:))
1444                 $(TR $(TD $(LREF2 cdata, EntityType)))
1445                 $(TR $(TD $(LREF2 comment, EntityType)))
1446                 $(TR $(TD $(LREF2 pi, EntityType)))
1447                 $(TR $(TD $(LREF2 _text, EntityType)))
1448             )
1450             See_Also: $(REF decodeXML, dxml, util)$(BR)
1451                       $(REF asDecodedXML, dxml, util)$(BR)
1452                       $(REF stripIndent, dxml, util)$(BR)
1453                       $(REF withoutIndent, dxml, util)
1454           +/
1455         @property SliceOfR text()
1456         {
1457             import dxml.internal : checkedSave, stripBCU;
1458             import std.format : format;
1459             // Only cdata, comment, pi, and text entities may be asked for their text.
1460             with(EntityType)
1461                 assert(only(cdata, comment, pi, text).canFind(_type),
1462                        format("text cannot be called with %s", _type));
1463             // The stored input goes through checkedSave and stripBCU on the way out.
1464             return stripBCU!R(checkedSave(_savedText.input));
1465         }
1467         ///
1468         static if(compileInTests) unittest
1469         { // Example: walk a small document and read text from each PI, CDATA, comment, and text entity.
1470             import std.range.primitives : empty;
1472             auto xml = "<?xml version='1.0'?>\n" ~
1473                        "<?instructionName?>\n" ~
1474                        "<?foo here is something to say?>\n" ~
1475                        "<root>\n" ~
1476                        "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1477                        "    <!-- some random comment -->\n" ~
1478                        "    <p>something here</p>\n" ~
1479                        "    <p>\n" ~
1480                        "       something else\n" ~
1481                        "       here</p>\n" ~
1482                        "</root>";
1483             auto range = parseXML(xml); // the <?xml ...?> declaration is not reported; the first entity is the first PI
1485             // "<?instructionName?>\n" ~
1486             assert(range.front.type == EntityType.pi);
1487             assert(range.front.name == "instructionName");
1488             assert(range.front.text.empty); // a PI consisting of only a name has no text
1490             // "<?foo here is something to say?>\n" ~
1491             range.popFront();
1492             assert(range.front.type == EntityType.pi);
1493             assert(range.front.name == "foo");
1494             assert(range.front.text == "here is something to say");
1496             // "<root>\n" ~
1497             range.popFront();
1498             assert(range.front.type == EntityType.elementStart);
1500             // "    <![CDATA[ Yay! random text >> << ]]>\n" ~
1501             range.popFront();
1502             assert(range.front.type == EntityType.cdata);
1503             assert(range.front.text == " Yay! random text >> << ");
1505             // "    <!-- some random comment -->\n" ~
1506             range.popFront();
1507             assert(range.front.type == EntityType.comment);
1508             assert(range.front.text == " some random comment ");
1510             // "    <p>something here</p>\n" ~
1511             range.popFront();
1512             assert(range.front.type == EntityType.elementStart);
1513             assert(range.front.name == "p");
1515             range.popFront(); // the text between <p> and </p>
1516             assert(range.front.type == EntityType.text);
1517             assert(range.front.text == "something here");
1519             range.popFront(); // </p>
1520             assert(range.front.type == EntityType.elementEnd);
1521             assert(range.front.name == "p");
1523             // "    <p>\n" ~
1524             // "       something else\n" ~
1525             // "       here</p>\n" ~
1526             range.popFront();
1527             assert(range.front.type == EntityType.elementStart);
1529             range.popFront(); // text comes back as-is, newlines and indentation included
1530             assert(range.front.type == EntityType.text);
1531             assert(range.front.text == "\n       something else\n       here");
1533             range.popFront(); // </p>
1534             assert(range.front.type == EntityType.elementEnd);
1536             // "</root>"
1537             range.popFront();
1538             assert(range.front.type == EntityType.elementEnd);
1540             range.popFront();
1541             assert(range.empty);
1542         }
1545         // Reduce the chance of bugs if reference-type ranges are involved.
1546         static if(!isDynamicArray!R) this(this)
1547         {
1548             // Entity types that have a name get their own saved copy of it.
1549             with(EntityType) final switch(_type)
1550             {
1551                 case elementStart, elementEnd, elementEmpty, pi: _name = _name.save; break;
1552                 case cdata, comment, text: break;
1553             }
1554             // Every type other than elementEnd also gets its own copy of _savedText.
1555             if(_type != EntityType.elementEnd)
1556                 _savedText = _savedText.save;
1557         }
1567         static if(compileInTests) unittest
1568         { // Checks that a copied Entity compares equal to the original and is unaffected by advancing the range.
1569             import std.algorithm.comparison : equal;
1570             import dxml.internal : testRangeFuncs;
1572             static bool cmpAttr(T)(T lhs, T rhs) // attributes match when both name and value compare equal
1573             {
1574                 return equal(lhs.name.save, rhs.name.save) &&
1575                        equal(lhs.value.save, rhs.value.save);
1576             }
1578             {
1579                 auto xml = "<root>\n" ~
1580                            "    <foo a='42'/>\n" ~
1581                            "    <foo b='42'/>\n" ~
1582                            "    <nocomment>nothing to say</nocomment>\n" ~
1583                            "</root>";
1585                 // The duplicate lines aren't typos. We want to ensure that the
1586                 // values are independent and that nothing was consumed.
1587                 static foreach(func; testRangeFuncs)
1588                 {{
1589                      auto range = parseXML(func(xml));
1590                      range.popFront(); // step off <root> onto the first <foo/>
1591                      {
1592                          auto entity = range.front;
1593                          auto entity2 = entity;
1594                          assert(entity.pos == entity2.pos);
1595                          assert(equal(entity.name, entity2.name));
1596                          assert(equal(entity.name, entity2.name));
1597                          assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1598                          assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1599                          range.popFront(); // advancing the range must leave both copies untouched
1600                          assert(entity.pos == entity2.pos);
1601                          assert(entity.pos != range.front.pos);
1602                      }
1603                      range.popFront();
1604                      range.popFront(); // the entity.text calls below require a text-bearing entity here
1605                      {
1606                          auto entity = range.front;
1607                          auto entity2 = entity;
1608                          assert(entity.pos == entity2.pos);
1609                          assert(equal(entity.text, entity2.text));
1610                          assert(equal(entity.text, entity2.text));
1611                          range.popFront();
1612                          assert(entity.pos == entity2.pos);
1613                          assert(entity.pos != range.front.pos);
1614                      }
1615                 }}
1616             }
1617             {
1618                 auto xml = "<root>\n" ~
1619                            "    <![CDATA[whatever]]>\n" ~
1620                            "    <?pi?>\n" ~
1621                            "    <!--comment-->\n" ~
1622                            "    <empty/>\n" ~
1623                            "    <noend a='foo' b='bar'/>\n" ~
1624                            "    <foo baz='42'></foo>\n" ~
1625                            "</root>";
1627                 static foreach(func; testRangeFuncs)
1628                 {
1629                     for(auto range = parseXML(func(xml)); !range.empty; range.popFront()) // compare front, a copy, and a copy of the copy at every entity
1630                     {
1631                         auto entity = range.front;
1632                         auto entity2 = entity;
1634                         assert(entity.pos == range.front.pos);
1635                         assert(entity.pos == entity2.pos);
1636                         assert(entity.type == range.front.type);
1637                         assert(entity.type == entity2.type);
1639                         with(EntityType) final switch(entity.type) // only query the properties each entity type is checked for here
1640                         {
1641                             case cdata: goto case text;
1642                             case comment: goto case text;
1643                             case elementStart:
1644                             {
1645                                 assert(equal!cmpAttr(entity.attributes, range.front.attributes));
1646                                 assert(equal!cmpAttr(entity.attributes, entity2.attributes));
1647                                 goto case elementEnd;
1648                             }
1649                             case elementEnd:
1650                             {
1651                                 assert(equal(entity.name, range.front.name));
1652                                 assert(equal(entity.name, entity2.name));
1653                                 break;
1654                             }
1655                             case elementEmpty: goto case elementStart;
1656                             case text:
1657                             {
1658                                 assert(equal(entity.text, range.front.text));
1659                                 assert(equal(entity.text, entity2.text));
1660                                 break;
1661                             }
1662                             case pi:
1663                             {
1664                                 assert(equal(entity.name, range.front.name));
1665                                 assert(equal(entity.name, entity2.name));
1666                                 goto case text;
1667                             }
1668                         }
1669                     }
1670                 }
1671             }
1672         }
1675     private:
1677         this(EntityType type)
1678         {
1679             // Neither init assignment should be required; they are here because
1680             // of https://issues.dlang.org/show_bug.cgi?id=13945
1681             _savedText = typeof(_savedText).init;
1682             _name = typeof(_name).init;
1683             _type = type;
1684         }
1686         EntityType _type;
1687         TextPos _pos;
1688         Taken _name;
1689         typeof(EntityRange._savedText) _savedText;
1690     }
1693     /++
1694         Returns the $(LREF Entity) representing the entity in the XML document
1695         which was most recently parsed.
1696       +/
1697     @property Entity front()
1698     {
1699         auto entity = Entity(_type);
1700         // Hand over saved copies of only the pieces each entity type is given.
1701         with(EntityType) final switch(_type)
1702         {
1703             case elementStart, elementEmpty, pi:
1704                 entity._name = _name.save;
1705                 entity._savedText = _savedText.save;
1706                 break;
1707             case elementEnd: entity._name = _name.save; break;
1708             case cdata, comment, text: entity._savedText = _savedText.save; break;
1709         }
1710         entity._pos = _entityPos;
1711         return entity;
1712     }
1715     /++
1716         Move to the next entity.
1718         The next entity is the next one that is linearly in the XML document.
1719         So, if the current entity has child entities, the next entity will be
1720         the first child entity, whereas if it has no child entities, it will be
1721         the next entity at the same level.
1723         Throws: $(LREF XMLParsingException) on invalid XML.
1724       +/
1725     void popFront()
1726     {
1727         with(GrammarPos) final switch(_grammarPos)
1728         {
1729             case documentStart: _parseDocumentStart(); break;
1730             case prologMisc1: _parseAtPrologMisc!1(); break;
1731             case prologMisc2: _parseAtPrologMisc!2(); break;
1732             case contentCharData1:
1733             {
1734                 // Record the start tag that was just reported before parsing its content.
1735                 assert(_type == EntityType.elementStart);
1736                 _tagStack.pushTag(_name.save);
1737                 goto case contentCharData2;
1738             }
1739             case contentCharData2: _parseAtContentCharData(); break;
1740             case contentMid: _parseAtContentMid(); break;
1741             case splittingEmpty:
1742             {
1743                 // No parsing happens here: the current entity simply becomes an elementEnd.
1744                 _type = EntityType.elementEnd;
1745                 _tagStack.sawEntity();
1746                 immutable atTopLevel = _tagStack.depth == 0;
1747                 _grammarPos = atTopLevel ? GrammarPos.endMisc : GrammarPos.contentCharData2;
1748                 break;
1749             }
1750             case endTag: _parseElementEnd(); break;
1751             case endMisc: _parseAtEndMisc(); break;
1752             case documentEnd: assert(0, "It's illegal to call popFront() on an empty EntityRange.");
1753         }
1754     }
1755     /++
1756         Whether the end of the XML document has been reached.
1758         Note that because an $(LREF XMLParsingException) will be thrown on
1759         invalid XML, it's actually possible to call
1760         $(LREF2 front, EntityRange) and $(LREF2 popFront, EntityRange) without
1761         checking empty if the only way that empty would be true is if the XML
1762         were invalid (e.g. if at a start tag, it's a given that there's at
1763         least one end tag left in the document unless it's invalid XML).
1765         However, of course, caution should be used to ensure that incorrect
1766         assumptions are not made that allow the document to reach its end
1767         earlier than predicted without throwing an $(LREF XMLParsingException),
1768         since it's still an error to call $(LREF2 front, EntityRange) or
1769         $(LREF2 popFront, EntityRange) if empty would return true.
1770       +/
1771     @property bool empty() @safe const pure nothrow @nogc
1772     {
1773         // The range is exhausted once the grammar position is documentEnd.
1774         with(GrammarPos) return _grammarPos == documentEnd;
1775     }
1777     /++
1778         Forward range function for obtaining a copy of the range which can then
1779         be iterated independently of the original.
1780       +/
1781     @property auto save()
1782     {
1783         auto result = this;
1784         // A member still equal to its init value is left alone, because some
1785         // ranges blow up when save is called on init (e.g. a class range).
1786         if(!(result._name is typeof(result._name).init))
1787             result._name = _name.save;
1788         if(!(result._text.input is typeof(result._text.input).init))
1789             result._text.input = _text.input.save;
1790         if(!(result._savedText.input is typeof(result._savedText.input).init))
1791             result._savedText.input = _savedText.input.save;
1792         return result;
1793     }
1795     static if(compileInTests) unittest
1796     { // Checks that independently parsed ranges and saved copies of one range yield matching entities.
1797         import std.algorithm.comparison : equal;
1798         import std.exception : assertNotThrown;
1799         import dxml.internal : testRangeFuncs;
1801         static bool cmpAttr(T)(T lhs, T rhs) // attributes match when both name and value compare equal
1802         {
1803             return equal(lhs.name.save, rhs.name.save) &&
1804                    equal(lhs.value.save, rhs.value.save);
1805         }
1807         static void testEqual(ER)(ER one, ER two) // walks both ranges in lockstep, asserting each pair of entities matches
1808         {
1809              while(!one.empty && !two.empty)
1810              {
1811                  auto left = one.front;
1812                  auto right = two.front;
1814                  assert(left.pos == right.pos);
1815                  assert(left.type == right.type);
1817                  with(EntityType) final switch(left.type)
1818                  {
1819                      case cdata: goto case text;
1820                      case comment: goto case text;
1821                      case elementStart:
1822                      {
1823                          assert(equal!cmpAttr(left.attributes, right.attributes));
1824                          goto case elementEnd;
1825                      }
1826                      case elementEnd: assert(equal(left.name, right.name)); break;
1827                      case elementEmpty: goto case elementStart;
1828                      case text: assert(equal(left.text, right.text)); break;
1829                      case pi: assert(equal(left.name, right.name)); goto case text;
1830                  }
1832                  one.popFront();
1833                  two.popFront();
1834              }
1836              assert(one.empty); // both ranges must run out together
1837              assert(two.empty);
1838         }
1840          auto xml = "<root>\n" ~
1841                     "    <!-- comment -->\n" ~
1842                     "    <something>\n" ~
1843                     "         <else/>\n" ~
1844                     "         somet text <i>goes</i> here\n" ~
1845                     "    </something>\n" ~
1846                     "</root>";
1848         static foreach(i, func; testRangeFuncs)
1849         {{
1850              auto text = func(xml);
1851              testEqual(parseXML(text.save), parseXML(text.save)); // two ranges built from separately saved input
1852              auto range = parseXML(text.save);
1853              testEqual(range.save, range.save); // two saves taken from the same range
1854         }}
1855     }
1858     /++
1859         Returns an empty range. This corresponds to
1860         $(PHOBOS_REF _takeNone, std, range) except that it doesn't create a
1861         wrapper type.
1862       +/
1863     EntityRange takeNone()
1864     {
1865         auto emptied = save;
1866         // Putting the copy at documentEnd is what makes it empty.
1867         emptied._grammarPos = GrammarPos.documentEnd;
1868         return emptied;
1869     }
1871 private:
1873     // Skips the XML declaration (if there is one) and then parses the first prolog entity.
1874     void _parseDocumentStart()
1875     {
1876         auto orig = _text.save;
1877         immutable wasWS = _text.stripWS();
1878         if(_text.stripStartsWith("<?xml"))
1879         {
1880             checkNotEmpty(_text);
1881             if(_text.input.front != '?' && !isSpace(_text.input.front))
1882                 _text = orig; // Not a declaration, just a PI whose target starts with "xml"; parse it normally.
1883             else if(wasWS) // Only a real declaration is required to be the very first thing in the document.
1884                 throw new XMLParsingException("Cannot have whitespace before the <?xml...?> declaration", TextPos.init);
1885             else
1886                 _text.skipUntilAndDrop!"?>"();
1887         }
1888         _grammarPos = GrammarPos.prologMisc1;
1889         _parseAtPrologMisc!1();
1890     }
1891     static if(compileInTests) unittest
1892     { // Checks the position of _text after the first element, with and without an XML declaration in front.
1893         import core.exception : AssertError;
1894         import std.exception : assertNotThrown, enforce;
1895         import dxml.internal : testRangeFuncs;
1897         static void test(alias func)(string xml, int row, int col, size_t line = __LINE__) // row/col: expected _text.pos once parseXML returns
1898         {
1899             auto range = assertNotThrown!XMLParsingException(parseXML(func(xml)));
1900             enforce!AssertError(range._type == EntityType.elementEmpty, "unittest failure 1", __FILE__, line);
1901             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
1902         }
1904         static foreach(func; testRangeFuncs)
1905         {
1906             test!func("<root/>", 1, 8);
1907             test!func("\n\t\n <root/>   \n", 3, 9);
1908             test!func("<?xml\n\n\nversion='1.8'\n\n\n\nencoding='UTF-8'\n\n\nstandalone='yes'\n?><root/>", 12, 10);
1909             test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?><root/>", 6, 23);
1910             test!func("<?xml\n\n\n    \r\r\r\n\nversion='1.8'?>\n     <root/>", 7, 13);
1911             test!func("<root/>", 1, 8); // NOTE(review): this and the next line repeat the first two cases above
1912             test!func("\n\t\n <root/>   \n", 3, 9);
1913         }
1914     }
1917     // Parse at GrammarPos.prologMisc1 or GrammarPos.prologMisc2.
1918     void _parseAtPrologMisc(int miscNum)()
1919     {
1920         static assert(miscNum == 1 || miscNum == 2); // miscNum 1 accepts a DOCTYPE; miscNum 2 rejects a further one
1922         // document ::= prolog element Misc*
1923         // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
1924         // Misc ::= Comment | PI | S
1926         stripWS(_text);
1927         checkNotEmpty(_text);
1928         if(_text.input.front != '<')
1929             throw new XMLParsingException("Expected <", _text.pos);
1930         popFrontAndIncCol(_text);
1931         checkNotEmpty(_text);
1933         switch(_text.input.front) // the character after '<' decides what kind of entity this is
1934         {
1935             // Comment     ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1936             // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
1937             case '!':
1938             {
1939                 immutable bangPos = _text.pos; // remembered so that the errors below point at the '!'
1940                 popFrontAndIncCol(_text);
1941                 if(_text.stripStartsWith("--"))
1942                 {
1943                     _parseComment();
1944                     static if(config.skipComments == SkipComments.yes)
1945                         _parseAtPrologMisc!miscNum(); // comments are being skipped, so keep parsing from the same position
1946                     break;
1947                 }
1948                 static if(miscNum == 1)
1949                 {
1950                     if(_text.stripStartsWith("DOCTYPE"))
1951                     {
1952                         if(!_text.stripWS())
1953                             throw new XMLParsingException("Whitespace must follow <!DOCTYPE", _text.pos);
1954                         _parseDoctypeDecl();
1955                         break;
1956                     }
1957                     throw new XMLParsingException("Expected Comment or DOCTYPE section", bangPos);
1958                 }
1959                 else
1960                 {
1961                     if(_text.stripStartsWith("DOCTYPE"))
1962                     {
1963                         throw new XMLParsingException("Only one <!DOCTYPE ...> declaration allowed per XML document",
1964                                                       bangPos);
1965                     }
1966                     throw new XMLParsingException("Expected Comment", bangPos);
1967                 }
1968             }
1969             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1970             case '?':
1971             {
1972                 _parsePI();
1973                 static if(config.skipPI == SkipPI.yes)
1974                     popFront(); // PIs are being skipped; popFront dispatches on _grammarPos to continue
1975                 break;
1976             }
1977             // element ::= EmptyElemTag | STag content ETag
1978             default:
1979             {
1980                 _parseElementStart();
1981                 break;
1982             }
1983         }
1984     }
1987     // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1988     // Parses a comment. <!-- was already removed from the front of the input.
1989     void _parseComment()
1990     {
1991         static if(config.skipComments == SkipComments.yes)
1992             _text.skipUntilAndDrop!"--"(); // nothing is reported for the comment, so no entity state is set
1993         else
1994         {
1995             _entityPos = TextPos(_text.pos.line, _text.pos.col - 4); // back up over the 4 characters of "<!--"
1996             _type = EntityType.comment;
1997             _tagStack.sawEntity();
1998             _savedText.pos = _text.pos;
1999             _savedText.input = _text.takeUntilAndDrop!"--"();
2000         }
2001         if(_text.input.empty || _text.input.front != '>') // after "--" the only thing accepted is '>'
2002             throw new XMLParsingException("Comments cannot contain -- and cannot be terminated by --->", _text.pos);
2003         // This is here rather than at the end of the previous static if block
2004         // so that the error message for improperly terminating a comment takes
2005         // precedence over the one involving invalid characters in the comment.
2006         static if(config.skipComments == SkipComments.no)
2007             checkText!true(_savedText);
2008         popFrontAndIncCol(_text); // consume the '>'
2009     }
2011     static if(compileInTests) unittest
2012     {
2013         import core.exception : AssertError;
2014         import std.algorithm.comparison : equal;
2015         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2016         import dxml.internal : codeLen, testRangeFuncs;
2018         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2019         {
2020             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")));
2021             enforce!AssertError(range.front.type == EntityType.comment, "unittest failure 1", __FILE__, line);
2022             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2023             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2024         }
2026         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2027         {
2028             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2029             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2030             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2031         }
2033         static foreach(func; testRangeFuncs)
2034         {
2035             test!func("<!--foo-->", "foo", 1, 11);
2036             test!func("<!-- foo -->", " foo ", 1, 13);
2037             test!func("<!-- -->", " ", 1, 9);
2038             test!func("<!---->", "", 1, 8);
2039             test!func("<!--- comment -->", "- comment ", 1, 18);
2040             test!func("<!-- \n foo \n -->", " \n foo \n ", 3, 5);
2041             test!func("<!--京都市 ディラン-->", "京都市 ディラン", 1, codeLen!(func, "<!--京都市 ディラン-->") + 1);
2042             test!func("<!--&-->", "&", 1, 9);
2043             test!func("<!--<-->", "<", 1, 9);
2044             test!func("<!-->-->", ">", 1, 9);
2045             test!func("<!--->-->", "->", 1, 10);
2047             testFail!func("<!", 1, 2);
2048             testFail!func("<!- comment -->", 1, 2);
2049             testFail!func("<!-- comment ->", 1, 5);
2050             testFail!func("<!-- comment --->", 1, 16);
2051             testFail!func("<!---- comment -->", 1, 7);
2052             testFail!func("<!-- comment -- comment -->", 1, 16);
2053             testFail!func("<!->", 1, 2);
2054             testFail!func("<!-->", 1, 5);
2055             testFail!func("<!--->", 1, 5);
2056             testFail!func("<!----->", 1, 7);
2057             testFail!func("<!blah>", 1, 2);
2058             testFail!func("<! blah>", 1, 2);
2059             testFail!func("<!-- \n\n   \v \n -->", 3, 4);
2060             testFail!func("<!--京都市 ディラン\v-->", 1, codeLen!(func, "<!--京都市 ディラン\v"));
2062             {
2063                 auto xml = func("<!DOCTYPE foo><!-- comment --><root/>");
2064                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2065                 assert(range.front.type == EntityType.comment);
2066                 assert(equal(range.front.text, " comment "));
2067             }
2068             {
2069                 auto xml = func("<root><!-- comment --></root>");
2070                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2071                 assertNotThrown!XMLParsingException(range.popFront());
2072                 assert(range.front.type == EntityType.comment);
2073                 assert(equal(range.front.text, " comment "));
2074             }
2075             {
2076                 auto xml = func("<root/><!-- comment -->");
2077                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2078                 assertNotThrown!XMLParsingException(range.popFront());
2079                 assert(range.front.type == EntityType.comment);
2080                 assert(equal(range.front.text, " comment "));
2081             }
2083             static foreach(comment; ["<!foo>", "<! foo>", "<!->", "<!-->", "<!--->"])
2084             {
2085                 {
2086                     auto xml = func("<!DOCTYPE foo>" ~ comment ~ "<root/>");
2087                     assertThrown!XMLParsingException(parseXML(xml));
2088                 }
2089                 {
2090                     auto xml = func("<root>" ~ comment ~ "<root>");
2091                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2092                     assertThrown!XMLParsingException(range.popFront());
2093                 }
2094                 {
2095                     auto xml = func("<root/>" ~ comment);
2096                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2097                     assertThrown!XMLParsingException(range.popFront());
2098                 }
2099             }
2101             {
2102                 auto xml = "<!--one-->\n" ~
2103                            "<!--two-->\n" ~
2104                            "<root>\n" ~
2105                            "    <!--three-->\n" ~
2106                            "    <!--four-->\n" ~
2107                            "</root>\n" ~
2108                            "<!--five-->\n" ~
2109                            "<!--six-->";
2111                 auto text = func(xml);
2112                 {
2113                     auto range = parseXML(text.save);
2114                     assert(range.front.type == EntityType.comment);
2115                     assert(equal(range.front.text, "one"));
2116                     assertNotThrown!XMLParsingException(range.popFront());
2117                     assert(range.front.type == EntityType.comment);
2118                     assert(equal(range.front.text, "two"));
2119                     assertNotThrown!XMLParsingException(range.popFront());
2120                     assert(range.front.type == EntityType.elementStart);
2121                     assert(equal(range.front.name, "root"));
2122                     assertNotThrown!XMLParsingException(range.popFront());
2123                     assert(range.front.type == EntityType.comment);
2124                     assert(equal(range.front.text, "three"));
2125                     assertNotThrown!XMLParsingException(range.popFront());
2126                     assert(range.front.type == EntityType.comment);
2127                     assert(equal(range.front.text, "four"));
2128                     assertNotThrown!XMLParsingException(range.popFront());
2129                     assert(range.front.type == EntityType.elementEnd);
2130                     assert(equal(range.front.name, "root"));
2131                     assertNotThrown!XMLParsingException(range.popFront());
2132                     assert(range.front.type == EntityType.comment);
2133                     assert(equal(range.front.text, "five"));
2134                     assertNotThrown!XMLParsingException(range.popFront());
2135                     assert(range.front.type == EntityType.comment);
2136                     assert(equal(range.front.text, "six"));
2137                     assertNotThrown!XMLParsingException(range.popFront());
2138                     assert(range.empty);
2139                 }
2140                 {
2141                     auto range = parseXML!simpleXML(text.save);
2142                     assert(range.front.type == EntityType.elementStart);
2143                     assert(equal(range.front.name, "root"));
2144                     assertNotThrown!XMLParsingException(range.popFront());
2145                     assert(range.front.type == EntityType.elementEnd);
2146                     assert(equal(range.front.name, "root"));
2147                     assertNotThrown!XMLParsingException(range.popFront());
2148                     assert(range.empty);
2149                 }
2150             }
2151         }
2152     }
2155     // PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2156     // PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2157     // Parses a processing instruction. < was already removed from the input.
2158     void _parsePI()
2159     {
2160         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2161         assert(_text.input.front == '?');
2162         popFrontAndIncCol(_text);
2163         static if(config.skipPI == SkipPI.yes)
2164             _text.skipUntilAndDrop!"?>"();
2165         else
2166         {
2167             immutable posAtName = _text.pos;
2168             if(_text.input.empty)
2169                 throw new XMLParsingException("Unterminated processing instruction", posAtName);
2170             _type = EntityType.pi;
2171             _tagStack.sawEntity();
2172             _name = takeName!'?'(_text);
2173             immutable posAtWS = _text.pos;
2174             stripWS(_text);
2175             checkNotEmpty(_text);
2176             _savedText.pos = _text.pos;
2177             _savedText.input = _text.takeUntilAndDrop!"?>"();
2178             checkText!true(_savedText);
2179             if(walkLength(_name.save) == 3)
2180             {
2181                 // FIXME icmp doesn't compile right now due to an issue with
2182                 // byUTF that needs to be looked into.
2183                 /+
2184                 import std.uni : icmp;
2185                 if(icmp(_name.save, "xml") == 0)
2186                     throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2187                 +/
2188                 auto temp = _name.save;
2189                 if(temp.front == 'x' || temp.front == 'X')
2190                 {
2191                     temp.popFront();
2192                     if(temp.front == 'm' || temp.front == 'M')
2193                     {
2194                         temp.popFront();
2195                         if(temp.front == 'l' || temp.front == 'L')
2196                             throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
2197                     }
2198                 }
2199             }
2200         }
2201     }
2203     static if(compileInTests) unittest
2204     {
2205         import core.exception : AssertError;
2206         import std.algorithm.comparison : equal;
2207         import std.exception : assertNotThrown, assertThrown, collectException, enforce;
2208         import std.utf : byUTF;
2209         import dxml.internal : codeLen, testRangeFuncs;
2211         static void test(alias func)(string text, string name, string expected,
2212                                      int row, int col, size_t line = __LINE__)
2213         {
2214             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2215                                                              "unittest failure 1", __FILE__, line);
2216             enforce!AssertError(range.front.type == EntityType.pi, "unittest failure 2", __FILE__, line);
2217             enforce!AssertError(equal(range.front.name, name), "unittest failure 3", __FILE__, line);
2218             enforce!AssertError(equal(range.front.text, expected), "unittest failure 4", __FILE__, line);
2219             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 5", __FILE__, line);
2220         }
2222         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2223         {
2224             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2225             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2226             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2227         }
2229         static foreach(func; testRangeFuncs)
2230         {
2231             test!func("<?a?>", "a", "", 1, 6);
2232             test!func("<?foo?>", "foo", "", 1, 8);
2233             test!func("<?foo.?>", "foo.", "", 1, 9);
2234             test!func("<?foo bar?>", "foo", "bar", 1, 12);
2235             test!func("<?xmf bar?>", "xmf", "bar", 1, 12);
2236             test!func("<?xmlfoo bar?>", "xmlfoo", "bar", 1, 15);
2237             test!func("<?foo bar baz?>", "foo", "bar baz", 1, 16);
2238             test!func("<?foo\nbar baz?>", "foo", "bar baz", 2, 10);
2239             test!func("<?foo \n bar baz?>", "foo", "bar baz", 2, 11);
2240             test!func("<?foo bar\nbaz?>", "foo", "bar\nbaz", 2, 6);
2241             test!func("<?dlang is awesome?>", "dlang", "is awesome", 1, 21);
2242             test!func("<?dlang is awesome! ?>", "dlang", "is awesome! ", 1, 23);
2243             test!func("<?dlang\n\nis\n\nawesome\n\n?>", "dlang", "is\n\nawesome\n\n", 7, 3);
2244             test!func("<?京都市 ディラン?>", "京都市", "ディラン", 1, codeLen!(func, "<?京都市 ディラン?>") + 1);
2245             test!func("<?foo bar&baz?>", "foo", "bar&baz", 1, 16);
2246             test!func("<?foo bar<baz?>", "foo", "bar<baz", 1, 16);
2247             test!func("<?pi ?>", "pi", "", 1, 8);
2248             test!func("<?pi\n?>", "pi", "", 2, 3);
2249             test!func("<?foo ??>", "foo", "?", 1, 10);
2250             test!func("<?pi some data ? > <??>", "pi", "some data ? > <?", 1, 24);
2252             testFail!func("<?", 1, 3);
2253             testFail!func("<??>", 1, 3);
2254             testFail!func("<? ?>", 1, 3);
2255             testFail!func("<?xml?><?xml?>", 1, 10);
2256             testFail!func("<?XML?>", 1, 3);
2257             testFail!func("<?xMl?>", 1, 3);
2258             testFail!func("<?foo>", 1, 6);
2259             testFail!func("<? foo?>", 1, 3);
2260             testFail!func("<?\nfoo?>", 1, 3);
2261             testFail!func("<??foo?>", 1, 3);
2262             testFail!func("<?.foo?>", 1, 3);
2263             testFail!func("<?foo bar\vbaz?>", 1, 10);
2265             {
2266                 auto xml = func("<!DOCTYPE foo><?foo bar?><root/>");
2267                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2268                 assert(range.front.type == EntityType.pi);
2269                 assert(equal(range.front.name, "foo"));
2270                 assert(equal(range.front.text, "bar"));
2271             }
2272             {
2273                 auto xml = func("<root><?foo bar?></root>");
2274                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2275                 assertNotThrown!XMLParsingException(range.popFront());
2276                 assert(equal(range.front.name, "foo"));
2277                 assert(equal(range.front.text, "bar"));
2278             }
2279             {
2280                 auto xml = func("<root/><?foo bar?>");
2281                 auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2282                 assertNotThrown!XMLParsingException(range.popFront());
2283                 assert(equal(range.front.name, "foo"));
2284                 assert(equal(range.front.text, "bar"));
2285             }
2287             static foreach(pi; ["<?foo>", "<foo?>", "<? foo>"])
2288             {
2289                 {
2290                     auto xml = func("<!DOCTYPE foo>" ~ pi ~ "<root/>");
2291                     assertThrown!XMLParsingException(parseXML(xml));
2292                 }
2293                 {
2294                     auto xml = func("<root>" ~ pi ~ "<root>");
2295                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2296                     assertThrown!XMLParsingException(range.popFront());
2297                 }
2298                 {
2299                     auto xml = func("<root/>" ~ pi);
2300                     auto range = assertNotThrown!XMLParsingException(parseXML(xml));
2301                     assertThrown!XMLParsingException(range.popFront());
2302                 }
2303             }
2305             {
2306                 auto xml = "<?one?>\n" ~
2307                            "<?two?>\n" ~
2308                            "<root>\n" ~
2309                            "    <?three?>\n" ~
2310                            "    <?four?>\n" ~
2311                            "</root>\n" ~
2312                            "<?five?>\n" ~
2313                            "<?six?>";
2315                 auto text = func(xml);
2316                 {
2317                     auto range = parseXML(text.save);
2318                     assert(range.front.type == EntityType.pi);
2319                     assert(equal(range.front.name, "one"));
2320                     assertNotThrown!XMLParsingException(range.popFront());
2321                     assert(range.front.type == EntityType.pi);
2322                     assert(equal(range.front.name, "two"));
2323                     assertNotThrown!XMLParsingException(range.popFront());
2324                     assert(range.front.type == EntityType.elementStart);
2325                     assert(equal(range.front.name, "root"));
2326                     assertNotThrown!XMLParsingException(range.popFront());
2327                     assert(range.front.type == EntityType.pi);
2328                     assert(equal(range.front.name, "three"));
2329                     assertNotThrown!XMLParsingException(range.popFront());
2330                     assert(range.front.type == EntityType.pi);
2331                     assert(equal(range.front.name, "four"));
2332                     assertNotThrown!XMLParsingException(range.popFront());
2333                     assert(range.front.type == EntityType.elementEnd);
2334                     assert(equal(range.front.name, "root"));
2335                     assertNotThrown!XMLParsingException(range.popFront());
2336                     assert(range.front.type == EntityType.pi);
2337                     assert(equal(range.front.name, "five"));
2338                     assertNotThrown!XMLParsingException(range.popFront());
2339                     assert(range.front.type == EntityType.pi);
2340                     assert(equal(range.front.name, "six"));
2341                     assertNotThrown!XMLParsingException(range.popFront());
2342                     assert(range.empty);
2343                 }
2344                 {
2345                     auto range = parseXML!simpleXML(text.save);
2346                     assert(range.front.type == EntityType.elementStart);
2347                     assert(equal(range.front.name, "root"));
2348                     assertNotThrown!XMLParsingException(range.popFront());
2349                     assert(range.front.type == EntityType.elementEnd);
2350                     assert(equal(range.front.name, "root"));
2351                     assertNotThrown!XMLParsingException(range.popFront());
2352                     assert(range.empty);
2353                 }
2354             }
2355         }
2356     }
2359     // CDSect  ::= CDStart CData CDEnd
2360     // CDStart ::= '<![CDATA['
2361     // CData   ::= (Char* - (Char* ']]>' Char*))
2362     // CDEnd   ::= ']]>'
2363     // Parses a CDATA. <![CDATA[ was already removed from the front of the input.
2364     void _parseCDATA()
2365     {
2366         _entityPos = TextPos(_text.pos.line, _text.pos.col - cast(int)"<![CDATA[".length);
2367         _type = EntityType.cdata;
2368         _tagStack.sawEntity();
2369         _savedText.pos = _text.pos;
2370         _savedText.input = _text.takeUntilAndDrop!"]]>";
2371         checkText!true(_savedText);
2372         _grammarPos = GrammarPos.contentCharData2;
2373     }
2375     static if(compileInTests) unittest
2376     {
2377         import core.exception : AssertError;
2378         import std.algorithm.comparison : equal;
2379         import std.exception : assertNotThrown, collectException, enforce;
2380         import dxml.internal : codeLen, testRangeFuncs;
2382         static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
2383         {
2384             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2385             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2386             assertNotThrown!XMLParsingException(range.popFront());
2387             enforce!AssertError(range.front.type == EntityType.cdata, "unittest failure 1", __FILE__, line);
2388             enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
2389             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2390         }
2392         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2393         {
2394             auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
2395             auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
2396             auto e = collectException!XMLParsingException(range.popFront());
2397             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2398             enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
2399         }
2401         static foreach(func; testRangeFuncs)
2402         {
2403             test!func("<![CDATA[]]>", "", 1, 13);
2404             test!func("<![CDATA[hello world]]>", "hello world", 1, 24);
2405             test!func("<![CDATA[\nhello\n\nworld\n]]>", "\nhello\n\nworld\n", 5, 4);
2406             test!func("<![CDATA[京都市]]>", "京都市", 1, codeLen!(func, "<![CDATA[京都市]>") + 2);
2407             test!func("<![CDATA[<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ]]>",
2408                       "<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ", 1, 57);
2409             test!func("<![CDATA[&]]>", "&", 1, 14);
2411             testFail!func("<[CDATA[]>", 1, 2);
2412             testFail!func("<![CDAT[]>", 1, 2);
2413             testFail!func("<![CDATA]>", 1, 2);
2414             testFail!func("<![CDATA[>", 1, 10);
2415             testFail!func("<![CDATA[]", 1, 10);
2416             testFail!func("<![CDATA[]>", 1, 10);
2417             testFail!func("<![CDATA[ \v ]]>", 1, 11);
2418             testFail!func("<![CDATA[ \n\n \v \n ]]>", 3, 2);
2419         }
2420     }
2423     // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
2424     // DeclSep     ::= PEReference | S
2425     // intSubset   ::= (markupdecl | DeclSep)*
2426     // markupdecl  ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
2427     // Parse doctypedecl after GrammarPos.prologMisc1.
2428     // <!DOCTYPE and any whitespace after it should have already been removed
2429     // from the input.
2430     void _parseDoctypeDecl()
2431     {
2432         outer: while(true)
2433         {
2434             _text.skipToOneOf!('"', '\'', '[', '>')();
2435             switch(_text.input.front)
2436             {
2437                 static foreach(quote; ['"', '\''])
2438                 {
2439                     case quote:
2440                     {
2441                         popFrontAndIncCol(_text);
2442                         _text.skipUntilAndDrop!([quote])();
2443                         continue outer;
2444                     }
2445                 }
2446                 case '[':
2447                 {
2448                     popFrontAndIncCol(_text);
2449                     while(true)
2450                     {
2451                         checkNotEmpty(_text);
2452                         _text.skipToOneOf!('"', '\'', ']')();
2453                         switch(_text.input.front)
2454                         {
2455                             case '"':
2456                             {
2457                                 popFrontAndIncCol(_text);
2458                                 _text.skipUntilAndDrop!`"`();
2459                                 continue;
2460                             }
2461                             case '\'':
2462                             {
2463                                 popFrontAndIncCol(_text);
2464                                 _text.skipUntilAndDrop!`'`();
2465                                 continue;
2466                             }
2467                             case ']':
2468                             {
2469                                 popFrontAndIncCol(_text);
2470                                 stripWS(_text);
2471                                 if(_text.input.empty || _text.input.front != '>')
2472                                     throw new XMLParsingException("Incorrectly terminated <!DOCTYPE> section.", _text.pos);
2473                                 popFrontAndIncCol(_text);
2474                                 _parseAtPrologMisc!2();
2475                                 return;
2476                             }
2477                             default: assert(0);
2478                         }
2479                     }
2480                 }
2481                 case '>':
2482                 {
2483                     popFrontAndIncCol(_text);
2484                     _parseAtPrologMisc!2();
2485                     break;
2486                 }
2487                 default: assert(0);
2488             }
2489             break;
2490         }
2491     }
2493     static if(compileInTests) unittest
2494     {
2495         import core.exception : AssertError;
2496         import std.exception : assertNotThrown, collectException, enforce;
2497         import dxml.internal : testRangeFuncs;
2499         static void test(alias func)(string text, int row, int col, size_t line = __LINE__)
2500         {
2501             auto pos = TextPos(row, col + cast(int)"<root/>".length);
2502             auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
2503                                                              "unittest failure 1", __FILE__, line);
2504             enforce!AssertError(range.front.type == EntityType.elementEmpty, "unittest failure 2", __FILE__, line);
2505             enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
2506         }
2508         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2509         {
2510             auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
2511             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2512             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2513         }
2515         static foreach(func; testRangeFuncs)
2516         {
2517             test!func("<!DOCTYPE name>", 1, 16);
2518             test!func("<!DOCTYPE \n\n\n name>", 4, 7);
2519             test!func("<!DOCTYPE name \n\n\n >", 4, 3);
2521             test!func("<!DOCTYPE name []>", 1, 19);
2522             test!func("<!DOCTYPE \n\n\n name []>", 4, 10);
2523             test!func("<!DOCTYPE name \n\n\n []>", 4, 5);
2525             test!func(`<!DOCTYPE name PUBLIC "'''" '"""'>`, 1, 35);
2526             test!func(`<!DOCTYPE name PUBLIC "'''" '"""' []>`, 1, 38);
2527             test!func(`<!DOCTYPE name PUBLIC 'foo' "'''">`, 1, 35);
2528             test!func(`<!DOCTYPE name PUBLIC 'foo' '"""' []>`, 1, 38);
2530             test!func("<!DOCTYPE name [ <!ELEMENT foo EMPTY > ]>", 1, 42);
2531             test!func("<!DOCTYPE name [ <!ELEMENT bar ANY > ]>", 1, 40);
2532             test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA) > ]>", 1, 48);
2533             test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA | foo)> ]>", 1, 53);
2534             test!func("<!DOCTYPE name [ <!ELEMENT kids (foo) > ]>", 1, 43);
2535             test!func("<!DOCTYPE name [ <!ELEMENT kids (foo | bar)> ]>", 1, 48);
2537             test!func("<!DOCTYPE name [ <!ATTLIST foo> ]>", 1, 35);
2538             test!func("<!DOCTYPE name [ <!ATTLIST foo def CDATA #REQUIRED> ]>", 1, 55);
2540             test!func(`<!DOCTYPE name [ <!ENTITY foo "bar"> ]>`, 1, 40);
2541             test!func(`<!DOCTYPE name [ <!ENTITY foo 'bar'> ]>`, 1, 40);
2542             test!func(`<!DOCTYPE name [ <!ENTITY foo SYSTEM 'sys'> ]>`, 1, 47);
2543             test!func(`<!DOCTYPE name [ <!ENTITY foo PUBLIC "'''" 'sys'> ]>`, 1, 53);
2545             test!func(`<!DOCTYPE name [ <!NOTATION note PUBLIC 'blah'> ]>`, 1, 51);
2547             test!func("<!DOCTYPE name [ <?pi> ]>", 1, 26);
2549             test!func("<!DOCTYPE name [ <!-- coment --> ]>", 1, 36);
2551             test!func("<!DOCTYPE name [ <?pi> <!----> <!ELEMENT blah EMPTY> ]>", 1, 56);
2552             test!func("<!DOCTYPE \nname\n[\n<?pi> \n <!---->\n<!ENTITY foo '\n\n'\n>\n]>", 10, 3);
2554             test!func("<!DOCTYPE doc [\n" ~
2555                       "<!ENTITY e '<![CDATA[Tim Michael]]>'>\n" ~
2556                       "]>\n", 4, 1);
2558             testFail!func("<!DOCTYP name>", 1, 2);
2559             testFail!func("<!DOCTYPEname>", 1, 10);
2560             testFail!func("<!DOCTYPE name1><!DOCTYPE name2>", 1, 18);
2561             testFail!func("<!DOCTYPE\n\nname1><!DOCTYPE name2>", 3, 8);
2562             testFail!func("<!DOCTYPE name [ ]<!--comment-->", 1, 19);
2564             // FIXME This really should have the exception point at the quote and
2565             // say that it couldn't find the matching quote rather than point at
2566             // the character after it and say that it couldn't find a quote, but
2567             // that requires reworking some helper functions with better error
2568             // messages in mind.
2569             testFail!func(`<!DOCTYPE student SYSTEM "student".dtd"[` ~
2570                           "\n<!ELEMENT student (#PCDATA)>\n" ~
2571                           "]>", 1, 40);
2572         }
2573     }
2576     // Parse a start tag or empty element tag. It could be the root element, or
2577     // it could be a sub-element.
2578     // < was already removed from the front of the input.
2579     void _parseElementStart()
2580     {
2581         _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
2582         _savedText.pos = _text.pos;
2583         _savedText.input = _text.takeUntilAndDrop!(">", true)();
2585         if(_savedText.input.empty)
2586             throw new XMLParsingException("Tag missing name", _savedText.pos);
2587         if(_savedText.input.front == '/')
2588             throw new XMLParsingException("Invalid end tag", _savedText.pos);
2590         if(_savedText.input.length > 1)
2591         {
2592             auto temp = _savedText.input.save;
2593             temp.popFrontN(temp.length - 1);
2594             if(temp.front == '/')
2595             {
2596                 _savedText.input = _savedText.input.takeExactly(_savedText.input.length - 1);
2598                 static if(config.splitEmpty == SplitEmpty.no)
2599                 {
2600                     _type = EntityType.elementEmpty;
2601                     _tagStack.sawEntity();
2602                     _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2603                 }
2604                 else
2605                 {
2606                     _type = EntityType.elementStart;
2607                     _tagStack.sawEntity();
2608                     _grammarPos = GrammarPos.splittingEmpty;
2609                 }
2610             }
2611             else
2612             {
2613                 _type = EntityType.elementStart;
2614                 _tagStack.sawEntity();
2615                 _grammarPos = GrammarPos.contentCharData1;
2616             }
2617         }
2618         else
2619         {
2620             _type = EntityType.elementStart;
2621             _tagStack.sawEntity();
2622             _grammarPos = GrammarPos.contentCharData1;
2623         }
2625         _name = _savedText.takeName();
2626         // The attributes should be all that's left in savedText.
2627         if(_tagStack.atMax)
2628         {
2629             auto temp = _savedText.save;
2630             auto attrChecker = _tagStack.attrChecker;
2632             while(true)
2633             {
2634                 immutable wasWS = stripWS(temp);
2635                 if(temp.input.empty)
2636                     break;
2637                 if(!wasWS)
2638                     throw new XMLParsingException("Whitespace missing before attribute name", temp.pos);
2640                 immutable attrPos = temp.pos;
2641                 attrChecker.pushAttr(temp.takeName!'='(), attrPos);
2642                 stripWS(temp);
2644                 checkNotEmpty(temp);
2645                 if(temp.input.front != '=')
2646                     throw new XMLParsingException("= missing", temp.pos);
2647                 popFrontAndIncCol(temp);
2649                 stripWS(temp);
2650                 temp.takeAttValue();
2651             }
2653             attrChecker.checkAttrs();
2654         }
2655     }
2657     static if(compileInTests) unittest
2658     {
2659         import core.exception : AssertError;
2660         import std.algorithm.comparison : equal;
2661         import std.exception : assertNotThrown, collectException, enforce;
2662         import dxml.internal : codeLen, testRangeFuncs;
2664         static void test(alias func)(string text, EntityType type, string name,
2665                                      int row, int col, size_t line = __LINE__)
2666         {
2667             auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
2668             enforce!AssertError(range.front.type == type, "unittest failure 1", __FILE__, line);
2669             enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
2670             enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
2671         }
2673         static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
2674         {
2675             auto xml = func(text);
2676             auto e = collectException!XMLParsingException(parseXML(func(text)));
2677             enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
2678             enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
2679         }
2681         static foreach(func; testRangeFuncs)
2682         {
2683             test!func("<a/>", EntityType.elementEmpty, "a", 1, 5);
2684             test!func("<a></a>", EntityType.elementStart, "a", 1, 4);
2685             test!func("<root/>", EntityType.elementEmpty, "root", 1, 8);
2686             test!func("<root></root>", EntityType.elementStart, "root", 1, 7);
2687             test!func("<foo/>", EntityType.elementEmpty, "foo", 1, 7);
2688             test!func("<foo></foo>", EntityType.elementStart, "foo", 1, 6);
2689             test!func("<foo       />", EntityType.elementEmpty, "foo", 1, 14);
2690             test!func("<foo       ></foo>", EntityType.elementStart, "foo", 1, 13);
2691             test!func("<foo  \n\n\n />", EntityType.elementEmpty, "foo", 4, 4);
2692             test!func("<foo  \n\n\n ></foo>", EntityType.elementStart, "foo", 4, 3);
2693             test!func("<foo.></foo.>", EntityType.elementStart, "foo.", 1, 7);
2694             test!func(`<京都市></京都市>`, EntityType.elementStart, "京都市", 1, codeLen!(func, `<京都市>`) + 1);
2696             testFail!func(`<.foo/>`, 1, 2);
2697             testFail!func(`<>`, 1, 2);
2698             testFail!func(`</>`, 1, 2);
2699             testFail!func(`</foo>`, 1, 2);
2701             {
2702                 auto range = assertNotThrown!XMLParsingException(parseXML!simpleXML(func("<root/>")));
2703                 assert(range.front.type == EntityType.elementStart);
2704                 assert(equal(range.front.name, "root"));
2705                 assert(range._text.pos == TextPos(1, 8));
2706                 assertNotThrown!XMLParsingException(range.popFront());
2707                 assert(range.front.type == EntityType.elementEnd);
2708                 assert(equal(range.front.name, "root"));
2709                 assert(range._text.pos == TextPos(1, 8));
2710             }
2711         }
2712     }
2715     // Parse an end tag. It could be the root element, or it could be a
2716     // sub-element.
2717     // </ was already removed from the front of the input.
2718     void _parseElementEnd()
2719     {
2720         if(_text.input.empty)
2721             throw new XMLParsingException("Unterminated end tag", _text.pos);
2722         _entityPos = TextPos(_text.pos.line, _text.pos.col - 2);
2723         _type = EntityType.elementEnd;
2724         _tagStack.sawEntity();
2725         immutable namePos = _text.pos;
2726         _name = _text.takeName!'>'();
2727         stripWS(_text);
2728         if(_text.input.empty || _text.input.front != '>')
2729         {
2730             throw new XMLParsingException("There can only be whitespace between an end tag's name and the >",
2731                                           _text.pos);
2732         }
2733         popFrontAndIncCol(_text);
2734         _tagStack.popTag(_name.save, namePos);
2735         _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
2736     }
    // Tests _parseElementEnd: the name reported for an end tag, the input
    // position right after it, and the position reported in the exception for
    // malformed or mismatched end tags.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : codeLen, testRangeFuncs;

        // Parses text, pops the first entity, and then requires the new front
        // to be an end tag called name, with the parser's input position at
        // (row, col). line is the caller's line so that failures point at the
        // test case rather than at this helper.
        static void test(alias func)(string text, string name, int row, int col, size_t line = __LINE__)
        {
            auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
            range.popFront();
            enforce!AssertError(range.front.type == EntityType.elementEnd, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
            enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Parses text and requires that popping the first entity throws an
        // XMLParsingException whose position is (row, col).
        static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
        {
            auto range = parseXML(func(text));
            auto e = collectException!XMLParsingException(range.popFront());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        static foreach(func; testRangeFuncs)
        {
            // Well-formed end tags; the expected column is one past the end
            // tag's closing >.
            test!func("<a></a>", "a", 1, 8);
            test!func("<foo></foo>", "foo", 1, 12);
            test!func("<foo    ></foo    >", "foo", 1, 20);
            test!func("<foo \n ></foo \n >", "foo", 3, 3);
            test!func("<foo>\n\n\n</foo>", "foo", 4, 7);
            test!func("<foo.></foo.>", "foo.", 1, 14);
            test!func(`<京都市></京都市>`, "京都市", 1, codeLen!(func, `<京都市></京都市>`) + 1);

            // Malformed, mismatched, or truncated end tags.
            testFail!func(`<foo></ foo>`, 1, 8);
            testFail!func(`<foo></bar>`, 1, 8);
            testFail!func(`<foo></fo>`, 1, 8);
            testFail!func(`<foo></food>`, 1, 8);
            testFail!func(`<a></>`, 1, 6);
            testFail!func(`<a></`, 1, 6);
            testFail!func(`<a><`, 1, 5);
            testFail!func(`<a></a b='42'>`, 1, 8);
        }
    }
2784     // GrammarPos.contentCharData1
2785     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2786     // Parses at either CharData?. Nothing from the CharData? (or what's after it
2787     // if it's not there) has been consumed.
2788     void _parseAtContentCharData()
2789     {
2790         checkNotEmpty(_text);
2791         auto orig = _text.save;
2792         stripWS(_text);
2793         checkNotEmpty(_text);
2794         if(_text.input.front != '<')
2795         {
2796             _text = orig;
2797             _entityPos = _text.pos;
2798             _type = EntityType.text;
2799             _tagStack.sawEntity();
2800             _savedText.pos = _text.pos;
2801             _savedText.input = _text.takeUntilAndDrop!"<"();
2802             checkText!false(_savedText);
2803             checkNotEmpty(_text);
2804             if(_text.input.front == '/')
2805             {
2806                 popFrontAndIncCol(_text);
2807                 _grammarPos = GrammarPos.endTag;
2808             }
2809             else
2810                 _grammarPos = GrammarPos.contentMid;
2811         }
2812         else
2813         {
2814             popFrontAndIncCol(_text);
2815             checkNotEmpty(_text);
2816             if(_text.input.front == '/')
2817             {
2818                 popFrontAndIncCol(_text);
2819                 _parseElementEnd();
2820             }
2821             else
2822                 _parseAtContentMid();
2823         }
2824     }
    // Tests _parseAtContentCharData using text placed between <root> and
    // </root>, for both settings of ThrowOnEntityRef.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : codeLen, testRangeFuncs;

        // Wraps text in <root>...</root>, pops the start tag, and requires the
        // new front to be a text entity equal to text. row and col are given
        // relative to text on its own; the expected input position adds the
        // length of "<root></" when row is 1 and the length of "</" otherwise.
        static void test(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
        {
            auto pos = TextPos(row, col + (cast(int)(row == 1 ? "<root></" : "</").length));
            auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
            assertNotThrown!XMLParsingException(range.popFront());
            enforce!AssertError(range.front.type == EntityType.text, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(range.front.text, text), "unittest failure 2", __FILE__, line);
            enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
        }

        // Wraps text in <root>...</root> and requires that popping the start
        // tag throws an XMLParsingException. row and col are relative to text
        // on its own; the length of "<root>" is added to col only when row is 1.
        static void testFail(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
        {
            auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
            auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
            auto e = collectException!XMLParsingException(range.popFront());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
        }

        static foreach(func; testRangeFuncs)
        {
            static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
            {
                // Valid character data.
                test!(func, toer)("hello world", 1, 12);
                test!(func, toer)("\nhello\n\nworld", 4, 6);
                test!(func, toer)("京都市", 1, codeLen!(func, "京都市") + 1);
                test!(func, toer)("&#x42;", 1, 7);
                test!(func, toer)("]", 1, 2);
                test!(func, toer)("]]", 1, 3);
                test!(func, toer)("]>", 1, 3);
                test!(func, toer)("foo \n\n &lt; \n bar", 4, 5);

                // Malformed references and invalid characters.
                testFail!(func, toer)("&", 1, 1);
                testFail!(func, toer)("&;", 1, 1);
                testFail!(func, toer)("&f", 1, 1);
                testFail!(func, toer)("\v", 1, 1);
                testFail!(func, toer)("hello&world", 1, 6);
                testFail!(func, toer)("hello\vworld", 1, 6);
                testFail!(func, toer)("hello&;world", 1, 6);
                testFail!(func, toer)("hello&#;world", 1, 6);
                testFail!(func, toer)("hello&#x;world", 1, 6);
                testFail!(func, toer)("hello&.;world", 1, 6);
                testFail!(func, toer)("\n\nfoo\nbar&.;", 4, 4);

                // ]]> is rejected in character data.
                testFail!(func, toer)("]]>", 1, 1);
                testFail!(func, toer)("foo]]>bar", 1, 4);

                // Well-formed references to entities other than the predefined
                // ones are expected to fail only with ThrowOnEntityRef.yes.
                static if(toer == ThrowOnEntityRef.yes)
                {
                    testFail!(func, toer)("&foo; &bar baz", 1, 1);
                    testFail!(func, toer)("foo \n\n &ampe; \n bar", 3, 2);
                }
                else
                {
                    testFail!(func, toer)("&foo; &bar baz", 1, 7);
                    test!(func, toer)("foo \n\n &ampe; \n bar", 4, 5);
                }
            }
        }
    }
2895     // GrammarPos.contentMid
2896     // content     ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
2897     // The text right after the start tag was what was parsed previously. So,
2898     // that first CharData? was what was parsed last, and this parses starting
2899     // right after. The < should have already been removed from the input.
2900     void _parseAtContentMid()
2901     {
2902         // Note that References are treated as part of the CharData and not
2903         // parsed out by the EntityRange (see EntityRange.text).
2905         switch(_text.input.front)
2906         {
2907             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2908             // CDSect  ::= CDStart CData CDEnd
2909             // CDStart ::= '<![CDATA['
2910             // CData   ::= (Char* - (Char* ']]>' Char*))
2911             // CDEnd   ::= ']]>'
2912             case '!':
2913             {
2914                 popFrontAndIncCol(_text);
2915                 if(_text.stripStartsWith("--"))
2916                 {
2917                     _parseComment();
2918                     static if(config.skipComments == SkipComments.yes)
2919                         _parseAtContentCharData();
2920                     else
2921                         _grammarPos = GrammarPos.contentCharData2;
2922                 }
2923                 else if(_text.stripStartsWith("[CDATA["))
2924                     _parseCDATA();
2925                 else
2926                 {
2927                     immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2928                     throw new XMLParsingException("Expected Comment or CDATA section", bangPos);
2929                 }
2930                 break;
2931             }
2932             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2933             case '?':
2934             {
2935                 _parsePI();
2936                 _grammarPos = GrammarPos.contentCharData2;
2937                 static if(config.skipPI == SkipPI.yes)
2938                     popFront();
2939                 break;
2940             }
2941             // element ::= EmptyElemTag | STag content ETag
2942             default:
2943             {
2944                 _parseElementStart();
2945                 break;
2946             }
2947         }
2948     }
2951     // This parses the Misc* that come after the root element.
2952     void _parseAtEndMisc()
2953     {
2954         // Misc ::= Comment | PI | S
2956         stripWS(_text);
2958         if(_text.input.empty)
2959         {
2960             _grammarPos = GrammarPos.documentEnd;
2961             return;
2962         }
2964         if(_text.input.front != '<')
2965             throw new XMLParsingException("Expected <", _text.pos);
2966         popFrontAndIncCol(_text);
2967         checkNotEmpty(_text);
2969         switch(_text.input.front)
2970         {
2971             // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2972             case '!':
2973             {
2974                 popFrontAndIncCol(_text);
2975                 if(_text.stripStartsWith("--"))
2976                 {
2977                     _parseComment();
2978                     static if(config.skipComments == SkipComments.yes)
2979                         _parseAtEndMisc();
2980                     break;
2981                 }
2982                 immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
2983                 throw new XMLParsingException("Expected Comment", bangPos);
2984             }
2985             // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2986             case '?':
2987             {
2988                 _parsePI();
2989                 static if(config.skipPI == SkipPI.yes)
2990                     popFront();
2991                 break;
2992             }
2993             default: throw new XMLParsingException("Must be a comment or PI", _text.pos);
2994         }
2995     }
2997     // Used for keeping track of the names of start tags so that end tags can be
2998     // verified as well as making it possible to avoid redoing other validation.
2999     // We keep track of the total number of entities which have been parsed thus
3000     // far so that only whichever EntityRange is farthest along in parsing
3001     // actually adds or removes tags from the TagStack, and the parser can skip
3002     // some of the validation for ranges that are farther behind. That way, the
3003     // end tags get verified, but we only have one stack. If the stack were
3004     // duplicated with every call to save, then there would be a lot more
3005     // allocations, which we don't want. But because we only need to verify the
3006     // end tags once, we can get away with having a shared tag stack. The cost
3007     // is that we have to keep track of how many tags we've parsed so that we
3008     // know if an EntityRange should actually be pushing or popping tags from
3009     // the stack, but that's a lot cheaper than duplicating the stack, and it's
3010     // a lot less annoying then making EntityRange an input range and not a
3011     // forward range or making it a cursor rather than a range.
3012     struct TagStack
3013     {
3014         void pushTag(Taken tagName)
3015         {
3016             if(entityCount++ == state.maxEntities)
3017             {
3018                 ++state.maxEntities;
3019                 put(state.tags, tagName);
3020             }
3021             ++depth;
3022         }
3024         void popTag(Taken tagName, TextPos pos)
3025         {
3026             import std.algorithm : equal;
3027             import std.format : format;
3028             if(entityCount++ == state.maxEntities)
3029             {
3030                 assert(!state.tags.data.empty);
3031                 if(!equal(state.tags.data.back.save, tagName.save))
3032                 {
3033                     enum fmt = "Name of end tag </%s> does not match corresponding start tag <%s>";
3034                     throw new XMLParsingException(format!fmt(tagName, state.tags.data.back), pos);
3035                 }
3036                 ++state.maxEntities;
3037                 state.tags.shrinkTo(state.tags.data.length - 1);
3038             }
3039             --depth;
3040         }
3042         @property auto attrChecker()
3043         {
3044             assert(atMax);
3046             static struct AttrChecker
3047             {
3048                 void pushAttr(Taken attrName, TextPos attrPos)
3049                 {
3050                     put(state.attrs, Attribute(attrName, attrPos));
3051                 }
3053                 void checkAttrs()
3054                 {
3055                     import std.algorithm.comparison : cmp, equal;
3056                     import std.algorithm.sorting : sort;
3057                     import std.conv : to;
3059                     if(state.attrs.data.length < 2)
3060                         return;
3062                     sort!((a,b) => cmp(a.taken.save, b.taken.save) < 0)(state.attrs.data);
3063                     auto prev = state.attrs.data.front;
3064                     foreach(attr; state.attrs.data[1 .. $])
3065                     {
3066                         if(equal(prev.taken, attr.taken))
3067                             throw new XMLParsingException("Duplicate attribute name", attr.pos);
3068                         prev = attr;
3069                     }
3070                 }
3072                 ~this()
3073                 {
3074                     state.attrs.clear();
3075                 }
3077                 SharedState* state;
3078             }
3080             return AttrChecker(state);
3081         }
3083         void sawEntity()
3084         {
3085             if(entityCount++ == state.maxEntities)
3086                 ++state.maxEntities;
3087         }
3089         @property bool atMax()
3090         {
3091             return entityCount == state.maxEntities;
3092         }
3094         struct Attribute
3095         {
3096             Taken taken;
3097             TextPos pos;
3098         }
3100         struct SharedState
3101         {
3102             import std.array : Appender;
3104             Appender!(Taken[]) tags;
3105             Appender!(Attribute[]) attrs;
3106             size_t maxEntities;
3107         }
3109         static create()
3110         {
3111             TagStack tagStack;
3112             tagStack.state = new SharedState;
3113             tagStack.state.tags.reserve(10);
3114             tagStack.state.attrs.reserve(10);
3115             return tagStack;
3116         }
3118         SharedState* state;
3119         size_t entityCount;
3120         int depth;
3121     }
    // Tests start/end tag matching across whole documents for each of
    // someTestConfigs.
    static if(compileInTests) unittest
    {
        import core.exception : AssertError;
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown, collectException, enforce;
        import dxml.internal : testRangeFuncs;

        // Requires that text can be parsed from start to finish without an
        // XMLParsingException under every config in someTestConfigs.
        static void test(alias func)(string text, size_t line = __LINE__)
        {
            auto xml = func(text);
            static foreach(config; someTestConfigs)
            {{
                auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
                                                                 __FILE__, line);
                assertNotThrown!XMLParsingException(walkLength(range), "unittest failure 2", __FILE__, line);
            }}
        }

        // Requires that constructing the range succeeds but that walking it
        // throws an XMLParsingException positioned at (row, col), under every
        // config in someTestConfigs.
        static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
        {
            auto xml = func(text);
            static foreach(config; someTestConfigs)
            {{
                auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
                                                                 __FILE__, line);
                auto e = collectException!XMLParsingException(walkLength(range));
                enforce!AssertError(e !is null, "unittest failure 2", __FILE__, line);
                enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
            }}
        }

        static foreach(func; testRangeFuncs)
        {
            // Properly nested documents.
            test!func("<root></root>");
            test!func("<root><a></a></root>");
            test!func("<root><a><b></b></a></root>");
            test!func("<root><a><b></b></a></root>");
            test!func("<root><a><b></b></a><foo><bar></bar></foo></root>");
            test!func("<a>\n" ~
                      "    <b>\n" ~
                      "        <c>\n" ~
                      "            <d>\n" ~
                      "                <e>\n" ~
                      "                    <f>\n" ~
                      "                        <g>\n" ~
                      "                            <h>\n" ~
                      "                                 <i><i><i><i>\n" ~
                      "                                 </i></i></i></i>\n" ~
                      "                                 <i>\n" ~
                      "                                     <j>\n" ~
                      "                                         <k>\n" ~
                      "                                             <l>\n" ~
                      "                                                 <m>\n" ~
                      "                                                     <n>\n" ~
                      "                                                         <o>\n" ~
                      "                                                             <p>\n" ~
                      "                                                                 <q>\n" ~
                      "                                                                     <r>\n" ~
                      "                                                                         <s>\n" ~
                      "          <!-- comment --> <?pi?> <t><u><v></v></u></t>\n" ~
                      "                                                                         </s>\n" ~
                      "                                                                     </r>\n" ~
                      "                                                                 </q>\n" ~
                      "                                                </p></o></n></m>\n" ~
                      "                                                               </l>\n" ~
                      "                    </k>\n" ~
                      "           </j>\n" ~
                      "</i></h>" ~
                      "                        </g>\n" ~
                      "                    </f>\n" ~
                      "                </e>\n" ~
                      "            </d>\n" ~
                      "        </c>\n" ~
                      "    </b>\n" ~
                      "</a>");
            test!func(`<京都市></京都市>`);

            // Unterminated, mismatched, or misplaced tags.
            testFail!func(`<a>`, 1, 4);
            testFail!func(`<foo></foobar>`, 1, 8);
            testFail!func(`<foobar></foo>`, 1, 11);
            testFail!func(`<a><\a>`, 1, 5);
            testFail!func(`<a><a/>`, 1, 8);
            testFail!func(`<a><b>`, 1, 7);
            testFail!func(`<a><b><c>`, 1, 10);
            testFail!func(`<a></a><b>`, 1, 9);
            testFail!func(`<a></a><b></b>`, 1, 9);
            testFail!func(`<a><b></a></b>`, 1, 9);
            testFail!func(`<a><b><c></c><b></a>`, 1, 19);
            testFail!func(`<a><b></c><c></b></a>`, 1, 9);
            testFail!func(`<a><b></c></b></a>`, 1, 9);
            testFail!func("<a>\n" ~
                          "    <b>\n" ~
                          "        <c>\n" ~
                          "            <d>\n" ~
                          "                <e>\n" ~
                          "                    <f>\n" ~
                          "                    </f>\n" ~
                          "                </e>\n" ~
                          "            </d>\n" ~
                          "        </c>\n" ~
                          "    </b>\n" ~
                          "<a>", 12, 4);
            testFail!func("<a>\n" ~
                          "    <b>\n" ~
                          "        <c>\n" ~
                          "            <d>\n" ~
                          "                <e>\n" ~
                          "                    <f>\n" ~
                          "                    </f>\n" ~
                          "                </e>\n" ~
                          "            </d>\n" ~
                          "        </c>\n" ~
                          "    </b>\n" ~
                          "</q>", 12, 3);
        }
    }
3241     struct Text(R)
3242     {
3243         alias config = cfg;
3244         alias Input = R;
3246         Input input;
3247         TextPos pos;
3249         @property save() { return typeof(this)(input.save, pos); }
3250     }
    // The type used for slices of the input (names and text). Only the type of
    // the expression matters here; the 42 is never used as an actual length.
    alias Taken = typeof(takeExactly(byCodeUnit(R.init), 42));

    // The type of the entity which was most recently parsed.
    EntityType _type;
    // The position in the document where that entity starts.
    TextPos _entityPos;
    // Where in the grammar parsing is currently at.
    auto _grammarPos = GrammarPos.documentStart;

    // The name of the most recently parsed entity (e.g. set by
    // _parseElementEnd to the end tag's name).
    Taken _name;
    // Tracks open start tags so that end tags can be verified.
    TagStack _tagStack;

    // The input which has not been consumed yet, along with its position.
    Text!(typeof(byCodeUnit(R.init))) _text;
    // A slice of the input saved for the current entity along with the
    // position it started at (e.g. the character data of a text entity).
    Text!Taken _savedText;
3267     this(R xmlText)
3268     {
3269         _tagStack = TagStack.create();
3270         _text.input = byCodeUnit(xmlText);
3272         // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
3273         _savedText = typeof(_savedText).init;
3274         _name = typeof(_name).init;
3276         popFront();
3277     }
3278 }
/// Ditto
EntityRange!(config, R) parseXML(Config config = Config.init, R)(R xmlText)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    // Constructing the range parses up to the first entity.
    alias Range = EntityRange!(config, R);
    return Range(xmlText);
}
///
version(dxmlTests) unittest
{
    import std.range.primitives : walkLength;

    auto xml = "<?xml version='1.0'?>\n" ~
               "<?instruction start?>\n" ~
               "<foo attr='42'>\n" ~
               "    <bar/>\n" ~
               "    <!-- no comment -->\n" ~
               "    <baz hello='world'>\n" ~
               "    nothing to say.\n" ~
               "    nothing at all...\n" ~
               "    </baz>\n" ~
               "</foo>\n" ~
               "<?some foo?>";

    // With the default Config, every entity after the XML declaration is
    // reported.
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.pi);
        assert(range.front.name == "instruction");
        assert(range.front.text == "start");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "attr");
            assert(attrs.front.value == "42");
        }

        range.popFront();
        assert(range.front.type == EntityType.elementEmpty);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " no comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "baz");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "hello");
            assert(attrs.front.value == "world");
        }

        // The whitespace surrounding the text is part of the text entity.
        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </baz>
        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </foo>

        // Processing instructions are allowed after the root element.
        range.popFront();
        assert(range.front.type == EntityType.pi);
        assert(range.front.name == "some");
        assert(range.front.text == "foo");

        range.popFront();
        assert(range.empty);
    }
    // The same document parsed with the simpleXML config.
    {
        auto range = parseXML!simpleXML(xml);

        // simpleXML is set to skip processing instructions.

        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "attr");
            assert(attrs.front.value == "42");
        }

        // simpleXML is set to split empty tags so that <bar/> is treated
        // as the same as <bar></bar> so that code does not have to
        // explicitly handle empty tags.
        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");
        range.popFront();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        // simpleXML is set to skip comments.

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "baz");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "hello");
            assert(attrs.front.value == "world");
        }

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n    nothing to say.\n    nothing at all...\n    ");

        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </baz>
        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </foo>

        // The trailing processing instruction is skipped as well, so the
        // range is empty right after the root element's end tag.
        range.popFront();
        assert(range.empty);
    }
}
// Test the state of the range immediately after parseXML returns.
// Each input is passed through func so that every range type in
// testRangeFuncs is actually exercised rather than a plain string each time.
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        // The XML declaration is never reported, regardless of the config.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<?xml?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading comment is the first entity unless comments are skipped.
        static foreach(config; [Config.init, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, "no comment"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading processing instruction is the first entity unless
        // processing instructions are skipped.
        static foreach(config; [Config.init, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "private"));
            assert(equal(range.front.text, "eye"));
        }}
        static foreach(config; [simpleXML, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // With nothing before the root element, its start tag comes first.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}
    }
}
// Test various invalid states that didn't seem to fit well into tests elsewhere.
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : collectException, enforce;
    import dxml.internal : testRangeFuncs;

    // Requires that, for every config in someTestConfigs, either constructing
    // the range or walking it to the end throws an XMLParsingException
    // positioned at (row, col). Construction is inside the delegate because
    // some of these inputs already fail while parsing the first entity.
    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        static foreach(config; someTestConfigs)
        {{
            auto e = collectException!XMLParsingException(
                {
                    auto range = parseXML!config(xml.save);
                    while(!range.empty)
                        range.popFront();
                }());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }}
    }

    static foreach(func; testRangeFuncs)
    {{
        // Elements after the root element.
        testFail!func("<root></root><invalid></invalid>", 1, 15);
        testFail!func("<root></root><invalid/>", 1, 15);
        testFail!func("<root/><invalid></invalid>", 1, 9);
        testFail!func("<root/><invalid/>", 1, 9);

        // Text after the root element.
        testFail!func("<root></root>invalid", 1, 14);
        testFail!func("<root/>invalid", 1, 8);

        testFail!func("<root/><?pi?>invalid", 1, 14);
        testFail!func("<root/><?pi?><invalid/>", 1, 15);

        // A DOCTYPE or a stray end tag after the root element.
        testFail!func("<root/><!DOCTYPE foo>", 1, 9);
        testFail!func("<root/></root>", 1, 9);

        // Text before anything else in the document.
        testFail!func("invalid<root></root>", 1, 1);
        testFail!func("invalid<?xml?><root></root>", 1, 1);
        testFail!func("invalid<!DOCTYPE foo><root></root>", 1, 1);
        testFail!func("invalid<!--comment--><root></root>", 1, 1);
        testFail!func("invalid<?Poirot?><root></root>", 1, 1);

        // Text between the prolog and the root element.
        testFail!func("<?xml?>invalid<root></root>", 1, 8);
        testFail!func("<!DOCTYPE foo>invalid<root></root>", 1, 15);
        testFail!func("<!--comment-->invalid<root></root>", 1, 15);
        testFail!func("<?Poirot?>invalid<root></root>", 1, 11);

        // Documents without a (complete) root element.
        testFail!func("<?xml?>", 1, 8);
        testFail!func("<!DOCTYPE name>", 1, 16);
        testFail!func("<?Sherlock?>", 1, 13);
        testFail!func("<?Poirot?><?Sherlock?><?Holmes?>", 1, 33);
        testFail!func("<?Poirot?></Poirot>", 1, 12);
        testFail!func("</Poirot>", 1, 2);
        testFail!func("<", 1, 2);
        testFail!func(`</`, 1, 2);
        testFail!func(`</a`, 1, 2);
        testFail!func(`</a>`, 1, 2);

        // ]]> is not allowed in character data.
        testFail!func("<doc>]]></doc>", 1, 6);

        // Whitespace before the XML declaration.
        testFail!func(" <?xml?><root/>", 1, 1);
        testFail!func("\n<?xml?><root/>", 1, 1);
    }}
}
// Test that parseXML and EntityRange's properties work with @safe.
// pure would be nice too, but at minimum, the use of format for exception
// messages, and the use of assumeSafeAppend prevent it. It may or may not be
// worth trying to fix that.
version(dxmlTests) @safe unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    auto xml = "<root>\n" ~
               "    <![CDATA[nothing]]>\n" ~
               "    <foo a='42'/>\n" ~
               "</root>";

    static foreach(func; testRangeFuncs)
    {{
        // The input has to go through func; otherwise, every iteration would
        // parse the same string, and none of the other range types would be
        // checked for @safe.
        auto range = parseXML(func(xml));
        assert(range.front.type == EntityType.elementStart);
        assert(equal(range.front.name, "root"));
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.cdata);
        assert(equal(range.front.text, "nothing"));
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
        assert(equal(range.front.name, "foo"));
        {
            auto attrs = range.front.attributes;
            // save is called purely to verify that it is @safe.
            auto saved = attrs.save;
            auto attr = attrs.front;
            // equal is used rather than ==, because the name and value are
            // only strings when the input is a string.
            assert(equal(attr.name, "a"));
            assert(equal(attr.value, "42"));
            attrs.popFront();
            assert(attrs.empty);
        }
        // save is called purely to verify that it is @safe.
        auto saved = range.save;
    }}
}
// This type exists solely so that EntityRange can be instantiated with it
// when dxmlTests is enabled, which compiles in the unittest blocks inside of
// EntityRange without them being compiled in normally. None of its functions
// are meant to actually run, which is why they all simply assert(0).
private struct EntityRangeCompileTests
{
@safe pure nothrow @nogc:

    @property bool empty() { assert(0); }
    @property char front() { assert(0); }
    void popFront() { assert(0); }
    @property typeof(this) save() { assert(0); }
}

version(dxmlTests)
    EntityRange!(Config.init, EntityRangeCompileTests) _entityRangeTests;
/++
    Evaluates to $(D true) if the given type is a forward range of attributes
    and to $(D false) otherwise.

    To qualify as an attribute range, a type must be a forward range where

    $(UL
        $(LI every element has the members $(D name), $(D value), and
             $(D pos))
        $(LI $(D name) and $(D value) are forward ranges of characters)
        $(LI $(D name) and $(D value) are of the same type)
        $(LI $(D pos) is a $(LREF TextPos)))

    An attribute range would normally be obtained from
    $(LREF EntityRange.Entity.attributes) or
    $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom).
    However, any range which provides the correct API is an attribute range
    regardless of where it came from.

    See_Also: $(LREF EntityRange.Entity.Attribute)$(BR)
              $(LREF EntityRange.Entity.attributes)$(BR)
              $(REF_ALTTEXT DOMEntity.Attribute, DOMEntity.Attribute, dxml, dom)$(BR)
              $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
  +/
template isAttrRange(R)
{
    // Each requirement is checked in turn so that later checks are only
    // instantiated for types which passed the earlier ones.
    static if(!isForwardRange!R)
        enum isAttrRange = false;
    else static if(!is(typeof(R.init.front.name)) || !is(typeof(R.init.front.value)))
        enum isAttrRange = false;
    else static if(!is(ReturnType!((R r) => r.front.pos) == TextPos))
        enum isAttrRange = false;
    else
    {
        alias NameT = ReturnType!((R r) => r.front.name);
        alias ValueT = ReturnType!((R r) => r.front.value);

        enum isAttrRange = is(NameT == ValueT) &&
                           isForwardRange!NameT &&
                           isSomeChar!(ElementType!NameT);
    }
}
///
version(dxmlTests) unittest
{
    import std.typecons : Tuple;
    import dxml.dom : parseDOM;

    // The attributes of an entity from parseXML are an attribute range.
    alias ParserAttrs = typeof(parseXML("<root/>").front.attributes);
    static assert(isAttrRange!ParserAttrs);

    // So are the attributes of an entity from parseDOM.
    alias DOMAttrs = typeof(parseDOM("<root/>").children[0].attributes);
    static assert(isAttrRange!DOMAttrs);

    // Any forward range whose elements have the right members qualifies.
    alias Attr = Tuple!(string, "name", string, "value", TextPos, "pos");
    static assert(isAttrRange!(Attr[]));

    // The elements of a string have no name, value, or pos.
    static assert(!isAttrRange!string);
}
version(dxmlTests) unittest
{
    import std.typecons : Tuple;

    // Instantiating rejects fails to compile unless isAttrRange is false for
    // a dynamic array of Tuple!Fields.
    static void rejects(Fields...)()
    {
        static assert(!isAttrRange!(Tuple!Fields[]));
    }

    // One of the members has the wrong name.
    rejects!(string, "nam", string, "value", TextPos, "pos")();
    rejects!(string, "name", string, "valu", TextPos, "pos")();
    rejects!(string, "name", string, "value", TextPos, "po")();

    // name and value have different types.
    rejects!(string, "name", wstring, "value", TextPos, "pos")();

    // There is no pos.
    rejects!(string, "name", string, "value")();

    // One of the members has the wrong type.
    rejects!(int, "name", string, "value", TextPos, "pos")();
    rejects!(string, "name", int, "value", TextPos, "pos")();
    rejects!(string, "name", string, "value", int, "pos")();
}
/++
    A helper function for extracting the values of start tag attributes.

    It works similarly to $(PHOBOS_REF getopt, std, getopt). It is given a
    range of attributes followed by a list of alternating strings and
    pointers. Each string is the name of an attribute to look for, and if an
    attribute with that name is in the range, then its value is assigned to
    what the pointer immediately following the string points to. If the
    pointer does not point to the same type as the range of characters used
    in the attributes, then $(PHOBOS_REF to, std, conv) is used to convert the
    value to the type that the pointer points to.

    A $(D Nullable!T*) is treated the same as a $(D T*). So, if the matching
    attribute name is present, $(D to!T) is used to convert the attribute
    value. What $(D Nullable!T*) provides over $(D T*) is the ability to
    distinguish between an attribute which wasn't present and one which was
    present but whose value was equivalent to $(D T.init).

    Unlike $(PHOBOS_REF getopt, std, getopt), the given range is consumed
    instead of being taken by $(K_REF) with the attributes that weren't
    matched being left in it (since that really doesn't work with an
    arbitrary range as opposed to a dynamic array). However, if the second
    argument of getAttrs is not a $(K_STRING) but is instead an output range
    that accepts the element type of the range, then any attributes which
    aren't matched are put into the output range.

    Params:
        attrRange = A range of attributes (see $(LREF isAttrRange)).
        unmatched = An output range that any _unmatched attributes from the
                    range are put into (optional argument).
        args = An alternating list of strings and pointers where the strings
               are the names of the attributes to get the values of, and the
               corresponding values get assigned to what the pointers point
               to.

    Throws: $(LREF XMLParsingException) if $(PHOBOS_REF to, std, conv) fails to
            convert an attribute value.

    See_Also: $(LREF isAttrRange)$(BR)
              $(LREF EntityRange.Entity.attributes)$(BR)
              $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
  +/
void getAttrs(R, Args...)(R attrRange, Args args)
    if(isAttrRange!R && Args.length % 2 == 0)
{
    // The implementation is generated as a string, because it is shared with
    // the overload which takes an output range.
    enum impl = _genGetAttrs(false);
    mixin(impl);
}

/// Ditto
void getAttrs(R, OR, Args...)(R attrRange, ref OR unmatched, Args args)
    if(isAttrRange!R && isOutputRange!(OR, ElementType!R) && Args.length % 2 == 0)
{
    // Same as the other overload except that the generated code also puts the
    // attributes which weren't matched into unmatched.
    enum impl = _genGetAttrs(true);
    mixin(impl);
}
// Generates the body shared by the overloads of getAttrs as a string to be
// mixed in.
//
// The generated code expects R, Args, attrRange, and args to be in scope
// where it is mixed in. If includeUnmatched is true, then it also expects an
// output range named unmatched and puts each attribute which did not match
// any of the requested names into it.
//
// The requested names are strings (and thus UTF-8), whereas the attribute
// names use whatever character type the parsed range uses. So, each requested
// name is transcoded to the code unit type of the attribute names before the
// two are compared. Comparing against the UTF-8 code units directly would
// fail to match any name containing non-ASCII characters when the attribute
// names are UTF-16 or UTF-32.
private string _genGetAttrs(bool includeUnmatched)
{
    auto retval =
`    import std.algorithm.comparison : equal;
    import std.conv : ConvException, to;
    import std.format : format;
    import std.range.primitives : ElementEncodingType;
    import std.traits : Unqual;
    import std.typecons : Nullable;
    import std.utf : byCodeUnit, byUTF;

    alias Attr = ElementType!R;

    // The code unit type of the attribute names.
    alias NameChar = Unqual!(ElementEncodingType!(typeof(Attr.init.name)));

    outer: foreach(attr; attrRange)
    {
        static foreach(i, arg; args)
        {
            static if(i % 2 == 0)
                static assert(is(Args[i] == string), format!"Expected string for args[%s]"(i));
            else
            {
                static assert(isPointer!(Args[i]), format!"Expected pointer for args[%s]"(i));

                // Compare code units in the encoding of the attribute name.
                if(equal(attr.name.byCodeUnit(), args[i - 1].byUTF!NameChar()))
                {
                    alias ArgType = typeof(*arg);

                    // For Nullable!T, the value is converted to T.
                    static if(isInstanceOf!(Nullable, ArgType))
                        alias TargetType = TemplateArgsOf!ArgType;
                    else
                        alias TargetType = typeof(*arg);

                    try
                        *arg = to!TargetType(attr.value);
                    catch(ConvException ce)
                    {
                        enum fmt = "Failed to convert %s: %s";
                        throw new XMLParsingException(format!fmt(attr.name, ce.msg), attr.pos);
                    }

                    continue outer;
                }
            }
        }`;

    // This is only reached by attributes which did not match any name, since
    // a match ends the iteration with continue outer.
    if(includeUnmatched)
        retval ~= "\n        put(unmatched, attr);";
    retval ~= "\n    }";

    return retval;
}
version(dxmlTests) unittest
{
    import std.array : appender;
    import std.exception : collectException;
    import std.typecons : Nullable;

    // The values of the requested attributes get converted and assigned, and
    // attributes which weren't requested are ignored.
    {
        auto xml = `<root a="foo" b="19" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
    }

    // Nullable!T* accepts the same as T*.
    {
        auto xml = `<root a="foo" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        Nullable!string a;
        Nullable!int b;
        bool c;

        getAttrs(range.front.attributes, "c", &c, "b", &b, "a", &a);
        assert(a == "foo");
        assert(b.isNull);
        assert(c == true);
    }

    // If an output range of attributes is provided, then the ones that
    // weren't matched are put in it.
    {
        auto xml = `<root foo="42" bar="silly" d="rocks" q="t"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        alias Attribute = typeof(range).Entity.Attribute;
        auto unmatched = appender!(Attribute[])();
        int i;
        string s;

        getAttrs(range.front.attributes, unmatched, "foo", &i, "bar", &s);
        assert(i == 42);
        assert(s == "silly");
        assert(unmatched.data.length == 2);
        assert(unmatched.data[0] == Attribute("d", "rocks", TextPos(1, 28)));
        assert(unmatched.data[1] == Attribute("q", "t", TextPos(1, 38)));
    }

    // An XMLParsingException gets thrown if a conversion fails.
    {
        auto xml = `<root foo="bar" false="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        int i;

        auto xpe = collectException!XMLParsingException(
            getAttrs(range.front.attributes, "d", &i));
        // collectException returns null if nothing was thrown, so check that
        // before looking at the position in order to get a test failure
        // rather than a null dereference.
        assert(xpe !is null);
        assert(xpe.pos == TextPos(1, 30));
    }

    // Test parsing attributes with CTFE.
    enum dummy = (){
        auto xml = `<root a="foo" d="rocks" c="true" b="19" />`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
        return 0;
    }();
}
// Test that getAttrs does not compile unless it is given alternating strings
// and pointers.
version(dxmlTests) unittest
{
    auto entities = parseXML("<root/>");
    auto attributes = entities.front.attributes;
    int target;

    // A name without a pointer after it.
    static assert(!__traits(compiles, getAttrs(attributes, "foo")));
    static assert(!__traits(compiles, getAttrs(attributes, "foo", "bar")));
    static assert(!__traits(compiles, getAttrs(attributes, "foo", "bar", &target)));
    static assert(!__traits(compiles, getAttrs(attributes, "foo", "bar", &target, &target)));

    // A pointer where a name is expected.
    static assert(!__traits(compiles, getAttrs(attributes, &target, "foo")));
    static assert(!__traits(compiles, getAttrs(attributes, &target, "foo", &target)));
    static assert(!__traits(compiles, getAttrs(attributes, &target, "foo", &target, "bar")));
}
// Test that getAttrs can be called from @safe pure code with both T* and
// Nullable!T*.
version(dxmlTests) @safe pure unittest
{
    import std.typecons : Nullable;

    static check(R)(R entities, int* plain, Nullable!int* nullable) @safe pure
    {
        getAttrs(entities.front.attributes, "foo", plain, "bar", nullable);
    }

    // <root/> has no attributes, so nothing is assigned through the pointers.
    check(parseXML("<root/>"), null, null);
}
/++
    Takes an $(LREF EntityRange) whose $(D front) is a start tag and pops
    entities off of it until its $(D front) is the end tag which corresponds
    to that start tag. It is an error to call skipContents when the current
    entity is not $(LREF EntityType.elementStart).

    $(TABLE
        $(TR $(TH Supported $(LREF EntityType)s:))
        $(TR $(TD $(LREF2 elementStart, EntityType)))
    )

    Returns: The range with its $(D front) now at the end tag corresponding to
             the start tag that was $(D front) when the function was called.

    Throws: $(LREF XMLParsingException) on invalid XML.
  +/
R skipContents(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    assert(entityRange._type == EntityType.elementStart);

    // The number of start tags seen so far (including the one that was front
    // on entry) whose end tags have not been reached yet.
    int openTags = 1;

    // empty is never checked, because the range can only become empty by
    // reaching the end of the document, and an XMLParsingException would be
    // thrown if that happened before the matching end tag was found.
    do
    {
        entityRange.popFront();
        switch(entityRange._type)
        {
            case EntityType.elementStart:
                ++openTags;
                break;
            case EntityType.elementEnd:
                --openTags;
                break;
            default:
                break;
        }
    }
    while(openTags != 0);

    return entityRange;
}
///
version(dxmlTests) unittest
{
    auto xml = "<root>\n" ~
               "    <foo>\n" ~
               "        <bar>\n" ~
               "        Some text\n" ~
               "        </bar>\n" ~
               "    </foo>\n" ~
               "    <!-- no comment -->\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    // Everything inside of <foo> is skipped, leaving the range at </foo>.
    range = range.skipContents();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "foo");

    // Iteration continues normally from there.
    range.popFront();
    assert(range.front.type == EntityType.comment);
    assert(range.front.text == " no comment ");

    range.popFront();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.empty);
}
/++
    Pops entities off of the given range until an entity with the given
    $(LREF EntityType) is reached.

    If more than one $(LREF EntityType) is given, then an entity with any one
    of them counts as a match.

    The entity which is $(D front) when the function is called is always
    skipped, even if it has one of the given $(LREF EntityType)s.

    This is essentially a slightly optimized equivalent to

    ---
    if(!range.empty())
    {
        range.popFront();
        range = range.find!((a, b) => a.type == b.type)(entityTypes);
    }
    ---

    Returns: The given range with its $(D front) now at the first entity which
             matched one of the given $(LREF EntityType)s or an empty range if
             none were found.

    Throws: $(LREF XMLParsingException) on invalid XML.
  +/
R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...)
    if(isInstanceOf!(EntityRange, R))
{
    if(!entityRange.empty)
    {
        // The entity that was front on entry never counts as a match.
        entityRange.popFront();

        search: while(!entityRange.empty)
        {
            immutable current = entityRange._type;
            foreach(wanted; entityTypes)
            {
                if(current == wanted)
                    break search;
            }
            entityRange.popFront();
        }
    }

    return entityRange;
}
///
version(dxmlTests) unittest
{
    auto xml = "<root>\n" ~
               "    <!-- blah blah blah -->\n" ~
               "    <foo>nothing to say</foo>\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    // The comment is skipped, since it is neither of the requested types.
    range = range.skipToEntityType(EntityType.elementStart,
                                   EntityType.elementEmpty);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    // There are no comments after <foo>, so the result is empty.
    assert(range.skipToEntityType(EntityType.comment).empty);

    // skipToEntityType will work on an empty range but will always
    // return an empty range.
    assert(range.takeNone().skipToEntityType(EntityType.comment).empty);
}
/++
    Pops entities off of the given range until it reaches the end tag which
    corresponds to the start tag that is the parent of the current entity.

    Returns: The given range with its $(D front) now at the end tag which
             corresponds to the parent start tag of the entity that was
             $(D front) when skipToParentEndTag was called. If the current
             entity does not have a parent start tag (which means that it's
             either the root element or a comment or PI outside of the root
             element), then an empty range is returned.

    Throws: $(LREF XMLParsingException) on invalid XML.
  +/
R skipToParentEndTag(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    with(EntityType)
    {
        // Unless the range is already at a start tag, move to the next start
        // tag or end tag. The first end tag that is reached this way is the
        // parent's, because the contents of each start tag encountered on the
        // way there are skipped as a whole below.
        if(entityRange._type != elementStart)
        {
            entityRange = entityRange.skipToEntityType(elementStart, elementEnd);
            if(entityRange.empty || entityRange._type == elementEnd)
                return entityRange;
        }

        while(true)
        {
            // The range is at the start tag of a sibling here. So, skip to
            // its end tag and then move past that.
            entityRange = entityRange.skipContents();
            entityRange.popFront();
            if(entityRange.empty || entityRange._type == elementEnd)
                return entityRange;

            // Anything other than a start tag is simply skipped over.
            if(entityRange._type != elementStart)
            {
                entityRange = entityRange.skipToEntityType(elementStart, elementEnd);
                if(entityRange.empty || entityRange._type == elementEnd)
                    return entityRange;
            }
        }

        assert(0); // never reached, since the loop is only ever left by returning.
    }
}
///
version(dxmlTests) unittest
{
    auto xml = "<root>\n" ~
               "    <foo>\n" ~
               "        <!-- comment -->\n" ~
               "        <bar>exam</bar>\n" ~
               "    </foo>\n" ~
               "    <!-- another comment -->\n" ~
               "</root>";

    // Starting from the comment inside of <foo>.
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        // <bar> and its contents are skipped on the way to </foo>.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        // The parent of </foo> is <root>.
        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        // </root> has no parent.
        range = range.skipToParentEndTag();
        assert(range.empty);
    }

    // Starting from the text inside of <bar>.
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text == "exam");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " another comment ");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        assert(range.skipToParentEndTag().empty);
    }

    // The root element has no parent, so the result is empty.
    {
        auto range = parseXML("<root><foo>bar</foo></root>");
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
        assert(range.skipToParentEndTag().empty);
    }
}
4176 version(dxmlTests) unittest
4177 {
4178     import core.exception : AssertError;
4179     import std.algorithm.comparison : equal;
4180     import std.exception : enforce;
4181     import dxml.internal : testRangeFuncs;
4183     static void popAndCheck(R)(ref R range, EntityType type, size_t line = __LINE__)
4184     {
4185         range.popFront();
4186         enforce!AssertError(!range.empty, "unittest 1", __FILE__, line);
4187         enforce!AssertError(range.front.type == type, "unittest 2", __FILE__, line);
4188     }
4190     static foreach(func; testRangeFuncs)
4191     {{
4192         // cdata
4193         {
4194             auto xml = "<root>\n" ~
4195                        "    <![CDATA[ cdata run ]]>\n" ~
4196                        "    <nothing/>\n" ~
4197                        "    <![CDATA[ cdata have its bits flipped ]]>\n" ~
4198                        "    <foo></foo>\n" ~
4199                        "    <![CDATA[ cdata play violin ]]>\n" ~
4200                        "</root>";
4202             auto range = parseXML(func(xml));
4203             assert(range.front.type == EntityType.elementStart);
4204             popAndCheck(range, EntityType.cdata);
4205             assert(equal(range.front.text, " cdata run "));
4206             {
4207                 auto temp = range.save.skipToParentEndTag();
4208                 assert(temp._type == EntityType.elementEnd);
4209                 assert(equal(temp.front.name, "root"));
4210             }
4211             popAndCheck(range, EntityType.elementEmpty);
4212             popAndCheck(range, EntityType.cdata);
4213             assert(equal(range.front.text, " cdata have its bits flipped "));
4214             {
4215                 auto temp = range.save.skipToParentEndTag();
4216                 assert(temp._type == EntityType.elementEnd);
4217                 assert(equal(temp.front.name, "root"));
4218             }
4219             popAndCheck(range, EntityType.elementStart);
4220             range = range.skipContents();
4221             popAndCheck(range, EntityType.cdata);
4222             assert(equal(range.front.text, " cdata play violin "));
4223             range = range.skipToParentEndTag();
4224             assert(range._type == EntityType.elementEnd);
4225             assert(equal(range.front.name, "root"));
4226         }
4227         // comment
4228         {
4229             auto xml = "<!-- before -->\n" ~
4230                        "<root>\n" ~
4231                        "    <!-- comment 1 -->\n" ~
4232                        "    <nothing/>\n" ~
4233                        "    <!-- comment 2 -->\n" ~
4234                        "    <foo></foo>\n" ~
4235                        "    <!-- comment 3 -->\n" ~
4236                        "</root>\n" ~
4237                        "<!-- after -->" ~
4238                        "<!-- end -->";
4240             auto text = func(xml);
4241             assert(parseXML(text.save).skipToParentEndTag().empty);
4242             {
4243                 auto range = parseXML(text.save);
4244                 assert(range.front.type == EntityType.comment);
4245                 popAndCheck(range, EntityType.elementStart);
4246                 popAndCheck(range, EntityType.comment);
4247                 assert(equal(range.front.text, " comment 1 "));
4248                 {
4249                     auto temp = range.save.skipToParentEndTag();
4250                     assert(temp._type == EntityType.elementEnd);
4251                     assert(equal(temp.front.name, "root"));
4252                 }
4253                 popAndCheck(range, EntityType.elementEmpty);
4254                 popAndCheck(range, EntityType.comment);
4255                 assert(equal(range.front.text, " comment 2 "));
4256                 {
4257                     auto temp = range.save.skipToParentEndTag();
4258                     assert(temp._type == EntityType.elementEnd);
4259                     assert(equal(temp.front.name, "root"));
4260                 }
4261                 popAndCheck(range, EntityType.elementStart);
4262                 range = range.skipContents();
4263                 popAndCheck(range, EntityType.comment);
4264                 assert(equal(range.front.text, " comment 3 "));
4265                 range = range.skipToParentEndTag();
4266                 assert(range._type == EntityType.elementEnd);
4267                 assert(equal(range.front.name, "root"));
4268             }
4269             {
4270                 auto range = parseXML(text.save);
4271                 assert(range.front.type == EntityType.comment);
4272                 popAndCheck(range, EntityType.elementStart);
4273                 range = range.skipContents();
4274                 popAndCheck(range, EntityType.comment);
4275                 assert(equal(range.front.text, " after "));
4276                 assert(range.save.skipToParentEndTag().empty);
4277                 popAndCheck(range, EntityType.comment);
4278                 assert(equal(range.front.text, " end "));
4279                 assert(range.skipToParentEndTag().empty);
4280             }
4281         }
4282         // elementStart
4283         {
4284             auto xml = "<root>\n" ~
4285                        "    <a><b>foo</b></a>\n" ~
4286                        "    <nothing/>\n" ~
4287                        "    <c></c>\n" ~
4288                        "    <d>\n" ~
4289                        "        <e>\n" ~
4290                        "        </e>\n" ~
4291                        "        <f>\n" ~
4292                        "            <g>\n" ~
4293                        "            </g>\n" ~
4294                        "        </f>\n" ~
4295                        "    </d>\n" ~
4296                        "</root>";
4298             auto range = parseXML(func(xml));
4299             assert(range.front.type == EntityType.elementStart);
4300             assert(equal(range.front.name, "root"));
4301             assert(range.save.skipToParentEndTag().empty);
4302             popAndCheck(range, EntityType.elementStart);
4303             assert(equal(range.front.name, "a"));
4304             {
4305                 auto temp = range.save.skipToParentEndTag();
4306                 assert(temp._type == EntityType.elementEnd);
4307                 assert(equal(temp.front.name, "root"));
4308             }
4309             popAndCheck(range, EntityType.elementStart);
4310             assert(equal(range.front.name, "b"));
4311             {
4312                 auto temp = range.save.skipToParentEndTag();
4313                 assert(temp._type == EntityType.elementEnd);
4314                 assert(equal(temp.front.name, "a"));
4315             }
4316             popAndCheck(range, EntityType.text);
4317             popAndCheck(range, EntityType.elementEnd);
4318             popAndCheck(range, EntityType.elementEnd);
4319             popAndCheck(range, EntityType.elementEmpty);
4320             popAndCheck(range, EntityType.elementStart);
4321             assert(equal(range.front.name, "c"));
4322             {
4323                 auto temp = range.save.skipToParentEndTag();
4324                 assert(temp._type == EntityType.elementEnd);
4325                 assert(equal(temp.front.name, "root"));
4326             }
4327             popAndCheck(range, EntityType.elementEnd);
4328             popAndCheck(range, EntityType.elementStart);
4329             assert(equal(range.front.name, "d"));
4330             popAndCheck(range, EntityType.elementStart);
4331             assert(equal(range.front.name, "e"));
4332             range = range.skipToParentEndTag();
4333             assert(range._type == EntityType.elementEnd);
4334             assert(equal(range.front.name, "d"));
4335             range = range.skipToParentEndTag();
4336             assert(range._type == EntityType.elementEnd);
4337             assert(equal(range.front.name, "root"));
4338         }
4339         // elementEnd
4340         {
4341             auto xml = "<root>\n" ~
4342                        "    <a><b>foo</b></a>\n" ~
4343                        "    <nothing/>\n" ~
4344                        "    <c></c>\n" ~
4345                        "</root>";
4347             auto range = parseXML(func(xml));
4348             assert(range.front.type == EntityType.elementStart);
4349             popAndCheck(range, EntityType.elementStart);
4350             popAndCheck(range, EntityType.elementStart);
4351             popAndCheck(range, EntityType.text);
4352             popAndCheck(range, EntityType.elementEnd);
4353             assert(equal(range.front.name, "b"));
4354             {
4355                 auto temp = range.save.skipToParentEndTag();
4356                 assert(temp._type == EntityType.elementEnd);
4357                 assert(equal(temp.front.name, "a"));
4358             }
4359             popAndCheck(range, EntityType.elementEnd);
4360             assert(equal(range.front.name, "a"));
4361             {
4362                 auto temp = range.save.skipToParentEndTag();
4363                 assert(temp._type == EntityType.elementEnd);
4364                 assert(equal(temp.front.name, "root"));
4365             }
4366             popAndCheck(range, EntityType.elementEmpty);
4367             popAndCheck(range, EntityType.elementStart);
4368             popAndCheck(range, EntityType.elementEnd);
4369             assert(equal(range.front.name, "c"));
4370             {
4371                 auto temp = range.save.skipToParentEndTag();
4372                 assert(temp._type == EntityType.elementEnd);
4373                 assert(equal(temp.front.name, "root"));
4374             }
4375             popAndCheck(range, EntityType.elementEnd);
4376             assert(range.skipToParentEndTag().empty);
4377         }
4378         // elementEmpty
4379         {
4380             auto range = parseXML(func("<root/>"));
4381             assert(range.front.type == EntityType.elementEmpty);
4382             assert(range.skipToParentEndTag().empty);
4383         }
4384         {
4385             auto xml = "<root>\n" ~
4386                        "    <a><b>foo</b></a>\n" ~
4387                        "    <nothing/>\n" ~
4388                        "    <c></c>\n" ~
4389                        "    <whatever/>\n" ~
4390                        "</root>";
4392             auto range = parseXML(func(xml));
4393             popAndCheck(range, EntityType.elementStart);
4394             assert(range.front.type == EntityType.elementStart);
4395             range = range.skipContents();
4396             popAndCheck(range, EntityType.elementEmpty);
4397             assert(equal(range.front.name, "nothing"));
4398             {
4399                 auto temp = range.save;
4400                 popAndCheck(temp, EntityType.elementStart);
4401                 popAndCheck(temp, EntityType.elementEnd);
4402                 popAndCheck(temp, EntityType.elementEmpty);
4403                 assert(equal(temp.front.name, "whatever"));
4404             }
4405             range = range.skipToParentEndTag();
4406             assert(range._type == EntityType.elementEnd);
4407             assert(equal(range.front.name, "root"));
4408         }
4409         // pi
4410         {
4411             auto xml = "<?Sherlock?>\n" ~
4412                        "<root>\n" ~
4413                        "    <?Foo?>\n" ~
4414                        "    <nothing/>\n" ~
4415                        "    <?Bar?>\n" ~
4416                        "    <foo></foo>\n" ~
4417                        "    <?Baz?>\n" ~
4418                        "</root>\n" ~
4419                        "<?Poirot?>\n" ~
4420                        "<?Conan?>";
4422             auto range = parseXML(func(xml));
4423             assert(range.front.type == EntityType.pi);
4424             assert(equal(range.front.name, "Sherlock"));
4425             assert(range.save.skipToParentEndTag().empty);
4426             popAndCheck(range, EntityType.elementStart);
4427             popAndCheck(range, EntityType.pi);
4428             assert(equal(range.front.name, "Foo"));
4429             {
4430                 auto temp = range.save.skipToParentEndTag();
4431                 assert(temp._type == EntityType.elementEnd);
4432                 assert(equal(temp.front.name, "root"));
4433             }
4434             popAndCheck(range, EntityType.elementEmpty);
4435             popAndCheck(range, EntityType.pi);
4436             assert(equal(range.front.name, "Bar"));
4437             {
4438                 auto temp = range.save.skipToParentEndTag();
4439                 assert(temp._type == EntityType.elementEnd);
4440                 assert(equal(temp.front.name, "root"));
4441             }
4442             popAndCheck(range, EntityType.elementStart);
4443             popAndCheck(range, EntityType.elementEnd);
4444             popAndCheck(range, EntityType.pi);
4445             assert(equal(range.front.name, "Baz"));
4446             range = range.skipToParentEndTag();
4447             assert(range._type == EntityType.elementEnd);
4448             assert(equal(range.front.name, "root"));
4449             popAndCheck(range, EntityType.pi);
4450             assert(equal(range.front.name, "Poirot"));
4451             assert(range.save.skipToParentEndTag().empty);
4452             popAndCheck(range, EntityType.pi);
4453             assert(equal(range.front.name, "Conan"));
4454             assert(range.skipToParentEndTag().empty);
4455         }
4456         // text
4457         {
4458             auto xml = "<root>\n" ~
4459                        "    nothing to say\n" ~
4460                        "    <nothing/>\n" ~
4461                        "    nothing whatsoever\n" ~
4462                        "    <foo></foo>\n" ~
4463                        "    but he keeps talking\n" ~
4464                        "</root>";
4466             auto range = parseXML(func(xml));
4467             assert(range.front.type == EntityType.elementStart);
4468             popAndCheck(range, EntityType.text);
4469             assert(equal(range.front.text, "\n    nothing to say\n    "));
4470             {
4471                 auto temp = range.save.skipToParentEndTag();
4472                 assert(temp._type == EntityType.elementEnd);
4473                 assert(equal(temp.front.name, "root"));
4474             }
4475             popAndCheck(range, EntityType.elementEmpty);
4476             popAndCheck(range, EntityType.text);
4477             assert(equal(range.front.text, "\n    nothing whatsoever\n    "));
4478             {
4479                 auto temp = range.save.skipToParentEndTag();
4480                 assert(temp._type == EntityType.elementEnd);
4481                 assert(equal(temp.front.name, "root"));
4482             }
4483             popAndCheck(range, EntityType.elementStart);
4484             range = range.skipContents();
4485             popAndCheck(range, EntityType.text);
4486             assert(equal(range.front.text, "\n    but he keeps talking\n"));
4487             range = range.skipToParentEndTag();
4488             assert(range._type == EntityType.elementEnd);
4489             assert(equal(range.front.name, "root"));
4490         }
4491     }}
4492 }
4495 /++
4496     Treats the given string like a file path except that each directory
4497     corresponds to the name of a start tag. Note that this does $(I not) try to
4498     implement XPath as that would be quite complicated, and it really doesn't
4499     fit with a StAX parser.
4501     A start tag should be thought of as a directory, with its child start tags
4502     as the directories it contains.
4504     All paths should be relative. $(LREF EntityRange) can only move forward
4505     through the document, so using an absolute path would only make sense at
4506     the beginning of the document. As such, absolute paths are treated as
4507     invalid paths.
4509     $(D_CODE_STRING "./") and $(D_CODE_STRING "../") are supported. Repeated
4510     slashes such as in $(D_CODE_STRING "foo//bar") are not supported and are
4511     treated as an invalid path.
4513     If $(D range.front.type == EntityType.elementStart), then
4514     $(D range._skipToPath($(D_STRING "foo"))) will search for the first child
4515     start tag (be it $(LREF EntityType.elementStart) or
4516     $(LREF EntityType.elementEmpty)) with the $(LREF2 name, EntityRange.Entity)
4517     $(D_CODE_STRING "foo"). That start tag must be a direct child of the current
4518     start tag.
4520     If $(D range.front.type) is any other $(LREF EntityType), then
4521     $(D range._skipToPath($(D_STRING "foo"))) will return an empty range,
4522     because no other $(LREF EntityType)s have child start tags.
4524     For any $(LREF EntityType), $(D range._skipToPath($(D_STRING "../foo")))
4525     will search for the first start tag with the
4526     $(LREF2 name, EntityRange.Entity) $(D_CODE_STRING "foo") at the same level
4527     as the current entity. If the current entity is a start tag with the name
4528     $(D_CODE_STRING "foo"), it will not be considered a match.
4530     $(D range._skipToPath($(D_STRING "./"))) is a no-op. However,
4531     $(D range._skipToPath($(D_STRING "../"))) will result in the empty range
4532     (since it doesn't target a specific start tag).
4534     $(D range._skipToPath($(D_STRING "foo/bar"))) is equivalent to
4535     $(D range._skipToPath($(D_STRING "foo"))._skipToPath($(D_STRING "bar"))),
4536     and $(D range._skipToPath($(D_STRING "../foo/bar"))) is equivalent to
4537     $(D range._skipToPath($(D_STRING "../foo"))._skipToPath($(D_STRING "bar"))).
4539     Returns: The given range with its $(D front) now at the requested entity if
4540              the path is valid; otherwise, an empty range is returned.
4542     Throws: $(LREF XMLParsingException) on invalid XML.
4543   +/
4544 R skipToPath(R)(R entityRange, string path)
4545     if(isInstanceOf!(EntityRange, R))
4546 {
4547     import std.algorithm.comparison : equal;
4548     import std.path : pathSplitter;
         // An already-empty range is handed back as-is; an empty or absolute path
         // is treated as invalid and produces an empty range.
4550     if(entityRange.empty)
4551         return entityRange;
4552     if(path.empty || path[0] == '/')
4553         return entityRange.takeNone();
4555     with(EntityType)
4556     {
             // The entity types the search stops on. elementEmpty is only listed
             // when the config does not split empty element tags; with
             // SplitEmpty.yes only elementStart and elementEnd are searched for.
4557         static if(R.config.splitEmpty == SplitEmpty.yes)
4558             EntityType[2] startOrEnd = [elementStart, elementEnd];
4559         else
4560             EntityType[3] startOrEnd = [elementStart, elementEnd, elementEmpty];
             // Searches forward from the current entity, without descending into
             // child elements, for a start tag (or empty tag) called name.
             // Mutates the captured entityRange and returns it; the result is
             // empty if an end tag or the end of the range is reached first.
             // NOTE(review): this relies on skipToEntityType moving past the
             // current front before it looks for a match - confirm against its
             // definition.
4562         R findOnCurrLevel(string name)
4563         {
                 // The current start tag itself is never a candidate, and its
                 // children are not on this level, so step over its contents.
4564             if(entityRange._type == elementStart)
4565                 entityRange = entityRange.skipContents();
4566             while(true)
4567             {
4568                 entityRange = entityRange.skipToEntityType(startOrEnd[]);
4569                 if(entityRange.empty)
4570                     return entityRange;
                     // An end tag here means this level ended without a match.
4571                 if(entityRange._type == elementEnd)
4572                     return entityRange.takeNone();
4574                 if(equal(name, entityRange._name.save))
4575                     return entityRange;
                     // Not a match: an empty tag has no contents to skip, while a
                     // start tag's contents must be skipped to stay on this level.
4577                 static if(R.config.splitEmpty == SplitEmpty.no)
4578                 {
4579                     if(entityRange._type == elementEmpty)
4580                         continue;
4581                 }
4582                 entityRange = entityRange.skipContents();
4583             }
4584         }
             // Walk the path one component at a time. Note that the ".." branch
             // also pops components itself, in addition to the loop's popFront.
4586         for(auto pieces = path.pathSplitter(); !pieces.empty; pieces.popFront())
4587         {
                 // "." leaves the range where it is.
4588             if(pieces.front == ".")
4589                 continue;
4590             else if(pieces.front == "..")
4591             {
                     // A path that ends in ".." names no start tag, so it is
                     // rejected with an empty range.
4592                 pieces.popFront();
4593                 if(pieces.empty)
4594                     return entityRange.takeNone();
                     // The first ".." only selects the current level; each
                     // additional ".." moves to the end tag of the parent.
4596                 while(pieces.front == "..")
4597                 {
4598                     pieces.popFront();
4599                     if(pieces.empty)
4600                         return entityRange.takeNone();
4601                     entityRange = entityRange.skipToParentEndTag();
4602                     if(entityRange.empty)
4603                         return entityRange;
4604                 }
                     // pieces.front is now the name to look for on this level.
4606                 entityRange = findOnCurrLevel(pieces.front);
4607                 if(entityRange.empty)
4608                     return entityRange;
4609             }
4610             else
4611             {
                     // A plain name refers to a child, and only a non-empty
                     // start tag can have children.
4612                 if(entityRange._type != elementStart)
4613                     return entityRange.takeNone();
                     // Move to the first start/end tag after the current start
                     // tag; reaching an end tag means there is no child element.
4615                 entityRange = entityRange.skipToEntityType(startOrEnd[]);
4616                 assert(!entityRange.empty);
4617                 if(entityRange._type == elementEnd)
4618                     return entityRange.takeNone();
                     // If the first child is not the one requested, keep looking
                     // among the entities that follow it on the same level.
4620                 if(!equal(pieces.front, entityRange._name.save))
4621                 {
4622                     entityRange = findOnCurrLevel(pieces.front);
4623                     if(entityRange.empty)
4624                         return entityRange;
4625                 }
4626             }
4627         }
4629         return entityRange;
4630     }
4631 }
4633 ///
4634 version(dxmlTests) unittest
4635 {
4636     {
4637         auto xml = "<carrot>\n" ~
4638                    "    <foo>\n" ~
4639                    "        <bar>\n" ~
4640                    "            <baz/>\n" ~
4641                    "            <other/>\n" ~
4642                    "        </bar>\n" ~
4643                    "    </foo>\n" ~
4644                    "</carrot>";
4646         auto range = parseXML(xml);
4647         // "<carrot>"
4648         assert(range.front.type == EntityType.elementStart);
4649         assert(range.front.name == "carrot");
4651         range = range.skipToPath("foo/bar");
4652         // "        <bar>"
4653         assert(!range.empty);
4654         assert(range.front.type == EntityType.elementStart);
4655         assert(range.front.name == "bar");
4657         range = range.skipToPath("baz");
4658         // "            <baz/>"
4659         assert(!range.empty);
4660         assert(range.front.type == EntityType.elementEmpty);
4662         // other is not a child element of baz
4663         assert(range.skipToPath("other").empty);
4665         range = range.skipToPath("../other");
4666         // "            <other/>"
4667         assert(!range.empty);
4668         assert(range.front.type == EntityType.elementEmpty);
4669     }
4670     {
4671         auto xml = "<potato>\n" ~
4672                    "    <foo>\n" ~
4673                    "        <bar>\n "~
4674                    "        </bar>\n" ~
4675                    "        <crazy>\n" ~
4676                    "        </crazy>\n" ~
4677                    "        <fou/>\n" ~
4678                    "    </foo>\n" ~
4679                    "    <buzz/>\n" ~
4680                    "</potato>";
4682         auto range = parseXML(xml);
4683         // "<potato>"
4684         assert(range.front.type == EntityType.elementStart);
4686         range = range.skipToPath("./");
4687         // "<potato>"
4688         assert(!range.empty);
4689         assert(range.front.type == EntityType.elementStart);
4690         assert(range.front.name == "potato");
4692         range = range.skipToPath("./foo/bar");
4693         // "        <bar>"
4694         assert(!range.empty);
4695         assert(range.front.type == EntityType.elementStart);
4696         assert(range.front.name == "bar");
4698         range = range.skipToPath("../crazy");
4699         // "        <crazy>"
4700         assert(!range.empty);
4701         assert(range.front.type == EntityType.elementStart);
4702         assert(range.front.name == "crazy");
4704         // Whether or not popFront is called here before the call to
4705         // range.skipToPath("../fou") below, the result is the same, because
4706         // both <crazy> and </crazy> are at the same level.
4707         range.popFront();
4708         // "        </crazy>"
4709         assert(!range.empty);
4710         assert(range.front.type == EntityType.elementEnd);
4711         assert(range.front.name == "crazy");
4713         range = range.skipToPath("../fou");
4714         // "        <fou/>"
4715         assert(!range.empty);
4716         assert(range.front.type == EntityType.elementEmpty);
4717     }
4718     // Searching stops at the first matching start tag.
4719     {
4720         auto xml = "<beet>\n" ~
4721                    "    <foo a='42'>\n" ~
4722                    "    </foo>\n" ~
4723                    "    <foo b='451'>\n" ~
4724                    "    </foo>\n" ~
4725                    "</beet>";
4727         auto range = parseXML(xml);
4728         range = range.skipToPath("foo");
4729         assert(!range.empty);
4730         assert(range.front.type == EntityType.elementStart);
4731         assert(range.front.name == "foo");
4733         {
4734             auto attrs = range.front.attributes;
4735             assert(attrs.front.name == "a");
4736             assert(attrs.front.value == "42");
4737         }
4739         range = range.skipToPath("../foo");
4740         assert(!range.empty);
4741         assert(range.front.type == EntityType.elementStart);
4742         assert(range.front.name == "foo");
4744         {
4745             auto attrs = range.front.attributes;
4746             assert(attrs.front.name == "b");
4747             assert(attrs.front.value == "451");
4748         }
4749     }
4750     // skipToPath will work on an empty range but will always return an
4751     // empty range.
4752     {
4753         auto range = parseXML("<root/>");
4754         assert(range.takeNone().skipToPath("nowhere").empty);
4755     }
4756     // Empty and absolute paths will also result in an empty range as will
4757     // "../" without any actual tag name on the end.
4758     {
4759         auto range = parseXML("<root/>");
4760         assert(range.skipToPath("").empty);
4761         assert(range.skipToPath("/").empty);
4762         assert(range.skipToPath("../").empty);
4763     }
4764     // Only non-empty start tags have children; all other EntityTypes result
4765     // in an empty range unless "../" is used.
4766     {
4767         auto xml = "<!-- comment -->\n" ~
4768                    "<root>\n" ~
4769                    "    <foo/>\n" ~
4770                    "</root>";
4771         auto range = parseXML(xml);
4772         assert(range.skipToPath("root").empty);
4773         assert(range.skipToPath("foo").empty);
4775         range = range.skipToPath("../root");
4776         assert(!range.empty);
4777         assert(range.front.type == EntityType.elementStart);
4778         assert(range.front.name == "root");
4779     }
4780 }
4782 version(dxmlTests) unittest
4783 {
4784     import core.exception : AssertError;
4785     import std.algorithm.comparison : equal;
4786     import std.exception : assertNotThrown, enforce;
4787     import dxml.internal : testRangeFuncs;
         // Checks that range.skipToPath(path) does not throw an
         // XMLParsingException and yields a non-empty range whose front has the
         // given type and name. The line parameter defaults to the caller's
         // __LINE__ so that failures are reported at the call site.
4789     static void testPath(R)(R range, string path, EntityType type, string name, size_t line = __LINE__)
4790     {
4791         auto result = assertNotThrown!XMLParsingException(range.skipToPath(path), "unittest 1", __FILE__, line);
4792         enforce!AssertError(!result.empty, "unittest 2", __FILE__, line);
4793         enforce!AssertError(result.front.type == type, "unittest 3", __FILE__, line);
4794         enforce!AssertError(equal(result.front.name, name), "unittest 4", __FILE__, line);
4795     }
         // Pops past an empty element tag: one popFront normally, and a second
         // one when the range's config has splitEmpty set to SplitEmpty.yes.
4797     static void popEmpty(R)(ref R range)
4798     {
4799         range.popFront();
4800         static if(range.config.splitEmpty == SplitEmpty.yes)
4801             range.popFront();
4802     }
4804     auto xml = "<superuser>\n" ~
4805                "    <!-- comment -->\n" ~
4806                "    <?pi?>\n" ~
4807                "    <![CDATA[cdata]]>\n" ~
4808                "    <foo/>\n" ~
4809                "    <bar/>\n" ~
4810                "    <!-- comment -->\n" ~
4811                "    <!-- comment -->\n" ~
4812                "    <baz/>\n" ~
4813                "    <frobozz>\n" ~
4814                "        <!-- comment -->\n" ~
4815                "        <!-- comment -->\n" ~
4816                "        <whatever/>\n" ~
4817                "        <!-- comment -->\n" ~
4818                "        <!-- comment -->\n" ~
4819                "    </frobozz>\n" ~
4820                "    <!-- comment -->\n" ~
4821                "    <!-- comment -->\n" ~
4822                "    <xyzzy/>\n" ~
4823                "</superuser>";
4825     static foreach(func; testRangeFuncs)
4826     {{
4827         auto text = func(xml);
4829         static foreach(config; someTestConfigs)
4830         {{
                 // The EntityType that an empty element tag is expected to have
                 // at the front of the range under this config.
4831             static if(config.splitEmpty == SplitEmpty.yes)
4832                 enum empty = EntityType.elementStart;
4833             else
4834                 enum empty = EntityType.elementEmpty;
4836             auto range = parseXML!config(text.save);
                 // From <superuser>: only direct children can be reached by a
                 // bare name; deeper elements need the full relative path.
4838             assert(range.save.skipToPath("whatever").empty);
4839             assert(range.save.skipToPath("frobozz/whateve").empty);
4841             testPath(range.save, "foo", empty, "foo");
4842             testPath(range.save, "bar", empty, "bar");
4843             testPath(range.save, "baz", empty, "baz");
4844             testPath(range.save, "frobozz", EntityType.elementStart, "frobozz");
4845             testPath(range.save, "frobozz/whatever", empty, "whatever");
4846             testPath(range.save, "xyzzy", empty, "xyzzy");
                 // Every entity before <foo/> (comment, pi, cdata and any text).
4848             range.popFront();
4849             for(; range.front.type != empty; range.popFront())
4850             {
4851                 assert(range.save.skipToPath("foo").empty);
4852                 testPath(range.save, "../foo", empty, "foo");
4853                 testPath(range.save, "../bar", empty, "bar");
4854                 testPath(range.save, "../baz", empty, "baz");
4855                 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
4856                 testPath(range.save, "../frobozz/whatever", empty, "whatever");
4857                 testPath(range.save, "../xyzzy", empty, "xyzzy");
4858             }
                 // At <foo/>: it has no children and is not a match for itself.
4859             assert(equal(range.front.name, "foo"));
4860             assert(range.save.skipToPath("foo").empty);
4861             assert(range.save.skipToPath("./foo").empty);
4862             assert(range.save.skipToPath("../foo").empty);
4863             assert(range.save.skipToPath("bar").empty);
4864             assert(range.save.skipToPath("baz").empty);
4865             assert(range.save.skipToPath("frobozz").empty);
4866             assert(range.save.skipToPath("whatever").empty);
4867             assert(range.save.skipToPath("../").empty);
4868             assert(range.save.skipToPath("../../").empty);
4870             testPath(range.save, "../bar", empty, "bar");
4871             testPath(range.save, "../baz", empty, "baz");
4872             testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
4873             testPath(range.save, "../frobozz/whatever", empty, "whatever");
4874             testPath(range.save, "../xyzzy", empty, "xyzzy");
4876             popEmpty(range);
4877             assert(range.save.skipToPath("bar").empty);
4878             testPath(range.save, "../baz", empty, "baz");
4879             testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
4880             testPath(range.save, "../frobozz/whatever", empty, "whatever");
4881             testPath(range.save, "../xyzzy", empty, "xyzzy");
                 // Every entity up to <baz/>.
4883             range.popFront();
4884             for(; range.front.type != empty; range.popFront())
4885             {
4886                 assert(range.save.skipToPath("baz").empty);
4887                 testPath(range.save, "../baz", empty, "baz");
4888                 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
4889                 testPath(range.save, "../frobozz/whatever", empty, "whatever");
4890                 testPath(range.save, "../xyzzy", empty, "xyzzy");
4891             }
4892             assert(equal(range.front.name, "baz"));
4894             testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
4895             testPath(range.save, "../frobozz/whatever", empty, "whatever");
4896             testPath(range.save, "../xyzzy", empty, "xyzzy");
4898             popEmpty(range);
4899             assert(equal(range.front.name, "frobozz"));
4900             assert(range.save.skipToPath("wizard").empty);
4901             testPath(range.save, "whatever", empty, "whatever");
4902             testPath(range.save, "../xyzzy", empty, "xyzzy");
                 // Inside <frobozz>, before <whatever/>.
4904             range.popFront();
4905             for(; range.front.type != empty; range.popFront())
4906             {
4907                 assert(range.save.skipToPath("whatever").empty);
4908                 testPath(range.save, "../whatever", empty, "whatever");
4909                 testPath(range.save, "../../xyzzy", empty, "xyzzy");
4910             }
4911             assert(equal(range.front.name, "whatever"));
4912             assert(range.save.skipToPath("frobozz").empty);
4913             assert(range.save.skipToPath("../frobozz").empty);
4914             assert(range.save.skipToPath("../xyzzy").empty);
4915             assert(range.save.skipToPath("../../frobozz").empty);
4917             testPath(range.save, "../../xyzzy", empty, "xyzzy");
                 // Inside <frobozz>, after <whatever/>, up to </frobozz>.
4919             popEmpty(range);
4920             for(; range.front.type != EntityType.elementEnd; range.popFront())
4921             {
4922                 assert(range.save.skipToPath("xyzzy").empty);
4923                 assert(range.save.skipToPath("../xyzzy").empty);
4924                 testPath(range.save, "../../xyzzy", empty, "xyzzy");
4925             }
4926             assert(equal(range.front.name, "frobozz"));
                 // After </frobozz>, up to <xyzzy/>.
4928             range.popFront();
4929             for(; range.front.type != empty; range.popFront())
4930             {
4931                 assert(range.save.skipToPath("xyzzy").empty);
4932                 testPath(range.save, "../xyzzy", empty, "xyzzy");
4933             }
4934             assert(equal(range.front.name, "xyzzy"));
                 // At </superuser>: nothing is left to find in any direction.
4936             popEmpty(range);
4937             assert(equal(range.front.name, "superuser"));
4938             assert(range.save.skipToPath("superuser").empty);
4939             assert(range.save.skipToPath("foo").empty);
4940             assert(range.save.skipToPath("../foo").empty);
4941             assert(range.save.skipToPath("../../foo").empty);
4942         }}
4943     }}
4944 }
4947 //------------------------------------------------------------------------------
4948 // Private Section
4949 //------------------------------------------------------------------------------
4950 private:
4953 version(dxmlTests) auto testParser(Config config = Config.init, R)(R xmlText) @trusted pure nothrow @nogc
4954 {
         // Test-only helper (compiled under version(dxmlTests)): declares a
         // default-initialized value of the type of EntityRange!(config, R)._text,
         // points its input at xmlText wrapped with byCodeUnit, and returns it so
         // that tests can call the private parsing helpers on it directly.
4955     import std.utf : byCodeUnit;
4956     typeof(EntityRange!(config, R)._text) text;
4957     text.input = byCodeUnit(xmlText);
4958     return text;
4959 }
4962 // toCmpType is to make it easy for tests to convert the expected result to a
4963 // range with the correct element type, since comparing with equal won't do
4964 // the right thing if the result doesn't have dchar as its element type.
     // func is the function used to convert a string into the kind of range being
     // tested; the result is str re-encoded with byUTF.
4965 auto toCmpType(alias func)(string str)
4966 {
4967     import std.range : takeExactly;
4968     import std.utf : byUTF;
         // The target character type is the (immutable) element type of
         // takeExactly(1) applied to the input of testParser(func(str)).
4970     return str.byUTF!(immutable ElementType!(typeof(testParser(func(str)).input.takeExactly(1))))();
4971 }
4973 auto toCmpType(alias func, ThrowOnEntityRef toer)(string str)
4974 {
         // Overload of toCmpType for tests that are parameterized on
         // ThrowOnEntityRef: the character type is taken from a testParser
         // instantiated with makeConfig(toer) rather than with the default
         // config, and str is re-encoded to that type with byUTF.
4975     import std.range : takeExactly;
4976     import std.utf : byUTF;
4978     return str.byUTF!(immutable ElementType!(typeof(testParser!(makeConfig(toer))(func(str)).input.takeExactly(1))))();
4979 }
4982 // Used to indicate where in the grammar we're currently parsing.
4983 enum GrammarPos
4984 {
4985     // Nothing has been parsed yet.
4986     documentStart,
4988     // document ::= prolog element Misc*
4989     // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
4990     // This is that first Misc*. The next entity to parse is either a Misc, the
4991     // doctypedecl, or the root element which follows the prolog.
4992     prologMisc1,
4994     // document ::= prolog element Misc*
4995     // prolog   ::= XMLDecl? Misc* (doctypedecl Misc*)?
4996     // This is that second Misc*. The next entity to parse is either a Misc or
4997     // the root element which follows the prolog.
4998     prologMisc2,
5000     // Used with SplitEmpty.yes to tell the parser that we're currently at an
5001     // empty element tag that we're treating as a start tag, so the next entity
5002     // will be an end tag even though we didn't actually parse one.
5003     splittingEmpty,
5005     // element  ::= EmptyElemTag | STag content ETag
5006     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
5007     // This is at the beginning of content at the first CharData?. The next
5008     // thing to parse will be a CharData, element, CDSect, PI, Comment, or ETag.
5009     // References are treated as part of the CharData and not parsed out by the
5010     // EntityRange (see EntityRange.Entity.text).
5011     contentCharData1,
5013     // element  ::= EmptyElemTag | STag content ETag
5014     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
5015     // This is after the first CharData?. The next thing to parse will be an
5016     // element, CDSect, PI, Comment, or ETag.
5017     // References are treated as part of the CharData and not parsed out by the
5018     // EntityRange (see EntityRange.Entity.text).
5019     contentMid,
5021     // element  ::= EmptyElemTag | STag content ETag
5022     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
5023     // This is at the second CharData?. The next thing to parse will be a
5024     // CharData, element, CDSect, PI, Comment, or ETag.
5025     // References are treated as part of the CharData and not parsed out by the
5026     // EntityRange (see EntityRange.Entity.text).
5027     contentCharData2,
5029     // element  ::= EmptyElemTag | STag content ETag
5030     // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
5031     // This is after the second CharData?. The next thing to parse is an ETag.
5032     endTag,
5034     // document ::= prolog element Misc*
5035     // This is the Misc* at the end of the document. The next thing to parse is
5036     // either another Misc, or we will hit the end of the document.
5037     endMisc,
5039     // The end of the document (and the grammar) has been reached.
5040     documentEnd
5041 }
5044 // Wrapper around skipOver which takes an EntityParser.Text and handles
5045 // incrementing pos.
5046 //
5047 // It is assumed that there are no newlines in needle, since only pos.col is
     // adjusted (pos.line is never touched).
     //
     // Returns true if text.input started with needle, in which case the needle
     // has been stripped from text.input and text.pos.col has been advanced by
     // needle.length. Otherwise, returns false and text.pos is left unchanged.
5048 bool stripStartsWith(Text)(ref Text text, string needle)
5049 {
5050     import std.algorithm.searching : skipOver;
5051     import std.utf : byCodeUnit;
5053     //TODO In the case where we're parsing an array of char, if we can cleanly
5054     // strip off any byCodeUnit and takeExactly wrappers, then we should be able
5055     // to have skipOver compare the string being parsed and the needle with ==.
5056     // It may happen in some cases right now when text.input is a byCodeUnit
5057     // result, but it won't happen in all cases where it ideally would. We may
5058     // also want to look into using byUTF on the needle so that it matches the
5059     // encoding of text.input or even make needle match the encoding when it's
5060     // passed in instead of always being string.
5061     if(!text.input.skipOver(needle.byCodeUnit()))
5062         return false;
         // The column advances by the needle's length in code units.
5064     text.pos.col += needle.length;
5066     return true;
5067 }
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    // Runs stripStartsWith twice: once on a parser at its initial position and
    // once on a parser whose position was first moved by 3 lines and 7 columns.
    // Each run checks the return value, the remaining input, and the position.
    static void test(alias func)(string origHaystack, string needle, string remainder, bool startsWith,
                                 int row, int col, size_t line = __LINE__)
    {
        // Reports failures at the caller's line rather than in this helper.
        void check(bool cond, string msg)
        {
            enforce!AssertError(cond, msg, __FILE__, line);
        }

        auto haystack = func(origHaystack);

        auto fresh = testParser(haystack.save);
        check(fresh.stripStartsWith(needle) == startsWith, "unittest failure 1");
        check(equalCU(fresh.input, remainder), "unittest failure 2");
        check(fresh.pos == TextPos(row, col), "unittest failure 3");

        // The column offset only carries over if the result is still on the
        // first line.
        auto shiftedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
        auto shifted = testParser(haystack);
        shifted.pos.line += 3;
        shifted.pos.col += 7;
        check(shifted.stripStartsWith(needle) == startsWith, "unittest failure 4");
        check(equalCU(shifted.input, remainder), "unittest failure 5");
        check(shifted.pos == shiftedPos, "unittest failure 6");
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("hello world", "hello", " world", true, 1, "hello".length + 1);
        test!func("hello world", "hello world", "", true, 1, "hello world".length + 1);
        test!func("hello world", "foo", "hello world", false, 1, 1);
        test!func("hello world", "hello sally", "hello world", false, 1, 1);
        test!func("hello world", "hello world ", "hello world", false, 1, 1);
    }
}
// Checks that stripStartsWith can be called from @safe pure code for every
// range type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto input = makeRange(`foo`);
        auto parser = testParser!simpleXML(input);
        assert(parser.stripStartsWith("fo"));
    }}
}
// Removes leading whitespace (space, tab, carriage return, and newline) from
// text.input, keeping text.pos in sync. Newlines are not ignored: each one
// advances text.pos via nextLine.
//
// When the input has length, the column is not bumped per character. Instead,
// the length of the input at the start of the current line is remembered, and
// the column is adjusted once at the end by how much the input shrank since.
//
// Returns whether any whitespace was stripped.
bool stripWS(Text)(ref Text text)
{
    enum byLength = hasLength!(Text.Input);

    bool sawSpace = false;

    static if(byLength)
        size_t lengthAtLineStart = text.input.length;

    while(!text.input.empty)
    {
        const c = text.input.front;

        if(c == '\n')
        {
            text.input.popFront();
            // Whatever is stripped from here on is on the new line.
            static if(byLength)
                lengthAtLineStart = text.input.length;
            nextLine!(Text.config)(text.pos);
        }
        else if(c == ' ' || c == '\t' || c == '\r')
        {
            text.input.popFront();
            static if(!byLength)
                ++text.pos.col;
        }
        else
            break;

        sawSpace = true;
    }

    static if(byLength)
        text.pos.col += lengthAtLineStart - text.input.length;

    return sawSpace;
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    // Runs stripWS twice: once on a parser at its initial position and once on
    // a parser whose position was first moved by 3 lines and 7 columns. Each
    // run checks the return value, the remaining input, and the position.
    static void test(alias func)(string origHaystack, string remainder, bool stripped,
                                 int row, int col, size_t line = __LINE__)
    {
        // Reports failures at the caller's line rather than in this helper.
        void check(bool cond, string msg)
        {
            enforce!AssertError(cond, msg, __FILE__, line);
        }

        auto haystack = func(origHaystack);

        auto fresh = testParser(haystack.save);
        check(fresh.stripWS() == stripped, "unittest failure 1");
        check(equalCU(fresh.input, remainder), "unittest failure 2");
        check(fresh.pos == TextPos(row, col), "unittest failure 3");

        // The column offset only carries over if the result is still on the
        // first line.
        auto shiftedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
        auto shifted = testParser(haystack);
        shifted.pos.line += 3;
        shifted.pos.col += 7;
        check(shifted.stripWS() == stripped, "unittest failure 4");
        check(equalCU(shifted.input, remainder), "unittest failure 5");
        check(shifted.pos == shiftedPos, "unittest failure 6");
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("  \t\rhello world", "hello world", true, 1, 5);
        test!func("  \n \n \n  \nhello world", "hello world", true, 5, 1);
        test!func("  \n \n \n  \n  hello world", "hello world", true, 5, 3);
        test!func("hello world", "hello world", false, 1, 1);
    }
}
// Checks that stripWS can be called from @safe pure code for every range type
// produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto input = makeRange(`foo`);
        auto parser = testParser!simpleXML(input);
        assert(!parser.stripWS());
    }}
}
// Finds the given needle in text.input and returns everything in front of it
// as a slice (or takeExactly) of text.input. Both the returned portion and the
// needle itself are removed from text.input.
//
// This forwards to _takeUntil with retSlice set to true. An
// XMLParsingException is thrown if the needle is not found.
auto takeUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    alias takeSlice = _takeUntil!(true, needle, skipQuotedText, Text);
    return takeSlice(text);
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Checks a successful takeUntilAndDrop: the returned slice, the remaining
    // input, and the resulting position. It is run once on a parser at its
    // initial position and once on a parser whose position was first moved by
    // 3 lines and 7 columns.
    static void test(alias func, string needle, bool sqt)(string origHaystack, string expected, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if the result is still on
            // the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeUntilAndDrop throws an XMLParsingException whose
    // position is the expected one, for both the initial and the shifted
    // starting position.
    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // These cases contain no quotes, so they must behave the same whether
        // or not quoted text is skipped.
        static foreach(sqt; [false, true])
        {
            {
                auto haystack = "hello world";
                enum needle = "world";

                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)(haystack, "hello ", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "le", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "l", "llo world", 1, 4);
            {
                enum needle = "great";
                enum expected = "プログラミング in D is ";
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", expected,
                                                     "great indeed"[i .. $], 1, codeLen!(func, expected) + i + 1);
                }
            }
            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without quote skipping, a needle inside quotes is matched like any
        // other, and an unmatched quote is not an error by itself.
        test!(func, "*", false)(`hello '*' "*" * world`, `hello '`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `hello '"`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `hello "'`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, `hello ''' `, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, `hello """ `, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With quote skipping, needles inside quotes are ignored, and an
        // unmatched quote fails at the position of that quote.
        test!(func, "*", true)(`hello '*' "*" * world`, `hello '*' "*" `, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, `hello '"*' `, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, `hello "'*" `, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        test!(func, "*", true)(`hello '' "" * world`, `hello '' "" `, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "foo '\n \n \n' bar", "", 4, 7);
    }
}
// Checks that takeUntilAndDrop can be called from @safe pure code for every
// range type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto input = makeRange(`foo`);
        auto parser = testParser!simpleXML(input);
        assert(equal(parser.takeUntilAndDrop!"o"(), "f"));
    }}
}
// Same as takeUntilAndDrop except that nothing is returned. It is meant for
// the cases where the config says that something should be skipped.
//
// This forwards to _takeUntil with retSlice set to false.
void skipUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text)
{
    alias skipOnly = _takeUntil!(false, needle, skipQuotedText, Text);
    skipOnly(text);
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Checks a successful skipUntilAndDrop: that it does not throw, what is
    // left in the input, and the resulting position. It is run once on a
    // parser at its initial position and once on a parser whose position was
    // first moved by 3 lines and 7 columns.
    static void test(alias func, string needle, bool sqt)(string origHaystack, string remainder,
                                                          int row, int col, size_t line = __LINE__)
    {
        // Reports failures at the caller's line rather than in this helper.
        void check(bool cond, string msg)
        {
            enforce!AssertError(cond, msg, __FILE__, line);
        }

        auto haystack = func(origHaystack);

        auto fresh = testParser(haystack.save);
        assertNotThrown!XMLParsingException(fresh.skipUntilAndDrop!(needle, sqt)(), "unittest failure 1",
                                            __FILE__, line);
        check(equal(fresh.input, remainder), "unittest failure 2");
        check(fresh.pos == TextPos(row, col), "unittest failure 3");

        // The column offset only carries over if the result is still on the
        // first line.
        auto shiftedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
        auto shifted = testParser(haystack);
        shifted.pos.line += 3;
        shifted.pos.col += 7;
        assertNotThrown!XMLParsingException(shifted.skipUntilAndDrop!(needle, sqt)(), "unittest failure 4",
                                            __FILE__, line);
        check(equal(shifted.input, remainder), "unittest failure 5");
        check(shifted.pos == shiftedPos, "unittest failure 6");
    }

    // Checks that skipUntilAndDrop throws an XMLParsingException whose
    // position is the expected one, for both the initial and the shifted
    // starting position.
    static void testFail(alias func, string needle, bool sqt)
                        (string origHaystack, int row, int col, size_t line = __LINE__)
    {
        void check(bool cond, string msg)
        {
            enforce!AssertError(cond, msg, __FILE__, line);
        }

        auto haystack = func(origHaystack);

        auto fresh = testParser(haystack.save);
        auto freshErr = collectException!XMLParsingException(fresh.skipUntilAndDrop!(needle, sqt)());
        check(freshErr !is null, "unittest failure 1");
        check(freshErr.pos == TextPos(row, col), "unittest failure 2");

        auto shiftedPos = TextPos(row + 3, row == 1 ? col + 7 : col);
        auto shifted = testParser(haystack);
        shifted.pos.line += 3;
        shifted.pos.col += 7;
        auto shiftedErr = collectException!XMLParsingException(shifted.skipUntilAndDrop!(needle, sqt)());
        check(shiftedErr !is null, "unittest failure 3");
        check(shiftedErr.pos == shiftedPos, "unittest failure 4");
    }

    static foreach(func; testRangeFuncs)
    {
        // These cases contain no quotes, so they must behave the same whether
        // or not quoted text is skipped.
        static foreach(sqt; [false, true])
        {
            {
                enum needle = "world";
                static foreach(i; 1 .. needle.length)
                    test!(func, needle[0 .. i], sqt)("hello world", needle[i .. $], 1, 7 + i);
            }

            test!(func, "l", sqt)("lello world", "ello world", 1, 2);
            test!(func, "ll", sqt)("lello world", "o world", 1, 5);
            test!(func, "le", sqt)("llello world", "llo world", 1, 4);

            {
                enum needle = "great";
                static foreach(i; 1 .. needle.length)
                {
                    test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", "great indeed"[i .. $],
                                                     1, codeLen!(func, "プログラミング in D is ") + i + 1);
                }
            }

            static foreach(haystack; ["", "a", "hello", "ディラン"])
                testFail!(func, "x", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "l", "lte", "world", "nomatch"])
                testFail!(func, "le", sqt)(haystack, 1, 1);
            static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"])
                testFail!(func, "web", sqt)(haystack, 1, 1);
        }

        // Without quote skipping, a needle inside quotes is matched like any
        // other, and an unmatched quote is not an error by itself.
        test!(func, "*", false)(`hello '*' "*" * world`, `' "*" * world`, 1, 9);
        test!(func, "*", false)(`hello '"*' * world`, `' * world`, 1, 10);
        test!(func, "*", false)(`hello "'*" * world`, `" * world`, 1, 10);
        test!(func, "*", false)(`hello ''' * world`, ` world`, 1, 12);
        test!(func, "*", false)(`hello """ * world`, ` world`, 1, 12);
        testFail!(func, "*", false)("foo\n\n   '   \n\nbar", 1, 1);
        testFail!(func, "*", false)(`ディラン   "   `, 1, 1);

        // With quote skipping, needles inside quotes are ignored, and an
        // unmatched quote fails at the position of that quote.
        test!(func, "*", true)(`hello '*' "*" * world`, ` world`, 1, 16);
        test!(func, "*", true)(`hello '"*' * world`, ` world`, 1, 13);
        test!(func, "*", true)(`hello "'*" * world`, ` world`, 1, 13);
        testFail!(func, "*", true)(`hello ''' * world`, 1, 9);
        testFail!(func, "*", true)(`hello """ * world`, 1, 9);
        testFail!(func, "*", true)("foo\n\n   '   \n\nbar", 3, 4);
        testFail!(func, "*", true)(`ディラン   "   `, 1, codeLen!(func, `ディラン   "`));

        test!(func, "*", true)(`hello '' "" * world`, ` world`, 1, 14);
        test!(func, "*", true)("foo '\n \n \n' bar*", "", 4, 7);
    }
}
// Checks that skipUntilAndDrop can be called from @safe pure code for every
// range type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto input = makeRange(`foo`);
        auto parser = testParser!simpleXML(input);
        parser.skipUntilAndDrop!"o"();
        // "f" and the first "o" are gone, so only the last "o" is left.
        assert(equal(parser.input, "o"));
    }}
}
// Shared implementation of takeUntilAndDrop and skipUntilAndDrop.
//
// Advances text.input to just past the first occurrence of needle, updating
// text.pos along the way. If retSlice is true, the part of the input in front
// of the needle is returned via takeExactly; otherwise, nothing is returned.
// If skipQuotedText is true, then anything between matching single or double
// quotes is stepped over without looking for the needle in it, and a quote
// without a matching closing quote results in an XMLParsingException at the
// position of the opening quote.
//
// Throws an XMLParsingException with the position that text had on entry if
// the needle is not found.
auto _takeUntil(bool retSlice, string needle, bool skipQuotedText, Text)(ref Text text)
{
    import std.algorithm : find;
    import std.ascii : isWhite;
    import std.range : takeExactly;

    // The column bookkeeping below relies on the needle containing no
    // whitespace (and thus no newlines).
    static assert(needle.find!isWhite().empty);

    auto start = text.save;     // state on entry; used for the slice and for error reporting
    bool matched = false;
    size_t consumed = 0;        // number of code units in front of the needle
    size_t lineOffset = 0;      // value of consumed right after the most recent newline

    void onNewline()
    {
        ++consumed;
        nextLine!(Text.config)(text.pos);
        lineOffset = consumed;
    }

    scan: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            case cast(ElementType!(Text.Input))needle[0]:
            {
                static if(needle.length == 1)
                {
                    matched = true;
                    text.input.popFront();
                    break scan;
                }
                else static if(needle.length == 2)
                {
                    text.input.popFront();
                    if(text.input.empty || text.input.front != needle[1])
                    {
                        // Only the first code unit matched, so it is ordinary
                        // text. Whatever is at the front now gets examined by
                        // the next iteration.
                        ++consumed;
                        continue scan;
                    }
                    matched = true;
                    text.input.popFront();
                    break scan;
                }
                else
                {
                    text.input.popFront();
                    // Where to resume from if the rest of the needle does not
                    // match.
                    auto afterFirst = text.input.save;
                    foreach(i, c; needle[1 .. $])
                    {
                        if(text.input.empty)
                        {
                            // Ran out of input in the middle of a potential
                            // match, so the needle cannot be found.
                            consumed += i + 1;
                            break scan;
                        }
                        if(text.input.front != c)
                        {
                            text.input = afterFirst;
                            ++consumed;
                            continue scan;
                        }
                        text.input.popFront();
                    }
                    matched = true;
                    break scan;
                }
            }
            static if(skipQuotedText)
            {
                static foreach(quote; ['\'', '"'])
                {
                    case quote:
                    {
                        // Position of the opening quote, for the error message.
                        auto openQuotePos = text.pos;
                        openQuotePos.col += consumed - lineOffset;
                        ++consumed;
                        while(true)
                        {
                            text.input.popFront();
                            if(text.input.empty)
                                throw new XMLParsingException("Failed to find matching quote", openQuotePos);
                            switch(text.input.front)
                            {
                                case quote:
                                {
                                    ++consumed;
                                    text.input.popFront();
                                    continue scan;
                                }
                                case '\n':
                                {
                                    onNewline();
                                    break;
                                }
                                default:
                                {
                                    ++consumed;
                                    break;
                                }
                            }
                        }
                        assert(0); // the compiler isn't smart enough to see that this is unreachable.
                    }
                }
            }
            case '\n':
            {
                onNewline();
                break;
            }
            default:
            {
                ++consumed;
                break;
            }
        }

        text.input.popFront();
    }

    // Columns consumed on the current line plus the needle itself. This runs
    // before the check below, so it also happens when the needle was not found.
    text.pos.col += consumed - lineOffset + needle.length;

    if(!matched)
        throw new XMLParsingException("Failed to find: " ~ needle, start.pos);

    static if(retSlice)
        return takeExactly(start.input, consumed);
}
// Skips forward in the input until one of the given delimiter characters is
// at the front of it. The delimiter itself is not consumed, so on return, the
// first character in the input is the delimiter that was found. If the end of
// the input is reached without finding any of the delimiters, then an
// XMLParsingException is thrown.
//
// The name is admittedly uncomfortably close to skipUntilAndDrop, but the
// logic is different enough that a separate function is preferable to
// selecting the behavior with template arguments.
template skipToOneOf(delims...)
{
    // Each delimiter has to be a char which is not XML whitespace.
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char));
        static assert(!isSpace(delim));
    }

    void skipToOneOf(Text)(ref Text text)
    {
        for(;;)
        {
            if(text.input.empty)
                throw new XMLParsingException("Prematurely reached end of document", text.pos);

            const c = text.input.front;

            static foreach(delim; delims)
            {
                if(c == delim)
                    return;
            }

            if(c == '\n')
            {
                nextLine!(Text.config)(text.pos);
                text.input.popFront();
            }
            else
                popFrontAndIncCol(text);
        }
    }
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Checks a successful skipToOneOf: that it does not throw, what is left in
    // the input, and the resulting position. It is run once on a parser at its
    // initial position and once on a parser whose position was first moved by
    // 3 lines and 7 columns.
    static void test(alias func, delims...)(string origHaystack, string remainder,
                                            int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if the result is still on
            // the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that skipToOneOf throws an XMLParsingException whose position is
    // the expected one, for both the initial and the shifted starting position.
    static void testFail(alias func, delims...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            auto e = collectException!XMLParsingException(text.skipToOneOf!delims());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.skipToOneOf!delims());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        test!(func, 'o', 'w')("hello world", "o world", 1, 5);
        test!(func, 'r', 'w', '1', '+', '*')("hello world", "world", 1, 7);
        test!(func, 'z', 'y')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "yzzy \nf\ng", 6, 6);
        test!(func, 'o', 'g')("abc\n\n\n  \n\n   wxyzzy \nf\ng", "g", 8, 1);
        test!(func, 'g', 'x')("プログラミング in D is great indeed", "great indeed",
                              1, codeLen!(func, "プログラミング in D is ") + 1);

        // Running off the end of the input reports the position of the end.
        testFail!(func, 'a', 'b')("hello world", 1, 12);
        testFail!(func, 'a', 'b')("hello\n\nworld", 3, 6);
        testFail!(func, 'a', 'b')("プログラミング",  1, codeLen!(func, "プログラミング") + 1);
    }
}
// Checks that skipToOneOf can be called from @safe pure code for every range
// type produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        auto input = makeRange(`foo`);
        auto parser = testParser!simpleXML(input);
        parser.skipToOneOf!('o')();
        // The delimiter that was found is left at the front of the input.
        assert(equal(parser.input, "oo"));
    }}
}
// Expects the front of the input to be text enclosed in single or double
// quotes. The opening quote is popped off, and the text up to the matching
// closing quote is returned as a slice of the input, leaving the input one
// code unit beyond the closing quote.
//
// An XMLParsingException is thrown if the input does not start with a quote
// character.
auto takeEnquotedText(Text)(ref Text text)
{
    checkNotEmpty(text);

    // takeUntilAndDrop takes its needle at compile time (which is what makes
    // sense for every other caller), so dispatch on the runtime quote
    // character to the matching instantiation.
    switch(text.input.front)
    {
        static foreach(quoteChar; [`"`, `'`])
        {
            case quoteChar[0]:
                popFrontAndIncCol(text);
                return takeUntilAndDrop!quoteChar(text);
        }
        default:
            break;
    }

    throw new XMLParsingException("Expected quoted text", text.pos);
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown, enforce;
    import std.range : only;
    import dxml.internal : testRangeFuncs;

    // Checks a successful takeEnquotedText: the returned text, the remaining
    // input, and the resulting position. It is run once on a parser at its
    // initial position and once on a parser whose position was first moved by
    // 3 lines and 7 columns. The failure messages are numbered 1 through 6 so
    // that each check can be told apart.
    static void test(alias func)(string origHaystack, string expected, string remainder,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected.save), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if the result is still on
            // the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected), "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeEnquotedText throws an XMLParsingException.
    static void testFail(alias func)(string origHaystack, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto text = testParser(haystack);
        assertThrown!XMLParsingException(text.takeEnquotedText(), "unittest failure", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        foreach(quote; only("\"", "'"))
        {
            test!func(quote ~ quote, "", "", 1, 3);
            test!func(quote ~ "hello world" ~ quote, "hello world", "", 1, 14);
            test!func(quote ~ "hello world" ~ quote ~ " foo", "hello world", " foo", 1, 14);
            {
                import std.utf : codeLength;
                auto haystack = quote ~ "プログラミング " ~ quote ~ "in D";
                // Number of code units of the quoted text in the encoding of
                // the range type under test.
                enum len = cast(int)codeLength!(ElementEncodingType!(typeof(func(haystack))))("プログラミング ");
                test!func(haystack, "プログラミング ", "in D", 1, len + 3);
            }
        }

        // Missing, unterminated, and mismatched quotes must all be rejected.
        foreach(str; only(`hello`, `"hello'`, `"hello`, `'hello"`, `'hello`, ``, `"'`, `"`, `'"`, `'`))
            testFail!func(str);
    }
}
// Strips a name (per the Name grammar rule) off of the front of the input and
// returns it.
// Parsing stops at the first XML whitespace character, at the first of the
// given delimiters, or at the end of the input - whichever comes first. The
// character which stopped the parse is not part of the returned name and is
// left at the front of the input.
// An XMLParsingException is thrown if the first character is not a valid
// NameStartChar or if any character after it (and before the terminating
// delimiter/whitespace) is not a valid NameChar.
template takeName(delims...)
{
    // Every delimiter must be a char, and since whitespace always terminates
    // a name, passing whitespace as a delimiter is rejected.
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char), delim);
        static assert(!isSpace(delim));
    }

    auto takeName(Text)(ref Text text)
    {
        import std.format : format;
        import std.range : takeExactly;
        import std.utf : decodeFront, UseReplacementDchar;
        import dxml.internal : isNameStartChar, isNameChar;

        assert(!text.input.empty);

        auto start = text.input.save;

        // Number of code units which belong to the name so far. Decoding the
        // first character initializes it to that character's length.
        size_t nameLen;
        immutable firstC = text.input.decodeFront!(UseReplacementDchar.yes)(nameLen);
        if(!isNameStartChar(firstC))
            throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(firstC), text.pos);

        scan: while(!text.input.empty)
        {
            // Whitespace and delimiters are all single code units, so they
            // can be detected without decoding.
            immutable unit = text.input.front;
            if(isSpace(unit))
                break;
            static foreach(delim; delims)
            {
                if(unit == delim)
                    break scan;
            }

            size_t unitCount;
            immutable nextC = text.input.decodeFront!(UseReplacementDchar.yes)(unitCount);
            if(!isNameChar(nextC))
            {
                // Point the error at the offending character rather than at
                // the start of the name.
                text.pos.col += nameLen;
                throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(nextC), text.pos);
            }
            nameLen += unitCount;
        }

        text.pos.col += nameLen;

        return takeExactly(start, nameLen);
    }
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.typecons : tuple;
    import dxml.internal : codeLen, testRangeFuncs;

    // Checks that takeName!delim yields expected, leaves remainder in the
    // input, and finishes at TextPos(row, col). The same check is then
    // repeated with the parser's starting position shifted by 3 lines and 7
    // columns in order to verify that the position is advanced relative to
    // where it started.
    static void test(alias func, delim...)(string origHaystack, string expected, string remainder,
                                           int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);
        auto wanted = expected.toCmpType!func();

        // Default starting position
        {
            auto parser = testParser(input.save);
            enforce!AssertError(equal(parser.takeName!delim(), wanted.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(parser.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }

        // Shifted starting position. The column shift only carries over to
        // the result if the parse ends on the first line.
        {
            auto shifted = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto parser = testParser(input);
            parser.pos.line += 3;
            parser.pos.col += 7;
            enforce!AssertError(equal(parser.takeName!delim(), wanted),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(parser.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(parser.pos == shifted, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeName!delim throws an XMLParsingException whose position
    // is TextPos(row, col) - both from the default starting position and from
    // a starting position shifted by 3 lines and 7 columns.
    static void testFail(alias func, delim...)(string origHaystack, int row, int col, size_t line = __LINE__)
    {
        auto input = func(origHaystack);

        // Default starting position
        {
            auto parser = testParser(input.save);
            auto ex = collectException!XMLParsingException(parser.takeName!delim());
            enforce!AssertError(ex !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(ex.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }

        // Shifted starting position
        {
            auto shifted = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto parser = testParser(input);
            parser.pos.line += 3;
            parser.pos.col += 7;
            auto ex = collectException!XMLParsingException(parser.takeName!delim());
            enforce!AssertError(ex !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(ex.pos == shifted, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Valid names, terminated by the end of the input, by whitespace, or
        // by a delimiter.
        static foreach(name; ["hello", "プログラミング", "h_:llo-.42", "_.", "_-", "_42"])
        {{
            enum nameLen = codeLen!(func, name);

            static foreach(rest; ["", " ", "\t", "\r", "\n", " foo", "\tfoo", "\rfoo", "\nfoo",  "  foo \n \r "])
            {{
                enum nameThenRest = name ~ rest;
                enum delimThenRest = '>' ~ rest;
                enum nameThenDelim = name ~ delimThenRest;
                test!func(nameThenRest, name, rest, 1, nameLen + 1);
                test!(func, '=')(nameThenRest, name, rest, 1, nameLen + 1);
                test!(func, '>', '|')(nameThenDelim, name, delimThenRest, 1, nameLen + 1);
                test!(func, '|', '>')(nameThenDelim, name, delimThenRest, 1, nameLen + 1);
            }}
        }}

        // Input which either doesn't start with a name or which has an
        // invalid character before the name is terminated.
        static foreach(bad; [tuple(" ", 1, 1), tuple("<", 1, 1), tuple("foo!", 1, 4), tuple("foo!<", 1, 4)])
        {{
            testFail!func(bad[0], bad[1], bad[2]);
            testFail!func(bad[0] ~ '>', bad[1], bad[2]);
            testFail!(func, '?')(bad[0], bad[1], bad[2]);
            testFail!(func, '=')(bad[0] ~ '=', bad[1], bad[2]);
        }}

        // A delimiter is not a valid first character.
        testFail!(func, '>')(">", 1, 1);
        testFail!(func, '?')("?", 1, 1);
        testFail!(func, '?')("プログ&ラミング", 1, codeLen!(func, "プログ&"));

        // Characters which are valid in a name but not as its first character.
        static foreach(bad; [tuple("42", 1, 1), tuple(".", 1, 1), tuple(".a", 1, 1)])
        {{
            testFail!func(bad[0], bad[1], bad[2]);
            testFail!(func, '>')(bad[0], bad[1], bad[2]);
        }}
    }
}
// Verifies that takeName can be called from @safe pure code with each of the
// range types produced by testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto input = func(`foo`);
        auto parser = testParser!simpleXML(input);
        immutable matched = equal(parser.takeName(), "foo");
        assert(matched);
    }}
}
// This removes an attribute value from the front of the input, validates it,
// and returns it without its surrounding quotes. On success, the input is left
// just past the closing quote, and text.pos is updated accordingly.
//
// The value must be quoted with either " or ' and is terminated by the same
// kind of quote that opened it. Within the value, < is illegal, and & is only
// legal as the start of a reference. Character references are checked by
// passing them to dxml.util.parseCharRef. What is accepted as an entity
// reference depends on Text.config.throwOnEntityRef: with ThrowOnEntityRef.yes,
// only the five entity references predefined by the XML spec are accepted;
// otherwise, anything of the form '&' Name ';' is accepted. Every other
// character must be a valid XML character.
//
// An XMLParsingException is thrown if the input does not start with a quote,
// if the value is never terminated, or if the value contains anything illegal.
// Aside from the unterminated case (which reports the position of the opening
// quote), the exception's position is that of the offending character.
auto takeAttValue(Text)(ref Text text)
{
    // AttValue    ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    // Reference   ::= EntityRef | CharRef
    // EntityRef   ::= '&' Name ';'
    // PEReference ::= '%' Name ';'

    checkNotEmpty(text);
    immutable quote = text.input.front;
    if(quote != '"' && quote != '\'')
        throw new XMLParsingException("Expected quoted text", text.pos);

    // Where the value was opened; reported if it is never closed.
    immutable quotePos = text.pos;
    popFrontAndIncCol(text);

    // text.pos is not updated for every character. Instead, takeLen counts the
    // code units consumed so far, and lineStart is the value that takeLen had
    // at the start of the current line. So, the column of the character at the
    // front of the input is always text.pos.col + takeLen - lineStart.
    size_t lineStart = 0;
    auto orig = text.input.save;
    size_t takeLen;
    loop: while(true)
    {
        if(text.input.empty)
            throw new XMLParsingException("Unterminated attribute value", quotePos);
        switch(text.input.front)
        {
            // A quote only terminates the value if it's the same kind of
            // quote that opened it. Otherwise, it's an ordinary character.
            case '"':
            {
                if(quote == '"')
                {
                    text.input.popFront();
                    goto done;
                }
                goto default;
            }
            case '\'':
            {
                if(quote == '\'')
                {
                    text.input.popFront();
                    goto done;
                }
                goto default;
            }
            case '&':
            {
                // Character References
                {
                    import dxml.util : parseCharRef;
                    auto temp = text.input.save;
                    auto charRef = parseCharRef(temp);
                    if(!charRef.isNull)
                    {
                        static if(hasLength!(Text.Input))
                        {
                            takeLen += text.input.length - temp.length;
                            text.input = temp;
                        }
                        else
                        {
                            // Without length, the number of code units that
                            // parseCharRef consumed has to be counted by
                            // walking to the terminating semicolon.
                            while(text.input.front != ';')
                            {
                                ++takeLen;
                                text.input.popFront();
                            }
                            ++takeLen;
                            text.input.popFront();
                        }
                        continue;
                    }
                }

                // Column offset of the & (captured before consuming it) so
                // that errors point at the & itself.
                immutable ampLen = takeLen - lineStart;
                ++takeLen;
                text.input.popFront();

                // Std Entity References
                static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                {
                    import std.algorithm.searching : startsWith;

                    static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                    {
                        if(text.input.save.startsWith(entRef))
                        {
                            takeLen += entRef.length;
                            text.input.popFrontN(entRef.length);
                            continue loop;
                        }
                    }

                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "reference, and this parser only supports entity " ~
                                                  "references if they're predefined by the spec. This is not " ~
                                                  "a valid character reference or one of the predefined " ~
                                                  "entity references.", text.pos);
                }
                // All Entity References
                else
                {
                    import std.utf : decodeFront, UseReplacementDchar;
                    import dxml.internal : isNameStartChar, isNameChar;

                    if(text.input.empty || text.input.front == quote)
                        goto failedEntityRef;

                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameStartChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }

                    while(true)
                    {
                        if(text.input.empty)
                            goto failedEntityRef;
                        immutable c = text.input.front;
                        if(c == ';')
                        {
                            // The semicolon is counted here but popped by the
                            // popFront at the bottom of the outer loop.
                            ++takeLen;
                            break;
                        }
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }
                    break;

                    failedEntityRef:
                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "character or entity reference, and this is not a valid " ~
                                                  "character or entity reference.", text.pos);
                }
            }
            case '<':
            {
                text.pos.col += takeLen - lineStart;
                throw new XMLParsingException("< is not legal in an attribute value", text.pos);
            }
            case '\n':
            {
                ++takeLen;
                nextLine!(Text.config)(text.pos);
                lineStart = takeLen;
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import std.format : format;
                import dxml.internal : isXMLChar;

                immutable c = text.input.front;
                if(isASCII(c))
                {
                    if(!isXMLChar(c))
                    {
                        // As with < and &, report the position of the
                        // offending character rather than wherever text.pos
                        // was last updated.
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    ++takeLen;
                    break;
                }
                import std.utf : decodeFront, UseReplacementDchar, UTFException;
                // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                // replacement character is considered valid XML, and if we decoded using it, then
                // all of the invalid Unicode characters would come out as the replacement character
                // and then be treated as valid instead of being caught, which isn't all bad, but
                // the spec requires that they be treated as invalid instead of playing nice and
                // using the replacement character.
                try
                {
                    size_t numCodeUnits;
                    immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                    if(!isXMLChar(decodedC))
                    {
                        enum fmt = "Character is not legal in an XML File: 0x%0x";
                        // takeLen does not yet include this character, so
                        // this is the column at which it starts.
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!fmt(decodedC), text.pos);
                    }
                    takeLen += numCodeUnits;
                }
                catch(UTFException e)
                {
                    text.pos.col += takeLen - lineStart;
                    throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                // decodeFront already popped the character.
                continue;
            }
        }
        text.input.popFront();
    }
    done:
    {
        import std.range : takeExactly;
        // The + 1 accounts for the closing quote, which is not part of the
        // returned value.
        text.pos.col += takeLen - lineStart + 1;
        return takeExactly(orig, takeLen);
    }
}
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.range : only;
    import dxml.internal : codeLen, testRangeFuncs;

    // Checks that takeAttValue returns expected, leaves remainder in the
    // input, and finishes at TextPos(row, col) when the parser is configured
    // with the given ThrowOnEntityRef. The same check is then repeated with
    // the starting position shifted by 3 lines and 7 columns. Each enforce
    // has its own failure number so that a failure identifies which check and
    // which of the two runs failed.
    static void test(alias func, ThrowOnEntityRef toer)(string origHaystack, string expected, string remainder,
                                                        int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!(func, toer)();
        {
            auto text = testParser!(makeConfig(toer))(haystack.save);
            enforce!AssertError(equal(text.takeAttValue(), adjExpected.save),
                                "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column shift only carries over to the result if the parse
            // ends on the first line.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser!(makeConfig(toer))(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(text.takeAttValue(), adjExpected),
                                "unittest failure 4", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    // Checks that takeAttValue throws an XMLParsingException whose position is
    // TextPos(row, col) - both from the default starting position (failures 1
    // and 2) and from a starting position shifted by 3 lines and 7 columns
    // (failures 3 and 4).
    static void testFail(alias func, ThrowOnEntityRef toer)(string origHaystack,
                                                            int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser!(makeConfig(toer))(haystack.save);
            auto e = collectException!XMLParsingException(text.takeAttValue());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser!(makeConfig(toer))(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            auto e = collectException!XMLParsingException(text.takeAttValue());
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(i, func; testRangeFuncs)
    {
        // Behavior which does not depend on ThrowOnEntityRef
        static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
        {
            test!(func, toer)(`""`, "", "", 1, 3);
            test!(func, toer)(`"J"`, "J", "", 1, 4);
            test!(func, toer)(`"foo"`, "foo", "", 1, 6);
            test!(func, toer)(`"プログラミング"`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3);
            test!(func, toer)(`"foo"bar`, "foo", "bar", 1, 6);
            test!(func, toer)(`"プログラミング" after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3);

            test!(func, toer)(`''`, "", "", 1, 3);
            test!(func, toer)(`'J'`, "J", "", 1, 4);
            test!(func, toer)(`'foo'`, "foo", "", 1, 6);
            test!(func, toer)(`'プログラミング'`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3);
            test!(func, toer)(`'foo'bar`, "foo", "bar", 1, 6);
            test!(func, toer)(`'プログラミング' after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3);

            test!(func, toer)(`"&amp;&gt;&lt;"`, "&amp;&gt;&lt;", "", 1, 16);
            test!(func, toer)(`"&apos;&quot;"`, "&apos;&quot;", "", 1, 15);
            test!(func, toer)(`"hello&amp;&gt;&lt;world"`, "hello&amp;&gt;&lt;world", "", 1, 26);
            test!(func, toer)(`".....&amp;&gt;&lt;....."`, ".....&amp;&gt;&lt;.....", "", 1, 26);
            test!(func, toer)(`"&#12487;&#12451;&#12521;&#12531;"`, "&#12487;&#12451;&#12521;&#12531;", "", 1, 35);
            test!(func, toer)(`"hello&#xAF;&#77;&amp;world"`, "hello&#xAF;&#77;&amp;world", "", 1, 29);

            test!(func, toer)(`'&amp;&gt;&lt;'`, "&amp;&gt;&lt;", "", 1, 16);
            test!(func, toer)(`'hello&amp;&gt;&lt;world'`, "hello&amp;&gt;&lt;world", "", 1, 26);
            test!(func, toer)(`'&apos;&quot;'`, "&apos;&quot;", "", 1, 15);
            test!(func, toer)(`'.....&amp;&gt;&lt;.....'`, ".....&amp;&gt;&lt;.....", "", 1, 26);
            test!(func, toer)(`'&#12487;&#12451;&#12521;&#12531;'`, "&#12487;&#12451;&#12521;&#12531;", "", 1, 35);
            test!(func, toer)(`'hello&#xAF;&#77;&amp;world'`, "hello&#xAF;&#77;&amp;world", "", 1, 29);

            test!(func, toer)("'hello\nworld'", "hello\nworld", "", 2, 7);
            test!(func, toer)("'hello\nworld\n'", "hello\nworld\n", "", 3, 2);

            test!(func, toer)(`"'''"whatever`, "'''", "whatever", 1, 6);
            test!(func, toer)(`'"""'whatever`, `"""`, "whatever", 1, 6);

            test!(func, toer)(`"&#42;"`, "&#42;", "", 1, 8);
            test!(func, toer)(`"&#x42;"`, "&#x42;", "", 1, 9);
            test!(func, toer)(`"%foo"`, "%foo", "", 1, 7);

            testFail!(func, toer)(`"`, 1, 1);
            testFail!(func, toer)(`"foo`, 1, 1);
            testFail!(func, toer)(`"foo'`, 1, 1);
            testFail!(func, toer)(`"<"`, 1, 2);
            testFail!(func, toer)(`"&`, 1, 2);
            testFail!(func, toer)(`"&"`, 1, 2);
            testFail!(func, toer)(`"&x"`, 1, 2);
            testFail!(func, toer)(`"&.;"`, 1, 2);
            testFail!(func, toer)(`"&&;"`, 1, 2);
            testFail!(func, toer)(`"&a"`, 1, 2);
            testFail!(func, toer)(`"&a`, 1, 2);
            testFail!(func, toer)(`"hello&;"`, 1, 7);
            testFail!(func, toer)(`"hello&;world"`,1, 7);
            testFail!(func, toer)(`"hello&<;world"`,1, 7);
            testFail!(func, toer)(`"hello&world"`,1, 7);
            testFail!(func, toer)(`"hello<world"`,1, 7);
            testFail!(func, toer)(`"hello world&"`, 1, 13);
            testFail!(func, toer)(`"hello world&;"`, 1, 13);
            testFail!(func, toer)(`"hello world&foo"`, 1, 13);
            testFail!(func, toer)(`"foo<"`, 1, 5);
            testFail!(func, toer)(`"&#`, 1, 2);
            testFail!(func, toer)(`"&#"`, 1, 2);
            testFail!(func, toer)(`"&#;"`, 1, 2);
            testFail!(func, toer)(`"&#x;"`, 1, 2);
            testFail!(func, toer)(`"&#AF;"`, 1, 2);
            testFail!(func, toer)(`"&#x`, 1, 2);
            testFail!(func, toer)(`"&#77`, 1, 2);
            testFail!(func, toer)(`"&#77;`, 1, 1);
            testFail!(func, toer)(`"&#x0`, 1, 2);
            testFail!(func, toer)(`"&#x0;`, 1, 2);
            testFail!(func, toer)(`"&#x0;"`, 1, 2);

            testFail!(func, toer)(`'`, 1, 1);
            testFail!(func, toer)(`'foo`, 1, 1);
            testFail!(func, toer)(`'foo"`, 1, 1);
            testFail!(func, toer)(`'<'`, 1, 2);
            testFail!(func, toer)("'\v'", 1, 2);
            testFail!(func, toer)("'\uFFFE'", 1, 2);
            testFail!(func, toer)(`'&`, 1, 2);
            testFail!(func, toer)(`'&'`, 1, 2);
            testFail!(func, toer)(`'&x'`, 1, 2);
            testFail!(func, toer)(`'&.;'`, 1, 2);
            testFail!(func, toer)(`'&&;'`, 1, 2);
            testFail!(func, toer)(`'&a'`, 1, 2);
            testFail!(func, toer)(`'&a`, 1, 2);
            testFail!(func, toer)(`'hello&;'`, 1, 7);
            testFail!(func, toer)(`'hello&;world'`, 1, 7);
            testFail!(func, toer)(`'hello&<;world'`, 1, 7);
            testFail!(func, toer)(`'hello&world'`, 1, 7);
            testFail!(func, toer)(`'hello<world'`, 1, 7);
            testFail!(func, toer)(`'hello world&'`, 1, 13);
            testFail!(func, toer)(`'hello world&;'`, 1, 13);
            testFail!(func, toer)(`'hello world&foo'`, 1, 13);
            testFail!(func, toer)(`'foo<'`, 1, 5);
            testFail!(func, toer)(`'&#`, 1, 2);
            testFail!(func, toer)(`'&#'`, 1, 2);
            testFail!(func, toer)(`'&#;'`, 1, 2);
            testFail!(func, toer)(`'&#x;'`, 1, 2);
            testFail!(func, toer)(`'&#AF;'`, 1, 2);
            testFail!(func, toer)(`'&#x`, 1, 2);
            testFail!(func, toer)(`'&#77`, 1, 2);
            testFail!(func, toer)(`'&#77;`, 1, 1);
            testFail!(func, toer)(`'&#x0`, 1, 2);
            testFail!(func, toer)(`'&#x0;`, 1, 2);
            testFail!(func, toer)(`'&#x0;'`, 1, 2);
            testFail!(func, toer)("'&#xA\nF;'", 1, 2);
            testFail!(func, toer)("'&amp\n;'", 1, 2);
            testFail!(func, toer)("'&\namp;'", 1, 2);
            testFail!(func, toer)("'\n&amp;&;'", 2, 6);
        }
        // With ThrowOnEntityRef.yes, entity references which aren't
        // predefined by the spec are rejected.
        {
            alias toer = ThrowOnEntityRef.yes;
            testFail!(func, toer)(`"&foo;"`, 1, 2);
            testFail!(func, toer)(`"hello world&foo;"`, 1, 13);
            testFail!(func, toer)(`"hello &foo; world"`, 1, 8);
            testFail!(func, toer)(`"&am;"`, 1, 2);
            testFail!(func, toer)(`"&ampe;"`, 1, 2);
            testFail!(func, toer)(`"&l;"`, 1, 2);
            testFail!(func, toer)(`"&lte;"`, 1, 2);
            testFail!(func, toer)(`"&g;"`, 1, 2);
            testFail!(func, toer)(`"&gte;"`, 1, 2);
            testFail!(func, toer)(`"&apo;"`, 1, 2);
            testFail!(func, toer)(`"&aposs;"`, 1, 2);
            testFail!(func, toer)(`"&quo;"`, 1, 2);
            testFail!(func, toer)(`"&quote;"`, 1, 2);

            testFail!(func, toer)(`'&foo;'`, 1, 2);
            testFail!(func, toer)(`'hello world&foo;'`, 1, 13);
            testFail!(func, toer)(`'hello &foo; world'`, 1, 8);
            testFail!(func, toer)(`'&am;'`, 1, 2);
            testFail!(func, toer)(`'&ampe;'`, 1, 2);
            testFail!(func, toer)(`'&l;'`, 1, 2);
            testFail!(func, toer)(`'&lte;'`, 1, 2);
            testFail!(func, toer)(`'&g;'`, 1, 2);
            testFail!(func, toer)(`'&gte;'`, 1, 2);
            testFail!(func, toer)(`'&apo;'`, 1, 2);
            testFail!(func, toer)(`'&aposs;'`, 1, 2);
            testFail!(func, toer)(`'&quo;'`, 1, 2);
            testFail!(func, toer)(`'&quote;'`, 1, 2);
        }
        // With ThrowOnEntityRef.no, any syntactically valid entity reference
        // is accepted and left in the value as-is.
        {
            alias toer = ThrowOnEntityRef.no;
            test!(func, toer)(`"&foo;"`, "&foo;", "", 1, 8);
            test!(func, toer)(`"hello world&foo;"`, "hello world&foo;", "", 1, 19);
            test!(func, toer)(`"hello &foo; world"`, "hello &foo; world", "", 1, 20);
            test!(func, toer)(`"&am;"`, "&am;", "", 1, 7);
            test!(func, toer)(`"&ampe;"`, "&ampe;", "", 1, 9);
            test!(func, toer)(`"&l;"`, "&l;", "", 1, 6);
            test!(func, toer)(`"&lte;"`, "&lte;", "", 1, 8);
            test!(func, toer)(`"&g;"`, "&g;", "", 1, 6);
            test!(func, toer)(`"&gte;"`, "&gte;", "", 1, 8);
            test!(func, toer)(`"&apo;"`, "&apo;", "", 1, 8);
            test!(func, toer)(`"&aposs;"`, "&aposs;", "", 1, 10);
            test!(func, toer)(`"&quo;"`, "&quo;", "", 1, 8);
            test!(func, toer)(`"&quote;"`, "&quote;", "", 1, 10);

            test!(func, toer)(`'&foo;'`, "&foo;", "", 1, 8);
            test!(func, toer)(`'hello world&foo;'`, "hello world&foo;", "", 1, 19);
            test!(func, toer)(`'hello &foo; world'`, "hello &foo; world", "", 1, 20);
            test!(func, toer)(`'&am;'`, "&am;", "", 1, 7);
            test!(func, toer)(`'&ampe;'`, "&ampe;", "", 1, 9);
            test!(func, toer)(`'&l;'`, "&l;", "", 1, 6);
            test!(func, toer)(`'&lte;'`, "&lte;", "", 1, 8);
            test!(func, toer)(`'&g;'`, "&g;", "", 1, 6);
            test!(func, toer)(`'&gte;'`, "&gte;", "", 1, 8);
            test!(func, toer)(`'&apo;'`, "&apo;", "", 1, 8);
            test!(func, toer)(`'&aposs;'`, "&aposs;", "", 1, 10);
            test!(func, toer)(`'&quo;'`, "&quo;", "", 1, 8);
            test!(func, toer)(`'&quote;'`, "&quote;", "", 1, 10);
        }
    }

    // These can't be tested with testFail, because attempting to convert
    // invalid Unicode results in UnicodeExceptions before parseXML even
    // gets called.
    import std.meta : AliasSeq;
    static foreach(str; AliasSeq!("'" ~ cast(string)[255] ~ "'",
                                  "'"w ~ cast(wstring)[0xD800] ~ "'",
                                  "'"d ~ cast(dstring)[0xD800] ~ "'"))
    {{
        auto text = testParser(str);
        auto e = collectException!XMLParsingException(text.takeAttValue());
        assert(e !is null);
        assert(e.pos == TextPos(1, 2));
    }}
}
// Verifies that takeAttValue can be used from @safe pure code with each
// function in testRangeFuncs and with each of the listed configurations.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
        {{
            auto xml = func(`'foo'`);
            // Instantiate the parser with the loop's config so that every
            // listed configuration is actually exercised.
            auto text = testParser!config(xml);
            assert(equal(text.takeAttValue(), "foo"));
        }}
    }
}
// Validates an EntityType.text field to verify that it does not contain invalid
// characters.
//
// The check runs on a copy of orig obtained via save, so orig itself is not
// advanced.
//
// Params:
//     allowRestrictedChars = When false, '&' must start a valid character
//                            reference or entity reference, '<' is rejected,
//                            and "]]>" is rejected. When true, those characters
//                            get no special treatment and are validated like
//                            any other character.
//     orig = The parser state whose input is validated.
//
// Throws:
//     XMLParsingException with the position of the offending character (or of
//     the '&' which starts an invalid reference).
void checkText(bool allowRestrictedChars, Text)(ref Text orig)
{
    import std.format : format;
    import std.utf : decodeFront, UseReplacementDchar;

    auto text = orig.save;
    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            static if(!allowRestrictedChars)
            {
                case '&':
                {
                    import dxml.util : parseCharRef;

                    // Character References
                    {
                        auto temp = text.input.save;
                        auto charRef = parseCharRef(temp);
                        if(!charRef.isNull)
                        {
                            static if(hasLength!(Text.Input))
                            {
                                // temp is positioned after the reference, so the
                                // difference in length is how much was consumed.
                                text.pos.col += text.input.length - temp.length;
                                text.input = temp;
                            }
                            else
                            {
                                // Without length, walk past the reference one
                                // element at a time, including the closing ';'.
                                while(text.input.front != ';')
                                    popFrontAndIncCol(text);
                                popFrontAndIncCol(text);
                            }
                            continue;
                        }
                    }

                    // Remember where the '&' was so that errors point at it.
                    immutable ampPos = text.pos;
                    popFrontAndIncCol(text);

                    // Std Entity References
                    static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                    {
                        static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                        {
                            if(text.stripStartsWith(entRef))
                                continue loop;
                        }

                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "reference, and this parser only supports entity references if " ~
                                                      "they're predefined by the spec. This is not a valid character " ~
                                                      "reference or one of the predefined entity references.", ampPos);
                    }
                    // All Entity References
                    else
                    {
                        import dxml.internal : isNameStartChar, isNameChar;

                        if(text.input.empty)
                            goto failedEntityRef;
                        {
                            // The first character after '&' must be a name start character.
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameStartChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        // The rest must be name characters up to the terminating ';'.
                        while(true)
                        {
                            if(text.input.empty)
                                goto failedEntityRef;
                            immutable c = text.input.front;
                            if(c == ';')
                                break;
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        assert(text.input.front == ';');
                        popFrontAndIncCol(text);
                        continue;

                        failedEntityRef:
                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "character or entity reference, and this is not a valid " ~
                                                      "character or entity reference.", ampPos);
                    }
                }
                case '<': throw new XMLParsingException("< is not legal in EntityType.text", text.pos);
                case ']':
                {
                    popFrontAndIncCol(text);
                    if(text.stripStartsWith("]>"))
                    {
                        // Move the column back to the first ']' of "]]>" for the error.
                        text.pos.col -= 3;
                        throw new XMLParsingException("]]> is not legal in EntityType.text", text.pos);
                    }
                    break;
                }
            }
            case '\n':
            {
                nextLine!(text.config)(text.pos);
                text.input.popFront();
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import dxml.internal : isXMLChar;
                immutable c = text.input.front;
                if(isASCII(c))
                {
                    if(!isXMLChar(c))
                    {
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    popFrontAndIncCol(text);
                }
                else
                {
                    import std.utf : UTFException;
                    // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                    // replacement character is considered valid XML, and if we decoded using it, then
                    // all of the invalid Unicode characters would come out as the replacement character
                    // and then be treated as valid instead of being caught, which isn't all bad, but
                    // the spec requires that they be treated as invalid instead of playing nice and
                    // using the replacement character.
                    try
                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                        if(!isXMLChar(decodedC))
                        {
                            enum fmt = "Character is not legal in an XML File: 0x%0x";
                            throw new XMLParsingException(format!fmt(decodedC), text.pos);
                        }
                        text.pos.col += numCodeUnits;
                    }
                    catch(UTFException)
                        throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                break;
            }
        }
    }
}
// Exercises checkText with text that must be accepted and text that must be
// rejected, for each function in testRangeFuncs, both ThrowOnEntityRef
// settings, and both values of allowRestrictedChars (arc).
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Asserts that checkText!arc does not throw for text. line is the line of
    // the call site so that a failure is reported there.
    static void test(alias func, bool arc, ThrowOnEntityRef toer)(string text, size_t line = __LINE__)
    {
        auto xml = func(text);
        auto range = testParser!(makeConfig(toer))(xml);
        assertNotThrown(checkText!arc(range), "unittest failure", __FILE__, line);
    }

    // Asserts that checkText!arc throws an XMLParsingException for text and
    // that the exception's position is TextPos(row, col).
    static void testFail(alias func, bool arc, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        {
            auto range = testParser!(makeConfig(toer))(xml.save);
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }
        {
            // Repeat with a shifted starting position. The column shift is
            // only expected in the result when the failure is on the first
            // line, since a newline resets the column.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto range = testParser!(makeConfig(toer))(xml);
            range.pos.line += 3;
            range.pos.col += 7;
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line);
            enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
        {
            // Cases whose outcome does not depend on allowRestrictedChars.
            static foreach(arc; [false, true])
            {
                test!(func, arc, toer)("");
                test!(func, arc, toer)("J");
                test!(func, arc, toer)("foo");
                test!(func, arc, toer)("プログラミング");

                test!(func, arc, toer)("&amp;&gt;&lt;");
                test!(func, arc, toer)("hello&amp;&gt;&lt;world");
                test!(func, arc, toer)(".....&apos;&quot;&amp;.....");
                test!(func, arc, toer)("&#12487;&#12451;&#12521;&#12531;");
                test!(func, arc, toer)("hello&#xAF;&#42;&quot;world");

                test!(func, arc, toer)("]]");
                test!(func, arc, toer)("]>");
                test!(func, arc, toer)("foo]]bar");
                test!(func, arc, toer)("foo]>bar");
                test!(func, arc, toer)("]] >");

                testFail!(func, arc, toer)("\v", 1, 1);
                testFail!(func, arc, toer)("\uFFFE", 1, 1);
                testFail!(func, arc, toer)("hello\vworld", 1, 6);
                testFail!(func, arc, toer)("he\nllo\vwo\nrld", 2, 4);
            }

            // Restricted characters are rejected when arc is false.
            testFail!(func, false, toer)("<", 1, 1);
            testFail!(func, false, toer)("&", 1, 1);
            testFail!(func, false, toer)("&x", 1, 1);
            testFail!(func, false, toer)("&&;", 1, 1);
            testFail!(func, false, toer)("&a", 1, 1);
            testFail!(func, false, toer)("hello&;", 1, 6);
            testFail!(func, false, toer)("hello&;world", 1, 6);
            testFail!(func, false, toer)("hello&<;world", 1, 6);
            testFail!(func, false, toer)("hello&world", 1, 6);
            testFail!(func, false, toer)("hello world&", 1, 12);
            testFail!(func, false, toer)("hello world&;", 1, 12);
            testFail!(func, false, toer)("hello world&foo", 1, 12);
            testFail!(func, false, toer)("&#;", 1, 1);
            testFail!(func, false, toer)("&#x;", 1, 1);
            testFail!(func, false, toer)("&#AF;", 1, 1);
            testFail!(func, false, toer)("&#x", 1, 1);
            testFail!(func, false, toer)("&#42", 1, 1);
            testFail!(func, false, toer)("&#x42", 1, 1);
            testFail!(func, false, toer)("&#12;", 1, 1);
            testFail!(func, false, toer)("&#x12;", 1, 1);
            testFail!(func, false, toer)("&#42;foo\nbar&#;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#AF;", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#42", 2, 4);
            testFail!(func, false, toer)("&#42;foo\nbar&#x42", 2, 4);
            testFail!(func, false, toer)("プログラミング&", 1, codeLen!(func, "プログラミング&"));

            // Entity references which are not predefined are only rejected
            // when ThrowOnEntityRef.yes is in effect.
            static if(toer == ThrowOnEntityRef.yes)
            {
                testFail!(func, false, toer)("&a;", 1, 1);
                testFail!(func, false, toer)(`&am;`, 1, 1);
                testFail!(func, false, toer)(`&ampe;`, 1, 1);
                testFail!(func, false, toer)(`&l;`, 1, 1);
                testFail!(func, false, toer)(`&lte;`, 1, 1);
                testFail!(func, false, toer)(`&g;`, 1, 1);
                testFail!(func, false, toer)(`&gte;`, 1, 1);
                testFail!(func, false, toer)(`&apo;`, 1, 1);
                testFail!(func, false, toer)(`&aposs;`, 1, 1);
                testFail!(func, false, toer)(`&quo;`, 1, 1);
                testFail!(func, false, toer)(`&quote;`, 1, 1);
                testFail!(func, false, toer)(`hello &foo; world`, 1, 7);
                testFail!(func, false, toer)("hello\n &foo; \nworld", 2, 2);
            }
            else
            {
                test!(func, false, toer)("&a;");
                test!(func, false, toer)(`&am;`);
                test!(func, false, toer)(`&ampe;`);
                test!(func, false, toer)(`&l;`);
                test!(func, false, toer)(`&lte;`);
                test!(func, false, toer)(`&g;`);
                test!(func, false, toer)(`&gte;`);
                test!(func, false, toer)(`&apo;`);
                test!(func, false, toer)(`&aposs;`);
                test!(func, false, toer)(`&quo;`);
                test!(func, false, toer)(`&quote;`);
                test!(func, false, toer)(`hello &foo; world`);
                test!(func, false, toer)("hello\n &foo; \nworld");
            }

            testFail!(func, false, toer)("]]>", 1, 1);
            testFail!(func, false, toer)("foo]]>bar", 1, 4);

            // With arc set to true, the restricted characters are accepted.
            test!(func, true, toer)("]]>");
            test!(func, true, toer)("foo]]>bar");

            test!(func, true, toer)("<");
            test!(func, true, toer)("&");
            test!(func, true, toer)("&x");
            test!(func, true, toer)("&&;");
            test!(func, true, toer)("&a");
            test!(func, true, toer)("&a;");
            test!(func, true, toer)(`&am;`);
            test!(func, true, toer)(`&ampe;`);
            test!(func, true, toer)(`&l;`);
            test!(func, true, toer)(`&lte;`);
            test!(func, true, toer)(`&g;`);
            test!(func, true, toer)(`&gte;`);
            test!(func, true, toer)(`&apo;`);
            test!(func, true, toer)(`&aposs;`);
            test!(func, true, toer)(`&quo;`);
            test!(func, true, toer)(`&quote;`);
            test!(func, true, toer)("hello&;");
            test!(func, true, toer)("hello&;world");
            test!(func, true, toer)("hello&<;world");
            test!(func, true, toer)("hello&world");
            test!(func, true, toer)("hello world&");
            test!(func, true, toer)("hello world&;");
            test!(func, true, toer)("hello world&foo");
            test!(func, true, toer)("&#;");
            test!(func, true, toer)("&#x;");
            test!(func, true, toer)("&#AF;");
            test!(func, true, toer)("&#x");
            test!(func, true, toer)("&#42");
            test!(func, true, toer)("&#x42");
            test!(func, true, toer)("&#12;");
            test!(func, true, toer)("&#x12;");
            test!(func, true, toer)("&#42;foo\nbar&#;");
            test!(func, true, toer)("&#42;foo\nbar&#x;");
            test!(func, true, toer)("&#42;foo\nbar&#AF;");
            test!(func, true, toer)("&#42;foo\nbar&#x");
            test!(func, true, toer)("&#42;foo\nbar&#42");
            test!(func, true, toer)("&#42;foo\nbar&#x42");
            test!(func, true, toer)("プログラミング&");
        }
    }

    // These can't be tested with testFail, because attempting to convert
    // invalid Unicode results in UnicodeExceptions before parseXML even
    // gets called.
    import std.meta : AliasSeq;
    static foreach(str; AliasSeq!(cast(string)[255], cast(wstring)[0xD800], cast(dstring)[0xD800]))
    {
        static foreach(arc; [false, true])
        {{
            auto text = testParser(str);
            auto e = collectException!XMLParsingException(text.checkText!arc());
            assert(e !is null);
            assert(e.pos == TextPos(1, 1));
        }}
    }
}
// Checks that checkText can be called from @safe code for each function in
// testRangeFuncs, each listed configuration, and both values of
// allowRestrictedChars.
version(dxmlTests) @safe unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(rangeFunc; testRangeFuncs)
    {
        static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
        {
            static foreach(allowRestricted; [false, true])
            {{
                auto source = rangeFunc("foo");
                auto parser = testParser!config(source);
                checkText!allowRestricted(parser);
            }}
        }
    }
}
// Returns whether c is one of the four whitespace characters allowed by the
// XML spec's S production:
// S := (#x20 | #x9 | #xD | #xA)+
bool isSpace(C)(C c) @safe pure nothrow @nogc
    if(isSomeChar!C)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
// Compares isSpace against the explicit list of XML whitespace characters
// over a range of char, wchar, and dchar values.
version(dxmlTests) pure nothrow @safe @nogc unittest
{
    foreach(char c; char.min .. char.max)
    {
        immutable expected = c == ' ' || c == '\t' || c == '\r' || c == '\n';
        assert(isSpace(c) == expected);
    }

    // Only a leading slice of the wchar and dchar ranges is covered.
    foreach(wchar c; wchar.min .. wchar.max / 100)
    {
        immutable expected = c == ' ' || c == '\t' || c == '\r' || c == '\n';
        assert(isSpace(c) == expected);
    }

    foreach(dchar c; dchar.min .. dchar.max / 1000)
    {
        immutable expected = c == ' ' || c == '\t' || c == '\r' || c == '\n';
        assert(isSpace(c) == expected);
    }
}
// Removes the front element of text.input and then moves text.pos.col forward
// by one to account for it.
pragma(inline, true) void popFrontAndIncCol(Text)(ref Text text)
{
    text.input.popFront();
    text.pos.col += 1;
}
// Moves pos to the start of the following line: the line number goes up by
// one and the column is reset to 1.
pragma(inline, true) void nextLine(Config config)(ref TextPos pos)
{
    pos.col = 1;
    pos.line += 1;
}
// Throws an XMLParsingException at text.pos if text.input is empty. line is
// forwarded to the exception along with __FILE__.
// TODO create bug report, because this function cannot be inlined
/+pragma(inline, true)+/ void checkNotEmpty(Text)(ref Text text, size_t line = __LINE__)
{
    if(!text.input.empty)
        return;
    throw new XMLParsingException("Prematurely reached end of document", text.pos, __FILE__, line);
}
// A small selection of parser configurations; only declared when the
// dxmlTests version is set.
version(dxmlTests)
{
    enum someTestConfigs = [Config.init,
                            simpleXML,
                            makeConfig(SkipComments.yes),
                            makeConfig(SkipPI.yes)];
}
// Fuzz-testing failures
//
// Each input here must make the parser throw an XMLParsingException while
// every entity and all of its properties are being accessed.
version(dxmlTests) unittest
{
    // Parses xml and touches every property which is valid for each entity
    // type so that any parsing which is done on access is triggered as well.
    static void parseEverything(string xml)
    {
        with(EntityType) foreach(entity; parseXML(xml))
        {
            // Access name where the entity type has one.
            final switch(entity.type)
            {
                case cdata: break;
                case comment: break;
                case elementStart: auto name = entity.name; break;
                case elementEnd: goto case elementStart;
                case elementEmpty: goto case elementStart;
                case pi: goto case elementStart;
                case text: break;
            }

            // Access text or attributes, depending on the entity type.
            final switch(entity.type)
            {
                case cdata: auto text = entity.text; break;
                case comment: goto case cdata;
                case elementStart:
                {
                    foreach(attr; entity.attributes)
                    {
                        auto name = attr.name;
                        auto value = attr.value;
                    }
                    break;
                }
                case elementEnd: break;
                case elementEmpty: goto case elementStart;
                case pi: goto case cdata;
                case text: goto case cdata;
            }
        }
    }

    // Asserts that fully parsing xml throws an XMLParsingException. line is
    // forwarded so that a failure is reported at the call site rather than
    // inside this helper.
    static void testFail(string xml, size_t line = __LINE__)
    {
        import std.exception : assertThrown;
        assertThrown!XMLParsingException(parseEverything(xml), "unittest failure", __FILE__, line);
    }

    testFail([0x3c, 0xff, 0x3e, 0x3e, 0x3a, 0x3c, 0x2f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x31, 0xff,
              0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xff, 0xff,
              0xff]);
}