1 // Written in the D programming language
2 
3 /++
4     This module contains helper functions which aren't specific to the parser,
5     the DOM, or the writer.
6 
7     $(TABLE
8         $(TR $(TH Symbol) $(TH Description))
9         $(TR $(TD $(LREF decodeXML))
10              $(TD Takes a range of characters, strips carriage returns from it,
11                   and converts both character references and the predefined
12                   entity references in the range into the characters that they
13                   refer to.))
14         $(TR $(TD $(LREF asDecodedXML))
15              $(TD The version of $(LREF decodeXML) that returns a lazy range.))
16         $(TR $(TD $(LREF parseCharRef))
17              $(TD Parses a character reference from the front of a range of
18                   characters.))
19         $(TR $(TD $(LREF parseStdEntityRef))
20              $(TD Parses one of the predefined entity references from the start
21                   of a range of characters.))
22         $(TR $(TD $(LREF stripIndent))
23              $(TD Removes the indent from the front of each line of a range of
24                   characters that was XML text which was formatted for
25                   human-readability.))
26         $(TR $(TD $(LREF withoutIndent))
27              $(TD The version of $(LREF stripIndent) that returns a lazy
28                   range.))
29         $(TR $(TD $(LREF StdEntityRef))
30              $(TD Enum containing the string representations of the five,
31                   predefined entity references.))
32         $(TR $(TD $(LREF encodeText))
33              $(TD Encodes characters which cannot appear in
34                   $(REF_ALTTEXT EntityType.text, EntityType.text, dxml, parser)
35                   in their literal form.))
36         $(TR $(TD $(LREF encodeAttr))
37              $(TD Encodes characters which cannot appear in the attribute value
38                   of an element start tag in their literal form.))
39         $(TR $(TD $(LREF encodeCharRef))
40              $(TD Encodes a character as a character reference.))
41     )
42 
43     Copyright: Copyright 2018
44     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
45     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
46     Source:    $(LINK_TO_SRC dxml/_util.d)
47 
48     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
49   +/
50 module dxml.util;
51 
52 import std.range.primitives;
53 import std.traits;
54 import std.typecons : Nullable;
55 
56 /++
57     Decodes any XML character references and standard XML entity references in
58     the text as well as removing any carriage returns. It's intended to be used
59     on the text fields of element tags and on the values of start tag
60     attributes.
61 
62     There are a number of characters that either can't be directly represented
63     in the text fields or attribute values in XML or which can sometimes be
64     directly represented but not always (e.g. an attribute value can contain
65     either a single quote or a double quote, but it can't contain both at the
66     same time, because one of them would match the opening quote). So, those
67     characters have alternate representations in order to be allowed (e.g.
68     $(D_CODE_STRING "$(AMP)lt;") for $(D_CODE_STRING '<'), because
69     $(D_CODE_STRING '<') would normally be the beginning of an entity).
70     Technically, they're entity references, but the ones handled by decodeXML
71     are the ones explicitly defined in the XML standard and which don't require
72     a DTD section.
73 
74     Ideally, the parser would transform all such alternate representations to
75     what they represent when providing the text to the application, but that
76     would make it impossible to return slices of the original text from the
77     properties of an $(REF_ALTTEXT Entity, EntityRange.Entity, dxml, parser).
78     So, instead of having those properties do the transformation themselves,
79     decodeXML and asDecodedXML do that so that the application can choose to do
80     it or not (in many cases, there is nothing to decode, making the calls
81     unnecessary).
82 
83     Similarly, an application can choose to encode a character as a character
    reference (e.g. $(D_CODE_STRING "$(AMP)#65;") or
    $(D_CODE_STRING "$(AMP)#x41;") for $(D_CODE_STRING 'A')). decodeXML will
86     decode such character references to their corresponding characters.
87 
88     However, decodeXML does not handle any entity references beyond the five
89     predefined ones listed below. All others are left unprocessed. Processing
90     them properly would require handling the DTD section, which dxml does not
91     support. The parser considers any entity references other than the
92     predefined ones to be invalid XML, so unless the text being passed to
93     decodeXML doesn't come from dxml's parser, it can't have any entity
94     references in it other than the predefined ones. Similarly, invalid
95     character references are left unprocessed as well as any character that is
96     not valid in an XML document. decodeXML never throws on invalid XML.
97 
98     Also, $(D_CODE_STRING '\r') is not supposed to appear in an XML document
99     except as a character reference unless it's in a CDATA section. So, it
100     really should be stripped out before being handed off to the application,
101     but again, that doesn't work with slices. So, decodeXML also handles that.
102 
103     Specifically, what decodeXML and asDecodedXML do is
104 
105     $(TABLE
106         $(TR $(TD convert $(D_CODE_STRING $(AMP)amp;) to $(D_CODE_STRING &)))
107         $(TR $(TD convert $(D_CODE_STRING $(AMP)gt;) to $(D_CODE_STRING >)))
108         $(TR $(TD convert $(D_CODE_STRING $(AMP)lt;) to $(D_CODE_STRING <)))
109         $(TR $(TD convert $(D_CODE_STRING $(AMP)apos;) to $(D_CODE_STRING ')))
110         $(TR $(TD convert $(D_CODE_STRING $(AMP)quot;) to $(D_CODE_STRING ")))
111         $(TR $(TD remove all instances of $(D_CODE_STRING \r)))
112         $(TR $(TD convert all character references (e.g.
113                   $(D_CODE_STRING $(AMP)#xA;)) to the characters that they
114                   represent))
115     )
116 
117     All other entity references are left untouched, and any $(D_CODE_STRING '&')
118     which is not used in one of the constructs listed in the table as well as
119     any malformed constructs (e.g. $(D_CODE_STRING "&Amp;") or
120     $(D_CODE_STRING "&#xGGA2;")) are left untouched.
121 
122     The difference between decodeXML and asDecodedXML is that decodeXML returns
123     a $(K_STRING), whereas asDecodedXML returns a lazy _range of code
124     units. In the case where a $(K_STRING) is passed to decodeXML, it
125     will simply return the original $(K_STRING) if there is no text to decode
126     (whereas in other cases, decodeXML and asDecodedXML are forced to return
127     new ranges even if there is no text to decode).
128 
129     Params:
130         range = The _range of characters to decodeXML.
131 
132     Returns: The decoded text. decodeXML returns a $(K_STRING), whereas
133              asDecodedXML returns a lazy _range of code units (so it could be a
134              _range of $(K_CHAR) or $(K_WCHAR) and not just $(K_DCHAR); which it
135              is depends on the code units of the _range being passed in).
136 
137     See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR)
138               $(LREF parseStdEntityRef)$(BR)
139               $(LREF parseCharRef)$(BR)
140               $(REF EntityRange.Entity.attributes, dxml, parser)$(BR)
141               $(REF EntityRange.Entity.text, dxml, parser)$(BR)
142               $(LREF encodeAttr)$(BR)
143               $(LREF encodeText)
144   +/
string decodeXML(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.conv : to;

    static if(isDynamicArray!R && is(Unqual!(ElementEncodingType!R) == char))
    {
        // Arrays of char are handled by slicing: stretches that need no
        // decoding are appended in bulk, and only '&' and '\r' get individual
        // treatment.
        import std.algorithm.searching : find;
        import std.array : appender;
        import std.meta : AliasSeq;

        // found[0] is the tail of range starting at the first '&' or '\r';
        // found[1] is 0 when neither character occurs in range.
        auto found = range.find('&', '\r');
        if(found[1] == 0)
        {
            // Nothing to decode. R may be char[] or const(char)[] here, which
            // do not implicitly convert to string, so go through to!string.
            // A string argument is returned as-is by to!string.
            return range.to!string();
        }

        auto retval = appender!string();
        retval.reserve(range.length);
        // Everything before the first '&' or '\r' is copied unchanged.
        put(retval, range[0 .. $ - found[0].length]);
        range = range[$ - found[0].length .. $];

        // Invariant: range[0 .. i] has been scanned, needs no decoding, and
        // has not been appended to retval yet.
        size_t i = 0;
        loop: for(; i != range.length;)
        {
            switch(range[i])
            {
                case '&':
                {
                    // A trailing '&' cannot start a reference; keep it as
                    // part of the pending text, which is flushed after the loop.
                    if(i + 1 == range.length)
                    {
                        ++i;
                        break loop;
                    }
                    // Flush the pending text so that range starts at the '&'.
                    put(retval, range[0 .. i]);
                    range = range[i .. $];
                    i = 0;
                    // Each parser takes range by ref and only advances it when
                    // it successfully parses a reference.
                    static foreach(func; AliasSeq!(parseStdEntityRef, parseCharRef))
                    {{
                        immutable c = func(range);
                        if(!c.isNull)
                        {
                            put(retval, c.get);
                            continue loop;
                        }
                    }}
                    // Not a reference that is handled here: the '&' is kept
                    // literally.
                    put(retval, '&');
                    range = range[1 .. $];
                    continue;
                }
                case '\r':
                {
                    // Drop the '\r', flushing any pending text first.
                    if(i != 0)
                    {
                        put(retval, range[0 .. i]);
                        range = range[i + 1 .. $];
                        i = 0;
                    }
                    else
                        range = range[1 .. $];
                    continue;
                }
                default: ++i; continue;
            }
        }

        // Flush whatever pending text is left.
        if(i != 0)
            put(retval, range[0 .. i]);

        return retval.data;
    }
    else
    {
        // Every other kind of range goes through the lazy implementation and
        // is converted to a string.
        return range.asDecodedXML().to!string();
    }
}
218 
219 
220 /// Ditto
auto asDecodedXML(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.meta : AliasSeq;
    import std.utf : byCodeUnit, encode, UseReplacementDchar;

    // Lazy forward range of code units. Code units are passed through from
    // _range unchanged except that '\r' is skipped and any reference that
    // parseStdEntityRef or parseCharRef can parse is replaced by the code
    // units of the character that it represents.
    static struct DecodedXML
    {
    public:

        // The range is only empty once both the underlying range and the
        // buffer of decoded code units have been consumed.
        @property empty() { return _range.empty && _begin == _end; }

        void popFront()
        {
            if(_begin != _end)
            {
                // front came from the buffer. If code units are left in it,
                // then the next one becomes front.
                if(++_begin != _end)
                    return;
                // The buffer just ran out. _range was already advanced past
                // the reference when the buffer was filled, so it must not be
                // popped here.
            }
            else
                _range.popFront();
            _popFrontImpl();
        }

        @property save()
        {
            auto retval = this;
            retval._range = _range.save;
            return retval;
        }

    private:

        // Advances _range until either it is empty or front is valid: '\r' is
        // skipped, and a reference at the front of _range is parsed and
        // encoded into _buffer. Must only be called when the buffer is empty.
        void _popFrontImpl()
        {
            while(!_range.empty)
            {
                switch(_range.front)
                {
                    case '&':
                    {
                        static foreach(func; AliasSeq!(parseStdEntityRef, parseCharRef))
                        {{
                            immutable c = func(_range);
                            if(!c.isNull)
                            {
                                _begin = 0;
                                // c.get rather than c: matches decodeXML and
                                // does not rely on Nullable's implicit
                                // conversion to its payload.
                                _end = _buffer.encode!(UseReplacementDchar.yes)(c.get);
                                return;
                            }
                        }}
                        // Not a reference that can be decoded, so the '&' is
                        // passed through like any other code unit.
                        goto default;
                    }
                    case '\r':
                    {
                        assert(_begin == _end);
                        _range.popFront();
                        continue;
                    }
                    default:
                    {
                        assert(_begin == _end);
                        return;
                    }
                }
            }
        }

        this(R range) @safe
        {
            _range = byCodeUnit(range);
            _popFrontImpl();
        }

        typeof(byCodeUnit(R.init)) _range;
        // Holds the code units of one decoded character in the encoding of R.
        static if(is(Unqual!(ElementEncodingType!R) == char))
            char[4] _buffer;
        else static if(is(Unqual!(ElementEncodingType!R) == wchar))
            wchar[2] _buffer;
        else
            dchar[1] _buffer;
        // _buffer[_begin .. _end] are the buffered code units which have not
        // been popped yet; the buffer is empty when _begin == _end.
        size_t _begin;
        size_t _end;

    public:

        // FIXME A compiler bug prevents this from going with the public declarations
        // above. If it's there, the compiler thinks that _buffer isn't defined when
        // it tries to compile front. It needs to be reduced and reported.
        @property typeof(_buffer[0]) front() { return _begin == _end ? _range.front : _buffer[_begin]; }
    }

    return DecodedXML(range);
}
315 
316 ///
version(dxmlTests) unittest
{
    // The predefined entity references are decoded, and every '\r' is removed.
    assert(decodeXML("hello world &amp;&gt;&lt;&apos;&quot; \r\r\r\r\r foo") ==
           `hello world &><'"  foo`);

    // "\r\n" becomes "\n".
    assert(decodeXML("if(foo &amp;&amp; bar)\r\n" ~
                     "    left = right;") ==
           "if(foo && bar)\n" ~
           "    left = right;");

    // Character references are decoded; text with nothing to decode and
    // malformed references come back unchanged.
    assert(decodeXML("&#12487;&#12451;&#12521;&#12531;") == "ディラン");
    assert(decodeXML("foo") == "foo");
    assert(decodeXML("&#   ;") == "&#   ;");

    // asDecodedXML yields the same result as a range.
    {
        import std.algorithm.comparison : equal;
        auto range = asDecodedXML("hello world &amp;&gt;&lt;&apos;&quot; " ~
                                  "\r\r\r\r\r foo");
        assert(equal(range, `hello world &><'"  foo`));
    }

    // Decoding attribute values and text obtained from dxml's parser.
    {
        import dxml.parser;
        // NOTE(review): the two <param> tags are never closed before
        // </function>. The walk below stops at the second <param>, so the
        // remainder of the document is not examined here - confirm that this
        // is intended for a documented example.
        auto xml = "<root>\n" ~
                   "    <function return='vector&lt;int&gt;' name='foo'>\r\n" ~
                   "        <doc_comment>This function does something really\r\n" ~
                   "                 fancy, and you will love it.</doc_comment>\r\n" ~
                   "        <param type='int' name='i'>\r\n" ~
                   "        <param type='const std::string&amp;' name='s'>\r\n" ~
                   "    </function>\n" ~
                   "</root>";
        auto range = parseXML!simpleXML(xml);
        // Move past the first entity to <function>.
        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "function");
        {
            // The parser hands back the raw attribute value; decodeXML
            // produces the decoded form.
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "return");
            assert(attrs.front.value == "vector&lt;int&gt;");
            assert(decodeXML(attrs.front.value) == "vector<int>");
            attrs.popFront();
            assert(attrs.front.name == "name");
            assert(attrs.front.value == "foo");
            assert(decodeXML(attrs.front.value) == "foo");
        }
        range.popFront();

        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "doc_comment");
        range.popFront();

        // The raw text still contains "\r\n"; the decoded text contains "\n".
        assert(range.front.text ==
               "This function does something really\r\n" ~
               "                 fancy, and you will love it.");
        assert(decodeXML(range.front.text) ==
               "This function does something really\n" ~
               "                 fancy, and you will love it.");
        range.popFront();

        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "doc_comment");
        range.popFront();

        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "param");
        {
            // Values with nothing to decode are returned unchanged.
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "type");
            assert(attrs.front.value == "int");
            assert(decodeXML(attrs.front.value) == "int");
            attrs.popFront();
            assert(attrs.front.name == "name");
            assert(attrs.front.value == "i");
            assert(decodeXML(attrs.front.value) == "i");
        }
        range.popFront();

        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "param");
        {
            auto attrs = range.front.attributes;
            assert(attrs.front.name == "type");
            assert(attrs.front.value == "const std::string&amp;");
            assert(decodeXML(attrs.front.value) == "const std::string&");
            attrs.popFront();
            assert(attrs.front.name == "name");
            assert(attrs.front.value == "s");
            assert(decodeXML(attrs.front.value) == "s");
        }
    }
}
408 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : enforce;
    import std.utf : byUTF;
    import dxml.internal : testRangeFuncs;

    // Converts input with makeRange and checks that both decodeXML and
    // asDecodedXML produce wanted. Failures are reported at the line of the
    // caller.
    static void check(alias makeRange)(string input, string wanted, size_t line = __LINE__)
    {
        auto src = makeRange(input);

        immutable eager = decodeXML(src.save);
        enforce!AssertError(eager == wanted, "unittest failed 1", __FILE__, line);

        // The lazy range is compared code unit by code unit, so the expected
        // string is transcoded to the code unit type of the lazy range.
        auto lazyResult = asDecodedXML(src.save);
        alias CodeUnit = ElementType!(typeof(lazyResult));
        enforce!AssertError(equal(lazyResult, byUTF!CodeUnit(wanted)), "unittest failed 2", __FILE__, line);
    }

    static foreach(makeRange; testRangeFuncs)
    {{
        check!makeRange("hello world &amp;  &gt;  &lt;  &apos;  &quot; \r\r\r\r\r foo", `hello world &  >  <  '  "  foo`);
        check!makeRange("&amp", "&amp");
        check!makeRange("&#01234567890;", "&#01234567890;");
        check!makeRange("&", "&");
        check!makeRange("&&&&", "&&&&");
        check!makeRange("&&&&amp;", "&&&&");
        check!makeRange("&#", "&#");
        check!makeRange("&#;", "&#;");
        check!makeRange("&#0", "&#0");
        check!makeRange("&#0;", "&#0;");
        check!makeRange("&#48;", "0");
        check!makeRange("&#0amp;", "&#0amp;");
        check!makeRange("&#amp;", "&#amp;");
        check!makeRange("&#x", "&#x");
        check!makeRange("&#x;", "&#x;");
        check!makeRange("&#x0;", "&#x0;");
        check!makeRange("&#x9;", "\t");
        check!makeRange("&#x20;", " ");
        check!makeRange("&#12487;&#12451;&#12521;&#12531;", "ディラン");
    }}
}
448 
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    // decodeXML and asDecodedXML must be callable from @safe pure code with
    // the result of each function in testRangeFuncs.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto eager = makeRange("foo").decodeXML();
        assert(eager == "foo");

        auto lazyResult = makeRange("foo").asDecodedXML();
        assert(lazyResult.equal("foo"));
    }}
}
460 
461 
462 /++
    This parses one of the five, predefined entity references mentioned in the
    XML spec from the front of a range of characters.
465 
466     If the given range starts with one of the five, predefined entity
467     references, then it is removed from the range, and the corresponding
468     character is returned.
469 
470     If the range does not start with one of those references, then the return
471     value is null, and the range is unchanged.
472 
473     $(TABLE
474         $(TR $(TH Std Entity Ref)$(TH Converts To))
475         $(TR $(TD $(D_CODE_STRING $(AMP)amp;))$(TD $(D_CODE_STRING &)))
476         $(TR $(TD $(D_CODE_STRING $(AMP)gt;))$(TD $(D_CODE_STRING >)))
477         $(TR $(TD $(D_CODE_STRING $(AMP)lt;))$(TD $(D_CODE_STRING $(LT))))
478         $(TR $(TD $(D_CODE_STRING $(AMP)apos;))$(TD $(D_CODE_STRING ')))
479         $(TR $(TD $(D_CODE_STRING $(AMP)quot;))$(TD $(D_CODE_STRING ")))
480     )
481 
482     Any other entity references would require processing a DTD section in order
483     to be handled and are untouched by parseStdEntityRef as are any other types
484     of references.
485 
486     Params:
487         range = A range of characters.
488 
489     Returns: The character represented by the predefined entity reference that
490              was parsed from the front of the given range or null if the range
491              did not start with one of the five predefined entity references.
492 
493     See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR)
494               $(LREF parseCharRef)$(BR)
495               $(LREF decodeXML)$(BR)
496               $(LREF asDecodedXML)
497   +/
Nullable!dchar parseStdEntityRef(R)(ref R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.algorithm.searching : startsWith;
    import std.typecons : nullable, tuple;
    import std.utf : byCodeUnit;

    // Kept so that the range can be put back as it was if no predefined
    // entity reference follows the '&'.
    auto saved = range.save;

    // Narrow strings are examined by code unit; any other range is used
    // directly (so popping from units pops from range itself).
    static if(isNarrowString!R)
        auto units = range.byCodeUnit();
    else
        alias units = range;

    if(!units.save.startsWith('&'))
        return typeof(return).init;
    units.popFront();

    if(!units.empty)
    {
        // Text that follows the '&' paired with the character it stands for.
        static foreach(entry; [tuple("amp;", '&'), tuple("gt;", '>'), tuple("lt;", '<'),
                               tuple("apos;", '\''), tuple("quot;", '"')])
        {
            if(units.save.startsWith(entry[0]))
            {
                units.popFrontN(entry[0].length);
                // For narrow strings, units is a separate wrapper, so what is
                // left of it has to be written back to range.
                static if(isNarrowString!R)
                    range = units.source;
                return nullable(cast(dchar)entry[1]);
            }
        }
    }

    // No match: undo the pop of the '&'.
    range = saved;
    return typeof(return).init;
}
534 
535 ///
version(dxmlTests) unittest
{
    // Each of the five predefined entity references is removed from the
    // front of the text, and its character is returned.
    {
        auto text = "&amp;foo";
        assert(parseStdEntityRef(text) == '&');
        assert(text == "foo");
    }
    {
        auto text = "&gt;bar";
        assert(parseStdEntityRef(text) == '>');
        assert(text == "bar");
    }
    {
        auto text = "&lt;baz";
        assert(parseStdEntityRef(text) == '<');
        assert(text == "baz");
    }
    {
        auto text = "&apos;dlang";
        assert(parseStdEntityRef(text) == '\'');
        assert(text == "dlang");
    }
    {
        auto text = "&quot;rocks";
        assert(parseStdEntityRef(text) == '"');
        assert(text == "rocks");
    }

    // Anything else results in null, and the text is left as it was.
    {
        auto text = " &amp;foo";
        assert(parseStdEntityRef(text).isNull);
        assert(text == " &amp;foo");
    }
    {
        auto text = "&Amp;hello";
        assert(parseStdEntityRef(text).isNull);
        assert(text == "&Amp;hello");
    }
    {
        auto text = "&nbsp;foo";
        assert(parseStdEntityRef(text).isNull);
        assert(text == "&nbsp;foo");
    }
    {
        auto text = "hello world";
        assert(parseStdEntityRef(text).isNull);
        assert(text == "hello world");
    }
}
584 
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {
        // No position in this text starts a predefined entity reference, so
        // parsing must fail at every position and leave the range untouched.
        {
            auto remaining = makeRange(";Amp;amp;&#amp;&copy;& amp;");
            while(!remaining.empty)
            {
                auto probe = remaining.save;
                assert(parseStdEntityRef(probe).isNull);
                assert(equal(remaining.save, probe.save));
                remaining.popFront();
            }
        }
        // The terminating ';' is required.
        {
            auto src = makeRange("&amp");
            assert(parseStdEntityRef(src).isNull);
            assert(equal(src.save, "&amp"));
        }
        // Once the leading space is gone, all five references parse in turn.
        {
            auto src = makeRange(" &amp;&gt;&lt;&apos;&quot;");
            assert(parseStdEntityRef(src).isNull);
            assert(equal(src.save, " &amp;&gt;&lt;&apos;&quot;"));
            src.popFront();

            assert(parseStdEntityRef(src) == '&');
            assert(equal(src.save, "&gt;&lt;&apos;&quot;"));
            assert(parseStdEntityRef(src) == '>');
            assert(equal(src.save, "&lt;&apos;&quot;"));
            assert(parseStdEntityRef(src) == '<');
            assert(equal(src.save, "&apos;&quot;"));
            assert(parseStdEntityRef(src) == '\'');
            assert(equal(src.save, "&quot;"));
            assert(parseStdEntityRef(src) == '"');
            assert(src.empty);
        }
    }
}
622 
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    // parseStdEntityRef must be callable from @safe pure code with the result
    // of each function in testRangeFuncs.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto src = makeRange("foo");
        assert(parseStdEntityRef(src).isNull);
    }}
}
633 
634 
635 /++
636     If the given range starts with a valid, XML, character reference, it is
637     removed from the range, and the corresponding character is returned.
638 
639     If the range does not start with a valid, XML, character reference, then
640     the return value is null, and the range is unchanged.
641 
642     Params:
643         range = A range of characters.
644 
645     Returns: The character represented by the character reference that was
646              parsed from the front of the given range or null if the range did
647              not start with a valid, XML, character reference.
648 
649     See_Also: $(LINK http://www.w3.org/TR/REC-xml/#NT-CharRef)$(BR)
650               $(LREF parseStdEntityRef)$(BR)
651               $(LREF decodeXML)$(BR)
652               $(LREF asDecodedXML)$(BR)
653               $(LREF encodeCharRef)
654   +/
Nullable!dchar parseCharRef(R)(ref R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.algorithm.searching : startsWith;
    import std.conv : ConvException, parse, to;
    import std.range : popFrontN;
    import std.typecons : nullable;
    import std.utf : byCodeUnit;
    import dxml.internal : isXMLChar;

    // Kept so that the range can be put back as it was if what follows "&#"
    // turns out not to be a valid character reference.
    auto orig = range.save;

    // Narrow strings are examined by code unit; any other range is used
    // directly (so popping from cuRange pops from range itself).
    static if(isNarrowString!R)
        auto cuRange = range.byCodeUnit();
    else
        alias cuRange = range;

    if(!cuRange.save.startsWith("&#"))
        return typeof(return).init;
    cuRange.popFrontN(2);

    if(cuRange.empty)
        goto invalid;

    {
        // "&#x" introduces a hexadecimal reference; otherwise, the number is
        // decimal.
        bool hex = false;
        if(cuRange.front == 'x')
        {
            cuRange.popFront();
            hex = true;
            // The first hex digit is checked by hand before calling parse.
            // NOTE(review): presumably a workaround for
            // https://issues.dlang.org/show_bug.cgi?id=18248 - confirm
            // whether it is still needed.
            import std.ascii : isHexDigit;
            if(cuRange.empty || !isHexDigit(cuRange.front))
                goto invalid;
        }
        try
        {
            immutable c = to!dchar(cuRange.parse!uint(hex ? 16 : 10));
            // The number must be followed by ';' and be a character that is
            // allowed in XML. cuRange is saved before being handed to
            // startsWith, like at every other call site, so that the check
            // cannot consume anything from a range whose copies share state.
            // NOTE(review): '\n' is accepted explicitly rather than through
            // isXMLChar - presumably isXMLChar must not be given '\n';
            // confirm in dxml.internal.
            if(!cuRange.save.startsWith(';') || (c != '\n' && !isXMLChar(c)))
                goto invalid;
            cuRange.popFront();
            // For narrow strings, cuRange is a separate wrapper, so what is
            // left of it has to be written back to range.
            static if(isNarrowString!R)
                range = cuRange.source;
            return nullable(cast()c);
        }
        catch(ConvException)
        {
            // Deliberately ignored: a number that cannot be converted means
            // that this is not a valid character reference, which is handled
            // below like every other failure.
        }
    }

    invalid: range = orig;
    return typeof(return).init;
}
707 
708 ///
version(dxmlTests) unittest
{
    import std.range.primitives : empty;

    // Decimal and hexadecimal references are removed from the front of the
    // text, and the character that they refer to is returned.
    {
        auto text = "&#48; hello world";
        assert(text.parseCharRef() == '0');
        assert(text == " hello world");
    }
    {
        auto text = "&#x30; hello world";
        assert(text.parseCharRef() == '0');
        assert(text == " hello world");
    }
    {
        auto text = "&#12487;&#12451;&#12521;&#12531;";
        assert(text.parseCharRef() == 'デ');
        assert(text == "&#12451;&#12521;&#12531;");
        assert(text.parseCharRef() == 'ィ');
        assert(text == "&#12521;&#12531;");
        assert(text.parseCharRef() == 'ラ');
        assert(text == "&#12531;");
        assert(text.parseCharRef() == 'ン');
        assert(text.empty);
    }

    // Anything else results in null, and the text is left as it was.
    {
        auto text = "&#x;foo";
        assert(text.parseCharRef().isNull);
        assert(text == "&#x;foo");
    }
    {
        auto text = "foobar";
        assert(text.parseCharRef().isNull);
        assert(text == "foobar");
    }
    {
        auto text = " &x48;";
        assert(text.parseCharRef().isNull);
        assert(text == " &x48;");
    }
}
750 
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {
        // No position in this text starts a valid character reference, so
        // parsing must fail at every position and leave the range untouched.
        {
            auto remaining = makeRange(";;&#;&#G;&#1234567890;&#F;");
            while(!remaining.empty)
            {
                auto probe = remaining.save;
                assert(parseCharRef(probe).isNull);
                assert(equal(remaining.save, probe.save));
                remaining.popFront();
            }
        }
        // The terminating ';' is required.
        {
            auto src = makeRange("&#65");
            assert(parseCharRef(src).isNull);
            assert(equal(src.save, "&#65"));
        }
        // Decimal and hexadecimal references parse in turn; the spaces in
        // between stop the parsing until they are popped.
        {
            auto src = makeRange(" &#65;&#x42;&#67; &#x4EAC;&#x90FD;&#x5E02;");
            assert(parseCharRef(src).isNull);
            assert(equal(src.save, " &#65;&#x42;&#67; &#x4EAC;&#x90FD;&#x5E02;"));
            src.popFront();

            assert(parseCharRef(src) == 'A');
            assert(equal(src.save, "&#x42;&#67; &#x4EAC;&#x90FD;&#x5E02;"));
            assert(parseCharRef(src) == 'B');
            assert(equal(src.save, "&#67; &#x4EAC;&#x90FD;&#x5E02;"));
            assert(parseCharRef(src) == 'C');
            assert(equal(src.save, " &#x4EAC;&#x90FD;&#x5E02;"));

            assert(parseCharRef(src).isNull);
            assert(equal(src.save, " &#x4EAC;&#x90FD;&#x5E02;"));
            src.popFront();

            assert(parseCharRef(src) == '京');
            assert(equal(src.save, "&#x90FD;&#x5E02;"));
            assert(parseCharRef(src) == '都');
            assert(equal(src.save, "&#x5E02;"));
            assert(parseCharRef(src) == '市');
            assert(src.empty);
        }
    }
}
795 
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    // parseCharRef must be callable from @safe pure code with the result of
    // each function in testRangeFuncs.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto src = makeRange("foo");
        assert(parseCharRef(src).isNull);
    }}
}
806 
807 
808 /++
809     Strips the indent from a character range (most likely from
810     $(REF_ALTTEXT Entity.text, EntityRange.Entity.text, dxml, parser)).
811     The idea is that if the XML is formatted to be human-readable, and it's
812     multiple lines long, the lines are likely to be indented, but the
813     application probably doesn't want that extra whitespace. So, stripIndent
814     and withoutIndent attempt to intelligently strip off the leading
815     whitespace.
816 
817     For these functions, whitespace is considered to be some combination of
818     $(D_CODE_STRING ' '), $(D_CODE_STRING '\t'), and $(D_CODE_STRING '\r')
819     ($(D_CODE_STRING '\n') is used to delineate lines, so it's not considered
820      whitespace).
821 
822     Whitespace characters are stripped from the start of the first line, and
823     then those same number of whitespace characters are stripped from the
824     beginning of each subsequent line (or up to the first non-whitespace
825     character if the line starts with fewer whitespace characters).
826 
    If the first line has no leading whitespace, then the leading whitespace on
    the second line is treated as the indent. This is done to handle the case
    where there is text immediately after a start tag and then subsequent lines
    are indented rather than the text starting on the line after the start tag.
831 
832     If neither of the first two lines has any leading whitespace, then no
833     whitespace is stripped.
834 
835     So, if the text is well-formatted, then the indent should be cleanly
836     removed, and if it's unformatted or badly formatted, then no characters
837     other than leading whitespace will be removed, and in principle, no real
838     data will have been lost - though of course, it's up to the programmer to
839     decide whether it's better for the application to try to cleanly strip the
840     indent or to leave the text as-is.
841 
842     The difference between stripIndent and withoutIndent is that stripIndent
843     returns a $(K_STRING), whereas withoutIndent returns a lazy range
844     of code units. In the case where a $(K_STRING) is passed to
845     stripIndent, it will simply return the original string if there is no
846     indent (whereas in other cases, stripIndent and withoutIndent are forced to
847     return new ranges).
848 
849     Params:
850         range = A range of characters.
851 
852     Returns: The text with the indent stripped from each line. stripIndent
853              returns a $(K_STRING), whereas withoutIndent returns a lazy range
854              of code units (so it could be a range of $(K_CHAR) or $(K_WCHAR)
855              and not just $(K_DCHAR); which it is depends on the code units of
856              the range being passed in).
857 
858     See_Also: $(REF EntityRange.Entity.text, dxml, parser)
859   +/
string stripIndent(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    // Fast path for strings: slices of the input can be returned or appended
    // directly. It is restricted to arrays of immutable(char) because the code
    // below returns and stores slices of the input as string, which does not
    // compile for char[] or const(char)[]; those go through withoutIndent.
    static if(isDynamicArray!R && is(ElementEncodingType!R == immutable(char)))
    {
        // true for anything that is not horizontal whitespace. Note that '\n'
        // counts as "not whitespace", since it delineates lines.
        static bool notHWhite(char c)
        {
            switch(c)
            {
                case ' ':
                case '\t':
                case '\r': return false;
                default : return true;
            }
        }

        import std.algorithm.searching : find;
        import std.utf : byCodeUnit;

        if(range.empty)
            return range;

        auto orig = range.save;
        auto text = range.byCodeUnit();

        // Set (including its trailing newline) only when the first line has
        // no leading whitespace, in which case the indent is taken from the
        // second line.
        string firstLine;

        if(notHWhite(text.front))
        {
            text = text.find('\n');
            if(text.empty)
                return orig; // single unindented line - nothing to strip
            text.popFront();
            firstLine = orig[0 .. orig.length - text.length];
        }

        immutable beforeIndent = text.length;
        text = text.find!notHWhite();
        if(text.empty)
        {
            // Everything after the first line (if there was one) is
            // whitespace or missing. Keep the first line's text and drop only
            // its trailing newline, matching what withoutIndent produces,
            // rather than discarding the first line altogether.
            return firstLine.length == 0 ? "" : firstLine[0 .. $ - 1];
        }
        immutable indent = beforeIndent - text.length;

        if(indent == 0)
            return orig;

        import std.array : appender;
        auto retval = appender!string();
        retval.reserve(orig.length / 3);

        // > 1 because we don't want a newline by itself.
        if(firstLine.length > 1)
            put(retval, firstLine);

        // Each iteration handles one line: text sits just past that line's
        // indent on entry, and just past the next line's indent on exit.
        outer: while(true)
        {
            auto start = text.save;
            text = text.find('\n');
            if(text.empty)
            {
                // Last line with no trailing newline.
                if(!start.empty)
                    put(retval, start);
                return retval.data;
            }
            text.popFront();
            auto line = start[0 .. $ - text.length];

            // Strip up to indent whitespace characters from the next line,
            // stopping early at the first non-whitespace character.
            foreach(_; 0 .. indent)
            {
                if(text.empty)
                    goto isEmpty;
                if(notHWhite(text.front))
                    goto notEmpty;
                text.popFront();
            }
            if(text.empty)
            {
                // Nothing is left after the indent, so the current line is the
                // last one and its trailing newline is dropped.
                isEmpty: put(retval, line[0 .. $ - 1]);
                return retval.data;
            }
            notEmpty: put(retval, line);
        }
        // The compiler is not smart enough to realize that this line is unreachable.
        assert(0);
    }
    else
    {
        // Generic path: build the string from the lazy range.
        import std.conv : to;
        return range.withoutIndent().to!string();
    }
}
948 
/// Ditto
auto withoutIndent(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.utf : byCodeUnit;

    static struct WithoutIndent
    {
    public:

        @property empty() { return _line.empty; }

        @property front() { return _line.front; }

        void popFront()
        {
            // With no indent to strip, _line is simply the whole input, so
            // the newline handling below is skipped entirely.
            if(_indent != 0 && _line.front == '\n')
                _nextLine();
            else
                _line.popFront();

            // Skip last newline
            if(_indent != 0 && _range.empty && !_line.empty && _line.front == '\n')
                _line = _range;
        }

        @property save()
        {
            auto copy = this;
            copy._line = _line.save;
            copy._range = _range.save;
            return copy;
        }

    private:

        // true for anything other than ' ', '\t', and '\r' ('\n' included).
        static bool notHWhite(ElementEncodingType!R c)
        {
            return c != ' ' && c != '\t' && c != '\r';
        }

        // Makes _line start where _range currently is and then moves _range
        // past the next newline and past that following line's indent.
        void _nextLine()
        {
            import std.algorithm.searching : find;

            _line = _range.save;
            _range = _range.find('\n');
            if(!_range.empty)
            {
                _range.popFront();
                _popIndent();
            }
        }

        // Pops at most _indent whitespace characters off the front of _range,
        // stopping early at the end of the range or at the first
        // non-whitespace character.
        void _popIndent()
        {
            foreach(_; 0 .. _indent)
            {
                if(_range.empty || notHWhite(_range.front))
                    return;
                _range.popFront();
            }
        }

        this(R range)
        {
            import std.algorithm : countUntil, find;
            import std.range : popFrontN;

            _range = byCodeUnit(range);
            if(_range.empty)
            {
                _line = _range;
                return;
            }

            auto whole = _range.save;
            immutable firstLineUnindented = notHWhite(_range.front);
            if(firstLineUnindented)
            {
                // The indent has to come from the second line, if there is one.
                _range = _range.find('\n');
                if(_range.empty)
                {
                    _line = whole;
                    return;
                }
                _range.popFront();
            }

            // Count the leading whitespace of the line that defines the indent.
            _indent = _range.save.countUntil!(a => notHWhite(a))();
            if(_indent == 0)
            {
                _line = whole;
                return;
            }

            if(firstLineUnindented && whole.front != '\n')
            {
                // Keep the unindented first line as part of the output.
                _range = whole;
                _popIndent();
            }
            else
                _range.popFrontN(_indent);
            _nextLine();
        }

        typeof(byCodeUnit(R.init)) _range;
        typeof(byCodeUnit(R.init)) _line;
        size_t _indent;
    }

    return WithoutIndent(range);
}
1069 
///
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;

    // The prime use case for these two functions is for an Entity.text section
    // that is formatted to be human-readable, and the rules of what whitespace
    // is stripped from the beginning or end of the range are geared towards
    // the text coming from a well-formatted Entity.text section.
    {
        import dxml.parser;
        // The document is closed with an end tag so that the example shows
        // well-formed XML.
        auto xml = "<root>\n" ~
                   "    <code>\n" ~
                   "    bool isASCII(string str)\n" ~
                   "    {\n" ~
                   "        import std.algorithm : all;\n" ~
                   "        import std.ascii : isASCII;\n" ~
                   "        return str.all!isASCII();\n" ~
                   "    }\n" ~
                   "    </code>\n" ~
                   "</root>";
        auto range = parseXML(xml);
        range.popFront();
        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n" ~
               "    bool isASCII(string str)\n" ~
               "    {\n" ~
               "        import std.algorithm : all;\n" ~
               "        import std.ascii : isASCII;\n" ~
               "        return str.all!isASCII();\n" ~
               "    }\n" ~
               "    ");
        assert(range.front.text.stripIndent() ==
               "bool isASCII(string str)\n" ~
               "{\n" ~
               "    import std.algorithm : all;\n" ~
               "    import std.ascii : isASCII;\n" ~
               "    return str.all!isASCII();\n" ~
               "}");
    }

    // The indent that is stripped matches the amount of whitespace at the front
    // of the first line.
    assert(("    start\n" ~
            "    foo\n" ~
            "    bar\n" ~
            "        baz\n" ~
            "        xyzzy\n" ~
            "           ").stripIndent() ==
           "start\n" ~
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy\n" ~
           "       ");

    // If the first line has no leading whitespace but the second line does,
    // then the second line's leading whitespace is treated as the indent.
    assert(("foo\n" ~
            "    bar\n" ~
            "        baz\n" ~
            "        xyzzy").stripIndent() ==
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy");

    assert(("\n" ~
            "    foo\n" ~
            "    bar\n" ~
            "        baz\n" ~
            "        xyzzy").stripIndent() ==
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy");

    // If neither of the first two lines has leading whitespace, then nothing
    // is stripped.
    assert(("foo\n" ~
            "bar\n" ~
            "    baz\n" ~
            "    xyzzy\n" ~
            "    ").stripIndent() ==
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy\n" ~
           "    ");

    // If a subsequent line starts with less whitespace than the indent, then
    // all of its leading whitespace is stripped but no other characters are
    // stripped.
    assert(("      foo\n" ~
            "         bar\n" ~
            "   baz\n" ~
            "         xyzzy").stripIndent() ==
           "foo\n" ~
           "   bar\n" ~
           "baz\n" ~
           "   xyzzy");

    // If the last line is just the indent, then it and the newline before it
    // are stripped.
    assert(("    foo\n" ~
            "       bar\n" ~
            "    ").stripIndent() ==
           "foo\n" ~
           "   bar");

    // If the last line is just whitespace, but it's more than the indent, then
    // the whitespace after the indent is kept.
    assert(("    foo\n" ~
            "       bar\n" ~
            "       ").stripIndent() ==
           "foo\n" ~
           "   bar\n" ~
           "   ");

    // withoutIndent does the same as stripIndent but with a lazy range.
    assert(equal(("  foo\n" ~
                  "    bar\n" ~
                  "    baz\n").withoutIndent(),
                 "foo\n" ~
                 "  bar\n" ~
                 "  baz"));
}
1199 
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : enforce;
    import std.utf : byUTF;
    import dxml.internal : testRangeFuncs;

    // Feeds the same input to stripIndent and to withoutIndent and requires
    // both to produce expected. Failures are reported against the caller's
    // line rather than a line inside this helper.
    static void check(alias makeRange)(string input, string expected, size_t line = __LINE__)
    {
        auto range = makeRange(input);

        immutable eager = range.save.stripIndent();
        enforce!AssertError(eager == expected, "unittest failed 1", __FILE__, line);

        auto lazyRange = range.save.withoutIndent();
        alias C = ElementType!(typeof(lazyRange));
        enforce!AssertError(equal(lazyRange, expected.byUTF!C), "unittest failed 2", __FILE__, line);
    }

    static foreach(rangeFunc; testRangeFuncs)
    {
        // Empty input, single lines, and trailing whitespace.
        check!rangeFunc("", "");
        check!rangeFunc("     ", "");
        check!rangeFunc("foo", "foo");
        check!rangeFunc("\nfoo", "\nfoo");
        check!rangeFunc("    foo", "foo");
        check!rangeFunc("\n    foo", "foo");
        check!rangeFunc("\n    foo\n", "foo");
        check!rangeFunc("\n    foo\n    ", "foo");
        check!rangeFunc("\n    foo\n     ", "foo\n ");

        // Indent taken from the first line; later lines have less, equal, or
        // more leading whitespace.
        check!rangeFunc("  foo\n  bar  \n    baz", "foo\nbar  \n  baz");
        check!rangeFunc("  foo\nbar\n  baz", "foo\nbar\nbaz");
        check!rangeFunc("  foo\n bar\n  baz", "foo\nbar\nbaz");
        check!rangeFunc("  foo\n  bar\n  baz", "foo\nbar\nbaz");
        check!rangeFunc("  foo\n   bar\n  baz", "foo\n bar\nbaz");
        check!rangeFunc("  foo\n    bar\n  baz", "foo\n  bar\nbaz");
        check!rangeFunc("  foo\n     bar\n  baz", "foo\n   bar\nbaz");
        check!rangeFunc("  foo\n     bar\n  baz\n\n\n\n\n", "foo\n   bar\nbaz\n\n\n\n");

        check!rangeFunc("     foo\n  bar\n       baz", "foo\nbar\n  baz");

        // First line unindented: the second line decides the indent.
        check!rangeFunc("foo\n     bar\n      baz", "foo\nbar\n baz");
        check!rangeFunc("foo\nbar\n      baz\n", "foo\nbar\n      baz\n");
    }
}
1243 
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    // Both functions must be callable from @safe pure code for every range
    // type produced by testRangeFuncs.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto eager = stripIndent(makeRange("foo"));
        assert(eager == "foo");

        auto lazyRange = withoutIndent(makeRange("foo"));
        assert(equal(lazyRange, "foo"));
    }}
}
1255 
1256 
/++
    The string representations of the five entity references predefined by the
    XML spec.

    Each member's value is the full reference text, including the leading
    $(D_CODE_STRING $(AMP)) and the trailing $(D_CODE_STRING ;).

    See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR)
              $(LREF parseStdEntityRef)
  +/
enum StdEntityRef
{
    /// Entity reference for $(D_CODE_STRING $(AMP))
    amp = "&amp;",

    /// Entity reference for $(D_CODE_STRING >)
    gt = "&gt;",

    /// Entity reference for $(D_CODE_STRING <)
    lt = "&lt;",

    /// Entity reference for $(D_CODE_STRING ')
    apos = "&apos;",

    /// Entity reference for $(D_CODE_STRING ")
    quot = "&quot;",
}
1281 
1282 
1283 /++
1284     Returns a lazy range of code units which encodes any characters which cannot
1285     be put in an $(REF EntityType._text, dxml, parser) in their literal form.
1286 
1287     encodeText is intended primarily to be used with
1288     $(REF XMLWriter.writeText, dxml, writer) to ensure that characters which
1289     cannot appear in their literal form do not appear in their literal form.
1290 
1291     Specifically, what encodeText does is
1292 
1293     $(TABLE
1294         $(TR $(TD convert $(D_CODE_STRING &) to $(D_CODE_STRING $(AMP)amp;) ))
1295         $(TR $(TD convert $(D_CODE_STRING <) to $(D_CODE_STRING $(AMP)lt;) ))
1296         $(TR $(TD convert $(D_CODE_STRING ]]>) to $(D_CODE_STRING ]]$(AMP)gt;) ))
1297     )
1298 
1299     See_Also: $(REF XMLWriter.writeText, dxml, writer)$(BR)
1300               $(LREF encodeAttr)$(BR)
1301               $(LREF decodeXML)$(BR)
1302               $(LREF asDecodedXML)
1303   +/
auto encodeText(R)(R text)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.utf : byCodeUnit;

    static struct EncodeText
    {
    public:

        // While a replacement is pending (_len != 0), it is read out of
        // _buffer from the back, since it is stored reversed.
        @property front() { return _len == 0 ? _text.front : cast(ElementEncodingType!R)_buffer[_len - 1]; }

        @property empty() { return _text.empty; }

        void popFront()
        {
            // Still inside a pending replacement: just move to its next
            // code unit.
            if(_len != 0 && --_len != 0)
                return;

            // Either there was no replacement, or its last code unit was just
            // consumed. In both cases, the source character is done.
            _text.popFront();
            _handleEntity();
        }

        @property save()
        {
            auto copy = this;
            copy._text = _text.save;
            return copy;
        }

    private:

        // Looks at the front of _text and, if it has to be encoded, stores the
        // replacement in _buffer in reverse order and sets _len to its length.
        void _handleEntity()
        {
            if(_text.empty)
                return;

            immutable current = _text.front;

            if(current == '&')
            {
                enum entity = ";pma&";
                _buffer = entity;
                _len = entity.length;
            }
            else if(current == '<')
            {
                enum entity = ";tl&";
                _buffer = entity;
                _len = entity.length;
            }
            else if(current == ']')
            {
                import std.range : dropOne;

                // FIXME This should use startsWith, but for some reason,
                // startsWith doesn't currently work with @nogc or nothrow
                // even when this code should be able to be @nogc and/or
                // nothrow.
                auto lookahead = _text.save.dropOne();
                if(lookahead.empty || lookahead.front != ']')
                    return;
                lookahead.popFront();
                if(lookahead.empty || lookahead.front != '>')
                    return;

                // _text is left on the '>' so that the whole "]]>" is consumed
                // once the replacement has been read out.
                _text = lookahead;
                enum entity = ";tg&]]";
                _buffer = entity;
                _len = entity.length;
            }
        }

        this(R text)
        {
            _text = byCodeUnit(text);
            _handleEntity();
        }

        char["]]&gt;".length] _buffer;
        size_t _len;
        typeof(byCodeUnit(R.init)) _text;
    }

    return EncodeText(text);
}
1396 
///
version(dxmlTests) @safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;

    // & and < are always encoded.
    assert(encodeText(`foo & bar`).equal(`foo &amp; bar`));
    assert(encodeText(`foo < bar`).equal(`foo &lt; bar`));

    // >, ', and " are left alone...
    assert(encodeText(`foo > bar`).equal(`foo > bar`));
    assert(encodeText(`foo ' bar`).equal(`foo ' bar`));
    assert(encodeText(`foo " bar`).equal(`foo " bar`));

    // ...except for the > which terminates ]]>.
    assert(encodeText("foo ]]> bar").equal("foo ]]&gt; bar"));

    assert(encodeText("hello world").equal("hello world"));
}
1411 
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        assert(encodeText(makeRange("")).empty);
        assert(encodeText(makeRange(`& < > ' "`)).equal(`&amp; &lt; > ' "`));
        assert(encodeText(makeRange("&&&")).equal("&amp;&amp;&amp;"));

        // save must yield independent copies which produce the same output.
        auto encoded = encodeText(makeRange(`&&<<>>''""hello ] ]> world"">><<&&`));
        assert(equal(encoded.save, encoded.save));
        assert(encoded.save.equal(`&amp;&amp;&lt;&lt;>>''""hello ] ]> world"">>&lt;&lt;&amp;&amp;`));
    }}
}
1428 
1429 
1430 /++
1431     Returns a lazy range of code units which encodes any characters which cannot
1432     be put in an attribute value of an element tag in their literal form.
1433 
1434     encodeAttr is intended primarily to be used with
1435     $(REF XMLWriter.writeAttr, dxml, writer) to ensure that characters
1436     which cannot appear in their literal form do not appear in their literal
1437     form.
1438 
1439     Specifically, what encodeAttr does is
1440 
1441     $(TABLE
1442         $(TR $(TD convert $(D_CODE_STRING &) to $(D_CODE_STRING $(AMP)amp;) ))
1443         $(TR $(TD convert $(D_CODE_STRING <) to $(D_CODE_STRING $(AMP)lt;) ))
        $(TR $(TD convert $(D_CODE_STRING ') to $(D_CODE_STRING $(AMP)apos;) if
              $(D quote == $(D_STRING '\''))))
1446         $(TR $(TD convert $(D_CODE_STRING ") to $(D_CODE_STRING $(AMP)quot;) if
1447               $(D quote == $(D_STRING '"'))))
1448     )
1449 
1450     See_Also: $(REF XMLWriter.writeAttr, dxml, writer)$(BR)
1451               $(LREF encodeText)$(BR)
1452               $(LREF decodeXML)$(BR)
1453               $(LREF asDecodedXML)
1454   +/
auto encodeAttr(char quote = '"', R)(R text)
    if((quote == '"' || quote == '\'') && isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.utf : byCodeUnit;

    static struct EncodeAttr
    {
    public:

        // While a replacement is pending (_len != 0), it is read out of
        // _buffer from the back, since it is stored reversed.
        @property front() { return _len == 0 ? _text.front : cast(ElementEncodingType!R)_buffer[_len - 1]; }

        @property empty() { return _text.empty; }

        void popFront()
        {
            // Still inside a pending replacement: just move to its next
            // code unit.
            if(_len != 0 && --_len != 0)
                return;

            // Either there was no replacement, or its last code unit was just
            // consumed. In both cases, the source character is done.
            _text.popFront();
            _handleEntity();
        }

        @property save()
        {
            auto copy = this;
            copy._text = _text.save;
            return copy;
        }

    private:

        // Looks at the front of _text and, if it has to be encoded, stores the
        // replacement in _buffer in reverse order and sets _len to its length.
        void _handleEntity()
        {
            if(_text.empty)
                return;

            immutable current = _text.front;

            if(current == '&')
            {
                enum entity = ";pma&";
                _buffer = entity;
                _len = entity.length;
            }
            else if(current == '<')
            {
                enum entity = ";tl&";
                _buffer = entity;
                _len = entity.length;
            }
            else if(current == quote)
            {
                // Only the quote character selected at compile time is
                // encoded; the other one is passed through untouched.
                static if(quote == '"')
                    enum entity = ";touq&";
                else
                    enum entity = ";sopa&";
                _buffer = entity;
                _len = entity.length;
            }
        }

        this(R text)
        {
            _text = byCodeUnit(text);
            _handleEntity();
        }

        char["&quot;".length] _buffer;
        size_t _len;
        typeof(byCodeUnit(R.init)) _text;
    }

    return EncodeAttr(text);
}
1535 
///
version(dxmlTests) @safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;

    // With the default quote of ", the characters &, <, and " are encoded.
    assert(encodeAttr(`foo & bar`).equal(`foo &amp; bar`));
    assert(encodeAttr(`foo < bar`).equal(`foo &lt; bar`));
    assert(encodeAttr(`foo > bar`).equal(`foo > bar`));
    assert(encodeAttr(`foo ' bar`).equal(`foo ' bar`));
    assert(encodeAttr(`foo " bar`).equal(`foo &quot; bar`));

    // With a quote of ', it is ' that is encoded rather than ".
    assert(encodeAttr!'\''(`foo ' bar`).equal(`foo &apos; bar`));
    assert(encodeAttr!'\''(`foo " bar`).equal(`foo " bar`));

    assert(encodeAttr("hello world").equal("hello world"));
}
1552 
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        assert(encodeAttr(makeRange("")).empty);
        assert(encodeAttr!'\''(makeRange("")).empty);
        assert(encodeAttr(makeRange(`& < > ' "`)).equal(`&amp; &lt; > ' &quot;`));
        assert(encodeAttr!'\''(makeRange(`& < > ' "`)).equal(`&amp; &lt; > &apos; "`));
        assert(encodeAttr(makeRange("&&&")).equal("&amp;&amp;&amp;"));

        // Double-quote flavor: save must give independent, equal copies.
        {
            auto encoded = encodeAttr(makeRange(`&&<<>>''""hello world"">><<&&`));
            assert(equal(encoded.save, encoded.save));
            assert(encoded.save.equal(`&amp;&amp;&lt;&lt;>>''&quot;&quot;hello world&quot;&quot;>>&lt;&lt;&amp;&amp;`));
        }

        // Single-quote flavor.
        {
            auto encoded = encodeAttr!'\''(makeRange(`&&<<>>''""hello world"">><<&&`));
            assert(equal(encoded.save, encoded.save));
            assert(encoded.save.equal(`&amp;&amp;&lt;&lt;>>&apos;&apos;""hello world"">>&lt;&lt;&amp;&amp;`));
        }
    }}
}
1579 
1580 
/++
    Returns a range of $(K_CHAR) containing the character reference
    corresponding to the given character.

    The reference is written in hexadecimal form (e.g.
    $(D_CODE_STRING $(AMP)#x41;) for $(D_CODE_STRING 'A')).

    Params:
        c = The character to encode.

    Returns: A forward range of $(K_CHAR) over the text of the character
             reference.

    See_Also: $(LREF parseCharRef)
  +/
auto encodeCharRef(dchar c)
{
    static struct EncodeCharRef
    {
    public:

        @property front() { return _buffer[_index]; }

        // '$' is written after the reference as an end marker, so reaching it
        // means that the whole reference has been consumed.
        @property empty() { return _buffer[_index] == '$'; }

        void popFront() { ++_index; }

        // All state is held by value, so a plain copy is an independent range.
        @property save() { return this; }

    private:

        import std.conv : to;

        // 7 + 5 == 12 code units. The longest output for a valid dchar is
        // "&#x10ffff;" plus the '$' end marker, which is 11 code units.
        char[to!string(cast(uint)dchar.max).length + 5] _buffer;
        size_t _index;
    }

    import std.format : formattedWrite;
    import std.string : representation;

    EncodeCharRef retval;
    // The buffer is written through its ubyte[] representation, and the
    // trailing '$' is the end marker that empty looks for.
    formattedWrite!"&#x%x;$"(retval._buffer[].representation, c);
    return retval;
}
1619 
///
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;

    // The reference is always written in hexadecimal form.
    assert(encodeCharRef(' ').equal("&#x20;"));
    assert(encodeCharRef('A').equal("&#x41;"));
    assert(encodeCharRef('\u2424').equal("&#x2424;"));

    // The result can be fed straight back into parseCharRef.
    auto encoded = encodeCharRef('*');
    assert(encoded.parseCharRef() == '*');
}
1632 
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;

    // save must give independent copies which each produce the full reference.
    enum expected = "&#x23;";
    auto encoded = encodeCharRef('#');
    assert(equal(encoded.save, encoded.save));
    assert(encoded.save.equal(expected));
}