1 // Written in the D programming language
2 
3 /++
4     This module contains helper functions which aren't specific to the parser,
5     the DOM, or the writer.
6 
7     $(TABLE
8         $(TR $(TH Symbol) $(TH Description))
9         $(TR $(TD $(LREF decodeXML))
10              $(TD Takes a range of characters, strips carriage returns from it,
11                   and converts both character references and the predefined
12                   entity references in the range into the characters that they
13                   refer to.))
14         $(TR $(TD $(LREF asDecodedXML))
15              $(TD The version of $(LREF decodeXML) that returns a lazy range.))
16         $(TR $(TD $(LREF parseCharRef))
17              $(TD Parses a character reference from the front of a range of
18                   characters.))
19         $(TR $(TD $(LREF parseStdEntityRef))
20              $(TD Parses one of the predefined entity references from the start
21                   of a range of characters.))
22         $(TR $(TD $(LREF stripIndent))
23              $(TD Removes the indent from the front of each line of a range of
24                   characters that was XML text which was formatted for
25                   human-readability.))
26         $(TR $(TD $(LREF withoutIndent))
27              $(TD The version of $(LREF stripIndent) that returns a lazy
28                   range.))
29         $(TR $(TD $(LREF StdEntityRef))
30              $(TD Enum containing the string representations of the five,
31                   predefined entity references.))
32         $(TR $(TD $(LREF encodeText))
33              $(TD Encodes characters which cannot appear in
34                   $(REF_ALTTEXT EntityType.text, EntityType.text, dxml, parser)
35                   in their literal form.))
36         $(TR $(TD $(LREF encodeAttr))
37              $(TD Encodes characters which cannot appear in the attribute value
38                   of an element start tag in their literal form.))
39         $(TR $(TD $(LREF encodeCharRef))
40              $(TD Encodes a character as a character reference.))
41     )
42 
43     Copyright: Copyright 2018 - 2020
44     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
45     Authors:   $(HTTPS jmdavisprog.com, Jonathan M Davis)
46     Source:    $(LINK_TO_SRC dxml/_util.d)
47 
48     See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0)
49   +/
50 module dxml.util;
51 
52 import std.range.primitives;
53 import std.traits;
54 import std.typecons : Nullable;
55 
56 /++
57     Decodes any XML character references and standard XML entity references in
58     the text as well as removing any carriage returns. It's intended to be used
59     on the text fields of element tags and on the values of start tag
60     attributes.
61 
62     There are a number of characters that either can't be directly represented
63     in the text fields or attribute values in XML or which can sometimes be
64     directly represented but not always (e.g. an attribute value can contain
65     either a single quote or a double quote, but it can't contain both at the
66     same time, because one of them would match the opening quote). So, those
67     characters have alternate representations in order to be allowed (e.g.
68     $(D_CODE_STRING "$(AMP)lt;") for $(D_CODE_STRING '<'), because
69     $(D_CODE_STRING '<') would normally be the beginning of an entity).
70     Technically, they're entity references, but the ones handled by decodeXML
71     are the ones explicitly defined in the XML standard and which don't require
72     a DTD section.
73 
74     Ideally, the parser would transform all such alternate representations to
75     what they represent when providing the text to the application, but that
76     would make it impossible to return slices of the original text from the
77     properties of an $(REF_ALTTEXT Entity, EntityRange.Entity, dxml, parser).
78     So, instead of having those properties do the transformation themselves,
79     decodeXML and asDecodedXML do that so that the application can choose to do
80     it or not (in many cases, there is nothing to decode, making the calls
81     unnecessary).
82 
    Similarly, an application can choose to encode a character as a character
    reference (e.g. $(D_CODE_STRING "$(AMP)#65;") or
    $(D_CODE_STRING "$(AMP)#x41;") for $(D_CODE_STRING 'A')). decodeXML will
    decode such character references to their corresponding characters.
87 
88     However, decodeXML does not handle any entity references beyond the five
89     predefined ones listed below. All others are left unprocessed. Processing
90     them properly would require handling the DTD section, which dxml does not
    support. The parser considers any entity references other than the
    predefined ones to be invalid XML, so text that comes from dxml's parser
    cannot contain any entity references other than the predefined ones; only
    text from some other source can. Similarly, invalid
    character references are left unprocessed as well as any character that is
    not valid in an XML document. decodeXML never throws on invalid XML.
97 
98     Also, $(D_CODE_STRING '\r') is not supposed to appear in an XML document
99     except as a character reference unless it's in a CDATA section. So, it
100     really should be stripped out before being handed off to the application,
101     but again, that doesn't work with slices. So, decodeXML also handles that.
102 
103     Specifically, what decodeXML and asDecodedXML do is
104 
105     $(TABLE
106         $(TR $(TD convert $(D_CODE_STRING $(AMP)amp;) to $(D_CODE_STRING &)))
107         $(TR $(TD convert $(D_CODE_STRING $(AMP)gt;) to $(D_CODE_STRING >)))
108         $(TR $(TD convert $(D_CODE_STRING $(AMP)lt;) to $(D_CODE_STRING <)))
109         $(TR $(TD convert $(D_CODE_STRING $(AMP)apos;) to $(D_CODE_STRING ')))
110         $(TR $(TD convert $(D_CODE_STRING $(AMP)quot;) to $(D_CODE_STRING ")))
111         $(TR $(TD remove all instances of $(D_CODE_STRING \r)))
112         $(TR $(TD convert all character references (e.g.
113                   $(D_CODE_STRING $(AMP)#xA;)) to the characters that they
114                   represent))
115     )
116 
117     All other entity references are left untouched, and any $(D_CODE_STRING '&')
118     which is not used in one of the constructs listed in the table as well as
119     any malformed constructs (e.g. $(D_CODE_STRING "&Amp;") or
120     $(D_CODE_STRING "&#xGGA2;")) are left untouched.
121 
122     The difference between decodeXML and asDecodedXML is that decodeXML returns
123     a $(K_STRING), whereas asDecodedXML returns a lazy _range of code
124     units. In the case where a $(K_STRING) is passed to decodeXML, it
125     will simply return the original $(K_STRING) if there is no text to decode
126     (whereas in other cases, decodeXML and asDecodedXML are forced to return
127     new ranges even if there is no text to decode).
128 
129     Params:
        range = The _range of characters to decode.
131 
132     Returns: The decoded text. decodeXML returns a $(K_STRING), whereas
133              asDecodedXML returns a lazy _range of code units (so it could be a
134              _range of $(K_CHAR) or $(K_WCHAR) and not just $(K_DCHAR); which it
135              is depends on the code units of the _range being passed in).
136 
137     See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR)
138               $(LREF parseStdEntityRef)$(BR)
139               $(LREF parseCharRef)$(BR)
140               $(REF EntityRange.Entity.attributes, dxml, parser)$(BR)
141               $(REF EntityRange.Entity.text, dxml, parser)$(BR)
142               $(LREF encodeAttr)$(BR)
143               $(LREF encodeText)
144   +/
string decodeXML(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.conv : to;

    static if(isDynamicArray!R && is(Unqual!(ElementEncodingType!R) == char))
    {
        import std.algorithm.searching : find;
        import std.array : appender;
        import std.meta : AliasSeq;

        // Fast path: with neither '&' nor '\r' in the text, there is nothing
        // to decode, so the input is converted to string as-is.
        auto firstHit = range.find('&', '\r');
        if(firstHit[1] == 0)
            return range.to!string();

        auto output = appender!string();
        output.reserve(range.length);

        // Everything before the first '&' or '\r' is copied verbatim.
        immutable cleanLen = range.length - firstHit[0].length;
        put(output, range[0 .. cleanLen]);
        range = range[cleanLen .. $];

        // range[0 .. pending] has been scanned and needs no decoding, but it
        // has not been copied to the output yet.
        size_t pending = 0;
        scan: while(pending != range.length)
        {
            immutable unit = range[pending];

            if(unit == '&')
            {
                // A trailing '&' cannot start a reference, so it is kept as
                // part of the pending text.
                if(pending + 1 == range.length)
                {
                    ++pending;
                    break scan;
                }

                // Flush the pending text so that range starts at the '&'.
                put(output, range[0 .. pending]);
                range = range[pending .. $];
                pending = 0;

                // On success, each parser consumes the reference from range.
                static foreach(parseRef; AliasSeq!(parseStdEntityRef, parseCharRef))
                {{
                    immutable decoded = parseRef(range);
                    if(!decoded.isNull)
                    {
                        put(output, decoded.get);
                        continue scan;
                    }
                }}

                // Not a reference that is handled here; keep the '&' literally.
                put(output, '&');
                range = range[1 .. $];
            }
            else if(unit == '\r')
            {
                // Copy what precedes the carriage return, then skip over it.
                if(pending != 0)
                    put(output, range[0 .. pending]);
                range = range[pending + 1 .. $];
                pending = 0;
            }
            else
                ++pending;
        }

        if(pending != 0)
            put(output, range[0 .. pending]);

        return output.data;
    }
    else
        return to!string(asDecodedXML(range));
}
217 
218 
/// Ditto
auto asDecodedXML(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.meta : AliasSeq;
    import std.utf : byCodeUnit, encode, UseReplacementDchar;

    // Lazy range of code units. A decoded reference is encoded into _buffer,
    // and _buffer[_begin .. _end] is served before reading further from
    // _range. _begin == _end means that the buffer holds nothing.
    static struct DecodedXML
    {
    public:

        @property empty() { return _range.empty && _begin == _end; }

        void popFront()
        {
            if(_begin == _end)
                _range.popFront();
            else if(++_begin != _end)
                return; // more buffered code units left to serve
            _popFrontImpl();
        }

        @property save()
        {
            auto copy = this;
            copy._range = _range.save;
            return copy;
        }

    private:

        // Advances _range until either a reference has been decoded into
        // _buffer or _range is empty or sits on a code unit that is to be
        // returned as-is. Carriage returns are skipped along the way.
        void _popFrontImpl()
        {
            for(; !_range.empty; _range.popFront())
            {
                if(_range.front == '\r')
                {
                    assert(_begin == _end);
                    continue;
                }

                if(_range.front == '&')
                {
                    // On success, each parser consumes the reference from
                    // _range, so only the buffer needs to be filled in.
                    static foreach(parseRef; AliasSeq!(parseStdEntityRef, parseCharRef))
                    {{
                        immutable decoded = parseRef(_range);
                        if(!decoded.isNull)
                        {
                            _begin = 0;
                            _end = _buffer.encode!(UseReplacementDchar.yes)(decoded.get);
                            return;
                        }
                    }}
                }

                // Either an ordinary code unit or an '&' which does not start
                // a reference that is handled here.
                assert(_begin == _end);
                return;
            }
        }

        this(R range) @safe
        {
            _range = byCodeUnit(range);
            _popFrontImpl();
        }

        typeof(byCodeUnit(R.init)) _range;

        // Sized to hold one encoded code point in the encoding of R.
        static if(is(Unqual!(ElementEncodingType!R) == char))
            char[4] _buffer;
        else static if(is(Unqual!(ElementEncodingType!R) == wchar))
            wchar[2] _buffer;
        else
            dchar[1] _buffer;
        size_t _begin;
        size_t _end;

    public:

        // FIXME A compiler bug prevents this from going with the public declarations
        // above. If it's there, the compiler thinks that _buffer isn't defined when
        // it tries to compile front. It needs to be reduced and reported.
        @property typeof(_buffer[0]) front()
        {
            if(_begin != _end)
                return _buffer[_begin];
            return _range.front;
        }
    }

    return DecodedXML(range);
}
314 
///
unittest
{
    import std.algorithm.comparison : equal;

    enum encoded = "hello world &amp;&gt;&lt;&apos;&quot; \r\r\r\r\r foo";
    enum decoded = `hello world &><'"  foo`;

    // The eager and the lazy version yield the same text.
    assert(encoded.decodeXML() == decoded);
    assert(equal(encoded.asDecodedXML(), decoded));

    // "\r\n" line endings lose their carriage return.
    assert("if(foo &amp;&amp; bar)\r\n    left = right;".decodeXML() ==
           "if(foo && bar)\n    left = right;");

    // Character references are decoded as well.
    assert("&#12487;&#12451;&#12521;&#12531;".decodeXML() == "ディラン");

    // Text without references and malformed references come back unchanged.
    assert("foo".decodeXML() == "foo");
    assert("&#   ;".decodeXML() == "&#   ;");

    {
        import dxml.parser;

        // Checks the attribute at the front of attrs, both raw and decoded.
        static void expectAttr(Attrs)(ref Attrs attrs, string name,
                                      string raw, string decodedValue)
        {
            assert(attrs.front.name == name);
            assert(attrs.front.value == raw);
            assert(decodeXML(attrs.front.value) == decodedValue);
        }

        auto document = "<root>\n" ~
                        "    <function return='vector&lt;int&gt;' name='foo'>\r\n" ~
                        "        <doc_comment>This function does something really\r\n" ~
                        "                 fancy, and you will love it.</doc_comment>\r\n" ~
                        "        <param type='int' name='i'>\r\n" ~
                        "        <param type='const std::string&amp;' name='s'>\r\n" ~
                        "    </function>\n" ~
                        "</root>";
        auto entities = parseXML!simpleXML(document);
        entities.popFront();

        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "function");
        {
            auto attrs = entities.front.attributes;
            expectAttr(attrs, "return", "vector&lt;int&gt;", "vector<int>");
            attrs.popFront();
            expectAttr(attrs, "name", "foo", "foo");
        }
        entities.popFront();

        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "doc_comment");
        entities.popFront();

        // The parser hands back the text with its "\r\n" intact; decodeXML
        // is what removes the carriage returns.
        assert(entities.front.text ==
               "This function does something really\r\n" ~
               "                 fancy, and you will love it.");
        assert(decodeXML(entities.front.text) ==
               "This function does something really\n" ~
               "                 fancy, and you will love it.");
        entities.popFront();

        assert(entities.front.type == EntityType.elementEnd);
        assert(entities.front.name == "doc_comment");
        entities.popFront();

        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "param");
        {
            auto attrs = entities.front.attributes;
            expectAttr(attrs, "type", "int", "int");
            attrs.popFront();
            expectAttr(attrs, "name", "i", "i");
        }
        entities.popFront();

        assert(entities.front.type == EntityType.elementStart);
        assert(entities.front.name == "param");
        {
            auto attrs = entities.front.attributes;
            expectAttr(attrs, "type", "const std::string&amp;", "const std::string&");
            attrs.popFront();
            expectAttr(attrs, "name", "s", "s");
        }
    }
}
407 
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : enforce;
    import std.utf : byUTF;
    import dxml.internal : testRangeFuncs;

    // Decodes the text through both decodeXML and asDecodedXML and reports
    // failures against the line of the caller rather than this helper.
    static void check(alias makeRange)(string encoded, string expected, size_t line = __LINE__)
    {
        auto input = makeRange(encoded);

        immutable eager = decodeXML(input.save);
        enforce!AssertError(eager == expected, "unittest failed 1", __FILE__, line);

        // The lazy range yields code units, so the expected text is
        // re-encoded to the same code unit type before comparing.
        auto lazyResult = asDecodedXML(input.save);
        alias Unit = ElementType!(typeof(lazyResult));
        enforce!AssertError(equal(lazyResult, expected.byUTF!Unit), "unittest failed 2", __FILE__, line);
    }

    static foreach(makeRange; testRangeFuncs)
    {{
        check!makeRange("hello world &amp;  &gt;  &lt;  &apos;  &quot; \r\r\r\r\r foo", `hello world &  >  <  '  "  foo`);
        check!makeRange("&amp", "&amp");
        check!makeRange("&#01234567890;", "&#01234567890;");
        check!makeRange("&", "&");
        check!makeRange("&&&&", "&&&&");
        check!makeRange("&&&&amp;", "&&&&");
        check!makeRange("&#", "&#");
        check!makeRange("&#;", "&#;");
        check!makeRange("&#0", "&#0");
        check!makeRange("&#0;", "&#0;");
        check!makeRange("&#48;", "0");
        check!makeRange("&#0amp;", "&#0amp;");
        check!makeRange("&#amp;", "&#amp;");
        check!makeRange("&#x", "&#x");
        check!makeRange("&#x;", "&#x;");
        check!makeRange("&#x0;", "&#x0;");
        check!makeRange("&#x9;", "\t");
        check!makeRange("&#x20;", " ");
        check!makeRange("&#12487;&#12451;&#12521;&#12531;", "ディラン");
    }}
}
447 
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    // Being @safe pure, this only compiles if both functions can be called
    // from @safe pure code with each of the tested range types.
    static foreach(makeRange; testRangeFuncs)
    {{
        assert(makeRange("foo").decodeXML() == "foo");
        assert(equal(makeRange("foo").asDecodedXML(), "foo"));
    }}
}
459 
460 
461 /++
    This parses one of the five, predefined entity references mentioned in the
    XML spec from the front of a range of characters.
464 
465     If the given range starts with one of the five, predefined entity
466     references, then it is removed from the range, and the corresponding
467     character is returned.
468 
469     If the range does not start with one of those references, then the return
470     value is null, and the range is unchanged.
471 
472     $(TABLE
473         $(TR $(TH Std Entity Ref)$(TH Converts To))
474         $(TR $(TD $(D_CODE_STRING $(AMP)amp;))$(TD $(D_CODE_STRING &)))
475         $(TR $(TD $(D_CODE_STRING $(AMP)gt;))$(TD $(D_CODE_STRING >)))
476         $(TR $(TD $(D_CODE_STRING $(AMP)lt;))$(TD $(D_CODE_STRING $(LT))))
477         $(TR $(TD $(D_CODE_STRING $(AMP)apos;))$(TD $(D_CODE_STRING ')))
478         $(TR $(TD $(D_CODE_STRING $(AMP)quot;))$(TD $(D_CODE_STRING ")))
479     )
480 
481     Any other entity references would require processing a DTD section in order
482     to be handled and are untouched by parseStdEntityRef as are any other types
483     of references.
484 
485     Params:
486         range = A range of characters.
487 
488     Returns: The character represented by the predefined entity reference that
489              was parsed from the front of the given range or null if the range
490              did not start with one of the five predefined entity references.
491 
492     See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR)
493               $(LREF parseCharRef)$(BR)
494               $(LREF decodeXML)$(BR)
495               $(LREF asDecodedXML)
496   +/
Nullable!dchar parseStdEntityRef(R)(ref R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.algorithm.searching : startsWith;
    import std.typecons : nullable, tuple;
    import std.utf : byCodeUnit;

    // Used to roll the caller's range back if no reference gets parsed.
    auto snapshot = range.save;

    // Narrow strings are examined by code unit. Any other range type is
    // operated on directly, so popping from units pops from range.
    static if(isNarrowString!R)
        auto units = range.byCodeUnit();
    else
        alias units = range;

    if(!units.save.startsWith('&'))
        return typeof(return).init;
    units.popFront();

    if(!units.empty)
    {
        static foreach(entry; [tuple("amp;", '&'),
                               tuple("gt;", '>'),
                               tuple("lt;", '<'),
                               tuple("apos;", '\''),
                               tuple("quot;", '"')])
        {
            if(units.save.startsWith(entry[0]))
            {
                units.popFrontN(entry[0].length);
                // The code unit wrapper is a separate object, so what is left
                // of it has to be handed back to the caller's range.
                static if(isNarrowString!R)
                    range = units.source;
                dchar decoded = entry[1];
                return nullable(decoded);
            }
        }
    }

    // Not one of the five predefined references; undo the pop of the '&'.
    range = snapshot;
    return typeof(return).init;
}
533 
///
unittest
{
    // Each of the five predefined references is removed from the front of
    // the text and converted to the character that it stands for.
    {
        auto text = "&amp;foo";
        assert(parseStdEntityRef(text) == '&');
        assert(text == "foo");
    }
    {
        auto text = "&gt;bar";
        assert(parseStdEntityRef(text) == '>');
        assert(text == "bar");
    }
    {
        auto text = "&lt;baz";
        assert(parseStdEntityRef(text) == '<');
        assert(text == "baz");
    }
    {
        auto text = "&apos;dlang";
        assert(parseStdEntityRef(text) == '\'');
        assert(text == "dlang");
    }
    {
        auto text = "&quot;rocks";
        assert(parseStdEntityRef(text) == '"');
        assert(text == "rocks");
    }

    // The reference has to be at the very front of the text.
    {
        auto text = " &amp;foo";
        assert(parseStdEntityRef(text).isNull);
        assert(text == " &amp;foo");
    }

    // Anything other than the five predefined references gives null and
    // leaves the text untouched.
    {
        auto text = "&Amp;hello";
        assert(parseStdEntityRef(text).isNull);
        assert(text == "&Amp;hello");
    }
    {
        auto text = "&nbsp;foo";
        assert(parseStdEntityRef(text).isNull);
        assert(text == "&nbsp;foo");
    }
    {
        auto text = "hello world";
        assert(parseStdEntityRef(text).isNull);
        assert(text == "hello world");
    }
}
583 
unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        // No position in this text starts a predefined entity reference, so
        // every attempt must fail and leave the range as it was.
        auto junk = makeRange(";Amp;amp;&#amp;&copy;& amp;");
        while(!junk.empty)
        {
            auto probe = junk.save;
            assert(parseStdEntityRef(probe).isNull);
            assert(equal(junk.save, probe.save));
            junk.popFront();
        }

        // A reference without its terminating ';' is not accepted.
        auto unterminated = makeRange("&amp");
        assert(parseStdEntityRef(unterminated).isNull);
        assert(equal(unterminated.save, "&amp"));

        // Nothing is parsed while the leading space is still there.
        auto refs = makeRange(" &amp;&gt;&lt;&apos;&quot;");
        assert(parseStdEntityRef(refs).isNull);
        assert(equal(refs.save, " &amp;&gt;&lt;&apos;&quot;"));
        refs.popFront();

        // After that, the references are consumed one at a time.
        assert(parseStdEntityRef(refs) == '&');
        assert(equal(refs.save, "&gt;&lt;&apos;&quot;"));
        assert(parseStdEntityRef(refs) == '>');
        assert(equal(refs.save, "&lt;&apos;&quot;"));
        assert(parseStdEntityRef(refs) == '<');
        assert(equal(refs.save, "&apos;&quot;"));
        assert(parseStdEntityRef(refs) == '\'');
        assert(equal(refs.save, "&quot;"));
        assert(parseStdEntityRef(refs) == '"');
        assert(refs.empty);
    }}
}
621 
@safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    // Being @safe pure, this only compiles if parseStdEntityRef can be called
    // from @safe pure code with each of the tested range types.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto text = makeRange("foo");
        assert(parseStdEntityRef(text).isNull);
    }}
}
632 
633 
634 /++
635     If the given range starts with a valid, XML, character reference, it is
636     removed from the range, and the corresponding character is returned.
637 
638     If the range does not start with a valid, XML, character reference, then
639     the return value is null, and the range is unchanged.
640 
641     Params:
642         range = A range of characters.
643 
644     Returns: The character represented by the character reference that was
645              parsed from the front of the given range or null if the range did
646              not start with a valid, XML, character reference.
647 
648     See_Also: $(LINK http://www.w3.org/TR/REC-xml/#NT-CharRef)$(BR)
649               $(LREF parseStdEntityRef)$(BR)
650               $(LREF decodeXML)$(BR)
651               $(LREF asDecodedXML)$(BR)
652               $(LREF encodeCharRef)
653   +/
Nullable!dchar parseCharRef(R)(ref R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.algorithm.searching : startsWith;
    import std.conv : ConvException, parse, to;
    import std.range : popFrontN;
    import std.typecons : nullable;
    import std.utf : byCodeUnit;
    import dxml.internal : isXMLChar;

    // Used to roll the caller's range back if no reference gets parsed.
    auto snapshot = range.save;

    // Narrow strings are examined by code unit. Any other range type is
    // operated on directly, so popping from units pops from range.
    static if(isNarrowString!R)
        auto units = range.byCodeUnit();
    else
        alias units = range;

    if(!units.save.startsWith("&#"))
        return typeof(return).init;
    units.popFrontN(2);

    if(!units.empty)
    {
        uint radix = 10;
        bool canParse = true;

        if(units.front == 'x')
        {
            import std.ascii : isHexDigit;

            units.popFront();
            radix = 16;
            // The first hex digit is checked here rather than left to parse.
            // https://issues.dlang.org/show_bug.cgi?id=18248
            canParse = !units.empty && isHexDigit(units.front);
        }

        if(canParse)
        {
            try
            {
                dchar code = to!dchar(units.parse!uint(radix));

                // The number must be terminated by ';' and the resulting
                // character must be '\n' or pass isXMLChar.
                if(units.startsWith(';') && (code == '\n' || isXMLChar(code)))
                {
                    units.popFront();
                    static if(isNarrowString!R)
                        range = units.source;
                    return nullable(code);
                }
            }
            catch(ConvException)
            {
                // Conversion failed, so this is not a valid character
                // reference; fall through to the rollback below.
            }
        }
    }

    range = snapshot;
    return typeof(return).init;
}
706 
///
unittest
{
    import std.range.primitives : empty;

    // Decimal and hexadecimal references to the same character.
    {
        auto text = "&#48; hello world";
        assert(text.parseCharRef() == '0');
        assert(text == " hello world");
    }
    {
        auto text = "&#x30; hello world";
        assert(text.parseCharRef() == '0');
        assert(text == " hello world");
    }

    // Consecutive references are consumed one at a time.
    {
        auto text = "&#12487;&#12451;&#12521;&#12531;";
        assert(text.parseCharRef() == 'デ');
        assert(text == "&#12451;&#12521;&#12531;");
        assert(text.parseCharRef() == 'ィ');
        assert(text == "&#12521;&#12531;");
        assert(text.parseCharRef() == 'ラ');
        assert(text == "&#12531;");
        assert(text.parseCharRef() == 'ン');
        assert(text.empty);
    }

    // Anything that is not a valid character reference at the very front
    // gives null and leaves the text untouched.
    {
        auto text = "&#x;foo";
        assert(text.parseCharRef().isNull);
        assert(text == "&#x;foo");
    }
    {
        auto text = "foobar";
        assert(text.parseCharRef().isNull);
        assert(text == "foobar");
    }
    {
        auto text = " &x48;";
        assert(text.parseCharRef().isNull);
        assert(text == " &x48;");
    }
}
749 
unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        // No position in this text starts a valid character reference, so
        // every attempt must fail and leave the range as it was.
        auto junk = makeRange(";;&#;&#G;&#1234567890;&#F;");
        while(!junk.empty)
        {
            auto probe = junk.save;
            assert(parseCharRef(probe).isNull);
            assert(equal(junk.save, probe.save));
            junk.popFront();
        }

        // A reference without its terminating ';' is not accepted.
        auto unterminated = makeRange("&#65");
        assert(parseCharRef(unterminated).isNull);
        assert(equal(unterminated.save, "&#65"));

        // Nothing is parsed while the leading space is still there.
        auto refs = makeRange(" &#65;&#x42;&#67; &#x4EAC;&#x90FD;&#x5E02;");
        assert(parseCharRef(refs).isNull);
        assert(equal(refs.save, " &#65;&#x42;&#67; &#x4EAC;&#x90FD;&#x5E02;"));
        refs.popFront();

        assert(parseCharRef(refs) == 'A');
        assert(equal(refs.save, "&#x42;&#67; &#x4EAC;&#x90FD;&#x5E02;"));
        assert(parseCharRef(refs) == 'B');
        assert(equal(refs.save, "&#67; &#x4EAC;&#x90FD;&#x5E02;"));
        assert(parseCharRef(refs) == 'C');
        assert(equal(refs.save, " &#x4EAC;&#x90FD;&#x5E02;"));

        // The space in the middle stops parsing until it is popped off.
        assert(parseCharRef(refs).isNull);
        assert(equal(refs.save, " &#x4EAC;&#x90FD;&#x5E02;"));
        refs.popFront();

        assert(parseCharRef(refs) == '京');
        assert(equal(refs.save, "&#x90FD;&#x5E02;"));
        assert(parseCharRef(refs) == '都');
        assert(equal(refs.save, "&#x5E02;"));
        assert(parseCharRef(refs) == '市');
        assert(refs.empty);
    }}
}
794 
@safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    // Being @safe pure, this only compiles if parseCharRef can be called
    // from @safe pure code with each of the tested range types.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto text = makeRange("foo");
        assert(parseCharRef(text).isNull);
    }}
}
805 
806 
/++
    Removes the indent from the lines of a range of characters (most likely
    text obtained from
    $(REF_ALTTEXT Entity.text, EntityRange.Entity.text, dxml, parser)).

    XML which has been formatted to be human-readable often contains text that
    spans multiple lines, with each of those lines indented. The application
    usually has no use for that extra whitespace, so stripIndent and
    withoutIndent make an attempt at intelligently removing it.

    These functions treat any combination of $(D_CODE_STRING ' '),
    $(D_CODE_STRING '\t'), and $(D_CODE_STRING '\r') as whitespace.
    $(D_CODE_STRING '\n') is what separates lines, so it does not count as
    whitespace.

    The leading whitespace of the first line determines the indent: it is
    stripped, and then up to that many whitespace characters are stripped from
    the start of every following line. A line which starts with fewer
    whitespace characters than the indent only loses the whitespace that it
    has; non-whitespace characters are never removed.

    If the first line does not start with whitespace, then the leading
    whitespace of the second line is used as the indent instead. That handles
    the case where the text begins immediately after a start tag and only the
    subsequent lines are indented.

    If neither of the first two lines starts with whitespace, then nothing is
    stripped.

    If the text ends with a newline that is followed by nothing other than
    (at most) the indent, then that final newline is removed as well.

    So, well-formatted text has its indent removed cleanly, and unformatted or
    badly formatted text loses nothing other than leading whitespace. Whether
    stripping the indent is preferable to leaving the text as-is is for the
    application to decide.

    stripIndent returns a $(K_STRING), whereas withoutIndent returns a lazy
    range of code units. When stripIndent is given a $(K_STRING) which has no
    indent, it simply returns that string; in all other cases, both functions
    have to return a new range.

    Params:
        range = A range of characters.

    Returns: The text with the indent stripped from each line. stripIndent
             returns a $(K_STRING), whereas withoutIndent returns a lazy range
             of code units (so, depending on the code units of the range that
             is passed in, it could be a range of $(K_CHAR) or $(K_WCHAR) and
             not just $(K_DCHAR)).

    See_Also: $(REF EntityRange.Entity.text, dxml, parser)
  +/
string stripIndent(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.conv : to;

    static if(isDynamicArray!R && is(Unqual!(ElementEncodingType!R) == char))
    {
        import std.algorithm.searching : find;
        import std.array : appender;
        import std.utf : byCodeUnit;

        // true for every code unit which is not horizontal whitespace
        // (so it is also true for '\n').
        static bool notHWhite(char c)
        {
            return c != ' ' && c != '\t' && c != '\r';
        }

        if(range.empty)
            return range.to!string();

        auto whole = range.save;
        auto rest = range.byCodeUnit();

        // Only set when the first line has no indent of its own, in which
        // case it is that line including its newline.
        ElementEncodingType!R[] firstLine;

        if(notHWhite(rest.front))
        {
            rest = rest.find('\n');
            if(rest.empty)
                return whole.to!string();
            rest.popFront();
            firstLine = whole[0 .. whole.length - rest.length];
        }

        // The indent is the run of whitespace at the start of the line that
        // rest currently points at.
        immutable lenBeforeIndent = rest.length;
        rest = rest.find!notHWhite();
        if(rest.empty)
        {
            // Only whitespace is left, so all that can remain is the first
            // line without its newline.
            if(firstLine.empty)
                return "";
            return firstLine[0 .. $ - 1].to!string();
        }

        immutable indent = lenBeforeIndent - rest.length;
        if(indent == 0)
            return whole.to!string();

        auto result = appender!string();
        result.reserve(whole.length / 3);

        // > 1 because a first line which is just a newline is dropped.
        if(firstLine.length > 1)
            put(result, firstLine);

        while(true)
        {
            auto lineStart = rest.save;
            rest = rest.find('\n');
            if(rest.empty)
            {
                // The last line, and it does not end with a newline.
                if(!lineStart.empty)
                    put(result, lineStart);
                return result.data;
            }
            rest.popFront();
            auto line = lineStart[0 .. $ - rest.length];

            // Strip at most indent whitespace code units from the next line.
            for(size_t stripped = 0;
                stripped != indent && !rest.empty && !notHWhite(rest.front);
                ++stripped)
            {
                rest.popFront();
            }

            if(rest.empty)
            {
                // Nothing follows the newline except stripped indent, so the
                // newline itself is left out as well.
                put(result, line[0 .. $ - 1]);
                return result.data;
            }
            put(result, line);
        }
        // The compiler is not smart enough to realize that this line is unreachable.
        assert(0);
    }
    else
        return range.withoutIndent().to!string();
}
946 
/// Ditto
auto withoutIndent(R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.utf : byCodeUnit;

    // Lazy range over the code units of the text with the indent removed.
    //
    // _line starts at the next code unit to return and is read up to (and
    // including) the next newline. _range starts at the line after that one
    // with its indent already popped off, so stepping over a newline just
    // means continuing with _range.
    static struct WithoutIndent
    {
    public:

        @property empty() { return _line.empty; }

        @property front() { return _line.front; }

        void popFront()
        {
            // No indent means that the text is passed through untouched.
            if(_indent == 0)
            {
                _line.popFront();
                return;
            }

            if(_line.front == '\n')
                _nextLine();
            else
                _line.popFront();
            _skipLastNewline();
        }

        @property save()
        {
            auto retval = this;
            retval._line = _line.save;
            retval._range = _range.save;
            return retval;
        }

    private:

        // true for every code unit which is not horizontal whitespace
        // (so it is also true for '\n').
        static bool notHWhite(ElementEncodingType!R c)
        {
            switch(c)
            {
                case ' ':
                case '\t':
                case '\r': return false;
                default : return true;
            }
        }

        // Makes _line start where _range currently is and moves _range past
        // the next newline and the indent which follows it. If there is no
        // further newline, _range ends up empty.
        void _nextLine()
        {
            import std.algorithm.searching : find;
            _line = _range.save;
            _range = _range.find('\n');
            if(_range.empty)
                return;
            _range.popFront();
            _popIndent();
        }

        // Pops at most _indent whitespace code units off of _range.
        void _popIndent()
        {
            foreach(_; 0 .. _indent)
            {
                if(_range.empty)
                    return;
                if(notHWhite(_range.front))
                    return;
                _range.popFront();
            }
        }

        // If _line is at a newline which is followed by nothing other than
        // stripped indent, then that newline is dropped so that the result
        // matches what stripIndent does for strings. This has to be checked
        // every time that _line or _range changes - including right after
        // construction, since the first line can consist of just the indent
        // (e.g. "    \n").
        void _skipLastNewline()
        {
            if(_range.empty && !_line.empty && _line.front == '\n')
                _line = _range;
        }

        this(R range)
        {
            import std.algorithm : countUntil, find;
            import std.range : popFrontN;

            _range = byCodeUnit(range);
            if(_range.empty)
            {
                _line = _range;
                return;
            }

            auto orig = _range.save;
            immutable noFirstIndent = notHWhite(_range.front);
            if(noFirstIndent)
            {
                // The first line is not indented, so the indent is taken from
                // the second line (if there is one).
                _range = _range.find('\n');
                if(_range.empty)
                    goto noIndent;
                _range.popFront();
            }

            {
                // countUntil returns -1 if only whitespace is left. In that
                // case, all of it is indent, so the indent is treated as
                // being as large as possible.
                immutable count = _range.save.countUntil!(a => notHWhite(a))();
                _indent = count < 0 ? size_t.max : cast(size_t)count;
            }
            if(_indent == 0)
            {
                noIndent: _line = orig;
                return;
            }
            if(noFirstIndent && orig.front != '\n')
            {
                // Keep the unindented first line.
                _range = orig;
                _popIndent();
            }
            else
                _range.popFrontN(_indent);
            _nextLine();
            _skipLastNewline();
        }

        typeof(byCodeUnit(R.init)) _range;
        typeof(byCodeUnit(R.init)) _line;
        size_t _indent;
    }

    return WithoutIndent(range);
}
1067 
///
unittest
{
    import std.algorithm.comparison : equal;

    // The prime use case for these two functions is for an Entity.text section
    // that is formatted to be human-readable, and the rules of what whitespace
    // is stripped from the beginning or end of the range are geared towards
    // the text coming from a well-formatted Entity.text section.
    {
        import dxml.parser;
        auto xml = "<root>\n" ~
                   "    <code>\n" ~
                   "    bool isASCII(string str)\n" ~
                   "    {\n" ~
                   "        import std.algorithm : all;\n" ~
                   "        import std.ascii : isASCII;\n" ~
                   "        return str.all!isASCII();\n" ~
                   "    }\n" ~
                   "    </code>\n" ~
                   "</root>";
        auto range = parseXML(xml);
        // Skip <root> and <code> to get to the text inside of <code>.
        range.popFront();
        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n" ~
               "    bool isASCII(string str)\n" ~
               "    {\n" ~
               "        import std.algorithm : all;\n" ~
               "        import std.ascii : isASCII;\n" ~
               "        return str.all!isASCII();\n" ~
               "    }\n" ~
               "    ");
        assert(range.front.text.stripIndent() ==
               "bool isASCII(string str)\n" ~
               "{\n" ~
               "    import std.algorithm : all;\n" ~
               "    import std.ascii : isASCII;\n" ~
               "    return str.all!isASCII();\n" ~
               "}");
    }

    // The indent that is stripped matches the amount of whitespace at the front
    // of the first line.
    assert(("    start\n" ~
            "    foo\n" ~
            "    bar\n" ~
            "        baz\n" ~
            "        xyzzy\n" ~
            "           ").stripIndent() ==
           "start\n" ~
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy\n" ~
           "       ");

    // If the first line has no leading whitespace but the second line does,
    // then the second line's leading whitespace is treated as the indent.
    assert(("foo\n" ~
            "    bar\n" ~
            "        baz\n" ~
            "        xyzzy").stripIndent() ==
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy");

    // A first line which is just a newline is dropped entirely.
    assert(("\n" ~
            "    foo\n" ~
            "    bar\n" ~
            "        baz\n" ~
            "        xyzzy").stripIndent() ==
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy");

    // If neither of the first two lines has leading whitespace, then nothing
    // is stripped.
    assert(("foo\n" ~
            "bar\n" ~
            "    baz\n" ~
            "    xyzzy\n" ~
            "    ").stripIndent() ==
           "foo\n" ~
           "bar\n" ~
           "    baz\n" ~
           "    xyzzy\n" ~
           "    ");

    // If a subsequent line starts with less whitespace than the indent, then
    // all of its leading whitespace is stripped but no other characters are
    // stripped.
    assert(("      foo\n" ~
            "         bar\n" ~
            "   baz\n" ~
            "         xyzzy").stripIndent() ==
           "foo\n" ~
           "   bar\n" ~
           "baz\n" ~
           "   xyzzy");

    // If the last line is just the indent, then it and the newline before it
    // are stripped.
    assert(("    foo\n" ~
            "       bar\n" ~
            "    ").stripIndent() ==
           "foo\n" ~
           "   bar");

    // If the last line is just whitespace, but it's more than the indent, then
    // the whitespace after the indent is kept.
    assert(("    foo\n" ~
            "       bar\n" ~
            "       ").stripIndent() ==
           "foo\n" ~
           "   bar\n" ~
           "   ");

    // withoutIndent does the same as stripIndent but with a lazy range.
    assert(equal(("  foo\n" ~
                  "    bar\n" ~
                  "    baz\n").withoutIndent(),
                 "foo\n" ~
                 "  bar\n" ~
                 "  baz"));
}
1197 
unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : enforce;
    import std.utf : byUTF;
    import dxml.internal : testRangeFuncs;

    // Verifies that both stripIndent and withoutIndent turn input into
    // expected for the kind of range that makeRange creates. The caller's
    // line number is passed along so that a failure is reported at the call
    // site rather than inside of this helper.
    static void check(alias makeRange)(string input, string expected, size_t line = __LINE__)
    {
        auto range = makeRange(input);

        auto eager = range.save.stripIndent();
        enforce!AssertError(eager == expected, "unittest failed 1", __FILE__, line);

        auto lazyResult = range.save.withoutIndent();
        alias C = ElementType!(typeof(lazyResult));
        enforce!AssertError(equal(lazyResult, expected.byUTF!C), "unittest failed 2", __FILE__, line);
    }

    static foreach(makeRange; testRangeFuncs)
    {
        // Zero or one line, or a trailing line consisting of whitespace.
        check!makeRange("", "");
        check!makeRange("     ", "");
        check!makeRange("foo", "foo");
        check!makeRange("\nfoo", "\nfoo");
        check!makeRange("    foo", "foo");
        check!makeRange("\n    foo", "foo");
        check!makeRange("\n    foo\n", "foo");
        check!makeRange("\n    foo\n    ", "foo");
        check!makeRange("\n    foo\n     ", "foo\n ");
        check!makeRange("foo\n      ", "foo");

        // The indent comes from the first line.
        check!makeRange("  foo\n  bar  \n    baz", "foo\nbar  \n  baz");
        check!makeRange("  foo\nbar\n  baz", "foo\nbar\nbaz");
        check!makeRange("  foo\n bar\n  baz", "foo\nbar\nbaz");
        check!makeRange("  foo\n  bar\n  baz", "foo\nbar\nbaz");
        check!makeRange("  foo\n   bar\n  baz", "foo\n bar\nbaz");
        check!makeRange("  foo\n    bar\n  baz", "foo\n  bar\nbaz");
        check!makeRange("  foo\n     bar\n  baz", "foo\n   bar\nbaz");
        check!makeRange("  foo\n     bar\n  baz\n\n\n\n\n", "foo\n   bar\nbaz\n\n\n\n");

        check!makeRange("     foo\n  bar\n       baz", "foo\nbar\n  baz");

        // The indent comes from the second line (or there is none).
        check!makeRange("foo\n     bar\n      baz", "foo\nbar\n baz");
        check!makeRange("foo\nbar\n      baz\n", "foo\nbar\n      baz\n");
    }
}
1242 
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    // Both functions must be usable from @safe pure code with every kind of
    // test range.
    static foreach(makeRange; testRangeFuncs)
    {{
        auto eager = stripIndent(makeRange("foo"));
        assert(eager == "foo");

        auto lazyResult = withoutIndent(makeRange("foo"));
        assert(equal(lazyResult, "foo"));
    }}
}
1254 
1255 
/++
    String representations of the five entity references which the XML spec
    predefines.

    See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR)
              $(LREF parseStdEntityRef)
  +/
enum StdEntityRef : string
{
    /// The entity reference which stands for $(D_CODE_STRING $(AMP))
    amp = "&amp;",

    /// The entity reference which stands for $(D_CODE_STRING >)
    gt = "&gt;",

    /// The entity reference which stands for $(D_CODE_STRING <)
    lt = "&lt;",

    /// The entity reference which stands for $(D_CODE_STRING ')
    apos = "&apos;",

    /// The entity reference which stands for $(D_CODE_STRING ")
    quot = "&quot;",
}
1280 
1281 
/++
    Returns a lazy range of code units in which every character that cannot
    appear in an $(REF EntityType._text, dxml, parser) in its literal form has
    been encoded.

    The main purpose of encodeText is to be used together with
    $(REF XMLWriter.writeText, dxml, writer) so that characters which must not
    be written literally are never written literally.

    The conversions which encodeText does are

    $(TABLE
        $(TR $(TD convert $(D_CODE_STRING &) to $(D_CODE_STRING $(AMP)amp;) ))
        $(TR $(TD convert $(D_CODE_STRING <) to $(D_CODE_STRING $(AMP)lt;) ))
        $(TR $(TD convert $(D_CODE_STRING ]]>) to $(D_CODE_STRING ]]$(AMP)gt;) ))
    )

    See_Also: $(REF XMLWriter.writeText, dxml, writer)$(BR)
              $(LREF encodeAttr)$(BR)
              $(LREF decodeXML)$(BR)
              $(LREF asDecodedXML)
  +/
auto encodeText(R)(R text)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.utf : byCodeUnit;

    // While _len is non-zero, the first _len code units of _buffer hold the
    // replacement for what is at the front of _text. The replacement is stored
    // back to front so that _buffer[_len - 1] is always the next code unit to
    // return. The front of _text is not popped until the replacement has been
    // consumed, which is why empty only needs to look at _text.
    static struct EncodeText
    {
    public:

        @property front()
        {
            return _len != 0 ? cast(ElementEncodingType!R)_buffer[_len - 1] : _text.front;
        }

        @property empty() { return _text.empty; }

        void popFront()
        {
            // In the middle of a replacement - _text only moves on once the
            // last code unit of the replacement has been popped.
            if(_len != 0 && --_len != 0)
                return;
            _text.popFront();
            _handleEntity();
        }

        @property save()
        {
            auto copy = this;
            copy._text = _text.save;
            return copy;
        }

    private:

        // Looks at the front of _text and, if it cannot be returned as-is,
        // puts its replacement in _buffer.
        void _handleEntity()
        {
            if(_text.empty)
                return;

            immutable unit = _text.front;
            if(unit == '&')
            {
                enum reversed = ";pma&";
                _buffer = reversed;
                _len = reversed.length;
            }
            else if(unit == '<')
            {
                enum reversed = ";tl&";
                _buffer = reversed;
                _len = reversed.length;
            }
            else if(unit == ']')
            {
                // FIXME This should use startsWith, but for some reason,
                // startsWith doesn't currently work with @nogc or nothrow
                // even when this code should be able to be @nogc and/or
                // nothrow.
                auto ahead = _text.save;
                ahead.popFront();
                if(ahead.empty || ahead.front != ']')
                    return;
                ahead.popFront();
                if(ahead.empty || ahead.front != '>')
                    return;

                // _text is moved to the '>' so that all of "]]>" is replaced
                // in one go.
                _text = ahead;
                enum reversed = ";tg&]]";
                _buffer = reversed;
                _len = reversed.length;
            }
        }

        this(R text)
        {
            _text = byCodeUnit(text);
            _handleEntity();
        }

        char["]]&gt;".length] _buffer;
        size_t _len;
        typeof(byCodeUnit(R.init)) _text;
    }

    return EncodeText(text);
}
1395 
///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;

    // & and < are encoded.
    assert(encodeText(`foo & bar`).equal(`foo &amp; bar`));
    assert(encodeText(`foo < bar`).equal(`foo &lt; bar`));

    // >, ', and " are left alone...
    assert(encodeText(`foo > bar`).equal(`foo > bar`));
    assert(encodeText(`foo ' bar`).equal(`foo ' bar`));
    assert(encodeText(`foo " bar`).equal(`foo " bar`));

    // ...unless the > is part of ]]>.
    assert(encodeText("foo ]]> bar").equal("foo ]]&gt; bar"));

    assert(encodeText("hello world").equal("hello world"));
}
1410 
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        assert(encodeText(makeRange("")).empty);
        assert(encodeText(makeRange(`& < > ' "`)).equal(`&amp; &lt; > ' "`));
        assert(encodeText(makeRange("&&&")).equal("&amp;&amp;&amp;"));

        // "] ]>" is not "]]>", so its > must not be encoded. save must also
        // give independent copies.
        auto encoded = encodeText(makeRange(`&&<<>>''""hello ] ]> world"">><<&&`));
        assert(equal(encoded.save, encoded.save));
        assert(equal(encoded.save, `&amp;&amp;&lt;&lt;>>''""hello ] ]> world"">>&lt;&lt;&amp;&amp;`));
    }}
}
1427 
1428 
/++
    Returns a lazy range of code units in which every character that cannot
    appear in the attribute value of an element tag in its literal form has
    been encoded.

    The main purpose of encodeAttr is to be used together with
    $(REF XMLWriter.writeAttr, dxml, writer) so that characters which must not
    be written literally are never written literally.

    The conversions which encodeAttr does are

    $(TABLE
        $(TR $(TD convert $(D_CODE_STRING &) to $(D_CODE_STRING $(AMP)amp;) ))
        $(TR $(TD convert $(D_CODE_STRING <) to $(D_CODE_STRING $(AMP)lt;) ))
        $(TR $(TD convert $(D_CODE_STRING ') to $(D_CODE_STRING $(AMP)apos;) if
              $(D quote == $(D_STRING '\''))))
        $(TR $(TD convert $(D_CODE_STRING ") to $(D_CODE_STRING $(AMP)quot;) if
              $(D quote == $(D_STRING '"'))))
    )

    See_Also: $(REF XMLWriter.writeAttr, dxml, writer)$(BR)
              $(LREF encodeText)$(BR)
              $(LREF decodeXML)$(BR)
              $(LREF asDecodedXML)
  +/
auto encodeAttr(char quote = '"', R)(R text)
    if((quote == '"' || quote == '\'') && isForwardRange!R && isSomeChar!(ElementType!R))
{
    import std.utf : byCodeUnit;

    // While _len is non-zero, the first _len code units of _buffer hold the
    // replacement for the code unit at the front of _text. The replacement is
    // stored back to front so that _buffer[_len - 1] is always the next code
    // unit to return. The front of _text is not popped until the replacement
    // has been consumed, which is why empty only needs to look at _text.
    static struct EncodeAttr
    {
    public:

        @property front()
        {
            return _len != 0 ? cast(ElementEncodingType!R)_buffer[_len - 1] : _text.front;
        }

        @property empty() { return _text.empty; }

        void popFront()
        {
            // In the middle of a replacement - _text only moves on once the
            // last code unit of the replacement has been popped.
            if(_len != 0 && --_len != 0)
                return;
            _text.popFront();
            _handleEntity();
        }

        @property save()
        {
            auto copy = this;
            copy._text = _text.save;
            return copy;
        }

    private:

        // Looks at the front of _text and, if it cannot be returned as-is,
        // puts its replacement in _buffer.
        void _handleEntity()
        {
            if(_text.empty)
                return;

            immutable unit = _text.front;
            if(unit == '&')
            {
                enum reversed = ";pma&";
                _buffer = reversed;
                _len = reversed.length;
            }
            else if(unit == '<')
            {
                enum reversed = ";tl&";
                _buffer = reversed;
                _len = reversed.length;
            }
            else if(unit == quote)
            {
                // Only the kind of quote which delimits the attribute value
                // has to be encoded.
                static if(quote == '"')
                    enum reversed = ";touq&";
                else
                    enum reversed = ";sopa&";
                _buffer = reversed;
                _len = reversed.length;
            }
        }

        this(R text)
        {
            _text = byCodeUnit(text);
            _handleEntity();
        }

        char["&quot;".length] _buffer;
        size_t _len;
        typeof(byCodeUnit(R.init)) _text;
    }

    return EncodeAttr(text);
}
1534 
///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;

    // With the default quote of ", only &, <, and " are encoded.
    assert(encodeAttr(`foo & bar`).equal(`foo &amp; bar`));
    assert(encodeAttr(`foo < bar`).equal(`foo &lt; bar`));
    assert(encodeAttr(`foo > bar`).equal(`foo > bar`));
    assert(encodeAttr(`foo ' bar`).equal(`foo ' bar`));
    assert(encodeAttr(`foo " bar`).equal(`foo &quot; bar`));

    // With a quote of ', it's ' which is encoded rather than ".
    assert(encodeAttr!'\''(`foo ' bar`).equal(`foo &apos; bar`));
    assert(encodeAttr!'\''(`foo " bar`).equal(`foo " bar`));

    assert(encodeAttr("hello world").equal("hello world"));
}
1551 
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(makeRange; testRangeFuncs)
    {{
        assert(encodeAttr(makeRange("")).empty);
        assert(encodeAttr!'\''(makeRange("")).empty);
        assert(encodeAttr(makeRange(`& < > ' "`)).equal(`&amp; &lt; > ' &quot;`));
        assert(encodeAttr!'\''(makeRange(`& < > ' "`)).equal(`&amp; &lt; > &apos; "`));
        assert(encodeAttr(makeRange("&&&")).equal("&amp;&amp;&amp;"));

        // Double-quoted attribute values: " is encoded, ' is not.
        {
            auto encoded = encodeAttr(makeRange(`&&<<>>''""hello world"">><<&&`));
            assert(equal(encoded.save, encoded.save));
            assert(equal(encoded.save, `&amp;&amp;&lt;&lt;>>''&quot;&quot;hello world&quot;&quot;>>&lt;&lt;&amp;&amp;`));
        }

        // Single-quoted attribute values: ' is encoded, " is not.
        {
            auto encoded = encodeAttr!'\''(makeRange(`&&<<>>''""hello world"">><<&&`));
            assert(equal(encoded.save, encoded.save));
            assert(equal(encoded.save, `&amp;&amp;&lt;&lt;>>&apos;&apos;""hello world"">>&lt;&lt;&amp;&amp;`));
        }
    }}
}
1578 
1579 
/++
    Returns a range of $(K_CHAR) containing the character reference
    corresponding to the given character.

    The character reference is always written in its hexadecimal form.

    Params:
        c = The character to encode.

    See_Also: $(LREF parseCharRef)
  +/
auto encodeCharRef(dchar c)
{
    // The character reference is stored in a fixed-size buffer and is
    // terminated with '$' (which cannot be part of a character reference), so
    // the range is empty once _index reaches the '$'.
    static struct EncodeCharRef
    {
    public:

        @property front() { return _buffer[_index]; }

        @property empty() { return _buffer[_index] == '$'; }

        void popFront() { ++_index; }

        @property save() { return this; }

    private:

        import std.conv : to;

        // The 5 extra chars are for "&#x", ";", and the '$' terminator. The
        // number of decimal digits in dchar.max is used for the digits, and
        // the hexadecimal form of a value never needs more digits than its
        // decimal form does.
        // NOTE(review): this assumes that c is no greater than dchar.max -
        // confirm that callers never pass an invalid dchar.
        char[to!string(cast(uint)dchar.max).length + 5] _buffer;
        size_t _index;
    }

    import std.format : formattedWrite;
    import std.string : representation;

    EncodeCharRef retval;
    // representation is used so that the buffer is written to as an array of
    // ubyte rather than as an array of char.
    formattedWrite!"&#x%x;$"(retval._buffer[].representation, c);
    return retval;
}
1618 
///
unittest
{
    import std.algorithm.comparison : equal;

    assert(encodeCharRef(' ').equal("&#x20;"));
    assert(encodeCharRef('A').equal("&#x41;"));
    assert(encodeCharRef('\u2424').equal("&#x2424;"));

    // parseCharRef converts the character reference back into the character.
    auto encoded = encodeCharRef('*');
    assert(parseCharRef(encoded) == '*');
}
1631 
unittest
{
    import std.algorithm.comparison : equal;

    // save must give independent copies which iterate over the same
    // character reference.
    enum expected = "&#x23;";
    auto encoded = encodeCharRef('#');
    assert(encoded.save.equal(encoded.save));
    assert(encoded.save.equal(expected));
}