1 // Written in the D programming language 2 3 /++ 4 This module contains helper functions which aren't specific to the parser, 5 the DOM, or the writer. 6 7 $(TABLE 8 $(TR $(TH Symbol) $(TH Description)) 9 $(TR $(TD $(LREF decodeXML)) 10 $(TD Takes a range of characters, strips carriage returns from it, 11 and converts both character references and the predefined 12 entity references in the range into the characters that they 13 refer to.)) 14 $(TR $(TD $(LREF asDecodedXML)) 15 $(TD The version of $(LREF decodeXML) that returns a lazy range.)) 16 $(TR $(TD $(LREF parseCharRef)) 17 $(TD Parses a character reference from the front of a range of 18 characters.)) 19 $(TR $(TD $(LREF parseStdEntityRef)) 20 $(TD Parses one of the predefined entity references from the start 21 of a range of characters.)) 22 $(TR $(TD $(LREF stripIndent)) 23 $(TD Removes the indent from the front of each line of a range of 24 characters that was XML text which was formatted for 25 human-readability.)) 26 $(TR $(TD $(LREF withoutIndent)) 27 $(TD The version of $(LREF stripIndent) that returns a lazy 28 range.)) 29 $(TR $(TD $(LREF StdEntityRef)) 30 $(TD Enum containing the string representations of the five, 31 predefined entity references.)) 32 $(TR $(TD $(LREF encodeText)) 33 $(TD Encodes characters which cannot appear in 34 $(REF_ALTTEXT EntityType.text, EntityType.text, dxml, parser) 35 in their literal form.)) 36 $(TR $(TD $(LREF encodeAttr)) 37 $(TD Encodes characters which cannot appear in the attribute value 38 of an element start tag in their literal form.)) 39 $(TR $(TD $(LREF encodeCharRef)) 40 $(TD Encodes a character as a character reference.)) 41 ) 42 43 Copyright: Copyright 2018 44 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 45 Authors: $(HTTPS jmdavisprog.com, Jonathan M Davis) 46 Source: $(LINK_TO_SRC dxml/_util.d) 47 48 See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0) 49 +/ 50 module dxml.util; 51 52 import std.range.primitives; 53 import std.traits; 54 import std.typecons : Nullable; 55 56 /++ 57 Decodes any XML character references and standard XML entity references in 58 the text as well as removing any carriage returns. It's intended to be used 59 on the text fields of element tags and on the values of start tag 60 attributes. 61 62 There are a number of characters that either can't be directly represented 63 in the text fields or attribute values in XML or which can sometimes be 64 directly represented but not always (e.g. an attribute value can contain 65 either a single quote or a double quote, but it can't contain both at the 66 same time, because one of them would match the opening quote). So, those 67 characters have alternate representations in order to be allowed (e.g. 68 $(D_CODE_STRING "$(AMP)lt;") for $(D_CODE_STRING '<'), because 69 $(D_CODE_STRING '<') would normally be the beginning of an entity). 70 Technically, they're entity references, but the ones handled by decodeXML 71 are the ones explicitly defined in the XML standard and which don't require 72 a DTD section. 73 74 Ideally, the parser would transform all such alternate representations to 75 what they represent when providing the text to the application, but that 76 would make it impossible to return slices of the original text from the 77 properties of an $(REF_ALTTEXT Entity, EntityRange.Entity, dxml, parser). 78 So, instead of having those properties do the transformation themselves, 79 decodeXML and asDecodedXML do that so that the application can choose to do 80 it or not (in many cases, there is nothing to decode, making the calls 81 unnecessary). 82 83 Similarly, an application can choose to encode a character as a character 84 reference (e.g. $(D_CODE_STRING '$(AMP)#65") or 85 $(D_CODE_STRING '$(AMP)#x40") for $(D_CODE_STRING 'A')). decodeXML will 86 decode such character references to their corresponding characters. 87 88 However, decodeXML does not handle any entity references beyond the five 89 predefined ones listed below. All others are left unprocessed. Processing 90 them properly would require handling the DTD section, which dxml does not 91 support. The parser considers any entity references other than the 92 predefined ones to be invalid XML, so unless the text being passed to 93 decodeXML doesn't come from dxml's parser, it can't have any entity 94 references in it other than the predefined ones. Similarly, invalid 95 character references are left unprocessed as well as any character that is 96 not valid in an XML document. decodeXML never throws on invalid XML. 97 98 Also, $(D_CODE_STRING '\r') is not supposed to appear in an XML document 99 except as a character reference unless it's in a CDATA section. So, it 100 really should be stripped out before being handed off to the application, 101 but again, that doesn't work with slices. So, decodeXML also handles that. 102 103 Specifically, what decodeXML and asDecodedXML do is 104 105 $(TABLE 106 $(TR $(TD convert $(D_CODE_STRING $(AMP)amp;) to $(D_CODE_STRING &))) 107 $(TR $(TD convert $(D_CODE_STRING $(AMP)gt;) to $(D_CODE_STRING >))) 108 $(TR $(TD convert $(D_CODE_STRING $(AMP)lt;) to $(D_CODE_STRING <))) 109 $(TR $(TD convert $(D_CODE_STRING $(AMP)apos;) to $(D_CODE_STRING '))) 110 $(TR $(TD convert $(D_CODE_STRING $(AMP)quot;) to $(D_CODE_STRING "))) 111 $(TR $(TD remove all instances of $(D_CODE_STRING \r))) 112 $(TR $(TD convert all character references (e.g. 113 $(D_CODE_STRING $(AMP)#xA;)) to the characters that they 114 represent)) 115 ) 116 117 All other entity references are left untouched, and any $(D_CODE_STRING '&') 118 which is not used in one of the constructs listed in the table as well as 119 any malformed constructs (e.g. $(D_CODE_STRING "&Amp;") or 120 $(D_CODE_STRING "&#xGGA2;")) are left untouched. 121 122 The difference between decodeXML and asDecodedXML is that decodeXML returns 123 a $(K_STRING), whereas asDecodedXML returns a lazy _range of code 124 units. In the case where a $(K_STRING) is passed to decodeXML, it 125 will simply return the original $(K_STRING) if there is no text to decode 126 (whereas in other cases, decodeXML and asDecodedXML are forced to return 127 new ranges even if there is no text to decode). 128 129 Params: 130 range = The _range of characters to decodeXML. 131 132 Returns: The decoded text. decodeXML returns a $(K_STRING), whereas 133 asDecodedXML returns a lazy _range of code units (so it could be a 134 _range of $(K_CHAR) or $(K_WCHAR) and not just $(K_DCHAR); which it 135 is depends on the code units of the _range being passed in). 136 137 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR) 138 $(LREF parseStdEntityRef)$(BR) 139 $(LREF parseCharRef)$(BR) 140 $(REF EntityRange.Entity.attributes, dxml, parser)$(BR) 141 $(REF EntityRange.Entity.text, dxml, parser)$(BR) 142 $(LREF encodeAttr)$(BR) 143 $(LREF encodeText) 144 +/ 145 string decodeXML(R)(R range) 146 if(isForwardRange!R && isSomeChar!(ElementType!R)) 147 { 148 static if(isDynamicArray!R && is(Unqual!(ElementEncodingType!R) == char)) 149 { 150 import std.algorithm.searching : find, startsWith; 151 import std.array : appender; 152 import std.meta : AliasSeq; 153 154 auto found = range.find('&', '\r'); 155 if(found[1] == 0) 156 return range; 157 158 auto retval = appender!string(); 159 retval.reserve(range.length); 160 put(retval, range[0 .. $ - found[0].length]); 161 range = range[$ - found[0].length .. $]; 162 163 size_t i = 0; 164 loop: for(; i != range.length;) 165 { 166 switch(range[i]) 167 { 168 case '&': 169 { 170 if(i + 1 == range.length) 171 { 172 ++i; 173 break loop; 174 } 175 put(retval, range[0 .. i]); 176 range = range[i .. $]; 177 i = 0; 178 static foreach(func; AliasSeq!(parseStdEntityRef, parseCharRef)) 179 {{ 180 immutable c = func(range); 181 if(!c.isNull) 182 { 183 put(retval, c.get); 184 continue loop; 185 } 186 }} 187 put(retval, '&'); 188 range = range[1 .. $]; 189 continue; 190 } 191 case '\r': 192 { 193 if(i != 0) 194 { 195 put(retval, range[0 .. i]); 196 range = range[i + 1 .. $]; 197 i = 0; 198 } 199 else 200 range = range[1 .. $]; 201 continue; 202 } 203 default: ++i; continue; 204 } 205 } 206 207 if(i != 0) 208 put(retval, range[0 .. i]); 209 210 return retval.data; 211 } 212 else 213 { 214 import std.conv : to; 215 return range.asDecodedXML().to!string(); 216 } 217 } 218 219 220 /// Ditto 221 auto asDecodedXML(R)(R range) 222 if(isForwardRange!R && isSomeChar!(ElementType!R)) 223 { 224 import std.meta : AliasSeq; 225 import std.utf : byCodeUnit, encode, UseReplacementDchar; 226 227 static struct DecodedXML 228 { 229 public: 230 231 @property empty() { return _range.empty && _begin == _end; } 232 233 void popFront() 234 { 235 if(_begin != _end) 236 { 237 if(++_begin != _end) 238 return; 239 } 240 else 241 _range.popFront(); 242 _popFrontImpl(); 243 } 244 245 @property save() 246 { 247 auto retval = this; 248 retval._range = _range.save; 249 return retval; 250 } 251 252 private: 253 254 void _popFrontImpl() 255 { 256 while(!_range.empty) 257 { 258 switch(_range.front) 259 { 260 case '&': 261 { 262 static foreach(func; AliasSeq!(parseStdEntityRef, parseCharRef)) 263 {{ 264 immutable c = func(_range); 265 if(!c.isNull) 266 { 267 _begin = 0; 268 _end = _buffer.encode!(UseReplacementDchar.yes)(c); 269 return; 270 } 271 }} 272 goto default; 273 } 274 case '\r': 275 { 276 assert(_begin == _end); 277 _range.popFront(); 278 continue; 279 } 280 default: 281 { 282 assert(_begin == _end); 283 return; 284 } 285 } 286 } 287 } 288 289 this(R range) @safe 290 { 291 _range = byCodeUnit(range); 292 _popFrontImpl(); 293 } 294 295 typeof(byCodeUnit(R.init)) _range; 296 static if(is(Unqual!(ElementEncodingType!R) == char)) 297 char[4] _buffer; 298 else static if(is(Unqual!(ElementEncodingType!R) == wchar)) 299 wchar[2] _buffer; 300 else 301 dchar[1] _buffer; 302 size_t _begin; 303 size_t _end; 304 305 public: 306 307 // FIXME A compiler bug prevents this from going with the public declarations 308 // above. If it's there, the compiler thinks that _buffer isn't defined when 309 // it tries to compile front. It needs to be reduced and reported. 310 @property typeof(_buffer[0]) front() { return _begin == _end ? _range.front : _buffer[_begin]; } 311 } 312 313 return DecodedXML(range); 314 } 315 316 /// 317 version(dxmlTests) unittest 318 { 319 assert(decodeXML("hello world &><'" \r\r\r\r\r foo") == 320 `hello world &><'" foo`); 321 322 assert(decodeXML("if(foo && bar)\r\n" ~ 323 " left = right;") == 324 "if(foo && bar)\n" ~ 325 " left = right;"); 326 327 assert(decodeXML("ディラン") == "ディラン"); 328 assert(decodeXML("foo") == "foo"); 329 assert(decodeXML("&# ;") == "&# ;"); 330 331 { 332 import std.algorithm.comparison : equal; 333 auto range = asDecodedXML("hello world &><'" " ~ 334 "\r\r\r\r\r foo"); 335 assert(equal(range, `hello world &><'" foo`)); 336 } 337 338 { 339 import dxml.parser; 340 auto xml = "<root>\n" ~ 341 " <function return='vector<int>' name='foo'>\r\n" ~ 342 " <doc_comment>This function does something really\r\n" ~ 343 " fancy, and you will love it.</doc_comment>\r\n" ~ 344 " <param type='int' name='i'>\r\n" ~ 345 " <param type='const std::string&' name='s'>\r\n" ~ 346 " </function>\n" ~ 347 "</root>"; 348 auto range = parseXML!simpleXML(xml); 349 range.popFront(); 350 assert(range.front.type == EntityType.elementStart); 351 assert(range.front.name == "function"); 352 { 353 auto attrs = range.front.attributes; 354 assert(attrs.front.name == "return"); 355 assert(attrs.front.value == "vector<int>"); 356 assert(decodeXML(attrs.front.value) == "vector<int>"); 357 attrs.popFront(); 358 assert(attrs.front.name == "name"); 359 assert(attrs.front.value == "foo"); 360 assert(decodeXML(attrs.front.value) == "foo"); 361 } 362 range.popFront(); 363 364 assert(range.front.type == EntityType.elementStart); 365 assert(range.front.name == "doc_comment"); 366 range.popFront(); 367 368 assert(range.front.text == 369 "This function does something really\r\n" ~ 370 " fancy, and you will love it."); 371 assert(decodeXML(range.front.text) == 372 "This function does something really\n" ~ 373 " fancy, and you will love it."); 374 range.popFront(); 375 376 assert(range.front.type == EntityType.elementEnd); 377 assert(range.front.name == "doc_comment"); 378 range.popFront(); 379 380 assert(range.front.type == EntityType.elementStart); 381 assert(range.front.name == "param"); 382 { 383 auto attrs = range.front.attributes; 384 assert(attrs.front.name == "type"); 385 assert(attrs.front.value == "int"); 386 assert(decodeXML(attrs.front.value) == "int"); 387 attrs.popFront(); 388 assert(attrs.front.name == "name"); 389 assert(attrs.front.value == "i"); 390 assert(decodeXML(attrs.front.value) == "i"); 391 } 392 range.popFront(); 393 394 assert(range.front.type == EntityType.elementStart); 395 assert(range.front.name == "param"); 396 { 397 auto attrs = range.front.attributes; 398 assert(attrs.front.name == "type"); 399 assert(attrs.front.value == "const std::string&"); 400 assert(decodeXML(attrs.front.value) == "const std::string&"); 401 attrs.popFront(); 402 assert(attrs.front.name == "name"); 403 assert(attrs.front.value == "s"); 404 assert(decodeXML(attrs.front.value) == "s"); 405 } 406 } 407 } 408 409 version(dxmlTests) unittest 410 { 411 import core.exception : AssertError; 412 import std.algorithm.comparison : equal; 413 import std.exception : enforce; 414 import std.utf : byUTF; 415 import dxml.internal : testRangeFuncs; 416 417 static void test(alias func)(string text, string expected, size_t line = __LINE__) 418 { 419 auto range = func(text); 420 enforce!AssertError(range.save.decodeXML() == expected, "unittest failed 1", __FILE__, line); 421 alias C = ElementType!(typeof(range.save.asDecodedXML())); 422 enforce!AssertError(equal(range.save.asDecodedXML(), expected.byUTF!C), "unittest failed 2", __FILE__, line); 423 } 424 425 static foreach(func; testRangeFuncs) 426 {{ 427 test!func("hello world & > < ' " \r\r\r\r\r foo", `hello world & > < ' " foo`); 428 test!func("&", "&"); 429 test!func("�", "�"); 430 test!func("&", "&"); 431 test!func("&&&&", "&&&&"); 432 test!func("&&&&", "&&&&"); 433 test!func("&#", "&#"); 434 test!func("&#;", "&#;"); 435 test!func("�", "�"); 436 test!func("�", "�"); 437 test!func("0", "0"); 438 test!func("�amp;", "�amp;"); 439 test!func("&#amp;", "&#amp;"); 440 test!func("&#x", "&#x"); 441 test!func("&#x;", "&#x;"); 442 test!func("�", "�"); 443 test!func("	", "\t"); 444 test!func(" ", " "); 445 test!func("ディラン", "ディラン"); 446 }} 447 } 448 449 version(dxmlTests) @safe pure unittest 450 { 451 import std.algorithm.comparison : equal; 452 import dxml.internal : testRangeFuncs; 453 454 static foreach(func; testRangeFuncs) 455 {{ 456 assert(decodeXML(func("foo")) == "foo"); 457 assert(equal(asDecodedXML(func("foo")), "foo")); 458 }} 459 } 460 461 462 /++ 463 This parses one of the five, predefined entity references mention in the XML 464 spec from the front of a range of characters. 465 466 If the given range starts with one of the five, predefined entity 467 references, then it is removed from the range, and the corresponding 468 character is returned. 469 470 If the range does not start with one of those references, then the return 471 value is null, and the range is unchanged. 472 473 $(TABLE 474 $(TR $(TH Std Entity Ref)$(TH Converts To)) 475 $(TR $(TD $(D_CODE_STRING $(AMP)amp;))$(TD $(D_CODE_STRING &))) 476 $(TR $(TD $(D_CODE_STRING $(AMP)gt;))$(TD $(D_CODE_STRING >))) 477 $(TR $(TD $(D_CODE_STRING $(AMP)lt;))$(TD $(D_CODE_STRING $(LT)))) 478 $(TR $(TD $(D_CODE_STRING $(AMP)apos;))$(TD $(D_CODE_STRING '))) 479 $(TR $(TD $(D_CODE_STRING $(AMP)quot;))$(TD $(D_CODE_STRING "))) 480 ) 481 482 Any other entity references would require processing a DTD section in order 483 to be handled and are untouched by parseStdEntityRef as are any other types 484 of references. 485 486 Params: 487 range = A range of characters. 488 489 Returns: The character represented by the predefined entity reference that 490 was parsed from the front of the given range or null if the range 491 did not start with one of the five predefined entity references. 492 493 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR) 494 $(LREF parseCharRef)$(BR) 495 $(LREF decodeXML)$(BR) 496 $(LREF asDecodedXML) 497 +/ 498 Nullable!dchar parseStdEntityRef(R)(ref R range) 499 if(isForwardRange!R && isSomeChar!(ElementType!R)) 500 { 501 import std.algorithm.searching : startsWith; 502 import std.typecons : nullable, tuple; 503 import std.utf : byCodeUnit; 504 505 auto orig = range.save; 506 507 static if(isNarrowString!R) 508 auto cuRange = range.byCodeUnit(); 509 else 510 alias cuRange = range; 511 512 if(!cuRange.save.startsWith('&')) 513 return typeof(return).init; 514 cuRange.popFront(); 515 516 if(cuRange.empty) 517 goto invalid; 518 519 static foreach(t; [tuple("amp;", '&'), tuple("gt;", '>'), tuple("lt;", '<'), 520 tuple("apos;", '\''), tuple("quot;", '"')]) 521 { 522 if(cuRange.save.startsWith(t[0])) 523 { 524 cuRange.popFrontN(t[0].length); 525 static if(isNarrowString!R) 526 range = cuRange.source; 527 return nullable(cast(dchar)t[1]); 528 } 529 } 530 531 invalid: range = orig; 532 return typeof(return).init; 533 } 534 535 /// 536 version(dxmlTests) unittest 537 { 538 { 539 auto range = "&foo"; 540 assert(range.parseStdEntityRef() == '&'); 541 assert(range == "foo"); 542 } 543 { 544 auto range = ">bar"; 545 assert(range.parseStdEntityRef() == '>'); 546 assert(range == "bar"); 547 } 548 { 549 auto range = "<baz"; 550 assert(range.parseStdEntityRef() == '<'); 551 assert(range == "baz"); 552 } 553 { 554 auto range = "'dlang"; 555 assert(range.parseStdEntityRef() == '\''); 556 assert(range == "dlang"); 557 } 558 { 559 auto range = ""rocks"; 560 assert(range.parseStdEntityRef() == '"'); 561 assert(range == "rocks"); 562 } 563 { 564 auto range = " &foo"; 565 assert(range.parseStdEntityRef().isNull); 566 assert(range == " &foo"); 567 } 568 { 569 auto range = "&Amp;hello"; 570 assert(range.parseStdEntityRef().isNull); 571 assert(range == "&Amp;hello"); 572 } 573 { 574 auto range = " foo"; 575 assert(range.parseStdEntityRef().isNull); 576 assert(range == " foo"); 577 } 578 { 579 auto range = "hello world"; 580 assert(range.parseStdEntityRef().isNull); 581 assert(range == "hello world"); 582 } 583 } 584 585 version(dxmlTests) unittest 586 { 587 import std.algorithm.comparison : equal; 588 import dxml.internal : testRangeFuncs; 589 590 static foreach(func; testRangeFuncs) 591 { 592 for(auto range = func(";Amp;amp;&#amp;©& amp;"); !range.empty; range.popFront()) 593 { 594 auto temp = range.save; 595 assert(temp.parseStdEntityRef().isNull); 596 assert(equal(range.save, temp.save)); 597 } 598 { 599 auto range = func("&"); 600 assert(range.parseStdEntityRef().isNull); 601 assert(equal(range.save, "&")); 602 } 603 { 604 auto range = func(" &><'""); 605 assert(range.parseStdEntityRef().isNull); 606 assert(equal(range.save, " &><'"")); 607 range.popFront(); 608 609 assert(range.parseStdEntityRef() == '&'); 610 assert(equal(range.save, "><'"")); 611 assert(range.parseStdEntityRef() == '>'); 612 assert(equal(range.save, "<'"")); 613 assert(range.parseStdEntityRef() == '<'); 614 assert(equal(range.save, "'"")); 615 assert(range.parseStdEntityRef() == '\''); 616 assert(equal(range.save, """)); 617 assert(range.parseStdEntityRef() == '"'); 618 assert(range.empty); 619 } 620 } 621 } 622 623 version(dxmlTests) @safe pure unittest 624 { 625 import dxml.internal : testRangeFuncs; 626 627 static foreach(func; testRangeFuncs) 628 {{ 629 auto range = func("foo"); 630 assert(range.parseStdEntityRef().isNull); 631 }} 632 } 633 634 635 /++ 636 If the given range starts with a valid, XML, character reference, it is 637 removed from the range, and the corresponding character is returned. 638 639 If the range does not start with a valid, XML, character reference, then 640 the return value is null, and the range is unchanged. 641 642 Params: 643 range = A range of characters. 644 645 Returns: The character represented by the character reference that was 646 parsed from the front of the given range or null if the range did 647 not start with a valid, XML, character reference. 648 649 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#NT-CharRef)$(BR) 650 $(LREF parseStdEntityRef)$(BR) 651 $(LREF decodeXML)$(BR) 652 $(LREF asDecodedXML)$(BR) 653 $(LREF encodeCharRef) 654 +/ 655 Nullable!dchar parseCharRef(R)(ref R range) 656 if(isForwardRange!R && isSomeChar!(ElementType!R)) 657 { 658 import std.algorithm.searching : startsWith; 659 import std.conv : ConvException, parse, to; 660 import std.range : popFrontN; 661 import std.typecons : nullable; 662 import std.utf : byCodeUnit; 663 import dxml.internal : isXMLChar; 664 665 auto orig = range.save; 666 667 static if(isNarrowString!R) 668 auto cuRange = range.byCodeUnit(); 669 else 670 alias cuRange = range; 671 672 if(!cuRange.save.startsWith("&#")) 673 return typeof(return).init; 674 cuRange.popFrontN(2); 675 676 if(cuRange.empty) 677 goto invalid; 678 679 { 680 bool hex = false; 681 if(cuRange.front == 'x') 682 { 683 cuRange.popFront(); 684 hex = true; 685 // https://issues.dlang.org/show_bug.cgi?id=18248 686 import std.ascii : isHexDigit; 687 if(cuRange.empty || !isHexDigit(cuRange.front)) 688 goto invalid; 689 } 690 try 691 { 692 immutable c = to!dchar(cuRange.parse!uint(hex ? 16 : 10)); 693 if(!cuRange.startsWith(';') || (c != '\n' && !isXMLChar(c))) 694 goto invalid; 695 cuRange.popFront(); 696 static if(isNarrowString!R) 697 range = cuRange.source; 698 return nullable(cast()c); 699 } 700 catch(ConvException) 701 {} 702 } 703 704 invalid: range = orig; 705 return typeof(return).init; 706 } 707 708 /// 709 version(dxmlTests) unittest 710 { 711 import std.range.primitives : empty; 712 713 { 714 auto range = "0 hello world"; 715 assert(parseCharRef(range) == '0'); 716 assert(range == " hello world"); 717 } 718 { 719 auto range = "0 hello world"; 720 assert(parseCharRef(range) == '0'); 721 assert(range == " hello world"); 722 } 723 { 724 auto range = "ディラン"; 725 assert(parseCharRef(range) == 'デ'); 726 assert(range == "ィラン"); 727 assert(parseCharRef(range) == 'ィ'); 728 assert(range == "ラン"); 729 assert(parseCharRef(range) == 'ラ'); 730 assert(range == "ン"); 731 assert(parseCharRef(range) == 'ン'); 732 assert(range.empty); 733 } 734 { 735 auto range = "&#x;foo"; 736 assert(parseCharRef(range).isNull); 737 assert(range == "&#x;foo"); 738 } 739 { 740 auto range = "foobar"; 741 assert(parseCharRef(range).isNull); 742 assert(range == "foobar"); 743 } 744 { 745 auto range = " &x48;"; 746 assert(parseCharRef(range).isNull); 747 assert(range == " &x48;"); 748 } 749 } 750 751 version(dxmlTests) unittest 752 { 753 import std.algorithm.comparison : equal; 754 import dxml.internal : testRangeFuncs; 755 756 static foreach(func; testRangeFuncs) 757 { 758 for(auto range = func(";;&#;&#G;�&#F;"); !range.empty; range.popFront()) 759 { 760 auto temp = range.save; 761 assert(temp.parseCharRef().isNull); 762 assert(equal(range.save, temp.save)); 763 } 764 { 765 auto range = func("A"); 766 assert(range.parseCharRef().isNull); 767 assert(equal(range.save, "A")); 768 } 769 { 770 auto range = func(" ABC 京都市"); 771 assert(range.parseCharRef().isNull); 772 assert(equal(range.save, " ABC 京都市")); 773 range.popFront(); 774 775 assert(range.parseCharRef() == 'A'); 776 assert(equal(range.save, "BC 京都市")); 777 assert(range.parseCharRef() == 'B'); 778 assert(equal(range.save, "C 京都市")); 779 assert(range.parseCharRef() == 'C'); 780 assert(equal(range.save, " 京都市")); 781 782 assert(range.parseCharRef().isNull); 783 assert(equal(range.save, " 京都市")); 784 range.popFront(); 785 786 assert(range.parseCharRef() == '京'); 787 assert(equal(range.save, "都市")); 788 assert(range.parseCharRef() == '都'); 789 assert(equal(range.save, "市")); 790 assert(range.parseCharRef() == '市'); 791 assert(range.empty); 792 } 793 } 794 } 795 796 version(dxmlTests) @safe pure unittest 797 { 798 import dxml.internal : testRangeFuncs; 799 800 static foreach(func; testRangeFuncs) 801 {{ 802 auto range = func("foo"); 803 assert(range.parseCharRef().isNull); 804 }} 805 } 806 807 808 /++ 809 Strips the indent from a character range (most likely from 810 $(REF_ALTTEXT Entity.text, EntityRange.Entity.text, dxml, parser)). 811 The idea is that if the XML is formatted to be human-readable, and it's 812 multiple lines long, the lines are likely to be indented, but the 813 application probably doesn't want that extra whitespace. So, stripIndent 814 and withoutIndent attempt to intelligently strip off the leading 815 whitespace. 816 817 For these functions, whitespace is considered to be some combination of 818 $(D_CODE_STRING ' '), $(D_CODE_STRING '\t'), and $(D_CODE_STRING '\r') 819 ($(D_CODE_STRING '\n') is used to delineate lines, so it's not considered 820 whitespace). 821 822 Whitespace characters are stripped from the start of the first line, and 823 then those same number of whitespace characters are stripped from the 824 beginning of each subsequent line (or up to the first non-whitespace 825 character if the line starts with fewer whitespace characters). 826 827 If the first line has no leading whitespace, then the leading whitespace on 828 the second line is treated as the indent. This is done to handle case where 829 there is text immediately after a start tag and then subsequent lines are 830 indented rather than the text starting on the line after the start tag. 831 832 If neither of the first two lines has any leading whitespace, then no 833 whitespace is stripped. 834 835 So, if the text is well-formatted, then the indent should be cleanly 836 removed, and if it's unformatted or badly formatted, then no characters 837 other than leading whitespace will be removed, and in principle, no real 838 data will have been lost - though of course, it's up to the programmer to 839 decide whether it's better for the application to try to cleanly strip the 840 indent or to leave the text as-is. 841 842 The difference between stripIndent and withoutIndent is that stripIndent 843 returns a $(K_STRING), whereas withoutIndent returns a lazy range 844 of code units. In the case where a $(K_STRING) is passed to 845 stripIndent, it will simply return the original string if there is no 846 indent (whereas in other cases, stripIndent and withoutIndent are forced to 847 return new ranges). 848 849 Params: 850 range = A range of characters. 851 852 Returns: The text with the indent stripped from each line. stripIndent 853 returns a $(K_STRING), whereas withoutIndent returns a lazy range 854 of code units (so it could be a range of $(K_CHAR) or $(K_WCHAR) 855 and not just $(K_DCHAR); which it is depends on the code units of 856 the range being passed in). 857 858 See_Also: $(REF EntityRange.Entity.text, dxml, parser) 859 +/ 860 string stripIndent(R)(R range) 861 if(isForwardRange!R && isSomeChar!(ElementType!R)) 862 { 863 static if(isDynamicArray!R && is(Unqual!(ElementEncodingType!R) == char)) 864 { 865 static bool notHWhite(char c) 866 { 867 switch(c) 868 { 869 case ' ': 870 case '\t': 871 case '\r': return false; 872 default : return true; 873 } 874 } 875 876 import std.algorithm.searching : find; 877 import std.utf : byCodeUnit; 878 879 if(range.empty) 880 return range; 881 882 auto orig = range.save; 883 auto text = range.byCodeUnit(); 884 string firstLine; 885 886 if(notHWhite(text.front)) 887 { 888 text = text.find('\n'); 889 if(text.empty) 890 return orig; 891 text.popFront(); 892 firstLine = orig[0 .. orig.length - text.length]; 893 } 894 895 immutable beforeIndent = text.length; 896 text = text.find!notHWhite(); 897 if(text.empty) 898 return text.source; 899 immutable indent = beforeIndent - text.length; 900 901 if(indent == 0) 902 return orig; 903 904 import std.array : appender; 905 auto retval = appender!string(); 906 retval.reserve(orig.length / 3); 907 908 // > 1 because we don't want a newline by itself. 909 if(firstLine.length > 1) 910 put(retval, firstLine); 911 912 outer: while(true) 913 { 914 auto start = text.save; 915 text = text.find('\n'); 916 if(text.empty) 917 { 918 if(!start.empty) 919 put(retval, start); 920 return retval.data; 921 } 922 text.popFront(); 923 auto line = start[0 .. $ - text.length]; 924 foreach(_; 0 .. indent) 925 { 926 if(text.empty) 927 goto isEmpty; 928 if(notHWhite(text.front)) 929 goto notEmpty; 930 text.popFront(); 931 } 932 if(text.empty) 933 { 934 isEmpty: put(retval, line[0 .. $ - 1]); 935 return retval.data; 936 } 937 notEmpty: put(retval, line); 938 } 939 // The compiler is not smart enough to realize that this line is unreachable. 940 assert(0); 941 } 942 else 943 { 944 import std.conv : to; 945 return range.withoutIndent().to!string(); 946 } 947 } 948 949 /// Ditto 950 auto withoutIndent(R)(R range) 951 if(isForwardRange!R && isSomeChar!(ElementType!R)) 952 { 953 import std.utf : byCodeUnit; 954 955 static struct WithoutIndent 956 { 957 public: 958 959 @property empty() { return _line.empty; } 960 961 @property front() { return _line.front; } 962 963 void popFront() 964 { 965 if(_indent == 0) 966 { 967 _line.popFront(); 968 return; 969 } 970 971 if(_line.front == '\n') 972 _nextLine(); 973 else 974 _line.popFront(); 975 // Skip last newline 976 if(_range.empty && !_line.empty && _line.front == '\n') 977 _line = _range; 978 } 979 980 @property save() 981 { 982 auto retval = this; 983 retval._line = _line.save; 984 retval._range = _range.save; 985 return retval; 986 } 987 988 private: 989 990 static bool notHWhite(ElementEncodingType!R c) 991 { 992 switch(c) 993 { 994 case ' ': 995 case '\t': 996 case '\r': return false; 997 default : return true; 998 } 999 } 1000 1001 void _nextLine() 1002 { 1003 import std.algorithm.searching : find; 1004 _line = _range.save; 1005 _range = _range.find('\n'); 1006 if(_range.empty) 1007 return; 1008 _range.popFront(); 1009 _popIndent(); 1010 } 1011 1012 void _popIndent() 1013 { 1014 foreach(_; 0 .. _indent) 1015 { 1016 if(_range.empty) 1017 return; 1018 if(notHWhite(_range.front)) 1019 return; 1020 _range.popFront(); 1021 } 1022 } 1023 1024 this(R range) 1025 { 1026 import std.algorithm : countUntil, find; 1027 import std.range : popFrontN; 1028 1029 _range = byCodeUnit(range); 1030 if(_range.empty) 1031 { 1032 _line = _range; 1033 return; 1034 } 1035 1036 auto orig = _range.save; 1037 immutable noFirstIndent = notHWhite(_range.front); 1038 if(noFirstIndent) 1039 { 1040 _range = _range.find('\n'); 1041 if(_range.empty) 1042 goto noIndent; 1043 _range.popFront(); 1044 } 1045 1046 _indent = _range.save.countUntil!(a => notHWhite(a))(); 1047 if(_indent == 0) 1048 { 1049 noIndent: _line = orig; 1050 return; 1051 } 1052 if(noFirstIndent && orig.front != '\n') 1053 { 1054 _range = orig; 1055 _popIndent(); 1056 } 1057 else 1058 _range.popFrontN(_indent); 1059 _nextLine(); 1060 } 1061 1062 typeof(byCodeUnit(R.init)) _range; 1063 typeof(byCodeUnit(R.init)) _line; 1064 size_t _indent; 1065 } 1066 1067 return WithoutIndent(range); 1068 } 1069 1070 /// 1071 version(dxmlTests) unittest 1072 { 1073 import std.algorithm.comparison : equal; 1074 1075 // The prime use case for these two functions is for an Entity.text section 1076 // that is formatted to be human-readable, and the rules of what whitespace 1077 // is stripped from the beginning or end of the range are geared towards 1078 // the text coming from a well-formatted Entity.text section. 1079 { 1080 import dxml.parser; 1081 auto xml = "<root>\n" ~ 1082 " <code>\n" ~ 1083 " bool isASCII(string str)\n" ~ 1084 " {\n" ~ 1085 " import std.algorithm : all;\n" ~ 1086 " import std.ascii : isASCII;\n" ~ 1087 " return str.all!isASCII();\n" ~ 1088 " }\n" ~ 1089 " </code>\n" ~ 1090 "<root>"; 1091 auto range = parseXML(xml); 1092 range.popFront(); 1093 range.popFront(); 1094 assert(range.front.type == EntityType.text); 1095 assert(range.front.text == 1096 "\n" ~ 1097 " bool isASCII(string str)\n" ~ 1098 " {\n" ~ 1099 " import std.algorithm : all;\n" ~ 1100 " import std.ascii : isASCII;\n" ~ 1101 " return str.all!isASCII();\n" ~ 1102 " }\n" ~ 1103 " "); 1104 assert(range.front.text.stripIndent() == 1105 "bool isASCII(string str)\n" ~ 1106 "{\n" ~ 1107 " import std.algorithm : all;\n" ~ 1108 " import std.ascii : isASCII;\n" ~ 1109 " return str.all!isASCII();\n" ~ 1110 "}"); 1111 } 1112 1113 // The indent that is stripped matches the amount of whitespace at the front 1114 // of the first line. 1115 assert((" start\n" ~ 1116 " foo\n" ~ 1117 " bar\n" ~ 1118 " baz\n" ~ 1119 " xyzzy\n" ~ 1120 " ").stripIndent() == 1121 "start\n" ~ 1122 "foo\n" ~ 1123 "bar\n" ~ 1124 " baz\n" ~ 1125 " xyzzy\n" ~ 1126 " "); 1127 1128 // If the first line has no leading whitespace but the second line does, 1129 // then the second line's leading whitespace is treated as the indent. 1130 assert(("foo\n" ~ 1131 " bar\n" ~ 1132 " baz\n" ~ 1133 " xyzzy").stripIndent() == 1134 "foo\n" ~ 1135 "bar\n" ~ 1136 " baz\n" ~ 1137 " xyzzy"); 1138 1139 assert(("\n" ~ 1140 " foo\n" ~ 1141 " bar\n" ~ 1142 " baz\n" ~ 1143 " xyzzy").stripIndent() == 1144 "foo\n" ~ 1145 "bar\n" ~ 1146 " baz\n" ~ 1147 " xyzzy"); 1148 1149 // If neither of the first two lines has leading whitespace, then nothing 1150 // is stripped. 1151 assert(("foo\n" ~ 1152 "bar\n" ~ 1153 " baz\n" ~ 1154 " xyzzy\n" ~ 1155 " ").stripIndent() == 1156 "foo\n" ~ 1157 "bar\n" ~ 1158 " baz\n" ~ 1159 " xyzzy\n" ~ 1160 " "); 1161 1162 // If a subsequent line starts with less whitespace than the indent, then 1163 // all of its leading whitespace is stripped but no other characters are 1164 // stripped. 1165 assert((" foo\n" ~ 1166 " bar\n" ~ 1167 " baz\n" ~ 1168 " xyzzy").stripIndent() == 1169 "foo\n" ~ 1170 " bar\n" ~ 1171 "baz\n" ~ 1172 " xyzzy"); 1173 1174 // If the last line is just the indent, then it and the newline before it 1175 // are stripped. 1176 assert((" foo\n" ~ 1177 " bar\n" ~ 1178 " ").stripIndent() == 1179 "foo\n" ~ 1180 " bar"); 1181 1182 // If the last line is just whitespace, but it's more than the indent, then 1183 // the whitespace after the indent is kept. 1184 assert((" foo\n" ~ 1185 " bar\n" ~ 1186 " ").stripIndent() == 1187 "foo\n" ~ 1188 " bar\n" ~ 1189 " "); 1190 1191 // withoutIndent does the same as stripIndent but with a lazy range. 1192 assert(equal((" foo\n" ~ 1193 " bar\n" ~ 1194 " baz\n").withoutIndent(), 1195 "foo\n" ~ 1196 " bar\n" ~ 1197 " baz")); 1198 } 1199 1200 version(dxmlTests) unittest 1201 { 1202 import core.exception : AssertError; 1203 import std.algorithm.comparison : equal; 1204 import std.exception : enforce; 1205 import std.utf : byUTF; 1206 import dxml.internal : testRangeFuncs; 1207 1208 static void test(alias func)(string text, string expected, size_t line = __LINE__) 1209 { 1210 auto range = func(text); 1211 enforce!AssertError(range.save.stripIndent() == expected, "unittest failed 1", __FILE__, line); 1212 alias C = ElementType!(typeof(range.save.withoutIndent())); 1213 enforce!AssertError(equal(range.save.withoutIndent(), expected.byUTF!C), "unittest failed 2", __FILE__, line); 1214 } 1215 1216 static foreach(func; testRangeFuncs) 1217 { 1218 test!func("", ""); 1219 test!func(" ", ""); 1220 test!func("foo", "foo"); 1221 test!func("\nfoo", "\nfoo"); 1222 test!func(" foo", "foo"); 1223 test!func("\n foo", "foo"); 1224 test!func("\n foo\n", "foo"); 1225 test!func("\n foo\n ", "foo"); 1226 test!func("\n foo\n ", "foo\n "); 1227 1228 test!func(" foo\n bar \n baz", "foo\nbar \n baz"); 1229 test!func(" foo\nbar\n baz", "foo\nbar\nbaz"); 1230 test!func(" foo\n bar\n baz", "foo\nbar\nbaz"); 1231 test!func(" foo\n bar\n baz", "foo\nbar\nbaz"); 1232 test!func(" foo\n bar\n baz", "foo\n bar\nbaz"); 1233 test!func(" foo\n bar\n baz", "foo\n bar\nbaz"); 1234 test!func(" foo\n bar\n baz", "foo\n bar\nbaz"); 1235 test!func(" foo\n bar\n baz\n\n\n\n\n", "foo\n bar\nbaz\n\n\n\n"); 1236 1237 test!func(" foo\n bar\n baz", "foo\nbar\n baz"); 1238 1239 test!func("foo\n bar\n baz", "foo\nbar\n baz"); 1240 test!func("foo\nbar\n baz\n", "foo\nbar\n baz\n"); 1241 } 1242 } 1243 1244 version(dxmlTests) @safe pure unittest 1245 { 1246 import std.algorithm.comparison : equal; 1247 import dxml.internal : testRangeFuncs; 1248 1249 static foreach(func; testRangeFuncs) 1250 {{ 1251 assert(stripIndent(func("foo")) == "foo"); 1252 assert(equal(withoutIndent(func("foo")), "foo")); 1253 }} 1254 } 1255 1256 1257 /++ 1258 The string representations of the five, entity references predefined by the 1259 XML spec. 1260 1261 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#dt-chardata)$(BR) 1262 $(LREF parseStdEntityRef) 1263 +/ 1264 enum StdEntityRef 1265 { 1266 /// Entity reference for $(D_CODE_STRING $(AMP)) 1267 amp = "&", 1268 1269 /// Entity reference for $(D_CODE_STRING >) 1270 gt = ">", 1271 1272 /// Entity reference for $(D_CODE_STRING <) 1273 lt = "<", 1274 1275 /// Entity reference for $(D_CODE_STRING ') 1276 apos = "'", 1277 1278 /// Entity reference for $(D_CODE_STRING ") 1279 quot = """, 1280 } 1281 1282 1283 /++ 1284 Returns a lazy range of code units which encodes any characters which cannot 1285 be put in an $(REF EntityType._text, dxml, parser) in their literal form. 1286 1287 encodeText is intended primarily to be used with 1288 $(REF XMLWriter.writeText, dxml, writer) to ensure that characters which 1289 cannot appear in their literal form do not appear in their literal form. 1290 1291 Specifically, what encodeText does is 1292 1293 $(TABLE 1294 $(TR $(TD convert $(D_CODE_STRING &) to $(D_CODE_STRING $(AMP)amp;) )) 1295 $(TR $(TD convert $(D_CODE_STRING <) to $(D_CODE_STRING $(AMP)lt;) )) 1296 $(TR $(TD convert $(D_CODE_STRING ]]>) to $(D_CODE_STRING ]]$(AMP)gt;) )) 1297 ) 1298 1299 See_Also: $(REF XMLWriter.writeText, dxml, writer)$(BR) 1300 $(LREF encodeAttr)$(BR) 1301 $(LREF decodeXML)$(BR) 1302 $(LREF asDecodedXML) 1303 +/ 1304 auto encodeText(R)(R text) 1305 if(isForwardRange!R && isSomeChar!(ElementType!R)) 1306 { 1307 import std.utf : byCodeUnit; 1308 1309 static struct EncodeText 1310 { 1311 public: 1312 1313 @property front() { return _len == 0 ? _text.front : cast(ElementEncodingType!R)_buffer[_len - 1]; } 1314 1315 @property empty() { return _text.empty; } 1316 1317 void popFront() 1318 { 1319 if(_len != 0) 1320 { 1321 if(--_len != 0) 1322 return; 1323 } 1324 _text.popFront(); 1325 _handleEntity(); 1326 } 1327 1328 @property save() 1329 { 1330 auto retval = this; 1331 retval._text = _text.save; 1332 return retval; 1333 } 1334 1335 private: 1336 1337 void _handleEntity() 1338 { 1339 if(_text.empty) 1340 return; 1341 switch(_text.front) 1342 { 1343 case '&': 1344 { 1345 enum entity = ";pma&"; 1346 _buffer = entity; 1347 _len = entity.length; 1348 return; 1349 } 1350 case '<': 1351 { 1352 enum entity = ";tl&"; 1353 _buffer = entity; 1354 _len = entity.length; 1355 return; 1356 } 1357 case ']': 1358 { 1359 import std.range : dropOne; 1360 1361 // FIXME This should use startsWith, but for some reason, 1362 // startsWith doesn't currently work with @nogc or nothrow 1363 // even when this code should be able to be @nogc and/or 1364 // nothrow. 1365 auto temp = _text.save.dropOne(); 1366 if(!temp.empty && temp.front == ']') 1367 { 1368 temp.popFront(); 1369 if(!temp.empty && temp.front == '>') 1370 { 1371 _text = temp; 1372 enum entity = ";tg&]]"; 1373 _buffer = entity; 1374 _len = entity.length; 1375 } 1376 } 1377 return; 1378 } 1379 default: return; 1380 } 1381 } 1382 1383 this(R text) 1384 { 1385 _text = byCodeUnit(text); 1386 _handleEntity(); 1387 } 1388 1389 char["]]>".length] _buffer; 1390 size_t _len; 1391 typeof(byCodeUnit(R.init)) _text; 1392 } 1393 1394 return EncodeText(text); 1395 } 1396 1397 /// 1398 version(dxmlTests) @safe pure nothrow @nogc unittest 1399 { 1400 import std.algorithm.comparison : equal; 1401 1402 assert(equal(encodeText(`foo & bar`), `foo & bar`)); 1403 assert(equal(encodeText(`foo < bar`), `foo < bar`)); 1404 assert(equal(encodeText(`foo > bar`), `foo > bar`)); 1405 assert(equal(encodeText(`foo ' bar`), `foo ' bar`)); 1406 assert(equal(encodeText(`foo " bar`), `foo " bar`)); 1407 assert(equal(encodeText("foo ]]> bar"), "foo ]]> bar")); 1408 1409 assert(equal(encodeText("hello world"), "hello world")); 1410 } 1411 1412 version(dxmlTests) @safe pure unittest 1413 { 1414 import std.algorithm.comparison : equal; 1415 import dxml.internal : testRangeFuncs; 1416 1417 static foreach(func; testRangeFuncs) 1418 {{ 1419 assert(encodeText(func("")).empty); 1420 assert(equal(encodeText(func(`& < > ' "`)), `& < > ' "`)); 1421 assert(equal(encodeText(func("&&&")), "&&&")); 1422 1423 auto range = encodeText(func(`&&<<>>''""hello ] ]> world"">><<&&`)); 1424 assert(equal(range.save, range.save)); 1425 assert(equal(range.save, `&&<<>>''""hello ] ]> world"">><<&&`)); 1426 }} 1427 } 1428 1429 1430 /++ 1431 Returns a lazy range of code units which encodes any characters which cannot 1432 be put in an attribute value of an element tag in their literal form. 1433 1434 encodeAttr is intended primarily to be used with 1435 $(REF XMLWriter.writeAttr, dxml, writer) to ensure that characters 1436 which cannot appear in their literal form do not appear in their literal 1437 form. 1438 1439 Specifically, what encodeAttr does is 1440 1441 $(TABLE 1442 $(TR $(TD convert $(D_CODE_STRING &) to $(D_CODE_STRING $(AMP)amp;) )) 1443 $(TR $(TD convert $(D_CODE_STRING <) to $(D_CODE_STRING $(AMP)lt;) )) 1444 $(TR $(TD convert $(D_CODE_STRING ') to $(D_CODE_STRING $(AMP)pos;) if 1445 $(D quote == $(D_STRING '\'')))) 1446 $(TR $(TD convert $(D_CODE_STRING ") to $(D_CODE_STRING $(AMP)quot;) if 1447 $(D quote == $(D_STRING '"')))) 1448 ) 1449 1450 See_Also: $(REF XMLWriter.writeAttr, dxml, writer)$(BR) 1451 $(LREF encodeText)$(BR) 1452 $(LREF decodeXML)$(BR) 1453 $(LREF asDecodedXML) 1454 +/ 1455 auto encodeAttr(char quote = '"', R)(R text) 1456 if((quote == '"' || quote == '\'') && isForwardRange!R && isSomeChar!(ElementType!R)) 1457 { 1458 import std.utf : byCodeUnit; 1459 1460 static struct EncodeAttr 1461 { 1462 public: 1463 1464 @property front() { return _len == 0 ? _text.front : cast(ElementEncodingType!R)_buffer[_len - 1]; } 1465 1466 @property empty() { return _text.empty; } 1467 1468 void popFront() 1469 { 1470 if(_len != 0) 1471 { 1472 if(--_len != 0) 1473 return; 1474 } 1475 _text.popFront(); 1476 _handleEntity(); 1477 } 1478 1479 @property save() 1480 { 1481 auto retval = this; 1482 retval._text = _text.save; 1483 return retval; 1484 } 1485 1486 private: 1487 1488 void _handleEntity() 1489 { 1490 if(_text.empty) 1491 return; 1492 switch(_text.front) 1493 { 1494 case '&': 1495 { 1496 enum entity = ";pma&"; 1497 _buffer = entity; 1498 _len = entity.length; 1499 return; 1500 } 1501 case '<': 1502 { 1503 enum entity = ";tl&"; 1504 _buffer = entity; 1505 _len = entity.length; 1506 return; 1507 } 1508 case quote: 1509 { 1510 static if(quote == '"') 1511 enum entity = ";touq&"; 1512 else 1513 enum entity = ";sopa&"; 1514 _buffer = entity; 1515 _len = entity.length; 1516 return; 1517 } 1518 default: return; 1519 } 1520 } 1521 1522 this(R text) 1523 { 1524 _text = byCodeUnit(text); 1525 _handleEntity(); 1526 } 1527 1528 char[""".length] _buffer; 1529 size_t _len; 1530 typeof(byCodeUnit(R.init)) _text; 1531 } 1532 1533 return EncodeAttr(text); 1534 } 1535 1536 /// 1537 version(dxmlTests) @safe pure nothrow @nogc unittest 1538 { 1539 import std.algorithm.comparison : equal; 1540 1541 assert(equal(encodeAttr(`foo & bar`), `foo & bar`)); 1542 assert(equal(encodeAttr(`foo < bar`), `foo < bar`)); 1543 assert(equal(encodeAttr(`foo > bar`), `foo > bar`)); 1544 assert(equal(encodeAttr(`foo ' bar`), `foo ' bar`)); 1545 assert(equal(encodeAttr(`foo " bar`), `foo " bar`)); 1546 1547 assert(equal(encodeAttr!'\''(`foo ' bar`), `foo ' bar`)); 1548 assert(equal(encodeAttr!'\''(`foo " bar`), `foo " bar`)); 1549 1550 assert(equal(encodeAttr("hello world"), "hello world")); 1551 } 1552 1553 version(dxmlTests) @safe pure unittest 1554 { 1555 import std.algorithm.comparison : equal; 1556 import dxml.internal : testRangeFuncs; 1557 1558 static foreach(func; testRangeFuncs) 1559 {{ 1560 assert(encodeAttr(func("")).empty); 1561 assert(encodeAttr!'\''(func("")).empty); 1562 assert(equal(encodeAttr(func(`& < > ' "`)), `& < > ' "`)); 1563 assert(equal(encodeAttr!'\''(func(`& < > ' "`)), `& < > ' "`)); 1564 assert(equal(encodeAttr(func("&&&")), "&&&")); 1565 1566 { 1567 auto range = encodeAttr(func(`&&<<>>''""hello world"">><<&&`)); 1568 assert(equal(range.save, range.save)); 1569 assert(equal(range.save, `&&<<>>''""hello world"">><<&&`)); 1570 } 1571 1572 { 1573 auto range = encodeAttr!'\''(func(`&&<<>>''""hello world"">><<&&`)); 1574 assert(equal(range.save, range.save)); 1575 assert(equal(range.save, `&&<<>>''""hello world"">><<&&`)); 1576 } 1577 }} 1578 } 1579 1580 1581 /++ 1582 Returns a range of $(K_CHAR) containing the character reference 1583 corresponding to the given character. 1584 1585 Params: 1586 c = The character to encode. 1587 1588 See_Also: $(LREF parseCharRef) 1589 +/ 1590 auto encodeCharRef(dchar c) 1591 { 1592 static struct EncodeCharRef 1593 { 1594 public: 1595 1596 @property front() { return _buffer[_index]; } 1597 1598 @property empty() { return _buffer[_index] == '$'; } 1599 1600 void popFront() { ++_index; } 1601 1602 @property save() { return this; } 1603 1604 private: 1605 1606 import std.conv : to; 1607 1608 char[to!string(cast(uint)dchar.max).length + 5] _buffer; 1609 size_t _index; 1610 } 1611 1612 import std.format : formattedWrite; 1613 import std..string : representation; 1614 1615 EncodeCharRef retval; 1616 formattedWrite!"&#x%x;$"(retval._buffer[].representation, c); 1617 return retval; 1618 } 1619 1620 /// 1621 version(dxmlTests) unittest 1622 { 1623 import std.algorithm.comparison : equal; 1624 1625 assert(equal(encodeCharRef(' '), " ")); 1626 assert(equal(encodeCharRef('A'), "A")); 1627 assert(equal(encodeCharRef('\u2424'), "␤")); 1628 1629 auto range = encodeCharRef('*'); 1630 assert(parseCharRef(range) == '*'); 1631 } 1632 1633 version(dxmlTests) unittest 1634 { 1635 import std.algorithm.comparison : equal; 1636 1637 enum pound = "#"; 1638 auto range = encodeCharRef('#'); 1639 assert(equal(range.save, range.save)); 1640 assert(equal(range.save, pound)); 1641 }