1 // Written in the D programming language 2 3 /++ 4 This implements a range-based 5 $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX _parser) for XML 1.0 (which 6 will work with XML 1.1 documents assuming that they don't use any 7 1.1-specific features). For the sake of simplicity, sanity, and efficiency, 8 the $(LINK2 https://en.wikipedia.org/wiki/Document_type_definition, DTD) 9 section is not supported beyond what is required to parse past it. 10 11 Start tags, end tags, comments, cdata sections, and processing instructions 12 are all supported and reported to the application. Anything in the DTD is 13 skipped (though it's parsed enough to parse past it correctly, and that 14 $(I can) result in an $(LREF XMLParsingException) if that XML isn't valid 15 enough to be correctly skipped), and the 16 $(LINK2 http://www.w3.org/TR/REC-xml/#NT-XMLDecl, XML declaration) at the 17 top is skipped if present (XML 1.1 requires that it be there, but XML 1.0 18 does not). 19 20 Regardless of what the XML declaration says (if present), any range of 21 $(K_CHAR) will be treated as being encoded in UTF-8, any range of $(K_WCHAR) 22 will be treated as being encoded in UTF-16, and any range of $(K_DCHAR) will 23 be treated as having been encoded in UTF-32. Strings will be treated as 24 ranges of their code units, not code points. 25 26 Since the DTD is skipped, entity references other than the five which are 27 predefined by the XML spec cannot be fully processed (since wherever they 28 were used in the document would be replaced by what they referred to, which 29 could be arbitrarily complex XML). As such, by default, if any entity 30 references which are not predefined are encountered outside of the DTD, an 31 $(LREF XMLParsingException) will be thrown (see 32 $(LREF Config.throwOnEntityRef) for how that can be configured). The 33 predefined entity references and any character references encountered will 34 be checked to verify that they're valid, but they will not be replaced 35 (since that does not work with returning slices of the original input). 36 37 However, $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or 38 $(REF_ALTTEXT parseStdEntityRef, parseStdEntityRef, dxml, util) from 39 $(MREF dxml, util) can be used to convert the predefined entity references 40 to what the refer to, and $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or 41 $(REF_ALTTEXT parseCharRef, parseCharRef, dxml, util) from 42 $(MREF dxml, util) can be used to convert character references to what they 43 refer to. 44 45 $(H3 Primary Symbols) 46 $(TABLE 47 $(TR $(TH Symbol) $(TH Description)) 48 $(TR $(TD $(LREF parseXML)) 49 $(TD The function used to initiate the parsing of an XML 50 document.)) 51 $(TR $(TD $(LREF EntityRange)) 52 $(TD The range returned by $(LREF parseXML).)) 53 $(TR $(TD $(LREF EntityRange.Entity)) 54 $(TD The element type of $(LREF EntityRange).)) 55 ) 56 57 $(H3 Parser Configuration Helpers) 58 $(TABLE 59 $(TR $(TH Symbol) $(TH Description)) 60 $(TR $(TD $(LREF Config)) 61 $(TD Used to configure how $(LREF EntityRange) parses the XML.)) 62 $(TR $(TD $(LREF simpleXML)) 63 $(TD A user-friendly configuration for when the application just 64 wants the element tags and the data in between them.)) 65 $(TR $(TD $(LREF makeConfig)) 66 $(TD A convenience function for constructing a custom 67 $(LREF Config).)) 68 $(TR $(TD $(LREF SkipComments)) 69 $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) 70 to tell the parser to skip comments.)) 71 $(TR $(TD $(LREF SkipPI)) 72 $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) 73 to tell the parser to skip processing instructions.)) 74 $(TR $(TD $(LREF SplitEmpty)) 75 $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) 76 to configure how the parser deals with empty element tags.)) 77 ) 78 79 $(H3 Helper Types Used When Parsing) 80 $(TABLE 81 $(TR $(TH Symbol) $(TH Description)) 82 $(TR $(TD $(LREF EntityType)) 83 $(TD The type of an entity in the XML (e.g. a 84 $(LREF_ALTTEXT start tag, EntityType.elementStart) or a 85 $(LREF_ALTTEXT comment, EntityType.comment)).)) 86 $(TR $(TD $(LREF TextPos)) 87 $(TD Gives the line and column number in the XML document.)) 88 $(TR $(TD $(LREF XMLParsingException)) 89 $(TD Thrown by $(LREF EntityRange) when it encounters invalid 90 XML.)) 91 ) 92 93 $(H3 Helper Functions Used When Parsing) 94 $(TABLE 95 $(TR $(TH Symbol) $(TH Description)) 96 $(TR $(TD $(LREF getAttrs)) 97 $(TD A function similar to $(PHOBOS_REF getopt, std, getopt) which 98 allows for the easy processing of start tag attributes.)) 99 $(TR $(TD $(LREF skipContents)) 100 $(TD Iterates an $(LREF EntityRange) from a start tag to its 101 matching end tag.)) 102 $(TR $(TD $(LREF skipToPath)) 103 $(TD Used to navigate from one start tag to another as if the start 104 tag names formed a file path.)) 105 $(TR $(TD $(LREF skipToEntityType)) 106 $(TD Skips to the next entity of the given type in the range.)) 107 $(TR $(TD $(LREF skipToParentEndTag)) 108 $(TD Iterates an $(LREF EntityRange) until it reaches the end tag 109 that matches the start tag which is the parent of the 110 current entity.)) 111 ) 112 113 $(H3 Helper Traits) 114 $(TABLE 115 $(TR $(TH Symbol) $(TH Description)) 116 $(TR $(TD $(LREF isAttrRange)) 117 $(TD Whether the given range is a range of attributes.))) 118 119 Copyright: Copyright 2017 - 2020 120 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 121 Authors: $(HTTPS jmdavisprog.com, Jonathan M Davis) 122 Source: $(LINK_TO_SRC dxml/_parser.d) 123 124 See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0) 125 +/ 126 module dxml.parser; 127 128 /// 129 unittest 130 { 131 auto xml = "<!-- comment -->\n" ~ 132 "<root>\n" ~ 133 " <foo>some text<whatever/></foo>\n" ~ 134 " <bar/>\n" ~ 135 " <baz></baz>\n" ~ 136 "</root>"; 137 { 138 auto range = parseXML(xml); 139 assert(range.front.type == EntityType.comment); 140 assert(range.front.text == " comment "); 141 range.popFront(); 142 143 assert(range.front.type == EntityType.elementStart); 144 assert(range.front.name == "root"); 145 range.popFront(); 146 147 assert(range.front.type == EntityType.elementStart); 148 assert(range.front.name == "foo"); 149 range.popFront(); 150 151 assert(range.front.type == EntityType.text); 152 assert(range.front.text == "some text"); 153 range.popFront(); 154 155 assert(range.front.type == EntityType.elementEmpty); 156 assert(range.front.name == "whatever"); 157 range.popFront(); 158 159 assert(range.front.type == EntityType.elementEnd); 160 assert(range.front.name == "foo"); 161 range.popFront(); 162 163 assert(range.front.type == EntityType.elementEmpty); 164 assert(range.front.name == "bar"); 165 range.popFront(); 166 167 assert(range.front.type == EntityType.elementStart); 168 assert(range.front.name == "baz"); 169 range.popFront(); 170 171 assert(range.front.type == EntityType.elementEnd); 172 assert(range.front.name == "baz"); 173 range.popFront(); 174 175 assert(range.front.type == EntityType.elementEnd); 176 assert(range.front.name == "root"); 177 range.popFront(); 178 179 assert(range.empty); 180 } 181 { 182 auto range = parseXML!simpleXML(xml); 183 184 // simpleXML skips comments 185 186 assert(range.front.type == EntityType.elementStart); 187 assert(range.front.name == "root"); 188 range.popFront(); 189 190 assert(range.front.type == EntityType.elementStart); 191 assert(range.front.name == "foo"); 192 range.popFront(); 193 194 assert(range.front.type == EntityType.text); 195 assert(range.front.text == "some text"); 196 range.popFront(); 197 198 // simpleXML splits empty element tags into a start tag and end tag 199 // so that the code doesn't have to care whether a start tag with no 200 // content is an empty tag or a start tag and end tag with nothing but 201 // whitespace in between. 202 assert(range.front.type == EntityType.elementStart); 203 assert(range.front.name == "whatever"); 204 range.popFront(); 205 206 assert(range.front.type == EntityType.elementEnd); 207 assert(range.front.name == "whatever"); 208 range.popFront(); 209 210 assert(range.front.type == EntityType.elementEnd); 211 assert(range.front.name == "foo"); 212 range.popFront(); 213 214 assert(range.front.type == EntityType.elementStart); 215 assert(range.front.name == "bar"); 216 range.popFront(); 217 218 assert(range.front.type == EntityType.elementEnd); 219 assert(range.front.name == "bar"); 220 range.popFront(); 221 222 assert(range.front.type == EntityType.elementStart); 223 assert(range.front.name == "baz"); 224 range.popFront(); 225 226 assert(range.front.type == EntityType.elementEnd); 227 assert(range.front.name == "baz"); 228 range.popFront(); 229 230 assert(range.front.type == EntityType.elementEnd); 231 assert(range.front.name == "root"); 232 range.popFront(); 233 234 assert(range.empty); 235 } 236 } 237 238 239 import std.range.primitives; 240 import std.traits; 241 import std.typecons : Flag; 242 243 244 /++ 245 The exception type thrown when the XML parser encounters invalid XML. 246 +/ 247 class XMLParsingException : Exception 248 { 249 /++ 250 The position in the XML input where the problem is. 251 +/ 252 TextPos pos; 253 254 package: 255 256 this(string msg, TextPos textPos, string file = __FILE__, size_t line = __LINE__) @safe pure 257 { 258 import std.format : format; 259 super(format!"[%s:%s]: %s"(textPos.line, textPos.col, msg), file, line); 260 pos = textPos; 261 } 262 } 263 264 265 /++ 266 Where in the XML document an entity is. 267 268 The line and column numbers are 1-based. 269 270 The primary use case for TextPos is $(LREF XMLParsingException), but an 271 application may have other uses for it. The TextPos for an 272 $(LREF2 Entity, EntityRange) can be obtained from 273 $(LREF2 Entity.pos, EntityRange). 274 275 See_Also: $(LREF XMLParsingException.pos)$(BR) 276 $(LREF EntityRange.Entity.pos) 277 +/ 278 struct TextPos 279 { 280 /// A line number in the XML file. 281 int line = 1; 282 283 /++ 284 A column number in a line of the XML file. 285 286 Each code unit is considered a column, so depending on what a program 287 is looking to do with the column number, it may need to examine the 288 actual text on that line and calculate the number that represents 289 what the program wants to display (e.g. the number of graphemes). 290 +/ 291 int col = 1; 292 } 293 294 295 /++ 296 Used to configure how the parser works. 297 298 See_Also: 299 $(LREF makeConfig)$(BR) 300 $(LREF parseXML)$(BR) 301 $(LREF simpleXML) 302 +/ 303 struct Config 304 { 305 /++ 306 Whether the comments should be skipped while parsing. 307 308 If $(D skipComments == SkipComments.yes), any entities of type 309 $(LREF EntityType.comment) will be omitted from the parsing results, 310 and they will not be validated beyond what is required to parse past 311 them. 312 313 Defaults to $(D SkipComments.no). 314 +/ 315 auto skipComments = SkipComments.no; 316 317 /++ 318 Whether processing instructions should be skipped. 319 320 If $(D skipPI == SkipPI.yes), any entities of type 321 $(LREF EntityType.pi) will be skipped, and they will not be validated 322 beyond what is required to parse past them. 323 324 Defaults to $(D SkipPI.no). 325 +/ 326 auto skipPI = SkipPI.no; 327 328 /++ 329 Whether the parser should report empty element tags as if they were a 330 start tag followed by an end tag with nothing in between. 331 332 If $(D splitEmpty == SplitEmpty.yes), then whenever an 333 $(LREF EntityType.elementEmpty) is encountered, the parser will claim 334 that that entity is an $(LREF EntityType.elementStart), and then it 335 will provide an $(LREF EntityType.elementEnd) as the next entity before 336 the entity that actually follows it. 337 338 The purpose of this is to simplify the code using the parser, since most 339 code does not care about the difference between an empty tag and a start 340 and end tag with nothing in between. But since some code may care about 341 the difference, the behavior is configurable. 342 343 Defaults to $(D SplitEmpty.no). 344 +/ 345 auto splitEmpty = SplitEmpty.no; 346 347 /// 348 unittest 349 { 350 enum configSplitYes = makeConfig(SplitEmpty.yes); 351 352 { 353 auto range = parseXML("<root></root>"); 354 assert(range.front.type == EntityType.elementStart); 355 assert(range.front.name == "root"); 356 range.popFront(); 357 assert(range.front.type == EntityType.elementEnd); 358 assert(range.front.name == "root"); 359 range.popFront(); 360 assert(range.empty); 361 } 362 { 363 // No difference if the tags are already split. 364 auto range = parseXML!configSplitYes("<root></root>"); 365 assert(range.front.type == EntityType.elementStart); 366 assert(range.front.name == "root"); 367 range.popFront(); 368 assert(range.front.type == EntityType.elementEnd); 369 assert(range.front.name == "root"); 370 range.popFront(); 371 assert(range.empty); 372 } 373 { 374 // This treats <root></root> and <root/> as distinct. 375 auto range = parseXML("<root/>"); 376 assert(range.front.type == EntityType.elementEmpty); 377 assert(range.front.name == "root"); 378 range.popFront(); 379 assert(range.empty); 380 } 381 { 382 // This is parsed as if it were <root></root> insead of <root/>. 383 auto range = parseXML!configSplitYes("<root/>"); 384 assert(range.front.type == EntityType.elementStart); 385 assert(range.front.name == "root"); 386 range.popFront(); 387 assert(range.front.type == EntityType.elementEnd); 388 assert(range.front.name == "root"); 389 range.popFront(); 390 assert(range.empty); 391 } 392 } 393 394 /++ 395 Whether the parser should throw when it encounters any entity references 396 other than the five entity references defined in the XML standard. 397 398 Any other entity references would have to be defined in the DTD in 399 order to be valid. And in order to know what XML they represent (which 400 could be arbitrarily complex, even effectively inserting entire XML 401 documents into the middle of the XML), the DTD would have to be parsed. 402 However, dxml does not support parsing the DTD beyond what is required 403 to correctly parse past it, and replacing entity references with what 404 they represent would not work with the slicing semantics that 405 $(LREF EntityRange) provides. As such, it is not possible for dxml to 406 correctly handle any entity references other than the five which are 407 defined in the XML standard, and even those are only parsed by using 408 $(REF decodeXML, dxml, util) or $(REF parseStdEntityRef, dxml, util). 409 $(LREF EntityRange) always validates that entity references are one 410 of the five, predefined entity references, but otherwise, it lets them 411 pass through as normal text. It does not replace them with what they 412 represent. 413 414 As such, the default behavior of $(LREF EntityRange) is to throw an 415 $(LREF XMLParsingException) when it encounters an entity reference 416 which is not one of the five defined by the XML standard. With that 417 behavior, there is no risk of processing an XML document as if it had 418 no entity references and ending up with what the program using the 419 parser would probably consider incorrect results. However, there are 420 cases where a program may find it acceptable to treat entity references 421 as normal text and ignore them. As such, if a program wishes to take 422 that approach, it can set throwOnEntityRef to $(D ThrowOnEntityRef.no). 423 424 If $(D throwOnEntityRef == ThrowOnEntityRef.no), then any entity 425 reference that it encounters will be validated to ensure that it is 426 syntactically valid (i.e. that the characters it contains form what 427 could be a valid entity reference assuming that the DTD declared it 428 properly), but otherwise, $(LREF EntityRange) will treat it as normal 429 text, just like it treats the five, predefined entity references as 430 normal text. 431 432 Note that any valid XML entity reference which contains start or end 433 tags must contain matching start or end tags, and entity references 434 cannot contain incomplete fragments of XML (e.g. the start or end of a 435 comment). So, missing entity references should only affect the data in 436 the XML document and not its overall structure (if that were not _true, 437 attempting to ignore entity references such as $(D ThrowOnEntityRef.no) 438 does would be a disaster in the making). However, how reasonable it is 439 to miss that data depends entirely on the application and what the XML 440 documents it's parsing contain - hence, the behavior is configurable. 441 442 See_Also: $(REF StdEntityRef, dxml, util)$(BR) 443 $(REF parseStdEntityRef, dxml, util)$(BR) 444 $(REF parseCharRef, dxml, util)$(BR) 445 $(REF encodeCharRef, dxml, util)$(BR) 446 $(REF decodeXML, dxml, util)$(BR) 447 $(REF asDecodedXML, dxml, util) 448 +/ 449 auto throwOnEntityRef = ThrowOnEntityRef.yes; 450 451 /// 452 unittest 453 { 454 import std.exception : assertThrown; 455 import dxml.util : decodeXML; 456 457 auto xml = "<root>\n" ~ 458 " <std>&'><"</std>\n" ~ 459 " <other>&foobar;</other>\n" ~ 460 " <invalid>&--;</invalid>\n" ~ 461 "</root>"; 462 463 // ThrowOnEntityRef.yes 464 { 465 auto range = parseXML(xml); 466 assert(range.front.type == EntityType.elementStart); 467 assert(range.front.name == "root"); 468 469 range.popFront(); 470 assert(range.front.type == EntityType.elementStart); 471 assert(range.front.name == "std"); 472 473 range.popFront(); 474 assert(range.front.type == EntityType.text); 475 assert(range.front.text == "&'><""); 476 assert(range.front.text.decodeXML() == `&'><"`); 477 478 range.popFront(); 479 assert(range.front.type == EntityType.elementEnd); 480 assert(range.front.name == "std"); 481 482 range.popFront(); 483 assert(range.front.type == EntityType.elementStart); 484 assert(range.front.name == "other"); 485 486 // Attempted to parse past "&foobar;", which is syntactically 487 // valid, but it's not one of the five predefined entity references. 488 assertThrown!XMLParsingException(range.popFront()); 489 } 490 491 // ThrowOnEntityRef.no 492 { 493 auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml); 494 assert(range.front.type == EntityType.elementStart); 495 assert(range.front.name == "root"); 496 497 range.popFront(); 498 assert(range.front.type == EntityType.elementStart); 499 assert(range.front.name == "std"); 500 501 range.popFront(); 502 assert(range.front.type == EntityType.text); 503 assert(range.front.text == "&'><""); 504 assert(range.front.text.decodeXML() == `&'><"`); 505 506 range.popFront(); 507 assert(range.front.type == EntityType.elementEnd); 508 assert(range.front.name == "std"); 509 510 range.popFront(); 511 assert(range.front.type == EntityType.elementStart); 512 assert(range.front.name == "other"); 513 514 // Doesn't throw, because "&foobar;" is syntactically valid. 515 range.popFront(); 516 assert(range.front.type == EntityType.text); 517 assert(range.front.text == "&foobar;"); 518 519 // decodeXML has no effect on non-standard entity references. 520 assert(range.front.text.decodeXML() == "&foobar;"); 521 522 range.popFront(); 523 assert(range.front.type == EntityType.elementEnd); 524 assert(range.front.name == "other"); 525 526 range.popFront(); 527 assert(range.front.type == EntityType.elementStart); 528 assert(range.front.name == "invalid"); 529 530 // Attempted to parse past "&--;", which is not syntactically valid, 531 // because -- is not a valid name for an entity reference. 532 assertThrown!XMLParsingException(range.popFront()); 533 } 534 } 535 } 536 537 538 /// See_Also: $(LREF2 skipComments, Config) 539 alias SkipComments = Flag!"SkipComments"; 540 541 /// See_Also: $(LREF2 skipPI, Config) 542 alias SkipPI = Flag!"SkipPI"; 543 544 /// See_Also: $(LREF2 splitEmpty, Config) 545 alias SplitEmpty = Flag!"SplitEmpty"; 546 547 /// See_Also: $(LREF2 throwOnEntityRef, Config) 548 alias ThrowOnEntityRef = Flag!"ThrowOnEntityRef"; 549 550 551 /++ 552 Helper function for creating a custom config. It makes it easy to set one 553 or more of the member variables to something other than the default without 554 having to worry about explicitly setting them individually or setting them 555 all at once via a constructor. 556 557 The order of the arguments does not matter. The types of each of the members 558 of Config are unique, so that information alone is sufficient to determine 559 which argument should be assigned to which member. 560 +/ 561 Config makeConfig(Args...)(Args args) 562 { 563 import std.format : format; 564 import std.meta : AliasSeq, staticIndexOf, staticMap; 565 566 template isValid(T, Types...) 567 { 568 static if(Types.length == 0) 569 enum isValid = false; 570 else static if(is(T == Types[0])) 571 enum isValid = true; 572 else 573 enum isValid = isValid!(T, Types[1 .. $]); 574 } 575 576 Config config; 577 578 alias TypeOfMember(string memberName) = typeof(__traits(getMember, config, memberName)); 579 alias MemberTypes = staticMap!(TypeOfMember, AliasSeq!(__traits(allMembers, Config))); 580 581 foreach(i, arg; args) 582 { 583 static assert(isValid!(typeof(arg), MemberTypes), 584 format!"Argument %s does not match the type of any members of Config"(i)); 585 586 static foreach(j, Other; Args) 587 { 588 static if(i != j) 589 static assert(!is(typeof(arg) == Other), format!"Argument %s and %s have the same type"(i, j)); 590 } 591 592 foreach(memberName; __traits(allMembers, Config)) 593 { 594 static if(is(typeof(__traits(getMember, config, memberName)) == typeof(arg))) 595 mixin("config." ~ memberName ~ " = arg;"); 596 } 597 } 598 599 return config; 600 } 601 602 /// 603 @safe pure nothrow @nogc unittest 604 { 605 { 606 auto config = makeConfig(SkipComments.yes); 607 assert(config.skipComments == SkipComments.yes); 608 assert(config.skipPI == Config.init.skipPI); 609 assert(config.splitEmpty == Config.init.splitEmpty); 610 assert(config.throwOnEntityRef == Config.init.throwOnEntityRef); 611 } 612 { 613 auto config = makeConfig(SkipComments.yes, SkipPI.yes); 614 assert(config.skipComments == SkipComments.yes); 615 assert(config.skipPI == SkipPI.yes); 616 assert(config.splitEmpty == Config.init.splitEmpty); 617 assert(config.throwOnEntityRef == Config.init.throwOnEntityRef); 618 } 619 { 620 auto config = makeConfig(SplitEmpty.yes, SkipComments.yes, ThrowOnEntityRef.no); 621 assert(config.skipComments == SkipComments.yes); 622 assert(config.skipPI == Config.init.skipPI); 623 assert(config.splitEmpty == SplitEmpty.yes); 624 assert(config.throwOnEntityRef == ThrowOnEntityRef.no); 625 } 626 } 627 628 unittest 629 { 630 import std.typecons : Flag; 631 static assert(!__traits(compiles, makeConfig(42))); 632 static assert(!__traits(compiles, makeConfig("hello"))); 633 static assert(!__traits(compiles, makeConfig(Flag!"SomeOtherFlag".yes))); 634 static assert(!__traits(compiles, makeConfig(SplitEmpty.yes, SplitEmpty.no))); 635 } 636 637 638 /++ 639 This $(LREF Config) is intended for making it easy to parse XML by skipping 640 everything that isn't the actual data as well as making it simpler to deal 641 with empty element tags by treating them the same as a start tag and end 642 tag with nothing but whitespace between them. 643 +/ 644 enum simpleXML = makeConfig(SkipComments.yes, SkipPI.yes, SplitEmpty.yes); 645 646 /// 647 @safe pure nothrow @nogc unittest 648 { 649 static assert(simpleXML.skipComments == SkipComments.yes); 650 static assert(simpleXML.skipPI == SkipPI.yes); 651 static assert(simpleXML.splitEmpty == SplitEmpty.yes); 652 static assert(simpleXML.throwOnEntityRef == ThrowOnEntityRef.yes); 653 } 654 655 656 /++ 657 Represents the type of an XML entity. Used by $(LREF EntityRange.Entity). 658 +/ 659 enum EntityType 660 { 661 /++ 662 A cdata section: `<![CDATA[ ... ]]>`. 663 664 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-cdata-sect) 665 +/ 666 cdata, 667 668 /++ 669 An XML comment: `<!-- ... -->`. 670 671 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-comments) 672 +/ 673 comment, 674 675 /++ 676 The start tag for an element. e.g. `<foo name="value">`. 677 678 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags) 679 +/ 680 elementStart, 681 682 /++ 683 The end tag for an element. e.g. `</foo>`. 684 685 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags) 686 +/ 687 elementEnd, 688 689 /++ 690 The tag for an element with no contents or matching end tag. e.g. 691 `<foo name="value"/>`. 692 693 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags) 694 +/ 695 elementEmpty, 696 697 /++ 698 A processing instruction such as `<?foo?>`. Note that the 699 `<?xml ... ?>` is skipped and not treated as an $(LREF EntityType._pi). 700 701 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-pi) 702 +/ 703 pi, 704 705 /++ 706 The content of an element tag that is simple text. 707 708 If there is an entity other than the end tag following the text, then 709 the text includes up to that entity. 710 711 Note however that character references (e.g. 712 $(D_CODE_STRING "$(AMP)#42")) and the predefined entity references (e.g. 713 $(D_CODE_STRING "$(AMP)apos;")) are left unprocessed in the text. In 714 order for them to be processed, the text should be passed to either 715 $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or 716 $(REF_ALTTEXT asDecodedXML, asDecodedXML, dxml, util). Entity references 717 which are not predefined are considered invalid XML, because the DTD 718 section is skipped, and thus they cannot be processed properly. 719 720 See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)$(BR) 721 $(REF decodeXML, dxml, util)$(BR) 722 $(REF asDecodedXML, dxml, util)$(BR) 723 $(REF parseStdEntityRef, dxml, util)$(BR) 724 $(REF parseCharRef, dxml, util)$(BR) 725 $(LREF EntityRange.Entity._text) 726 +/ 727 text, 728 } 729 730 731 /++ 732 Lazily parses the given range of characters as an XML document. 733 734 EntityRange is essentially a 735 $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX) parser, though it evolved 736 into that rather than being based on what Java did, and it's range-based 737 rather than iterator-based, so its API is likely to differ from other 738 implementations. The basic concept should be the same though. 739 740 One of the core design goals of this parser is to slice the original input 741 rather than having to allocate strings for the output or wrap it in a lazy 742 range that produces a mutated version of the data. So, all of the text that 743 the parser provides is either a slice or 744 $(PHOBOS_REF takeExactly, std, range) of the input. However, in some cases, 745 for the parser to be fully compliant with the XML spec, 746 $(REF decodeXML, dxml, util) must be called on the text to mutate certain 747 constructs (e.g. removing any $(D_CODE_STRING '\r') in the text or 748 converting $(D_CODE_STRING "$(AMP)lt;") to $(D_CODE_STRING '<')). But 749 that's left up to the application. 750 751 The parser is not $(K_NOGC), but it allocates memory very minimally. It 752 allocates some of its state on the heap so it can validate attributes and 753 end tags. However, that state is shared among all the ranges that came from 754 the same call to parseXML (only the range farthest along in parsing 755 validates attributes or end tags), so $(LREF2 save, _EntityRange) does not 756 allocate memory unless $(D save) on the underlying range allocates memory. 757 The shared state currently uses a couple of dynamic arrays to validate the 758 tags and attributes, and if the document has a particularly deep tag depth 759 or has a lot of attributes on a start tag, then some reallocations may 760 occur until the maximum is reached, but enough is reserved that for most 761 documents, no reallocations will occur. The only other times that the 762 parser would allocate would be if an exception were thrown or if the range 763 that was passed to parseXML allocates for any reason when calling any of the 764 range primitives. 765 766 If invalid XML is encountered at any point during the parsing process, an 767 $(LREF XMLParsingException) will be thrown. If an exception has been thrown, 768 then the parser is in an invalid state, and it is an error to call any 769 functions on it. 770 771 However, note that XML validation is reduced for any entities that are 772 skipped (e.g. for anything in the DTD, validation is reduced to what is 773 required to correctly parse past it, and when 774 $(D Config.skipPI == SkipPI.yes), processing instructions are only validated 775 enough to correctly skip past them). 776 777 As the module documentation says, this parser does not provide any DTD 778 support. It is not possible to properly support the DTD while returning 779 slices of the original input, and the DTD portion of the spec makes parsing 780 XML far, far more complicated. 781 782 A quick note about carriage returns$(COLON) per the XML spec, they are all 783 supposed to either be stripped out or replaced with newlines or spaces 784 before the XML parser even processes the text. That doesn't work when the 785 parser is slicing the original text and not mutating it at all. So, for the 786 purposes of parsing, this parser treats all carriage returns as if they 787 were newlines or spaces (though they won't count as newlines when counting 788 the lines for $(LREF TextPos)). However, they $(I will) appear in any text 789 fields or attribute values if they are in the document (since the text 790 fields and attribute values are slices of the original text). 791 $(REF decodeXML, dxml, util) can be used to strip them along with 792 converting any character references in the text. Alternatively, the 793 application can remove them all before calling parseXML, but it's not 794 necessary. 795 +/ 796 struct EntityRange(Config cfg, R) 797 if(isForwardRange!R && isSomeChar!(ElementType!R)) 798 { 799 import std.algorithm : canFind; 800 import std.range : only, takeExactly; 801 import std.typecons : Nullable; 802 import std.utf : byCodeUnit; 803 804 enum compileInTests = is(R == EntityRangeCompileTests); 805 806 public: 807 808 /// The Config used for when parsing the XML. 809 alias config = cfg; 810 811 /// The type of the range that EntityRange is parsing. 812 alias Input = R; 813 814 /++ 815 The type used when any slice of the original input is used. If $(D R) 816 is a string or supports slicing, then SliceOfR is the same as $(D R); 817 otherwise, it's the result of calling 818 $(PHOBOS_REF takeExactly, std, range) on the input. 819 820 --- 821 import std.algorithm : filter; 822 import std.range : takeExactly; 823 824 static assert(is(EntityRange!(Config.init, string).SliceOfR == string)); 825 826 auto range = filter!(a => true)("some xml"); 827 828 static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR == 829 typeof(takeExactly(range, 42)))); 830 --- 831 +/ 832 static if(isDynamicArray!R || hasSlicing!R) 833 alias SliceOfR = R; 834 else 835 alias SliceOfR = typeof(takeExactly(R.init, 42)); 836 837 // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being 838 // a ddoc-ed unit test. 839 static if(compileInTests) @safe unittest 840 { 841 import std.algorithm : filter; 842 import std.range : takeExactly; 843 844 static assert(is(EntityRange!(Config.init, string).SliceOfR == string)); 845 846 auto range = filter!(a => true)("some xml"); 847 848 static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR == 849 typeof(takeExactly(range, 42)))); 850 } 851 852 853 /++ 854 Represents an entity in the XML document. 855 856 Note that the $(LREF2 type, EntityRange._Entity) determines which 857 properties can be used, and it can determine whether functions which 858 an Entity or $(LREF EntityRange) is passed to are allowed to be called. 859 Each function lists which $(LREF EntityType)s are allowed, and it is an 860 error to call them with any other $(LREF EntityType). 861 +/ 862 struct Entity 863 { 864 public: 865 866 import std.typecons : Tuple; 867 868 /++ 869 The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that 870 $(LREF2 attributes, EntityRange.EntityType) returns a range of. 871 872 See_Also: $(LREF2 attributes, EntityRange.Entity) 873 +/ 874 alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos, "pos"); 875 876 877 /++ 878 The $(LREF EntityType) for this Entity. 879 +/ 880 @property EntityType type() @safe const pure nothrow @nogc 881 { 882 return _type; 883 } 884 885 /// 886 static if(compileInTests) unittest 887 { 888 auto xml = "<root>\n" ~ 889 " <!--no comment-->\n" ~ 890 " <![CDATA[cdata run]]>\n" ~ 891 " <text>I am text!</text>\n" ~ 892 " <empty/>\n" ~ 893 " <?pi?>\n" ~ 894 "</root>"; 895 896 auto range = parseXML(xml); 897 assert(range.front.type == EntityType.elementStart); 898 assert(range.front.name == "root"); 899 range.popFront(); 900 901 assert(range.front.type == EntityType.comment); 902 assert(range.front.text == "no comment"); 903 range.popFront(); 904 905 assert(range.front.type == EntityType.cdata); 906 assert(range.front.text == "cdata run"); 907 range.popFront(); 908 909 assert(range.front.type == EntityType.elementStart); 910 assert(range.front.name == "text"); 911 range.popFront(); 912 913 assert(range.front.type == EntityType.text); 914 assert(range.front.text == "I am text!"); 915 range.popFront(); 916 917 assert(range.front.type == EntityType.elementEnd); 918 assert(range.front.name == "text"); 919 range.popFront(); 920 921 assert(range.front.type == EntityType.elementEmpty); 922 assert(range.front.name == "empty"); 923 range.popFront(); 924 925 assert(range.front.type == EntityType.pi); 926 assert(range.front.name == "pi"); 927 range.popFront(); 928 929 assert(range.front.type == EntityType.elementEnd); 930 assert(range.front.name == "root"); 931 range.popFront(); 932 933 assert(range.empty); 934 } 935 936 937 /++ 938 The position in the the original text where the entity starts. 939 940 See_Also: $(LREF TextPos)$(BR) 941 $(LREF XMLParsingException._pos) 942 +/ 943 @property TextPos pos() @safe const pure nothrow @nogc 944 { 945 return _pos; 946 } 947 948 /// 949 static if(compileInTests) unittest 950 { 951 auto xml = "<root>\n" ~ 952 " <foo>\n" ~ 953 " Foo and bar. Always foo and bar...\n" ~ 954 " </foo>\n" ~ 955 "</root>"; 956 957 auto range = parseXML(xml); 958 assert(range.front.type == EntityType.elementStart); 959 assert(range.front.name == "root"); 960 assert(range.front.pos == TextPos(1, 1)); 961 range.popFront(); 962 963 assert(range.front.type == EntityType.elementStart); 964 assert(range.front.name == "foo"); 965 assert(range.front.pos == TextPos(2, 5)); 966 range.popFront(); 967 968 assert(range.front.type == EntityType.text); 969 assert(range.front.text == 970 "\n" ~ 971 " Foo and bar. Always foo and bar...\n" ~ 972 " "); 973 assert(range.front.pos == TextPos(2, 10)); 974 range.popFront(); 975 976 assert(range.front.type == EntityType.elementEnd); 977 assert(range.front.name == "foo"); 978 assert(range.front.pos == TextPos(4, 5)); 979 range.popFront(); 980 981 assert(range.front.type == EntityType.elementEnd); 982 assert(range.front.name == "root"); 983 assert(range.front.pos == TextPos(5, 1)); 984 range.popFront(); 985 986 assert(range.empty); 987 } 988 989 static if(compileInTests) unittest 990 { 991 import core.exception : AssertError; 992 import std.exception : enforce; 993 994 static void test(ER)(ref ER range, EntityType type, int row, int col, size_t line = __LINE__) 995 { 996 enforce!AssertError(!range.empty, "unittest failure 1", __FILE__, line); 997 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line); 998 enforce!AssertError(range.front.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 999 range.popFront(); 1000 } 1001 1002 auto xml = "<?xml?>\n" ~ 1003 " <!--comment-->\n" ~ 1004 " <?pi?>\n" ~ 1005 " <root>\n" ~ 1006 " <!--comment--><!--comment-->\n" ~ 1007 " <?pi?>\n" ~ 1008 " <![CDATA[]]>\n" ~ 1009 " <empty/> </root>\n" ~ 1010 " <!--comment-->\n" ~ 1011 " <?pi?>\n"; 1012 1013 { 1014 auto range = parseXML(xml); 1015 test(range, EntityType.comment, 2, 4); 1016 test(range, EntityType.pi, 3, 4); 1017 test(range, EntityType.elementStart, 4, 2); 1018 test(range, EntityType.comment, 5, 11); 1019 test(range, EntityType.comment, 5, 25); 1020 test(range, EntityType.pi, 6, 8); 1021 test(range, EntityType.cdata, 7, 3); 1022 test(range, EntityType.elementEmpty, 8, 15); 1023 test(range, EntityType.elementEnd, 8, 28); 1024 test(range, EntityType.comment, 9, 2); 1025 test(range, EntityType.pi, 10, 2); 1026 } 1027 1028 auto range = parseXML!simpleXML(xml); 1029 test(range, EntityType.elementStart, 4, 2); 1030 test(range, EntityType.cdata, 7, 3); 1031 test(range, EntityType.elementStart, 8, 15); 1032 test(range, EntityType.elementEnd, 8, 15); 1033 test(range, EntityType.elementEnd, 8, 28); 1034 } 1035 1036 1037 /++ 1038 Gives the name of this Entity. 1039 1040 Note that this is the direct name in the XML for this entity and 1041 does not contain any of the names of any of the parent entities that 1042 this entity has. If an application wants the full "path" of the 1043 entity, then it will have to keep track of that itself. The parser 1044 does not do that as it would require allocating memory. 1045 1046 $(TABLE 1047 $(TR $(TH Supported $(LREF EntityType)s:)) 1048 $(TR $(TD $(LREF2 elementStart, EntityType))) 1049 $(TR $(TD $(LREF2 elementEnd, EntityType))) 1050 $(TR $(TD $(LREF2 elementEmpty, EntityType))) 1051 $(TR $(TD $(LREF2 pi, EntityType))) 1052 ) 1053 +/ 1054 @property SliceOfR name() 1055 { 1056 import dxml.internal : checkedSave, stripBCU; 1057 with(EntityType) 1058 { 1059 import std.format : format; 1060 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type), 1061 format("name cannot be called with %s", _type)); 1062 } 1063 return stripBCU!R(checkedSave(_name)); 1064 } 1065 1066 /// 1067 static if(compileInTests) unittest 1068 { 1069 auto xml = "<root>\n" ~ 1070 " <empty/>\n" ~ 1071 " <?pi?>\n" ~ 1072 "</root>"; 1073 1074 auto range = parseXML(xml); 1075 assert(range.front.type == EntityType.elementStart); 1076 assert(range.front.name == "root"); 1077 range.popFront(); 1078 1079 assert(range.front.type == EntityType.elementEmpty); 1080 assert(range.front.name == "empty"); 1081 range.popFront(); 1082 1083 assert(range.front.type == EntityType.pi); 1084 assert(range.front.name == "pi"); 1085 range.popFront(); 1086 1087 assert(range.front.type == EntityType.elementEnd); 1088 assert(range.front.name == "root"); 1089 range.popFront(); 1090 1091 assert(range.empty); 1092 } 1093 1094 1095 /++ 1096 Returns a lazy range of attributes for a start tag where each 1097 attribute is represented as a$(BR) 1098 $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!( 1099 $(LREF2 SliceOfR, EntityRange), $(D_STRING "name"), 1100 $(LREF2 SliceOfR, EntityRange), $(D_STRING "value"), 1101 $(LREF TextPos), $(D_STRING "pos"))). 1102 1103 $(TABLE 1104 $(TR $(TH Supported $(LREF EntityType)s:)) 1105 $(TR $(TD $(LREF2 elementStart, EntityType))) 1106 $(TR $(TD $(LREF2 elementEmpty, EntityType))) 1107 ) 1108 1109 See_Also: $(LREF2 Attribute, EntityRange.Entity)$(BR) 1110 $(REF decodeXML, dxml, util)$(BR) 1111 $(REF asDecodedXML, dxml, util) 1112 +/ 1113 @property auto attributes() 1114 { 1115 with(EntityType) 1116 { 1117 import std.format : format; 1118 assert(_type == elementStart || _type == elementEmpty, 1119 format("attributes cannot be called with %s", _type)); 1120 } 1121 1122 // STag ::= '<' Name (S Attribute)* S? '>' 1123 // Attribute ::= Name Eq AttValue 1124 // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1125 1126 static struct AttributeRange 1127 { 1128 @property Attribute front() 1129 { 1130 return _front; 1131 } 1132 1133 void popFront() 1134 { 1135 import dxml.internal : stripBCU; 1136 1137 stripWS(_text); 1138 if(_text.input.empty) 1139 { 1140 empty = true; 1141 return; 1142 } 1143 1144 immutable pos = _text.pos; 1145 auto name = stripBCU!R(_text.takeName!'='()); 1146 stripWS(_text); 1147 popFrontAndIncCol(_text); 1148 stripWS(_text); 1149 _front = Attribute(name, stripBCU!R(takeEnquotedText(_text)), pos); 1150 } 1151 1152 @property auto save() 1153 { 1154 import dxml.internal : checkedSave; 1155 auto retval = this; 1156 retval._front = Attribute(_front[0].save, checkedSave(_front[1]), _front[2]); 1157 retval._text.input = checkedSave(retval._text.input); 1158 return retval; 1159 } 1160 1161 this(typeof(_text) text) 1162 { 1163 _front = Attribute.init; // This is utterly stupid. https://issues.dlang.org/show_bug.cgi?id=13945 1164 _text = text; 1165 if(_text.input.empty) 1166 empty = true; 1167 else 1168 popFront(); 1169 } 1170 1171 bool empty; 1172 Attribute _front; 1173 typeof(_savedText) _text; 1174 } 1175 1176 return AttributeRange(_savedText.save); 1177 } 1178 1179 /// 1180 static if(compileInTests) unittest 1181 { 1182 import std.algorithm.comparison : equal; 1183 import std.algorithm.iteration : filter; 1184 { 1185 auto xml = "<root/>"; 1186 auto range = parseXML(xml); 1187 assert(range.front.type == EntityType.elementEmpty); 1188 assert(range.front.attributes.empty); 1189 1190 static assert(is(ElementType!(typeof(range.front.attributes)) == 1191 typeof(range).Entity.Attribute)); 1192 } 1193 { 1194 auto xml = "<root a='42' q='29' w='hello'/>"; 1195 auto range = parseXML(xml); 1196 assert(range.front.type == EntityType.elementEmpty); 1197 1198 auto attrs = range.front.attributes; 1199 assert(attrs.front.name == "a"); 1200 assert(attrs.front.value == "42"); 1201 assert(attrs.front.pos == TextPos(1, 7)); 1202 attrs.popFront(); 1203 1204 assert(attrs.front.name == "q"); 1205 assert(attrs.front.value == "29"); 1206 assert(attrs.front.pos == TextPos(1, 14)); 1207 attrs.popFront(); 1208 1209 assert(attrs.front.name == "w"); 1210 assert(attrs.front.value == "hello"); 1211 assert(attrs.front.pos == TextPos(1, 21)); 1212 attrs.popFront(); 1213 1214 assert(attrs.empty); 1215 } 1216 // Because the type of name and value is SliceOfR, == with a string 1217 // only works if the range passed to parseXML was string. 1218 { 1219 auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>"); 1220 auto range = parseXML(xml); 1221 assert(range.front.type == EntityType.elementEmpty); 1222 1223 auto attrs = range.front.attributes; 1224 assert(equal(attrs.front.name, "a")); 1225 assert(equal(attrs.front.value, "42")); 1226 assert(attrs.front.pos == TextPos(1, 7)); 1227 attrs.popFront(); 1228 1229 assert(equal(attrs.front.name, "q")); 1230 assert(equal(attrs.front.value, "29")); 1231 assert(attrs.front.pos == TextPos(1, 14)); 1232 attrs.popFront(); 1233 1234 assert(equal(attrs.front.name, "w")); 1235 assert(equal(attrs.front.value, "hello")); 1236 assert(attrs.front.pos == TextPos(1, 21)); 1237 attrs.popFront(); 1238 1239 assert(attrs.empty); 1240 } 1241 } 1242 1243 static if(compileInTests) unittest 1244 { 1245 import core.exception : AssertError; 1246 import std.algorithm.comparison : equal; 1247 import std.exception : assertNotThrown, collectException, enforce; 1248 import std.typecons : Tuple, tuple; 1249 import dxml.internal : codeLen, testRangeFuncs; 1250 1251 static bool cmpAttr(T, U)(T lhs, U rhs) 1252 { 1253 return equal(lhs[0].save, rhs[0].save) && 1254 equal(lhs[1].save, rhs[1].save); 1255 } 1256 1257 static void test(alias func, ThrowOnEntityRef toer)(string text, EntityType type, 1258 Tuple!(string, string)[] expected, 1259 int row, int col, size_t line = __LINE__) 1260 { 1261 auto range = assertNotThrown!XMLParsingException(parseXML!(makeConfig(toer))(func(text)), 1262 "unittest 1", __FILE__, line); 1263 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line); 1264 enforce!AssertError(equal!cmpAttr(range.front.attributes, expected), 1265 "unittest failure 3", __FILE__, line); 1266 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 4", __FILE__, line); 1267 } 1268 1269 static void testFail(alias func, ThrowOnEntityRef toer)(string text, 1270 int row, int col, size_t line = __LINE__) 1271 { 1272 auto e = collectException!XMLParsingException(parseXML!(makeConfig(toer))(func(text))); 1273 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 1274 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 1275 } 1276 1277 static foreach(func; testRangeFuncs) 1278 { 1279 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 1280 { 1281 test!(func, toer)("<root a='b'/>", EntityType.elementEmpty, [tuple("a", "b")], 1, 14); 1282 test!(func, toer)("<root a = 'b' />", EntityType.elementEmpty, [tuple("a", "b")], 1, 17); 1283 test!(func, toer)("<root \n\n a \n\n = \n\n 'b' \n\n />", EntityType.elementEmpty, 1284 [tuple("a", "b")], 9, 4); 1285 test!(func, toer)("<root a='b'></root>", EntityType.elementStart, [tuple("a", "b")], 1, 13); 1286 test!(func, toer)("<root a = 'b' ></root>", EntityType.elementStart, [tuple("a", "b")], 1, 16); 1287 test!(func, toer)("<root \n a \n = \n 'b' \n ></root>", EntityType.elementStart, 1288 [tuple("a", "b")], 5, 3); 1289 1290 test!(func, toer)("<root foo='\n\n\n'/>", EntityType.elementEmpty, [tuple("foo", "\n\n\n")], 4, 4); 1291 test!(func, toer)(`<root foo='"""'/>`, EntityType.elementEmpty, [tuple("foo", `"""`)], 1, 18); 1292 test!(func, toer)(`<root foo="'''"/>`, EntityType.elementEmpty, [tuple("foo", `'''`)], 1, 18); 1293 test!(func, toer)(`<root foo.=""/>`, EntityType.elementEmpty, [tuple("foo.", "")], 1, 16); 1294 test!(func, toer)(`<root foo="bar="/>`, EntityType.elementEmpty, [tuple("foo", "bar=")], 1, 19); 1295 1296 test!(func, toer)("<root foo='bar' a='b' hello='world'/>", EntityType.elementEmpty, 1297 [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38); 1298 test!(func, toer)(`<root foo="bar" a='b' hello="world"/>`, EntityType.elementEmpty, 1299 [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38); 1300 1301 test!(func, toer)(`<root foo="*" a='B' hello="%foo"/>`, EntityType.elementEmpty, 1302 [tuple("foo", "*"), tuple("a", "B"), tuple("hello", "%foo")], 1, 44); 1303 1304 test!(func, toer)(`<root foo="&" a='vector<int>'></root>`, EntityType.elementStart, 1305 [tuple("foo", "&"), tuple("a", "vector<int>"),], 1, 41); 1306 1307 test!(func, toer)(`<foo 京都市="ディラン"/>`, EntityType.elementEmpty, 1308 [tuple("京都市", "ディラン")], 1, codeLen!(func, `<foo 京都市="ディラン"/>`) + 1); 1309 1310 test!(func, toer)(`<root foo=">"/>`, EntityType.elementEmpty, [tuple("foo", ">")], 1, 16); 1311 test!(func, toer)(`<root foo=">>>>>>"/>`, EntityType.elementEmpty, [tuple("foo", ">>>>>>")], 1, 21); 1312 test!(func, toer)(`<root foo=">"></root>`, EntityType.elementStart, [tuple("foo", ">")], 1, 15); 1313 test!(func, toer)(`<root foo=">>>>>>"></root>`, EntityType.elementStart, [tuple("foo", ">>>>>>")], 1, 20); 1314 1315 test!(func, toer)(`<root foo="bar" foos="ball"/>`, EntityType.elementEmpty, 1316 [tuple("foo", "bar"), tuple("foos", "ball")], 1, 30); 1317 1318 testFail!(func, toer)(`<root a="""/>`, 1, 11); 1319 testFail!(func, toer)(`<root a='''/>`, 1, 11); 1320 testFail!(func, toer)("<root a=/>", 1, 9); 1321 testFail!(func, toer)("<root a='/>", 1, 9); 1322 testFail!(func, toer)("<root a='/>", 1, 9); 1323 testFail!(func, toer)("<root =''/>", 1, 7); 1324 testFail!(func, toer)(`<root a ""/>`, 1, 9); 1325 testFail!(func, toer)(`<root a""/>`, 1, 8); 1326 testFail!(func, toer)(`<root a/>`, 1, 8); 1327 testFail!(func, toer)("<root foo='bar' a=/>", 1, 19); 1328 testFail!(func, toer)("<root foo='bar' a='/>", 1, 19); 1329 testFail!(func, toer)("<root foo='bar' a='/>", 1, 19); 1330 testFail!(func, toer)("<root foo='bar' =''/>", 1, 17); 1331 testFail!(func, toer)("<root foo='bar' a= hello='world'/>", 1, 20); 1332 // It's 33 rather than 28, because it throws when processing the start tag and not when processing 1333 // the attributes. So, the mismatched quotes are detected before the attributes are checked. 1334 testFail!(func, toer)("<root foo='bar' a=' hello='world'/>", 1, 33); 1335 testFail!(func, toer)("<root foo='bar' ='' hello='world'/>", 1, 17); 1336 testFail!(func, toer)("<root foo='bar'a='b'/>", 1, 16); 1337 testFail!(func, toer)(`<root .foo="bar"/>`, 1, 7); 1338 1339 testFail!(func, toer)(`<root foo="<"/>`, 1, 12); 1340 testFail!(func, toer)(`<root foo="<world"/>`, 1, 12); 1341 testFail!(func, toer)(`<root foo="hello<world"/>`, 1, 17); 1342 testFail!(func, toer)(`<root foo="&"/>`, 1, 12); 1343 testFail!(func, toer)(`<root foo="hello&"/>`, 1, 17); 1344 testFail!(func, toer)(`<root foo="hello&world"/>`, 1, 17); 1345 testFail!(func, toer)(`<root foo="&;"/>`, 1, 12); 1346 testFail!(func, toer)(`<root foo="&#;"/>`, 1, 12); 1347 testFail!(func, toer)(`<root foo="&#x;"/>`, 1, 12); 1348 testFail!(func, toer)(`<root foo="&#A;"/>`, 1, 12); 1349 testFail!(func, toer)(`<root foo="&#xG;"/>`, 1, 12); 1350 testFail!(func, toer)(`<root foo="*"/>`, 1, 12); 1351 testFail!(func, toer)(`<root foo="B"/>`, 1, 12); 1352 testFail!(func, toer)(`<root foo=""/>`, 1, 12); 1353 1354 testFail!(func, toer)("<root\n\nfoo='\nbarB'></root>", 4, 4); 1355 1356 testFail!(func, toer)(`<root a="""></root>`, 1, 11); 1357 testFail!(func, toer)(`<root a='''></root>`, 1, 11); 1358 testFail!(func, toer)("<root a=></root>", 1, 9); 1359 testFail!(func, toer)("<root a='></root>", 1, 9); 1360 testFail!(func, toer)("<root a='></root>", 1, 9); 1361 testFail!(func, toer)("<root =''></root>", 1, 7); 1362 testFail!(func, toer)(`<root a ""></root>`, 1, 9); 1363 testFail!(func, toer)(`<root a""></root>`, 1, 8); 1364 testFail!(func, toer)(`<root a></root>`, 1, 8); 1365 testFail!(func, toer)("<root foo='bar' a=></root>", 1, 19); 1366 testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19); 1367 testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19); 1368 testFail!(func, toer)("<root foo='bar' =''></root>", 1, 17); 1369 testFail!(func, toer)("<root foo='bar' a= hello='world'></root>", 1, 20); 1370 testFail!(func, toer)("<root foo='bar' a=' hello='world'></root>", 1, 33); 1371 testFail!(func, toer)("<root foo='bar' ='' hello='world'></root>", 1, 17); 1372 testFail!(func, toer)("<root foo='bar'a='b'></root>", 1, 16); 1373 testFail!(func, toer)(`<root .foo='bar'></root>`, 1, 7); 1374 1375 testFail!(func, toer)(`<root foo="<"></root>`, 1, 12); 1376 testFail!(func, toer)(`<root foo="<world"></root>`, 1, 12); 1377 testFail!(func, toer)(`<root foo="hello<world"></root>`, 1, 17); 1378 testFail!(func, toer)(`<root foo="&"></root>`, 1, 12); 1379 testFail!(func, toer)(`<root foo="hello&"></root>`, 1, 17); 1380 testFail!(func, toer)(`<root foo="hello&world"></root>`, 1, 17); 1381 testFail!(func, toer)(`<root foo="&;"></root>`, 1, 12); 1382 testFail!(func, toer)(`<root foo="&#;"></root>`, 1, 12); 1383 testFail!(func, toer)(`<root foo="&#x;"></root>`, 1, 12); 1384 testFail!(func, toer)(`<root foo="&#A;"></root>`, 1, 12); 1385 testFail!(func, toer)(`<root foo="&#xG;"></root>`, 1, 12); 1386 testFail!(func, toer)(`<root foo="*"></root>`, 1, 12); 1387 testFail!(func, toer)(`<root foo="B"></root>`, 1, 12); 1388 testFail!(func, toer)(`<root foo=""></root>`, 1, 12); 1389 1390 testFail!(func, toer)(`<root a='42' a='19'/>`, 1, 14); 1391 testFail!(func, toer)(`<root a='42' b='hello' a='19'/>`, 1, 24); 1392 testFail!(func, toer)(`<root a='42' b='hello' a='19' c=''/>`, 1, 24); 1393 testFail!(func, toer)(`<root a='' b='' c='' d='' e='' f='' g='' e='' h=''/>`, 1, 42); 1394 testFail!(func, toer)(`<root foo='bar' foo='bar'/>`, 1, 17); 1395 1396 test!(func, toer)(`<root foo="&"></root>`, EntityType.elementStart, 1397 [tuple("foo", "&")], 1, 19); 1398 test!(func, toer)(`<root foo="foo&<>'"bar"></root>`, EntityType.elementStart, 1399 [tuple("foo", "foo&<>'"bar")], 1, 45); 1400 testFail!(func, toer)("<root foo='&;'></root>", 1, 12); 1401 testFail!(func, toer)("<root foo='&.;'></root>", 1, 12); 1402 testFail!(func, toer)("<root foo='\n & ule'></root>", 2, 2); 1403 testFail!(func, toer)("<root foo='\n &foo bar'></root>", 2, 2); 1404 } 1405 { 1406 alias toer = ThrowOnEntityRef.yes; 1407 testFail!(func, toer)(`<root foo="&foo;"/>`, 1, 12); 1408 testFail!(func, toer)(`<root foo="&foo;"></root>`, 1, 12); 1409 testFail!(func, toer)("<root foo='foo&bar.;'></root>", 1, 15); 1410 testFail!(func, toer)(`<root foo="hello &a; world"></root>`, 1, 18); 1411 testFail!(func, toer)("<root foo='hello \n &a; \n world'></root>", 2, 2); 1412 } 1413 { 1414 alias toer = ThrowOnEntityRef.no; 1415 test!(func, toer)(`<root foo="&foo;"/>`, EntityType.elementEmpty, 1416 [tuple("foo", "&foo;")], 1, 20); 1417 test!(func, toer)(`<root foo="&foo;"></root>`, EntityType.elementStart, 1418 [tuple("foo", "&foo;")], 1, 19); 1419 test!(func, toer)("<root foo='foo&bar.;'></root>", EntityType.elementStart, 1420 [tuple("foo", "foo&bar.;")], 1, 23); 1421 test!(func, toer)(`<root foo="hello &a; world"></root>`, EntityType.elementStart, 1422 [tuple("foo", "hello &a; world")], 1, 29); 1423 test!(func, toer)("<root foo='hello \n &a; \n world'></root>", EntityType.elementStart, 1424 [tuple("foo", "hello \n &a; \n world")], 3, 9); 1425 } 1426 } 1427 } 1428 1429 1430 /++ 1431 Returns the textual value of this Entity. 1432 1433 In the case of $(LREF EntityType.pi), this is the 1434 text that follows the name, whereas in the other cases, the text is 1435 the entire contents of the entity (save for the delimeters on the 1436 ends if that entity has them). 1437 1438 $(TABLE 1439 $(TR $(TH Supported $(LREF EntityType)s:)) 1440 $(TR $(TD $(LREF2 cdata, EntityType))) 1441 $(TR $(TD $(LREF2 comment, EntityType))) 1442 $(TR $(TD $(LREF2 pi, EntityType))) 1443 $(TR $(TD $(LREF2 _text, EntityType))) 1444 ) 1445 1446 See_Also: $(REF decodeXML, dxml, util)$(BR) 1447 $(REF asDecodedXML, dxml, util)$(BR) 1448 $(REF stripIndent, dxml, util)$(BR) 1449 $(REF withoutIndent, dxml, util) 1450 +/ 1451 @property SliceOfR text() 1452 { 1453 import dxml.internal : checkedSave, stripBCU; 1454 with(EntityType) 1455 { 1456 import std.format : format; 1457 assert(only(cdata, comment, pi, text).canFind(_type), 1458 format("text cannot be called with %s", _type)); 1459 } 1460 return stripBCU!R(checkedSave(_savedText.input)); 1461 } 1462 1463 /// 1464 static if(compileInTests) unittest 1465 { 1466 import std.range.primitives : empty; 1467 1468 auto xml = "<?xml version='1.0'?>\n" ~ 1469 "<?instructionName?>\n" ~ 1470 "<?foo here is something to say?>\n" ~ 1471 "<root>\n" ~ 1472 " <![CDATA[ Yay! random text >> << ]]>\n" ~ 1473 " <!-- some random comment -->\n" ~ 1474 " <p>something here</p>\n" ~ 1475 " <p>\n" ~ 1476 " something else\n" ~ 1477 " here</p>\n" ~ 1478 "</root>"; 1479 auto range = parseXML(xml); 1480 1481 // "<?instructionName?>\n" ~ 1482 assert(range.front.type == EntityType.pi); 1483 assert(range.front.name == "instructionName"); 1484 assert(range.front.text.empty); 1485 1486 // "<?foo here is something to say?>\n" ~ 1487 range.popFront(); 1488 assert(range.front.type == EntityType.pi); 1489 assert(range.front.name == "foo"); 1490 assert(range.front.text == "here is something to say"); 1491 1492 // "<root>\n" ~ 1493 range.popFront(); 1494 assert(range.front.type == EntityType.elementStart); 1495 1496 // " <![CDATA[ Yay! random text >> << ]]>\n" ~ 1497 range.popFront(); 1498 assert(range.front.type == EntityType.cdata); 1499 assert(range.front.text == " Yay! random text >> << "); 1500 1501 // " <!-- some random comment -->\n" ~ 1502 range.popFront(); 1503 assert(range.front.type == EntityType.comment); 1504 assert(range.front.text == " some random comment "); 1505 1506 // " <p>something here</p>\n" ~ 1507 range.popFront(); 1508 assert(range.front.type == EntityType.elementStart); 1509 assert(range.front.name == "p"); 1510 1511 range.popFront(); 1512 assert(range.front.type == EntityType.text); 1513 assert(range.front.text == "something here"); 1514 1515 range.popFront(); 1516 assert(range.front.type == EntityType.elementEnd); 1517 assert(range.front.name == "p"); 1518 1519 // " <p>\n" ~ 1520 // " something else\n" ~ 1521 // " here</p>\n" ~ 1522 range.popFront(); 1523 assert(range.front.type == EntityType.elementStart); 1524 1525 range.popFront(); 1526 assert(range.front.type == EntityType.text); 1527 assert(range.front.text == "\n something else\n here"); 1528 1529 range.popFront(); 1530 assert(range.front.type == EntityType.elementEnd); 1531 1532 // "</root>" 1533 range.popFront(); 1534 assert(range.front.type == EntityType.elementEnd); 1535 1536 range.popFront(); 1537 assert(range.empty); 1538 } 1539 1540 1541 // Reduce the chance of bugs if reference-type ranges are involved. 1542 static if(!isDynamicArray!R) this(this) 1543 { 1544 with(EntityType) final switch(_type) 1545 { 1546 case cdata: break; 1547 case comment: break; 1548 case elementStart: 1549 { 1550 _name = _name.save; 1551 break; 1552 } 1553 case elementEnd: goto case elementStart; 1554 case elementEmpty: goto case elementStart; 1555 case text: break; 1556 case pi: goto case elementStart; 1557 } 1558 1559 if(_type != EntityType.elementEnd) 1560 _savedText = _savedText.save; 1561 } 1562 1563 static if(compileInTests) unittest 1564 { 1565 import std.algorithm.comparison : equal; 1566 import dxml.internal : testRangeFuncs; 1567 1568 static bool cmpAttr(T)(T lhs, T rhs) 1569 { 1570 return equal(lhs.name.save, rhs.name.save) && 1571 equal(lhs.value.save, rhs.value.save); 1572 } 1573 1574 { 1575 auto xml = "<root>\n" ~ 1576 " <foo a='42'/>\n" ~ 1577 " <foo b='42'/>\n" ~ 1578 " <nocomment>nothing to say</nocomment>\n" ~ 1579 "</root>"; 1580 1581 // The duplicate lines aren't typos. We want to ensure that the 1582 // values are independent and that nothing was consumed. 1583 static foreach(func; testRangeFuncs) 1584 {{ 1585 auto range = parseXML(func(xml)); 1586 range.popFront(); 1587 { 1588 auto entity = range.front; 1589 auto entity2 = entity; 1590 assert(entity.pos == entity2.pos); 1591 assert(equal(entity.name, entity2.name)); 1592 assert(equal(entity.name, entity2.name)); 1593 assert(equal!cmpAttr(entity.attributes, entity2.attributes)); 1594 assert(equal!cmpAttr(entity.attributes, entity2.attributes)); 1595 range.popFront(); 1596 assert(entity.pos == entity2.pos); 1597 assert(entity.pos != range.front.pos); 1598 } 1599 range.popFront(); 1600 range.popFront(); 1601 { 1602 auto entity = range.front; 1603 auto entity2 = entity; 1604 assert(entity.pos == entity2.pos); 1605 assert(equal(entity.text, entity2.text)); 1606 assert(equal(entity.text, entity2.text)); 1607 range.popFront(); 1608 assert(entity.pos == entity2.pos); 1609 assert(entity.pos != range.front.pos); 1610 } 1611 }} 1612 } 1613 { 1614 auto xml = "<root>\n" ~ 1615 " <![CDATA[whatever]]>\n" ~ 1616 " <?pi?>\n" ~ 1617 " <!--comment-->\n" ~ 1618 " <empty/>\n" ~ 1619 " <noend a='foo' b='bar'/>\n" ~ 1620 " <foo baz='42'></foo>\n" ~ 1621 "</root>"; 1622 1623 static foreach(func; testRangeFuncs) 1624 { 1625 for(auto range = parseXML(func(xml)); !range.empty; range.popFront()) 1626 { 1627 auto entity = range.front; 1628 auto entity2 = entity; 1629 1630 assert(entity.pos == range.front.pos); 1631 assert(entity.pos == entity2.pos); 1632 assert(entity.type == range.front.type); 1633 assert(entity.type == entity2.type); 1634 1635 with(EntityType) final switch(entity.type) 1636 { 1637 case cdata: goto case text; 1638 case comment: goto case text; 1639 case elementStart: 1640 { 1641 assert(equal!cmpAttr(entity.attributes, range.front.attributes)); 1642 assert(equal!cmpAttr(entity.attributes, entity2.attributes)); 1643 goto case elementEnd; 1644 } 1645 case elementEnd: 1646 { 1647 assert(equal(entity.name, range.front.name)); 1648 assert(equal(entity.name, entity2.name)); 1649 break; 1650 } 1651 case elementEmpty: goto case elementStart; 1652 case text: 1653 { 1654 assert(equal(entity.text, range.front.text)); 1655 assert(equal(entity.text, entity2.text)); 1656 break; 1657 } 1658 case pi: 1659 { 1660 assert(equal(entity.name, range.front.name)); 1661 assert(equal(entity.name, entity2.name)); 1662 goto case text; 1663 } 1664 } 1665 } 1666 } 1667 } 1668 } 1669 1670 1671 private: 1672 1673 this(EntityType type) 1674 { 1675 _type = type; 1676 1677 // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945 1678 _name = typeof(_name).init; 1679 _savedText = typeof(_savedText).init; 1680 } 1681 1682 EntityType _type; 1683 TextPos _pos; 1684 Taken _name; 1685 typeof(EntityRange._savedText) _savedText; 1686 } 1687 1688 1689 /++ 1690 Returns the $(LREF Entity) representing the entity in the XML document 1691 which was most recently parsed. 1692 +/ 1693 @property Entity front() 1694 { 1695 auto retval = Entity(_type); 1696 with(EntityType) final switch(_type) 1697 { 1698 case cdata: retval._savedText = _savedText.save; break; 1699 case comment: goto case cdata; 1700 case elementStart: retval._name = _name.save; retval._savedText = _savedText.save; break; 1701 case elementEnd: retval._name = _name.save; break; 1702 case elementEmpty: goto case elementStart; 1703 case text: goto case cdata; 1704 case pi: goto case elementStart; 1705 } 1706 retval._pos = _entityPos; 1707 return retval; 1708 } 1709 1710 1711 /++ 1712 Move to the next entity. 1713 1714 The next entity is the next one that is linearly in the XML document. 1715 So, if the current entity has child entities, the next entity will be 1716 the first child entity, whereas if it has no child entities, it will be 1717 the next entity at the same level. 1718 1719 Throws: $(LREF XMLParsingException) on invalid XML. 1720 +/ 1721 void popFront() 1722 { 1723 final switch(_grammarPos) with(GrammarPos) 1724 { 1725 case documentStart: _parseDocumentStart(); break; 1726 case prologMisc1: _parseAtPrologMisc!1(); break; 1727 case prologMisc2: _parseAtPrologMisc!2(); break; 1728 case splittingEmpty: 1729 { 1730 _type = EntityType.elementEnd; 1731 _tagStack.sawEntity(); 1732 _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2; 1733 break; 1734 } 1735 case contentCharData1: 1736 { 1737 assert(_type == EntityType.elementStart); 1738 _tagStack.pushTag(_name.save); 1739 _parseAtContentCharData(); 1740 break; 1741 } 1742 case contentMid: _parseAtContentMid(); break; 1743 case contentCharData2: _parseAtContentCharData(); break; 1744 case endTag: _parseElementEnd(); break; 1745 case endMisc: _parseAtEndMisc(); break; 1746 case documentEnd: assert(0, "It's illegal to call popFront() on an empty EntityRange."); 1747 } 1748 } 1749 1750 1751 /++ 1752 Whether the end of the XML document has been reached. 1753 1754 Note that because an $(LREF XMLParsingException) will be thrown an 1755 invalid XML, it's actually possible to call 1756 $(LREF2 front, EntityRange) and $(LREF2 popFront, EntityRange) without 1757 checking empty if the only way that empty would be true is if the XML 1758 were invalid (e.g. if at a start tag, it's a given that there's at 1759 least one end tag left in the document unless it's invalid XML). 1760 1761 However, of course, caution should be used to ensure that incorrect 1762 assumptions are not made that allow the document to reach its end 1763 earlier than predicted without throwing an $(LREF XMLParsingException), 1764 since it's still an error to call $(LREF2 front, EntityRange) or 1765 $(LREF2 popFront, EntityRange) if empty would return false. 1766 +/ 1767 @property bool empty() @safe const pure nothrow @nogc 1768 { 1769 return _grammarPos == GrammarPos.documentEnd; 1770 } 1771 1772 1773 /++ 1774 Forward range function for obtaining a copy of the range which can then 1775 be iterated independently of the original. 1776 +/ 1777 @property auto save() 1778 { 1779 // The init check nonsense is because of ranges whose init values blow 1780 // up when save is called (e.g. a range that's a class). 1781 auto retval = this; 1782 if(retval._name !is typeof(retval._name).init) 1783 retval._name = _name.save; 1784 if(retval._text.input !is typeof(retval._text.input).init) 1785 retval._text.input = _text.input.save; 1786 if(retval._savedText.input !is typeof(retval._savedText.input).init) 1787 retval._savedText.input = _savedText.input.save; 1788 return retval; 1789 } 1790 1791 static if(compileInTests) unittest 1792 { 1793 import std.algorithm.comparison : equal; 1794 import std.exception : assertNotThrown; 1795 import dxml.internal : testRangeFuncs; 1796 1797 static bool cmpAttr(T)(T lhs, T rhs) 1798 { 1799 return equal(lhs.name.save, rhs.name.save) && 1800 equal(lhs.value.save, rhs.value.save); 1801 } 1802 1803 static void testEqual(ER)(ER one, ER two) 1804 { 1805 while(!one.empty && !two.empty) 1806 { 1807 auto left = one.front; 1808 auto right = two.front; 1809 1810 assert(left.pos == right.pos); 1811 assert(left.type == right.type); 1812 1813 with(EntityType) final switch(left.type) 1814 { 1815 case cdata: goto case text; 1816 case comment: goto case text; 1817 case elementStart: 1818 { 1819 assert(equal!cmpAttr(left.attributes, right.attributes)); 1820 goto case elementEnd; 1821 } 1822 case elementEnd: assert(equal(left.name, right.name)); break; 1823 case elementEmpty: goto case elementStart; 1824 case text: assert(equal(left.text, right.text)); break; 1825 case pi: assert(equal(left.name, right.name)); goto case text; 1826 } 1827 1828 one.popFront(); 1829 two.popFront(); 1830 } 1831 1832 assert(one.empty); 1833 assert(two.empty); 1834 } 1835 1836 auto xml = "<root>\n" ~ 1837 " <!-- comment -->\n" ~ 1838 " <something>\n" ~ 1839 " <else/>\n" ~ 1840 " somet text <i>goes</i> here\n" ~ 1841 " </something>\n" ~ 1842 "</root>"; 1843 1844 static foreach(i, func; testRangeFuncs) 1845 {{ 1846 auto text = func(xml); 1847 testEqual(parseXML(text.save), parseXML(text.save)); 1848 auto range = parseXML(text.save); 1849 testEqual(range.save, range.save); 1850 }} 1851 } 1852 1853 1854 /++ 1855 Returns an empty range. This corresponds to 1856 $(PHOBOS_REF _takeNone, std, range) except that it doesn't create a 1857 wrapper type. 1858 +/ 1859 EntityRange takeNone() 1860 { 1861 auto retval = save; 1862 retval._grammarPos = GrammarPos.documentEnd; 1863 return retval; 1864 } 1865 1866 1867 private: 1868 1869 void _parseDocumentStart() 1870 { 1871 auto orig = _text.save; 1872 immutable wasWS = _text.stripWS(); 1873 if(_text.stripStartsWith("<?xml")) 1874 { 1875 if(wasWS) 1876 throw new XMLParsingException("Cannot have whitespace before the <?xml...?> declaration", TextPos.init); 1877 checkNotEmpty(_text); 1878 if(_text.input.front == '?' || isSpace(_text.input.front)) 1879 _text.skipUntilAndDrop!"?>"(); 1880 else 1881 _text = orig; 1882 } 1883 _grammarPos = GrammarPos.prologMisc1; 1884 _parseAtPrologMisc!1(); 1885 } 1886 1887 static if(compileInTests) unittest 1888 { 1889 import core.exception : AssertError; 1890 import std.exception : assertNotThrown, enforce; 1891 import dxml.internal : testRangeFuncs; 1892 1893 static void test(alias func)(string xml, int row, int col, size_t line = __LINE__) 1894 { 1895 auto range = assertNotThrown!XMLParsingException(parseXML(func(xml))); 1896 enforce!AssertError(range._type == EntityType.elementEmpty, "unittest failure 1", __FILE__, line); 1897 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 1898 } 1899 1900 static foreach(func; testRangeFuncs) 1901 { 1902 test!func("<root/>", 1, 8); 1903 test!func("\n\t\n <root/> \n", 3, 9); 1904 test!func("<?xml\n\n\nversion='1.8'\n\n\n\nencoding='UTF-8'\n\n\nstandalone='yes'\n?><root/>", 12, 10); 1905 test!func("<?xml\n\n\n \r\r\r\n\nversion='1.8'?><root/>", 6, 23); 1906 test!func("<?xml\n\n\n \r\r\r\n\nversion='1.8'?>\n <root/>", 7, 13); 1907 test!func("<root/>", 1, 8); 1908 test!func("\n\t\n <root/> \n", 3, 9); 1909 } 1910 } 1911 1912 1913 // Parse at GrammarPos.prologMisc1 or GrammarPos.prologMisc2. 1914 void _parseAtPrologMisc(int miscNum)() 1915 { 1916 static assert(miscNum == 1 || miscNum == 2); 1917 1918 // document ::= prolog element Misc* 1919 // prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 1920 // Misc ::= Comment | PI | S 1921 1922 stripWS(_text); 1923 checkNotEmpty(_text); 1924 if(_text.input.front != '<') 1925 throw new XMLParsingException("Expected <", _text.pos); 1926 popFrontAndIncCol(_text); 1927 checkNotEmpty(_text); 1928 1929 switch(_text.input.front) 1930 { 1931 // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1932 // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' 1933 case '!': 1934 { 1935 immutable bangPos = _text.pos; 1936 popFrontAndIncCol(_text); 1937 if(_text.stripStartsWith("--")) 1938 { 1939 _parseComment(); 1940 static if(config.skipComments == SkipComments.yes) 1941 _parseAtPrologMisc!miscNum(); 1942 break; 1943 } 1944 static if(miscNum == 1) 1945 { 1946 if(_text.stripStartsWith("DOCTYPE")) 1947 { 1948 if(!_text.stripWS()) 1949 throw new XMLParsingException("Whitespace must follow <!DOCTYPE", _text.pos); 1950 _parseDoctypeDecl(); 1951 break; 1952 } 1953 throw new XMLParsingException("Expected Comment or DOCTYPE section", bangPos); 1954 } 1955 else 1956 { 1957 if(_text.stripStartsWith("DOCTYPE")) 1958 { 1959 throw new XMLParsingException("Only one <!DOCTYPE ...> declaration allowed per XML document", 1960 bangPos); 1961 } 1962 throw new XMLParsingException("Expected Comment", bangPos); 1963 } 1964 } 1965 // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 1966 case '?': 1967 { 1968 _parsePI(); 1969 static if(config.skipPI == SkipPI.yes) 1970 popFront(); 1971 break; 1972 } 1973 // element ::= EmptyElemTag | STag content ETag 1974 default: 1975 { 1976 _parseElementStart(); 1977 break; 1978 } 1979 } 1980 } 1981 1982 1983 // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1984 // Parses a comment. <!-- was already removed from the front of the input. 1985 void _parseComment() 1986 { 1987 static if(config.skipComments == SkipComments.yes) 1988 _text.skipUntilAndDrop!"--"(); 1989 else 1990 { 1991 _entityPos = TextPos(_text.pos.line, _text.pos.col - 4); 1992 _type = EntityType.comment; 1993 _tagStack.sawEntity(); 1994 _savedText.pos = _text.pos; 1995 _savedText.input = _text.takeUntilAndDrop!"--"(); 1996 } 1997 if(_text.input.empty || _text.input.front != '>') 1998 throw new XMLParsingException("Comments cannot contain -- and cannot be terminated by --->", _text.pos); 1999 // This is here rather than at the end of the previous static if block 2000 // so that the error message for improperly terminating a comment takes 2001 // precedence over the one involving invalid characters in the comment. 2002 static if(config.skipComments == SkipComments.no) 2003 checkText!true(_savedText); 2004 popFrontAndIncCol(_text); 2005 } 2006 2007 static if(compileInTests) unittest 2008 { 2009 import core.exception : AssertError; 2010 import std.algorithm.comparison : equal; 2011 import std.exception : assertNotThrown, assertThrown, collectException, enforce; 2012 import dxml.internal : codeLen, testRangeFuncs; 2013 2014 static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__) 2015 { 2016 auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>"))); 2017 enforce!AssertError(range.front.type == EntityType.comment, "unittest failure 1", __FILE__, line); 2018 enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line); 2019 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 2020 } 2021 2022 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 2023 { 2024 auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>"))); 2025 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2026 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 2027 } 2028 2029 static foreach(func; testRangeFuncs) 2030 { 2031 test!func("<!--foo-->", "foo", 1, 11); 2032 test!func("<!-- foo -->", " foo ", 1, 13); 2033 test!func("<!-- -->", " ", 1, 9); 2034 test!func("<!---->", "", 1, 8); 2035 test!func("<!--- comment -->", "- comment ", 1, 18); 2036 test!func("<!-- \n foo \n -->", " \n foo \n ", 3, 5); 2037 test!func("<!--京都市 ディラン-->", "京都市 ディラン", 1, codeLen!(func, "<!--京都市 ディラン-->") + 1); 2038 test!func("<!--&-->", "&", 1, 9); 2039 test!func("<!--<-->", "<", 1, 9); 2040 test!func("<!-->-->", ">", 1, 9); 2041 test!func("<!--->-->", "->", 1, 10); 2042 2043 testFail!func("<!", 1, 2); 2044 testFail!func("<!- comment -->", 1, 2); 2045 testFail!func("<!-- comment ->", 1, 5); 2046 testFail!func("<!-- comment --->", 1, 16); 2047 testFail!func("<!---- comment -->", 1, 7); 2048 testFail!func("<!-- comment -- comment -->", 1, 16); 2049 testFail!func("<!->", 1, 2); 2050 testFail!func("<!-->", 1, 5); 2051 testFail!func("<!--->", 1, 5); 2052 testFail!func("<!----->", 1, 7); 2053 testFail!func("<!blah>", 1, 2); 2054 testFail!func("<! blah>", 1, 2); 2055 testFail!func("<!-- \n\n \v \n -->", 3, 4); 2056 testFail!func("<!--京都市 ディラン\v-->", 1, codeLen!(func, "<!--京都市 ディラン\v")); 2057 2058 { 2059 auto xml = func("<!DOCTYPE foo><!-- comment --><root/>"); 2060 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2061 assert(range.front.type == EntityType.comment); 2062 assert(equal(range.front.text, " comment ")); 2063 } 2064 { 2065 auto xml = func("<root><!-- comment --></root>"); 2066 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2067 assertNotThrown!XMLParsingException(range.popFront()); 2068 assert(range.front.type == EntityType.comment); 2069 assert(equal(range.front.text, " comment ")); 2070 } 2071 { 2072 auto xml = func("<root/><!-- comment -->"); 2073 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2074 assertNotThrown!XMLParsingException(range.popFront()); 2075 assert(range.front.type == EntityType.comment); 2076 assert(equal(range.front.text, " comment ")); 2077 } 2078 2079 static foreach(comment; ["<!foo>", "<! foo>", "<!->", "<!-->", "<!--->"]) 2080 { 2081 { 2082 auto xml = func("<!DOCTYPE foo>" ~ comment ~ "<root/>"); 2083 assertThrown!XMLParsingException(parseXML(xml)); 2084 } 2085 { 2086 auto xml = func("<root>" ~ comment ~ "<root>"); 2087 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2088 assertThrown!XMLParsingException(range.popFront()); 2089 } 2090 { 2091 auto xml = func("<root/>" ~ comment); 2092 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2093 assertThrown!XMLParsingException(range.popFront()); 2094 } 2095 } 2096 2097 { 2098 auto xml = "<!--one-->\n" ~ 2099 "<!--two-->\n" ~ 2100 "<root>\n" ~ 2101 " <!--three-->\n" ~ 2102 " <!--four-->\n" ~ 2103 "</root>\n" ~ 2104 "<!--five-->\n" ~ 2105 "<!--six-->"; 2106 2107 auto text = func(xml); 2108 { 2109 auto range = parseXML(text.save); 2110 assert(range.front.type == EntityType.comment); 2111 assert(equal(range.front.text, "one")); 2112 assertNotThrown!XMLParsingException(range.popFront()); 2113 assert(range.front.type == EntityType.comment); 2114 assert(equal(range.front.text, "two")); 2115 assertNotThrown!XMLParsingException(range.popFront()); 2116 assert(range.front.type == EntityType.elementStart); 2117 assert(equal(range.front.name, "root")); 2118 assertNotThrown!XMLParsingException(range.popFront()); 2119 assert(range.front.type == EntityType.comment); 2120 assert(equal(range.front.text, "three")); 2121 assertNotThrown!XMLParsingException(range.popFront()); 2122 assert(range.front.type == EntityType.comment); 2123 assert(equal(range.front.text, "four")); 2124 assertNotThrown!XMLParsingException(range.popFront()); 2125 assert(range.front.type == EntityType.elementEnd); 2126 assert(equal(range.front.name, "root")); 2127 assertNotThrown!XMLParsingException(range.popFront()); 2128 assert(range.front.type == EntityType.comment); 2129 assert(equal(range.front.text, "five")); 2130 assertNotThrown!XMLParsingException(range.popFront()); 2131 assert(range.front.type == EntityType.comment); 2132 assert(equal(range.front.text, "six")); 2133 assertNotThrown!XMLParsingException(range.popFront()); 2134 assert(range.empty); 2135 } 2136 { 2137 auto range = parseXML!simpleXML(text.save); 2138 assert(range.front.type == EntityType.elementStart); 2139 assert(equal(range.front.name, "root")); 2140 assertNotThrown!XMLParsingException(range.popFront()); 2141 assert(range.front.type == EntityType.elementEnd); 2142 assert(equal(range.front.name, "root")); 2143 assertNotThrown!XMLParsingException(range.popFront()); 2144 assert(range.empty); 2145 } 2146 } 2147 } 2148 } 2149 2150 2151 // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 2152 // PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2153 // Parses a processing instruction. < was already removed from the input. 2154 void _parsePI() 2155 { 2156 _entityPos = TextPos(_text.pos.line, _text.pos.col - 1); 2157 assert(_text.input.front == '?'); 2158 popFrontAndIncCol(_text); 2159 static if(config.skipPI == SkipPI.yes) 2160 _text.skipUntilAndDrop!"?>"(); 2161 else 2162 { 2163 immutable posAtName = _text.pos; 2164 if(_text.input.empty) 2165 throw new XMLParsingException("Unterminated processing instruction", posAtName); 2166 _type = EntityType.pi; 2167 _tagStack.sawEntity(); 2168 _name = takeName!'?'(_text); 2169 immutable posAtWS = _text.pos; 2170 stripWS(_text); 2171 checkNotEmpty(_text); 2172 _savedText.pos = _text.pos; 2173 _savedText.input = _text.takeUntilAndDrop!"?>"(); 2174 checkText!true(_savedText); 2175 if(walkLength(_name.save) == 3) 2176 { 2177 // FIXME icmp doesn't compile right now due to an issue with 2178 // byUTF that needs to be looked into. 2179 /+ 2180 import std.uni : icmp; 2181 if(icmp(_name.save, "xml") == 0) 2182 throw new XMLParsingException("Processing instructions cannot be named xml", posAtName); 2183 +/ 2184 auto temp = _name.save; 2185 if(temp.front == 'x' || temp.front == 'X') 2186 { 2187 temp.popFront(); 2188 if(temp.front == 'm' || temp.front == 'M') 2189 { 2190 temp.popFront(); 2191 if(temp.front == 'l' || temp.front == 'L') 2192 throw new XMLParsingException("Processing instructions cannot be named xml", posAtName); 2193 } 2194 } 2195 } 2196 } 2197 } 2198 2199 static if(compileInTests) unittest 2200 { 2201 import core.exception : AssertError; 2202 import std.algorithm.comparison : equal; 2203 import std.exception : assertNotThrown, assertThrown, collectException, enforce; 2204 import std.utf : byUTF; 2205 import dxml.internal : codeLen, testRangeFuncs; 2206 2207 static void test(alias func)(string text, string name, string expected, 2208 int row, int col, size_t line = __LINE__) 2209 { 2210 auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")), 2211 "unittest failure 1", __FILE__, line); 2212 enforce!AssertError(range.front.type == EntityType.pi, "unittest failure 2", __FILE__, line); 2213 enforce!AssertError(equal(range.front.name, name), "unittest failure 3", __FILE__, line); 2214 enforce!AssertError(equal(range.front.text, expected), "unittest failure 4", __FILE__, line); 2215 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 5", __FILE__, line); 2216 } 2217 2218 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 2219 { 2220 auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>"))); 2221 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2222 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 2223 } 2224 2225 static foreach(func; testRangeFuncs) 2226 { 2227 test!func("<?a?>", "a", "", 1, 6); 2228 test!func("<?foo?>", "foo", "", 1, 8); 2229 test!func("<?foo.?>", "foo.", "", 1, 9); 2230 test!func("<?foo bar?>", "foo", "bar", 1, 12); 2231 test!func("<?xmf bar?>", "xmf", "bar", 1, 12); 2232 test!func("<?xmlfoo bar?>", "xmlfoo", "bar", 1, 15); 2233 test!func("<?foo bar baz?>", "foo", "bar baz", 1, 16); 2234 test!func("<?foo\nbar baz?>", "foo", "bar baz", 2, 10); 2235 test!func("<?foo \n bar baz?>", "foo", "bar baz", 2, 11); 2236 test!func("<?foo bar\nbaz?>", "foo", "bar\nbaz", 2, 6); 2237 test!func("<?dlang is awesome?>", "dlang", "is awesome", 1, 21); 2238 test!func("<?dlang is awesome! ?>", "dlang", "is awesome! ", 1, 23); 2239 test!func("<?dlang\n\nis\n\nawesome\n\n?>", "dlang", "is\n\nawesome\n\n", 7, 3); 2240 test!func("<?京都市 ディラン?>", "京都市", "ディラン", 1, codeLen!(func, "<?京都市 ディラン?>") + 1); 2241 test!func("<?foo bar&baz?>", "foo", "bar&baz", 1, 16); 2242 test!func("<?foo bar<baz?>", "foo", "bar<baz", 1, 16); 2243 test!func("<?pi ?>", "pi", "", 1, 8); 2244 test!func("<?pi\n?>", "pi", "", 2, 3); 2245 test!func("<?foo ??>", "foo", "?", 1, 10); 2246 test!func("<?pi some data ? > <??>", "pi", "some data ? > <?", 1, 24); 2247 2248 testFail!func("<?", 1, 3); 2249 testFail!func("<??>", 1, 3); 2250 testFail!func("<? ?>", 1, 3); 2251 testFail!func("<?xml?><?xml?>", 1, 10); 2252 testFail!func("<?XML?>", 1, 3); 2253 testFail!func("<?xMl?>", 1, 3); 2254 testFail!func("<?foo>", 1, 6); 2255 testFail!func("<? foo?>", 1, 3); 2256 testFail!func("<?\nfoo?>", 1, 3); 2257 testFail!func("<??foo?>", 1, 3); 2258 testFail!func("<?.foo?>", 1, 3); 2259 testFail!func("<?foo bar\vbaz?>", 1, 10); 2260 2261 { 2262 auto xml = func("<!DOCTYPE foo><?foo bar?><root/>"); 2263 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2264 assert(range.front.type == EntityType.pi); 2265 assert(equal(range.front.name, "foo")); 2266 assert(equal(range.front.text, "bar")); 2267 } 2268 { 2269 auto xml = func("<root><?foo bar?></root>"); 2270 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2271 assertNotThrown!XMLParsingException(range.popFront()); 2272 assert(equal(range.front.name, "foo")); 2273 assert(equal(range.front.text, "bar")); 2274 } 2275 { 2276 auto xml = func("<root/><?foo bar?>"); 2277 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2278 assertNotThrown!XMLParsingException(range.popFront()); 2279 assert(equal(range.front.name, "foo")); 2280 assert(equal(range.front.text, "bar")); 2281 } 2282 2283 static foreach(pi; ["<?foo>", "<foo?>", "<? foo>"]) 2284 { 2285 { 2286 auto xml = func("<!DOCTYPE foo>" ~ pi ~ "<root/>"); 2287 assertThrown!XMLParsingException(parseXML(xml)); 2288 } 2289 { 2290 auto xml = func("<root>" ~ pi ~ "<root>"); 2291 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2292 assertThrown!XMLParsingException(range.popFront()); 2293 } 2294 { 2295 auto xml = func("<root/>" ~ pi); 2296 auto range = assertNotThrown!XMLParsingException(parseXML(xml)); 2297 assertThrown!XMLParsingException(range.popFront()); 2298 } 2299 } 2300 2301 { 2302 auto xml = "<?one?>\n" ~ 2303 "<?two?>\n" ~ 2304 "<root>\n" ~ 2305 " <?three?>\n" ~ 2306 " <?four?>\n" ~ 2307 "</root>\n" ~ 2308 "<?five?>\n" ~ 2309 "<?six?>"; 2310 2311 auto text = func(xml); 2312 { 2313 auto range = parseXML(text.save); 2314 assert(range.front.type == EntityType.pi); 2315 assert(equal(range.front.name, "one")); 2316 assertNotThrown!XMLParsingException(range.popFront()); 2317 assert(range.front.type == EntityType.pi); 2318 assert(equal(range.front.name, "two")); 2319 assertNotThrown!XMLParsingException(range.popFront()); 2320 assert(range.front.type == EntityType.elementStart); 2321 assert(equal(range.front.name, "root")); 2322 assertNotThrown!XMLParsingException(range.popFront()); 2323 assert(range.front.type == EntityType.pi); 2324 assert(equal(range.front.name, "three")); 2325 assertNotThrown!XMLParsingException(range.popFront()); 2326 assert(range.front.type == EntityType.pi); 2327 assert(equal(range.front.name, "four")); 2328 assertNotThrown!XMLParsingException(range.popFront()); 2329 assert(range.front.type == EntityType.elementEnd); 2330 assert(equal(range.front.name, "root")); 2331 assertNotThrown!XMLParsingException(range.popFront()); 2332 assert(range.front.type == EntityType.pi); 2333 assert(equal(range.front.name, "five")); 2334 assertNotThrown!XMLParsingException(range.popFront()); 2335 assert(range.front.type == EntityType.pi); 2336 assert(equal(range.front.name, "six")); 2337 assertNotThrown!XMLParsingException(range.popFront()); 2338 assert(range.empty); 2339 } 2340 { 2341 auto range = parseXML!simpleXML(text.save); 2342 assert(range.front.type == EntityType.elementStart); 2343 assert(equal(range.front.name, "root")); 2344 assertNotThrown!XMLParsingException(range.popFront()); 2345 assert(range.front.type == EntityType.elementEnd); 2346 assert(equal(range.front.name, "root")); 2347 assertNotThrown!XMLParsingException(range.popFront()); 2348 assert(range.empty); 2349 } 2350 } 2351 } 2352 } 2353 2354 2355 // CDSect ::= CDStart CData CDEnd 2356 // CDStart ::= '<![CDATA[' 2357 // CData ::= (Char* - (Char* ']]>' Char*)) 2358 // CDEnd ::= ']]>' 2359 // Parses a CDATA. <![CDATA[ was already removed from the front of the input. 2360 void _parseCDATA() 2361 { 2362 _entityPos = TextPos(_text.pos.line, _text.pos.col - cast(int)"<![CDATA[".length); 2363 _type = EntityType.cdata; 2364 _tagStack.sawEntity(); 2365 _savedText.pos = _text.pos; 2366 _savedText.input = _text.takeUntilAndDrop!"]]>"; 2367 checkText!true(_savedText); 2368 _grammarPos = GrammarPos.contentCharData2; 2369 } 2370 2371 static if(compileInTests) unittest 2372 { 2373 import core.exception : AssertError; 2374 import std.algorithm.comparison : equal; 2375 import std.exception : assertNotThrown, collectException, enforce; 2376 import dxml.internal : codeLen, testRangeFuncs; 2377 2378 static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__) 2379 { 2380 auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0)); 2381 auto range = parseXML(func("<root>" ~ text ~ "<root/>")); 2382 assertNotThrown!XMLParsingException(range.popFront()); 2383 enforce!AssertError(range.front.type == EntityType.cdata, "unittest failure 1", __FILE__, line); 2384 enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line); 2385 enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line); 2386 } 2387 2388 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 2389 { 2390 auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0)); 2391 auto range = parseXML(func("<root>" ~ text ~ "<root/>")); 2392 auto e = collectException!XMLParsingException(range.popFront()); 2393 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2394 enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line); 2395 } 2396 2397 static foreach(func; testRangeFuncs) 2398 { 2399 test!func("<![CDATA[]]>", "", 1, 13); 2400 test!func("<![CDATA[hello world]]>", "hello world", 1, 24); 2401 test!func("<![CDATA[\nhello\n\nworld\n]]>", "\nhello\n\nworld\n", 5, 4); 2402 test!func("<![CDATA[京都市]]>", "京都市", 1, codeLen!(func, "<![CDATA[京都市]>") + 2); 2403 test!func("<![CDATA[<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ]]>", 2404 "<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ", 1, 57); 2405 test!func("<![CDATA[&]]>", "&", 1, 14); 2406 2407 testFail!func("<[CDATA[]>", 1, 2); 2408 testFail!func("<![CDAT[]>", 1, 2); 2409 testFail!func("<![CDATA]>", 1, 2); 2410 testFail!func("<![CDATA[>", 1, 10); 2411 testFail!func("<![CDATA[]", 1, 10); 2412 testFail!func("<![CDATA[]>", 1, 10); 2413 testFail!func("<![CDATA[ \v ]]>", 1, 11); 2414 testFail!func("<![CDATA[ \n\n \v \n ]]>", 3, 2); 2415 } 2416 } 2417 2418 2419 // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' 2420 // DeclSep ::= PEReference | S 2421 // intSubset ::= (markupdecl | DeclSep)* 2422 // markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment 2423 // Parse doctypedecl after GrammarPos.prologMisc1. 2424 // <!DOCTYPE and any whitespace after it should have already been removed 2425 // from the input. 2426 void _parseDoctypeDecl() 2427 { 2428 outer: while(true) 2429 { 2430 _text.skipToOneOf!('"', '\'', '[', '>')(); 2431 switch(_text.input.front) 2432 { 2433 static foreach(quote; ['"', '\'']) 2434 { 2435 case quote: 2436 { 2437 popFrontAndIncCol(_text); 2438 _text.skipUntilAndDrop!([quote])(); 2439 continue outer; 2440 } 2441 } 2442 case '[': 2443 { 2444 popFrontAndIncCol(_text); 2445 while(true) 2446 { 2447 checkNotEmpty(_text); 2448 _text.skipToOneOf!('"', '\'', ']')(); 2449 switch(_text.input.front) 2450 { 2451 case '"': 2452 { 2453 popFrontAndIncCol(_text); 2454 _text.skipUntilAndDrop!`"`(); 2455 continue; 2456 } 2457 case '\'': 2458 { 2459 popFrontAndIncCol(_text); 2460 _text.skipUntilAndDrop!`'`(); 2461 continue; 2462 } 2463 case ']': 2464 { 2465 popFrontAndIncCol(_text); 2466 stripWS(_text); 2467 if(_text.input.empty || _text.input.front != '>') 2468 throw new XMLParsingException("Incorrectly terminated <!DOCTYPE> section.", _text.pos); 2469 popFrontAndIncCol(_text); 2470 _parseAtPrologMisc!2(); 2471 return; 2472 } 2473 default: assert(0); 2474 } 2475 } 2476 } 2477 case '>': 2478 { 2479 popFrontAndIncCol(_text); 2480 _parseAtPrologMisc!2(); 2481 break; 2482 } 2483 default: assert(0); 2484 } 2485 break; 2486 } 2487 } 2488 2489 static if(compileInTests) unittest 2490 { 2491 import core.exception : AssertError; 2492 import std.exception : assertNotThrown, collectException, enforce; 2493 import dxml.internal : testRangeFuncs; 2494 2495 static void test(alias func)(string text, int row, int col, size_t line = __LINE__) 2496 { 2497 auto pos = TextPos(row, col + cast(int)"<root/>".length); 2498 auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")), 2499 "unittest failure 1", __FILE__, line); 2500 enforce!AssertError(range.front.type == EntityType.elementEmpty, "unittest failure 2", __FILE__, line); 2501 enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line); 2502 } 2503 2504 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 2505 { 2506 auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>"))); 2507 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2508 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 2509 } 2510 2511 static foreach(func; testRangeFuncs) 2512 { 2513 test!func("<!DOCTYPE name>", 1, 16); 2514 test!func("<!DOCTYPE \n\n\n name>", 4, 7); 2515 test!func("<!DOCTYPE name \n\n\n >", 4, 3); 2516 2517 test!func("<!DOCTYPE name []>", 1, 19); 2518 test!func("<!DOCTYPE \n\n\n name []>", 4, 10); 2519 test!func("<!DOCTYPE name \n\n\n []>", 4, 5); 2520 2521 test!func(`<!DOCTYPE name PUBLIC "'''" '"""'>`, 1, 35); 2522 test!func(`<!DOCTYPE name PUBLIC "'''" '"""' []>`, 1, 38); 2523 test!func(`<!DOCTYPE name PUBLIC 'foo' "'''">`, 1, 35); 2524 test!func(`<!DOCTYPE name PUBLIC 'foo' '"""' []>`, 1, 38); 2525 2526 test!func("<!DOCTYPE name [ <!ELEMENT foo EMPTY > ]>", 1, 42); 2527 test!func("<!DOCTYPE name [ <!ELEMENT bar ANY > ]>", 1, 40); 2528 test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA) > ]>", 1, 48); 2529 test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA | foo)> ]>", 1, 53); 2530 test!func("<!DOCTYPE name [ <!ELEMENT kids (foo) > ]>", 1, 43); 2531 test!func("<!DOCTYPE name [ <!ELEMENT kids (foo | bar)> ]>", 1, 48); 2532 2533 test!func("<!DOCTYPE name [ <!ATTLIST foo> ]>", 1, 35); 2534 test!func("<!DOCTYPE name [ <!ATTLIST foo def CDATA #REQUIRED> ]>", 1, 55); 2535 2536 test!func(`<!DOCTYPE name [ <!ENTITY foo "bar"> ]>`, 1, 40); 2537 test!func(`<!DOCTYPE name [ <!ENTITY foo 'bar'> ]>`, 1, 40); 2538 test!func(`<!DOCTYPE name [ <!ENTITY foo SYSTEM 'sys'> ]>`, 1, 47); 2539 test!func(`<!DOCTYPE name [ <!ENTITY foo PUBLIC "'''" 'sys'> ]>`, 1, 53); 2540 2541 test!func(`<!DOCTYPE name [ <!NOTATION note PUBLIC 'blah'> ]>`, 1, 51); 2542 2543 test!func("<!DOCTYPE name [ <?pi> ]>", 1, 26); 2544 2545 test!func("<!DOCTYPE name [ <!-- coment --> ]>", 1, 36); 2546 2547 test!func("<!DOCTYPE name [ <?pi> <!----> <!ELEMENT blah EMPTY> ]>", 1, 56); 2548 test!func("<!DOCTYPE \nname\n[\n<?pi> \n <!---->\n<!ENTITY foo '\n\n'\n>\n]>", 10, 3); 2549 2550 test!func("<!DOCTYPE doc [\n" ~ 2551 "<!ENTITY e '<![CDATA[Tim Michael]]>'>\n" ~ 2552 "]>\n", 4, 1); 2553 2554 testFail!func("<!DOCTYP name>", 1, 2); 2555 testFail!func("<!DOCTYPEname>", 1, 10); 2556 testFail!func("<!DOCTYPE name1><!DOCTYPE name2>", 1, 18); 2557 testFail!func("<!DOCTYPE\n\nname1><!DOCTYPE name2>", 3, 8); 2558 testFail!func("<!DOCTYPE name [ ]<!--comment-->", 1, 19); 2559 2560 // FIXME This really should have the exception point at the quote and 2561 // say that it couldn't find the matching quote rather than point at 2562 // the character after it and say that it couldn't find a quote, but 2563 // that requires reworking some helper functions with better error 2564 // messages in mind. 2565 testFail!func(`<!DOCTYPE student SYSTEM "student".dtd"[` ~ 2566 "\n<!ELEMENT student (#PCDATA)>\n" ~ 2567 "]>", 1, 40); 2568 } 2569 } 2570 2571 2572 // Parse a start tag or empty element tag. It could be the root element, or 2573 // it could be a sub-element. 2574 // < was already removed from the front of the input. 2575 void _parseElementStart() 2576 { 2577 _entityPos = TextPos(_text.pos.line, _text.pos.col - 1); 2578 _savedText.pos = _text.pos; 2579 _savedText.input = _text.takeUntilAndDrop!(">", true)(); 2580 2581 if(_savedText.input.empty) 2582 throw new XMLParsingException("Tag missing name", _savedText.pos); 2583 if(_savedText.input.front == '/') 2584 throw new XMLParsingException("Invalid end tag", _savedText.pos); 2585 2586 if(_savedText.input.length > 1) 2587 { 2588 auto temp = _savedText.input.save; 2589 temp.popFrontN(temp.length - 1); 2590 if(temp.front == '/') 2591 { 2592 _savedText.input = _savedText.input.takeExactly(_savedText.input.length - 1); 2593 2594 static if(config.splitEmpty == SplitEmpty.no) 2595 { 2596 _type = EntityType.elementEmpty; 2597 _tagStack.sawEntity(); 2598 _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2; 2599 } 2600 else 2601 { 2602 _type = EntityType.elementStart; 2603 _tagStack.sawEntity(); 2604 _grammarPos = GrammarPos.splittingEmpty; 2605 } 2606 } 2607 else 2608 { 2609 _type = EntityType.elementStart; 2610 _tagStack.sawEntity(); 2611 _grammarPos = GrammarPos.contentCharData1; 2612 } 2613 } 2614 else 2615 { 2616 _type = EntityType.elementStart; 2617 _tagStack.sawEntity(); 2618 _grammarPos = GrammarPos.contentCharData1; 2619 } 2620 2621 _name = _savedText.takeName(); 2622 // The attributes should be all that's left in savedText. 2623 if(_tagStack.atMax) 2624 { 2625 auto temp = _savedText.save; 2626 auto attrChecker = _tagStack.attrChecker; 2627 2628 while(true) 2629 { 2630 immutable wasWS = stripWS(temp); 2631 if(temp.input.empty) 2632 break; 2633 if(!wasWS) 2634 throw new XMLParsingException("Whitespace missing before attribute name", temp.pos); 2635 2636 immutable attrPos = temp.pos; 2637 attrChecker.pushAttr(temp.takeName!'='(), attrPos); 2638 stripWS(temp); 2639 2640 checkNotEmpty(temp); 2641 if(temp.input.front != '=') 2642 throw new XMLParsingException("= missing", temp.pos); 2643 popFrontAndIncCol(temp); 2644 2645 stripWS(temp); 2646 temp.takeAttValue(); 2647 } 2648 2649 attrChecker.checkAttrs(); 2650 } 2651 } 2652 2653 static if(compileInTests) unittest 2654 { 2655 import core.exception : AssertError; 2656 import std.algorithm.comparison : equal; 2657 import std.exception : assertNotThrown, collectException, enforce; 2658 import dxml.internal : codeLen, testRangeFuncs; 2659 2660 static void test(alias func)(string text, EntityType type, string name, 2661 int row, int col, size_t line = __LINE__) 2662 { 2663 auto range = assertNotThrown!XMLParsingException(parseXML(func(text))); 2664 enforce!AssertError(range.front.type == type, "unittest failure 1", __FILE__, line); 2665 enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line); 2666 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 2667 } 2668 2669 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 2670 { 2671 auto xml = func(text); 2672 auto e = collectException!XMLParsingException(parseXML(func(text))); 2673 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2674 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 2675 } 2676 2677 static foreach(func; testRangeFuncs) 2678 { 2679 test!func("<a/>", EntityType.elementEmpty, "a", 1, 5); 2680 test!func("<a></a>", EntityType.elementStart, "a", 1, 4); 2681 test!func("<root/>", EntityType.elementEmpty, "root", 1, 8); 2682 test!func("<root></root>", EntityType.elementStart, "root", 1, 7); 2683 test!func("<foo/>", EntityType.elementEmpty, "foo", 1, 7); 2684 test!func("<foo></foo>", EntityType.elementStart, "foo", 1, 6); 2685 test!func("<foo />", EntityType.elementEmpty, "foo", 1, 14); 2686 test!func("<foo ></foo>", EntityType.elementStart, "foo", 1, 13); 2687 test!func("<foo \n\n\n />", EntityType.elementEmpty, "foo", 4, 4); 2688 test!func("<foo \n\n\n ></foo>", EntityType.elementStart, "foo", 4, 3); 2689 test!func("<foo.></foo.>", EntityType.elementStart, "foo.", 1, 7); 2690 test!func(`<京都市></京都市>`, EntityType.elementStart, "京都市", 1, codeLen!(func, `<京都市>`) + 1); 2691 2692 testFail!func(`<.foo/>`, 1, 2); 2693 testFail!func(`<>`, 1, 2); 2694 testFail!func(`</>`, 1, 2); 2695 testFail!func(`</foo>`, 1, 2); 2696 2697 { 2698 auto range = assertNotThrown!XMLParsingException(parseXML!simpleXML(func("<root/>"))); 2699 assert(range.front.type == EntityType.elementStart); 2700 assert(equal(range.front.name, "root")); 2701 assert(range._text.pos == TextPos(1, 8)); 2702 assertNotThrown!XMLParsingException(range.popFront()); 2703 assert(range.front.type == EntityType.elementEnd); 2704 assert(equal(range.front.name, "root")); 2705 assert(range._text.pos == TextPos(1, 8)); 2706 } 2707 } 2708 } 2709 2710 2711 // Parse an end tag. It could be the root element, or it could be a 2712 // sub-element. 2713 // </ was already removed from the front of the input. 2714 void _parseElementEnd() 2715 { 2716 if(_text.input.empty) 2717 throw new XMLParsingException("Unterminated end tag", _text.pos); 2718 _entityPos = TextPos(_text.pos.line, _text.pos.col - 2); 2719 _type = EntityType.elementEnd; 2720 _tagStack.sawEntity(); 2721 immutable namePos = _text.pos; 2722 _name = _text.takeName!'>'(); 2723 stripWS(_text); 2724 if(_text.input.empty || _text.input.front != '>') 2725 { 2726 throw new XMLParsingException("There can only be whitespace between an end tag's name and the >", 2727 _text.pos); 2728 } 2729 popFrontAndIncCol(_text); 2730 _tagStack.popTag(_name.save, namePos); 2731 _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2; 2732 } 2733 2734 static if(compileInTests) unittest 2735 { 2736 import core.exception : AssertError; 2737 import std.algorithm.comparison : equal; 2738 import std.exception : assertNotThrown, collectException, enforce; 2739 import dxml.internal : codeLen, testRangeFuncs; 2740 2741 static void test(alias func)(string text, string name, int row, int col, size_t line = __LINE__) 2742 { 2743 auto range = assertNotThrown!XMLParsingException(parseXML(func(text))); 2744 range.popFront(); 2745 enforce!AssertError(range.front.type == EntityType.elementEnd, "unittest failure 1", __FILE__, line); 2746 enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line); 2747 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 2748 } 2749 2750 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 2751 { 2752 auto range = parseXML(func(text)); 2753 auto e = collectException!XMLParsingException(range.popFront()); 2754 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2755 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 2756 } 2757 2758 static foreach(func; testRangeFuncs) 2759 { 2760 test!func("<a></a>", "a", 1, 8); 2761 test!func("<foo></foo>", "foo", 1, 12); 2762 test!func("<foo ></foo >", "foo", 1, 20); 2763 test!func("<foo \n ></foo \n >", "foo", 3, 3); 2764 test!func("<foo>\n\n\n</foo>", "foo", 4, 7); 2765 test!func("<foo.></foo.>", "foo.", 1, 14); 2766 test!func(`<京都市></京都市>`, "京都市", 1, codeLen!(func, `<京都市></京都市>`) + 1); 2767 2768 testFail!func(`<foo></ foo>`, 1, 8); 2769 testFail!func(`<foo></bar>`, 1, 8); 2770 testFail!func(`<foo></fo>`, 1, 8); 2771 testFail!func(`<foo></food>`, 1, 8); 2772 testFail!func(`<a></>`, 1, 6); 2773 testFail!func(`<a></`, 1, 6); 2774 testFail!func(`<a><`, 1, 5); 2775 testFail!func(`<a></a b='42'>`, 1, 8); 2776 } 2777 } 2778 2779 2780 // GrammarPos.contentCharData1 2781 // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 2782 // Parses at either CharData?. Nothing from the CharData? (or what's after it 2783 // if it's not there) has been consumed. 2784 void _parseAtContentCharData() 2785 { 2786 checkNotEmpty(_text); 2787 auto orig = _text.save; 2788 stripWS(_text); 2789 checkNotEmpty(_text); 2790 if(_text.input.front != '<') 2791 { 2792 _text = orig; 2793 _entityPos = _text.pos; 2794 _type = EntityType.text; 2795 _tagStack.sawEntity(); 2796 _savedText.pos = _text.pos; 2797 _savedText.input = _text.takeUntilAndDrop!"<"(); 2798 checkText!false(_savedText); 2799 checkNotEmpty(_text); 2800 if(_text.input.front == '/') 2801 { 2802 popFrontAndIncCol(_text); 2803 _grammarPos = GrammarPos.endTag; 2804 } 2805 else 2806 _grammarPos = GrammarPos.contentMid; 2807 } 2808 else 2809 { 2810 popFrontAndIncCol(_text); 2811 checkNotEmpty(_text); 2812 if(_text.input.front == '/') 2813 { 2814 popFrontAndIncCol(_text); 2815 _parseElementEnd(); 2816 } 2817 else 2818 _parseAtContentMid(); 2819 } 2820 } 2821 2822 static if(compileInTests) unittest 2823 { 2824 import core.exception : AssertError; 2825 import std.algorithm.comparison : equal; 2826 import std.exception : assertNotThrown, collectException, enforce; 2827 import dxml.internal : codeLen, testRangeFuncs; 2828 2829 static void test(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__) 2830 { 2831 auto pos = TextPos(row, col + (cast(int)(row == 1 ? "<root></" : "</").length)); 2832 auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>")); 2833 assertNotThrown!XMLParsingException(range.popFront()); 2834 enforce!AssertError(range.front.type == EntityType.text, "unittest failure 1", __FILE__, line); 2835 enforce!AssertError(equal(range.front.text, text), "unittest failure 2", __FILE__, line); 2836 enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line); 2837 } 2838 2839 static void testFail(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__) 2840 { 2841 auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0)); 2842 auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>")); 2843 auto e = collectException!XMLParsingException(range.popFront()); 2844 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 2845 enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line); 2846 } 2847 2848 static foreach(func; testRangeFuncs) 2849 { 2850 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 2851 { 2852 test!(func, toer)("hello world", 1, 12); 2853 test!(func, toer)("\nhello\n\nworld", 4, 6); 2854 test!(func, toer)("京都市", 1, codeLen!(func, "京都市") + 1); 2855 test!(func, toer)("B", 1, 7); 2856 test!(func, toer)("]", 1, 2); 2857 test!(func, toer)("]]", 1, 3); 2858 test!(func, toer)("]>", 1, 3); 2859 test!(func, toer)("foo \n\n < \n bar", 4, 5); 2860 2861 testFail!(func, toer)("&", 1, 1); 2862 testFail!(func, toer)("&;", 1, 1); 2863 testFail!(func, toer)("&f", 1, 1); 2864 testFail!(func, toer)("\v", 1, 1); 2865 testFail!(func, toer)("hello&world", 1, 6); 2866 testFail!(func, toer)("hello\vworld", 1, 6); 2867 testFail!(func, toer)("hello&;world", 1, 6); 2868 testFail!(func, toer)("hello&#;world", 1, 6); 2869 testFail!(func, toer)("hello&#x;world", 1, 6); 2870 testFail!(func, toer)("hello&.;world", 1, 6); 2871 testFail!(func, toer)("\n\nfoo\nbar&.;", 4, 4); 2872 2873 testFail!(func, toer)("]]>", 1, 1); 2874 testFail!(func, toer)("foo]]>bar", 1, 4); 2875 2876 static if(toer == ThrowOnEntityRef.yes) 2877 { 2878 testFail!(func, toer)("&foo; &bar baz", 1, 1); 2879 testFail!(func, toer)("foo \n\n &e; \n bar", 3, 2); 2880 } 2881 else 2882 { 2883 testFail!(func, toer)("&foo; &bar baz", 1, 7); 2884 test!(func, toer)("foo \n\n &e; \n bar", 4, 5); 2885 } 2886 } 2887 } 2888 } 2889 2890 2891 // GrammarPos.contentMid 2892 // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 2893 // The text right after the start tag was what was parsed previously. So, 2894 // that first CharData? was what was parsed last, and this parses starting 2895 // right after. The < should have already been removed from the input. 2896 void _parseAtContentMid() 2897 { 2898 // Note that References are treated as part of the CharData and not 2899 // parsed out by the EntityRange (see EntityRange.text). 2900 2901 switch(_text.input.front) 2902 { 2903 // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2904 // CDSect ::= CDStart CData CDEnd 2905 // CDStart ::= '<![CDATA[' 2906 // CData ::= (Char* - (Char* ']]>' Char*)) 2907 // CDEnd ::= ']]>' 2908 case '!': 2909 { 2910 popFrontAndIncCol(_text); 2911 if(_text.stripStartsWith("--")) 2912 { 2913 _parseComment(); 2914 static if(config.skipComments == SkipComments.yes) 2915 _parseAtContentCharData(); 2916 else 2917 _grammarPos = GrammarPos.contentCharData2; 2918 } 2919 else if(_text.stripStartsWith("[CDATA[")) 2920 _parseCDATA(); 2921 else 2922 { 2923 immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1); 2924 throw new XMLParsingException("Expected Comment or CDATA section", bangPos); 2925 } 2926 break; 2927 } 2928 // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 2929 case '?': 2930 { 2931 _parsePI(); 2932 _grammarPos = GrammarPos.contentCharData2; 2933 static if(config.skipPI == SkipPI.yes) 2934 popFront(); 2935 break; 2936 } 2937 // element ::= EmptyElemTag | STag content ETag 2938 default: 2939 { 2940 _parseElementStart(); 2941 break; 2942 } 2943 } 2944 } 2945 2946 2947 // This parses the Misc* that come after the root element. 2948 void _parseAtEndMisc() 2949 { 2950 // Misc ::= Comment | PI | S 2951 2952 stripWS(_text); 2953 2954 if(_text.input.empty) 2955 { 2956 _grammarPos = GrammarPos.documentEnd; 2957 return; 2958 } 2959 2960 if(_text.input.front != '<') 2961 throw new XMLParsingException("Expected <", _text.pos); 2962 popFrontAndIncCol(_text); 2963 checkNotEmpty(_text); 2964 2965 switch(_text.input.front) 2966 { 2967 // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2968 case '!': 2969 { 2970 popFrontAndIncCol(_text); 2971 if(_text.stripStartsWith("--")) 2972 { 2973 _parseComment(); 2974 static if(config.skipComments == SkipComments.yes) 2975 _parseAtEndMisc(); 2976 break; 2977 } 2978 immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1); 2979 throw new XMLParsingException("Expected Comment", bangPos); 2980 } 2981 // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 2982 case '?': 2983 { 2984 _parsePI(); 2985 static if(config.skipPI == SkipPI.yes) 2986 popFront(); 2987 break; 2988 } 2989 default: throw new XMLParsingException("Must be a comment or PI", _text.pos); 2990 } 2991 } 2992 2993 // Used for keeping track of the names of start tags so that end tags can be 2994 // verified as well as making it possible to avoid redoing other validation. 2995 // We keep track of the total number of entities which have been parsed thus 2996 // far so that only whichever EntityRange is farthest along in parsing 2997 // actually adds or removes tags from the TagStack, and the parser can skip 2998 // some of the validation for ranges that are farther behind. That way, the 2999 // end tags get verified, but we only have one stack. If the stack were 3000 // duplicated with every call to save, then there would be a lot more 3001 // allocations, which we don't want. But because we only need to verify the 3002 // end tags once, we can get away with having a shared tag stack. The cost 3003 // is that we have to keep track of how many tags we've parsed so that we 3004 // know if an EntityRange should actually be pushing or popping tags from 3005 // the stack, but that's a lot cheaper than duplicating the stack, and it's 3006 // a lot less annoying then making EntityRange an input range and not a 3007 // forward range or making it a cursor rather than a range. 3008 struct TagStack 3009 { 3010 void pushTag(Taken tagName) 3011 { 3012 if(entityCount++ == state.maxEntities) 3013 { 3014 ++state.maxEntities; 3015 put(state.tags, tagName); 3016 } 3017 ++depth; 3018 } 3019 3020 void popTag(Taken tagName, TextPos pos) 3021 { 3022 import std.algorithm : equal; 3023 import std.format : format; 3024 if(entityCount++ == state.maxEntities) 3025 { 3026 assert(!state.tags.data.empty); 3027 if(!equal(state.tags.data.back.save, tagName.save)) 3028 { 3029 enum fmt = "Name of end tag </%s> does not match corresponding start tag <%s>"; 3030 throw new XMLParsingException(format!fmt(tagName, state.tags.data.back), pos); 3031 } 3032 ++state.maxEntities; 3033 state.tags.shrinkTo(state.tags.data.length - 1); 3034 } 3035 --depth; 3036 } 3037 3038 @property auto attrChecker() 3039 { 3040 assert(atMax); 3041 3042 static struct AttrChecker 3043 { 3044 void pushAttr(Taken attrName, TextPos attrPos) 3045 { 3046 import std.typecons : tuple; 3047 put(state.attrs, tuple(attrName, attrPos)); 3048 } 3049 3050 void checkAttrs() 3051 { 3052 import std.algorithm.comparison : cmp, equal; 3053 import std.algorithm.sorting : sort; 3054 import std.conv : to; 3055 3056 if(state.attrs.data.length < 2) 3057 return; 3058 3059 sort!((a,b) => cmp(a[0].save, b[0].save) < 0)(state.attrs.data); 3060 auto prev = state.attrs.data.front; 3061 foreach(attr; state.attrs.data[1 .. $]) 3062 { 3063 if(equal(prev[0], attr[0])) 3064 throw new XMLParsingException("Duplicate attribute name", attr[1]); 3065 prev = attr; 3066 } 3067 } 3068 3069 ~this() 3070 { 3071 state.attrs.clear(); 3072 } 3073 3074 SharedState* state; 3075 } 3076 3077 return AttrChecker(state); 3078 } 3079 3080 void sawEntity() 3081 { 3082 if(entityCount++ == state.maxEntities) 3083 ++state.maxEntities; 3084 } 3085 3086 @property bool atMax() 3087 { 3088 return entityCount == state.maxEntities; 3089 } 3090 3091 struct SharedState 3092 { 3093 import std.array : Appender; 3094 import std.typecons : Tuple; 3095 3096 Appender!(Taken[]) tags; 3097 Appender!(Tuple!(Taken, TextPos)[]) attrs; 3098 size_t maxEntities; 3099 } 3100 3101 static create() 3102 { 3103 TagStack tagStack; 3104 tagStack.state = new SharedState; 3105 tagStack.state.tags.reserve(10); 3106 tagStack.state.attrs.reserve(10); 3107 return tagStack; 3108 } 3109 3110 SharedState* state; 3111 size_t entityCount; 3112 int depth; 3113 } 3114 3115 static if(compileInTests) unittest 3116 { 3117 import core.exception : AssertError; 3118 import std.algorithm.comparison : equal; 3119 import std.exception : assertNotThrown, collectException, enforce; 3120 import dxml.internal : testRangeFuncs; 3121 3122 static void test(alias func)(string text, size_t line = __LINE__) 3123 { 3124 auto xml = func(text); 3125 static foreach(config; someTestConfigs) 3126 {{ 3127 auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1", 3128 __FILE__, line); 3129 assertNotThrown!XMLParsingException(walkLength(range), "unittest failure 2", __FILE__, line); 3130 }} 3131 } 3132 3133 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 3134 { 3135 auto xml = func(text); 3136 static foreach(config; someTestConfigs) 3137 {{ 3138 auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1", 3139 __FILE__, line); 3140 auto e = collectException!XMLParsingException(walkLength(range)); 3141 enforce!AssertError(e !is null, "unittest failure 2", __FILE__, line); 3142 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 3143 }} 3144 } 3145 3146 static foreach(func; testRangeFuncs) 3147 { 3148 test!func("<root></root>"); 3149 test!func("<root><a></a></root>"); 3150 test!func("<root><a><b></b></a></root>"); 3151 test!func("<root><a><b></b></a></root>"); 3152 test!func("<root><a><b></b></a><foo><bar></bar></foo></root>"); 3153 test!func("<a>\n" ~ 3154 " <b>\n" ~ 3155 " <c>\n" ~ 3156 " <d>\n" ~ 3157 " <e>\n" ~ 3158 " <f>\n" ~ 3159 " <g>\n" ~ 3160 " <h>\n" ~ 3161 " <i><i><i><i>\n" ~ 3162 " </i></i></i></i>\n" ~ 3163 " <i>\n" ~ 3164 " <j>\n" ~ 3165 " <k>\n" ~ 3166 " <l>\n" ~ 3167 " <m>\n" ~ 3168 " <n>\n" ~ 3169 " <o>\n" ~ 3170 " <p>\n" ~ 3171 " <q>\n" ~ 3172 " <r>\n" ~ 3173 " <s>\n" ~ 3174 " <!-- comment --> <?pi?> <t><u><v></v></u></t>\n" ~ 3175 " </s>\n" ~ 3176 " </r>\n" ~ 3177 " </q>\n" ~ 3178 " </p></o></n></m>\n" ~ 3179 " </l>\n" ~ 3180 " </k>\n" ~ 3181 " </j>\n" ~ 3182 "</i></h>" ~ 3183 " </g>\n" ~ 3184 " </f>\n" ~ 3185 " </e>\n" ~ 3186 " </d>\n" ~ 3187 " </c>\n" ~ 3188 " </b>\n" ~ 3189 "</a>"); 3190 test!func(`<京都市></京都市>`); 3191 3192 testFail!func(`<a>`, 1, 4); 3193 testFail!func(`<foo></foobar>`, 1, 8); 3194 testFail!func(`<foobar></foo>`, 1, 11); 3195 testFail!func(`<a><\a>`, 1, 5); 3196 testFail!func(`<a><a/>`, 1, 8); 3197 testFail!func(`<a><b>`, 1, 7); 3198 testFail!func(`<a><b><c>`, 1, 10); 3199 testFail!func(`<a></a><b>`, 1, 9); 3200 testFail!func(`<a></a><b></b>`, 1, 9); 3201 testFail!func(`<a><b></a></b>`, 1, 9); 3202 testFail!func(`<a><b><c></c><b></a>`, 1, 19); 3203 testFail!func(`<a><b></c><c></b></a>`, 1, 9); 3204 testFail!func(`<a><b></c></b></a>`, 1, 9); 3205 testFail!func("<a>\n" ~ 3206 " <b>\n" ~ 3207 " <c>\n" ~ 3208 " <d>\n" ~ 3209 " <e>\n" ~ 3210 " <f>\n" ~ 3211 " </f>\n" ~ 3212 " </e>\n" ~ 3213 " </d>\n" ~ 3214 " </c>\n" ~ 3215 " </b>\n" ~ 3216 "<a>", 12, 4); 3217 testFail!func("<a>\n" ~ 3218 " <b>\n" ~ 3219 " <c>\n" ~ 3220 " <d>\n" ~ 3221 " <e>\n" ~ 3222 " <f>\n" ~ 3223 " </f>\n" ~ 3224 " </e>\n" ~ 3225 " </d>\n" ~ 3226 " </c>\n" ~ 3227 " </b>\n" ~ 3228 "</q>", 12, 3); 3229 } 3230 } 3231 3232 3233 struct Text(R) 3234 { 3235 alias config = cfg; 3236 alias Input = R; 3237 3238 Input input; 3239 TextPos pos; 3240 3241 @property save() { return typeof(this)(input.save, pos); } 3242 } 3243 3244 3245 alias Taken = typeof(takeExactly(byCodeUnit(R.init), 42)); 3246 3247 3248 EntityType _type; 3249 TextPos _entityPos; 3250 auto _grammarPos = GrammarPos.documentStart; 3251 3252 Taken _name; 3253 TagStack _tagStack; 3254 3255 Text!(typeof(byCodeUnit(R.init))) _text; 3256 Text!Taken _savedText; 3257 3258 3259 this(R xmlText) 3260 { 3261 _tagStack = TagStack.create(); 3262 _text.input = byCodeUnit(xmlText); 3263 3264 // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945 3265 _savedText = typeof(_savedText).init; 3266 _name = typeof(_name).init; 3267 3268 popFront(); 3269 } 3270 } 3271 3272 /// Ditto 3273 EntityRange!(config, R) parseXML(Config config = Config.init, R)(R xmlText) 3274 if(isForwardRange!R && isSomeChar!(ElementType!R)) 3275 { 3276 return EntityRange!(config, R)(xmlText); 3277 } 3278 3279 /// 3280 unittest 3281 { 3282 import std.range.primitives : walkLength; 3283 3284 auto xml = "<?xml version='1.0'?>\n" ~ 3285 "<?instruction start?>\n" ~ 3286 "<foo attr='42'>\n" ~ 3287 " <bar/>\n" ~ 3288 " <!-- no comment -->\n" ~ 3289 " <baz hello='world'>\n" ~ 3290 " nothing to say.\n" ~ 3291 " nothing at all...\n" ~ 3292 " </baz>\n" ~ 3293 "</foo>\n" ~ 3294 "<?some foo?>"; 3295 3296 { 3297 auto range = parseXML(xml); 3298 assert(range.front.type == EntityType.pi); 3299 assert(range.front.name == "instruction"); 3300 assert(range.front.text == "start"); 3301 3302 range.popFront(); 3303 assert(range.front.type == EntityType.elementStart); 3304 assert(range.front.name == "foo"); 3305 3306 { 3307 auto attrs = range.front.attributes; 3308 assert(walkLength(attrs.save) == 1); 3309 assert(attrs.front.name == "attr"); 3310 assert(attrs.front.value == "42"); 3311 } 3312 3313 range.popFront(); 3314 assert(range.front.type == EntityType.elementEmpty); 3315 assert(range.front.name == "bar"); 3316 3317 range.popFront(); 3318 assert(range.front.type == EntityType.comment); 3319 assert(range.front.text == " no comment "); 3320 3321 range.popFront(); 3322 assert(range.front.type == EntityType.elementStart); 3323 assert(range.front.name == "baz"); 3324 3325 { 3326 auto attrs = range.front.attributes; 3327 assert(walkLength(attrs.save) == 1); 3328 assert(attrs.front.name == "hello"); 3329 assert(attrs.front.value == "world"); 3330 } 3331 3332 range.popFront(); 3333 assert(range.front.type == EntityType.text); 3334 assert(range.front.text == 3335 "\n nothing to say.\n nothing at all...\n "); 3336 3337 range.popFront(); 3338 assert(range.front.type == EntityType.elementEnd); // </baz> 3339 range.popFront(); 3340 assert(range.front.type == EntityType.elementEnd); // </foo> 3341 3342 range.popFront(); 3343 assert(range.front.type == EntityType.pi); 3344 assert(range.front.name == "some"); 3345 assert(range.front.text == "foo"); 3346 3347 range.popFront(); 3348 assert(range.empty); 3349 } 3350 { 3351 auto range = parseXML!simpleXML(xml); 3352 3353 // simpleXML is set to skip processing instructions. 3354 3355 assert(range.front.type == EntityType.elementStart); 3356 assert(range.front.name == "foo"); 3357 3358 { 3359 auto attrs = range.front.attributes; 3360 assert(walkLength(attrs.save) == 1); 3361 assert(attrs.front.name == "attr"); 3362 assert(attrs.front.value == "42"); 3363 } 3364 3365 // simpleXML is set to split empty tags so that <bar/> is treated 3366 // as the same as <bar></bar> so that code does not have to 3367 // explicitly handle empty tags. 3368 range.popFront(); 3369 assert(range.front.type == EntityType.elementStart); 3370 assert(range.front.name == "bar"); 3371 range.popFront(); 3372 assert(range.front.type == EntityType.elementEnd); 3373 assert(range.front.name == "bar"); 3374 3375 // simpleXML is set to skip comments. 3376 3377 range.popFront(); 3378 assert(range.front.type == EntityType.elementStart); 3379 assert(range.front.name == "baz"); 3380 3381 { 3382 auto attrs = range.front.attributes; 3383 assert(walkLength(attrs.save) == 1); 3384 assert(attrs.front.name == "hello"); 3385 assert(attrs.front.value == "world"); 3386 } 3387 3388 range.popFront(); 3389 assert(range.front.type == EntityType.text); 3390 assert(range.front.text == 3391 "\n nothing to say.\n nothing at all...\n "); 3392 3393 range.popFront(); 3394 assert(range.front.type == EntityType.elementEnd); // </baz> 3395 range.popFront(); 3396 assert(range.front.type == EntityType.elementEnd); // </foo> 3397 range.popFront(); 3398 assert(range.empty); 3399 } 3400 } 3401 3402 // Test the state of the range immediately after parseXML returns. 3403 unittest 3404 { 3405 import std.algorithm.comparison : equal; 3406 import dxml.internal : testRangeFuncs; 3407 3408 static foreach(func; testRangeFuncs) 3409 { 3410 static foreach(config; someTestConfigs) 3411 {{ 3412 auto range = parseXML!config("<?xml?><root></root>"); 3413 assert(!range.empty); 3414 assert(range.front.type == EntityType.elementStart); 3415 assert(equal(range.front.name, "root")); 3416 }} 3417 3418 static foreach(config; [Config.init, makeConfig(SkipPI.yes)]) 3419 {{ 3420 auto range = parseXML!config("<!--no comment--><root></root>"); 3421 assert(!range.empty); 3422 assert(range.front.type == EntityType.comment); 3423 assert(equal(range.front.text, "no comment")); 3424 }} 3425 static foreach(config; [simpleXML, makeConfig(SkipComments.yes)]) 3426 {{ 3427 auto range = parseXML!config("<!--no comment--><root></root>"); 3428 assert(!range.empty); 3429 assert(range.front.type == EntityType.elementStart); 3430 assert(equal(range.front.name, "root")); 3431 }} 3432 3433 static foreach(config; [Config.init, makeConfig(SkipComments.yes)]) 3434 {{ 3435 auto range = parseXML!config("<?private eye?><root></root>"); 3436 assert(!range.empty); 3437 assert(range.front.type == EntityType.pi); 3438 assert(equal(range.front.name, "private")); 3439 assert(equal(range.front.text, "eye")); 3440 }} 3441 static foreach(config; [simpleXML, makeConfig(SkipPI.yes)]) 3442 {{ 3443 auto range = parseXML!config("<?private eye?><root></root>"); 3444 assert(!range.empty); 3445 assert(range.front.type == EntityType.elementStart); 3446 assert(equal(range.front.name, "root")); 3447 }} 3448 3449 static foreach(config; someTestConfigs) 3450 {{ 3451 auto range = parseXML!config("<root></root>"); 3452 assert(!range.empty); 3453 assert(range.front.type == EntityType.elementStart); 3454 assert(equal(range.front.name, "root")); 3455 }} 3456 } 3457 } 3458 3459 // Test various invalid states that didn't seem to fit well into tests elsewhere. 3460 unittest 3461 { 3462 import core.exception : AssertError; 3463 import std.exception : collectException, enforce; 3464 import dxml.internal : testRangeFuncs; 3465 3466 static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__) 3467 { 3468 auto xml = func(text); 3469 static foreach(config; someTestConfigs) 3470 {{ 3471 auto e = collectException!XMLParsingException( 3472 { 3473 auto range = parseXML!config(xml.save); 3474 while(!range.empty) 3475 range.popFront(); 3476 }()); 3477 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 3478 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 3479 }} 3480 } 3481 3482 static foreach(func; testRangeFuncs) 3483 {{ 3484 testFail!func("<root></root><invalid></invalid>", 1, 15); 3485 testFail!func("<root></root><invalid/>", 1, 15); 3486 testFail!func("<root/><invalid></invalid>", 1, 9); 3487 testFail!func("<root/><invalid/>", 1, 9); 3488 3489 testFail!func("<root></root>invalid", 1, 14); 3490 testFail!func("<root/>invalid", 1, 8); 3491 3492 testFail!func("<root/><?pi?>invalid", 1, 14); 3493 testFail!func("<root/><?pi?><invalid/>", 1, 15); 3494 3495 testFail!func("<root/><!DOCTYPE foo>", 1, 9); 3496 testFail!func("<root/></root>", 1, 9); 3497 3498 testFail!func("invalid<root></root>", 1, 1); 3499 testFail!func("invalid<?xml?><root></root>", 1, 1); 3500 testFail!func("invalid<!DOCTYPE foo><root></root>", 1, 1); 3501 testFail!func("invalid<!--comment--><root></root>", 1, 1); 3502 testFail!func("invalid<?Poirot?><root></root>", 1, 1); 3503 3504 testFail!func("<?xml?>invalid<root></root>", 1, 8); 3505 testFail!func("<!DOCTYPE foo>invalid<root></root>", 1, 15); 3506 testFail!func("<!--comment-->invalid<root></root>", 1, 15); 3507 testFail!func("<?Poirot?>invalid<root></root>", 1, 11); 3508 3509 testFail!func("<?xml?>", 1, 8); 3510 testFail!func("<!DOCTYPE name>", 1, 16); 3511 testFail!func("<?Sherlock?>", 1, 13); 3512 testFail!func("<?Poirot?><?Sherlock?><?Holmes?>", 1, 33); 3513 testFail!func("<?Poirot?></Poirot>", 1, 12); 3514 testFail!func("</Poirot>", 1, 2); 3515 testFail!func("<", 1, 2); 3516 testFail!func(`</`, 1, 2); 3517 testFail!func(`</a`, 1, 2); 3518 testFail!func(`</a>`, 1, 2); 3519 3520 3521 testFail!func("<doc>]]></doc>", 1, 6); 3522 3523 testFail!func(" <?xml?><root/>", 1, 1); 3524 testFail!func("\n<?xml?><root/>", 1, 1); 3525 }} 3526 } 3527 3528 // Test that parseXML and EntityRange's properties work with @safe. 3529 // pure would be nice too, but at minimum, the use of format for exception 3530 // messages, and the use of assumeSafeAppend prevent it. It may or may not be 3531 // worth trying to fix that. 3532 @safe unittest 3533 { 3534 import std.algorithm.comparison : equal; 3535 import dxml.internal : testRangeFuncs; 3536 3537 auto xml = "<root>\n" ~ 3538 " <![CDATA[nothing]]>\n" ~ 3539 " <foo a='42'/>\n" ~ 3540 "</root>"; 3541 3542 static foreach(func; testRangeFuncs) 3543 {{ 3544 auto range = parseXML(xml); 3545 assert(range.front.type == EntityType.elementStart); 3546 assert(equal(range.front.name, "root")); 3547 range.popFront(); 3548 assert(!range.empty); 3549 assert(range.front.type == EntityType.cdata); 3550 assert(equal(range.front.text, "nothing")); 3551 range.popFront(); 3552 assert(!range.empty); 3553 assert(range.front.type == EntityType.elementEmpty); 3554 assert(equal(range.front.name, "foo")); 3555 { 3556 auto attrs = range.front.attributes; 3557 auto saved = attrs.save; 3558 auto attr = attrs.front; 3559 assert(attr.name == "a"); 3560 assert(attr.value == "42"); 3561 attrs.popFront(); 3562 assert(attrs.empty); 3563 } 3564 auto saved = range.save; 3565 }} 3566 } 3567 3568 3569 // This is purely to provide a way to trigger the unittest blocks in EntityRange 3570 // without compiling them in normally. 3571 struct EntityRangeCompileTests 3572 { 3573 @property bool empty() @safe pure nothrow @nogc { assert(0); } 3574 @property char front() @safe pure nothrow @nogc { assert(0); } 3575 void popFront() @safe pure nothrow @nogc { assert(0); } 3576 @property typeof(this) save() @safe pure nothrow @nogc { assert(0); } 3577 } 3578 3579 unittest 3580 { 3581 EntityRange!(Config.init, EntityRangeCompileTests) _entityRangeTests; 3582 } 3583 3584 3585 /++ 3586 Whether the given type is a forward range of attributes. 3587 3588 Essentially, an attribute range must be a forward range where 3589 3590 $(UL 3591 $(LI each element has the members $(D name), $(D value), and $(D pos)) 3592 $(LI $(D name) and $(D value) are forward ranges of characters) 3593 $(LI $(D name) and $(D value) have the same type) 3594 $(LI $(D pos) is a $(LREF TextPos))) 3595 3596 Normally, an attribute range would come from 3597 $(LREF EntityRange.Entity.attributes) or 3598 $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom), but 3599 as long as a range has the correct API, it qualifies as an attribute range. 3600 3601 See_Also: $(LREF EntityRange.Entity.Attribute)$(BR) 3602 $(LREF EntityRange.Entity.attributes)$(BR) 3603 $(REF_ALTTEXT DOMEntity.Attribute, DOMEntity.Attribute, dxml, dom)$(BR) 3604 $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom) 3605 +/ 3606 template isAttrRange(R) 3607 { 3608 static if(isForwardRange!R && 3609 is(typeof(R.init.front.name)) && 3610 is(typeof(R.init.front.value)) && 3611 is(ReturnType!((R r) => r.front.pos) == TextPos)) 3612 { 3613 alias NameType = ReturnType!((R r) => r.front.name); 3614 alias ValueType = ReturnType!((R r) => r.front.value); 3615 3616 enum isAttrRange = is(NameType == ValueType) && 3617 isForwardRange!NameType && 3618 isSomeChar!(ElementType!NameType); 3619 } 3620 else 3621 enum isAttrRange = false; 3622 } 3623 3624 /// 3625 unittest 3626 { 3627 import std.typecons : Tuple; 3628 import dxml.dom : parseDOM; 3629 3630 alias R1 = typeof(parseXML("<root/>").front.attributes); 3631 static assert(isAttrRange!R1); 3632 3633 alias R2 = typeof(parseDOM("<root/>").children[0].attributes); 3634 static assert(isAttrRange!R2); 3635 3636 alias T = Tuple!(string, "name", string, "value", TextPos, "pos"); 3637 static assert(isAttrRange!(T[])); 3638 3639 static assert(!isAttrRange!string); 3640 } 3641 3642 unittest 3643 { 3644 import std.typecons : Tuple; 3645 { 3646 alias T = Tuple!(string, "nam", string, "value", TextPos, "pos"); 3647 static assert(!isAttrRange!(T[])); 3648 } 3649 { 3650 alias T = Tuple!(string, "name", string, "valu", TextPos, "pos"); 3651 static assert(!isAttrRange!(T[])); 3652 } 3653 { 3654 alias T = Tuple!(string, "name", string, "value", TextPos, "po"); 3655 static assert(!isAttrRange!(T[])); 3656 } 3657 { 3658 alias T = Tuple!(string, "name", wstring, "value", TextPos, "pos"); 3659 static assert(!isAttrRange!(T[])); 3660 } 3661 { 3662 alias T = Tuple!(string, "name", string, "value"); 3663 static assert(!isAttrRange!(T[])); 3664 } 3665 { 3666 alias T = Tuple!(int, "name", string, "value", TextPos, "pos"); 3667 static assert(!isAttrRange!(T[])); 3668 } 3669 { 3670 alias T = Tuple!(string, "name", int, "value", TextPos, "pos"); 3671 static assert(!isAttrRange!(T[])); 3672 } 3673 { 3674 alias T = Tuple!(string, "name", string, "value", int, "pos"); 3675 static assert(!isAttrRange!(T[])); 3676 } 3677 } 3678 3679 3680 /++ 3681 A helper function for processing start tag attributes. 3682 3683 It functions similarly to $(PHOBOS_REF getopt, std, getopt). It takes a 3684 range of attributes and a list of alternating strings and pointers where 3685 each string represents the name of the attribute to parse and the pointer 3686 immediately after it is assigned the value that corresponds to the attribute 3687 name (if present). If the given pointer does not point to the same type as 3688 the range of characters used in the attributes, then 3689 $(PHOBOS_REF to, std, conv) is used to convert the value to the type the 3690 pointer points to. 3691 3692 If a $(D Nullable!T*) is given rather than a $(D T*), then it will be 3693 treated the same as if it had been $(D T*). So, $(D to!T) will be used to 3694 convert the attribute value if the matching attribute name is present. The 3695 advantage of passing $(D Nullable!T*) instead of $(D T*) is that it's 3696 possible to distinguish between an attribute that wasn't present and one 3697 where it was present but was equivalent to $(D T.init). 3698 3699 Unlike $(PHOBOS_REF getopt, std, getopt), the given range is consumed 3700 rather than taking it by $(K_REF) and leaving the attributes that weren't 3701 matched in the range (since that really doesn't work with an arbitrary 3702 range as opposed to a dynamic array). However, if the second argument of 3703 getAttrs is not a $(K_STRING) but is instead an output range that accepts 3704 the element type of the range, then any attributes which aren't matched are 3705 put into the output range. 3706 3707 Params: 3708 attrRange = A range of attributes (see $(LREF isAttrRange)). 3709 unmatched = An output range that any _unmatched attributes from the 3710 range are put into (optional argument). 3711 args = An alternating list of strings and pointers where the names 3712 represent the attribute names to get the value of, and the 3713 corresponding values get assigned to what the pointers point to. 3714 3715 Throws: $(LREF XMLParsingException) if $(PHOBOS_REF to, std, conv) fails to 3716 convert an attribute value. 3717 3718 See_Also: $(LREF isAttrRange)$(BR) 3719 $(LREF EntityRange.Entity.attributes)$(BR) 3720 $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom) 3721 +/ 3722 void getAttrs(R, Args...)(R attrRange, Args args) 3723 if(isAttrRange!R && Args.length % 2 == 0) 3724 { 3725 mixin(_genGetAttrs(false)); 3726 } 3727 3728 /// Ditto 3729 void getAttrs(R, OR, Args...)(R attrRange, ref OR unmatched, Args args) 3730 if(isAttrRange!R && isOutputRange!(OR, ElementType!R) && Args.length % 2 == 0) 3731 { 3732 mixin(_genGetAttrs(true)); 3733 } 3734 3735 private string _genGetAttrs(bool includeUnmatched) 3736 { 3737 auto retval = 3738 ` import std.algorithm.comparison : equal; 3739 import std.conv : ConvException, to; 3740 import std.format : format; 3741 import std.typecons : Nullable; 3742 import std.utf : byChar; 3743 3744 alias Attr = ElementType!R; 3745 alias SliceOfR = ElementType!(typeof(Attr.init.name)); 3746 3747 outer: foreach(attr; attrRange) 3748 { 3749 static foreach(i, arg; args) 3750 { 3751 static if(i % 2 == 0) 3752 static assert(is(Args[i] == string), format!"Expected string for args[%s]"(i)); 3753 else 3754 { 3755 static assert(isPointer!(Args[i]), format!"Expected pointer for args[%s]"(i)); 3756 3757 if(equal(attr.name, args[i - 1].byChar())) 3758 { 3759 alias ArgType = typeof(*arg); 3760 3761 static if(isInstanceOf!(Nullable, ArgType)) 3762 alias TargetType = TemplateArgsOf!ArgType; 3763 else 3764 alias TargetType = typeof(*arg); 3765 3766 try 3767 *arg = to!TargetType(attr.value); 3768 catch(ConvException ce) 3769 { 3770 enum fmt = "Failed to convert %s: %s"; 3771 throw new XMLParsingException(format!fmt(attr.name, ce.msg), attr.pos); 3772 } 3773 3774 continue outer; 3775 } 3776 } 3777 }`; 3778 3779 if(includeUnmatched) 3780 retval ~= "\n put(unmatched, attr);"; 3781 retval ~= "\n }"; 3782 3783 return retval; 3784 } 3785 3786 unittest 3787 { 3788 import std.array : appender; 3789 import std.exception : collectException; 3790 import std.typecons : Nullable; 3791 3792 { 3793 auto xml = `<root a="foo" b="19" c="true" d="rocks"/>`; 3794 auto range = parseXML(xml); 3795 assert(range.front.type == EntityType.elementEmpty); 3796 3797 string a; 3798 int b; 3799 bool c; 3800 3801 getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c); 3802 assert(a == "foo"); 3803 assert(b == 19); 3804 assert(c == true); 3805 } 3806 3807 // Nullable!T* accepts the same as T*. 3808 { 3809 auto xml = `<root a="foo" c="true" d="rocks"/>`; 3810 auto range = parseXML(xml); 3811 assert(range.front.type == EntityType.elementEmpty); 3812 3813 Nullable!string a; 3814 Nullable!int b; 3815 bool c; 3816 3817 getAttrs(range.front.attributes, "c", &c, "b", &b, "a", &a); 3818 assert(a == "foo"); 3819 assert(b.isNull); 3820 assert(c == true); 3821 } 3822 3823 // If an output range of attributes is provided, then the ones that 3824 // weren't matched are put in it. 3825 { 3826 auto xml = `<root foo="42" bar="silly" d="rocks" q="t"/>`; 3827 auto range = parseXML(xml); 3828 assert(range.front.type == EntityType.elementEmpty); 3829 3830 alias Attribute = typeof(range).Entity.Attribute; 3831 auto unmatched = appender!(Attribute[])(); 3832 int i; 3833 string s; 3834 3835 getAttrs(range.front.attributes, unmatched, "foo", &i, "bar", &s); 3836 assert(i == 42); 3837 assert(s == "silly"); 3838 assert(unmatched.data.length == 2); 3839 assert(unmatched.data[0] == Attribute("d", "rocks", TextPos(1, 28))); 3840 assert(unmatched.data[1] == Attribute("q", "t", TextPos(1, 38))); 3841 } 3842 3843 // An XMLParsingException gets thrown if a conversion fails. 3844 { 3845 auto xml = `<root foo="bar" false="true" d="rocks"/>`; 3846 auto range = parseXML(xml); 3847 assert(range.front.type == EntityType.elementEmpty); 3848 3849 int i; 3850 3851 auto xpe = collectException!XMLParsingException( 3852 getAttrs(range.front.attributes, "d", &i)); 3853 assert(xpe.pos == TextPos(1, 30)); 3854 } 3855 } 3856 3857 unittest 3858 { 3859 auto range = parseXML("<root/>"); 3860 auto attrs = range.front.attributes; 3861 int i; 3862 static assert(!__traits(compiles, getAttrs(attrs, "foo"))); 3863 static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar"))); 3864 static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i))); 3865 static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i, &i))); 3866 static assert(!__traits(compiles, getAttrs(attrs, &i, "foo"))); 3867 static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i))); 3868 static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i, "bar"))); 3869 } 3870 3871 @safe pure unittest 3872 { 3873 import std.typecons : Nullable; 3874 3875 static test(R)(R range, int* i, Nullable!int* j) @safe pure 3876 { 3877 getAttrs(range.front.attributes, "foo", i, "bar", j); 3878 } 3879 3880 test(parseXML("<root/>"), null, null); 3881 } 3882 3883 3884 /++ 3885 Takes an $(LREF EntityRange) which is at a start tag and iterates it until 3886 it is at its corresponding end tag. It is an error to call skipContents when 3887 the current entity is not $(LREF EntityType.elementStart). 3888 3889 $(TABLE 3890 $(TR $(TH Supported $(LREF EntityType)s:)) 3891 $(TR $(TD $(LREF2 elementStart, EntityType))) 3892 ) 3893 3894 Returns: The range with its $(D front) now at the end tag corresponding to 3895 the start tag that was $(D front) when the function was called. 3896 3897 Throws: $(LREF XMLParsingException) on invalid XML. 3898 +/ 3899 R skipContents(R)(R entityRange) 3900 if(isInstanceOf!(EntityRange, R)) 3901 { 3902 assert(entityRange._type == EntityType.elementStart); 3903 3904 // We don't bother calling empty, because the only way for the entityRange 3905 // to be empty would be for it to reach the end of the document, and an 3906 // XMLParsingException would be thrown if the end of the document were 3907 // reached before we reached the corresponding end tag. 3908 for(int tagDepth = 1; tagDepth != 0;) 3909 { 3910 entityRange.popFront(); 3911 immutable type = entityRange._type; 3912 if(type == EntityType.elementStart) 3913 ++tagDepth; 3914 else if(type == EntityType.elementEnd) 3915 --tagDepth; 3916 } 3917 3918 return entityRange; 3919 } 3920 3921 /// 3922 unittest 3923 { 3924 auto xml = "<root>\n" ~ 3925 " <foo>\n" ~ 3926 " <bar>\n" ~ 3927 " Some text\n" ~ 3928 " </bar>\n" ~ 3929 " </foo>\n" ~ 3930 " <!-- no comment -->\n" ~ 3931 "</root>"; 3932 3933 auto range = parseXML(xml); 3934 assert(range.front.type == EntityType.elementStart); 3935 assert(range.front.name == "root"); 3936 3937 range.popFront(); 3938 assert(range.front.type == EntityType.elementStart); 3939 assert(range.front.name == "foo"); 3940 3941 range = range.skipContents(); 3942 assert(range.front.type == EntityType.elementEnd); 3943 assert(range.front.name == "foo"); 3944 3945 range.popFront(); 3946 assert(range.front.type == EntityType.comment); 3947 assert(range.front.text == " no comment "); 3948 3949 range.popFront(); 3950 assert(range.front.type == EntityType.elementEnd); 3951 assert(range.front.name == "root"); 3952 3953 range.popFront(); 3954 assert(range.empty); 3955 } 3956 3957 3958 /++ 3959 Skips entities until the given $(LREF EntityType) is reached. 3960 3961 If multiple $(LREF EntityType)s are given, then any one of them counts as 3962 a match. 3963 3964 The current entity is skipped regardless of whether it is the given 3965 $(LREF EntityType). 3966 3967 This is essentially a slightly optimized equivalent to 3968 3969 --- 3970 if(!range.empty()) 3971 { 3972 range.popFront(); 3973 range = range.find!((a, b) => a.type == b.type)(entityTypes); 3974 } 3975 --- 3976 3977 Returns: The given range with its $(D front) now at the first entity which 3978 matched one of the given $(LREF EntityType)s or an empty range if 3979 none were found. 3980 3981 Throws: $(LREF XMLParsingException) on invalid XML. 3982 +/ 3983 R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...) 3984 if(isInstanceOf!(EntityRange, R)) 3985 { 3986 if(entityRange.empty) 3987 return entityRange; 3988 entityRange.popFront(); 3989 for(; !entityRange.empty; entityRange.popFront()) 3990 { 3991 immutable type = entityRange._type; 3992 foreach(entityType; entityTypes) 3993 { 3994 if(type == entityType) 3995 return entityRange; 3996 } 3997 } 3998 return entityRange; 3999 } 4000 4001 /// 4002 unittest 4003 { 4004 auto xml = "<root>\n" ~ 4005 " <!-- blah blah blah -->\n" ~ 4006 " <foo>nothing to say</foo>\n" ~ 4007 "</root>"; 4008 4009 auto range = parseXML(xml); 4010 assert(range.front.type == EntityType.elementStart); 4011 assert(range.front.name == "root"); 4012 4013 range = range.skipToEntityType(EntityType.elementStart, 4014 EntityType.elementEmpty); 4015 assert(range.front.type == EntityType.elementStart); 4016 assert(range.front.name == "foo"); 4017 4018 assert(range.skipToEntityType(EntityType.comment).empty); 4019 4020 // skipToEntityType will work on an empty range but will always 4021 // return an empty range. 4022 assert(range.takeNone().skipToEntityType(EntityType.comment).empty); 4023 } 4024 4025 4026 /++ 4027 Skips entities until the end tag is reached that corresponds to the start 4028 tag that is the parent of the current entity. 4029 4030 Returns: The given range with its $(D front) now at the end tag which 4031 corresponds to the parent start tag of the entity that was 4032 $(D front) when skipToParentEndTag was called. If the current 4033 entity does not have a parent start tag (which means that it's 4034 either the root element or a comment or PI outside of the root 4035 element), then an empty range is returned. 4036 4037 Throws: $(LREF XMLParsingException) on invalid XML. 4038 +/ 4039 R skipToParentEndTag(R)(R entityRange) 4040 if(isInstanceOf!(EntityRange, R)) 4041 { 4042 with(EntityType) final switch(entityRange._type) 4043 { 4044 case cdata: 4045 case comment: 4046 { 4047 entityRange = entityRange.skipToEntityType(elementStart, elementEnd); 4048 if(entityRange.empty || entityRange._type == elementEnd) 4049 return entityRange; 4050 goto case elementStart; 4051 } 4052 case elementStart: 4053 { 4054 while(true) 4055 { 4056 entityRange = entityRange.skipContents(); 4057 entityRange.popFront(); 4058 if(entityRange.empty || entityRange._type == elementEnd) 4059 return entityRange; 4060 if(entityRange._type == elementStart) 4061 continue; 4062 goto case comment; 4063 } 4064 assert(0); // the compiler isn't smart enough to see that this is unreachable. 4065 } 4066 case elementEnd: 4067 case elementEmpty: 4068 case pi: 4069 case text: goto case comment; 4070 } 4071 } 4072 4073 /// 4074 unittest 4075 { 4076 auto xml = "<root>\n" ~ 4077 " <foo>\n" ~ 4078 " <!-- comment -->\n" ~ 4079 " <bar>exam</bar>\n" ~ 4080 " </foo>\n" ~ 4081 " <!-- another comment -->\n" ~ 4082 "</root>"; 4083 { 4084 auto range = parseXML(xml); 4085 assert(range.front.type == EntityType.elementStart); 4086 assert(range.front.name == "root"); 4087 4088 range.popFront(); 4089 assert(range.front.type == EntityType.elementStart); 4090 assert(range.front.name == "foo"); 4091 4092 range.popFront(); 4093 assert(range.front.type == EntityType.comment); 4094 assert(range.front.text == " comment "); 4095 4096 range = range.skipToParentEndTag(); 4097 assert(range.front.type == EntityType.elementEnd); 4098 assert(range.front.name == "foo"); 4099 4100 range = range.skipToParentEndTag(); 4101 assert(range.front.type == EntityType.elementEnd); 4102 assert(range.front.name == "root"); 4103 4104 range = range.skipToParentEndTag(); 4105 assert(range.empty); 4106 } 4107 { 4108 auto range = parseXML(xml); 4109 assert(range.front.type == EntityType.elementStart); 4110 assert(range.front.name == "root"); 4111 4112 range.popFront(); 4113 assert(range.front.type == EntityType.elementStart); 4114 assert(range.front.name == "foo"); 4115 4116 range.popFront(); 4117 assert(range.front.type == EntityType.comment); 4118 assert(range.front.text == " comment "); 4119 4120 range.popFront(); 4121 assert(range.front.type == EntityType.elementStart); 4122 assert(range.front.name == "bar"); 4123 4124 range.popFront(); 4125 assert(range.front.type == EntityType.text); 4126 assert(range.front.text == "exam"); 4127 4128 range = range.skipToParentEndTag(); 4129 assert(range.front.type == EntityType.elementEnd); 4130 assert(range.front.name == "bar"); 4131 4132 range = range.skipToParentEndTag(); 4133 assert(range.front.type == EntityType.elementEnd); 4134 assert(range.front.name == "foo"); 4135 4136 range.popFront(); 4137 assert(range.front.type == EntityType.comment); 4138 assert(range.front.text == " another comment "); 4139 4140 range = range.skipToParentEndTag(); 4141 assert(range.front.type == EntityType.elementEnd); 4142 assert(range.front.name == "root"); 4143 4144 assert(range.skipToParentEndTag().empty); 4145 } 4146 { 4147 auto range = parseXML("<root><foo>bar</foo></root>"); 4148 assert(range.front.type == EntityType.elementStart); 4149 assert(range.front.name == "root"); 4150 assert(range.skipToParentEndTag().empty); 4151 } 4152 } 4153 4154 unittest 4155 { 4156 import core.exception : AssertError; 4157 import std.algorithm.comparison : equal; 4158 import std.exception : enforce; 4159 import dxml.internal : testRangeFuncs; 4160 4161 static void popAndCheck(R)(ref R range, EntityType type, size_t line = __LINE__) 4162 { 4163 range.popFront(); 4164 enforce!AssertError(!range.empty, "unittest 1", __FILE__, line); 4165 enforce!AssertError(range.front.type == type, "unittest 2", __FILE__, line); 4166 } 4167 4168 static foreach(func; testRangeFuncs) 4169 {{ 4170 // cdata 4171 { 4172 auto xml = "<root>\n" ~ 4173 " <![CDATA[ cdata run ]]>\n" ~ 4174 " <nothing/>\n" ~ 4175 " <![CDATA[ cdata have its bits flipped ]]>\n" ~ 4176 " <foo></foo>\n" ~ 4177 " <![CDATA[ cdata play violin ]]>\n" ~ 4178 "</root>"; 4179 4180 auto range = parseXML(func(xml)); 4181 assert(range.front.type == EntityType.elementStart); 4182 popAndCheck(range, EntityType.cdata); 4183 assert(equal(range.front.text, " cdata run ")); 4184 { 4185 auto temp = range.save.skipToParentEndTag(); 4186 assert(temp._type == EntityType.elementEnd); 4187 assert(equal(temp.front.name, "root")); 4188 } 4189 popAndCheck(range, EntityType.elementEmpty); 4190 popAndCheck(range, EntityType.cdata); 4191 assert(equal(range.front.text, " cdata have its bits flipped ")); 4192 { 4193 auto temp = range.save.skipToParentEndTag(); 4194 assert(temp._type == EntityType.elementEnd); 4195 assert(equal(temp.front.name, "root")); 4196 } 4197 popAndCheck(range, EntityType.elementStart); 4198 range = range.skipContents(); 4199 popAndCheck(range, EntityType.cdata); 4200 assert(equal(range.front.text, " cdata play violin ")); 4201 range = range.skipToParentEndTag(); 4202 assert(range._type == EntityType.elementEnd); 4203 assert(equal(range.front.name, "root")); 4204 } 4205 // comment 4206 { 4207 auto xml = "<!-- before -->\n" ~ 4208 "<root>\n" ~ 4209 " <!-- comment 1 -->\n" ~ 4210 " <nothing/>\n" ~ 4211 " <!-- comment 2 -->\n" ~ 4212 " <foo></foo>\n" ~ 4213 " <!-- comment 3 -->\n" ~ 4214 "</root>\n" ~ 4215 "<!-- after -->" ~ 4216 "<!-- end -->"; 4217 4218 auto text = func(xml); 4219 assert(parseXML(text.save).skipToParentEndTag().empty); 4220 { 4221 auto range = parseXML(text.save); 4222 assert(range.front.type == EntityType.comment); 4223 popAndCheck(range, EntityType.elementStart); 4224 popAndCheck(range, EntityType.comment); 4225 assert(equal(range.front.text, " comment 1 ")); 4226 { 4227 auto temp = range.save.skipToParentEndTag(); 4228 assert(temp._type == EntityType.elementEnd); 4229 assert(equal(temp.front.name, "root")); 4230 } 4231 popAndCheck(range, EntityType.elementEmpty); 4232 popAndCheck(range, EntityType.comment); 4233 assert(equal(range.front.text, " comment 2 ")); 4234 { 4235 auto temp = range.save.skipToParentEndTag(); 4236 assert(temp._type == EntityType.elementEnd); 4237 assert(equal(temp.front.name, "root")); 4238 } 4239 popAndCheck(range, EntityType.elementStart); 4240 range = range.skipContents(); 4241 popAndCheck(range, EntityType.comment); 4242 assert(equal(range.front.text, " comment 3 ")); 4243 range = range.skipToParentEndTag(); 4244 assert(range._type == EntityType.elementEnd); 4245 assert(equal(range.front.name, "root")); 4246 } 4247 { 4248 auto range = parseXML(text.save); 4249 assert(range.front.type == EntityType.comment); 4250 popAndCheck(range, EntityType.elementStart); 4251 range = range.skipContents(); 4252 popAndCheck(range, EntityType.comment); 4253 assert(equal(range.front.text, " after ")); 4254 assert(range.save.skipToParentEndTag().empty); 4255 popAndCheck(range, EntityType.comment); 4256 assert(equal(range.front.text, " end ")); 4257 assert(range.skipToParentEndTag().empty); 4258 } 4259 } 4260 // elementStart 4261 { 4262 auto xml = "<root>\n" ~ 4263 " <a><b>foo</b></a>\n" ~ 4264 " <nothing/>\n" ~ 4265 " <c></c>\n" ~ 4266 " <d>\n" ~ 4267 " <e>\n" ~ 4268 " </e>\n" ~ 4269 " <f>\n" ~ 4270 " <g>\n" ~ 4271 " </g>\n" ~ 4272 " </f>\n" ~ 4273 " </d>\n" ~ 4274 "</root>"; 4275 4276 auto range = parseXML(func(xml)); 4277 assert(range.front.type == EntityType.elementStart); 4278 assert(equal(range.front.name, "root")); 4279 assert(range.save.skipToParentEndTag().empty); 4280 popAndCheck(range, EntityType.elementStart); 4281 assert(equal(range.front.name, "a")); 4282 { 4283 auto temp = range.save.skipToParentEndTag(); 4284 assert(temp._type == EntityType.elementEnd); 4285 assert(equal(temp.front.name, "root")); 4286 } 4287 popAndCheck(range, EntityType.elementStart); 4288 assert(equal(range.front.name, "b")); 4289 { 4290 auto temp = range.save.skipToParentEndTag(); 4291 assert(temp._type == EntityType.elementEnd); 4292 assert(equal(temp.front.name, "a")); 4293 } 4294 popAndCheck(range, EntityType.text); 4295 popAndCheck(range, EntityType.elementEnd); 4296 popAndCheck(range, EntityType.elementEnd); 4297 popAndCheck(range, EntityType.elementEmpty); 4298 popAndCheck(range, EntityType.elementStart); 4299 assert(equal(range.front.name, "c")); 4300 { 4301 auto temp = range.save.skipToParentEndTag(); 4302 assert(temp._type == EntityType.elementEnd); 4303 assert(equal(temp.front.name, "root")); 4304 } 4305 popAndCheck(range, EntityType.elementEnd); 4306 popAndCheck(range, EntityType.elementStart); 4307 assert(equal(range.front.name, "d")); 4308 popAndCheck(range, EntityType.elementStart); 4309 assert(equal(range.front.name, "e")); 4310 range = range.skipToParentEndTag(); 4311 assert(range._type == EntityType.elementEnd); 4312 assert(equal(range.front.name, "d")); 4313 range = range.skipToParentEndTag(); 4314 assert(range._type == EntityType.elementEnd); 4315 assert(equal(range.front.name, "root")); 4316 } 4317 // elementEnd 4318 { 4319 auto xml = "<root>\n" ~ 4320 " <a><b>foo</b></a>\n" ~ 4321 " <nothing/>\n" ~ 4322 " <c></c>\n" ~ 4323 "</root>"; 4324 4325 auto range = parseXML(func(xml)); 4326 assert(range.front.type == EntityType.elementStart); 4327 popAndCheck(range, EntityType.elementStart); 4328 popAndCheck(range, EntityType.elementStart); 4329 popAndCheck(range, EntityType.text); 4330 popAndCheck(range, EntityType.elementEnd); 4331 assert(equal(range.front.name, "b")); 4332 { 4333 auto temp = range.save.skipToParentEndTag(); 4334 assert(temp._type == EntityType.elementEnd); 4335 assert(equal(temp.front.name, "a")); 4336 } 4337 popAndCheck(range, EntityType.elementEnd); 4338 assert(equal(range.front.name, "a")); 4339 { 4340 auto temp = range.save.skipToParentEndTag(); 4341 assert(temp._type == EntityType.elementEnd); 4342 assert(equal(temp.front.name, "root")); 4343 } 4344 popAndCheck(range, EntityType.elementEmpty); 4345 popAndCheck(range, EntityType.elementStart); 4346 popAndCheck(range, EntityType.elementEnd); 4347 assert(equal(range.front.name, "c")); 4348 { 4349 auto temp = range.save.skipToParentEndTag(); 4350 assert(temp._type == EntityType.elementEnd); 4351 assert(equal(temp.front.name, "root")); 4352 } 4353 popAndCheck(range, EntityType.elementEnd); 4354 assert(range.skipToParentEndTag().empty); 4355 } 4356 // elementEmpty 4357 { 4358 auto range = parseXML(func("<root/>")); 4359 assert(range.front.type == EntityType.elementEmpty); 4360 assert(range.skipToParentEndTag().empty); 4361 } 4362 { 4363 auto xml = "<root>\n" ~ 4364 " <a><b>foo</b></a>\n" ~ 4365 " <nothing/>\n" ~ 4366 " <c></c>\n" ~ 4367 " <whatever/>\n" ~ 4368 "</root>"; 4369 4370 auto range = parseXML(func(xml)); 4371 popAndCheck(range, EntityType.elementStart); 4372 assert(range.front.type == EntityType.elementStart); 4373 range = range.skipContents(); 4374 popAndCheck(range, EntityType.elementEmpty); 4375 assert(equal(range.front.name, "nothing")); 4376 { 4377 auto temp = range.save; 4378 popAndCheck(temp, EntityType.elementStart); 4379 popAndCheck(temp, EntityType.elementEnd); 4380 popAndCheck(temp, EntityType.elementEmpty); 4381 assert(equal(temp.front.name, "whatever")); 4382 } 4383 range = range.skipToParentEndTag(); 4384 assert(range._type == EntityType.elementEnd); 4385 assert(equal(range.front.name, "root")); 4386 } 4387 // pi 4388 { 4389 auto xml = "<?Sherlock?>\n" ~ 4390 "<root>\n" ~ 4391 " <?Foo?>\n" ~ 4392 " <nothing/>\n" ~ 4393 " <?Bar?>\n" ~ 4394 " <foo></foo>\n" ~ 4395 " <?Baz?>\n" ~ 4396 "</root>\n" ~ 4397 "<?Poirot?>\n" ~ 4398 "<?Conan?>"; 4399 4400 auto range = parseXML(func(xml)); 4401 assert(range.front.type == EntityType.pi); 4402 assert(equal(range.front.name, "Sherlock")); 4403 assert(range.save.skipToParentEndTag().empty); 4404 popAndCheck(range, EntityType.elementStart); 4405 popAndCheck(range, EntityType.pi); 4406 assert(equal(range.front.name, "Foo")); 4407 { 4408 auto temp = range.save.skipToParentEndTag(); 4409 assert(temp._type == EntityType.elementEnd); 4410 assert(equal(temp.front.name, "root")); 4411 } 4412 popAndCheck(range, EntityType.elementEmpty); 4413 popAndCheck(range, EntityType.pi); 4414 assert(equal(range.front.name, "Bar")); 4415 { 4416 auto temp = range.save.skipToParentEndTag(); 4417 assert(temp._type == EntityType.elementEnd); 4418 assert(equal(temp.front.name, "root")); 4419 } 4420 popAndCheck(range, EntityType.elementStart); 4421 popAndCheck(range, EntityType.elementEnd); 4422 popAndCheck(range, EntityType.pi); 4423 assert(equal(range.front.name, "Baz")); 4424 range = range.skipToParentEndTag(); 4425 assert(range._type == EntityType.elementEnd); 4426 assert(equal(range.front.name, "root")); 4427 popAndCheck(range, EntityType.pi); 4428 assert(equal(range.front.name, "Poirot")); 4429 assert(range.save.skipToParentEndTag().empty); 4430 popAndCheck(range, EntityType.pi); 4431 assert(equal(range.front.name, "Conan")); 4432 assert(range.skipToParentEndTag().empty); 4433 } 4434 // text 4435 { 4436 auto xml = "<root>\n" ~ 4437 " nothing to say\n" ~ 4438 " <nothing/>\n" ~ 4439 " nothing whatsoever\n" ~ 4440 " <foo></foo>\n" ~ 4441 " but he keeps talking\n" ~ 4442 "</root>"; 4443 4444 auto range = parseXML(func(xml)); 4445 assert(range.front.type == EntityType.elementStart); 4446 popAndCheck(range, EntityType.text); 4447 assert(equal(range.front.text, "\n nothing to say\n ")); 4448 { 4449 auto temp = range.save.skipToParentEndTag(); 4450 assert(temp._type == EntityType.elementEnd); 4451 assert(equal(temp.front.name, "root")); 4452 } 4453 popAndCheck(range, EntityType.elementEmpty); 4454 popAndCheck(range, EntityType.text); 4455 assert(equal(range.front.text, "\n nothing whatsoever\n ")); 4456 { 4457 auto temp = range.save.skipToParentEndTag(); 4458 assert(temp._type == EntityType.elementEnd); 4459 assert(equal(temp.front.name, "root")); 4460 } 4461 popAndCheck(range, EntityType.elementStart); 4462 range = range.skipContents(); 4463 popAndCheck(range, EntityType.text); 4464 assert(equal(range.front.text, "\n but he keeps talking\n")); 4465 range = range.skipToParentEndTag(); 4466 assert(range._type == EntityType.elementEnd); 4467 assert(equal(range.front.name, "root")); 4468 } 4469 }} 4470 } 4471 4472 4473 /++ 4474 Treats the given string like a file path except that each directory 4475 corresponds to the name of a start tag. Note that this does $(I not) try to 4476 implement XPath as that would be quite complicated, and it really doesn't 4477 fit with a StAX parser. 4478 4479 A start tag should be thought of as a directory, with its child start tags 4480 as the directories it contains. 4481 4482 All paths should be relative. $(LREF EntityRange) can only move forward 4483 through the document, so using an absolute path would only make sense at 4484 the beginning of the document. As such, absolute paths are treated as 4485 invalid paths. 4486 4487 $(D_CODE_STRING "./") and $(D_CODE_STRING "../") are supported. Repeated 4488 slashes such as in $(D_CODE_STRING "foo//bar") are not supported and are 4489 treated as an invalid path. 4490 4491 If $(D range.front.type == EntityType.elementStart), then 4492 $(D range._skiptoPath($(D_STRING "foo"))) will search for the first child 4493 start tag (be it $(LREF EntityType.elementStart) or 4494 $(LREF EntityType.elementEmpty)) with the $(LREF2 name, EntityRange.Entity) 4495 $(D_CODE_STRING "foo"). That start tag must be a direct child of the current 4496 start tag. 4497 4498 If $(D range.front.type) is any other $(LREF EntityType), then 4499 $(D range._skipToPath($(D_STRING "foo"))) will return an empty range, 4500 because no other $(LREF EntityType)s have child start tags. 4501 4502 For any $(LREF EntityType), $(D range._skipToPath($(D_STRING "../foo"))) 4503 will search for the first start tag with the 4504 $(LREF2 name, EntityRange.Entity) $(D_CODE_STRING "foo") at the same level 4505 as the current entity. If the current entity is a start tag with the name 4506 $(D_CODE_STRING "foo"), it will not be considered a match. 4507 4508 $(D range._skipToPath($(D_STRING "./"))) is a no-op. However, 4509 $(D range._skipToPath($(D_STRING "../"))) will result in the empty range 4510 (since it doesn't target a specific start tag). 4511 4512 $(D range._skipToPath($(D_STRING "foo/bar"))) is equivalent to 4513 $(D range._skipToPath($(D_STRING "foo"))._skipToPath($(D_STRING "bar"))), 4514 and $(D range._skipToPath($(D_STRING "../foo/bar"))) is equivalent to 4515 $(D range._skipToPath($(D_STRING "../foo"))._skipToPath($(D_STRING "bar"))). 4516 4517 Returns: The given range with its $(D front) now at the requested entity if 4518 the path is valid; otherwise, an empty range is returned. 4519 4520 Throws: $(LREF XMLParsingException) on invalid XML. 4521 +/ 4522 R skipToPath(R)(R entityRange, string path) 4523 if(isInstanceOf!(EntityRange, R)) 4524 { 4525 import std.algorithm.comparison : equal; 4526 import std.path : pathSplitter; 4527 4528 if(entityRange.empty) 4529 return entityRange; 4530 if(path.empty || path[0] == '/') 4531 return entityRange.takeNone(); 4532 4533 with(EntityType) 4534 { 4535 static if(R.config.splitEmpty == SplitEmpty.yes) 4536 EntityType[2] startOrEnd = [elementStart, elementEnd]; 4537 else 4538 EntityType[3] startOrEnd = [elementStart, elementEnd, elementEmpty]; 4539 4540 R findOnCurrLevel(string name) 4541 { 4542 if(entityRange._type == elementStart) 4543 entityRange = entityRange.skipContents(); 4544 while(true) 4545 { 4546 entityRange = entityRange.skipToEntityType(startOrEnd[]); 4547 if(entityRange.empty) 4548 return entityRange; 4549 if(entityRange._type == elementEnd) 4550 return entityRange.takeNone(); 4551 4552 if(equal(name, entityRange._name.save)) 4553 return entityRange; 4554 4555 static if(R.config.splitEmpty == SplitEmpty.no) 4556 { 4557 if(entityRange._type == elementEmpty) 4558 continue; 4559 } 4560 entityRange = entityRange.skipContents(); 4561 } 4562 } 4563 4564 for(auto pieces = path.pathSplitter(); !pieces.empty; pieces.popFront()) 4565 { 4566 if(pieces.front == ".") 4567 continue; 4568 else if(pieces.front == "..") 4569 { 4570 pieces.popFront(); 4571 if(pieces.empty) 4572 return entityRange.takeNone(); 4573 4574 while(pieces.front == "..") 4575 { 4576 pieces.popFront(); 4577 if(pieces.empty) 4578 return entityRange.takeNone(); 4579 entityRange = entityRange.skipToParentEndTag(); 4580 if(entityRange.empty) 4581 return entityRange; 4582 } 4583 4584 entityRange = findOnCurrLevel(pieces.front); 4585 if(entityRange.empty) 4586 return entityRange; 4587 } 4588 else 4589 { 4590 if(entityRange._type != elementStart) 4591 return entityRange.takeNone(); 4592 4593 entityRange = entityRange.skipToEntityType(startOrEnd[]); 4594 assert(!entityRange.empty); 4595 if(entityRange._type == elementEnd) 4596 return entityRange.takeNone(); 4597 4598 if(!equal(pieces.front, entityRange._name.save)) 4599 { 4600 entityRange = findOnCurrLevel(pieces.front); 4601 if(entityRange.empty) 4602 return entityRange; 4603 } 4604 } 4605 } 4606 4607 return entityRange; 4608 } 4609 } 4610 4611 /// 4612 unittest 4613 { 4614 { 4615 auto xml = "<carrot>\n" ~ 4616 " <foo>\n" ~ 4617 " <bar>\n" ~ 4618 " <baz/>\n" ~ 4619 " <other/>\n" ~ 4620 " </bar>\n" ~ 4621 " </foo>\n" ~ 4622 "</carrot>"; 4623 4624 auto range = parseXML(xml); 4625 // "<carrot>" 4626 assert(range.front.type == EntityType.elementStart); 4627 assert(range.front.name == "carrot"); 4628 4629 range = range.skipToPath("foo/bar"); 4630 // " <bar> 4631 assert(!range.empty); 4632 assert(range.front.type == EntityType.elementStart); 4633 assert(range.front.name == "bar"); 4634 4635 range = range.skipToPath("baz"); 4636 // " <baz/> 4637 assert(!range.empty); 4638 assert(range.front.type == EntityType.elementEmpty); 4639 4640 // other is not a child element of baz 4641 assert(range.skipToPath("other").empty); 4642 4643 range = range.skipToPath("../other"); 4644 // " <other/>" 4645 assert(!range.empty); 4646 assert(range.front.type == EntityType.elementEmpty); 4647 } 4648 { 4649 auto xml = "<potato>\n" ~ 4650 " <foo>\n" ~ 4651 " <bar>\n "~ 4652 " </bar>\n" ~ 4653 " <crazy>\n" ~ 4654 " </crazy>\n" ~ 4655 " <fou/>\n" ~ 4656 " </foo>\n" ~ 4657 " <buzz/>\n" ~ 4658 "</potato>"; 4659 4660 auto range = parseXML(xml); 4661 // "<potato>" 4662 assert(range.front.type == EntityType.elementStart); 4663 4664 range = range.skipToPath("./"); 4665 // "<potato>" 4666 assert(!range.empty); 4667 assert(range.front.type == EntityType.elementStart); 4668 assert(range.front.name == "potato"); 4669 4670 range = range.skipToPath("./foo/bar"); 4671 // " <bar>" 4672 assert(!range.empty); 4673 assert(range.front.type == EntityType.elementStart); 4674 assert(range.front.name == "bar"); 4675 4676 range = range.skipToPath("../crazy"); 4677 // " <crazy>" 4678 assert(!range.empty); 4679 assert(range.front.type == EntityType.elementStart); 4680 assert(range.front.name == "crazy"); 4681 4682 // Whether popFront is called here before the call to 4683 // range.skipToPath("../fou") below, the result is the same, because 4684 // both <crazy> and </crazy> are at the same level. 4685 range.popFront(); 4686 // " </crazy>" 4687 assert(!range.empty); 4688 assert(range.front.type == EntityType.elementEnd); 4689 assert(range.front.name == "crazy"); 4690 4691 range = range.skipToPath("../fou"); 4692 // " <fou/>" 4693 assert(!range.empty); 4694 assert(range.front.type == EntityType.elementEmpty); 4695 } 4696 // Searching stops at the first matching start tag. 4697 { 4698 auto xml = "<beet>\n" ~ 4699 " <foo a='42'>\n" ~ 4700 " </foo>\n" ~ 4701 " <foo b='451'>\n" ~ 4702 " </foo>\n" ~ 4703 "</beet>"; 4704 4705 auto range = parseXML(xml); 4706 range = range.skipToPath("foo"); 4707 assert(!range.empty); 4708 assert(range.front.type == EntityType.elementStart); 4709 assert(range.front.name == "foo"); 4710 4711 { 4712 auto attrs = range.front.attributes; 4713 assert(attrs.front.name == "a"); 4714 assert(attrs.front.value == "42"); 4715 } 4716 4717 range = range.skipToPath("../foo"); 4718 assert(!range.empty); 4719 assert(range.front.type == EntityType.elementStart); 4720 assert(range.front.name == "foo"); 4721 4722 { 4723 auto attrs = range.front.attributes; 4724 assert(attrs.front.name == "b"); 4725 assert(attrs.front.value == "451"); 4726 } 4727 } 4728 // skipToPath will work on an empty range but will always return an 4729 // empty range. 4730 { 4731 auto range = parseXML("<root/>"); 4732 assert(range.takeNone().skipToPath("nowhere").empty); 4733 } 4734 // Empty and absolute paths will also result in an empty range as will 4735 // "../" without any actual tag name on the end. 4736 { 4737 auto range = parseXML("<root/>"); 4738 assert(range.skipToPath("").empty); 4739 assert(range.skipToPath("/").empty); 4740 assert(range.skipToPath("../").empty); 4741 } 4742 // Only non-empty start tags have children; all other EntityTypes result 4743 // in an empty range unless "../" is used. 4744 { 4745 auto xml = "<!-- comment -->\n" ~ 4746 "<root>\n" ~ 4747 " <foo/>\n" ~ 4748 "</root>"; 4749 auto range = parseXML(xml); 4750 assert(range.skipToPath("root").empty); 4751 assert(range.skipToPath("foo").empty); 4752 4753 range = range.skipToPath("../root"); 4754 assert(!range.empty); 4755 assert(range.front.type == EntityType.elementStart); 4756 assert(range.front.name == "root"); 4757 } 4758 } 4759 4760 unittest 4761 { 4762 import core.exception : AssertError; 4763 import std.algorithm.comparison : equal; 4764 import std.exception : assertNotThrown, enforce; 4765 import dxml.internal : testRangeFuncs; 4766 4767 static void testPath(R)(R range, string path, EntityType type, string name, size_t line = __LINE__) 4768 { 4769 auto result = assertNotThrown!XMLParsingException(range.skipToPath(path), "unittest 1", __FILE__, line); 4770 enforce!AssertError(!result.empty, "unittest 2", __FILE__, line); 4771 enforce!AssertError(result.front.type == type, "unittest 3", __FILE__, line); 4772 enforce!AssertError(equal(result.front.name, name), "unittest 4", __FILE__, line); 4773 } 4774 4775 static void popEmpty(R)(ref R range) 4776 { 4777 range.popFront(); 4778 static if(range.config.splitEmpty == SplitEmpty.yes) 4779 range.popFront(); 4780 } 4781 4782 auto xml = "<superuser>\n" ~ 4783 " <!-- comment -->\n" ~ 4784 " <?pi?>\n" ~ 4785 " <![CDATA[cdata]]>\n" ~ 4786 " <foo/>\n" ~ 4787 " <bar/>\n" ~ 4788 " <!-- comment -->\n" ~ 4789 " <!-- comment -->\n" ~ 4790 " <baz/>\n" ~ 4791 " <frobozz>\n" ~ 4792 " <!-- comment -->\n" ~ 4793 " <!-- comment -->\n" ~ 4794 " <whatever/>\n" ~ 4795 " <!-- comment -->\n" ~ 4796 " <!-- comment -->\n" ~ 4797 " </frobozz>\n" ~ 4798 " <!-- comment -->\n" ~ 4799 " <!-- comment -->\n" ~ 4800 " <xyzzy/>\n" ~ 4801 "</superuser>"; 4802 4803 static foreach(func; testRangeFuncs) 4804 {{ 4805 auto text = func(xml); 4806 4807 static foreach(config; someTestConfigs) 4808 {{ 4809 static if(config.splitEmpty == SplitEmpty.yes) 4810 enum empty = EntityType.elementStart; 4811 else 4812 enum empty = EntityType.elementEmpty; 4813 4814 auto range = parseXML!config(text.save); 4815 4816 assert(range.save.skipToPath("whatever").empty); 4817 assert(range.save.skipToPath("frobozz/whateve").empty); 4818 4819 testPath(range.save, "foo", empty, "foo"); 4820 testPath(range.save, "bar", empty, "bar"); 4821 testPath(range.save, "baz", empty, "baz"); 4822 testPath(range.save, "frobozz", EntityType.elementStart, "frobozz"); 4823 testPath(range.save, "frobozz/whatever", empty, "whatever"); 4824 testPath(range.save, "xyzzy", empty, "xyzzy"); 4825 4826 range.popFront(); 4827 for(; range.front.type != empty; range.popFront()) 4828 { 4829 assert(range.save.skipToPath("foo").empty); 4830 testPath(range.save, "../foo", empty, "foo"); 4831 testPath(range.save, "../bar", empty, "bar"); 4832 testPath(range.save, "../baz", empty, "baz"); 4833 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz"); 4834 testPath(range.save, "../frobozz/whatever", empty, "whatever"); 4835 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4836 } 4837 assert(equal(range.front.name, "foo")); 4838 assert(range.save.skipToPath("foo").empty); 4839 assert(range.save.skipToPath("./foo").empty); 4840 assert(range.save.skipToPath("../foo").empty); 4841 assert(range.save.skipToPath("bar").empty); 4842 assert(range.save.skipToPath("baz").empty); 4843 assert(range.save.skipToPath("frobozz").empty); 4844 assert(range.save.skipToPath("whatever").empty); 4845 assert(range.save.skipToPath("../").empty); 4846 assert(range.save.skipToPath("../../").empty); 4847 4848 testPath(range.save, "../bar", empty, "bar"); 4849 testPath(range.save, "../baz", empty, "baz"); 4850 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz"); 4851 testPath(range.save, "../frobozz/whatever", empty, "whatever"); 4852 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4853 4854 popEmpty(range); 4855 assert(range.save.skipToPath("bar").empty); 4856 testPath(range.save, "../baz", empty, "baz"); 4857 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz"); 4858 testPath(range.save, "../frobozz/whatever", empty, "whatever"); 4859 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4860 4861 range.popFront(); 4862 for(; range.front.type != empty; range.popFront()) 4863 { 4864 assert(range.save.skipToPath("baz").empty); 4865 testPath(range.save, "../baz", empty, "baz"); 4866 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz"); 4867 testPath(range.save, "../frobozz/whatever", empty, "whatever"); 4868 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4869 } 4870 assert(equal(range.front.name, "baz")); 4871 4872 testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz"); 4873 testPath(range.save, "../frobozz/whatever", empty, "whatever"); 4874 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4875 4876 popEmpty(range); 4877 assert(equal(range.front.name, "frobozz")); 4878 assert(range.save.skipToPath("wizard").empty); 4879 testPath(range.save, "whatever", empty, "whatever"); 4880 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4881 4882 range.popFront(); 4883 for(; range.front.type != empty; range.popFront()) 4884 { 4885 assert(range.save.skipToPath("whatever").empty); 4886 testPath(range.save, "../whatever", empty, "whatever"); 4887 testPath(range.save, "../../xyzzy", empty, "xyzzy"); 4888 } 4889 assert(equal(range.front.name, "whatever")); 4890 assert(range.save.skipToPath("frobozz").empty); 4891 assert(range.save.skipToPath("../frobozz").empty); 4892 assert(range.save.skipToPath("../xyzzy").empty); 4893 assert(range.save.skipToPath("../../frobozz").empty); 4894 4895 testPath(range.save, "../../xyzzy", empty, "xyzzy"); 4896 4897 popEmpty(range); 4898 for(; range.front.type != EntityType.elementEnd; range.popFront()) 4899 { 4900 assert(range.save.skipToPath("xyzzy").empty); 4901 assert(range.save.skipToPath("../xyzzy").empty); 4902 testPath(range.save, "../../xyzzy", empty, "xyzzy"); 4903 } 4904 assert(equal(range.front.name, "frobozz")); 4905 4906 range.popFront(); 4907 for(; range.front.type != empty; range.popFront()) 4908 { 4909 assert(range.save.skipToPath("xyzzy").empty); 4910 testPath(range.save, "../xyzzy", empty, "xyzzy"); 4911 } 4912 assert(equal(range.front.name, "xyzzy")); 4913 4914 popEmpty(range); 4915 assert(equal(range.front.name, "superuser")); 4916 assert(range.save.skipToPath("superuser").empty); 4917 assert(range.save.skipToPath("foo").empty); 4918 assert(range.save.skipToPath("../foo").empty); 4919 assert(range.save.skipToPath("../../foo").empty); 4920 }} 4921 }} 4922 } 4923 4924 4925 //------------------------------------------------------------------------------ 4926 // Private Section 4927 //------------------------------------------------------------------------------ 4928 private: 4929 4930 4931 auto testParser(Config config = Config.init, R)(R xmlText) @trusted pure nothrow @nogc 4932 { 4933 import std.utf : byCodeUnit; 4934 typeof(EntityRange!(config, R)._text) text; 4935 text.input = byCodeUnit(xmlText); 4936 return text; 4937 } 4938 4939 4940 // toCmpType is to make it easy for tests to convert the expected result to a 4941 // range with the correct element type, since comparing with equal won't do 4942 // the right thing if the result doesn't have dchar as its element type. 4943 auto toCmpType(alias func)(string str) 4944 { 4945 import std.range : takeExactly; 4946 import std.utf : byUTF; 4947 4948 return str.byUTF!(immutable ElementType!(typeof(testParser(func(str)).input.takeExactly(1))))(); 4949 } 4950 4951 auto toCmpType(alias func, ThrowOnEntityRef toer)(string str) 4952 { 4953 import std.range : takeExactly; 4954 import std.utf : byUTF; 4955 4956 return str.byUTF!(immutable ElementType!(typeof(testParser!(makeConfig(toer))(func(str)).input.takeExactly(1))))(); 4957 } 4958 4959 4960 // Used to indicate where in the grammar we're currently parsing. 4961 enum GrammarPos 4962 { 4963 // Nothing has been parsed yet. 4964 documentStart, 4965 4966 // document ::= prolog element Misc* 4967 // prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 4968 // This is that first Misc*. The next entity to parse is either a Misc, the 4969 // doctypedecl, or the root element which follows the prolog. 4970 prologMisc1, 4971 4972 // document ::= prolog element Misc* 4973 // prolog ::= XMLDecl? Misc* (doctypedecl Misc*) 4974 // This is that second Misc*. The next entity to parse is either a Misc or 4975 // the root element which follows the prolog. 4976 prologMisc2, 4977 4978 // Used with SplitEmpty.yes to tell the parser that we're currently at an 4979 // empty element tag that we're treating as a start tag, so the next entity 4980 // will be an end tag even though we didn't actually parse one. 4981 splittingEmpty, 4982 4983 // element ::= EmptyElemTag | STag content ETag 4984 // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 4985 // This is at the beginning of content at the first CharData?. The next 4986 // thing to parse will be a CharData, element, CDSect, PI, Comment, or ETag. 4987 // References are treated as part of the CharData and not parsed out by the 4988 // EntityRange (see EntityRange.Entity.text). 4989 contentCharData1, 4990 4991 // element ::= EmptyElemTag | STag content ETag 4992 // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 4993 // This is after the first CharData?. The next thing to parse will be a 4994 // element, CDSect, PI, Comment, or ETag. 4995 // References are treated as part of the CharData and not parsed out by the 4996 // EntityRange (see EntityRange.Entity.text). 4997 contentMid, 4998 4999 // element ::= EmptyElemTag | STag content ETag 5000 // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 5001 // This is at the second CharData?. The next thing to parse will be a 5002 // CharData, element, CDSect, PI, Comment, or ETag. 5003 // References are treated as part of the CharData and not parsed out by the 5004 // EntityRange (see EntityRange.Entity.text). 5005 contentCharData2, 5006 5007 // element ::= EmptyElemTag | STag content ETag 5008 // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* 5009 // This is after the second CharData?. The next thing to parse is an ETag. 5010 endTag, 5011 5012 // document ::= prolog element Misc* 5013 // This is the Misc* at the end of the document. The next thing to parse is 5014 // either another Misc, or we will hit the end of the document. 5015 endMisc, 5016 5017 // The end of the document (and the grammar) has been reached. 5018 documentEnd 5019 } 5020 5021 5022 // Wrapper around skipOver which takes an EntityParser.Text and handles 5023 // incrementing pos. 5024 // 5025 // It is assumed that there are no newlines. 5026 bool stripStartsWith(Text)(ref Text text, string needle) 5027 { 5028 import std.algorithm.searching : skipOver; 5029 import std.utf : byCodeUnit; 5030 5031 //TODO In the case where we're parsing an array of char, if we can cleanly 5032 // strip off any byCodeUnit and takeExactly wrappers, then we should be able 5033 // to have skipOver compare the string being parsed and the needle with ==. 5034 // It may happen in some cases right now when text.input is a byCodeUnit 5035 // result, but it won't happen in all cases where it ideally would. We may 5036 // also want to look into using byUTF on the needle so that it matches the 5037 // encoding of text.input or even make needle match the encoding when it's 5038 // passed in instead of always being string. 5039 if(!text.input.skipOver(needle.byCodeUnit())) 5040 return false; 5041 5042 text.pos.col += needle.length; 5043 5044 return true; 5045 } 5046 5047 unittest 5048 { 5049 import core.exception : AssertError; 5050 import std.exception : enforce; 5051 import dxml.internal : equalCU, testRangeFuncs; 5052 5053 static void test(alias func)(string origHaystack, string needle, string remainder, bool startsWith, 5054 int row, int col, size_t line = __LINE__) 5055 { 5056 auto haystack = func(origHaystack); 5057 { 5058 auto text = testParser(haystack.save); 5059 enforce!AssertError(text.stripStartsWith(needle) == startsWith, "unittest failure 1", __FILE__, line); 5060 enforce!AssertError(equalCU(text.input, remainder), "unittest failure 2", __FILE__, line); 5061 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5062 } 5063 { 5064 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5065 auto text = testParser(haystack); 5066 text.pos.line += 3; 5067 text.pos.col += 7; 5068 enforce!AssertError(text.stripStartsWith(needle) == startsWith, "unittest failure 4", __FILE__, line); 5069 enforce!AssertError(equalCU(text.input, remainder), "unittest failure 5", __FILE__, line); 5070 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5071 } 5072 } 5073 5074 static foreach(func; testRangeFuncs) 5075 { 5076 test!func("hello world", "hello", " world", true, 1, "hello".length + 1); 5077 test!func("hello world", "hello world", "", true, 1, "hello world".length + 1); 5078 test!func("hello world", "foo", "hello world", false, 1, 1); 5079 test!func("hello world", "hello sally", "hello world", false, 1, 1); 5080 test!func("hello world", "hello world ", "hello world", false, 1, 1); 5081 } 5082 } 5083 5084 @safe pure unittest 5085 { 5086 import std.algorithm.comparison : equal; 5087 import dxml.internal : testRangeFuncs; 5088 5089 static foreach(func; testRangeFuncs) 5090 {{ 5091 auto xml = func(`foo`); 5092 auto text = testParser!simpleXML(xml); 5093 assert(text.stripStartsWith("fo")); 5094 }} 5095 } 5096 5097 5098 // Strips whitespace while dealing with text.pos accordingly. Newlines are not 5099 // ignored. 5100 // Returns whether any whitespace was stripped. 5101 bool stripWS(Text)(ref Text text) 5102 { 5103 bool strippedSpace = false; 5104 5105 static if(hasLength!(Text.Input)) 5106 size_t lineStart = text.input.length; 5107 5108 loop: while(!text.input.empty) 5109 { 5110 switch(text.input.front) 5111 { 5112 case ' ': 5113 case '\t': 5114 case '\r': 5115 { 5116 strippedSpace = true; 5117 text.input.popFront(); 5118 static if(!hasLength!(Text.Input)) 5119 ++text.pos.col; 5120 break; 5121 } 5122 case '\n': 5123 { 5124 strippedSpace = true; 5125 text.input.popFront(); 5126 static if(hasLength!(Text.Input)) 5127 lineStart = text.input.length; 5128 nextLine!(Text.config)(text.pos); 5129 break; 5130 } 5131 default: break loop; 5132 } 5133 } 5134 5135 static if(hasLength!(Text.Input)) 5136 text.pos.col += lineStart - text.input.length; 5137 5138 return strippedSpace; 5139 } 5140 5141 unittest 5142 { 5143 import core.exception : AssertError; 5144 import std.exception : enforce; 5145 import dxml.internal : equalCU; 5146 import dxml.internal : testRangeFuncs; 5147 5148 static void test(alias func)(string origHaystack, string remainder, bool stripped, 5149 int row, int col, size_t line = __LINE__) 5150 { 5151 auto haystack = func(origHaystack); 5152 { 5153 auto text = testParser(haystack.save); 5154 enforce!AssertError(text.stripWS() == stripped, "unittest failure 1", __FILE__, line); 5155 enforce!AssertError(equalCU(text.input, remainder), "unittest failure 2", __FILE__, line); 5156 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5157 } 5158 { 5159 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5160 auto text = testParser(haystack); 5161 text.pos.line += 3; 5162 text.pos.col += 7; 5163 enforce!AssertError(text.stripWS() == stripped, "unittest failure 4", __FILE__, line); 5164 enforce!AssertError(equalCU(text.input, remainder), "unittest failure 5", __FILE__, line); 5165 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5166 } 5167 } 5168 5169 static foreach(func; testRangeFuncs) 5170 { 5171 test!func(" \t\rhello world", "hello world", true, 1, 5); 5172 test!func(" \n \n \n \nhello world", "hello world", true, 5, 1); 5173 test!func(" \n \n \n \n hello world", "hello world", true, 5, 3); 5174 test!func("hello world", "hello world", false, 1, 1); 5175 } 5176 } 5177 5178 @safe pure unittest 5179 { 5180 import dxml.internal : testRangeFuncs; 5181 5182 static foreach(func; testRangeFuncs) 5183 {{ 5184 auto xml = func(`foo`); 5185 auto text = testParser!simpleXML(xml); 5186 assert(!text.stripWS()); 5187 }} 5188 } 5189 5190 5191 // Returns a slice (or takeExactly) of text.input up to but not including the 5192 // given needle, removing both that slice and the given needle from text.input 5193 // in the process. If the needle is not found, then an XMLParsingException is 5194 // thrown. 5195 auto takeUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text) 5196 { 5197 return _takeUntil!(true, needle, skipQuotedText, Text)(text); 5198 } 5199 5200 unittest 5201 { 5202 import core.exception : AssertError; 5203 import std.algorithm.comparison : equal; 5204 import std.exception : collectException, enforce; 5205 import dxml.internal : codeLen, testRangeFuncs; 5206 5207 static void test(alias func, string needle, bool sqt)(string origHaystack, string expected, string remainder, 5208 int row, int col, size_t line = __LINE__) 5209 { 5210 auto haystack = func(origHaystack); 5211 auto adjExpected = expected.toCmpType!func(); 5212 { 5213 auto text = testParser(haystack.save); 5214 auto temp = text.save; 5215 enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected.save), 5216 "unittest failure 1", __FILE__, line); 5217 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5218 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5219 } 5220 { 5221 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5222 auto text = testParser(haystack); 5223 text.pos.line += 3; 5224 text.pos.col += 7; 5225 enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected), 5226 "unittest failure 4", __FILE__, line); 5227 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5228 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5229 } 5230 } 5231 5232 static void testFail(alias func, string needle, bool sqt) 5233 (string origHaystack, int row, int col, size_t line = __LINE__) 5234 { 5235 auto haystack = func(origHaystack); 5236 { 5237 auto text = testParser(haystack.save); 5238 auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)()); 5239 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5240 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5241 } 5242 { 5243 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5244 auto text = testParser(haystack); 5245 text.pos.line += 3; 5246 text.pos.col += 7; 5247 auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)()); 5248 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5249 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5250 } 5251 } 5252 5253 static foreach(func; testRangeFuncs) 5254 { 5255 static foreach(sqt; [false, true]) 5256 { 5257 { 5258 auto haystack = "hello world"; 5259 enum needle = "world"; 5260 5261 static foreach(i; 1 .. needle.length) 5262 test!(func, needle[0 .. i], sqt)(haystack, "hello ", needle[i .. $], 1, 7 + i); 5263 } 5264 5265 test!(func, "l", sqt)("lello world", "", "ello world", 1, 2); 5266 test!(func, "ll", sqt)("lello world", "le", "o world", 1, 5); 5267 test!(func, "le", sqt)("llello world", "l", "llo world", 1, 4); 5268 { 5269 enum needle = "great"; 5270 enum expected = "プログラミング in D is "; 5271 static foreach(i; 1 .. needle.length) 5272 { 5273 test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", expected, 5274 "great indeed"[i .. $], 1, codeLen!(func, expected) + i + 1); 5275 } 5276 } 5277 static foreach(haystack; ["", "a", "hello", "ディラン"]) 5278 testFail!(func, "x", sqt)(haystack, 1, 1); 5279 static foreach(haystack; ["", "l", "lte", "world", "nomatch"]) 5280 testFail!(func, "le", sqt)(haystack, 1, 1); 5281 static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"]) 5282 testFail!(func, "web", sqt)(haystack, 1, 1); 5283 } 5284 5285 test!(func, "*", false)(`hello '*' "*" * world`, `hello '`, `' "*" * world`, 1, 9); 5286 test!(func, "*", false)(`hello '"*' * world`, `hello '"`, `' * world`, 1, 10); 5287 test!(func, "*", false)(`hello "'*" * world`, `hello "'`, `" * world`, 1, 10); 5288 test!(func, "*", false)(`hello ''' * world`, `hello ''' `, ` world`, 1, 12); 5289 test!(func, "*", false)(`hello """ * world`, `hello """ `, ` world`, 1, 12); 5290 testFail!(func, "*", false)("foo\n\n ' \n\nbar", 1, 1); 5291 testFail!(func, "*", false)(`ディラン " `, 1, 1); 5292 5293 test!(func, "*", true)(`hello '*' "*" * world`, `hello '*' "*" `, ` world`, 1, 16); 5294 test!(func, "*", true)(`hello '"*' * world`, `hello '"*' `, ` world`, 1, 13); 5295 test!(func, "*", true)(`hello "'*" * world`, `hello "'*" `, ` world`, 1, 13); 5296 testFail!(func, "*", true)(`hello ''' * world`, 1, 9); 5297 testFail!(func, "*", true)(`hello """ * world`, 1, 9); 5298 testFail!(func, "*", true)("foo\n\n ' \n\nbar", 3, 4); 5299 testFail!(func, "*", true)(`ディラン " `, 1, codeLen!(func, `ディラン "`)); 5300 5301 test!(func, "*", true)(`hello '' "" * world`, `hello '' "" `, ` world`, 1, 14); 5302 test!(func, "*", true)("foo '\n \n \n' bar*", "foo '\n \n \n' bar", "", 4, 7); 5303 } 5304 } 5305 5306 @safe pure unittest 5307 { 5308 import std.algorithm.comparison : equal; 5309 import dxml.internal : testRangeFuncs; 5310 5311 static foreach(func; testRangeFuncs) 5312 {{ 5313 auto xml = func(`foo`); 5314 auto text = testParser!simpleXML(xml); 5315 assert(equal(text.takeUntilAndDrop!"o"(), "f")); 5316 }} 5317 } 5318 5319 // Variant of takeUntilAndDrop which does not return a slice. It's intended for 5320 // when the config indicates that something should be skipped. 5321 void skipUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text) 5322 { 5323 _takeUntil!(false, needle, skipQuotedText, Text)(text); 5324 } 5325 5326 unittest 5327 { 5328 import core.exception : AssertError; 5329 import std.algorithm.comparison : equal; 5330 import std.exception : assertNotThrown, collectException, enforce; 5331 import dxml.internal : codeLen, testRangeFuncs; 5332 5333 static void test(alias func, string needle, bool sqt)(string origHaystack, string remainder, 5334 int row, int col, size_t line = __LINE__) 5335 { 5336 auto haystack = func(origHaystack); 5337 { 5338 auto text = testParser(haystack.save); 5339 assertNotThrown!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)(), "unittest failure 1", 5340 __FILE__, line); 5341 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5342 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5343 } 5344 { 5345 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5346 auto text = testParser(haystack); 5347 text.pos.line += 3; 5348 text.pos.col += 7; 5349 assertNotThrown!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)(), "unittest failure 4", 5350 __FILE__, line); 5351 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5352 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5353 } 5354 } 5355 5356 static void testFail(alias func, string needle, bool sqt) 5357 (string origHaystack, int row, int col, size_t line = __LINE__) 5358 { 5359 auto haystack = func(origHaystack); 5360 { 5361 auto text = testParser(haystack.save); 5362 auto e = collectException!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)()); 5363 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5364 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5365 } 5366 { 5367 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5368 auto text = testParser(haystack); 5369 text.pos.line += 3; 5370 text.pos.col += 7; 5371 auto e = collectException!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)()); 5372 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5373 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5374 } 5375 } 5376 5377 static foreach(func; testRangeFuncs) 5378 { 5379 static foreach(sqt; [false, true]) 5380 { 5381 { 5382 enum needle = "world"; 5383 static foreach(i; 1 .. needle.length) 5384 test!(func, needle[0 .. i], sqt)("hello world", needle[i .. $], 1, 7 + i); 5385 } 5386 5387 test!(func, "l", sqt)("lello world", "ello world", 1, 2); 5388 test!(func, "ll", sqt)("lello world", "o world", 1, 5); 5389 test!(func, "le", sqt)("llello world", "llo world", 1, 4); 5390 5391 { 5392 enum needle = "great"; 5393 static foreach(i; 1 .. needle.length) 5394 { 5395 test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", "great indeed"[i .. $], 5396 1, codeLen!(func, "プログラミング in D is ") + i + 1); 5397 } 5398 } 5399 5400 static foreach(haystack; ["", "a", "hello", "ディラン"]) 5401 testFail!(func, "x", sqt)(haystack, 1, 1); 5402 static foreach(haystack; ["", "l", "lte", "world", "nomatch"]) 5403 testFail!(func, "le", sqt)(haystack, 1, 1); 5404 static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"]) 5405 testFail!(func, "web", sqt)(haystack, 1, 1); 5406 } 5407 5408 test!(func, "*", false)(`hello '*' "*" * world`, `' "*" * world`, 1, 9); 5409 test!(func, "*", false)(`hello '"*' * world`, `' * world`, 1, 10); 5410 test!(func, "*", false)(`hello "'*" * world`, `" * world`, 1, 10); 5411 test!(func, "*", false)(`hello ''' * world`, ` world`, 1, 12); 5412 test!(func, "*", false)(`hello """ * world`, ` world`, 1, 12); 5413 testFail!(func, "*", false)("foo\n\n ' \n\nbar", 1, 1); 5414 testFail!(func, "*", false)(`ディラン " `, 1, 1); 5415 5416 test!(func, "*", true)(`hello '*' "*" * world`, ` world`, 1, 16); 5417 test!(func, "*", true)(`hello '"*' * world`, ` world`, 1, 13); 5418 test!(func, "*", true)(`hello "'*" * world`, ` world`, 1, 13); 5419 testFail!(func, "*", true)(`hello ''' * world`, 1, 9); 5420 testFail!(func, "*", true)(`hello """ * world`, 1, 9); 5421 testFail!(func, "*", true)("foo\n\n ' \n\nbar", 3, 4); 5422 testFail!(func, "*", true)(`ディラン " `, 1, codeLen!(func, `ディラン "`)); 5423 5424 test!(func, "*", true)(`hello '' "" * world`, ` world`, 1, 14); 5425 test!(func, "*", true)("foo '\n \n \n' bar*", "", 4, 7); 5426 } 5427 } 5428 5429 @safe pure unittest 5430 { 5431 import std.algorithm.comparison : equal; 5432 import dxml.internal : testRangeFuncs; 5433 5434 static foreach(func; testRangeFuncs) 5435 {{ 5436 auto xml = func(`foo`); 5437 auto text = testParser!simpleXML(xml); 5438 text.skipUntilAndDrop!"o"(); 5439 assert(equal(text.input, "o")); 5440 }} 5441 } 5442 5443 auto _takeUntil(bool retSlice, string needle, bool skipQuotedText, Text)(ref Text text) 5444 { 5445 import std.algorithm : find; 5446 import std.ascii : isWhite; 5447 import std.range : takeExactly; 5448 5449 static assert(needle.find!isWhite().empty); 5450 5451 auto orig = text.save; 5452 bool found = false; 5453 size_t takeLen = 0; 5454 size_t lineStart = 0; 5455 5456 void processNewline() 5457 { 5458 ++takeLen; 5459 nextLine!(Text.config)(text.pos); 5460 lineStart = takeLen; 5461 } 5462 5463 loop: while(!text.input.empty) 5464 { 5465 switch(text.input.front) 5466 { 5467 case cast(ElementType!(Text.Input))needle[0]: 5468 { 5469 static if(needle.length == 1) 5470 { 5471 found = true; 5472 text.input.popFront(); 5473 break loop; 5474 } 5475 else static if(needle.length == 2) 5476 { 5477 text.input.popFront(); 5478 if(!text.input.empty && text.input.front == needle[1]) 5479 { 5480 found = true; 5481 text.input.popFront(); 5482 break loop; 5483 } 5484 ++takeLen; 5485 continue; 5486 } 5487 else 5488 { 5489 text.input.popFront(); 5490 auto saved = text.input.save; 5491 foreach(i, c; needle[1 .. $]) 5492 { 5493 if(text.input.empty) 5494 { 5495 takeLen += i + 1; 5496 break loop; 5497 } 5498 if(text.input.front != c) 5499 { 5500 text.input = saved; 5501 ++takeLen; 5502 continue loop; 5503 } 5504 text.input.popFront(); 5505 } 5506 found = true; 5507 break loop; 5508 } 5509 } 5510 static if(skipQuotedText) 5511 { 5512 static foreach(quote; ['\'', '"']) 5513 { 5514 case quote: 5515 { 5516 auto quotePos = text.pos; 5517 quotePos.col += takeLen - lineStart; 5518 ++takeLen; 5519 while(true) 5520 { 5521 text.input.popFront(); 5522 if(text.input.empty) 5523 throw new XMLParsingException("Failed to find matching quote", quotePos); 5524 switch(text.input.front) 5525 { 5526 case quote: 5527 { 5528 ++takeLen; 5529 text.input.popFront(); 5530 continue loop; 5531 } 5532 case '\n': 5533 { 5534 processNewline(); 5535 break; 5536 } 5537 default: 5538 { 5539 ++takeLen; 5540 break; 5541 } 5542 } 5543 } 5544 assert(0); // the compiler isn't smart enough to see that this is unreachable. 5545 } 5546 } 5547 } 5548 case '\n': 5549 { 5550 processNewline(); 5551 break; 5552 } 5553 default: 5554 { 5555 ++takeLen; 5556 break; 5557 } 5558 } 5559 5560 text.input.popFront(); 5561 } 5562 5563 text.pos.col += takeLen - lineStart + needle.length; 5564 5565 if(!found) 5566 throw new XMLParsingException("Failed to find: " ~ needle, orig.pos); 5567 5568 static if(retSlice) 5569 return takeExactly(orig.input, takeLen); 5570 } 5571 5572 5573 // Okay, this name kind of sucks, because it's too close to skipUntilAndDrop, 5574 // but I'd rather do this than be passing template arguments to choose between 5575 // behaviors - especially when the logic is so different. It skips until it 5576 // reaches one of the delimiter characters. If it finds one of them, then the 5577 // first character in the input is the delimiter that was found, and if it 5578 // doesn't find either, then it throws. 5579 template skipToOneOf(delims...) 5580 { 5581 static foreach(delim; delims) 5582 { 5583 static assert(is(typeof(delim) == char)); 5584 static assert(!isSpace(delim)); 5585 } 5586 5587 void skipToOneOf(Text)(ref Text text) 5588 { 5589 while(!text.input.empty) 5590 { 5591 switch(text.input.front) 5592 { 5593 static foreach(delim; delims) 5594 case delim: return; 5595 case '\n': 5596 { 5597 nextLine!(Text.config)(text.pos); 5598 text.input.popFront(); 5599 break; 5600 } 5601 default: 5602 { 5603 popFrontAndIncCol(text); 5604 break; 5605 } 5606 } 5607 } 5608 throw new XMLParsingException("Prematurely reached end of document", text.pos); 5609 } 5610 } 5611 5612 unittest 5613 { 5614 import core.exception : AssertError; 5615 import std.algorithm.comparison : equal; 5616 import std.exception : assertNotThrown, collectException, enforce; 5617 import dxml.internal : codeLen, testRangeFuncs; 5618 5619 static void test(alias func, delims...)(string origHaystack, string remainder, 5620 int row, int col, size_t line = __LINE__) 5621 { 5622 auto haystack = func(origHaystack); 5623 { 5624 auto text = testParser(haystack.save); 5625 assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest 1", __FILE__, line); 5626 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5627 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5628 } 5629 { 5630 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5631 auto text = testParser(haystack); 5632 text.pos.line += 3; 5633 text.pos.col += 7; 5634 assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest 4", __FILE__, line); 5635 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5636 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5637 } 5638 } 5639 5640 static void testFail(alias func, delims...)(string origHaystack, int row, int col, size_t line = __LINE__) 5641 { 5642 auto haystack = func(origHaystack); 5643 { 5644 auto text = testParser(haystack.save); 5645 auto e = collectException!XMLParsingException(text.skipToOneOf!delims()); 5646 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5647 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5648 } 5649 { 5650 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5651 auto text = testParser(haystack); 5652 text.pos.line += 3; 5653 text.pos.col += 7; 5654 auto e = collectException!XMLParsingException(text.skipToOneOf!delims()); 5655 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5656 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5657 } 5658 } 5659 5660 static foreach(func; testRangeFuncs) 5661 { 5662 test!(func, 'o', 'w')("hello world", "o world", 1, 5); 5663 test!(func, 'r', 'w', '1', '+', '*')("hello world", "world", 1, 7); 5664 test!(func, 'z', 'y')("abc\n\n\n \n\n wxyzzy \nf\ng", "yzzy \nf\ng", 6, 6); 5665 test!(func, 'o', 'g')("abc\n\n\n \n\n wxyzzy \nf\ng", "g", 8, 1); 5666 test!(func, 'g', 'x')("プログラミング in D is great indeed", "great indeed", 5667 1, codeLen!(func, "プログラミング in D is ") + 1); 5668 5669 testFail!(func, 'a', 'b')("hello world", 1, 12); 5670 testFail!(func, 'a', 'b')("hello\n\nworld", 3, 6); 5671 testFail!(func, 'a', 'b')("プログラミング", 1, codeLen!(func, "プログラミング") + 1); 5672 } 5673 } 5674 5675 @safe pure unittest 5676 { 5677 import std.algorithm.comparison : equal; 5678 import dxml.internal : testRangeFuncs; 5679 5680 static foreach(func; testRangeFuncs) 5681 {{ 5682 auto xml = func(`foo`); 5683 auto text = testParser!simpleXML(xml); 5684 text.skipToOneOf!('o')(); 5685 assert(equal(text.input, "oo")); 5686 }} 5687 } 5688 5689 5690 // The front of the input should be text surrounded by single or double quotes. 5691 // This returns a slice of the input containing that text, and the input is 5692 // advanced to one code unit beyond the quote. 5693 auto takeEnquotedText(Text)(ref Text text) 5694 { 5695 checkNotEmpty(text); 5696 immutable quote = text.input.front; 5697 static foreach(quoteChar; [`"`, `'`]) 5698 { 5699 // This would be a bit simpler if takeUntilAndDrop took a runtime 5700 // argument, but in all other cases, a compile-time argument makes more 5701 // sense, so this seemed like a reasonable way to handle this one case. 5702 if(quote == quoteChar[0]) 5703 { 5704 popFrontAndIncCol(text); 5705 return takeUntilAndDrop!quoteChar(text); 5706 } 5707 } 5708 throw new XMLParsingException("Expected quoted text", text.pos); 5709 } 5710 5711 unittest 5712 { 5713 import core.exception : AssertError; 5714 import std.algorithm.comparison : equal; 5715 import std.exception : assertThrown, enforce; 5716 import std.range : only; 5717 import dxml.internal : testRangeFuncs; 5718 5719 static void test(alias func)(string origHaystack, string expected, string remainder, 5720 int row, int col, size_t line = __LINE__) 5721 { 5722 auto haystack = func(origHaystack); 5723 auto adjExpected = expected.toCmpType!func(); 5724 { 5725 auto text = testParser(haystack.save); 5726 enforce!AssertError(equal(takeEnquotedText(text), adjExpected.save), "unittest failure 1", __FILE__, line); 5727 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5728 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5729 } 5730 { 5731 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5732 auto text = testParser(haystack); 5733 text.pos.line += 3; 5734 text.pos.col += 7; 5735 enforce!AssertError(equal(takeEnquotedText(text), adjExpected), "unittest failure 3", __FILE__, line); 5736 enforce!AssertError(equal(text.input, remainder), "unittest failure 4", __FILE__, line); 5737 enforce!AssertError(text.pos == pos, "unittest failure 3", __FILE__, line); 5738 } 5739 } 5740 5741 static void testFail(alias func)(string origHaystack, size_t line = __LINE__) 5742 { 5743 auto haystack = func(origHaystack); 5744 auto text = testParser(haystack); 5745 assertThrown!XMLParsingException(text.takeEnquotedText(), "unittest failure", __FILE__, line); 5746 } 5747 5748 static foreach(func; testRangeFuncs) 5749 { 5750 foreach(quote; only("\"", "'")) 5751 { 5752 test!func(quote ~ quote, "", "", 1, 3); 5753 test!func(quote ~ "hello world" ~ quote, "hello world", "", 1, 14); 5754 test!func(quote ~ "hello world" ~ quote ~ " foo", "hello world", " foo", 1, 14); 5755 { 5756 import std.utf : codeLength; 5757 auto haystack = quote ~ "プログラミング " ~ quote ~ "in D"; 5758 enum len = cast(int)codeLength!(ElementEncodingType!(typeof(func(haystack))))("プログラミング "); 5759 test!func(haystack, "プログラミング ", "in D", 1, len + 3); 5760 } 5761 } 5762 5763 foreach(str; only(`hello`, `"hello'`, `"hello`, `'hello"`, `'hello`, ``, `"'`, `"`, `'"`, `'`)) 5764 testFail!func(str); 5765 } 5766 } 5767 5768 5769 // This removes a name per the Name grammar rule from the front of the input and 5770 // returns it. 5771 // The parsing continues until either one of the given delimiters or an XML 5772 // whitespace character is encountered. The delimiter/whitespace is not returned 5773 // as part of the name and is left at the front of the input. 5774 template takeName(delims...) 5775 { 5776 static foreach(delim; delims) 5777 { 5778 static assert(is(typeof(delim) == char), delim); 5779 static assert(!isSpace(delim)); 5780 } 5781 5782 auto takeName(Text)(ref Text text) 5783 { 5784 import std.format : format; 5785 import std.range : takeExactly; 5786 import std.utf : decodeFront, UseReplacementDchar; 5787 import dxml.internal : isNameStartChar, isNameChar; 5788 5789 assert(!text.input.empty); 5790 5791 auto orig = text.input.save; 5792 size_t takeLen; 5793 { 5794 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(takeLen); 5795 if(!isNameStartChar(decodedC)) 5796 throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(decodedC), text.pos); 5797 } 5798 5799 if(text.input.empty) 5800 { 5801 text.pos.col += takeLen; 5802 return takeExactly(orig, takeLen); 5803 } 5804 5805 loop: while(true) 5806 { 5807 immutable c = text.input.front; 5808 if(isSpace(c)) 5809 break; 5810 static foreach(delim; delims) 5811 { 5812 if(c == delim) 5813 break loop; 5814 } 5815 5816 size_t numCodeUnits; 5817 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits); 5818 if(!isNameChar(decodedC)) 5819 { 5820 text.pos.col += takeLen; 5821 throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(decodedC), text.pos); 5822 } 5823 takeLen += numCodeUnits; 5824 5825 if(text.input.empty) 5826 break; 5827 } 5828 5829 text.pos.col += takeLen; 5830 5831 return takeExactly(orig, takeLen); 5832 } 5833 } 5834 5835 unittest 5836 { 5837 import core.exception : AssertError; 5838 import std.algorithm.comparison : equal; 5839 import std.exception : collectException, enforce; 5840 import std.typecons : tuple; 5841 import dxml.internal : codeLen, testRangeFuncs; 5842 5843 static void test(alias func, delim...)(string origHaystack, string expected, string remainder, 5844 int row, int col, size_t line = __LINE__) 5845 { 5846 auto haystack = func(origHaystack); 5847 auto adjExpected = expected.toCmpType!func(); 5848 { 5849 auto text = testParser(haystack.save); 5850 enforce!AssertError(equal(text.takeName!delim(), adjExpected.save), 5851 "unittest failure 1", __FILE__, line); 5852 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5853 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5854 } 5855 { 5856 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5857 auto text = testParser(haystack); 5858 text.pos.line += 3; 5859 text.pos.col += 7; 5860 enforce!AssertError(equal(text.takeName!delim(), adjExpected), 5861 "unittest failure 4", __FILE__, line); 5862 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5863 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5864 } 5865 } 5866 5867 static void testFail(alias func, delim...)(string origHaystack, int row, int col, size_t line = __LINE__) 5868 { 5869 auto haystack = func(origHaystack); 5870 { 5871 auto text = testParser(haystack.save); 5872 auto e = collectException!XMLParsingException(text.takeName!delim()); 5873 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5874 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5875 } 5876 { 5877 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5878 auto text = testParser(haystack); 5879 text.pos.line += 3; 5880 text.pos.col += 7; 5881 auto e = collectException!XMLParsingException(text.takeName!delim()); 5882 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5883 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5884 } 5885 } 5886 5887 static foreach(func; testRangeFuncs) 5888 { 5889 static foreach(str; ["hello", "プログラミング", "h_:llo-.42", "_.", "_-", "_42"]) 5890 {{ 5891 enum len = codeLen!(func, str); 5892 5893 static foreach(remainder; ["", " ", "\t", "\r", "\n", " foo", "\tfoo", "\rfoo", "\nfoo", " foo \n \r "]) 5894 {{ 5895 enum strRem = str ~ remainder; 5896 enum delimRem = '>' ~ remainder; 5897 enum hay = str ~ delimRem; 5898 test!func(strRem, str, remainder, 1, len + 1); 5899 test!(func, '=')(strRem, str, remainder, 1, len + 1); 5900 test!(func, '>', '|')(hay, str, delimRem, 1, len + 1); 5901 test!(func, '|', '>')(hay, str, delimRem, 1, len + 1); 5902 }} 5903 }} 5904 5905 static foreach(t; [tuple(" ", 1, 1), tuple("<", 1, 1), tuple("foo!", 1, 4), tuple("foo!<", 1, 4)]) 5906 {{ 5907 testFail!func(t[0], t[1], t[2]); 5908 testFail!func(t[0] ~ '>', t[1], t[2]); 5909 testFail!(func, '?')(t[0], t[1], t[2]); 5910 testFail!(func, '=')(t[0] ~ '=', t[1], t[2]); 5911 }} 5912 5913 testFail!(func, '>')(">", 1, 1); 5914 testFail!(func, '?')("?", 1, 1); 5915 testFail!(func, '?')("プログ&ラミング", 1, codeLen!(func, "プログ&")); 5916 5917 static foreach(t; [tuple("42", 1, 1), tuple(".", 1, 1), tuple(".a", 1, 1)]) 5918 { 5919 testFail!func(t[0], t[1], t[2]); 5920 testFail!(func, '>')(t[0], t[1], t[2]); 5921 } 5922 } 5923 } 5924 5925 @safe pure unittest 5926 { 5927 import std.algorithm.comparison : equal; 5928 import dxml.internal : testRangeFuncs; 5929 5930 static foreach(func; testRangeFuncs) 5931 {{ 5932 auto xml = func(`foo`); 5933 auto text = testParser!simpleXML(xml); 5934 assert(equal(text.takeName(), "foo")); 5935 }} 5936 } 5937 5938 5939 // This removes an attribute value from the front of the input, partially 5940 // validates it, and returns it. The validation that is not done is whether 5941 // the value in a character reference is valid. It's checked for whether the 5942 // characters used in it are valid but not whether the number they form is a 5943 // valid Unicode character. Checking the number doesn't seem worth the extra 5944 // complication, and it's not required for the XML to be "well-formed." 5945 // dxml.util.parseCharRef will check that it is fully correct if it is used. 5946 auto takeAttValue(Text)(ref Text text) 5947 { 5948 // AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 5949 // Reference ::= EntityRef | CharRef 5950 // EntityRef ::= '&' Name ';' 5951 // PEReference ::= '%' Name ';' 5952 5953 import std.range : only; 5954 5955 checkNotEmpty(text); 5956 immutable quote = text.input.front; 5957 immutable quotePos = text.pos; 5958 foreach(quoteChar; only('"', '\'')) 5959 { 5960 // This would be a bit simpler if takeUntilAndDrop took a runtime 5961 // argument, but in all other cases, a compile-time argument makes more 5962 // sense, so this seemed like a reasonable way to handle this one case. 5963 if(quote == quoteChar) 5964 { 5965 popFrontAndIncCol(text); 5966 size_t lineStart = 0; 5967 auto orig = text.input.save; 5968 size_t takeLen; 5969 loop: while(true) 5970 { 5971 if(text.input.empty) 5972 throw new XMLParsingException("Unterminated attribute value", quotePos); 5973 switch(text.input.front) 5974 { 5975 case '"': 5976 { 5977 if(quote == '"') 5978 { 5979 text.input.popFront(); 5980 goto done; 5981 } 5982 goto default; 5983 } 5984 case '\'': 5985 { 5986 if(quote == '\'') 5987 { 5988 text.input.popFront(); 5989 goto done; 5990 } 5991 goto default; 5992 } 5993 case '&': 5994 { 5995 { 5996 import dxml.util : parseCharRef; 5997 auto temp = text.input.save; 5998 auto charRef = parseCharRef(temp); 5999 if(!charRef.isNull) 6000 { 6001 static if(hasLength!(Text.Input)) 6002 { 6003 takeLen += text.input.length - temp.length; 6004 text.input = temp; 6005 } 6006 else 6007 { 6008 while(text.input.front != ';') 6009 { 6010 ++takeLen; 6011 text.input.popFront(); 6012 } 6013 ++takeLen; 6014 text.input.popFront(); 6015 } 6016 continue; 6017 } 6018 } 6019 6020 immutable ampLen = takeLen - lineStart; 6021 ++takeLen; 6022 text.input.popFront(); 6023 6024 // Std Entity References 6025 static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes) 6026 { 6027 import std.algorithm.searching : startsWith; 6028 6029 static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"]) 6030 { 6031 if(text.input.save.startsWith(entRef)) 6032 { 6033 takeLen += entRef.length; 6034 text.input.popFrontN(entRef.length); 6035 continue loop; 6036 } 6037 } 6038 6039 text.pos.col += ampLen; 6040 throw new XMLParsingException("& is only legal in an attribute value as part of a " ~ 6041 "reference, and this parser only supports entity " ~ 6042 "references if they're predefined by the spec. This is not " ~ 6043 "a valid character reference or one of the predefined " ~ 6044 "entity references.", text.pos); 6045 } 6046 // All Entity References 6047 else 6048 { 6049 import std.utf : decodeFront, UseReplacementDchar; 6050 import dxml.internal : isNameStartChar, isNameChar; 6051 6052 if(text.input.empty || text.input.front == quote) 6053 goto failedEntityRef; 6054 6055 { 6056 size_t numCodeUnits; 6057 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits); 6058 if(!isNameStartChar(decodedC)) 6059 goto failedEntityRef; 6060 takeLen += numCodeUnits; 6061 } 6062 6063 while(true) 6064 { 6065 if(text.input.empty) 6066 goto failedEntityRef; 6067 immutable c = text.input.front; 6068 if(c == ';') 6069 { 6070 ++takeLen; 6071 break; 6072 } 6073 size_t numCodeUnits; 6074 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits); 6075 if(!isNameChar(decodedC)) 6076 goto failedEntityRef; 6077 takeLen += numCodeUnits; 6078 } 6079 break; 6080 6081 failedEntityRef: 6082 text.pos.col += ampLen; 6083 throw new XMLParsingException("& is only legal in an attribute value as part of a " ~ 6084 "character or entity reference, and this is not a valid " ~ 6085 "character or entity reference.", text.pos); 6086 } 6087 } 6088 case '<': 6089 { 6090 text.pos.col += takeLen - lineStart; 6091 throw new XMLParsingException("< is not legal in an attribute name", text.pos); 6092 } 6093 case '\n': 6094 { 6095 ++takeLen; 6096 nextLine!(Text.config)(text.pos); 6097 lineStart = takeLen; 6098 break; 6099 } 6100 default: 6101 { 6102 import std.ascii : isASCII; 6103 import std.format : format; 6104 import dxml.internal : isXMLChar; 6105 6106 immutable c = text.input.front; 6107 if(isASCII(c)) 6108 { 6109 if(!isXMLChar(c)) 6110 { 6111 throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c), 6112 text.pos); 6113 } 6114 ++takeLen; 6115 break; 6116 } 6117 import std.utf : decodeFront, UseReplacementDchar, UTFException; 6118 // Annoyngly, letting decodeFront throw is the easier way to handle this, since the 6119 // replacement character is considered valid XML, and if we decoded using it, then 6120 // all of the invalid Unicode characters would come out as the replacement character 6121 // and then be treated as valid instead of being caught, which isn't all bad, but 6122 // the spec requires that they be treated as invalid instead of playing nice and 6123 // using the replacement character. 6124 try 6125 { 6126 size_t numCodeUnits; 6127 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits); 6128 if(!isXMLChar(decodedC)) 6129 { 6130 enum fmt = "Character is not legal in an XML File: 0x%0x"; 6131 throw new XMLParsingException(format!fmt(decodedC), text.pos); 6132 } 6133 takeLen += numCodeUnits; 6134 } 6135 catch(UTFException e) 6136 throw new XMLParsingException("Invalid Unicode character", text.pos); 6137 continue; 6138 } 6139 } 6140 text.input.popFront(); 6141 } 6142 done: 6143 { 6144 import std.range : takeExactly; 6145 text.pos.col += takeLen - lineStart + 1; 6146 return takeExactly(orig, takeLen); 6147 } 6148 } 6149 } 6150 throw new XMLParsingException("Expected quoted text", text.pos); 6151 } 6152 6153 unittest 6154 { 6155 import core.exception : AssertError; 6156 import std.algorithm.comparison : equal; 6157 import std.exception : collectException, enforce; 6158 import std.range : only; 6159 import dxml.internal : codeLen, testRangeFuncs; 6160 6161 static void test(alias func, ThrowOnEntityRef toer)(string origHaystack, string expected, string remainder, 6162 int row, int col, size_t line = __LINE__) 6163 { 6164 auto haystack = func(origHaystack); 6165 auto adjExpected = expected.toCmpType!(func, toer)(); 6166 { 6167 auto text = testParser!(makeConfig(toer))(haystack.save); 6168 enforce!AssertError(equal(text.takeAttValue(), adjExpected.save), 6169 "unittest failure 1", __FILE__, line); 6170 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 6171 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 6172 } 6173 { 6174 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 6175 auto text = testParser!(makeConfig(toer))(haystack); 6176 text.pos.line += 3; 6177 text.pos.col += 7; 6178 enforce!AssertError(equal(text.takeAttValue(), adjExpected), 6179 "unittest failure 4", __FILE__, line); 6180 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 6181 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 6182 } 6183 } 6184 6185 static void testFail(alias func, ThrowOnEntityRef toer)(string origHaystack, 6186 int row, int col, size_t line = __LINE__) 6187 { 6188 auto haystack = func(origHaystack); 6189 { 6190 auto text = testParser!(makeConfig(toer))(haystack.save); 6191 auto e = collectException!XMLParsingException(text.takeAttValue()); 6192 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 6193 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 6194 } 6195 { 6196 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 6197 auto text = testParser!(makeConfig(toer))(haystack); 6198 text.pos.line += 3; 6199 text.pos.col += 7; 6200 auto e = collectException!XMLParsingException(text.takeAttValue()); 6201 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 6202 enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line); 6203 } 6204 } 6205 6206 static foreach(i, func; testRangeFuncs) 6207 { 6208 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 6209 { 6210 test!(func, toer)(`""`, "", "", 1, 3); 6211 test!(func, toer)(`"J"`, "J", "", 1, 4); 6212 test!(func, toer)(`"foo"`, "foo", "", 1, 6); 6213 test!(func, toer)(`"プログラミング"`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3); 6214 test!(func, toer)(`"foo"bar`, "foo", "bar", 1, 6); 6215 test!(func, toer)(`"プログラミング" after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3); 6216 6217 test!(func, toer)(`''`, "", "", 1, 3); 6218 test!(func, toer)(`'J'`, "J", "", 1, 4); 6219 test!(func, toer)(`'foo'`, "foo", "", 1, 6); 6220 test!(func, toer)(`'プログラミング'`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3); 6221 test!(func, toer)(`'foo'bar`, "foo", "bar", 1, 6); 6222 test!(func, toer)(`'プログラミング' after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3); 6223 6224 test!(func, toer)(`"&><"`, "&><", "", 1, 16); 6225 test!(func, toer)(`"'""`, "'"", "", 1, 15); 6226 test!(func, toer)(`"hello&><world"`, "hello&><world", "", 1, 26); 6227 test!(func, toer)(`".....&><....."`, ".....&><.....", "", 1, 26); 6228 test!(func, toer)(`"ディラン"`, "ディラン", "", 1, 35); 6229 test!(func, toer)(`"hello¯M&world"`, "hello¯M&world", "", 1, 29); 6230 6231 test!(func, toer)(`'&><'`, "&><", "", 1, 16); 6232 test!(func, toer)(`'hello&><world'`, "hello&><world", "", 1, 26); 6233 test!(func, toer)(`''"'`, "'"", "", 1, 15); 6234 test!(func, toer)(`'.....&><.....'`, ".....&><.....", "", 1, 26); 6235 test!(func, toer)(`'ディラン'`, "ディラン", "", 1, 35); 6236 test!(func, toer)(`'hello¯M&world'`, "hello¯M&world", "", 1, 29); 6237 6238 test!(func, toer)("'hello\nworld'", "hello\nworld", "", 2, 7); 6239 test!(func, toer)("'hello\nworld\n'", "hello\nworld\n", "", 3, 2); 6240 6241 test!(func, toer)(`"'''"whatever`, "'''", "whatever", 1, 6); 6242 test!(func, toer)(`'"""'whatever`, `"""`, "whatever", 1, 6); 6243 6244 test!(func, toer)(`"*"`, "*", "", 1, 8); 6245 test!(func, toer)(`"B"`, "B", "", 1, 9); 6246 test!(func, toer)(`"%foo"`, "%foo", "", 1, 7); 6247 6248 testFail!(func, toer)(`"`, 1, 1); 6249 testFail!(func, toer)(`"foo`, 1, 1); 6250 testFail!(func, toer)(`"foo'`, 1, 1); 6251 testFail!(func, toer)(`"<"`, 1, 2); 6252 testFail!(func, toer)(`"&`, 1, 2); 6253 testFail!(func, toer)(`"&"`, 1, 2); 6254 testFail!(func, toer)(`"&x"`, 1, 2); 6255 testFail!(func, toer)(`"&.;"`, 1, 2); 6256 testFail!(func, toer)(`"&&;"`, 1, 2); 6257 testFail!(func, toer)(`"&a"`, 1, 2); 6258 testFail!(func, toer)(`"&a`, 1, 2); 6259 testFail!(func, toer)(`"hello&;"`, 1, 7); 6260 testFail!(func, toer)(`"hello&;world"`,1, 7); 6261 testFail!(func, toer)(`"hello&<;world"`,1, 7); 6262 testFail!(func, toer)(`"hello&world"`,1, 7); 6263 testFail!(func, toer)(`"hello<world"`,1, 7); 6264 testFail!(func, toer)(`"hello world&"`, 1, 13); 6265 testFail!(func, toer)(`"hello world&;"`, 1, 13); 6266 testFail!(func, toer)(`"hello world&foo"`, 1, 13); 6267 testFail!(func, toer)(`"foo<"`, 1, 5); 6268 testFail!(func, toer)(`"&#`, 1, 2); 6269 testFail!(func, toer)(`"&#"`, 1, 2); 6270 testFail!(func, toer)(`"&#;"`, 1, 2); 6271 testFail!(func, toer)(`"&#x;"`, 1, 2); 6272 testFail!(func, toer)(`"&#AF;"`, 1, 2); 6273 testFail!(func, toer)(`"&#x`, 1, 2); 6274 testFail!(func, toer)(`"M`, 1, 2); 6275 testFail!(func, toer)(`"M`, 1, 1); 6276 testFail!(func, toer)(`"�`, 1, 2); 6277 testFail!(func, toer)(`"�`, 1, 2); 6278 testFail!(func, toer)(`"�"`, 1, 2); 6279 6280 testFail!(func, toer)(`'`, 1, 1); 6281 testFail!(func, toer)(`'foo`, 1, 1); 6282 testFail!(func, toer)(`'foo"`, 1, 1); 6283 testFail!(func, toer)(`'<'`, 1, 2); 6284 testFail!(func, toer)("'\v'", 1, 2); 6285 testFail!(func, toer)("'\uFFFE'", 1, 2); 6286 testFail!(func, toer)(`'&`, 1, 2); 6287 testFail!(func, toer)(`'&'`, 1, 2); 6288 testFail!(func, toer)(`'&x'`, 1, 2); 6289 testFail!(func, toer)(`'&.;'`, 1, 2); 6290 testFail!(func, toer)(`'&&;'`, 1, 2); 6291 testFail!(func, toer)(`'&a'`, 1, 2); 6292 testFail!(func, toer)(`'&a`, 1, 2); 6293 testFail!(func, toer)(`'hello&;'`, 1, 7); 6294 testFail!(func, toer)(`'hello&;world'`, 1, 7); 6295 testFail!(func, toer)(`'hello&<;world'`, 1, 7); 6296 testFail!(func, toer)(`'hello&world'`, 1, 7); 6297 testFail!(func, toer)(`'hello<world'`, 1, 7); 6298 testFail!(func, toer)(`'hello world&'`, 1, 13); 6299 testFail!(func, toer)(`'hello world&;'`, 1, 13); 6300 testFail!(func, toer)(`'hello world&foo'`, 1, 13); 6301 testFail!(func, toer)(`'foo<'`, 1, 5); 6302 testFail!(func, toer)(`'&#`, 1, 2); 6303 testFail!(func, toer)(`'&#'`, 1, 2); 6304 testFail!(func, toer)(`'&#;'`, 1, 2); 6305 testFail!(func, toer)(`'&#x;'`, 1, 2); 6306 testFail!(func, toer)(`'&#AF;'`, 1, 2); 6307 testFail!(func, toer)(`'&#x`, 1, 2); 6308 testFail!(func, toer)(`'M`, 1, 2); 6309 testFail!(func, toer)(`'M`, 1, 1); 6310 testFail!(func, toer)(`'�`, 1, 2); 6311 testFail!(func, toer)(`'�`, 1, 2); 6312 testFail!(func, toer)(`'�'`, 1, 2); 6313 testFail!(func, toer)("'
\nF;'", 1, 2); 6314 testFail!(func, toer)("'&\n;'", 1, 2); 6315 testFail!(func, toer)("'&\namp;'", 1, 2); 6316 testFail!(func, toer)("'\n&&;'", 2, 6); 6317 } 6318 { 6319 alias toer = ThrowOnEntityRef.yes; 6320 testFail!(func, toer)(`"&foo;"`, 1, 2); 6321 testFail!(func, toer)(`"hello world&foo;"`, 1, 13); 6322 testFail!(func, toer)(`"hello &foo; world"`, 1, 8); 6323 testFail!(func, toer)(`"&am;"`, 1, 2); 6324 testFail!(func, toer)(`"&e;"`, 1, 2); 6325 testFail!(func, toer)(`"&l;"`, 1, 2); 6326 testFail!(func, toer)(`"<e;"`, 1, 2); 6327 testFail!(func, toer)(`"&g;"`, 1, 2); 6328 testFail!(func, toer)(`">e;"`, 1, 2); 6329 testFail!(func, toer)(`"&apo;"`, 1, 2); 6330 testFail!(func, toer)(`"&aposs;"`, 1, 2); 6331 testFail!(func, toer)(`"&quo;"`, 1, 2); 6332 testFail!(func, toer)(`""e;"`, 1, 2); 6333 6334 testFail!(func, toer)(`'&foo;'`, 1, 2); 6335 testFail!(func, toer)(`'hello world&foo;'`, 1, 13); 6336 testFail!(func, toer)(`'hello &foo; world'`, 1, 8); 6337 testFail!(func, toer)(`'&am;'`, 1, 2); 6338 testFail!(func, toer)(`'&e;'`, 1, 2); 6339 testFail!(func, toer)(`'&l;'`, 1, 2); 6340 testFail!(func, toer)(`'<e;'`, 1, 2); 6341 testFail!(func, toer)(`'&g;'`, 1, 2); 6342 testFail!(func, toer)(`'>e;'`, 1, 2); 6343 testFail!(func, toer)(`'&apo;'`, 1, 2); 6344 testFail!(func, toer)(`'&aposs;'`, 1, 2); 6345 testFail!(func, toer)(`'&quo;'`, 1, 2); 6346 testFail!(func, toer)(`'"e;'`, 1, 2); 6347 } 6348 { 6349 alias toer = ThrowOnEntityRef.no; 6350 test!(func, toer)(`"&foo;"`, "&foo;", "", 1, 8); 6351 test!(func, toer)(`"hello world&foo;"`, "hello world&foo;", "", 1, 19); 6352 test!(func, toer)(`"hello &foo; world"`, "hello &foo; world", "", 1, 20); 6353 test!(func, toer)(`"&am;"`, "&am;", "", 1, 7); 6354 test!(func, toer)(`"&e;"`, "&e;", "", 1, 9); 6355 test!(func, toer)(`"&l;"`, "&l;", "", 1, 6); 6356 test!(func, toer)(`"<e;"`, "<e;", "", 1, 8); 6357 test!(func, toer)(`"&g;"`, "&g;", "", 1, 6); 6358 test!(func, toer)(`">e;"`, ">e;", "", 1, 8); 6359 test!(func, toer)(`"&apo;"`, "&apo;", "", 1, 8); 6360 test!(func, toer)(`"&aposs;"`, "&aposs;", "", 1, 10); 6361 test!(func, toer)(`"&quo;"`, "&quo;", "", 1, 8); 6362 test!(func, toer)(`""e;"`, ""e;", "", 1, 10); 6363 6364 test!(func, toer)(`'&foo;'`, "&foo;", "", 1, 8); 6365 test!(func, toer)(`'hello world&foo;'`, "hello world&foo;", "", 1, 19); 6366 test!(func, toer)(`'hello &foo; world'`, "hello &foo; world", "", 1, 20); 6367 test!(func, toer)(`'&am;'`, "&am;", "", 1, 7); 6368 test!(func, toer)(`'&e;'`, "&e;", "", 1, 9); 6369 test!(func, toer)(`'&l;'`, "&l;", "", 1, 6); 6370 test!(func, toer)(`'<e;'`, "<e;", "", 1, 8); 6371 test!(func, toer)(`'&g;'`, "&g;", "", 1, 6); 6372 test!(func, toer)(`'>e;'`, ">e;", "", 1, 8); 6373 test!(func, toer)(`'&apo;'`, "&apo;", "", 1, 8); 6374 test!(func, toer)(`'&aposs;'`, "&aposs;", "", 1, 10); 6375 test!(func, toer)(`'&quo;'`, "&quo;", "", 1, 8); 6376 test!(func, toer)(`'"e;'`, ""e;", "", 1, 10); 6377 } 6378 } 6379 6380 // These can't be tested with testFail, because attempting to convert 6381 // invalid Unicode results in UnicodeExceptions before parseXML even 6382 // gets called. 6383 import std.meta : AliasSeq; 6384 static foreach(str; AliasSeq!("'" ~ cast(string)[255] ~ "'", 6385 "'"w ~ cast(wstring)[0xD800] ~ "'", 6386 "'"d ~ cast(dstring)[0xD800] ~ "'")) 6387 {{ 6388 auto text = testParser(str); 6389 auto e = collectException!XMLParsingException(text.takeAttValue()); 6390 assert(e ! is null); 6391 assert(e.pos == TextPos(1, 2)); 6392 }} 6393 } 6394 6395 @safe pure unittest 6396 { 6397 import std.algorithm.comparison : equal; 6398 import dxml.internal : testRangeFuncs; 6399 6400 static foreach(func; testRangeFuncs) 6401 { 6402 static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)]) 6403 {{ 6404 auto xml = func(`'foo'`); 6405 auto text = testParser!simpleXML(xml); 6406 assert(equal(text.takeAttValue(), "foo")); 6407 }} 6408 } 6409 } 6410 6411 6412 // Validates an EntityType.text field to verify that it does not contain invalid 6413 // characters. 6414 void checkText(bool allowRestrictedChars, Text)(ref Text orig) 6415 { 6416 import std.format : format; 6417 import std.utf : decodeFront, UseReplacementDchar; 6418 6419 auto text = orig.save; 6420 loop: while(!text.input.empty) 6421 { 6422 switch(text.input.front) 6423 { 6424 static if(!allowRestrictedChars) 6425 { 6426 case '&': 6427 { 6428 import dxml.util : parseCharRef; 6429 6430 { 6431 auto temp = text.input.save; 6432 auto charRef = parseCharRef(temp); 6433 if(!charRef.isNull) 6434 { 6435 static if(hasLength!(Text.Input)) 6436 { 6437 text.pos.col += text.input.length - temp.length; 6438 text.input = temp; 6439 } 6440 else 6441 { 6442 while(text.input.front != ';') 6443 popFrontAndIncCol(text); 6444 popFrontAndIncCol(text); 6445 } 6446 continue; 6447 } 6448 } 6449 6450 immutable ampPos = text.pos; 6451 popFrontAndIncCol(text); 6452 6453 // Std Entity References 6454 static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes) 6455 { 6456 static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"]) 6457 { 6458 if(text.stripStartsWith(entRef)) 6459 continue loop; 6460 } 6461 6462 throw new XMLParsingException("& is only legal in an EntitType.text entity as part of a " ~ 6463 "reference, and this parser only supports entity references if " ~ 6464 "they're predefined by the spec. This is not a valid character " ~ 6465 "reference or one of the predefined entity references.", ampPos); 6466 } 6467 // All Entity References 6468 else 6469 { 6470 import std.utf : decodeFront, UseReplacementDchar; 6471 import dxml.internal : isNameStartChar, isNameChar; 6472 6473 if(text.input.empty) 6474 goto failedEntityRef; 6475 { 6476 size_t numCodeUnits; 6477 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits); 6478 if(!isNameStartChar(decodedC)) 6479 goto failedEntityRef; 6480 text.pos.col += numCodeUnits; 6481 } 6482 while(true) 6483 { 6484 if(text.input.empty) 6485 goto failedEntityRef; 6486 immutable c = text.input.front; 6487 if(c == ';') 6488 break; 6489 size_t numCodeUnits; 6490 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits); 6491 if(!isNameChar(decodedC)) 6492 goto failedEntityRef; 6493 text.pos.col += numCodeUnits; 6494 } 6495 assert(text.input.front == ';'); 6496 popFrontAndIncCol(text); 6497 continue; 6498 6499 failedEntityRef: 6500 throw new XMLParsingException("& is only legal in an attribute value as part of a " ~ 6501 "character or entity reference, and this is not a valid " ~ 6502 "character or entity reference.", ampPos); 6503 } 6504 } 6505 case '<': throw new XMLParsingException("< is not legal in EntityType.text", text.pos); 6506 case ']': 6507 { 6508 popFrontAndIncCol(text); 6509 if(text.stripStartsWith("]>")) 6510 { 6511 text.pos.col -= 3; 6512 throw new XMLParsingException("]]> is not legal in EntityType.text", text.pos); 6513 } 6514 break; 6515 } 6516 } 6517 case '\n': 6518 { 6519 nextLine!(text.config)(text.pos); 6520 text.input.popFront(); 6521 break; 6522 } 6523 default: 6524 { 6525 import std.ascii : isASCII; 6526 import dxml.internal : isXMLChar; 6527 immutable c = text.input.front; 6528 if(isASCII(c)) 6529 { 6530 if(!isXMLChar(c)) 6531 { 6532 throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c), 6533 text.pos); 6534 } 6535 popFrontAndIncCol(text); 6536 } 6537 else 6538 { 6539 import std.utf : UTFException; 6540 // Annoyngly, letting decodeFront throw is the easier way to handle this, since the 6541 // replacement character is considered valid XML, and if we decoded using it, then 6542 // all of the invalid Unicode characters would come out as the replacement character 6543 // and then be treated as valid instead of being caught, which isn't all bad, but 6544 // the spec requires that they be treated as invalid instead of playing nice and 6545 // using the replacement character. 6546 try 6547 { 6548 size_t numCodeUnits; 6549 immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits); 6550 if(!isXMLChar(decodedC)) 6551 { 6552 enum fmt = "Character is not legal in an XML File: 0x%0x"; 6553 throw new XMLParsingException(format!fmt(decodedC), text.pos); 6554 } 6555 text.pos.col += numCodeUnits; 6556 } 6557 catch(UTFException) 6558 throw new XMLParsingException("Invalid Unicode character", text.pos); 6559 } 6560 break; 6561 } 6562 } 6563 } 6564 } 6565 6566 unittest 6567 { 6568 import core.exception : AssertError; 6569 import std.exception : assertNotThrown, collectException, enforce; 6570 import dxml.internal : codeLen, testRangeFuncs; 6571 6572 static void test(alias func, bool arc, ThrowOnEntityRef toer)(string text, size_t line = __LINE__) 6573 { 6574 auto xml = func(text); 6575 auto range = testParser!(makeConfig(toer))(xml); 6576 assertNotThrown(checkText!arc(range), "unittest failure", __FILE__, line); 6577 } 6578 6579 static void testFail(alias func, bool arc, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__) 6580 { 6581 auto xml = func(text); 6582 { 6583 auto range = testParser!(makeConfig(toer))(xml.save); 6584 auto e = collectException!XMLParsingException(checkText!arc(range)); 6585 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 6586 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 6587 } 6588 { 6589 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 6590 auto range = testParser!(makeConfig(toer))(xml); 6591 range.pos.line += 3; 6592 range.pos.col += 7; 6593 auto e = collectException!XMLParsingException(checkText!arc(range)); 6594 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 6595 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 6596 } 6597 } 6598 6599 static foreach(func; testRangeFuncs) 6600 { 6601 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 6602 { 6603 static foreach(arc; [false, true]) 6604 { 6605 test!(func, arc, toer)(""); 6606 test!(func, arc, toer)("J",); 6607 test!(func, arc, toer)("foo"); 6608 test!(func, arc, toer)("プログラミング"); 6609 6610 test!(func, arc, toer)("&><"); 6611 test!(func, arc, toer)("hello&><world"); 6612 test!(func, arc, toer)(".....'"&....."); 6613 test!(func, arc, toer)("ディラン"); 6614 test!(func, arc, toer)("hello¯*"world"); 6615 6616 test!(func, arc, toer)("]]"); 6617 test!(func, arc, toer)("]>"); 6618 test!(func, arc, toer)("foo]]bar"); 6619 test!(func, arc, toer)("foo]>bar"); 6620 test!(func, arc, toer)("]] >"); 6621 6622 testFail!(func, arc, toer)("\v", 1, 1); 6623 testFail!(func, arc, toer)("\uFFFE", 1, 1); 6624 testFail!(func, arc, toer)("hello\vworld", 1, 6); 6625 testFail!(func, arc, toer)("he\nllo\vwo\nrld", 2, 4); 6626 } 6627 6628 testFail!(func, false, toer)("<", 1, 1); 6629 testFail!(func, false, toer)("&", 1, 1); 6630 testFail!(func, false, toer)("&", 1, 1); 6631 testFail!(func, false, toer)("&x", 1, 1); 6632 testFail!(func, false, toer)("&&;", 1, 1); 6633 testFail!(func, false, toer)("&a", 1, 1); 6634 testFail!(func, false, toer)("hello&;", 1, 6); 6635 testFail!(func, false, toer)("hello&;world", 1, 6); 6636 testFail!(func, false, toer)("hello&<;world", 1, 6); 6637 testFail!(func, false, toer)("hello&world", 1, 6); 6638 testFail!(func, false, toer)("hello world&", 1, 12); 6639 testFail!(func, false, toer)("hello world&;", 1, 12); 6640 testFail!(func, false, toer)("hello world&foo", 1, 12); 6641 testFail!(func, false, toer)("&#;", 1, 1); 6642 testFail!(func, false, toer)("&#x;", 1, 1); 6643 testFail!(func, false, toer)("&#AF;", 1, 1); 6644 testFail!(func, false, toer)("&#x", 1, 1); 6645 testFail!(func, false, toer)("*", 1, 1); 6646 testFail!(func, false, toer)("B", 1, 1); 6647 testFail!(func, false, toer)("", 1, 1); 6648 testFail!(func, false, toer)("", 1, 1); 6649 testFail!(func, false, toer)("*foo\nbar&#;", 2, 4); 6650 testFail!(func, false, toer)("*foo\nbar&#x;", 2, 4); 6651 testFail!(func, false, toer)("*foo\nbar&#AF;", 2, 4); 6652 testFail!(func, false, toer)("*foo\nbar&#x", 2, 4); 6653 testFail!(func, false, toer)("*foo\nbar*", 2, 4); 6654 testFail!(func, false, toer)("*foo\nbarB", 2, 4); 6655 testFail!(func, false, toer)("プログラミング&", 1, codeLen!(func, "プログラミング&")); 6656 6657 static if(toer == ThrowOnEntityRef.yes) 6658 { 6659 testFail!(func, false, toer)("&a;", 1, 1); 6660 testFail!(func, false, toer)(`&am;`, 1, 1); 6661 testFail!(func, false, toer)(`&e;`, 1, 1); 6662 testFail!(func, false, toer)(`&l;`, 1, 1); 6663 testFail!(func, false, toer)(`<e;`, 1, 1); 6664 testFail!(func, false, toer)(`&g;`, 1, 1); 6665 testFail!(func, false, toer)(`>e;`, 1, 1); 6666 testFail!(func, false, toer)(`&apo;`, 1, 1); 6667 testFail!(func, false, toer)(`&aposs;`, 1, 1); 6668 testFail!(func, false, toer)(`&quo;`, 1, 1); 6669 testFail!(func, false, toer)(`"e;`, 1, 1); 6670 testFail!(func, false, toer)(`hello &foo; world`, 1, 7); 6671 testFail!(func, false, toer)("hello\n &foo; \nworld", 2, 2); 6672 } 6673 else 6674 { 6675 test!(func, false, toer)("&a;"); 6676 test!(func, false, toer)(`&am;`); 6677 test!(func, false, toer)(`&e;`); 6678 test!(func, false, toer)(`&l;`); 6679 test!(func, false, toer)(`<e;`); 6680 test!(func, false, toer)(`&g;`); 6681 test!(func, false, toer)(`>e;`); 6682 test!(func, false, toer)(`&apo;`); 6683 test!(func, false, toer)(`&aposs;`); 6684 test!(func, false, toer)(`&quo;`); 6685 test!(func, false, toer)(`"e;`); 6686 test!(func, false, toer)(`hello &foo; world`); 6687 test!(func, false, toer)("hello\n &foo; \nworld"); 6688 } 6689 6690 testFail!(func, false, toer)("]]>", 1, 1); 6691 testFail!(func, false, toer)("foo]]>bar", 1, 4); 6692 6693 test!(func, true, toer)("]]>"); 6694 test!(func, true, toer)("foo]]>bar"); 6695 6696 test!(func, true, toer)("<"); 6697 test!(func, true, toer)("&"); 6698 test!(func, true, toer)("&x"); 6699 test!(func, true, toer)("&&;"); 6700 test!(func, true, toer)("&a"); 6701 test!(func, true, toer)("&a;"); 6702 test!(func, true, toer)(`&am;`); 6703 test!(func, true, toer)(`&e;`); 6704 test!(func, true, toer)(`&l;`); 6705 test!(func, true, toer)(`<e;`); 6706 test!(func, true, toer)(`&g;`); 6707 test!(func, true, toer)(`>e;`); 6708 test!(func, true, toer)(`&apo;`); 6709 test!(func, true, toer)(`&aposs;`); 6710 test!(func, true, toer)(`&quo;`); 6711 test!(func, true, toer)(`"e;`); 6712 test!(func, true, toer)("hello&;"); 6713 test!(func, true, toer)("hello&;world"); 6714 test!(func, true, toer)("hello&<;world"); 6715 test!(func, true, toer)("hello&world"); 6716 test!(func, true, toer)("hello world&"); 6717 test!(func, true, toer)("hello world&;"); 6718 test!(func, true, toer)("hello world&foo"); 6719 test!(func, true, toer)("&#;"); 6720 test!(func, true, toer)("&#x;"); 6721 test!(func, true, toer)("&#AF;"); 6722 test!(func, true, toer)("&#x"); 6723 test!(func, true, toer)("*"); 6724 test!(func, true, toer)("B"); 6725 test!(func, true, toer)(""); 6726 test!(func, true, toer)(""); 6727 test!(func, true, toer)("*foo\nbar&#;"); 6728 test!(func, true, toer)("*foo\nbar&#x;"); 6729 test!(func, true, toer)("*foo\nbar&#AF;"); 6730 test!(func, true, toer)("*foo\nbar&#x"); 6731 test!(func, true, toer)("*foo\nbar*"); 6732 test!(func, true, toer)("*foo\nbarB"); 6733 test!(func, true, toer)("プログラミング&"); 6734 } 6735 } 6736 6737 // These can't be tested with testFail, because attempting to convert 6738 // invalid Unicode results in UnicodeExceptions before parseXML even 6739 // gets called. 6740 import std.meta : AliasSeq; 6741 static foreach(str; AliasSeq!(cast(string)[255], cast(wstring)[0xD800], cast(dstring)[0xD800])) 6742 { 6743 static foreach(arc; [false, true]) 6744 {{ 6745 auto text = testParser(str); 6746 auto e = collectException!XMLParsingException(text.checkText!arc()); 6747 assert(e ! is null); 6748 assert(e.pos == TextPos(1, 1)); 6749 }} 6750 } 6751 } 6752 6753 @safe unittest 6754 { 6755 import dxml.internal : testRangeFuncs; 6756 6757 static foreach(func; testRangeFuncs) 6758 { 6759 static foreach(arc; [false, true]) 6760 { 6761 static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)]) 6762 {{ 6763 auto xml = func("foo"); 6764 auto text = testParser!config(xml); 6765 checkText!arc(text); 6766 }} 6767 } 6768 } 6769 } 6770 6771 6772 // S := (#x20 | #x9 | #xD | #XA)+ 6773 bool isSpace(C)(C c) @safe pure nothrow @nogc 6774 if(isSomeChar!C) 6775 { 6776 switch(c) 6777 { 6778 case ' ': 6779 case '\t': 6780 case '\r': 6781 case '\n': return true; 6782 default : return false; 6783 } 6784 } 6785 6786 pure nothrow @safe @nogc unittest 6787 { 6788 foreach(char c; char.min .. char.max) 6789 { 6790 if(c == ' ' || c == '\t' || c == '\r' || c == '\n') 6791 assert(isSpace(c)); 6792 else 6793 assert(!isSpace(c)); 6794 } 6795 foreach(wchar c; wchar.min .. wchar.max / 100) 6796 { 6797 if(c == ' ' || c == '\t' || c == '\r' || c == '\n') 6798 assert(isSpace(c)); 6799 else 6800 assert(!isSpace(c)); 6801 } 6802 foreach(dchar c; dchar.min .. dchar.max / 1000) 6803 { 6804 if(c == ' ' || c == '\t' || c == '\r' || c == '\n') 6805 assert(isSpace(c)); 6806 else 6807 assert(!isSpace(c)); 6808 } 6809 } 6810 6811 6812 pragma(inline, true) void popFrontAndIncCol(Text)(ref Text text) 6813 { 6814 text.input.popFront(); 6815 ++text.pos.col; 6816 } 6817 6818 pragma(inline, true) void nextLine(Config config)(ref TextPos pos) 6819 { 6820 ++pos.line; 6821 pos.col = 1; 6822 } 6823 6824 // TODO create bug report, because this function cannot be inlined 6825 /+pragma(inline, true)+/ void checkNotEmpty(Text)(ref Text text, size_t line = __LINE__) 6826 { 6827 if(text.input.empty) 6828 throw new XMLParsingException("Prematurely reached end of document", text.pos, __FILE__, line); 6829 } 6830 6831 6832 version(unittest) 6833 enum someTestConfigs = [Config.init, simpleXML, makeConfig(SkipComments.yes), makeConfig(SkipPI.yes)]; 6834 6835 6836 // Fuzz-testing failures 6837 unittest 6838 { 6839 static void parseEverything(string xml) 6840 { 6841 with(EntityType) foreach(entity; parseXML(xml)) 6842 { 6843 final switch(entity.type) 6844 { 6845 case cdata: break; 6846 case comment: break; 6847 case elementStart: auto name = entity.name; break; 6848 case elementEnd: goto case elementStart; 6849 case elementEmpty: goto case elementStart; 6850 case pi: goto case elementStart; 6851 case text: break; 6852 } 6853 6854 final switch(entity.type) 6855 { 6856 case cdata: auto text = entity.text; break; 6857 case comment: goto case cdata; 6858 case elementStart: 6859 { 6860 foreach(attr; entity.attributes) 6861 { 6862 auto name = attr.name; 6863 auto value = attr.value; 6864 } 6865 break; 6866 } 6867 case elementEnd: break; 6868 case elementEmpty: goto case elementStart; 6869 case pi: goto case cdata; 6870 case text: goto case cdata; 6871 } 6872 } 6873 } 6874 6875 static void testFail(string xml, size_t line = __LINE__) 6876 { 6877 import std.exception : assertThrown; 6878 assertThrown!XMLParsingException(parseEverything(xml)); 6879 } 6880 6881 testFail([0x3c, 0xff, 0x3e, 0x3e, 0x3a, 0x3c, 0x2f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6882 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6883 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6884 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6885 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x31, 0xff, 6886 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xff, 0xff, 6887 0xff]); 6888 }