1 // Written in the D programming language 2 3 /++ 4 This implements a range-based 5 $(LINK2 https://en.wikipedia.org/wiki/StAX, StAX _parser) for XML 1.0 (which 6 will work with XML 1.1 documents assuming that they don't use any 7 1.1-specific features). For the sake of simplicity, sanity, and efficiency, 8 the $(LINK2 https://en.wikipedia.org/wiki/Document_type_definition, DTD) 9 section is not supported beyond what is required to parse past it. 10 11 Start tags, end tags, comments, cdata sections, and processing instructions 12 are all supported and reported to the application. Anything in the DTD is 13 skipped (though it's parsed enough to parse past it correctly, and that 14 $(I can) result in an $(LREF XMLParsingException) if that XML isn't valid 15 enough to be correctly skipped), and the 16 $(LINK2 http://www.w3.org/TR/REC-xml/#NT-XMLDecl, XML declaration) at the 17 top is skipped if present (XML 1.1 requires that it be there, but XML 1.0 18 does not). 19 20 Regardless of what the XML declaration says (if present), any range of 21 $(K_CHAR) will be treated as being encoded in UTF-8, any range of 22 $(K_WCHAR) will be treated as being encoded in UTF-16, and any range of 23 $(K_DCHAR) will be treated as having been encoded in UTF-32. Strings will 24 be treated as ranges of their code units, not code points. Note that like 25 Phobos typically does when processing strings, the code assumes that BOMs 26 have already been removed, so if the range of characters comes from a file 27 that uses a BOM, the calling code needs to strip it out before calling 28 $(LREF parseXML), or parsing will fail due to invalid characters. 29 30 Since the DTD is skipped, entity references other than the five which are 31 predefined by the XML spec cannot be fully processed (since wherever they 32 were used in the document would be replaced by what they referred to, which 33 could be arbitrarily complex XML). 
As such, by default, if any entity 34 references which are not predefined are encountered outside of the DTD, an 35 $(LREF XMLParsingException) will be thrown (see 36 $(LREF Config.throwOnEntityRef) for how that can be configured). The 37 predefined entity references and any character references encountered will 38 be checked to verify that they're valid, but they will not be replaced 39 (since that does not work with returning slices of the original input). 40 41 However, $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or 42 $(REF_ALTTEXT parseStdEntityRef, parseStdEntityRef, dxml, util) from 43 $(MREF dxml, util) can be used to convert the predefined entity references 44 to what they refer to, and $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or 45 $(REF_ALTTEXT parseCharRef, parseCharRef, dxml, util) from 46 $(MREF dxml, util) can be used to convert character references to what they 47 refer to. 48 49 $(H3 Primary Symbols) 50 $(TABLE 51 $(TR $(TH Symbol) $(TH Description)) 52 $(TR $(TD $(LREF parseXML)) 53 $(TD The function used to initiate the parsing of an XML 54 document.)) 55 $(TR $(TD $(LREF EntityRange)) 56 $(TD The range returned by $(LREF parseXML).)) 57 $(TR $(TD $(LREF EntityRange.Entity)) 58 $(TD The element type of $(LREF EntityRange).)) 59 ) 60 61 $(H3 Parser Configuration Helpers) 62 $(TABLE 63 $(TR $(TH Symbol) $(TH Description)) 64 $(TR $(TD $(LREF Config)) 65 $(TD Used to configure how $(LREF EntityRange) parses the XML.)) 66 $(TR $(TD $(LREF simpleXML)) 67 $(TD A user-friendly configuration for when the application just 68 wants the element tags and the data in between them.)) 69 $(TR $(TD $(LREF makeConfig)) 70 $(TD A convenience function for constructing a custom 71 $(LREF Config).)) 72 $(TR $(TD $(LREF SkipComments)) 73 $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) 74 to tell the parser to skip comments.)) 75 $(TR $(TD $(LREF SkipPI)) 76 $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) 77 to tell the 
parser to skip processing instructions.)) 78 $(TR $(TD $(LREF SplitEmpty)) 79 $(TD A $(PHOBOS_REF Flag, std, typecons) used with $(LREF Config) 80 to configure how the parser deals with empty element tags.)) 81 ) 82 83 $(H3 Helper Types Used When Parsing) 84 $(TABLE 85 $(TR $(TH Symbol) $(TH Description)) 86 $(TR $(TD $(LREF EntityType)) 87 $(TD The type of an entity in the XML (e.g. a 88 $(LREF_ALTTEXT start tag, EntityType.elementStart) or a 89 $(LREF_ALTTEXT comment, EntityType.comment)).)) 90 $(TR $(TD $(LREF TextPos)) 91 $(TD Gives the line and column number in the XML document.)) 92 $(TR $(TD $(LREF XMLParsingException)) 93 $(TD Thrown by $(LREF EntityRange) when it encounters invalid 94 XML.)) 95 ) 96 97 $(H3 Helper Functions Used When Parsing) 98 $(TABLE 99 $(TR $(TH Symbol) $(TH Description)) 100 $(TR $(TD $(LREF getAttrs)) 101 $(TD A function similar to $(PHOBOS_REF getopt, std, getopt) which 102 allows for the easy processing of start tag attributes.)) 103 $(TR $(TD $(LREF skipContents)) 104 $(TD Iterates an $(LREF EntityRange) from a start tag to its 105 matching end tag.)) 106 $(TR $(TD $(LREF skipToPath)) 107 $(TD Used to navigate from one start tag to another as if the start 108 tag names formed a file path.)) 109 $(TR $(TD $(LREF skipToEntityType)) 110 $(TD Skips to the next entity of the given type in the range.)) 111 $(TR $(TD $(LREF skipToParentEndTag)) 112 $(TD Iterates an $(LREF EntityRange) until it reaches the end tag 113 that matches the start tag which is the parent of the 114 current entity.)) 115 ) 116 117 $(H3 Helper Traits) 118 $(TABLE 119 $(TR $(TH Symbol) $(TH Description)) 120 $(TR $(TD $(LREF isAttrRange)) 121 $(TD Whether the given range is a range of attributes.))) 122 123 Copyright: Copyright 2017 - 2023 124 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 
125 Authors: $(HTTPS jmdavisprog.com, Jonathan M Davis) 126 Source: $(LINK_TO_SRC dxml/_parser.d) 127 128 See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0) 129 +/ 130 module dxml.parser; 131 132 /// 133 version(dxmlTests) unittest 134 { 135 auto xml = "<!-- comment -->\n" ~ 136 "<root>\n" ~ 137 " <foo>some text<whatever/></foo>\n" ~ 138 " <bar/>\n" ~ 139 " <baz></baz>\n" ~ 140 "</root>"; 141 { 142 auto range = parseXML(xml); 143 assert(range.front.type == EntityType.comment); 144 assert(range.front.text == " comment "); 145 range.popFront(); 146 147 assert(range.front.type == EntityType.elementStart); 148 assert(range.front.name == "root"); 149 range.popFront(); 150 151 assert(range.front.type == EntityType.elementStart); 152 assert(range.front.name == "foo"); 153 range.popFront(); 154 155 assert(range.front.type == EntityType.text); 156 assert(range.front.text == "some text"); 157 range.popFront(); 158 159 assert(range.front.type == EntityType.elementEmpty); 160 assert(range.front.name == "whatever"); 161 range.popFront(); 162 163 assert(range.front.type == EntityType.elementEnd); 164 assert(range.front.name == "foo"); 165 range.popFront(); 166 167 assert(range.front.type == EntityType.elementEmpty); 168 assert(range.front.name == "bar"); 169 range.popFront(); 170 171 assert(range.front.type == EntityType.elementStart); 172 assert(range.front.name == "baz"); 173 range.popFront(); 174 175 assert(range.front.type == EntityType.elementEnd); 176 assert(range.front.name == "baz"); 177 range.popFront(); 178 179 assert(range.front.type == EntityType.elementEnd); 180 assert(range.front.name == "root"); 181 range.popFront(); 182 183 assert(range.empty); 184 } 185 { 186 auto range = parseXML!simpleXML(xml); 187 188 // simpleXML skips comments 189 190 assert(range.front.type == EntityType.elementStart); 191 assert(range.front.name == "root"); 192 range.popFront(); 193 194 assert(range.front.type == EntityType.elementStart); 195 
/++
    Thrown whenever the parser runs into XML that is not valid.

    The exception's message is the caller-supplied message prefixed with the
    position of the problem, in the form `[line:col]: message`.
  +/
class XMLParsingException : Exception
{
    /++
        Where in the XML input the problem was detected.
      +/
    TextPos pos;

package:

    // Builds the "[line:col]: msg" message handed to Exception and records
    // the position in pos so that callers can inspect it programmatically.
    this(string msg, TextPos textPos, string file = __FILE__, size_t line = __LINE__) @safe pure
    {
        import std.format : format;

        immutable located = format!"[%s:%s]: %s"(textPos.line, textPos.col, msg);
        super(located, file, line);
        pos = textPos;
    }
}
323 324 If $(D skipPI == SkipPI.yes), any entities of type 325 $(LREF EntityType.pi) will be skipped, and they will not be validated 326 beyond what is required to parse past them. 327 328 Defaults to $(D SkipPI.no). 329 +/ 330 auto skipPI = SkipPI.no; 331 332 /++ 333 Whether the parser should report empty element tags as if they were a 334 start tag followed by an end tag with nothing in between. 335 336 If $(D splitEmpty == SplitEmpty.yes), then whenever an 337 $(LREF EntityType.elementEmpty) is encountered, the parser will claim 338 that that entity is an $(LREF EntityType.elementStart), and then it 339 will provide an $(LREF EntityType.elementEnd) as the next entity before 340 the entity that actually follows it. 341 342 The purpose of this is to simplify the code using the parser, since most 343 code does not care about the difference between an empty tag and a start 344 and end tag with nothing in between. But since some code may care about 345 the difference, the behavior is configurable. 346 347 Defaults to $(D SplitEmpty.no). 348 +/ 349 auto splitEmpty = SplitEmpty.no; 350 351 /// 352 version(dxmlTests) unittest 353 { 354 enum configSplitYes = makeConfig(SplitEmpty.yes); 355 356 { 357 auto range = parseXML("<root></root>"); 358 assert(range.front.type == EntityType.elementStart); 359 assert(range.front.name == "root"); 360 range.popFront(); 361 assert(range.front.type == EntityType.elementEnd); 362 assert(range.front.name == "root"); 363 range.popFront(); 364 assert(range.empty); 365 } 366 { 367 // No difference if the tags are already split. 368 auto range = parseXML!configSplitYes("<root></root>"); 369 assert(range.front.type == EntityType.elementStart); 370 assert(range.front.name == "root"); 371 range.popFront(); 372 assert(range.front.type == EntityType.elementEnd); 373 assert(range.front.name == "root"); 374 range.popFront(); 375 assert(range.empty); 376 } 377 { 378 // This treats <root></root> and <root/> as distinct. 
379 auto range = parseXML("<root/>"); 380 assert(range.front.type == EntityType.elementEmpty); 381 assert(range.front.name == "root"); 382 range.popFront(); 383 assert(range.empty); 384 } 385 { 386 // This is parsed as if it were <root></root> insead of <root/>. 387 auto range = parseXML!configSplitYes("<root/>"); 388 assert(range.front.type == EntityType.elementStart); 389 assert(range.front.name == "root"); 390 range.popFront(); 391 assert(range.front.type == EntityType.elementEnd); 392 assert(range.front.name == "root"); 393 range.popFront(); 394 assert(range.empty); 395 } 396 } 397 398 /++ 399 Whether the parser should throw when it encounters any entity references 400 other than the five entity references defined in the XML standard. 401 402 Any other entity references would have to be defined in the DTD in 403 order to be valid. And in order to know what XML they represent (which 404 could be arbitrarily complex, even effectively inserting entire XML 405 documents into the middle of the XML), the DTD would have to be parsed. 406 However, dxml does not support parsing the DTD beyond what is required 407 to correctly parse past it, and replacing entity references with what 408 they represent would not work with the slicing semantics that 409 $(LREF EntityRange) provides. As such, it is not possible for dxml to 410 correctly handle any entity references other than the five which are 411 defined in the XML standard, and even those are only parsed by using 412 $(REF decodeXML, dxml, util) or $(REF parseStdEntityRef, dxml, util). 413 $(LREF EntityRange) always validates that entity references are one 414 of the five, predefined entity references, but otherwise, it lets them 415 pass through as normal text. It does not replace them with what they 416 represent. 417 418 As such, the default behavior of $(LREF EntityRange) is to throw an 419 $(LREF XMLParsingException) when it encounters an entity reference 420 which is not one of the five defined by the XML standard. 
With that 421 behavior, there is no risk of processing an XML document as if it had 422 no entity references and ending up with what the program using the 423 parser would probably consider incorrect results. However, there are 424 cases where a program may find it acceptable to treat entity references 425 as normal text and ignore them. As such, if a program wishes to take 426 that approach, it can set throwOnEntityRef to $(D ThrowOnEntityRef.no). 427 428 If $(D throwOnEntityRef == ThrowOnEntityRef.no), then any entity 429 reference that it encounters will be validated to ensure that it is 430 syntactically valid (i.e. that the characters it contains form what 431 could be a valid entity reference assuming that the DTD declared it 432 properly), but otherwise, $(LREF EntityRange) will treat it as normal 433 text, just like it treats the five, predefined entity references as 434 normal text. 435 436 Note that any valid XML entity reference which contains start or end 437 tags must contain matching start or end tags, and entity references 438 cannot contain incomplete fragments of XML (e.g. the start or end of a 439 comment). So, missing entity references should only affect the data in 440 the XML document and not its overall structure (if that were not _true, 441 attempting to ignore entity references as $(D ThrowOnEntityRef.no) 442 does would be a disaster in the making). However, how reasonable it is 443 to miss that data depends entirely on the application and what the XML 444 documents it's parsing contain - hence, the behavior is configurable. 
445 446 See_Also: $(REF StdEntityRef, dxml, util)$(BR) 447 $(REF parseStdEntityRef, dxml, util)$(BR) 448 $(REF parseCharRef, dxml, util)$(BR) 449 $(REF encodeCharRef, dxml, util)$(BR) 450 $(REF decodeXML, dxml, util)$(BR) 451 $(REF asDecodedXML, dxml, util) 452 +/ 453 auto throwOnEntityRef = ThrowOnEntityRef.yes; 454 455 /// 456 version(dxmlTests) unittest 457 { 458 import std.exception : assertThrown; 459 import dxml.util : decodeXML; 460 461 auto xml = "<root>\n" ~ 462 " <std>&'><"</std>\n" ~ 463 " <other>&foobar;</other>\n" ~ 464 " <invalid>&--;</invalid>\n" ~ 465 "</root>"; 466 467 // ThrowOnEntityRef.yes 468 { 469 auto range = parseXML(xml); 470 assert(range.front.type == EntityType.elementStart); 471 assert(range.front.name == "root"); 472 473 range.popFront(); 474 assert(range.front.type == EntityType.elementStart); 475 assert(range.front.name == "std"); 476 477 range.popFront(); 478 assert(range.front.type == EntityType.text); 479 assert(range.front.text == "&'><""); 480 assert(range.front.text.decodeXML() == `&'><"`); 481 482 range.popFront(); 483 assert(range.front.type == EntityType.elementEnd); 484 assert(range.front.name == "std"); 485 486 range.popFront(); 487 assert(range.front.type == EntityType.elementStart); 488 assert(range.front.name == "other"); 489 490 // Attempted to parse past "&foobar;", which is syntactically 491 // valid, but it's not one of the five predefined entity references. 
492 assertThrown!XMLParsingException(range.popFront()); 493 } 494 495 // ThrowOnEntityRef.no 496 { 497 auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml); 498 assert(range.front.type == EntityType.elementStart); 499 assert(range.front.name == "root"); 500 501 range.popFront(); 502 assert(range.front.type == EntityType.elementStart); 503 assert(range.front.name == "std"); 504 505 range.popFront(); 506 assert(range.front.type == EntityType.text); 507 assert(range.front.text == "&'><""); 508 assert(range.front.text.decodeXML() == `&'><"`); 509 510 range.popFront(); 511 assert(range.front.type == EntityType.elementEnd); 512 assert(range.front.name == "std"); 513 514 range.popFront(); 515 assert(range.front.type == EntityType.elementStart); 516 assert(range.front.name == "other"); 517 518 // Doesn't throw, because "&foobar;" is syntactically valid. 519 range.popFront(); 520 assert(range.front.type == EntityType.text); 521 assert(range.front.text == "&foobar;"); 522 523 // decodeXML has no effect on non-standard entity references. 524 assert(range.front.text.decodeXML() == "&foobar;"); 525 526 range.popFront(); 527 assert(range.front.type == EntityType.elementEnd); 528 assert(range.front.name == "other"); 529 530 range.popFront(); 531 assert(range.front.type == EntityType.elementStart); 532 assert(range.front.name == "invalid"); 533 534 // Attempted to parse past "&--;", which is not syntactically valid, 535 // because -- is not a valid name for an entity reference. 
/++
    Helper function for creating a custom config. It makes it easy to set one
    or more of the member variables to something other than the default without
    having to worry about explicitly setting them individually or setting them
    all at once via a constructor.

    The order of the arguments does not matter. The types of each of the members
    of Config are unique, so that information alone is sufficient to determine
    which argument should be assigned to which member.

    It is a compile-time error to pass an argument whose type is not the type
    of any member of Config or to pass two arguments of the same type.

    Params:
        args = The values to assign to the members of Config with the
               corresponding types.

    Returns: A Config with each member whose type matches one of the arguments
             set to that argument and every other member left at its default.
  +/
Config makeConfig(Args...)(Args args)
{
    import std.format : format;
    import std.meta : AliasSeq, staticIndexOf, staticMap;

    Config config;

    // The types of all of the members of Config, in declaration order.
    alias TypeOfMember(string memberName) = typeof(__traits(getMember, config, memberName));
    alias MemberTypes = staticMap!(TypeOfMember, AliasSeq!(__traits(allMembers, Config)));

    foreach(i, arg; args)
    {
        // staticIndexOf yields -1 when the type is not in the list, so this
        // rejects any argument that cannot be matched to a member of Config.
        static assert(staticIndexOf!(typeof(arg), MemberTypes) != -1,
                      format!"Argument %s does not match the type of any members of Config"(i));

        // Since arguments are matched to members purely by type, two
        // arguments with the same type would be ambiguous.
        static foreach(j, Other; Args)
        {
            static if(i != j)
                static assert(!is(typeof(arg) == Other), format!"Argument %s and %s have the same type"(i, j));
        }

        // Assign the argument to the member with the same type.
        foreach(memberName; __traits(allMembers, Config))
        {
            static if(is(typeof(__traits(getMember, config, memberName)) == typeof(arg)))
                mixin("config." ~ memberName ~ " = arg;");
        }
    }

    return config;
}
/++
    Represents the type of an XML entity. Used by $(LREF EntityRange.Entity).

    Which entity types the parser actually reports depends on the
    $(LREF Config) that it was given (see the individual members).
  +/
enum EntityType
{
    /++
        A cdata section: `<![CDATA[ ... ]]>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-cdata-sect)
      +/
    cdata,

    /++
        An XML comment: `<!-- ... -->`.

        Comments are omitted from the parsing results when
        $(LREF2 skipComments, Config) is $(D SkipComments.yes).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-comments)
      +/
    comment,

    /++
        The start tag for an element. e.g. `<foo name="value">`.

        When $(LREF2 splitEmpty, Config) is $(D SplitEmpty.yes), an empty
        element tag is also reported as an elementStart (immediately followed
        by an $(LREF EntityType.elementEnd)).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementStart,

    /++
        The end tag for an element. e.g. `</foo>`.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEnd,

    /++
        The tag for an element with no contents or matching end tag. e.g.
        `<foo name="value"/>`.

        When $(LREF2 splitEmpty, Config) is $(D SplitEmpty.yes), the parser
        reports an $(LREF EntityType.elementStart) followed by an
        $(LREF EntityType.elementEnd) in place of an elementEmpty.

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)
      +/
    elementEmpty,

    /++
        A processing instruction such as `<?foo?>`. Note that the
        `<?xml ... ?>` is skipped and not treated as an $(LREF EntityType._pi).

        Processing instructions are skipped when $(LREF2 skipPI, Config) is
        $(D SkipPI.yes).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-pi)
      +/
    pi,

    /++
        The content of an element tag that is simple text.

        If there is an entity other than the end tag following the text, then
        the text includes up to that entity.

        Note however that character references (e.g.
        $(D_CODE_STRING "$(AMP)#42;")) and the predefined entity references (e.g.
        $(D_CODE_STRING "$(AMP)apos;")) are left unprocessed in the text. In
        order for them to be processed, the text should be passed to either
        $(REF_ALTTEXT decodeXML, decodeXML, dxml, util) or
        $(REF_ALTTEXT asDecodedXML, asDecodedXML, dxml, util). By default,
        entity references which are not predefined are considered invalid XML,
        because the DTD section is skipped, and thus they cannot be processed
        properly (see $(LREF2 throwOnEntityRef, Config) for how that can be
        configured).

        See_Also: $(LINK http://www.w3.org/TR/REC-xml/#sec-starttags)$(BR)
                  $(REF decodeXML, dxml, util)$(BR)
                  $(REF asDecodedXML, dxml, util)$(BR)
                  $(REF parseStdEntityRef, dxml, util)$(BR)
                  $(REF parseCharRef, dxml, util)$(BR)
                  $(LREF EntityRange.Entity._text)
      +/
    text,
}
However, in some cases, 749 for the parser to be fully compliant with the XML spec, 750 $(REF decodeXML, dxml, util) must be called on the text to mutate certain 751 constructs (e.g. removing any $(D_CODE_STRING '\r') in the text or 752 converting $(D_CODE_STRING "$(AMP)lt;") to $(D_CODE_STRING '<')). But 753 that's left up to the application. 754 755 The parser is not $(K_NOGC), but it allocates memory very minimally. It 756 allocates some of its state on the heap so it can validate attributes and 757 end tags. However, that state is shared among all the ranges that came from 758 the same call to parseXML (only the range farthest along in parsing 759 validates attributes or end tags), so $(LREF2 save, _EntityRange) does not 760 allocate memory unless $(D save) on the underlying range allocates memory. 761 The shared state currently uses a couple of dynamic arrays to validate the 762 tags and attributes, and if the document has a particularly deep tag depth 763 or has a lot of attributes on a start tag, then some reallocations may 764 occur until the maximum is reached, but enough is reserved that for most 765 documents, no reallocations will occur. The only other times that the 766 parser would allocate would be if an exception were thrown or if the range 767 that was passed to parseXML allocates for any reason when calling any of the 768 range primitives. 769 770 If invalid XML is encountered at any point during the parsing process, an 771 $(LREF XMLParsingException) will be thrown. If an exception has been thrown, 772 then the parser is in an invalid state, and it is an error to call any 773 functions on it. 774 775 However, note that XML validation is reduced for any entities that are 776 skipped (e.g. for anything in the DTD, validation is reduced to what is 777 required to correctly parse past it, and when 778 $(D Config.skipPI == SkipPI.yes), processing instructions are only validated 779 enough to correctly skip past them). 
780 781 As the module documentation says, this parser does not provide any DTD 782 support. It is not possible to properly support the DTD while returning 783 slices of the original input, and the DTD portion of the spec makes parsing 784 XML far, far more complicated. 785 786 A quick note about carriage returns$(COLON) per the XML spec, they are all 787 supposed to either be stripped out or replaced with newlines or spaces 788 before the XML parser even processes the text. That doesn't work when the 789 parser is slicing the original text and not mutating it at all. So, for the 790 purposes of parsing, this parser treats all carriage returns as if they 791 were newlines or spaces (though they won't count as newlines when counting 792 the lines for $(LREF TextPos)). However, they $(I will) appear in any text 793 fields or attribute values if they are in the document (since the text 794 fields and attribute values are slices of the original text). 795 $(REF decodeXML, dxml, util) can be used to strip them along with 796 converting any character references in the text. Alternatively, the 797 application can remove them all before calling parseXML, but it's not 798 necessary. 799 +/ 800 struct EntityRange(Config cfg, R) 801 if(isForwardRange!R && isSomeChar!(ElementType!R)) 802 { 803 import std.algorithm : canFind; 804 import std.range : only, takeExactly; 805 import std.typecons : Nullable; 806 import std.utf : byCodeUnit; 807 808 enum compileInTests = is(R == EntityRangeCompileTests); 809 810 public: 811 812 /// The Config used for when parsing the XML. 813 alias config = cfg; 814 815 /// The type of the range that EntityRange is parsing. 816 alias Input = R; 817 818 /++ 819 The type used when any slice of the original input is used. If $(D R) 820 is a string or supports slicing, then SliceOfR is the same as $(D R); 821 otherwise, it's the result of calling 822 $(PHOBOS_REF takeExactly, std, range) on the input. 
823 824 --- 825 import std.algorithm : filter; 826 import std.range : takeExactly; 827 828 static assert(is(EntityRange!(Config.init, string).SliceOfR == string)); 829 830 auto range = filter!(a => true)("some xml"); 831 832 static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR == 833 typeof(takeExactly(range, 42)))); 834 --- 835 +/ 836 static if(isDynamicArray!R || hasSlicing!R) 837 alias SliceOfR = R; 838 else 839 alias SliceOfR = typeof(takeExactly(R.init, 42)); 840 841 // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being 842 // a ddoc-ed unit test. 843 static if(compileInTests) @safe unittest 844 { 845 import std.algorithm : filter; 846 import std.range : takeExactly; 847 848 static assert(is(EntityRange!(Config.init, string).SliceOfR == string)); 849 850 auto range = filter!(a => true)("some xml"); 851 852 static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR == 853 typeof(takeExactly(range, 42)))); 854 } 855 856 857 /++ 858 Represents an entity in the XML document. 859 860 Note that the $(LREF2 type, EntityRange._Entity) determines which 861 properties can be used, and it can determine whether functions which 862 an Entity or $(LREF EntityRange) is passed to are allowed to be called. 863 Each function lists which $(LREF EntityType)s are allowed, and it is an 864 error to call them with any other $(LREF EntityType). 865 +/ 866 struct Entity 867 { 868 public: 869 870 import std.typecons : Tuple; 871 872 /++ 873 The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that 874 $(LREF2 attributes, EntityRange.EntityType) returns a range of. 875 876 See_Also: $(LREF2 attributes, EntityRange.Entity) 877 +/ 878 alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos, "pos"); 879 880 881 /++ 882 The $(LREF EntityType) for this Entity. 
+/
        @property EntityType type() @safe const pure nothrow @nogc
        {
            return _type;
        }

        ///
        static if(compileInTests) unittest
        {
            auto xml = "<root>\n" ~
                       " <!--no comment-->\n" ~
                       " <![CDATA[cdata run]]>\n" ~
                       " <text>I am text!</text>\n" ~
                       " <empty/>\n" ~
                       " <?pi?>\n" ~
                       "</root>";

            auto range = parseXML(xml);
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");
            range.popFront();

            assert(range.front.type == EntityType.comment);
            assert(range.front.text == "no comment");
            range.popFront();

            assert(range.front.type == EntityType.cdata);
            assert(range.front.text == "cdata run");
            range.popFront();

            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "text");
            range.popFront();

            assert(range.front.type == EntityType.text);
            assert(range.front.text == "I am text!");
            range.popFront();

            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "text");
            range.popFront();

            assert(range.front.type == EntityType.elementEmpty);
            assert(range.front.name == "empty");
            range.popFront();

            assert(range.front.type == EntityType.pi);
            assert(range.front.name == "pi");
            range.popFront();

            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "root");
            range.popFront();

            assert(range.empty);
        }


        /++
            The position in the original text where the entity starts.

            See_Also: $(LREF TextPos)$(BR)
                      $(LREF XMLParsingException._pos)
          +/
        @property TextPos pos() @safe const pure nothrow @nogc
        {
            return _pos;
        }

        ///
        static if(compileInTests) unittest
        {
            auto xml = "<root>\n" ~
                       " <foo>\n" ~
                       " Foo and bar. 
Always foo and bar...\n" ~ 958 " </foo>\n" ~ 959 "</root>"; 960 961 auto range = parseXML(xml); 962 assert(range.front.type == EntityType.elementStart); 963 assert(range.front.name == "root"); 964 assert(range.front.pos == TextPos(1, 1)); 965 range.popFront(); 966 967 assert(range.front.type == EntityType.elementStart); 968 assert(range.front.name == "foo"); 969 assert(range.front.pos == TextPos(2, 5)); 970 range.popFront(); 971 972 assert(range.front.type == EntityType.text); 973 assert(range.front.text == 974 "\n" ~ 975 " Foo and bar. Always foo and bar...\n" ~ 976 " "); 977 assert(range.front.pos == TextPos(2, 10)); 978 range.popFront(); 979 980 assert(range.front.type == EntityType.elementEnd); 981 assert(range.front.name == "foo"); 982 assert(range.front.pos == TextPos(4, 5)); 983 range.popFront(); 984 985 assert(range.front.type == EntityType.elementEnd); 986 assert(range.front.name == "root"); 987 assert(range.front.pos == TextPos(5, 1)); 988 range.popFront(); 989 990 assert(range.empty); 991 } 992 993 static if(compileInTests) unittest 994 { 995 import core.exception : AssertError; 996 import std.exception : enforce; 997 998 static void test(ER)(ref ER range, EntityType type, int row, int col, size_t line = __LINE__) 999 { 1000 enforce!AssertError(!range.empty, "unittest failure 1", __FILE__, line); 1001 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line); 1002 enforce!AssertError(range.front.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 1003 range.popFront(); 1004 } 1005 1006 auto xml = "<?xml?>\n" ~ 1007 " <!--comment-->\n" ~ 1008 " <?pi?>\n" ~ 1009 " <root>\n" ~ 1010 " <!--comment--><!--comment-->\n" ~ 1011 " <?pi?>\n" ~ 1012 " <![CDATA[]]>\n" ~ 1013 " <empty/> </root>\n" ~ 1014 " <!--comment-->\n" ~ 1015 " <?pi?>\n"; 1016 1017 { 1018 auto range = parseXML(xml); 1019 test(range, EntityType.comment, 2, 4); 1020 test(range, EntityType.pi, 3, 4); 1021 test(range, EntityType.elementStart, 4, 2); 1022 
test(range, EntityType.comment, 5, 11); 1023 test(range, EntityType.comment, 5, 25); 1024 test(range, EntityType.pi, 6, 8); 1025 test(range, EntityType.cdata, 7, 3); 1026 test(range, EntityType.elementEmpty, 8, 15); 1027 test(range, EntityType.elementEnd, 8, 28); 1028 test(range, EntityType.comment, 9, 2); 1029 test(range, EntityType.pi, 10, 2); 1030 } 1031 1032 auto range = parseXML!simpleXML(xml); 1033 test(range, EntityType.elementStart, 4, 2); 1034 test(range, EntityType.cdata, 7, 3); 1035 test(range, EntityType.elementStart, 8, 15); 1036 test(range, EntityType.elementEnd, 8, 15); 1037 test(range, EntityType.elementEnd, 8, 28); 1038 } 1039 1040 1041 /++ 1042 Gives the name of this Entity. 1043 1044 Note that this is the direct name in the XML for this entity and 1045 does not contain any of the names of any of the parent entities that 1046 this entity has. If an application wants the full "path" of the 1047 entity, then it will have to keep track of that itself. The parser 1048 does not do that as it would require allocating memory. 
$(TABLE
                $(TR $(TH Supported $(LREF EntityType)s:))
                $(TR $(TD $(LREF2 elementStart, EntityType)))
                $(TR $(TD $(LREF2 elementEnd, EntityType)))
                $(TR $(TD $(LREF2 elementEmpty, EntityType)))
                $(TR $(TD $(LREF2 pi, EntityType)))
            )
          +/
        @property SliceOfR name()
        {
            import std.format : format;
            import dxml.internal : checkedSave, stripBCU;

            // Calling name on any other kind of entity is a programming
            // error, so it is caught by an assertion rather than an exception.
            with(EntityType)
            {
                assert(_type == elementStart || _type == elementEnd ||
                       _type == elementEmpty || _type == pi,
                       format("name cannot be called with %s", _type));
            }

            // The stored name is saved before being handed out, so the
            // Entity's own copy is not the one returned to the caller.
            return stripBCU!R(checkedSave(_name));
        }

        ///
        static if(compileInTests) unittest
        {
            auto xml = "<root>\n" ~
                       " <empty/>\n" ~
                       " <?pi?>\n" ~
                       "</root>";

            auto range = parseXML(xml);
            assert(range.front.type == EntityType.elementStart);
            assert(range.front.name == "root");
            range.popFront();

            assert(range.front.type == EntityType.elementEmpty);
            assert(range.front.name == "empty");
            range.popFront();

            assert(range.front.type == EntityType.pi);
            assert(range.front.name == "pi");
            range.popFront();

            assert(range.front.type == EntityType.elementEnd);
            assert(range.front.name == "root");
            range.popFront();

            assert(range.empty);
        }


        /++
            Returns a lazy range of attributes for a start tag where each
            attribute is represented as a$(BR)
            $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!(
                      $(LREF2 SliceOfR, EntityRange), $(D_STRING "name"),
                      $(LREF2 SliceOfR, EntityRange), $(D_STRING "value"),
                      $(LREF TextPos), $(D_STRING "pos"))).
$(TABLE
                $(TR $(TH Supported $(LREF EntityType)s:))
                $(TR $(TD $(LREF2 elementStart, EntityType)))
                $(TR $(TD $(LREF2 elementEmpty, EntityType)))
            )

            See_Also: $(LREF2 Attribute, EntityRange.Entity)$(BR)
                      $(REF decodeXML, dxml, util)$(BR)
                      $(REF asDecodedXML, dxml, util)
          +/
        @property auto attributes()
        {
            import std.format : format;

            // Only start tags and empty-element tags can carry attributes.
            with(EntityType)
            {
                assert(only(elementStart, elementEmpty).canFind(_type),
                       format("attributes cannot be called with %s", _type));
            }

            // STag         ::= '<' Name (S Attribute)* S? '>'
            // Attribute    ::= Name Eq AttValue
            // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'

            // Lazily splits the saved tag text into (name, value, pos) tuples,
            // one attribute per popFront call.
            static struct AttributeRange
            {
                @property Attribute front()
                {
                    return _front;
                }

                void popFront()
                {
                    import dxml.internal : stripBCU;

                    stripWS(_text);
                    if(_text.input.empty)
                    {
                        // Nothing but whitespace was left, so there are no more attributes.
                        empty = true;
                        return;
                    }

                    // The reported position is where the attribute's name begins.
                    immutable attrPos = _text.pos;
                    auto attrName = stripBCU!R(_text.takeName!'='());

                    // Name Eq AttValue, with optional whitespace on both sides of
                    // the character popped here (presumably the '=').
                    stripWS(_text);
                    popFrontAndIncCol(_text);
                    stripWS(_text);

                    auto attrValue = stripBCU!R(takeEnquotedText(_text));
                    _front = Attribute(attrName, attrValue, attrPos);
                }

                @property auto save()
                {
                    import dxml.internal : checkedSave;

                    auto copy = this;
                    copy._front = Attribute(_front.name.save, checkedSave(_front.value), _front.pos);
                    copy._text.input = checkedSave(copy._text.input);
                    return copy;
                }

                this(typeof(_text) text)
                {
                    // The explicit initialization should not be necessary; see
                    // https://issues.dlang.org/show_bug.cgi?id=13945
                    _front = Attribute.init;
                    _text = text;

                    // Prime the range with its first attribute unless there is no text at all.
                    if(!_text.input.empty)
                        popFront();
                    else
                        empty = true;
                }

                bool empty;
                Attribute _front;
                typeof(_savedText) _text;
            }

            return AttributeRange(_savedText.save);
        }

        ///
        static if(compileInTests) unittest
        {
            import std.algorithm.comparison : equal;
            import std.algorithm.iteration : filter;
            {
                auto xml = "<root/>";
                auto range = parseXML(xml);
                assert(range.front.type == EntityType.elementEmpty);
                assert(range.front.attributes.empty);

                static assert(is(ElementType!(typeof(range.front.attributes)) ==
                                 typeof(range).Entity.Attribute));
            }
            {
                auto xml = "<root a='42' q='29' w='hello'/>";
                auto range = parseXML(xml);
                assert(range.front.type == EntityType.elementEmpty);

                auto attrs = range.front.attributes;
                assert(attrs.front.name == "a");
                assert(attrs.front.value == "42");
                assert(attrs.front.pos == TextPos(1, 7));
                attrs.popFront();

                assert(attrs.front.name == "q");
                assert(attrs.front.value == "29");
                assert(attrs.front.pos == TextPos(1, 14));
                attrs.popFront();

                assert(attrs.front.name == "w");
                assert(attrs.front.value == "hello");
                assert(attrs.front.pos == TextPos(1, 21));
                attrs.popFront();

                assert(attrs.empty);
            }
            // Because the type of name and value is SliceOfR, == with a string
            // only works if the range passed to parseXML was string.
1222 { 1223 auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>"); 1224 auto range = parseXML(xml); 1225 assert(range.front.type == EntityType.elementEmpty); 1226 1227 auto attrs = range.front.attributes; 1228 assert(equal(attrs.front.name, "a")); 1229 assert(equal(attrs.front.value, "42")); 1230 assert(attrs.front.pos == TextPos(1, 7)); 1231 attrs.popFront(); 1232 1233 assert(equal(attrs.front.name, "q")); 1234 assert(equal(attrs.front.value, "29")); 1235 assert(attrs.front.pos == TextPos(1, 14)); 1236 attrs.popFront(); 1237 1238 assert(equal(attrs.front.name, "w")); 1239 assert(equal(attrs.front.value, "hello")); 1240 assert(attrs.front.pos == TextPos(1, 21)); 1241 attrs.popFront(); 1242 1243 assert(attrs.empty); 1244 } 1245 } 1246 1247 static if(compileInTests) unittest 1248 { 1249 import core.exception : AssertError; 1250 import std.algorithm.comparison : equal; 1251 import std.exception : assertNotThrown, collectException, enforce; 1252 import std.typecons : Tuple, tuple; 1253 import dxml.internal : codeLen, testRangeFuncs; 1254 1255 static bool cmpAttr(T, U)(T lhs, U rhs) 1256 { 1257 return equal(lhs[0].save, rhs[0].save) && 1258 equal(lhs[1].save, rhs[1].save); 1259 } 1260 1261 static void test(alias func, ThrowOnEntityRef toer)(string text, EntityType type, 1262 Tuple!(string, string)[] expected, 1263 int row, int col, size_t line = __LINE__) 1264 { 1265 auto range = assertNotThrown!XMLParsingException(parseXML!(makeConfig(toer))(func(text)), 1266 "unittest 1", __FILE__, line); 1267 enforce!AssertError(range.front.type == type, "unittest failure 2", __FILE__, line); 1268 enforce!AssertError(equal!cmpAttr(range.front.attributes, expected), 1269 "unittest failure 3", __FILE__, line); 1270 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 4", __FILE__, line); 1271 } 1272 1273 static void testFail(alias func, ThrowOnEntityRef toer)(string text, 1274 int row, int col, size_t line = __LINE__) 1275 { 1276 auto e = 
collectException!XMLParsingException(parseXML!(makeConfig(toer))(func(text))); 1277 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 1278 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 1279 } 1280 1281 static foreach(func; testRangeFuncs) 1282 { 1283 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 1284 { 1285 test!(func, toer)("<root a='b'/>", EntityType.elementEmpty, [tuple("a", "b")], 1, 14); 1286 test!(func, toer)("<root a = 'b' />", EntityType.elementEmpty, [tuple("a", "b")], 1, 17); 1287 test!(func, toer)("<root \n\n a \n\n = \n\n 'b' \n\n />", EntityType.elementEmpty, 1288 [tuple("a", "b")], 9, 4); 1289 test!(func, toer)("<root a='b'></root>", EntityType.elementStart, [tuple("a", "b")], 1, 13); 1290 test!(func, toer)("<root a = 'b' ></root>", EntityType.elementStart, [tuple("a", "b")], 1, 16); 1291 test!(func, toer)("<root \n a \n = \n 'b' \n ></root>", EntityType.elementStart, 1292 [tuple("a", "b")], 5, 3); 1293 1294 test!(func, toer)("<root foo='\n\n\n'/>", EntityType.elementEmpty, [tuple("foo", "\n\n\n")], 4, 4); 1295 test!(func, toer)(`<root foo='"""'/>`, EntityType.elementEmpty, [tuple("foo", `"""`)], 1, 18); 1296 test!(func, toer)(`<root foo="'''"/>`, EntityType.elementEmpty, [tuple("foo", `'''`)], 1, 18); 1297 test!(func, toer)(`<root foo.=""/>`, EntityType.elementEmpty, [tuple("foo.", "")], 1, 16); 1298 test!(func, toer)(`<root foo="bar="/>`, EntityType.elementEmpty, [tuple("foo", "bar=")], 1, 19); 1299 1300 test!(func, toer)("<root foo='bar' a='b' hello='world'/>", EntityType.elementEmpty, 1301 [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38); 1302 test!(func, toer)(`<root foo="bar" a='b' hello="world"/>`, EntityType.elementEmpty, 1303 [tuple("foo", "bar"), tuple("a", "b"), tuple("hello", "world")], 1, 38); 1304 1305 test!(func, toer)(`<root foo="*" a='B' hello="%foo"/>`, EntityType.elementEmpty, 1306 [tuple("foo", "*"), tuple("a", "B"), 
tuple("hello", "%foo")], 1, 44); 1307 1308 test!(func, toer)(`<root foo="&" a='vector<int>'></root>`, EntityType.elementStart, 1309 [tuple("foo", "&"), tuple("a", "vector<int>"),], 1, 41); 1310 1311 test!(func, toer)(`<foo 京都市="ディラン"/>`, EntityType.elementEmpty, 1312 [tuple("京都市", "ディラン")], 1, codeLen!(func, `<foo 京都市="ディラン"/>`) + 1); 1313 1314 test!(func, toer)(`<root foo=">"/>`, EntityType.elementEmpty, [tuple("foo", ">")], 1, 16); 1315 test!(func, toer)(`<root foo=">>>>>>"/>`, EntityType.elementEmpty, [tuple("foo", ">>>>>>")], 1, 21); 1316 test!(func, toer)(`<root foo=">"></root>`, EntityType.elementStart, [tuple("foo", ">")], 1, 15); 1317 test!(func, toer)(`<root foo=">>>>>>"></root>`, EntityType.elementStart, [tuple("foo", ">>>>>>")], 1, 20); 1318 1319 test!(func, toer)(`<root foo="bar" foos="ball"/>`, EntityType.elementEmpty, 1320 [tuple("foo", "bar"), tuple("foos", "ball")], 1, 30); 1321 1322 testFail!(func, toer)(`<root a="""/>`, 1, 11); 1323 testFail!(func, toer)(`<root a='''/>`, 1, 11); 1324 testFail!(func, toer)("<root a=/>", 1, 9); 1325 testFail!(func, toer)("<root a='/>", 1, 9); 1326 testFail!(func, toer)("<root a='/>", 1, 9); 1327 testFail!(func, toer)("<root =''/>", 1, 7); 1328 testFail!(func, toer)(`<root a ""/>`, 1, 9); 1329 testFail!(func, toer)(`<root a""/>`, 1, 8); 1330 testFail!(func, toer)(`<root a/>`, 1, 8); 1331 testFail!(func, toer)("<root foo='bar' a=/>", 1, 19); 1332 testFail!(func, toer)("<root foo='bar' a='/>", 1, 19); 1333 testFail!(func, toer)("<root foo='bar' a='/>", 1, 19); 1334 testFail!(func, toer)("<root foo='bar' =''/>", 1, 17); 1335 testFail!(func, toer)("<root foo='bar' a= hello='world'/>", 1, 20); 1336 // It's 33 rather than 28, because it throws when processing the start tag and not when processing 1337 // the attributes. So, the mismatched quotes are detected before the attributes are checked. 
1338 testFail!(func, toer)("<root foo='bar' a=' hello='world'/>", 1, 33); 1339 testFail!(func, toer)("<root foo='bar' ='' hello='world'/>", 1, 17); 1340 testFail!(func, toer)("<root foo='bar'a='b'/>", 1, 16); 1341 testFail!(func, toer)(`<root .foo="bar"/>`, 1, 7); 1342 1343 testFail!(func, toer)(`<root foo="<"/>`, 1, 12); 1344 testFail!(func, toer)(`<root foo="<world"/>`, 1, 12); 1345 testFail!(func, toer)(`<root foo="hello<world"/>`, 1, 17); 1346 testFail!(func, toer)(`<root foo="&"/>`, 1, 12); 1347 testFail!(func, toer)(`<root foo="hello&"/>`, 1, 17); 1348 testFail!(func, toer)(`<root foo="hello&world"/>`, 1, 17); 1349 testFail!(func, toer)(`<root foo="&;"/>`, 1, 12); 1350 testFail!(func, toer)(`<root foo="&#;"/>`, 1, 12); 1351 testFail!(func, toer)(`<root foo="&#x;"/>`, 1, 12); 1352 testFail!(func, toer)(`<root foo="&#A;"/>`, 1, 12); 1353 testFail!(func, toer)(`<root foo="&#xG;"/>`, 1, 12); 1354 testFail!(func, toer)(`<root foo="*"/>`, 1, 12); 1355 testFail!(func, toer)(`<root foo="B"/>`, 1, 12); 1356 testFail!(func, toer)(`<root foo=""/>`, 1, 12); 1357 1358 testFail!(func, toer)("<root\n\nfoo='\nbarB'></root>", 4, 4); 1359 1360 testFail!(func, toer)(`<root a="""></root>`, 1, 11); 1361 testFail!(func, toer)(`<root a='''></root>`, 1, 11); 1362 testFail!(func, toer)("<root a=></root>", 1, 9); 1363 testFail!(func, toer)("<root a='></root>", 1, 9); 1364 testFail!(func, toer)("<root a='></root>", 1, 9); 1365 testFail!(func, toer)("<root =''></root>", 1, 7); 1366 testFail!(func, toer)(`<root a ""></root>`, 1, 9); 1367 testFail!(func, toer)(`<root a""></root>`, 1, 8); 1368 testFail!(func, toer)(`<root a></root>`, 1, 8); 1369 testFail!(func, toer)("<root foo='bar' a=></root>", 1, 19); 1370 testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19); 1371 testFail!(func, toer)("<root foo='bar' a='></root>", 1, 19); 1372 testFail!(func, toer)("<root foo='bar' =''></root>", 1, 17); 1373 testFail!(func, toer)("<root foo='bar' a= hello='world'></root>", 1, 20); 1374 
testFail!(func, toer)("<root foo='bar' a=' hello='world'></root>", 1, 33); 1375 testFail!(func, toer)("<root foo='bar' ='' hello='world'></root>", 1, 17); 1376 testFail!(func, toer)("<root foo='bar'a='b'></root>", 1, 16); 1377 testFail!(func, toer)(`<root .foo='bar'></root>`, 1, 7); 1378 1379 testFail!(func, toer)(`<root foo="<"></root>`, 1, 12); 1380 testFail!(func, toer)(`<root foo="<world"></root>`, 1, 12); 1381 testFail!(func, toer)(`<root foo="hello<world"></root>`, 1, 17); 1382 testFail!(func, toer)(`<root foo="&"></root>`, 1, 12); 1383 testFail!(func, toer)(`<root foo="hello&"></root>`, 1, 17); 1384 testFail!(func, toer)(`<root foo="hello&world"></root>`, 1, 17); 1385 testFail!(func, toer)(`<root foo="&;"></root>`, 1, 12); 1386 testFail!(func, toer)(`<root foo="&#;"></root>`, 1, 12); 1387 testFail!(func, toer)(`<root foo="&#x;"></root>`, 1, 12); 1388 testFail!(func, toer)(`<root foo="&#A;"></root>`, 1, 12); 1389 testFail!(func, toer)(`<root foo="&#xG;"></root>`, 1, 12); 1390 testFail!(func, toer)(`<root foo="*"></root>`, 1, 12); 1391 testFail!(func, toer)(`<root foo="B"></root>`, 1, 12); 1392 testFail!(func, toer)(`<root foo=""></root>`, 1, 12); 1393 1394 testFail!(func, toer)(`<root a='42' a='19'/>`, 1, 14); 1395 testFail!(func, toer)(`<root a='42' b='hello' a='19'/>`, 1, 24); 1396 testFail!(func, toer)(`<root a='42' b='hello' a='19' c=''/>`, 1, 24); 1397 testFail!(func, toer)(`<root a='' b='' c='' d='' e='' f='' g='' e='' h=''/>`, 1, 42); 1398 testFail!(func, toer)(`<root foo='bar' foo='bar'/>`, 1, 17); 1399 1400 test!(func, toer)(`<root foo="&"></root>`, EntityType.elementStart, 1401 [tuple("foo", "&")], 1, 19); 1402 test!(func, toer)(`<root foo="foo&<>'"bar"></root>`, EntityType.elementStart, 1403 [tuple("foo", "foo&<>'"bar")], 1, 45); 1404 testFail!(func, toer)("<root foo='&;'></root>", 1, 12); 1405 testFail!(func, toer)("<root foo='&.;'></root>", 1, 12); 1406 testFail!(func, toer)("<root foo='\n & ule'></root>", 2, 2); 1407 testFail!(func, toer)("<root 
foo='\n &foo bar'></root>", 2, 2);
                }
                {
                    alias toer = ThrowOnEntityRef.yes;
                    testFail!(func, toer)(`<root foo="&foo;"/>`, 1, 12);
                    testFail!(func, toer)(`<root foo="&foo;"></root>`, 1, 12);
                    testFail!(func, toer)("<root foo='foo&bar.;'></root>", 1, 15);
                    testFail!(func, toer)(`<root foo="hello &a; world"></root>`, 1, 18);
                    testFail!(func, toer)("<root foo='hello \n &a; \n world'></root>", 2, 2);
                }
                {
                    alias toer = ThrowOnEntityRef.no;
                    test!(func, toer)(`<root foo="&foo;"/>`, EntityType.elementEmpty,
                                      [tuple("foo", "&foo;")], 1, 20);
                    test!(func, toer)(`<root foo="&foo;"></root>`, EntityType.elementStart,
                                      [tuple("foo", "&foo;")], 1, 19);
                    test!(func, toer)("<root foo='foo&bar.;'></root>", EntityType.elementStart,
                                      [tuple("foo", "foo&bar.;")], 1, 23);
                    test!(func, toer)(`<root foo="hello &a; world"></root>`, EntityType.elementStart,
                                      [tuple("foo", "hello &a; world")], 1, 29);
                    test!(func, toer)("<root foo='hello \n &a; \n world'></root>", EntityType.elementStart,
                                      [tuple("foo", "hello \n &a; \n world")], 3, 9);
                }
            }
        }


        /++
            Returns the textual value of this Entity.

            In the case of $(LREF EntityType.pi), this is the
            text that follows the name, whereas in the other cases, the text is
            the entire contents of the entity (save for the delimiters on the
            ends if that entity has them).

            $(TABLE
                $(TR $(TH Supported $(LREF EntityType)s:))
                $(TR $(TD $(LREF2 cdata, EntityType)))
                $(TR $(TD $(LREF2 comment, EntityType)))
                $(TR $(TD $(LREF2 pi, EntityType)))
                $(TR $(TD $(LREF2 _text, EntityType)))
            )

            See_Also: $(REF decodeXML, dxml, util)$(BR)
                      $(REF asDecodedXML, dxml, util)$(BR)
                      $(REF stripIndent, dxml, util)$(BR)
                      $(REF withoutIndent, dxml, util)
          +/
        @property SliceOfR text()
        {
            import std.format : format;
            import dxml.internal : checkedSave, stripBCU;

            // Inside the with block, `text` names EntityType.text rather than
            // this property.
            with(EntityType)
            {
                assert(_type == cdata || _type == comment || _type == pi || _type == text,
                       format("text cannot be called with %s", _type));
            }

            // A saved copy of the stored text is returned, not the stored range itself.
            return stripBCU!R(checkedSave(_savedText.input));
        }

        ///
        static if(compileInTests) unittest
        {
            import std.range.primitives : empty;

            auto xml = "<?xml version='1.0'?>\n" ~
                       "<?instructionName?>\n" ~
                       "<?foo here is something to say?>\n" ~
                       "<root>\n" ~
                       " <![CDATA[ Yay! random text >> << ]]>\n" ~
                       " <!-- some random comment -->\n" ~
                       " <p>something here</p>\n" ~
                       " <p>\n" ~
                       " something else\n" ~
                       " here</p>\n" ~
                       "</root>";
            auto range = parseXML(xml);

            // "<?instructionName?>\n" ~
            assert(range.front.type == EntityType.pi);
            assert(range.front.name == "instructionName");
            assert(range.front.text.empty);

            // "<?foo here is something to say?>\n" ~
            range.popFront();
            assert(range.front.type == EntityType.pi);
            assert(range.front.name == "foo");
            assert(range.front.text == "here is something to say");

            // "<root>\n" ~
            range.popFront();
            assert(range.front.type == EntityType.elementStart);

            // " <![CDATA[ Yay! random text >> << ]]>\n" ~
            range.popFront();
            assert(range.front.type == EntityType.cdata);
            assert(range.front.text == " Yay! 
random text >> << "); 1504 1505 // " <!-- some random comment -->\n" ~ 1506 range.popFront(); 1507 assert(range.front.type == EntityType.comment); 1508 assert(range.front.text == " some random comment "); 1509 1510 // " <p>something here</p>\n" ~ 1511 range.popFront(); 1512 assert(range.front.type == EntityType.elementStart); 1513 assert(range.front.name == "p"); 1514 1515 range.popFront(); 1516 assert(range.front.type == EntityType.text); 1517 assert(range.front.text == "something here"); 1518 1519 range.popFront(); 1520 assert(range.front.type == EntityType.elementEnd); 1521 assert(range.front.name == "p"); 1522 1523 // " <p>\n" ~ 1524 // " something else\n" ~ 1525 // " here</p>\n" ~ 1526 range.popFront(); 1527 assert(range.front.type == EntityType.elementStart); 1528 1529 range.popFront(); 1530 assert(range.front.type == EntityType.text); 1531 assert(range.front.text == "\n something else\n here"); 1532 1533 range.popFront(); 1534 assert(range.front.type == EntityType.elementEnd); 1535 1536 // "</root>" 1537 range.popFront(); 1538 assert(range.front.type == EntityType.elementEnd); 1539 1540 range.popFront(); 1541 assert(range.empty); 1542 } 1543 1544 1545 // Reduce the chance of bugs if reference-type ranges are involved. 
static if(!isDynamicArray!R) this(this)
        {
            // Entities that have a name get their own saved copy of it on
            // copy; the remaining entity types leave _name alone.
            with(EntityType) final switch(_type)
            {
                case elementStart, elementEnd, elementEmpty, pi:
                    _name = _name.save;
                    break;
                case cdata, comment, text:
                    break;
            }

            // Every entity type except an end tag also re-saves its text.
            if(_type != EntityType.elementEnd)
                _savedText = _savedText.save;
        }

        static if(compileInTests) unittest
        {
            import std.algorithm.comparison : equal;
            import dxml.internal : testRangeFuncs;

            static bool cmpAttr(T)(T lhs, T rhs)
            {
                return equal(lhs.name.save, rhs.name.save) &&
                       equal(lhs.value.save, rhs.value.save);
            }

            {
                auto xml = "<root>\n" ~
                           " <foo a='42'/>\n" ~
                           " <foo b='42'/>\n" ~
                           " <nocomment>nothing to say</nocomment>\n" ~
                           "</root>";

                // The duplicate lines aren't typos. We want to ensure that the
                // values are independent and that nothing was consumed.
1587 static foreach(func; testRangeFuncs) 1588 {{ 1589 auto range = parseXML(func(xml)); 1590 range.popFront(); 1591 { 1592 auto entity = range.front; 1593 auto entity2 = entity; 1594 assert(entity.pos == entity2.pos); 1595 assert(equal(entity.name, entity2.name)); 1596 assert(equal(entity.name, entity2.name)); 1597 assert(equal!cmpAttr(entity.attributes, entity2.attributes)); 1598 assert(equal!cmpAttr(entity.attributes, entity2.attributes)); 1599 range.popFront(); 1600 assert(entity.pos == entity2.pos); 1601 assert(entity.pos != range.front.pos); 1602 } 1603 range.popFront(); 1604 range.popFront(); 1605 { 1606 auto entity = range.front; 1607 auto entity2 = entity; 1608 assert(entity.pos == entity2.pos); 1609 assert(equal(entity.text, entity2.text)); 1610 assert(equal(entity.text, entity2.text)); 1611 range.popFront(); 1612 assert(entity.pos == entity2.pos); 1613 assert(entity.pos != range.front.pos); 1614 } 1615 }} 1616 } 1617 { 1618 auto xml = "<root>\n" ~ 1619 " <![CDATA[whatever]]>\n" ~ 1620 " <?pi?>\n" ~ 1621 " <!--comment-->\n" ~ 1622 " <empty/>\n" ~ 1623 " <noend a='foo' b='bar'/>\n" ~ 1624 " <foo baz='42'></foo>\n" ~ 1625 "</root>"; 1626 1627 static foreach(func; testRangeFuncs) 1628 { 1629 for(auto range = parseXML(func(xml)); !range.empty; range.popFront()) 1630 { 1631 auto entity = range.front; 1632 auto entity2 = entity; 1633 1634 assert(entity.pos == range.front.pos); 1635 assert(entity.pos == entity2.pos); 1636 assert(entity.type == range.front.type); 1637 assert(entity.type == entity2.type); 1638 1639 with(EntityType) final switch(entity.type) 1640 { 1641 case cdata: goto case text; 1642 case comment: goto case text; 1643 case elementStart: 1644 { 1645 assert(equal!cmpAttr(entity.attributes, range.front.attributes)); 1646 assert(equal!cmpAttr(entity.attributes, entity2.attributes)); 1647 goto case elementEnd; 1648 } 1649 case elementEnd: 1650 { 1651 assert(equal(entity.name, range.front.name)); 1652 assert(equal(entity.name, entity2.name)); 1653 
break;
                            }
                            case elementEmpty: goto case elementStart;
                            case text:
                            {
                                assert(equal(entity.text, range.front.text));
                                assert(equal(entity.text, entity2.text));
                                break;
                            }
                            case pi:
                            {
                                assert(equal(entity.name, range.front.name));
                                assert(equal(entity.name, entity2.name));
                                goto case text;
                            }
                        }
                    }
                }
            }
        }


    private:

        // Creates an Entity of the given type. The remaining fields are filled
        // in by the code that constructs it.
        this(EntityType type)
        {
            _type = type;

            // The explicit resets below should not be needed; they are here
            // because of https://issues.dlang.org/show_bug.cgi?id=13945
            _name = typeof(_name).init;
            _savedText = typeof(_savedText).init;
        }

        EntityType _type;
        TextPos _pos;
        Taken _name;
        typeof(EntityRange._savedText) _savedText;
    }


    /++
        Returns the $(LREF Entity) representing the entity in the XML document
        which was most recently parsed.
      +/
    @property Entity front()
    {
        auto entity = Entity(_type);
        entity._pos = _entityPos;

        // Which fields are copied (always via save) depends on the entity
        // type: tags and processing instructions get a name, and everything
        // except an end tag gets the saved text.
        with(EntityType) final switch(_type)
        {
            case elementStart, elementEmpty, pi:
                entity._name = _name.save;
                entity._savedText = _savedText.save;
                break;
            case elementEnd:
                entity._name = _name.save;
                break;
            case cdata, comment, text:
                entity._savedText = _savedText.save;
                break;
        }

        return entity;
    }


    /++
        Move to the next entity.

        The next entity is the next one that is linearly in the XML document.
        So, if the current entity has child entities, the next entity will be
        the first child entity, whereas if it has no child entities, it will be
        the next entity at the same level.

        Throws: $(LREF XMLParsingException) on invalid XML.
+/
    void popFront()
    {
        // Dispatch on where the parser currently is in the document grammar.
        final switch(_grammarPos) with(GrammarPos)
        {
            case documentStart:
                _parseDocumentStart();
                break;

            case prologMisc1:
                _parseAtPrologMisc!1();
                break;

            case prologMisc2:
                _parseAtPrologMisc!2();
                break;

            case splittingEmpty:
                // No parse function is called here: front just becomes an end
                // tag, and the next position depends on whether any tags are
                // still open.
                _type = EntityType.elementEnd;
                _tagStack.sawEntity();
                if(_tagStack.depth == 0)
                    _grammarPos = GrammarPos.endMisc;
                else
                    _grammarPos = GrammarPos.contentCharData2;
                break;

            case contentCharData1:
                // The current entity is a start tag, so it is pushed onto the
                // tag stack before its content is parsed.
                assert(_type == EntityType.elementStart);
                _tagStack.pushTag(_name.save);
                _parseAtContentCharData();
                break;

            case contentMid:
                _parseAtContentMid();
                break;

            case contentCharData2:
                _parseAtContentCharData();
                break;

            case endTag:
                _parseElementEnd();
                break;

            case endMisc:
                _parseAtEndMisc();
                break;

            case documentEnd:
                assert(0, "It's illegal to call popFront() on an empty EntityRange.");
        }
    }


    /++
        Whether the end of the XML document has been reached.

        Note that because an $(LREF XMLParsingException) will be thrown on
        invalid XML, it's actually possible to call
        $(LREF2 front, EntityRange) and $(LREF2 popFront, EntityRange) without
        checking empty if the only way that empty would be true is if the XML
        were invalid (e.g. if at a start tag, it's a given that there's at
        least one end tag left in the document unless it's invalid XML).

        However, of course, caution should be used to ensure that incorrect
        assumptions are not made that allow the document to reach its end
        earlier than predicted without throwing an $(LREF XMLParsingException),
        since it's still an error to call $(LREF2 front, EntityRange) or
        $(LREF2 popFront, EntityRange) if empty would return true.
+/
    @property bool empty() @safe const pure nothrow @nogc
    {
        return _grammarPos == GrammarPos.documentEnd;
    }


    /++
        Forward range function for obtaining a copy of the range which can then
        be iterated independently of the original.
      +/
    @property auto save()
    {
        auto copy = this;

        // Each range member is only saved when it differs from its type's init
        // value, because some ranges blow up when save is called on their
        // init value (e.g. a range that's a class).
        if(copy._name !is typeof(copy._name).init)
            copy._name = _name.save;

        if(copy._text.input !is typeof(copy._text.input).init)
            copy._text.input = _text.input.save;

        if(copy._savedText.input !is typeof(copy._savedText.input).init)
            copy._savedText.input = _savedText.input.save;

        return copy;
    }

    static if(compileInTests) unittest
    {
        import std.algorithm.comparison : equal;
        import std.exception : assertNotThrown;
        import dxml.internal : testRangeFuncs;

        static bool cmpAttr(T)(T lhs, T rhs)
        {
            return equal(lhs.name.save, rhs.name.save) &&
                   equal(lhs.value.save, rhs.value.save);
        }

        static void testEqual(ER)(ER one, ER two)
        {
            while(!one.empty && !two.empty)
            {
                auto left = one.front;
                auto right = two.front;

                assert(left.pos == right.pos);
                assert(left.type == right.type);

                with(EntityType) final switch(left.type)
                {
                    case cdata: goto case text;
                    case comment: goto case text;
                    case elementStart:
                    {
                        assert(equal!cmpAttr(left.attributes, right.attributes));
                        goto case elementEnd;
                    }
                    case elementEnd: assert(equal(left.name, right.name)); break;
                    case elementEmpty: goto case elementStart;
                    case text: assert(equal(left.text, right.text)); break;
                    case pi: assert(equal(left.name, right.name)); goto case text;
                }

                one.popFront();
                two.popFront();
            }

            assert(one.empty);
1837 assert(two.empty); 1838 } 1839 1840 auto xml = "<root>\n" ~ 1841 " <!-- comment -->\n" ~ 1842 " <something>\n" ~ 1843 " <else/>\n" ~ 1844 " somet text <i>goes</i> here\n" ~ 1845 " </something>\n" ~ 1846 "</root>"; 1847 1848 static foreach(i, func; testRangeFuncs) 1849 {{ 1850 auto text = func(xml); 1851 testEqual(parseXML(text.save), parseXML(text.save)); 1852 auto range = parseXML(text.save); 1853 testEqual(range.save, range.save); 1854 }} 1855 } 1856 1857 1858 /++ 1859 Returns an empty range. This corresponds to 1860 $(PHOBOS_REF _takeNone, std, range) except that it doesn't create a 1861 wrapper type. 1862 +/ 1863 EntityRange takeNone() 1864 { 1865 auto retval = save; 1866 retval._grammarPos = GrammarPos.documentEnd; 1867 return retval; 1868 } 1869 1870 1871 private: 1872 1873 void _parseDocumentStart() 1874 { 1875 auto orig = _text.save; 1876 immutable wasWS = _text.stripWS(); 1877 if(_text.stripStartsWith("<?xml")) 1878 { 1879 if(wasWS) 1880 throw new XMLParsingException("Cannot have whitespace before the <?xml...?> declaration", TextPos.init); 1881 checkNotEmpty(_text); 1882 if(_text.input.front == '?' 
|| isSpace(_text.input.front)) 1883 _text.skipUntilAndDrop!"?>"(); 1884 else 1885 _text = orig; 1886 } 1887 _grammarPos = GrammarPos.prologMisc1; 1888 _parseAtPrologMisc!1(); 1889 } 1890 1891 static if(compileInTests) unittest 1892 { 1893 import core.exception : AssertError; 1894 import std.exception : assertNotThrown, enforce; 1895 import dxml.internal : testRangeFuncs; 1896 1897 static void test(alias func)(string xml, int row, int col, size_t line = __LINE__) 1898 { 1899 auto range = assertNotThrown!XMLParsingException(parseXML(func(xml))); 1900 enforce!AssertError(range._type == EntityType.elementEmpty, "unittest failure 1", __FILE__, line); 1901 enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 1902 } 1903 1904 static foreach(func; testRangeFuncs) 1905 { 1906 test!func("<root/>", 1, 8); 1907 test!func("\n\t\n <root/> \n", 3, 9); 1908 test!func("<?xml\n\n\nversion='1.8'\n\n\n\nencoding='UTF-8'\n\n\nstandalone='yes'\n?><root/>", 12, 10); 1909 test!func("<?xml\n\n\n \r\r\r\n\nversion='1.8'?><root/>", 6, 23); 1910 test!func("<?xml\n\n\n \r\r\r\n\nversion='1.8'?>\n <root/>", 7, 13); 1911 test!func("<root/>", 1, 8); 1912 test!func("\n\t\n <root/> \n", 3, 9); 1913 } 1914 } 1915 1916 1917 // Parse at GrammarPos.prologMisc1 or GrammarPos.prologMisc2. 1918 void _parseAtPrologMisc(int miscNum)() 1919 { 1920 static assert(miscNum == 1 || miscNum == 2); 1921 1922 // document ::= prolog element Misc* 1923 // prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 1924 // Misc ::= Comment | PI | S 1925 1926 stripWS(_text); 1927 checkNotEmpty(_text); 1928 if(_text.input.front != '<') 1929 throw new XMLParsingException("Expected <", _text.pos); 1930 popFrontAndIncCol(_text); 1931 checkNotEmpty(_text); 1932 1933 switch(_text.input.front) 1934 { 1935 // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1936 // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? 
'>' 1937 case '!': 1938 { 1939 immutable bangPos = _text.pos; 1940 popFrontAndIncCol(_text); 1941 if(_text.stripStartsWith("--")) 1942 { 1943 _parseComment(); 1944 static if(config.skipComments == SkipComments.yes) 1945 _parseAtPrologMisc!miscNum(); 1946 break; 1947 } 1948 static if(miscNum == 1) 1949 { 1950 if(_text.stripStartsWith("DOCTYPE")) 1951 { 1952 if(!_text.stripWS()) 1953 throw new XMLParsingException("Whitespace must follow <!DOCTYPE", _text.pos); 1954 _parseDoctypeDecl(); 1955 break; 1956 } 1957 throw new XMLParsingException("Expected Comment or DOCTYPE section", bangPos); 1958 } 1959 else 1960 { 1961 if(_text.stripStartsWith("DOCTYPE")) 1962 { 1963 throw new XMLParsingException("Only one <!DOCTYPE ...> declaration allowed per XML document", 1964 bangPos); 1965 } 1966 throw new XMLParsingException("Expected Comment", bangPos); 1967 } 1968 } 1969 // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 1970 case '?': 1971 { 1972 _parsePI(); 1973 static if(config.skipPI == SkipPI.yes) 1974 popFront(); 1975 break; 1976 } 1977 // element ::= EmptyElemTag | STag content ETag 1978 default: 1979 { 1980 _parseElementStart(); 1981 break; 1982 } 1983 } 1984 } 1985 1986 1987 // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1988 // Parses a comment. <!-- was already removed from the front of the input. 
void _parseComment()
{
    static if(config.skipComments == SkipComments.yes)
        _text.skipUntilAndDrop!"--"();
    else
    {
        // 4 == "<!--".length, which has already been consumed.
        _entityPos = TextPos(_text.pos.line, _text.pos.col - 4);
        _type = EntityType.comment;
        _tagStack.sawEntity();
        _savedText.pos = _text.pos;
        _savedText.input = _text.takeUntilAndDrop!"--"();
    }
    if(_text.input.empty || _text.input.front != '>')
        throw new XMLParsingException("Comments cannot contain -- and cannot be terminated by --->", _text.pos);
    // This is here rather than at the end of the previous static if block
    // so that the error message for improperly terminating a comment takes
    // precedence over the one involving invalid characters in the comment.
    static if(config.skipComments == SkipComments.no)
        checkText!true(_savedText);
    popFrontAndIncCol(_text);
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, assertThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
    {
        auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")));
        enforce!AssertError(range.front.type == EntityType.comment, "unittest failure 1", __FILE__, line);
        enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
        enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
    }

    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<!--foo-->", "foo", 1, 11);
        test!func("<!-- foo -->", " foo ", 1, 13);
        test!func("<!-- -->", " ", 1, 9);
        test!func("<!---->", "", 1, 8);
        test!func("<!--- comment -->", "- comment ", 1, 18);
        test!func("<!-- \n foo \n -->", " \n foo \n ", 3, 5);
        test!func("<!--京都市 ディラン-->", "京都市 ディラン", 1, codeLen!(func, "<!--京都市 ディラン-->") + 1);
        test!func("<!--&-->", "&", 1, 9);
        test!func("<!--<-->", "<", 1, 9);
        test!func("<!-->-->", ">", 1, 9);
        test!func("<!--->-->", "->", 1, 10);

        testFail!func("<!", 1, 2);
        testFail!func("<!- comment -->", 1, 2);
        testFail!func("<!-- comment ->", 1, 5);
        testFail!func("<!-- comment --->", 1, 16);
        testFail!func("<!---- comment -->", 1, 7);
        testFail!func("<!-- comment -- comment -->", 1, 16);
        testFail!func("<!->", 1, 2);
        testFail!func("<!-->", 1, 5);
        testFail!func("<!--->", 1, 5);
        testFail!func("<!----->", 1, 7);
        testFail!func("<!blah>", 1, 2);
        testFail!func("<! blah>", 1, 2);
        testFail!func("<!-- \n\n \v \n -->", 3, 4);
        testFail!func("<!--京都市 ディラン\v-->", 1, codeLen!(func, "<!--京都市 ディラン\v"));

        {
            auto xml = func("<!DOCTYPE foo><!-- comment --><root/>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, " comment "));
        }
        {
            auto xml = func("<root><!-- comment --></root>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            assertNotThrown!XMLParsingException(range.popFront());
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, " comment "));
        }
        {
            auto xml = func("<root/><!-- comment -->");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            assertNotThrown!XMLParsingException(range.popFront());
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, " comment "));
        }

        static foreach(comment; ["<!foo>", "<! foo>", "<!->", "<!-->", "<!--->"])
        {
            {
                auto xml = func("<!DOCTYPE foo>" ~ comment ~ "<root/>");
                assertThrown!XMLParsingException(parseXML(xml));
            }
            {
                auto xml = func("<root>" ~ comment ~ "<root>");
                auto range = assertNotThrown!XMLParsingException(parseXML(xml));
                assertThrown!XMLParsingException(range.popFront());
            }
            {
                auto xml = func("<root/>" ~ comment);
                auto range = assertNotThrown!XMLParsingException(parseXML(xml));
                assertThrown!XMLParsingException(range.popFront());
            }
        }

        {
            auto xml = "<!--one-->\n" ~
                       "<!--two-->\n" ~
                       "<root>\n" ~
                       " <!--three-->\n" ~
                       " <!--four-->\n" ~
                       "</root>\n" ~
                       "<!--five-->\n" ~
                       "<!--six-->";

            auto text = func(xml);
            {
                auto range = parseXML(text.save);
                assert(range.front.type == EntityType.comment);
                assert(equal(range.front.text, "one"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.comment);
                assert(equal(range.front.text, "two"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.elementStart);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.comment);
                assert(equal(range.front.text, "three"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.comment);
                assert(equal(range.front.text, "four"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.elementEnd);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.comment);
                assert(equal(range.front.text, "five"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.comment);
                assert(equal(range.front.text, "six"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.empty);
            }
            {
                auto range = parseXML!simpleXML(text.save);
                assert(range.front.type == EntityType.elementStart);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.elementEnd);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.empty);
            }
        }
    }
}


// PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
// PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
// Parses a processing instruction. < was already removed from the input.
// When PIs are skipped, the PI is only parsed past; its name and text are not
// validated.
void _parsePI()
{
    _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
    assert(_text.input.front == '?');
    popFrontAndIncCol(_text);
    static if(config.skipPI == SkipPI.yes)
        _text.skipUntilAndDrop!"?>"();
    else
    {
        immutable posAtName = _text.pos;
        if(_text.input.empty)
            throw new XMLParsingException("Unterminated processing instruction", posAtName);
        _type = EntityType.pi;
        _tagStack.sawEntity();
        _name = takeName!'?'(_text);
        stripWS(_text);
        checkNotEmpty(_text);
        _savedText.pos = _text.pos;
        _savedText.input = _text.takeUntilAndDrop!"?>"();
        checkText!true(_savedText);
        // Any capitalization of "xml" is reserved and thus not a legal name
        // for a processing instruction.
        if(walkLength(_name.save) == 3)
        {
            // FIXME icmp doesn't compile right now due to an issue with
            // byUTF that needs to be looked into.
            /+
            import std.uni : icmp;
            if(icmp(_name.save, "xml") == 0)
                throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
            +/
            auto temp = _name.save;
            if(temp.front == 'x' || temp.front == 'X')
            {
                temp.popFront();
                if(temp.front == 'm' || temp.front == 'M')
                {
                    temp.popFront();
                    if(temp.front == 'l' || temp.front == 'L')
                        throw new XMLParsingException("Processing instructions cannot be named xml", posAtName);
                }
            }
        }
    }
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, assertThrown, collectException, enforce;
    import std.utf : byUTF;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func)(string text, string name, string expected,
                                 int row, int col, size_t line = __LINE__)
    {
        auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
                                                         "unittest failure 1", __FILE__, line);
        enforce!AssertError(range.front.type == EntityType.pi, "unittest failure 2", __FILE__, line);
        enforce!AssertError(equal(range.front.name, name), "unittest failure 3", __FILE__, line);
        enforce!AssertError(equal(range.front.text, expected), "unittest failure 4", __FILE__, line);
        enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 5", __FILE__, line);
    }

    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<?a?>", "a", "", 1, 6);
        test!func("<?foo?>", "foo", "", 1, 8);
        test!func("<?foo.?>", "foo.", "", 1, 9);
        test!func("<?foo bar?>", "foo", "bar", 1, 12);
        test!func("<?xmf bar?>", "xmf", "bar", 1, 12);
        test!func("<?xmlfoo bar?>", "xmlfoo", "bar", 1, 15);
        test!func("<?foo bar baz?>", "foo", "bar baz", 1, 16);
        test!func("<?foo\nbar baz?>", "foo", "bar baz", 2, 10);
        test!func("<?foo \n bar baz?>", "foo", "bar baz", 2, 11);
        test!func("<?foo bar\nbaz?>", "foo", "bar\nbaz", 2, 6);
        test!func("<?dlang is awesome?>", "dlang", "is awesome", 1, 21);
        test!func("<?dlang is awesome! ?>", "dlang", "is awesome! ", 1, 23);
        test!func("<?dlang\n\nis\n\nawesome\n\n?>", "dlang", "is\n\nawesome\n\n", 7, 3);
        test!func("<?京都市 ディラン?>", "京都市", "ディラン", 1, codeLen!(func, "<?京都市 ディラン?>") + 1);
        test!func("<?foo bar&baz?>", "foo", "bar&baz", 1, 16);
        test!func("<?foo bar<baz?>", "foo", "bar<baz", 1, 16);
        test!func("<?pi ?>", "pi", "", 1, 8);
        test!func("<?pi\n?>", "pi", "", 2, 3);
        test!func("<?foo ??>", "foo", "?", 1, 10);
        test!func("<?pi some data ? > <??>", "pi", "some data ? > <?", 1, 24);

        testFail!func("<?", 1, 3);
        testFail!func("<??>", 1, 3);
        testFail!func("<? ?>", 1, 3);
        testFail!func("<?xml?><?xml?>", 1, 10);
        testFail!func("<?XML?>", 1, 3);
        testFail!func("<?xMl?>", 1, 3);
        testFail!func("<?foo>", 1, 6);
        testFail!func("<? foo?>", 1, 3);
        testFail!func("<?\nfoo?>", 1, 3);
        testFail!func("<??foo?>", 1, 3);
        testFail!func("<?.foo?>", 1, 3);
        testFail!func("<?foo bar\vbaz?>", 1, 10);

        {
            auto xml = func("<!DOCTYPE foo><?foo bar?><root/>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "foo"));
            assert(equal(range.front.text, "bar"));
        }
        {
            auto xml = func("<root><?foo bar?></root>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            assertNotThrown!XMLParsingException(range.popFront());
            assert(equal(range.front.name, "foo"));
            assert(equal(range.front.text, "bar"));
        }
        {
            auto xml = func("<root/><?foo bar?>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            assertNotThrown!XMLParsingException(range.popFront());
            assert(equal(range.front.name, "foo"));
            assert(equal(range.front.text, "bar"));
        }

        static foreach(pi; ["<?foo>", "<foo?>", "<? foo>"])
        {
            {
                auto xml = func("<!DOCTYPE foo>" ~ pi ~ "<root/>");
                assertThrown!XMLParsingException(parseXML(xml));
            }
            {
                auto xml = func("<root>" ~ pi ~ "<root>");
                auto range = assertNotThrown!XMLParsingException(parseXML(xml));
                assertThrown!XMLParsingException(range.popFront());
            }
            {
                auto xml = func("<root/>" ~ pi);
                auto range = assertNotThrown!XMLParsingException(parseXML(xml));
                assertThrown!XMLParsingException(range.popFront());
            }
        }

        {
            auto xml = "<?one?>\n" ~
                       "<?two?>\n" ~
                       "<root>\n" ~
                       " <?three?>\n" ~
                       " <?four?>\n" ~
                       "</root>\n" ~
                       "<?five?>\n" ~
                       "<?six?>";

            auto text = func(xml);
            {
                auto range = parseXML(text.save);
                assert(range.front.type == EntityType.pi);
                assert(equal(range.front.name, "one"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.pi);
                assert(equal(range.front.name, "two"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.elementStart);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.pi);
                assert(equal(range.front.name, "three"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.pi);
                assert(equal(range.front.name, "four"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.elementEnd);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.pi);
                assert(equal(range.front.name, "five"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.pi);
                assert(equal(range.front.name, "six"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.empty);
            }
            {
                auto range = parseXML!simpleXML(text.save);
                assert(range.front.type == EntityType.elementStart);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.front.type == EntityType.elementEnd);
                assert(equal(range.front.name, "root"));
                assertNotThrown!XMLParsingException(range.popFront());
                assert(range.empty);
            }
        }
    }
}


// CDSect  ::= CDStart CData CDEnd
// CDStart ::= '<![CDATA['
// CData   ::= (Char* - (Char* ']]>' Char*))
// CDEnd   ::= ']]>'
// Parses a CDATA. <![CDATA[ was already removed from the front of the input.
void _parseCDATA()
{
    _entityPos = TextPos(_text.pos.line, _text.pos.col - cast(int)"<![CDATA[".length);
    _type = EntityType.cdata;
    _tagStack.sawEntity();
    _savedText.pos = _text.pos;
    _savedText.input = _text.takeUntilAndDrop!"]]>"();
    checkText!true(_savedText);
    _grammarPos = GrammarPos.contentCharData2;
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func)(string text, string expected, int row, int col, size_t line = __LINE__)
    {
        auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
        auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
        assertNotThrown!XMLParsingException(range.popFront());
        enforce!AssertError(range.front.type == EntityType.cdata, "unittest failure 1", __FILE__, line);
        enforce!AssertError(equal(range.front.text, expected), "unittest failure 2", __FILE__, line);
        enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
    }

    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
        auto range = parseXML(func("<root>" ~ text ~ "<root/>"));
        auto e = collectException!XMLParsingException(range.popFront());
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<![CDATA[]]>", "", 1, 13);
        test!func("<![CDATA[hello world]]>", "hello world", 1, 24);
        test!func("<![CDATA[\nhello\n\nworld\n]]>", "\nhello\n\nworld\n", 5, 4);
        test!func("<![CDATA[京都市]]>", "京都市", 1, codeLen!(func, "<![CDATA[京都市]>") + 2);
        test!func("<![CDATA[<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ]]>",
                  "<><><><><<<<>>>>>> ] ] ]> <]> <<>> ][][] >> ", 1, 57);
        test!func("<![CDATA[&]]>", "&", 1, 14);

        testFail!func("<[CDATA[]>", 1, 2);
        testFail!func("<![CDAT[]>", 1, 2);
        testFail!func("<![CDATA]>", 1, 2);
        testFail!func("<![CDATA[>", 1, 10);
        testFail!func("<![CDATA[]", 1, 10);
        testFail!func("<![CDATA[]>", 1, 10);
        testFail!func("<![CDATA[ \v ]]>", 1, 11);
        testFail!func("<![CDATA[ \n\n \v \n ]]>", 3, 2);
    }
}


// doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)?
//                 '>'
// DeclSep     ::= PEReference | S
// intSubset   ::= (markupdecl | DeclSep)*
// markupdecl  ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
// Parse doctypedecl after GrammarPos.prologMisc1.
// <!DOCTYPE and any whitespace after it should have already been removed
// from the input.
//
// The declaration is only skipped, not interpreted: quoted literals are
// jumped over so that brackets and angle brackets inside of them are ignored,
// and the internal subset (if any) is skipped up to its closing ']'. Once the
// declaration has been consumed, the grammar position moves on to
// prologMisc2, and the entity that follows the declaration is parsed.
void _parseDoctypeDecl()
{
    outer: while(true)
    {
        _text.skipToOneOf!('"', '\'', '[', '>')();
        switch(_text.input.front)
        {
            static foreach(quote; ['"', '\''])
            {
                case quote:
                {
                    popFrontAndIncCol(_text);
                    _text.skipUntilAndDrop!([quote])();
                    continue outer;
                }
            }
            case '[':
            {
                popFrontAndIncCol(_text);
                while(true)
                {
                    checkNotEmpty(_text);
                    _text.skipToOneOf!('"', '\'', ']')();
                    switch(_text.input.front)
                    {
                        case '"':
                        {
                            popFrontAndIncCol(_text);
                            _text.skipUntilAndDrop!`"`();
                            continue;
                        }
                        case '\'':
                        {
                            popFrontAndIncCol(_text);
                            _text.skipUntilAndDrop!`'`();
                            continue;
                        }
                        case ']':
                        {
                            popFrontAndIncCol(_text);
                            stripWS(_text);
                            if(_text.input.empty || _text.input.front != '>')
                                throw new XMLParsingException("Incorrectly terminated <!DOCTYPE> section.", _text.pos);
                            popFrontAndIncCol(_text);
                            // Record that the DOCTYPE has been seen so that a
                            // later popFront (after a comment or PI) cannot
                            // accept a second one.
                            _grammarPos = GrammarPos.prologMisc2;
                            _parseAtPrologMisc!2();
                            return;
                        }
                        default: assert(0);
                    }
                }
            }
            case '>':
            {
                popFrontAndIncCol(_text);
                // See the comment in the ']' case above.
                _grammarPos = GrammarPos.prologMisc2;
                _parseAtPrologMisc!2();
                break;
            }
            default: assert(0);
        }
        break;
    }
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : testRangeFuncs;

    static void test(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto pos = TextPos(row, col + cast(int)"<root/>".length);
        auto range = assertNotThrown!XMLParsingException(parseXML(func(text ~ "<root/>")),
                                                         "unittest failure 1", __FILE__, line);
        enforce!AssertError(range.front.type == EntityType.elementEmpty, "unittest failure 2", __FILE__, line);
        enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
    }

    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto e = collectException!XMLParsingException(parseXML(func(text ~ "<root/>")));
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<!DOCTYPE name>", 1, 16);
        test!func("<!DOCTYPE \n\n\n name>", 4, 7);
        test!func("<!DOCTYPE name \n\n\n >", 4, 3);

        test!func("<!DOCTYPE name []>", 1, 19);
        test!func("<!DOCTYPE \n\n\n name []>", 4, 10);
        test!func("<!DOCTYPE name \n\n\n []>", 4, 5);

        test!func(`<!DOCTYPE name PUBLIC "'''" '"""'>`, 1, 35);
        test!func(`<!DOCTYPE name PUBLIC "'''" '"""' []>`, 1, 38);
        test!func(`<!DOCTYPE name PUBLIC 'foo' "'''">`, 1, 35);
        test!func(`<!DOCTYPE name PUBLIC 'foo' '"""' []>`, 1, 38);

        test!func("<!DOCTYPE name [ <!ELEMENT foo EMPTY > ]>", 1, 42);
        test!func("<!DOCTYPE name [ <!ELEMENT bar ANY > ]>", 1, 40);
        test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA) > ]>", 1, 48);
        test!func("<!DOCTYPE name [ <!ELEMENT mixed (#PCDATA | foo)> ]>", 1, 53);
        test!func("<!DOCTYPE name [ <!ELEMENT kids (foo) > ]>", 1, 43);
        test!func("<!DOCTYPE name [ <!ELEMENT kids (foo | bar)> ]>", 1, 48);

        test!func("<!DOCTYPE name [ <!ATTLIST foo> ]>", 1, 35);
        test!func("<!DOCTYPE name [ <!ATTLIST foo def CDATA #REQUIRED> ]>", 1, 55);

        test!func(`<!DOCTYPE name [ <!ENTITY foo "bar"> ]>`, 1, 40);
        test!func(`<!DOCTYPE name [ <!ENTITY foo 'bar'> ]>`, 1, 40);
        test!func(`<!DOCTYPE name [ <!ENTITY foo SYSTEM 'sys'> ]>`, 1, 47);
        test!func(`<!DOCTYPE name [ <!ENTITY foo PUBLIC "'''" 'sys'> ]>`, 1, 53);

        test!func(`<!DOCTYPE name [ <!NOTATION note PUBLIC 'blah'> ]>`, 1, 51);

        test!func("<!DOCTYPE name [ <?pi> ]>", 1, 26);

        test!func("<!DOCTYPE name [ <!-- coment --> ]>", 1, 36);

        test!func("<!DOCTYPE name [ <?pi> <!----> <!ELEMENT blah EMPTY> ]>", 1, 56);
        test!func("<!DOCTYPE \nname\n[\n<?pi> \n <!---->\n<!ENTITY foo '\n\n'\n>\n]>", 10, 3);

        test!func("<!DOCTYPE doc [\n" ~
                  "<!ENTITY e '<![CDATA[Tim Michael]]>'>\n" ~
                  "]>\n", 4, 1);

        testFail!func("<!DOCTYP name>", 1, 2);
        testFail!func("<!DOCTYPEname>", 1, 10);
        testFail!func("<!DOCTYPE name1><!DOCTYPE name2>", 1, 18);
        testFail!func("<!DOCTYPE\n\nname1><!DOCTYPE name2>", 3, 8);
        testFail!func("<!DOCTYPE name [ ]<!--comment-->", 1, 19);

        // A second DOCTYPE must also be rejected when a reported entity
        // (here a comment) sits in between the two declarations.
        {
            auto xml = func("<!DOCTYPE name1><!--c--><!DOCTYPE name2><root/>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            auto e = collectException!XMLParsingException(range.popFront());
            assert(e !is null);
        }
        {
            auto xml = func("<!DOCTYPE name1 []><!--c--><!DOCTYPE name2><root/>");
            auto range = assertNotThrown!XMLParsingException(parseXML(xml));
            auto e = collectException!XMLParsingException(range.popFront());
            assert(e !is null);
        }

        // FIXME This really should have the exception point at the quote and
        // say that it couldn't find the matching quote rather than point at
        // the character after it and say that it couldn't find a quote, but
        // that requires reworking some helper functions with better error
        // messages in mind.
        testFail!func(`<!DOCTYPE student SYSTEM "student".dtd"[` ~
                      "\n<!ELEMENT student (#PCDATA)>\n" ~
                      "]>", 1, 40);
    }
}


// Parse a start tag or empty element tag. It could be the root element, or
// it could be a sub-element.
// < was already removed from the front of the input.
void _parseElementStart()
{
    _entityPos = TextPos(_text.pos.line, _text.pos.col - 1);
    _savedText.pos = _text.pos;
    // Everything up to (but excluding) the closing > of the tag.
    _savedText.input = _text.takeUntilAndDrop!(">", true)();

    if(_savedText.input.empty)
        throw new XMLParsingException("Tag missing name", _savedText.pos);
    if(_savedText.input.front == '/')
        throw new XMLParsingException("Invalid end tag", _savedText.pos);

    // The tag is an empty element tag if the last character before the > is
    // a / (and that / is not the only character in the tag).
    immutable tagLen = _savedText.input.length;
    bool selfClosing = false;
    if(tagLen > 1)
    {
        auto last = _savedText.input.save;
        last.popFrontN(tagLen - 1);
        selfClosing = last.front == '/';
    }

    if(selfClosing)
    {
        // Drop the trailing / so that only the name and attributes remain.
        _savedText.input = _savedText.input.takeExactly(tagLen - 1);

        static if(config.splitEmpty == SplitEmpty.no)
        {
            _type = EntityType.elementEmpty;
            _tagStack.sawEntity();
            _grammarPos = _tagStack.depth == 0 ? GrammarPos.endMisc : GrammarPos.contentCharData2;
        }
        else
        {
            _type = EntityType.elementStart;
            _tagStack.sawEntity();
            _grammarPos = GrammarPos.splittingEmpty;
        }
    }
    else
    {
        _type = EntityType.elementStart;
        _tagStack.sawEntity();
        _grammarPos = GrammarPos.contentCharData1;
    }

    _name = _savedText.takeName();

    // The attributes should be all that's left in savedText. They are only
    // validated when the tag stack reports atMax.
    if(!_tagStack.atMax)
        return;

    auto attrText = _savedText.save;
    auto attrChecker = _tagStack.attrChecker;

    for(;;)
    {
        immutable hadWS = stripWS(attrText);
        if(attrText.input.empty)
            break;
        if(!hadWS)
            throw new XMLParsingException("Whitespace missing before attribute name", attrText.pos);

        immutable attrPos = attrText.pos;
        attrChecker.pushAttr(attrText.takeName!'='(), attrPos);
        stripWS(attrText);

        checkNotEmpty(attrText);
        if(attrText.input.front != '=')
            throw new XMLParsingException("= missing", attrText.pos);
        popFrontAndIncCol(attrText);

        stripWS(attrText);
        attrText.takeAttValue();
    }

    attrChecker.checkAttrs();
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func)(string text, EntityType type, string name,
                                 int row, int col, size_t line = __LINE__)
    {
        auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
        enforce!AssertError(range.front.type == type, "unittest failure 1", __FILE__, line);
        enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
        enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
    }

    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        auto e = collectException!XMLParsingException(parseXML(func(text)));
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<a/>", EntityType.elementEmpty, "a", 1, 5);
        test!func("<a></a>", EntityType.elementStart, "a", 1, 4);
        test!func("<root/>", EntityType.elementEmpty, "root", 1, 8);
        test!func("<root></root>", EntityType.elementStart, "root", 1, 7);
        test!func("<foo/>", EntityType.elementEmpty, "foo", 1, 7);
        test!func("<foo></foo>", EntityType.elementStart, "foo", 1, 6);
        test!func("<foo />", EntityType.elementEmpty, "foo", 1, 14);
        test!func("<foo ></foo>", EntityType.elementStart, "foo", 1, 13);
        test!func("<foo \n\n\n />", EntityType.elementEmpty, "foo", 4, 4);
        test!func("<foo \n\n\n ></foo>", EntityType.elementStart, "foo", 4, 3);
        test!func("<foo.></foo.>", EntityType.elementStart, "foo.", 1, 7);
        test!func(`<京都市></京都市>`, EntityType.elementStart, "京都市", 1, codeLen!(func, `<京都市>`) + 1);

        testFail!func(`<.foo/>`, 1, 2);
        testFail!func(`<>`, 1, 2);
        testFail!func(`</>`, 1, 2);
        testFail!func(`</foo>`, 1, 2);

        {
            auto range = assertNotThrown!XMLParsingException(parseXML!simpleXML(func("<root/>")));
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
            assert(range._text.pos == TextPos(1, 8));
            assertNotThrown!XMLParsingException(range.popFront());
            assert(range.front.type == EntityType.elementEnd);
            assert(equal(range.front.name, "root"));
            assert(range._text.pos == TextPos(1, 8));
        }
    }
}


// Parse an end tag. It could be the root element, or it could be a
// sub-element.
// </ was already removed from the front of the input.
void _parseElementEnd()
{
    if(_text.input.empty)
        throw new XMLParsingException("Unterminated end tag", _text.pos);

    // The caller already consumed "</", so the entity starts two columns
    // before the current position.
    _entityPos = TextPos(_text.pos.line, _text.pos.col - 2);
    _type = EntityType.elementEnd;
    _tagStack.sawEntity();

    immutable nameStart = _text.pos;
    _name = _text.takeName!'>'();
    stripWS(_text);

    // Only optional whitespace may sit between the name and the closing >.
    immutable properlyClosed = !_text.input.empty && _text.input.front == '>';
    if(!properlyClosed)
    {
        throw new XMLParsingException("There can only be whitespace between an end tag's name and the >",
                                      _text.pos);
    }
    popFrontAndIncCol(_text);

    // popTag verifies the name against the matching start tag.
    _tagStack.popTag(_name.save, nameStart);

    // Once the root element has been closed, only Misc may follow.
    if(_tagStack.depth == 0)
        _grammarPos = GrammarPos.endMisc;
    else
        _grammarPos = GrammarPos.contentCharData2;
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Parses text, pops the first entity, and expects the second entity to
    // be an end tag with the given name, with the parser left at row/col.
    static void test(alias func)(string text, string name, int row, int col, size_t line = __LINE__)
    {
        auto range = assertNotThrown!XMLParsingException(parseXML(func(text)));
        range.popFront();
        enforce!AssertError(range.front.type == EntityType.elementEnd, "unittest failure 1", __FILE__, line);
        enforce!AssertError(equal(range.front.name, name), "unittest failure 2", __FILE__, line);
        enforce!AssertError(range._text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
    }

    // Expects popping the first entity to throw at the given position.
    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto range = parseXML(func(text));
        auto e = collectException!XMLParsingException(range.popFront());
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<a></a>", "a", 1, 8);
        test!func("<foo></foo>", "foo", 1, 12);
        test!func("<foo ></foo >", "foo", 1, 20);
        test!func("<foo \n ></foo \n >", "foo", 3, 3);
        test!func("<foo>\n\n\n</foo>", "foo", 4, 7);
        test!func("<foo.></foo.>", "foo.", 1, 14);
        test!func(`<京都市></京都市>`, "京都市", 1, codeLen!(func, `<京都市></京都市>`) + 1);

        testFail!func(`<foo></ foo>`, 1, 8);
        testFail!func(`<foo></bar>`, 1, 8);
        testFail!func(`<foo></fo>`, 1, 8);
        testFail!func(`<foo></food>`, 1, 8);
        testFail!func(`<a></>`, 1, 6);
        testFail!func(`<a></`, 1, 6);
        testFail!func(`<a><`, 1, 5);
        testFail!func(`<a></a b='42'>`, 1, 8);
    }
}


// GrammarPos.contentCharData1
// content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
// Parses at either CharData?. Nothing from the CharData? (or what's after it
// if it's not there) has been consumed.
void _parseAtContentCharData()
{
    checkNotEmpty(_text);
    auto beforeWS = _text.save;
    stripWS(_text);
    checkNotEmpty(_text);

    if(_text.input.front == '<')
    {
        // Nothing but whitespace preceded the <, so there is no text entity
        // here; go straight to whatever the < introduces.
        popFrontAndIncCol(_text);
        checkNotEmpty(_text);
        if(_text.input.front != '/')
        {
            _parseAtContentMid();
            return;
        }
        popFrontAndIncCol(_text);
        _parseElementEnd();
        return;
    }

    // There is character data. Rewind so that the leading whitespace is
    // part of the text entity.
    _text = beforeWS;
    _entityPos = _text.pos;
    _type = EntityType.text;
    _tagStack.sawEntity();
    _savedText.pos = _text.pos;
    _savedText.input = _text.takeUntilAndDrop!"<"();
    checkText!false(_savedText);
    checkNotEmpty(_text);

    // The < has been dropped; record what kind of markup comes next.
    if(_text.input.front == '/')
    {
        popFrontAndIncCol(_text);
        _grammarPos = GrammarPos.endTag;
    }
    else
        _grammarPos = GrammarPos.contentMid;
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    // Wraps text in <root>...</root> and expects it to come back as a single
    // text entity, with the parser positioned relative to the given row/col.
    static void test(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
    {
        auto pos = TextPos(row, col + (cast(int)(row == 1 ? "<root></" : "</").length));
        auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
        assertNotThrown!XMLParsingException(range.popFront());
        enforce!AssertError(range.front.type == EntityType.text, "unittest failure 1", __FILE__, line);
        enforce!AssertError(equal(range.front.text, text), "unittest failure 2", __FILE__, line);
        enforce!AssertError(range._text.pos == pos, "unittest failure 3", __FILE__, line);
    }

    // Wraps text in <root>...</root> and expects parsing it to throw at the
    // given position (col is relative to the start of text on row 1).
    static void testFail(alias func, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
    {
        auto pos = TextPos(row, col + (row == 1 ? cast(int)"<root>".length : 0));
        auto range = parseXML!(makeConfig(toer))(func("<root>" ~ text ~ "</root>"));
        auto e = collectException!XMLParsingException(range.popFront());
        enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
        enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no])
        {
            test!(func, toer)("hello world", 1, 12);
            test!(func, toer)("\nhello\n\nworld", 4, 6);
            test!(func, toer)("京都市", 1, codeLen!(func, "京都市") + 1);
            test!(func, toer)("B", 1, 7);
            test!(func, toer)("]", 1, 2);
            test!(func, toer)("]]", 1, 3);
            test!(func, toer)("]>", 1, 3);
            test!(func, toer)("foo \n\n < \n bar", 4, 5);

            testFail!(func, toer)("&", 1, 1);
            testFail!(func, toer)("&;", 1, 1);
            testFail!(func, toer)("&f", 1, 1);
            testFail!(func, toer)("\v", 1, 1);
            testFail!(func, toer)("hello&world", 1, 6);
            testFail!(func, toer)("hello\vworld", 1, 6);
            testFail!(func, toer)("hello&;world", 1, 6);
            testFail!(func, toer)("hello&#;world", 1, 6);
            testFail!(func, toer)("hello&#x;world", 1, 6);
            testFail!(func, toer)("hello&.;world", 1, 6);
            testFail!(func, toer)("\n\nfoo\nbar&.;", 4, 4);

            testFail!(func, toer)("]]>", 1, 1);
            testFail!(func, toer)("foo]]>bar", 1, 4);

            static if(toer == ThrowOnEntityRef.yes)
            {
                testFail!(func, toer)("&foo; &bar baz", 1, 1);
                testFail!(func, toer)("foo \n\n &e; \n bar", 3, 2);
            }
            else
            {
                testFail!(func, toer)("&foo; &bar baz", 1, 7);
                test!(func, toer)("foo \n\n &e; \n bar", 4, 5);
            }
        }
    }
}


// GrammarPos.contentMid
// content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
// The text right after the start tag was what was parsed previously. So,
// that first CharData? was what was parsed last, and this parses starting
// right after. The < should have already been removed from the input.
void _parseAtContentMid()
{
    // Note that References are treated as part of the CharData and not
    // parsed out by the EntityRange (see EntityRange.text).

    immutable lead = _text.input.front;

    // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
    if(lead == '?')
    {
        _parsePI();
        _grammarPos = GrammarPos.contentCharData2;
        static if(config.skipPI == SkipPI.yes)
            popFront();
        return;
    }

    // element ::= EmptyElemTag | STag content ETag
    if(lead != '!')
    {
        _parseElementStart();
        return;
    }

    // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
    // CDSect  ::= CDStart CData CDEnd
    // CDStart ::= '<![CDATA['
    // CData   ::= (Char* - (Char* ']]>' Char*))
    // CDEnd   ::= ']]>'
    popFrontAndIncCol(_text);

    if(_text.stripStartsWith("--"))
    {
        _parseComment();
        static if(config.skipComments == SkipComments.yes)
            _parseAtContentCharData();
        else
            _grammarPos = GrammarPos.contentCharData2;
        return;
    }

    if(_text.stripStartsWith("[CDATA["))
    {
        _parseCDATA();
        return;
    }

    // Report the error at the ! rather than at whatever follows it.
    immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
    throw new XMLParsingException("Expected Comment or CDATA section", bangPos);
}


// This parses the Misc* that come after the root element.
void _parseAtEndMisc()
{
    // Misc ::= Comment | PI | S

    stripWS(_text);

    if(_text.input.empty)
    {
        _grammarPos = GrammarPos.documentEnd;
        return;
    }

    if(_text.input.front != '<')
        throw new XMLParsingException("Expected <", _text.pos);
    popFrontAndIncCol(_text);
    checkNotEmpty(_text);

    immutable lead = _text.input.front;

    // PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
    if(lead == '?')
    {
        _parsePI();
        static if(config.skipPI == SkipPI.yes)
            popFront();
        return;
    }

    if(lead != '!')
        throw new XMLParsingException("Must be a comment or PI", _text.pos);

    // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
    popFrontAndIncCol(_text);
    if(!_text.stripStartsWith("--"))
    {
        // Report the error at the ! rather than at whatever follows it.
        immutable bangPos = TextPos(_text.pos.line, _text.pos.col - 1);
        throw new XMLParsingException("Expected Comment", bangPos);
    }

    _parseComment();
    static if(config.skipComments == SkipComments.yes)
        _parseAtEndMisc();
}

// Used for keeping track of the names of start tags so that end tags can be
// verified as well as making it possible to avoid redoing other validation.
// We keep track of the total number of entities which have been parsed thus
// far so that only whichever EntityRange is farthest along in parsing
// actually adds or removes tags from the TagStack, and the parser can skip
// some of the validation for ranges that are farther behind. That way, the
// end tags get verified, but we only have one stack. If the stack were
// duplicated with every call to save, then there would be a lot more
// allocations, which we don't want. But because we only need to verify the
// end tags once, we can get away with having a shared tag stack. The cost
// is that we have to keep track of how many tags we've parsed so that we
// know if an EntityRange should actually be pushing or popping tags from
// the stack, but that's a lot cheaper than duplicating the stack, and it's
// a lot less annoying than making EntityRange an input range and not a
// forward range or making it a cursor rather than a range.
struct TagStack
{
    // Records that a start tag was parsed. The name is only appended to the
    // shared stack when this TagStack was at the shared maximum entity
    // count; the local depth is always incremented.
    void pushTag(Taken tagName)
    {
        if(entityCount++ == state.maxEntities)
        {
            ++state.maxEntities;
            put(state.tags, tagName);
        }
        ++depth;
    }

    // Records that an end tag was parsed. When this TagStack was at the
    // shared maximum entity count, the name is compared against the top of
    // the shared stack, and an XMLParsingException with the given pos is
    // thrown on a mismatch; otherwise, the top of the stack is removed. The
    // local depth is always decremented on success.
    void popTag(Taken tagName, TextPos pos)
    {
        import std.algorithm : equal;
        import std.format : format;
        if(entityCount++ == state.maxEntities)
        {
            assert(!state.tags.data.empty);
            if(!equal(state.tags.data.back.save, tagName.save))
            {
                enum fmt = "Name of end tag </%s> does not match corresponding start tag <%s>";
                // Format saved copies so that building the message does not
                // consume the names themselves.
                throw new XMLParsingException(format!fmt(tagName.save, state.tags.data.back.save), pos);
            }
            ++state.maxEntities;
            state.tags.shrinkTo(state.tags.data.length - 1);
        }
        --depth;
    }

    // Returns a helper which collects the attribute names of a start tag in
    // the shared state and checks them for duplicates. May only be called
    // while atMax is true. The collected attributes are cleared when the
    // helper is destroyed.
    @property auto attrChecker()
    {
        assert(atMax);

        static struct AttrChecker
        {
            // Records an attribute name and the position it was found at.
            void pushAttr(Taken attrName, TextPos attrPos)
            {
                put(state.attrs, Attribute(attrName, attrPos));
            }

            // Throws an XMLParsingException if two of the recorded
            // attributes have the same name. The recorded attributes are
            // sorted in place by name as part of the check.
            void checkAttrs()
            {
                import std.algorithm.comparison : cmp, equal;
                import std.algorithm.sorting : sort;

                if(state.attrs.data.length < 2)
                    return;

                sort!((a,b) => cmp(a.taken.save, b.taken.save) < 0)(state.attrs.data);
                auto prev = state.attrs.data.front;
                foreach(attr; state.attrs.data[1 .. $])
                {
                    // Compare saved copies (as the sort predicate does).
                    // Without save, a reference-type forward range would be
                    // advanced by this comparison, so the next comparison
                    // against the same name could miss a duplicate.
                    if(equal(prev.taken.save, attr.taken.save))
                        throw new XMLParsingException("Duplicate attribute name", attr.pos);
                    prev = attr;
                }
            }

            ~this()
            {
                state.attrs.clear();
            }

            SharedState* state;
        }

        return AttrChecker(state);
    }

    // Records that an entity which is neither a start tag nor an end tag
    // was parsed.
    void sawEntity()
    {
        if(entityCount++ == state.maxEntities)
            ++state.maxEntities;
    }

    // Whether this TagStack has counted as many entities as the shared
    // maximum.
    @property bool atMax()
    {
        return entityCount == state.maxEntities;
    }

    struct Attribute
    {
        Taken taken;
        TextPos pos;
    }

    // The state shared between copies of a TagStack.
    struct SharedState
    {
        import std.array : Appender;

        Appender!(Taken[]) tags;
        Appender!(Attribute[]) attrs;
        size_t maxEntities;
    }

    // Creates a TagStack with freshly allocated shared state.
    static create()
    {
        TagStack tagStack;
        tagStack.state = new SharedState;
        tagStack.state.tags.reserve(10);
        tagStack.state.attrs.reserve(10);
        return tagStack;
    }

    SharedState* state;
    size_t entityCount;
    int depth;
}

static if(compileInTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : testRangeFuncs;

    // Expects text to parse to the end without throwing for every config.
    static void test(alias func)(string text, size_t line = __LINE__)
    {
        auto xml = func(text);
        static foreach(config; someTestConfigs)
        {{
            auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
                                                             __FILE__, line);
            assertNotThrown!XMLParsingException(walkLength(range), "unittest failure 2", __FILE__, line);
        }}
    }

    // Expects parseXML to succeed but walking the range to throw at the
    // given position for every config.
    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        static foreach(config; someTestConfigs)
        {{
            auto range = assertNotThrown!XMLParsingException(parseXML!config(xml.save), "unittest failure 1",
                                                             __FILE__, line);
            auto e = collectException!XMLParsingException(walkLength(range));
            enforce!AssertError(e !is null, "unittest failure 2", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }}
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("<root></root>");
        test!func("<root><a></a></root>");
        test!func("<root><a><b></b></a></root>");
        test!func("<root><a><b></b></a></root>");
        test!func("<root><a><b></b></a><foo><bar></bar></foo></root>");
        test!func("<a>\n" ~
                  " <b>\n" ~
                  " <c>\n" ~
                  " <d>\n" ~
                  " <e>\n" ~
                  " <f>\n" ~
                  " <g>\n" ~
                  " <h>\n" ~
                  " <i><i><i><i>\n" ~
                  " </i></i></i></i>\n" ~
                  " <i>\n" ~
                  " <j>\n" ~
                  " <k>\n" ~
                  " <l>\n" ~
                  " <m>\n" ~
                  " <n>\n" ~
                  " <o>\n" ~
                  " <p>\n" ~
                  " <q>\n" ~
                  " <r>\n" ~
                  " <s>\n" ~
                  " <!-- comment --> <?pi?> <t><u><v></v></u></t>\n" ~
                  " </s>\n" ~
                  " </r>\n" ~
                  " </q>\n" ~
                  " </p></o></n></m>\n" ~
                  " </l>\n" ~
                  " </k>\n" ~
                  " </j>\n" ~
                  "</i></h>" ~
                  " </g>\n" ~
                  " </f>\n" ~
                  " </e>\n" ~
                  " </d>\n" ~
                  " </c>\n" ~
                  " </b>\n" ~
                  "</a>");
        test!func(`<京都市></京都市>`);

        testFail!func(`<a>`, 1, 4);
        testFail!func(`<foo></foobar>`, 1, 8);
        testFail!func(`<foobar></foo>`, 1, 11);
        testFail!func(`<a><\a>`, 1, 5);
        testFail!func(`<a><a/>`, 1, 8);
        testFail!func(`<a><b>`, 1, 7);
        testFail!func(`<a><b><c>`, 1, 10);
        testFail!func(`<a></a><b>`, 1, 9);
        testFail!func(`<a></a><b></b>`, 1, 9);
        testFail!func(`<a><b></a></b>`, 1, 9);
        testFail!func(`<a><b><c></c><b></a>`, 1, 19);
        testFail!func(`<a><b></c><c></b></a>`, 1, 9);
        testFail!func(`<a><b></c></b></a>`, 1, 9);
        testFail!func("<a>\n" ~
                      " <b>\n" ~
                      " <c>\n" ~
                      " <d>\n" ~
                      " <e>\n" ~
                      " <f>\n" ~
                      " </f>\n" ~
                      " </e>\n" ~
                      " </d>\n" ~
                      " </c>\n" ~
                      " </b>\n" ~
                      "<a>", 12, 4);
        testFail!func("<a>\n" ~
                      " <b>\n" ~
                      " <c>\n" ~
                      " <d>\n" ~
                      " <e>\n" ~
                      " <f>\n" ~
                      " </f>\n" ~
                      " </e>\n" ~
                      " </d>\n" ~
                      " </c>\n" ~
                      " </b>\n" ~
                      "</q>", 12, 3);
    }
}


// A range of code units paired with the position reached in it.
struct Text(R)
{
    alias config = cfg;
    alias Input = R;

    Input input;
    TextPos pos;

    @property save() { return typeof(this)(input.save, pos); }
}


alias Taken = typeof(takeExactly(byCodeUnit(R.init), 42));


EntityType _type;
TextPos _entityPos;
auto _grammarPos = GrammarPos.documentStart;

Taken _name;
TagStack _tagStack;

Text!(typeof(byCodeUnit(R.init))) _text;
Text!Taken _savedText;


// Sets up the parser state for xmlText and parses the first entity.
this(R xmlText)
{
    _tagStack = TagStack.create();
    _text.input = byCodeUnit(xmlText);

    // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
    _savedText = typeof(_savedText).init;
    _name = typeof(_name).init;

    popFront();
}
}

/// Ditto
EntityRange!(config, R) parseXML(Config config = Config.init, R)(R xmlText)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    return EntityRange!(config, R)(xmlText);
}

///
version(dxmlTests) unittest
{
    import std.range.primitives : walkLength;

    auto xml = "<?xml version='1.0'?>\n" ~
               "<?instruction start?>\n" ~
               "<foo attr='42'>\n" ~
               " <bar/>\n" ~
               " <!-- no comment -->\n" ~
               " <baz hello='world'>\n" ~
               " nothing to say.\n" ~
               " nothing at all...\n" ~
               " </baz>\n" ~
               "</foo>\n" ~
               "<?some foo?>";

    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.pi);
        assert(range.front.name == "instruction");
        assert(range.front.text == "start");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "attr");
            assert(attrs.front.value == "42");
        }

        range.popFront();
        assert(range.front.type == EntityType.elementEmpty);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " no comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "baz");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "hello");
            assert(attrs.front.value == "world");
        }

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n nothing to say.\n nothing at all...\n ");

        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </baz>
        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </foo>

        range.popFront();
        assert(range.front.type == EntityType.pi);
        assert(range.front.name == "some");
        assert(range.front.text == "foo");

        range.popFront();
        assert(range.empty);
    }
    {
        auto range = parseXML!simpleXML(xml);

        // simpleXML is set to skip processing instructions.

        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "attr");
            assert(attrs.front.value == "42");
        }

        // simpleXML is set to split empty tags so that <bar/> is treated
        // as the same as <bar></bar> so that code does not have to
        // explicitly handle empty tags.
        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");
        range.popFront();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        // simpleXML is set to skip comments.

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "baz");

        {
            auto attrs = range.front.attributes;
            assert(walkLength(attrs.save) == 1);
            assert(attrs.front.name == "hello");
            assert(attrs.front.value == "world");
        }

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text ==
               "\n nothing to say.\n nothing at all...\n ");

        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </baz>
        range.popFront();
        assert(range.front.type == EntityType.elementEnd); // </foo>
        range.popFront();
        assert(range.empty);
    }
}

// Test the state of the range immediately after parseXML returns.
version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    // Each input is passed through func so that every range type in
    // testRangeFuncs is exercised, rather than repeating the same test on a
    // plain string once per func.
    static foreach(func; testRangeFuncs)
    {
        // The XML declaration is never reported, whatever the config.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<?xml?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading comment is the first entity when comments are kept...
        static foreach(config; [Config.init, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.comment);
            assert(equal(range.front.text, "no comment"));
        }}
        // ...and is skipped when they are not.
        static foreach(config; [simpleXML, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<!--no comment--><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // A leading processing instruction is the first entity when PIs are
        // kept...
        static foreach(config; [Config.init, makeConfig(SkipComments.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "private"));
            assert(equal(range.front.text, "eye"));
        }}
        // ...and is skipped when they are not.
        static foreach(config; [simpleXML, makeConfig(SkipPI.yes)])
        {{
            auto range = parseXML!config(func("<?private eye?><root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}

        // With nothing before it, the root element is the first entity.
        static foreach(config; someTestConfigs)
        {{
            auto range = parseXML!config(func("<root></root>"));
            assert(!range.empty);
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
        }}
    }
}

// Test various invalid states that didn't seem to fit well into tests elsewhere.
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : collectException, enforce;
    import dxml.internal : testRangeFuncs;

    // Expects that constructing the range and then exhausting it throws an
    // XMLParsingException at the given position for every config.
    static void testFail(alias func)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        static foreach(config; someTestConfigs)
        {{
            auto e = collectException!XMLParsingException(
                {
                    for(auto range = parseXML!config(xml.save); !range.empty; range.popFront())
                    {
                    }
                }());
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line);
        }}
    }

    static foreach(func; testRangeFuncs)
    {{
        // A second element after the root element.
        testFail!func("<root></root><invalid></invalid>", 1, 15);
        testFail!func("<root></root><invalid/>", 1, 15);
        testFail!func("<root/><invalid></invalid>", 1, 9);
        testFail!func("<root/><invalid/>", 1, 9);

        // Text after the root element.
        testFail!func("<root></root>invalid", 1, 14);
        testFail!func("<root/>invalid", 1, 8);

        testFail!func("<root/><?pi?>invalid", 1, 14);
        testFail!func("<root/><?pi?><invalid/>", 1, 15);

        testFail!func("<root/><!DOCTYPE foo>", 1, 9);
        testFail!func("<root/></root>", 1, 9);

        // Text before anything else in the document.
        testFail!func("invalid<root></root>", 1, 1);
        testFail!func("invalid<?xml?><root></root>", 1, 1);
        testFail!func("invalid<!DOCTYPE foo><root></root>", 1, 1);
        testFail!func("invalid<!--comment--><root></root>", 1, 1);
        testFail!func("invalid<?Poirot?><root></root>", 1, 1);

        // Text between the prolog and the root element.
        testFail!func("<?xml?>invalid<root></root>", 1, 8);
        testFail!func("<!DOCTYPE foo>invalid<root></root>", 1, 15);
        testFail!func("<!--comment-->invalid<root></root>", 1, 15);
        testFail!func("<?Poirot?>invalid<root></root>", 1, 11);

        // Documents without a proper root element.
        testFail!func("<?xml?>", 1, 8);
        testFail!func("<!DOCTYPE name>", 1, 16);
        testFail!func("<?Sherlock?>", 1, 13);
        testFail!func("<?Poirot?><?Sherlock?><?Holmes?>", 1, 33);
        testFail!func("<?Poirot?></Poirot>", 1, 12);
        testFail!func("</Poirot>", 1, 2);
        testFail!func("<", 1, 2);
        testFail!func(`</`, 1, 2);
        testFail!func(`</a`, 1, 2);
        testFail!func(`</a>`, 1, 2);


        testFail!func("<doc>]]></doc>", 1, 6);

        // Whitespace in front of the XML declaration.
        testFail!func(" <?xml?><root/>", 1, 1);
        testFail!func("\n<?xml?><root/>", 1, 1);
    }}
}

// Test that parseXML and EntityRange's properties work with @safe.
// pure would be nice too, but at minimum, the use of format for exception
// messages, and the use of assumeSafeAppend prevent it. It may or may not be
// worth trying to fix that.
version(dxmlTests) @safe unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    auto xml = "<root>\n" ~
               " <![CDATA[nothing]]>\n" ~
               " <foo a='42'/>\n" ~
               "</root>";

    // NOTE(review): func is not used in the loop body, so the same string
    // input is parsed once per entry in testRangeFuncs.
    static foreach(func; testRangeFuncs)
    {{
        auto range = parseXML(xml);

        // <root>
        assert(range.front.type == EntityType.elementStart);
        assert(equal(range.front.name, "root"));

        // <![CDATA[nothing]]>
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.cdata);
        assert(equal(range.front.text, "nothing"));

        // <foo a='42'/>
        range.popFront();
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
        assert(equal(range.front.name, "foo"));
        {
            auto attrs = range.front.attributes;
            // save is called purely so that it gets compiled as @safe.
            auto saved = attrs.save;
            auto attr = attrs.front;
            assert(attr.name == "a");
            assert(attr.value == "42");
            attrs.popFront();
            assert(attrs.empty);
        }
        // Likewise for EntityRange's own save.
        auto saved = range.save;
    }}
}

// This is purely to provide a way to trigger the unittest blocks in EntityRange
// without compiling them in normally.
private struct EntityRangeCompileTests
{
    // Every member has the same attributes and is never meant to be called;
    // the type only has to satisfy the forward-range-of-char API.
@safe pure nothrow @nogc:

    @property bool empty() { assert(0); }
    @property char front() { assert(0); }
    void popFront() { assert(0); }
    @property typeof(this) save() { assert(0); }
}

// Declaring a variable of this type instantiates EntityRange with the dummy
// range above when the tests are being built.
version(dxmlTests)
    EntityRange!(Config.init, EntityRangeCompileTests) _entityRangeTests;


/++
    Whether the given type is a forward range of attributes.

    Essentially, an attribute range must be a forward range where

    $(UL
        $(LI each element has the members $(D name), $(D value), and $(D pos))
        $(LI $(D name) and $(D value) are forward ranges of characters)
        $(LI $(D name) and $(D value) have the same type)
        $(LI $(D pos) is a $(LREF TextPos)))

    Normally, an attribute range would come from
    $(LREF EntityRange.Entity.attributes) or
    $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom), but
    as long as a range has the correct API, it qualifies as an attribute range.
3605 3606 See_Also: $(LREF EntityRange.Entity.Attribute)$(BR) 3607 $(LREF EntityRange.Entity.attributes)$(BR) 3608 $(REF_ALTTEXT DOMEntity.Attribute, DOMEntity.Attribute, dxml, dom)$(BR) 3609 $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom) 3610 +/ 3611 template isAttrRange(R) 3612 { 3613 static if(isForwardRange!R && 3614 is(typeof(R.init.front.name)) && 3615 is(typeof(R.init.front.value)) && 3616 is(ReturnType!((R r) => r.front.pos) == TextPos)) 3617 { 3618 alias NameType = ReturnType!((R r) => r.front.name); 3619 alias ValueType = ReturnType!((R r) => r.front.value); 3620 3621 enum isAttrRange = is(NameType == ValueType) && 3622 isForwardRange!NameType && 3623 isSomeChar!(ElementType!NameType); 3624 } 3625 else 3626 enum isAttrRange = false; 3627 } 3628 3629 /// 3630 version(dxmlTests) unittest 3631 { 3632 import std.typecons : Tuple; 3633 import dxml.dom : parseDOM; 3634 3635 alias R1 = typeof(parseXML("<root/>").front.attributes); 3636 static assert(isAttrRange!R1); 3637 3638 alias R2 = typeof(parseDOM("<root/>").children[0].attributes); 3639 static assert(isAttrRange!R2); 3640 3641 alias T = Tuple!(string, "name", string, "value", TextPos, "pos"); 3642 static assert(isAttrRange!(T[])); 3643 3644 static assert(!isAttrRange!string); 3645 } 3646 3647 version(dxmlTests) unittest 3648 { 3649 import std.typecons : Tuple; 3650 { 3651 alias T = Tuple!(string, "nam", string, "value", TextPos, "pos"); 3652 static assert(!isAttrRange!(T[])); 3653 } 3654 { 3655 alias T = Tuple!(string, "name", string, "valu", TextPos, "pos"); 3656 static assert(!isAttrRange!(T[])); 3657 } 3658 { 3659 alias T = Tuple!(string, "name", string, "value", TextPos, "po"); 3660 static assert(!isAttrRange!(T[])); 3661 } 3662 { 3663 alias T = Tuple!(string, "name", wstring, "value", TextPos, "pos"); 3664 static assert(!isAttrRange!(T[])); 3665 } 3666 { 3667 alias T = Tuple!(string, "name", string, "value"); 3668 static assert(!isAttrRange!(T[])); 3669 } 3670 { 3671 alias T = 
Tuple!(int, "name", string, "value", TextPos, "pos");
        static assert(!isAttrRange!(T[]));
    }
    {
        alias T = Tuple!(string, "name", int, "value", TextPos, "pos");
        static assert(!isAttrRange!(T[]));
    }
    {
        alias T = Tuple!(string, "name", string, "value", int, "pos");
        static assert(!isAttrRange!(T[]));
    }
}


/++
    A helper function for processing start tag attributes.

    It functions similarly to $(PHOBOS_REF getopt, std, getopt). It takes a
    range of attributes and a list of alternating strings and pointers where
    each string represents the name of the attribute to parse and the pointer
    immediately after it is assigned the value that corresponds to the attribute
    name (if present). If the given pointer does not point to the same type as
    the range of characters used in the attributes, then
    $(PHOBOS_REF to, std, conv) is used to convert the value to the type the
    pointer points to.

    If a $(D Nullable!T*) is given rather than a $(D T*), then it will be
    treated the same as if it had been $(D T*). So, $(D to!T) will be used to
    convert the attribute value if the matching attribute name is present. The
    advantage of passing $(D Nullable!T*) instead of $(D T*) is that it's
    possible to distinguish between an attribute that wasn't present and one
    where it was present but was equivalent to $(D T.init).

    Unlike $(PHOBOS_REF getopt, std, getopt), the given range is consumed
    rather than taking it by $(K_REF) and leaving the attributes that weren't
    matched in the range (since that really doesn't work with an arbitrary
    range as opposed to a dynamic array). However, if the second argument of
    getAttrs is not a $(K_STRING) but is instead an output range that accepts
    the element type of the range, then any attributes which aren't matched are
    put into the output range.

    Params:
        attrRange = A range of attributes (see $(LREF isAttrRange)).
        unmatched = An output range that any _unmatched attributes from the
                    range are put into (optional argument).
        args = An alternating list of strings and pointers where the names
               represent the attribute names to get the value of, and the
               corresponding values get assigned to what the pointers point to.

    Throws: $(LREF XMLParsingException) if $(PHOBOS_REF to, std, conv) fails to
            convert an attribute value.

    See_Also: $(LREF isAttrRange)$(BR)
              $(LREF EntityRange.Entity.attributes)$(BR)
              $(REF_ALTTEXT DOMEntity.attributes, DOMEntity.attributes, dxml, dom)
  +/
void getAttrs(R, Args...)(R attrRange, Args args)
    if(isAttrRange!R && Args.length % 2 == 0)
{
    // The whole body is generated; see _genGetAttrs below.
    mixin(_genGetAttrs(false));
}

/// Ditto
void getAttrs(R, OR, Args...)(R attrRange, ref OR unmatched, Args args)
    if(isAttrRange!R && isOutputRange!(OR, ElementType!R) && Args.length % 2 == 0)
{
    // Same generated body, plus the code that feeds unmatched.
    mixin(_genGetAttrs(true));
}

// Generates, as a string of D code, the function body shared by both
// overloads of getAttrs. Each overload mixes the result in, so the generated
// code refers directly to their attrRange and args parameters (and to
// unmatched when includeUnmatched is true).
//
// The generated code loops over attrRange and, for each attribute, compares
// its name against every string in args. On the first match, the attribute's
// value is converted with std.conv.to and stored through the pointer which
// follows that string in args (for a Nullable!T*, the conversion target is
// T), and the loop then moves on to the next attribute. A ConvException from
// the conversion is rethrown as an XMLParsingException which carries the
// attribute's position. The static asserts in the generated code enforce at
// compile time that args alternates between strings and pointers.
//
// If includeUnmatched is true, put(unmatched, attr) is appended to the end of
// the loop body. It is only reached for attributes whose name matched nothing
// in args, because a match ends the iteration with "continue outer".
//
// NOTE(review): within the generated code shown here, SliceOfR is declared
// but never referenced, and Attr is only used to declare SliceOfR.
private string _genGetAttrs(bool includeUnmatched)
{
    auto retval =
`    import std.algorithm.comparison : equal;
    import std.conv : ConvException, to;
    import std.format : format;
    import std.typecons : Nullable;
    import std.utf : byChar;

    alias Attr = ElementType!R;
    alias SliceOfR = ElementType!(typeof(Attr.init.name));

    outer: foreach(attr; attrRange)
    {
        static foreach(i, arg; args)
        {
            static if(i % 2 == 0)
                static assert(is(Args[i] == string), format!"Expected string for args[%s]"(i));
            else
            {
                static assert(isPointer!(Args[i]), format!"Expected pointer for args[%s]"(i));

                if(equal(attr.name, args[i - 1].byChar()))
                {
                    alias ArgType = typeof(*arg);

                    static if(isInstanceOf!(Nullable, ArgType))
                        alias TargetType = TemplateArgsOf!ArgType;
                    else
                        alias TargetType = typeof(*arg);

                    try
                        *arg = to!TargetType(attr.value);
                    catch(ConvException ce)
                    {
                        enum fmt = "Failed to convert %s: %s";
                        throw new XMLParsingException(format!fmt(attr.name, ce.msg), attr.pos);
                    }

                    continue outer;
                }
            }
        }`;

    // Appended after the static foreach, so it runs only when no name matched.
    if(includeUnmatched)
        retval ~= "\n put(unmatched, attr);";
    // Closes the "outer" foreach opened in the string above.
    retval ~= "\n }";

    return retval;
}

version(dxmlTests) unittest
{
    import std.array : appender;
    import std.exception : collectException;
    import std.typecons : Nullable;

    // Plain pointers: each requested attribute is converted to the pointee
    // type; the attribute "d" is not requested and is simply ignored.
    {
        auto xml = `<root a="foo" b="19" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
    }

    // Nullable!T* accepts the same as T*.
    {
        auto xml = `<root a="foo" c="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        Nullable!string a;
        Nullable!int b;
        bool c;

        getAttrs(range.front.attributes, "c", &c, "b", &b, "a", &a);
        assert(a == "foo");
        assert(b.isNull);
        assert(c == true);
    }

    // If an output range of attributes is provided, then the ones that
    // weren't matched are put in it.
{
        auto xml = `<root foo="42" bar="silly" d="rocks" q="t"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        alias Attribute = typeof(range).Entity.Attribute;
        auto unmatched = appender!(Attribute[])();
        int i;
        string s;

        getAttrs(range.front.attributes, unmatched, "foo", &i, "bar", &s);
        assert(i == 42);
        assert(s == "silly");
        // "d" and "q" were not requested, so they arrive in unmatched in
        // document order, each with the position of its name in xml.
        assert(unmatched.data.length == 2);
        assert(unmatched.data[0] == Attribute("d", "rocks", TextPos(1, 28)));
        assert(unmatched.data[1] == Attribute("q", "t", TextPos(1, 38)));
    }

    // An XMLParsingException gets thrown if a conversion fails.
    {
        auto xml = `<root foo="bar" false="true" d="rocks"/>`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        int i;

        // "rocks" cannot be converted to int; the exception's position is
        // that of the offending attribute (d starts at column 30 of xml).
        auto xpe = collectException!XMLParsingException(
            getAttrs(range.front.attributes, "d", &i));
        assert(xpe.pos == TextPos(1, 30));
    }

    // Test parsing attributes with CTFE.
    // Initializing an enum forces the lambda to be evaluated at compile time.
    enum dummy = (){
        auto xml = `<root a="foo" d="rocks" c="true" b="19" />`;
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementEmpty);

        string a;
        int b;
        bool c;

        getAttrs(range.front.attributes, "a", &a, "b", &b, "c", &c);
        assert(a == "foo");
        assert(b == 19);
        assert(c == true);
        return 0;
    }();
}

// Argument lists which are not a sequence of (string, pointer) pairs must be
// rejected at compile time.
version(dxmlTests) unittest
{
    auto range = parseXML("<root/>");
    auto attrs = range.front.attributes;
    int i;
    static assert(!__traits(compiles, getAttrs(attrs, "foo")));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar")));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i)));
    static assert(!__traits(compiles, getAttrs(attrs, "foo", "bar", &i, &i)));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo")));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i)));
    static assert(!__traits(compiles, getAttrs(attrs, &i, "foo", &i, "bar")));
}

// Verifies that getAttrs can be instantiated and called from @safe pure code.
// "<root/>" has no attributes, so the null pointers passed below are never
// written through.
version(dxmlTests) @safe pure unittest
{
    import std.typecons : Nullable;

    static test(R)(R range, int* i, Nullable!int* j) @safe pure
    {
        getAttrs(range.front.attributes, "foo", i, "bar", j);
    }

    test(parseXML("<root/>"), null, null);
}


/++
    Takes an $(LREF EntityRange) which is at a start tag and iterates it until
    it is at its corresponding end tag. It is an error to call skipContents when
    the current entity is not $(LREF EntityType.elementStart).

    $(TABLE
        $(TR $(TH Supported $(LREF EntityType)s:))
        $(TR $(TD $(LREF2 elementStart, EntityType)))
    )

    Returns: The range with its $(D front) now at the end tag corresponding to
             the start tag that was $(D front) when the function was called.

    Throws: $(LREF XMLParsingException) on invalid XML.
+/
R skipContents(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    assert(entityRange._type == EntityType.elementStart);

    // How many start tags have been seen whose end tags have not been
    // reached yet. The start tag that is front on entry is the first one.
    int openTags = 1;

    // empty is intentionally never checked. The range could only become
    // empty by running off the end of the document, and reaching the end of
    // the document before the matching end tag results in an
    // XMLParsingException being thrown instead.
    do
    {
        entityRange.popFront();

        switch(entityRange._type)
        {
            case EntityType.elementStart:
                ++openTags;
                break;
            case EntityType.elementEnd:
                --openTags;
                break;
            default:
                // Every other kind of entity leaves the nesting depth alone.
                break;
        }
    }
    while(openTags != 0);

    return entityRange;
}

///
version(dxmlTests) unittest
{
    auto xml = "<root>\n" ~
               " <foo>\n" ~
               " <bar>\n" ~
               " Some text\n" ~
               " </bar>\n" ~
               " </foo>\n" ~
               " <!-- no comment -->\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    range = range.skipContents();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "foo");

    range.popFront();
    assert(range.front.type == EntityType.comment);
    assert(range.front.text == " no comment ");

    range.popFront();
    assert(range.front.type == EntityType.elementEnd);
    assert(range.front.name == "root");

    range.popFront();
    assert(range.empty);
}


/++
    Skips entities until the given $(LREF EntityType) is reached.

    If multiple $(LREF EntityType)s are given, then any one of them counts as
    a match.

    The current entity is skipped regardless of whether it is the given
    $(LREF EntityType).
This is essentially a slightly optimized equivalent to

    ---
    if(!range.empty())
    {
        range.popFront();
        range = range.find!((a, b) => a.type == b.type)(entityTypes);
    }
    ---

    Returns: The given range with its $(D front) now at the first entity which
             matched one of the given $(LREF EntityType)s or an empty range if
             none were found.

    Throws: $(LREF XMLParsingException) on invalid XML.
  +/
R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...)
    if(isInstanceOf!(EntityRange, R))
{
    // An empty range has nothing to skip and is handed back as it is.
    if(!entityRange.empty)
    {
        // Whatever is front right now never counts as a match.
        entityRange.popFront();

        search: while(!entityRange.empty)
        {
            immutable current = entityRange._type;
            foreach(wanted; entityTypes)
            {
                if(wanted == current)
                    break search;
            }
            entityRange.popFront();
        }
    }

    // Either front is the first entity of a requested type, or the range
    // has been exhausted.
    return entityRange;
}

///
version(dxmlTests) unittest
{
    auto xml = "<root>\n" ~
               " <!-- blah blah blah -->\n" ~
               " <foo>nothing to say</foo>\n" ~
               "</root>";

    auto range = parseXML(xml);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "root");

    range = range.skipToEntityType(EntityType.elementStart,
                                   EntityType.elementEmpty);
    assert(range.front.type == EntityType.elementStart);
    assert(range.front.name == "foo");

    assert(range.skipToEntityType(EntityType.comment).empty);

    // skipToEntityType will work on an empty range but will always
    // return an empty range.
    assert(range.takeNone().skipToEntityType(EntityType.comment).empty);
}


/++
    Skips entities until the end tag is reached that corresponds to the start
    tag that is the parent of the current entity.
Returns: The given range with its $(D front) now at the end tag which
             corresponds to the parent start tag of the entity that was
             $(D front) when skipToParentEndTag was called. If the current
             entity does not have a parent start tag (which means that it's
             either the root element or a comment or PI outside of the root
             element), then an empty range is returned.

    Throws: $(LREF XMLParsingException) on invalid XML.
  +/
R skipToParentEndTag(R)(R entityRange)
    if(isInstanceOf!(EntityRange, R))
{
    // Walk forward along the current level until either the parent's end tag
    // or the end of the document is reached.
    while(true)
    {
        if(entityRange._type == EntityType.elementStart)
        {
            // A start tag's contents are one level down, so jump to its end
            // tag and then step past that end tag.
            entityRange = entityRange.skipContents();
            entityRange.popFront();
        }
        else
        {
            // Any other entity has nothing nested inside of it. The next
            // start tag is a sibling, and the next end tag is the parent's.
            entityRange = entityRange.skipToEntityType(EntityType.elementStart,
                                                       EntityType.elementEnd);
        }

        if(entityRange.empty || entityRange._type == EntityType.elementEnd)
            return entityRange;
    }
}

///
version(dxmlTests) unittest
{
    auto xml = "<root>\n" ~
               " <foo>\n" ~
               " <!-- comment -->\n" ~
               " <bar>exam</bar>\n" ~
               " </foo>\n" ~
               " <!-- another comment -->\n" ~
               "</root>";
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "foo");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        range = range.skipToParentEndTag();
        assert(range.empty);
    }
    {
        auto range = parseXML(xml);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " comment ");

        range.popFront();
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range.popFront();
        assert(range.front.type == EntityType.text);
        assert(range.front.text == "exam");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "bar");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
assert(range.front.name == "foo");

        range.popFront();
        assert(range.front.type == EntityType.comment);
        assert(range.front.text == " another comment ");

        range = range.skipToParentEndTag();
        assert(range.front.type == EntityType.elementEnd);
        assert(range.front.name == "root");

        assert(range.skipToParentEndTag().empty);
    }
    {
        auto range = parseXML("<root><foo>bar</foo></root>");
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
        assert(range.skipToParentEndTag().empty);
    }
}

// Exercises skipToParentEndTag starting from each kind of entity (one section
// per EntityType below), both inside and outside of the root element.
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : enforce;
    import dxml.internal : testRangeFuncs;

    // Advances range by one entity and then requires that the range is not
    // empty and that the new front has the given type. line defaults to the
    // caller's line number so that a failure is reported at the call site
    // rather than inside this helper.
    static void popAndCheck(R)(ref R range, EntityType type, size_t line = __LINE__)
    {
        range.popFront();
        enforce!AssertError(!range.empty, "unittest 1", __FILE__, line);
        enforce!AssertError(range.front.type == type, "unittest 2", __FILE__, line);
    }

    // Every section is repeated for each function in testRangeFuncs; func is
    // applied to the XML text before it is handed to parseXML.
    static foreach(func; testRangeFuncs)
    {{
        // cdata
        {
            auto xml = "<root>\n" ~
                       " <![CDATA[ cdata run ]]>\n" ~
                       " <nothing/>\n" ~
                       " <![CDATA[ cdata have its bits flipped ]]>\n" ~
                       " <foo></foo>\n" ~
                       " <![CDATA[ cdata play violin ]]>\n" ~
                       "</root>";

            auto range = parseXML(func(xml));
            assert(range.front.type == EntityType.elementStart);
            popAndCheck(range, EntityType.cdata);
            assert(equal(range.front.text, " cdata run "));
            // The skip is done on a saved copy so that range itself can keep
            // stepping through the document one entity at a time.
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementEmpty);
            popAndCheck(range, EntityType.cdata);
            assert(equal(range.front.text, " cdata have its bits flipped "));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementStart);
            range = range.skipContents();
            popAndCheck(range, EntityType.cdata);
            assert(equal(range.front.text, " cdata play violin "));
            range = range.skipToParentEndTag();
            assert(range._type == EntityType.elementEnd);
            assert(equal(range.front.name, "root"));
        }
        // comment
        {
            auto xml = "<!-- before -->\n" ~
                       "<root>\n" ~
                       " <!-- comment 1 -->\n" ~
                       " <nothing/>\n" ~
                       " <!-- comment 2 -->\n" ~
                       " <foo></foo>\n" ~
                       " <!-- comment 3 -->\n" ~
                       "</root>\n" ~
                       "<!-- after -->" ~
                       "<!-- end -->";

            auto text = func(xml);
            // A comment before the root element has no parent start tag.
            assert(parseXML(text.save).skipToParentEndTag().empty);
            {
                auto range = parseXML(text.save);
                assert(range.front.type == EntityType.comment);
                popAndCheck(range, EntityType.elementStart);
                popAndCheck(range, EntityType.comment);
                assert(equal(range.front.text, " comment 1 "));
                {
                    auto temp = range.save.skipToParentEndTag();
                    assert(temp._type == EntityType.elementEnd);
                    assert(equal(temp.front.name, "root"));
                }
                popAndCheck(range, EntityType.elementEmpty);
                popAndCheck(range, EntityType.comment);
                assert(equal(range.front.text, " comment 2 "));
                {
                    auto temp = range.save.skipToParentEndTag();
                    assert(temp._type == EntityType.elementEnd);
                    assert(equal(temp.front.name, "root"));
                }
                popAndCheck(range, EntityType.elementStart);
                range = range.skipContents();
                popAndCheck(range, EntityType.comment);
                assert(equal(range.front.text, " comment 3 "));
                range = range.skipToParentEndTag();
                assert(range._type == EntityType.elementEnd);
                assert(equal(range.front.name, "root"));
            }
            // Comments after the root element have no parent start tag either.
            {
                auto range = parseXML(text.save);
                assert(range.front.type == EntityType.comment);
                popAndCheck(range, EntityType.elementStart);
                range = range.skipContents();
                popAndCheck(range, EntityType.comment);
                assert(equal(range.front.text, " after "));
                assert(range.save.skipToParentEndTag().empty);
                popAndCheck(range, EntityType.comment);
                assert(equal(range.front.text, " end "));
                assert(range.skipToParentEndTag().empty);
            }
        }
        // elementStart
        {
            auto xml = "<root>\n" ~
                       " <a><b>foo</b></a>\n" ~
                       " <nothing/>\n" ~
                       " <c></c>\n" ~
                       " <d>\n" ~
                       " <e>\n" ~
                       " </e>\n" ~
                       " <f>\n" ~
                       " <g>\n" ~
                       " </g>\n" ~
                       " </f>\n" ~
                       " </d>\n" ~
                       "</root>";

            auto range = parseXML(func(xml));
            assert(range.front.type == EntityType.elementStart);
            assert(equal(range.front.name, "root"));
            // The root start tag has no parent.
            assert(range.save.skipToParentEndTag().empty);
            popAndCheck(range, EntityType.elementStart);
            assert(equal(range.front.name, "a"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementStart);
            assert(equal(range.front.name, "b"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "a"));
            }
            popAndCheck(range, EntityType.text);
            popAndCheck(range, EntityType.elementEnd);
            popAndCheck(range, EntityType.elementEnd);
            popAndCheck(range, EntityType.elementEmpty);
            popAndCheck(range, EntityType.elementStart);
            assert(equal(range.front.name, "c"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementEnd);
            popAndCheck(range, EntityType.elementStart);
            assert(equal(range.front.name, "d"));
            popAndCheck(range, EntityType.elementStart);
            assert(equal(range.front.name, "e"));
            // From <e>, the sibling <f> and its child <g> have to be skipped
            // over in order to reach </d>.
            range = range.skipToParentEndTag();
            assert(range._type == EntityType.elementEnd);
            assert(equal(range.front.name, "d"));
            range = range.skipToParentEndTag();
            assert(range._type == EntityType.elementEnd);
            assert(equal(range.front.name, "root"));
        }
        // elementEnd
        {
            auto xml = "<root>\n" ~
                       " <a><b>foo</b></a>\n" ~
                       " <nothing/>\n" ~
                       " <c></c>\n" ~
                       "</root>";

            auto range = parseXML(func(xml));
            assert(range.front.type == EntityType.elementStart);
            popAndCheck(range, EntityType.elementStart);
            popAndCheck(range, EntityType.elementStart);
            popAndCheck(range, EntityType.text);
            popAndCheck(range, EntityType.elementEnd);
            assert(equal(range.front.name, "b"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "a"));
            }
            popAndCheck(range, EntityType.elementEnd);
            assert(equal(range.front.name, "a"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementEmpty);
            popAndCheck(range, EntityType.elementStart);
            popAndCheck(range, EntityType.elementEnd);
            assert(equal(range.front.name, "c"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            // Now at </root>, which has no parent.
            popAndCheck(range, EntityType.elementEnd);
            assert(range.skipToParentEndTag().empty);
        }
        // elementEmpty
        {
            auto range = parseXML(func("<root/>"));
            assert(range.front.type == EntityType.elementEmpty);
            assert(range.skipToParentEndTag().empty);
        }
        {
            auto xml = "<root>\n" ~
                       " <a><b>foo</b></a>\n" ~
                       " <nothing/>\n" ~
                       " <c></c>\n" ~
                       " <whatever/>\n" ~
                       "</root>";

            auto range = parseXML(func(xml));
            popAndCheck(range, EntityType.elementStart);
            assert(range.front.type == EntityType.elementStart);
            range = range.skipContents();
            popAndCheck(range, EntityType.elementEmpty);
            assert(equal(range.front.name, "nothing"));
            // Confirms on a copy what lies between <nothing/> and </root>
            // before skipping over it below.
            {
                auto temp = range.save;
                popAndCheck(temp, EntityType.elementStart);
                popAndCheck(temp, EntityType.elementEnd);
                popAndCheck(temp, EntityType.elementEmpty);
                assert(equal(temp.front.name, "whatever"));
            }
            range = range.skipToParentEndTag();
            assert(range._type == EntityType.elementEnd);
            assert(equal(range.front.name, "root"));
        }
        // pi
        {
            auto xml = "<?Sherlock?>\n" ~
                       "<root>\n" ~
                       " <?Foo?>\n" ~
                       " <nothing/>\n" ~
                       " <?Bar?>\n" ~
                       " <foo></foo>\n" ~
                       " <?Baz?>\n" ~
                       "</root>\n" ~
                       "<?Poirot?>\n" ~
                       "<?Conan?>";

            auto range = parseXML(func(xml));
            assert(range.front.type == EntityType.pi);
            assert(equal(range.front.name, "Sherlock"));
            // A PI before the root element has no parent start tag.
            assert(range.save.skipToParentEndTag().empty);
            popAndCheck(range, EntityType.elementStart);
            popAndCheck(range, EntityType.pi);
            assert(equal(range.front.name, "Foo"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementEmpty);
            popAndCheck(range, EntityType.pi);
            assert(equal(range.front.name, "Bar"));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementStart);
            popAndCheck(range, EntityType.elementEnd);
            popAndCheck(range, EntityType.pi);
            assert(equal(range.front.name, "Baz"));
            range = range.skipToParentEndTag();
            assert(range._type == EntityType.elementEnd);
            assert(equal(range.front.name, "root"));
            // PIs after the root element have no parent start tag either.
            popAndCheck(range, EntityType.pi);
            assert(equal(range.front.name, "Poirot"));
            assert(range.save.skipToParentEndTag().empty);
            popAndCheck(range, EntityType.pi);
            assert(equal(range.front.name, "Conan"));
            assert(range.skipToParentEndTag().empty);
        }
        // text
        {
            auto xml = "<root>\n" ~
                       " nothing to say\n" ~
                       " <nothing/>\n" ~
                       " nothing whatsoever\n" ~
                       " <foo></foo>\n" ~
                       " but he keeps talking\n" ~
                       "</root>";

            auto range = parseXML(func(xml));
            assert(range.front.type == EntityType.elementStart);
            popAndCheck(range, EntityType.text);
            assert(equal(range.front.text, "\n nothing to say\n "));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementEmpty);
            popAndCheck(range, EntityType.text);
            assert(equal(range.front.text, "\n nothing whatsoever\n "));
            {
                auto temp = range.save.skipToParentEndTag();
                assert(temp._type == EntityType.elementEnd);
                assert(equal(temp.front.name, "root"));
            }
            popAndCheck(range, EntityType.elementStart);
            range = range.skipContents();
            popAndCheck(range, EntityType.text);
            assert(equal(range.front.text, "\n but he keeps talking\n"));
            range = range.skipToParentEndTag();
            assert(range._type == EntityType.elementEnd);
            assert(equal(range.front.name, "root"));
        }
    }}
}


/++
    Treats the given string like a file path except that each directory
    corresponds to the name of a start tag. Note that this does $(I not) try to
    implement XPath as that would be quite complicated, and it really doesn't
    fit with a StAX parser.

    A start tag should be thought of as a directory, with its child start tags
    as the directories it contains.

    All paths should be relative.
$(LREF EntityRange) can only move forward
    through the document, so using an absolute path would only make sense at
    the beginning of the document. As such, absolute paths are treated as
    invalid paths.

    $(D_CODE_STRING "./") and $(D_CODE_STRING "../") are supported. Repeated
    slashes such as in $(D_CODE_STRING "foo//bar") are not supported and are
    treated as an invalid path.

    If $(D range.front.type == EntityType.elementStart), then
    $(D range._skipToPath($(D_STRING "foo"))) will search for the first child
    start tag (be it $(LREF EntityType.elementStart) or
    $(LREF EntityType.elementEmpty)) with the $(LREF2 name, EntityRange.Entity)
    $(D_CODE_STRING "foo"). That start tag must be a direct child of the current
    start tag.

    If $(D range.front.type) is any other $(LREF EntityType), then
    $(D range._skipToPath($(D_STRING "foo"))) will return an empty range,
    because no other $(LREF EntityType)s have child start tags.

    For any $(LREF EntityType), $(D range._skipToPath($(D_STRING "../foo")))
    will search for the first start tag with the
    $(LREF2 name, EntityRange.Entity) $(D_CODE_STRING "foo") at the same level
    as the current entity. If the current entity is a start tag with the name
    $(D_CODE_STRING "foo"), it will not be considered a match.

    $(D range._skipToPath($(D_STRING "./"))) is a no-op. However,
    $(D range._skipToPath($(D_STRING "../"))) will result in the empty range
    (since it doesn't target a specific start tag).

    $(D range._skipToPath($(D_STRING "foo/bar"))) is equivalent to
    $(D range._skipToPath($(D_STRING "foo"))._skipToPath($(D_STRING "bar"))),
    and $(D range._skipToPath($(D_STRING "../foo/bar"))) is equivalent to
    $(D range._skipToPath($(D_STRING "../foo"))._skipToPath($(D_STRING "bar"))).
Returns: The given range with its $(D front) now at the requested entity if
             the path is valid; otherwise, an empty range is returned.

    Throws: $(LREF XMLParsingException) on invalid XML.
  +/
R skipToPath(R)(R entityRange, string path)
    if(isInstanceOf!(EntityRange, R))
{
    import std.algorithm.comparison : equal;
    import std.path : pathSplitter;

    // There is nowhere to go from an empty range.
    if(entityRange.empty)
        return entityRange;

    // Empty paths and absolute paths are both treated as invalid.
    immutable invalidPath = path.empty || path[0] == '/';
    if(invalidPath)
        return entityRange.takeNone();

    // The entity types which matter when walking along one level of the
    // document. When empty tags are split, an empty tag shows up as an
    // elementStart, so elementEmpty only needs to be listed otherwise.
    static if(R.config.splitEmpty == SplitEmpty.yes)
        EntityType[2] tagTypes = [EntityType.elementStart, EntityType.elementEnd];
    else
        EntityType[3] tagTypes = [EntityType.elementStart, EntityType.elementEnd,
                                  EntityType.elementEmpty];

    // Searches the rest of the current level (the current entity itself is
    // never a candidate) for a start tag with the given name. The result is
    // an empty range if the parent's end tag or the end of the document
    // comes first.
    R searchSiblings(string name)
    {
        // The children of the current start tag are not on this level.
        if(entityRange._type == EntityType.elementStart)
            entityRange = entityRange.skipContents();

        for(;;)
        {
            entityRange = entityRange.skipToEntityType(tagTypes[]);
            if(entityRange.empty)
                return entityRange;
            if(entityRange._type == EntityType.elementEnd)
                return entityRange.takeNone();
            if(equal(name, entityRange._name.save))
                return entityRange;

            // Wrong name, so step over this tag. An empty tag has no
            // contents to skip.
            static if(R.config.splitEmpty == SplitEmpty.no)
            {
                if(entityRange._type != EntityType.elementEmpty)
                    entityRange = entityRange.skipContents();
            }
            else
                entityRange = entityRange.skipContents();
        }
    }

    for(auto parts = path.pathSplitter(); !parts.empty; parts.popFront())
    {
        // "." refers to the current entity, so there is nothing to do.
        if(parts.front == ".")
            continue;

        if(parts.front != "..")
        {
            // A plain name is looked up among the children of the current
            // entity, and only start tags have children.
            if(entityRange._type != EntityType.elementStart)
                return entityRange.takeNone();

            entityRange = entityRange.skipToEntityType(tagTypes[]);
            assert(!entityRange.empty);
            if(entityRange._type == EntityType.elementEnd)
                return entityRange.takeNone();

            // The first child tag may already be the one being looked for.
            if(equal(parts.front, entityRange._name.save))
                continue;

            entityRange = searchSiblings(parts.front);
            if(entityRange.empty)
                return entityRange;
            continue;
        }

        // "..": a path which ends in ".." names no tag and is invalid.
        parts.popFront();
        if(parts.empty)
            return entityRange.takeNone();

        // Each further ".." moves up one more level.
        while(parts.front == "..")
        {
            parts.popFront();
            if(parts.empty)
                return entityRange.takeNone();
            entityRange = entityRange.skipToParentEndTag();
            if(entityRange.empty)
                return entityRange;
        }

        // The name after the last ".." is looked up on the level reached.
        entityRange = searchSiblings(parts.front);
        if(entityRange.empty)
            return entityRange;
    }

    return entityRange;
}

///
version(dxmlTests) unittest
{
    {
        auto xml = "<carrot>\n" ~
                   " <foo>\n" ~
                   " <bar>\n" ~
                   " <baz/>\n" ~
                   " <other/>\n" ~
                   " </bar>\n" ~
                   " </foo>\n" ~
                   "</carrot>";

        auto range = parseXML(xml);
        // "<carrot>"
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "carrot");

        range = range.skipToPath("foo/bar");
        // " <bar>
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "bar");

        range = range.skipToPath("baz");
        // " <baz/>
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);

        // other is not a child element of baz
        assert(range.skipToPath("other").empty);

        range = range.skipToPath("../other");
        // " <other/>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementEmpty);
    }
    {
        auto xml = "<potato>\n" ~
                   " <foo>\n" ~
                   " <bar>\n "~
                   " </bar>\n" ~
                   " <crazy>\n" ~
                   " </crazy>\n" ~
                   " <fou/>\n" ~
                   " </foo>\n" ~
                   " <buzz/>\n" ~
                   "</potato>";

        auto range = parseXML(xml);
        // "<potato>"
        assert(range.front.type == EntityType.elementStart);

        range = range.skipToPath("./");
        // "<potato>"
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "potato");

range = range.skipToPath("./foo/bar"); 4693 // " <bar>" 4694 assert(!range.empty); 4695 assert(range.front.type == EntityType.elementStart); 4696 assert(range.front.name == "bar"); 4697 4698 range = range.skipToPath("../crazy"); 4699 // " <crazy>" 4700 assert(!range.empty); 4701 assert(range.front.type == EntityType.elementStart); 4702 assert(range.front.name == "crazy"); 4703 4704 // Whether popFront is called here before the call to 4705 // range.skipToPath("../fou") below, the result is the same, because 4706 // both <crazy> and </crazy> are at the same level. 4707 range.popFront(); 4708 // " </crazy>" 4709 assert(!range.empty); 4710 assert(range.front.type == EntityType.elementEnd); 4711 assert(range.front.name == "crazy"); 4712 4713 range = range.skipToPath("../fou"); 4714 // " <fou/>" 4715 assert(!range.empty); 4716 assert(range.front.type == EntityType.elementEmpty); 4717 } 4718 // Searching stops at the first matching start tag. 4719 { 4720 auto xml = "<beet>\n" ~ 4721 " <foo a='42'>\n" ~ 4722 " </foo>\n" ~ 4723 " <foo b='451'>\n" ~ 4724 " </foo>\n" ~ 4725 "</beet>"; 4726 4727 auto range = parseXML(xml); 4728 range = range.skipToPath("foo"); 4729 assert(!range.empty); 4730 assert(range.front.type == EntityType.elementStart); 4731 assert(range.front.name == "foo"); 4732 4733 { 4734 auto attrs = range.front.attributes; 4735 assert(attrs.front.name == "a"); 4736 assert(attrs.front.value == "42"); 4737 } 4738 4739 range = range.skipToPath("../foo"); 4740 assert(!range.empty); 4741 assert(range.front.type == EntityType.elementStart); 4742 assert(range.front.name == "foo"); 4743 4744 { 4745 auto attrs = range.front.attributes; 4746 assert(attrs.front.name == "b"); 4747 assert(attrs.front.value == "451"); 4748 } 4749 } 4750 // skipToPath will work on an empty range but will always return an 4751 // empty range. 
{
        auto range = parseXML("<root/>");
        assert(range.takeNone().skipToPath("nowhere").empty);
    }
    // Empty and absolute paths will also result in an empty range as will
    // "../" without any actual tag name on the end.
    {
        auto range = parseXML("<root/>");
        assert(range.skipToPath("").empty);
        assert(range.skipToPath("/").empty);
        assert(range.skipToPath("../").empty);
    }
    // Only non-empty start tags have children; all other EntityTypes result
    // in an empty range unless "../" is used.
    {
        auto xml = "<!-- comment -->\n" ~
                   "<root>\n" ~
                   " <foo/>\n" ~
                   "</root>";
        auto range = parseXML(xml);
        assert(range.skipToPath("root").empty);
        assert(range.skipToPath("foo").empty);

        range = range.skipToPath("../root");
        assert(!range.empty);
        assert(range.front.type == EntityType.elementStart);
        assert(range.front.name == "root");
    }
}

// Exhaustive walk over one document: at every entity position, for every range
// type in testRangeFuncs and every config in someTestConfigs, this checks which
// relative paths skipToPath can and cannot reach from that position.
version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertNotThrown, enforce;
    import dxml.internal : testRangeFuncs;

    // Asserts that skipToPath(path) does not throw an XMLParsingException and
    // lands on a non-empty range whose front has the given type and name.
    // `line` is forwarded so that failures point at the calling line.
    static void testPath(R)(R range, string path, EntityType type, string name, size_t line = __LINE__)
    {
        auto result = assertNotThrown!XMLParsingException(range.skipToPath(path), "unittest 1", __FILE__, line);
        enforce!AssertError(!result.empty, "unittest 2", __FILE__, line);
        enforce!AssertError(result.front.type == type, "unittest 3", __FILE__, line);
        enforce!AssertError(equal(result.front.name, name), "unittest 4", __FILE__, line);
    }

    // Moves past an empty element tag. With SplitEmpty.yes a second popFront
    // is done (presumably because the empty tag is then reported as a start
    // tag followed by a separate end tag).
    static void popEmpty(R)(ref R range)
    {
        range.popFront();
        static if(range.config.splitEmpty == SplitEmpty.yes)
            range.popFront();
    }

    auto xml = "<superuser>\n" ~
               " <!-- comment -->\n" ~
               " <?pi?>\n" ~
               " <![CDATA[cdata]]>\n" ~
               " <foo/>\n" ~
               " <bar/>\n" ~
               " <!-- comment -->\n" ~
               " <!-- comment -->\n" ~
               " <baz/>\n" ~
               " <frobozz>\n" ~
               " <!-- comment -->\n" ~
               " <!-- comment -->\n" ~
               " <whatever/>\n" ~
               " <!-- comment -->\n" ~
               " <!-- comment -->\n" ~
               " </frobozz>\n" ~
               " <!-- comment -->\n" ~
               " <!-- comment -->\n" ~
               " <xyzzy/>\n" ~
               "</superuser>";

    static foreach(func; testRangeFuncs)
    {{
        auto text = func(xml);

        static foreach(config; someTestConfigs)
        {{
            // The entity type that an empty element tag is reported as under
            // this config.
            static if(config.splitEmpty == SplitEmpty.yes)
                enum empty = EntityType.elementStart;
            else
                enum empty = EntityType.elementEmpty;

            auto range = parseXML!config(text.save);

            // From <superuser>: only direct children (or explicit child
            // paths) are reachable, and names must match exactly.
            assert(range.save.skipToPath("whatever").empty);
            assert(range.save.skipToPath("frobozz/whateve").empty);

            testPath(range.save, "foo", empty, "foo");
            testPath(range.save, "bar", empty, "bar");
            testPath(range.save, "baz", empty, "baz");
            testPath(range.save, "frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "frobozz/whatever", empty, "whatever");
            testPath(range.save, "xyzzy", empty, "xyzzy");

            // Every entity between <superuser> and <foo/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("foo").empty);
                testPath(range.save, "../foo", empty, "foo");
                testPath(range.save, "../bar", empty, "bar");
                testPath(range.save, "../baz", empty, "baz");
                testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(range.save, "../frobozz/whatever", empty, "whatever");
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            // At <foo/>: it cannot be found again from itself.
            assert(equal(range.front.name, "foo"));
            assert(range.save.skipToPath("foo").empty);
            assert(range.save.skipToPath("./foo").empty);
            assert(range.save.skipToPath("../foo").empty);
            assert(range.save.skipToPath("bar").empty);
            assert(range.save.skipToPath("baz").empty);
            assert(range.save.skipToPath("frobozz").empty);
            assert(range.save.skipToPath("whatever").empty);
            assert(range.save.skipToPath("../").empty);
            assert(range.save.skipToPath("../../").empty);

            testPath(range.save, "../bar", empty, "bar");
            testPath(range.save, "../baz", empty, "baz");
            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            popEmpty(range);
            assert(range.save.skipToPath("bar").empty);
            testPath(range.save, "../baz", empty, "baz");
            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("baz").empty);
                testPath(range.save, "../baz", empty, "baz");
                testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
                testPath(range.save, "../frobozz/whatever", empty, "whatever");
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "baz"));

            testPath(range.save, "../frobozz", EntityType.elementStart, "frobozz");
            testPath(range.save, "../frobozz/whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            popEmpty(range);
            assert(equal(range.front.name, "frobozz"));
            assert(range.save.skipToPath("wizard").empty);
            testPath(range.save, "whatever", empty, "whatever");
            testPath(range.save, "../xyzzy", empty, "xyzzy");

            // Inside <frobozz>, before <whatever/>.
            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("whatever").empty);
                testPath(range.save, "../whatever", empty, "whatever");
                testPath(range.save, "../../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "whatever"));
            assert(range.save.skipToPath("frobozz").empty);
            assert(range.save.skipToPath("../frobozz").empty);
            assert(range.save.skipToPath("../xyzzy").empty);
            assert(range.save.skipToPath("../../frobozz").empty);

            testPath(range.save, "../../xyzzy", empty, "xyzzy");

            // Inside <frobozz>, after <whatever/>, up to </frobozz>.
            popEmpty(range);
            for(; range.front.type != EntityType.elementEnd; range.popFront())
            {
                assert(range.save.skipToPath("xyzzy").empty);
                assert(range.save.skipToPath("../xyzzy").empty);
                testPath(range.save, "../../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "frobozz"));

            range.popFront();
            for(; range.front.type != empty; range.popFront())
            {
                assert(range.save.skipToPath("xyzzy").empty);
                testPath(range.save, "../xyzzy", empty, "xyzzy");
            }
            assert(equal(range.front.name, "xyzzy"));

            // At </superuser>: nothing is reachable any more.
            popEmpty(range);
            assert(equal(range.front.name, "superuser"));
            assert(range.save.skipToPath("superuser").empty);
            assert(range.save.skipToPath("foo").empty);
            assert(range.save.skipToPath("../foo").empty);
            assert(range.save.skipToPath("../../foo").empty);
        }}
    }}
}


//------------------------------------------------------------------------------
// Private Section
//------------------------------------------------------------------------------
private:


// Test-only helper. Default-constructs a value of the same type as
// EntityRange!(config, R)._text, sets its input to byCodeUnit(xmlText), and
// returns it so that the private parsing helpers can be unit tested without
// going through parseXML.
version(dxmlTests) auto testParser(Config config = Config.init, R)(R xmlText) @trusted pure nothrow @nogc
{
    import std.utf : byCodeUnit;
    typeof(EntityRange!(config, R)._text) text;
    text.input = byCodeUnit(xmlText);
    return text;
}


// toCmpType is to make it easy for tests to convert the expected result to a
// range with the correct element type, since comparing with equal won't do
// the right thing if the result doesn't have dchar as its element type.
// This overload uses the default-config testParser: str is re-encoded with
// byUTF to the (immutable) element type of what takeExactly yields for the
// parser input built from func(str).
auto toCmpType(alias func)(string str)
{
    import std.range : takeExactly;
    import std.utf : byUTF;

    return str.byUTF!(immutable ElementType!(typeof(testParser(func(str)).input.takeExactly(1))))();
}

// Same as the overload above except that the parser state is built with
// testParser!(makeConfig(toer)) rather than with the default config.
auto toCmpType(alias func, ThrowOnEntityRef toer)(string str)
{
    import std.range : takeExactly;
    import std.utf : byUTF;

    return str.byUTF!(immutable ElementType!(typeof(testParser!(makeConfig(toer))(func(str)).input.takeExactly(1))))();
}


// Used to indicate where in the grammar we're currently parsing.
enum GrammarPos
{
    // Nothing has been parsed yet.
    documentStart,

    // document ::= prolog element Misc*
    // prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
    // This is that first Misc*. The next entity to parse is either a Misc, the
    // doctypedecl, or the root element which follows the prolog.
    prologMisc1,

    // document ::= prolog element Misc*
    // prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
    // This is that second Misc*. The next entity to parse is either a Misc or
    // the root element which follows the prolog.
    prologMisc2,

    // Used with SplitEmpty.yes to tell the parser that we're currently at an
    // empty element tag that we're treating as a start tag, so the next entity
    // will be an end tag even though we didn't actually parse one.
    splittingEmpty,

    // element ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is at the beginning of content at the first CharData?. The next
    // thing to parse will be a CharData, element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentCharData1,

    // element ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is after the first CharData?. The next thing to parse will be an
    // element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentMid,

    // element ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is at the second CharData?. The next thing to parse will be a
    // CharData, element, CDSect, PI, Comment, or ETag.
    // References are treated as part of the CharData and not parsed out by the
    // EntityRange (see EntityRange.Entity.text).
    contentCharData2,

    // element ::= EmptyElemTag | STag content ETag
    // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
    // This is after the second CharData?. The next thing to parse is an ETag.
    endTag,

    // document ::= prolog element Misc*
    // This is the Misc* at the end of the document. The next thing to parse is
    // either another Misc, or we will hit the end of the document.
    endMisc,

    // The end of the document (and the grammar) has been reached.
    documentEnd
}


// Wrapper around skipOver which takes an EntityParser.Text and handles
// incrementing pos.
//
// It is assumed that there are no newlines.
//
// Returns true if text.input started with needle, in which case the needle has
// been removed from text.input and text.pos.col has been advanced by
// needle.length. Returns false otherwise, leaving text.pos untouched.
bool stripStartsWith(Text)(ref Text text, string needle)
{
    import std.algorithm.searching : skipOver;
    import std.utf : byCodeUnit;

    //TODO In the case where we're parsing an array of char, if we can cleanly
    // strip off any byCodeUnit and takeExactly wrappers, then we should be able
    // to have skipOver compare the string being parsed and the needle with ==.
    // It may happen in some cases right now when text.input is a byCodeUnit
    // result, but it won't happen in all cases where it ideally would. We may
    // also want to look into using byUTF on the needle so that it matches the
    // encoding of text.input or even make needle match the encoding when it's
    // passed in instead of always being string.
    if(!text.input.skipOver(needle.byCodeUnit()))
        return false;

    // The needle is compared code unit by code unit and is assumed to contain
    // no newlines, so only the column needs to move.
    text.pos.col += needle.length;

    return true;
}

version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    // Runs stripStartsWith twice: once from the default position and once from
    // a position offset by 3 lines and 7 columns, to check that the position is
    // updated relative to where parsing started. `line` is forwarded so that
    // failures point at the calling line.
    static void test(alias func)(string origHaystack, string needle, string remainder, bool startsWith,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(text.stripStartsWith(needle) == startsWith, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if no newline was consumed.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(text.stripStartsWith(needle) == startsWith, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        test!func("hello world", "hello", " world", true, 1, "hello".length + 1);
        test!func("hello world", "hello world", "", true, 1, "hello world".length + 1);
        test!func("hello world", "foo", "hello world", false, 1, 1);
        test!func("hello world", "hello sally", "hello world", false, 1, 1);
        test!func("hello world", "hello world ", "hello world", false, 1, 1);
    }
}

// Verifies that stripStartsWith can be called from @safe pure code for every
// range type in testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto xml = func(`foo`);
        auto text = testParser!simpleXML(xml);
        assert(text.stripStartsWith("fo"));
    }}
}


// Strips whitespace while dealing with text.pos accordingly. Newlines are not
// ignored.
// Returns whether any whitespace was stripped.
bool stripWS(Text)(ref Text text)
{
    // Consumes leading ' ', '\t', '\r' and '\n' from text.input.
    //
    // Column tracking uses one of two strategies, chosen at compile time:
    //  - If the input has a length, the column is fixed up once at the end
    //    from the number of code units consumed since the last newline (or
    //    since the start if there was none).
    //  - Otherwise, the column is incremented for each non-newline whitespace
    //    character as it is popped.
    // Line tracking is delegated to nextLine for each '\n'.
    bool strippedSpace = false;

    static if(hasLength!(Text.Input))
        size_t lineStart = text.input.length;

    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            case ' ':
            case '\t':
            case '\r':
            {
                strippedSpace = true;
                text.input.popFront();
                static if(!hasLength!(Text.Input))
                    ++text.pos.col;
                break;
            }
            case '\n':
            {
                strippedSpace = true;
                text.input.popFront();
                // Remember how much input was left right after the newline so
                // that the final column only counts what follows it.
                static if(hasLength!(Text.Input))
                    lineStart = text.input.length;
                nextLine!(Text.config)(text.pos);
                break;
            }
            default: break loop;
        }
    }

    static if(hasLength!(Text.Input))
        text.pos.col += lineStart - text.input.length;

    return strippedSpace;
}

version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : enforce;
    import dxml.internal : equalCU, testRangeFuncs;

    // Runs stripWS twice: once from the default position and once from a
    // position offset by 3 lines and 7 columns, to check that the position is
    // updated relative to where parsing started. `line` is forwarded so that
    // failures point at the calling line.
    static void test(alias func)(string origHaystack, string remainder, bool stripped,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(text.stripWS() == stripped, "unittest failure 1", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
            // The column offset only carries over if no newline was consumed.
            auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(text.stripWS() == stripped, "unittest failure 4", __FILE__, line);
            enforce!AssertError(equalCU(text.input, remainder), "unittest failure 5", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line);
        }
    }

    static foreach(func; testRangeFuncs)
    {
        // Four whitespace characters on line 1 => column 5.
        test!func("  \t\rhello world", "hello world", true, 1, 5);
        test!func(" \n \n \n \nhello world", "hello world", true, 5, 1);
        // Two spaces after the last newline => column 3 on line 5.
        test!func(" \n \n \n \n  hello world", "hello world", true, 5, 3);
        test!func("hello world", "hello world", false, 1, 1);
    }
}

// Verifies that stripWS can be called from @safe pure code for every range
// type in testRangeFuncs.
version(dxmlTests) @safe pure unittest
{
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto xml = func(`foo`);
        auto text = testParser!simpleXML(xml);
        assert(!text.stripWS());
    }}
}


// Returns a slice (or takeExactly) of text.input up to but not including the
// given needle, removing both that slice and the given needle from text.input
// in the process. If the needle is not found, then an XMLParsingException is
// thrown.
5217 auto takeUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text) 5218 { 5219 return _takeUntil!(true, needle, skipQuotedText, Text)(text); 5220 } 5221 5222 version(dxmlTests) unittest 5223 { 5224 import core.exception : AssertError; 5225 import std.algorithm.comparison : equal; 5226 import std.exception : collectException, enforce; 5227 import dxml.internal : codeLen, testRangeFuncs; 5228 5229 static void test(alias func, string needle, bool sqt)(string origHaystack, string expected, string remainder, 5230 int row, int col, size_t line = __LINE__) 5231 { 5232 auto haystack = func(origHaystack); 5233 auto adjExpected = expected.toCmpType!func(); 5234 { 5235 auto text = testParser(haystack.save); 5236 auto temp = text.save; 5237 enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected.save), 5238 "unittest failure 1", __FILE__, line); 5239 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5240 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5241 } 5242 { 5243 auto pos = TextPos(row + 3, row == 1 ? 
col + 7 : col); 5244 auto text = testParser(haystack); 5245 text.pos.line += 3; 5246 text.pos.col += 7; 5247 enforce!AssertError(equal(text.takeUntilAndDrop!(needle, sqt)(), adjExpected), 5248 "unittest failure 4", __FILE__, line); 5249 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5250 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5251 } 5252 } 5253 5254 static void testFail(alias func, string needle, bool sqt) 5255 (string origHaystack, int row, int col, size_t line = __LINE__) 5256 { 5257 auto haystack = func(origHaystack); 5258 { 5259 auto text = testParser(haystack.save); 5260 auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)()); 5261 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5262 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5263 } 5264 { 5265 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5266 auto text = testParser(haystack); 5267 text.pos.line += 3; 5268 text.pos.col += 7; 5269 auto e = collectException!XMLParsingException(text.takeUntilAndDrop!(needle, sqt)()); 5270 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5271 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5272 } 5273 } 5274 5275 static foreach(func; testRangeFuncs) 5276 { 5277 static foreach(sqt; [false, true]) 5278 { 5279 { 5280 auto haystack = "hello world"; 5281 enum needle = "world"; 5282 5283 static foreach(i; 1 .. needle.length) 5284 test!(func, needle[0 .. i], sqt)(haystack, "hello ", needle[i .. $], 1, 7 + i); 5285 } 5286 5287 test!(func, "l", sqt)("lello world", "", "ello world", 1, 2); 5288 test!(func, "ll", sqt)("lello world", "le", "o world", 1, 5); 5289 test!(func, "le", sqt)("llello world", "l", "llo world", 1, 4); 5290 { 5291 enum needle = "great"; 5292 enum expected = "プログラミング in D is "; 5293 static foreach(i; 1 .. 
needle.length) 5294 { 5295 test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", expected, 5296 "great indeed"[i .. $], 1, codeLen!(func, expected) + i + 1); 5297 } 5298 } 5299 static foreach(haystack; ["", "a", "hello", "ディラン"]) 5300 testFail!(func, "x", sqt)(haystack, 1, 1); 5301 static foreach(haystack; ["", "l", "lte", "world", "nomatch"]) 5302 testFail!(func, "le", sqt)(haystack, 1, 1); 5303 static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"]) 5304 testFail!(func, "web", sqt)(haystack, 1, 1); 5305 } 5306 5307 test!(func, "*", false)(`hello '*' "*" * world`, `hello '`, `' "*" * world`, 1, 9); 5308 test!(func, "*", false)(`hello '"*' * world`, `hello '"`, `' * world`, 1, 10); 5309 test!(func, "*", false)(`hello "'*" * world`, `hello "'`, `" * world`, 1, 10); 5310 test!(func, "*", false)(`hello ''' * world`, `hello ''' `, ` world`, 1, 12); 5311 test!(func, "*", false)(`hello """ * world`, `hello """ `, ` world`, 1, 12); 5312 testFail!(func, "*", false)("foo\n\n ' \n\nbar", 1, 1); 5313 testFail!(func, "*", false)(`ディラン " `, 1, 1); 5314 5315 test!(func, "*", true)(`hello '*' "*" * world`, `hello '*' "*" `, ` world`, 1, 16); 5316 test!(func, "*", true)(`hello '"*' * world`, `hello '"*' `, ` world`, 1, 13); 5317 test!(func, "*", true)(`hello "'*" * world`, `hello "'*" `, ` world`, 1, 13); 5318 testFail!(func, "*", true)(`hello ''' * world`, 1, 9); 5319 testFail!(func, "*", true)(`hello """ * world`, 1, 9); 5320 testFail!(func, "*", true)("foo\n\n ' \n\nbar", 3, 4); 5321 testFail!(func, "*", true)(`ディラン " `, 1, codeLen!(func, `ディラン "`)); 5322 5323 test!(func, "*", true)(`hello '' "" * world`, `hello '' "" `, ` world`, 1, 14); 5324 test!(func, "*", true)("foo '\n \n \n' bar*", "foo '\n \n \n' bar", "", 4, 7); 5325 } 5326 } 5327 5328 version(dxmlTests) @safe pure unittest 5329 { 5330 import std.algorithm.comparison : equal; 5331 import dxml.internal : testRangeFuncs; 5332 5333 static foreach(func; testRangeFuncs) 5334 {{ 
5335 auto xml = func(`foo`); 5336 auto text = testParser!simpleXML(xml); 5337 assert(equal(text.takeUntilAndDrop!"o"(), "f")); 5338 }} 5339 } 5340 5341 // Variant of takeUntilAndDrop which does not return a slice. It's intended for 5342 // when the config indicates that something should be skipped. 5343 void skipUntilAndDrop(string needle, bool skipQuotedText = false, Text)(ref Text text) 5344 { 5345 _takeUntil!(false, needle, skipQuotedText, Text)(text); 5346 } 5347 5348 version(dxmlTests) unittest 5349 { 5350 import core.exception : AssertError; 5351 import std.algorithm.comparison : equal; 5352 import std.exception : assertNotThrown, collectException, enforce; 5353 import dxml.internal : codeLen, testRangeFuncs; 5354 5355 static void test(alias func, string needle, bool sqt)(string origHaystack, string remainder, 5356 int row, int col, size_t line = __LINE__) 5357 { 5358 auto haystack = func(origHaystack); 5359 { 5360 auto text = testParser(haystack.save); 5361 assertNotThrown!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)(), "unittest failure 1", 5362 __FILE__, line); 5363 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5364 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5365 } 5366 { 5367 auto pos = TextPos(row + 3, row == 1 ? 
col + 7 : col); 5368 auto text = testParser(haystack); 5369 text.pos.line += 3; 5370 text.pos.col += 7; 5371 assertNotThrown!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)(), "unittest failure 4", 5372 __FILE__, line); 5373 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5374 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5375 } 5376 } 5377 5378 static void testFail(alias func, string needle, bool sqt) 5379 (string origHaystack, int row, int col, size_t line = __LINE__) 5380 { 5381 auto haystack = func(origHaystack); 5382 { 5383 auto text = testParser(haystack.save); 5384 auto e = collectException!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)()); 5385 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5386 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5387 } 5388 { 5389 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5390 auto text = testParser(haystack); 5391 text.pos.line += 3; 5392 text.pos.col += 7; 5393 auto e = collectException!XMLParsingException(text.skipUntilAndDrop!(needle, sqt)()); 5394 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5395 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5396 } 5397 } 5398 5399 static foreach(func; testRangeFuncs) 5400 { 5401 static foreach(sqt; [false, true]) 5402 { 5403 { 5404 enum needle = "world"; 5405 static foreach(i; 1 .. needle.length) 5406 test!(func, needle[0 .. i], sqt)("hello world", needle[i .. $], 1, 7 + i); 5407 } 5408 5409 test!(func, "l", sqt)("lello world", "ello world", 1, 2); 5410 test!(func, "ll", sqt)("lello world", "o world", 1, 5); 5411 test!(func, "le", sqt)("llello world", "llo world", 1, 4); 5412 5413 { 5414 enum needle = "great"; 5415 static foreach(i; 1 .. needle.length) 5416 { 5417 test!(func, needle[0 .. i], sqt)("プログラミング in D is great indeed", "great indeed"[i .. 
$], 5418 1, codeLen!(func, "プログラミング in D is ") + i + 1); 5419 } 5420 } 5421 5422 static foreach(haystack; ["", "a", "hello", "ディラン"]) 5423 testFail!(func, "x", sqt)(haystack, 1, 1); 5424 static foreach(haystack; ["", "l", "lte", "world", "nomatch"]) 5425 testFail!(func, "le", sqt)(haystack, 1, 1); 5426 static foreach(haystack; ["", "w", "we", "wew", "bwe", "we b", "hello we go", "nomatch"]) 5427 testFail!(func, "web", sqt)(haystack, 1, 1); 5428 } 5429 5430 test!(func, "*", false)(`hello '*' "*" * world`, `' "*" * world`, 1, 9); 5431 test!(func, "*", false)(`hello '"*' * world`, `' * world`, 1, 10); 5432 test!(func, "*", false)(`hello "'*" * world`, `" * world`, 1, 10); 5433 test!(func, "*", false)(`hello ''' * world`, ` world`, 1, 12); 5434 test!(func, "*", false)(`hello """ * world`, ` world`, 1, 12); 5435 testFail!(func, "*", false)("foo\n\n ' \n\nbar", 1, 1); 5436 testFail!(func, "*", false)(`ディラン " `, 1, 1); 5437 5438 test!(func, "*", true)(`hello '*' "*" * world`, ` world`, 1, 16); 5439 test!(func, "*", true)(`hello '"*' * world`, ` world`, 1, 13); 5440 test!(func, "*", true)(`hello "'*" * world`, ` world`, 1, 13); 5441 testFail!(func, "*", true)(`hello ''' * world`, 1, 9); 5442 testFail!(func, "*", true)(`hello """ * world`, 1, 9); 5443 testFail!(func, "*", true)("foo\n\n ' \n\nbar", 3, 4); 5444 testFail!(func, "*", true)(`ディラン " `, 1, codeLen!(func, `ディラン "`)); 5445 5446 test!(func, "*", true)(`hello '' "" * world`, ` world`, 1, 14); 5447 test!(func, "*", true)("foo '\n \n \n' bar*", "", 4, 7); 5448 } 5449 } 5450 5451 version(dxmlTests) @safe pure unittest 5452 { 5453 import std.algorithm.comparison : equal; 5454 import dxml.internal : testRangeFuncs; 5455 5456 static foreach(func; testRangeFuncs) 5457 {{ 5458 auto xml = func(`foo`); 5459 auto text = testParser!simpleXML(xml); 5460 text.skipUntilAndDrop!"o"(); 5461 assert(equal(text.input, "o")); 5462 }} 5463 } 5464 5465 auto _takeUntil(bool retSlice, string needle, bool skipQuotedText, Text)(ref Text text) 
5466 { 5467 import std.algorithm : find; 5468 import std.ascii : isWhite; 5469 import std.range : takeExactly; 5470 5471 static assert(needle.find!isWhite().empty); 5472 5473 auto orig = text.save; 5474 bool found = false; 5475 size_t takeLen = 0; 5476 size_t lineStart = 0; 5477 5478 void processNewline() 5479 { 5480 ++takeLen; 5481 nextLine!(Text.config)(text.pos); 5482 lineStart = takeLen; 5483 } 5484 5485 loop: while(!text.input.empty) 5486 { 5487 switch(text.input.front) 5488 { 5489 case cast(ElementType!(Text.Input))needle[0]: 5490 { 5491 static if(needle.length == 1) 5492 { 5493 found = true; 5494 text.input.popFront(); 5495 break loop; 5496 } 5497 else static if(needle.length == 2) 5498 { 5499 text.input.popFront(); 5500 if(!text.input.empty && text.input.front == needle[1]) 5501 { 5502 found = true; 5503 text.input.popFront(); 5504 break loop; 5505 } 5506 ++takeLen; 5507 continue; 5508 } 5509 else 5510 { 5511 text.input.popFront(); 5512 auto saved = text.input.save; 5513 foreach(i, c; needle[1 .. 
$]) 5514 { 5515 if(text.input.empty) 5516 { 5517 takeLen += i + 1; 5518 break loop; 5519 } 5520 if(text.input.front != c) 5521 { 5522 text.input = saved; 5523 ++takeLen; 5524 continue loop; 5525 } 5526 text.input.popFront(); 5527 } 5528 found = true; 5529 break loop; 5530 } 5531 } 5532 static if(skipQuotedText) 5533 { 5534 static foreach(quote; ['\'', '"']) 5535 { 5536 case quote: 5537 { 5538 auto quotePos = text.pos; 5539 quotePos.col += takeLen - lineStart; 5540 ++takeLen; 5541 while(true) 5542 { 5543 text.input.popFront(); 5544 if(text.input.empty) 5545 throw new XMLParsingException("Failed to find matching quote", quotePos); 5546 switch(text.input.front) 5547 { 5548 case quote: 5549 { 5550 ++takeLen; 5551 text.input.popFront(); 5552 continue loop; 5553 } 5554 case '\n': 5555 { 5556 processNewline(); 5557 break; 5558 } 5559 default: 5560 { 5561 ++takeLen; 5562 break; 5563 } 5564 } 5565 } 5566 assert(0); // the compiler isn't smart enough to see that this is unreachable. 5567 } 5568 } 5569 } 5570 case '\n': 5571 { 5572 processNewline(); 5573 break; 5574 } 5575 default: 5576 { 5577 ++takeLen; 5578 break; 5579 } 5580 } 5581 5582 text.input.popFront(); 5583 } 5584 5585 text.pos.col += takeLen - lineStart + needle.length; 5586 5587 if(!found) 5588 throw new XMLParsingException("Failed to find: " ~ needle, orig.pos); 5589 5590 static if(retSlice) 5591 return takeExactly(orig.input, takeLen); 5592 } 5593 5594 5595 // Okay, this name kind of sucks, because it's too close to skipUntilAndDrop, 5596 // but I'd rather do this than be passing template arguments to choose between 5597 // behaviors - especially when the logic is so different. It skips until it 5598 // reaches one of the delimiter characters. If it finds one of them, then the 5599 // first character in the input is the delimiter that was found, and if it 5600 // doesn't find either, then it throws. 5601 template skipToOneOf(delims...) 
5602 { 5603 static foreach(delim; delims) 5604 { 5605 static assert(is(typeof(delim) == char)); 5606 static assert(!isSpace(delim)); 5607 } 5608 5609 void skipToOneOf(Text)(ref Text text) 5610 { 5611 while(!text.input.empty) 5612 { 5613 switch(text.input.front) 5614 { 5615 static foreach(delim; delims) 5616 case delim: return; 5617 case '\n': 5618 { 5619 nextLine!(Text.config)(text.pos); 5620 text.input.popFront(); 5621 break; 5622 } 5623 default: 5624 { 5625 popFrontAndIncCol(text); 5626 break; 5627 } 5628 } 5629 } 5630 throw new XMLParsingException("Prematurely reached end of document", text.pos); 5631 } 5632 } 5633 5634 version(dxmlTests) unittest 5635 { 5636 import core.exception : AssertError; 5637 import std.algorithm.comparison : equal; 5638 import std.exception : assertNotThrown, collectException, enforce; 5639 import dxml.internal : codeLen, testRangeFuncs; 5640 5641 static void test(alias func, delims...)(string origHaystack, string remainder, 5642 int row, int col, size_t line = __LINE__) 5643 { 5644 auto haystack = func(origHaystack); 5645 { 5646 auto text = testParser(haystack.save); 5647 assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest 1", __FILE__, line); 5648 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5649 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5650 } 5651 { 5652 auto pos = TextPos(row + 3, row == 1 ? 
col + 7 : col); 5653 auto text = testParser(haystack); 5654 text.pos.line += 3; 5655 text.pos.col += 7; 5656 assertNotThrown!XMLParsingException(text.skipToOneOf!delims(), "unittest 4", __FILE__, line); 5657 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5658 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5659 } 5660 } 5661 5662 static void testFail(alias func, delims...)(string origHaystack, int row, int col, size_t line = __LINE__) 5663 { 5664 auto haystack = func(origHaystack); 5665 { 5666 auto text = testParser(haystack.save); 5667 auto e = collectException!XMLParsingException(text.skipToOneOf!delims()); 5668 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5669 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5670 } 5671 { 5672 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5673 auto text = testParser(haystack); 5674 text.pos.line += 3; 5675 text.pos.col += 7; 5676 auto e = collectException!XMLParsingException(text.skipToOneOf!delims()); 5677 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5678 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5679 } 5680 } 5681 5682 static foreach(func; testRangeFuncs) 5683 { 5684 test!(func, 'o', 'w')("hello world", "o world", 1, 5); 5685 test!(func, 'r', 'w', '1', '+', '*')("hello world", "world", 1, 7); 5686 test!(func, 'z', 'y')("abc\n\n\n \n\n wxyzzy \nf\ng", "yzzy \nf\ng", 6, 6); 5687 test!(func, 'o', 'g')("abc\n\n\n \n\n wxyzzy \nf\ng", "g", 8, 1); 5688 test!(func, 'g', 'x')("プログラミング in D is great indeed", "great indeed", 5689 1, codeLen!(func, "プログラミング in D is ") + 1); 5690 5691 testFail!(func, 'a', 'b')("hello world", 1, 12); 5692 testFail!(func, 'a', 'b')("hello\n\nworld", 3, 6); 5693 testFail!(func, 'a', 'b')("プログラミング", 1, codeLen!(func, "プログラミング") + 1); 5694 } 5695 } 5696 5697 version(dxmlTests) @safe pure unittest 5698 { 5699 
import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {{
        auto xml = func(`foo`);
        auto text = testParser!simpleXML(xml);
        text.skipToOneOf!('o')();
        assert(equal(text.input, "oo"));
    }}
}


// Takes text enclosed in single or double quotes off of the front of the
// input. The front of the input must be the opening quote. What's returned is
// the text between the quotes (the result of takeUntilAndDrop for that quote
// character), and the input is left one code unit past the closing quote.
//
// Throws an XMLParsingException with the current position if the front of the
// input is neither " nor '.
auto takeEnquotedText(Text)(ref Text text)
{
    checkNotEmpty(text);

    // takeUntilAndDrop takes its delimiter at compile time, so each kind of
    // quote needs its own instantiation.
    switch(text.input.front)
    {
        case '"':
        {
            popFrontAndIncCol(text);
            return takeUntilAndDrop!`"`(text);
        }
        case '\'':
        {
            popFrontAndIncCol(text);
            return takeUntilAndDrop!`'`(text);
        }
        default:
            break;
    }

    throw new XMLParsingException("Expected quoted text", text.pos);
}

version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown, enforce;
    import std.range : only;
    import dxml.internal : testRangeFuncs;

    static void test(alias func)(string origHaystack, string expected, string remainder,
                                 int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!func();
        {
            auto text = testParser(haystack.save);
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected.save), "unittest failure 1", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line);
            enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line);
        }
        {
auto pos = TextPos(row + 3, row == 1 ? col + 7 : col);
            auto text = testParser(haystack);
            text.pos.line += 3;
            text.pos.col += 7;
            enforce!AssertError(equal(takeEnquotedText(text), adjExpected), "unittest failure 3", __FILE__, line);
            enforce!AssertError(equal(text.input, remainder), "unittest failure 4", __FILE__, line);
            enforce!AssertError(text.pos == pos, "unittest failure 3", __FILE__, line);
        }
    }

    static void testFail(alias func)(string origHaystack, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto text = testParser(haystack);
        assertThrown!XMLParsingException(text.takeEnquotedText(), "unittest failure", __FILE__, line);
    }

    static foreach(func; testRangeFuncs)
    {
        foreach(quote; only("\"", "'"))
        {
            test!func(quote ~ quote, "", "", 1, 3);
            test!func(quote ~ "hello world" ~ quote, "hello world", "", 1, 14);
            test!func(quote ~ "hello world" ~ quote ~ " foo", "hello world", " foo", 1, 14);
            {
                import std.utf : codeLength;
                auto haystack = quote ~ "プログラミング " ~ quote ~ "in D";
                enum len = cast(int)codeLength!(ElementEncodingType!(typeof(func(haystack))))("プログラミング ");
                test!func(haystack, "プログラミング ", "in D", 1, len + 3);
            }
        }

        foreach(str; only(`hello`, `"hello'`, `"hello`, `'hello"`, `'hello`, ``, `"'`, `"`, `'"`, `'`))
            testFail!func(str);
    }
}


// Takes a name (per the Name grammar rule) off of the front of the input and
// returns it as a slice of the input.
// The name ends at the first XML whitespace character, at the first of the
// given delimiters, or at the end of the input - whichever comes first. The
// whitespace/delimiter is not part of the returned name and is left at the
// front of the input. The column of text.pos is advanced by the number of
// code units in the name.
// An XMLParsingException is thrown if the first character is not a valid
// name start character or if a later character is not a valid name character.
template takeName(delims...)
{
    // Every delimiter has to be a char, and none of them may be whitespace
    // (whitespace already terminates a name).
    static foreach(delim; delims)
    {
        static assert(is(typeof(delim) == char), delim);
        static assert(!isSpace(delim));
    }

    auto takeName(Text)(ref Text text)
    {
        import std.format : format;
        import std.range : takeExactly;
        import std.utf : decodeFront, UseReplacementDchar;
        import dxml.internal : isNameStartChar, isNameChar;

        assert(!text.input.empty);

        // The name is returned as a slice of this saved copy once its length
        // in code units is known.
        auto start = text.input.save;

        // The first character has stricter rules than the rest of the name.
        // decodeFront pops it and stores its length in code units in nameLen.
        size_t nameLen;
        immutable firstChar = text.input.decodeFront!(UseReplacementDchar.yes)(nameLen);
        if(!isNameStartChar(firstChar))
            throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(firstChar), text.pos);

        scan: while(!text.input.empty)
        {
            immutable unit = text.input.front;
            if(isSpace(unit))
                break;
            static foreach(delim; delims)
            {
                if(unit == delim)
                    break scan;
            }

            size_t unitCount;
            immutable nextChar = text.input.decodeFront!(UseReplacementDchar.yes)(unitCount);
            if(!isNameChar(nextChar))
            {
                // Report the position of the offending character rather
                // than the start of the name.
                text.pos.col += nameLen;
                throw new XMLParsingException(format!"Name contains invalid character: 0x%0x"(nextChar), text.pos);
            }
            nameLen += unitCount;
        }

        text.pos.col += nameLen;

        return takeExactly(start, nameLen);
    }
}

version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.typecons : tuple;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func, delim...)(string origHaystack, string expected, string remainder,
                                           int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected =
expected.toCmpType!func(); 5870 { 5871 auto text = testParser(haystack.save); 5872 enforce!AssertError(equal(text.takeName!delim(), adjExpected.save), 5873 "unittest failure 1", __FILE__, line); 5874 enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 5875 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 5876 } 5877 { 5878 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 5879 auto text = testParser(haystack); 5880 text.pos.line += 3; 5881 text.pos.col += 7; 5882 enforce!AssertError(equal(text.takeName!delim(), adjExpected), 5883 "unittest failure 4", __FILE__, line); 5884 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 5885 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 5886 } 5887 } 5888 5889 static void testFail(alias func, delim...)(string origHaystack, int row, int col, size_t line = __LINE__) 5890 { 5891 auto haystack = func(origHaystack); 5892 { 5893 auto text = testParser(haystack.save); 5894 auto e = collectException!XMLParsingException(text.takeName!delim()); 5895 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 5896 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 5897 } 5898 { 5899 auto pos = TextPos(row + 3, row == 1 ? 
col + 7 : col); 5900 auto text = testParser(haystack); 5901 text.pos.line += 3; 5902 text.pos.col += 7; 5903 auto e = collectException!XMLParsingException(text.takeName!delim()); 5904 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 5905 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 5906 } 5907 } 5908 5909 static foreach(func; testRangeFuncs) 5910 { 5911 static foreach(str; ["hello", "プログラミング", "h_:llo-.42", "_.", "_-", "_42"]) 5912 {{ 5913 enum len = codeLen!(func, str); 5914 5915 static foreach(remainder; ["", " ", "\t", "\r", "\n", " foo", "\tfoo", "\rfoo", "\nfoo", " foo \n \r "]) 5916 {{ 5917 enum strRem = str ~ remainder; 5918 enum delimRem = '>' ~ remainder; 5919 enum hay = str ~ delimRem; 5920 test!func(strRem, str, remainder, 1, len + 1); 5921 test!(func, '=')(strRem, str, remainder, 1, len + 1); 5922 test!(func, '>', '|')(hay, str, delimRem, 1, len + 1); 5923 test!(func, '|', '>')(hay, str, delimRem, 1, len + 1); 5924 }} 5925 }} 5926 5927 static foreach(t; [tuple(" ", 1, 1), tuple("<", 1, 1), tuple("foo!", 1, 4), tuple("foo!<", 1, 4)]) 5928 {{ 5929 testFail!func(t[0], t[1], t[2]); 5930 testFail!func(t[0] ~ '>', t[1], t[2]); 5931 testFail!(func, '?')(t[0], t[1], t[2]); 5932 testFail!(func, '=')(t[0] ~ '=', t[1], t[2]); 5933 }} 5934 5935 testFail!(func, '>')(">", 1, 1); 5936 testFail!(func, '?')("?", 1, 1); 5937 testFail!(func, '?')("プログ&ラミング", 1, codeLen!(func, "プログ&")); 5938 5939 static foreach(t; [tuple("42", 1, 1), tuple(".", 1, 1), tuple(".a", 1, 1)]) 5940 { 5941 testFail!func(t[0], t[1], t[2]); 5942 testFail!(func, '>')(t[0], t[1], t[2]); 5943 } 5944 } 5945 } 5946 5947 version(dxmlTests) @safe pure unittest 5948 { 5949 import std.algorithm.comparison : equal; 5950 import dxml.internal : testRangeFuncs; 5951 5952 static foreach(func; testRangeFuncs) 5953 {{ 5954 auto xml = func(`foo`); 5955 auto text = testParser!simpleXML(xml); 5956 assert(equal(text.takeName(), "foo")); 5957 }} 5958 } 5959 5960 5961 // 
// This removes an attribute value from the front of the input, validates it,
// and returns it.
//
// The front of the input must be the opening quote (either " or '). The
// returned range is the text between the quotes (the quotes themselves are not
// included), and the input is left one code unit past the closing quote, with
// text.pos updated to match (including any newlines inside the value).
//
// Character references are validated by calling dxml.util.parseCharRef on
// them. Entity references are handled according to Text.config.throwOnEntityRef:
// with ThrowOnEntityRef.yes, only the five predefined entity references are
// accepted; otherwise, anything of the form '&' Name ';' is accepted.
//
// An XMLParsingException is thrown if
//   - the front of the input is not a quote ("Expected quoted text"),
//   - the input ends before the closing quote (reported at the opening quote),
//   - the value contains '<',
//   - the value contains '&' which does not start a valid reference,
//   - the value contains a character which is not legal in XML or which is
//     invalid Unicode.
// In all but the unterminated case, the position in the exception is the
// position of the offending character.
// NOTE(review): empty input is handed to checkNotEmpty, which is defined
// elsewhere; presumably it throws an XMLParsingException - confirm.
auto takeAttValue(Text)(ref Text text)
{
    // AttValue    ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    // Reference   ::= EntityRef | CharRef
    // EntityRef   ::= '&' Name ';'
    // PEReference ::= '%' Name ';'

    import std.range : takeExactly;

    checkNotEmpty(text);
    immutable quote = text.input.front;
    immutable quotePos = text.pos;
    if(quote != '"' && quote != '\'')
        throw new XMLParsingException("Expected quoted text", text.pos);

    popFrontAndIncCol(text);

    // text.pos is not updated per character while scanning. Instead, takeLen
    // counts the code units consumed so far, and lineStart is the value that
    // takeLen had at the start of the current line, so the column of the
    // current character is always text.pos.col + takeLen - lineStart.
    size_t lineStart = 0;
    auto orig = text.input.save;
    size_t takeLen;

    loop: while(true)
    {
        if(text.input.empty)
            throw new XMLParsingException("Unterminated attribute value", quotePos);
        switch(text.input.front)
        {
            // A quote only terminates the value if it's the same kind of
            // quote that opened it. Otherwise, it's an ordinary character.
            case '"':
            {
                if(quote == '"')
                {
                    text.input.popFront();
                    break loop;
                }
                goto default;
            }
            case '\'':
            {
                if(quote == '\'')
                {
                    text.input.popFront();
                    break loop;
                }
                goto default;
            }
            case '&':
            {
                // Character References
                {
                    import dxml.util : parseCharRef;
                    auto temp = text.input.save;
                    auto charRef = parseCharRef(temp);
                    if(!charRef.isNull)
                    {
                        static if(hasLength!(Text.Input))
                        {
                            takeLen += text.input.length - temp.length;
                            text.input = temp;
                        }
                        else
                        {
                            // Without length, the number of code units has to
                            // be counted by walking to the terminating ';'.
                            while(text.input.front != ';')
                            {
                                ++takeLen;
                                text.input.popFront();
                            }
                            ++takeLen;
                            text.input.popFront();
                        }
                        continue;
                    }
                }

                // Offset of the & within the current line - used to report
                // its position if it turns out not to start a valid reference.
                immutable ampLen = takeLen - lineStart;
                ++takeLen;
                text.input.popFront();

                // Std Entity References
                static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                {
                    import std.algorithm.searching : startsWith;

                    static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                    {
                        if(text.input.save.startsWith(entRef))
                        {
                            takeLen += entRef.length;
                            text.input.popFrontN(entRef.length);
                            continue loop;
                        }
                    }

                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "reference, and this parser only supports entity " ~
                                                  "references if they're predefined by the spec. This is not " ~
                                                  "a valid character reference or one of the predefined " ~
                                                  "entity references.", text.pos);
                }
                // All Entity References
                else
                {
                    import std.utf : decodeFront, UseReplacementDchar;
                    import dxml.internal : isNameStartChar, isNameChar;

                    if(text.input.empty || text.input.front == quote)
                        goto failedEntityRef;

                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameStartChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }

                    while(true)
                    {
                        if(text.input.empty)
                            goto failedEntityRef;
                        immutable c = text.input.front;
                        if(c == ';')
                        {
                            // The ';' itself is popped by the popFront at the
                            // bottom of the outer loop.
                            ++takeLen;
                            break;
                        }
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                        if(!isNameChar(decodedC))
                            goto failedEntityRef;
                        takeLen += numCodeUnits;
                    }
                    break;

                    failedEntityRef:
                    text.pos.col += ampLen;
                    throw new XMLParsingException("& is only legal in an attribute value as part of a " ~
                                                  "character or entity reference, and this is not a valid " ~
                                                  "character or entity reference.", text.pos);
                }
            }
            case '<':
            {
                text.pos.col += takeLen - lineStart;
                throw new XMLParsingException("< is not legal in an attribute value", text.pos);
            }
            case '\n':
            {
                ++takeLen;
                nextLine!(Text.config)(text.pos);
                lineStart = takeLen;
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import std.format : format;
                import dxml.internal : isXMLChar;

                immutable c = text.input.front;
                if(isASCII(c))
                {
                    if(!isXMLChar(c))
                    {
                        // Move the position to the offending character
                        // before reporting it.
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    ++takeLen;
                    break;
                }
                import std.utf : decodeFront, UseReplacementDchar, UTFException;
                // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                // replacement character is considered valid XML, and if we decoded using it, then
                // all of the invalid Unicode characters would come out as the replacement character
                // and then be treated as valid instead of being caught, which isn't all bad, but
                // the spec requires that they be treated as invalid instead of playing nice and
                // using the replacement character.
                try
                {
                    size_t numCodeUnits;
                    immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                    if(!isXMLChar(decodedC))
                    {
                        enum fmt = "Character is not legal in an XML File: 0x%0x";
                        text.pos.col += takeLen - lineStart;
                        throw new XMLParsingException(format!fmt(decodedC), text.pos);
                    }
                    takeLen += numCodeUnits;
                }
                catch(UTFException e)
                {
                    // takeLen has not been incremented for the bad sequence,
                    // so this is the position where it starts.
                    text.pos.col += takeLen - lineStart;
                    throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                // decodeFront already popped the character.
                continue;
            }
        }
        text.input.popFront();
    }

    // + 1 for the closing quote.
    text.pos.col += takeLen - lineStart + 1;
    return takeExactly(orig, takeLen);
}

version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.algorithm.comparison : equal;
    import std.exception : collectException, enforce;
    import std.range : only;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func, ThrowOnEntityRef toer)(string origHaystack, string expected, string remainder,
                                                        int row, int col, size_t line = __LINE__)
    {
        auto haystack = func(origHaystack);
        auto adjExpected = expected.toCmpType!(func, toer)();
        {
            auto text = testParser!(makeConfig(toer))(haystack.save);
            enforce!AssertError(equal(text.takeAttValue(), adjExpected.save),
                                "unittest failure 1", __FILE__, line);
enforce!AssertError(equal(text.input, remainder), "unittest failure 2", __FILE__, line); 6193 enforce!AssertError(text.pos == TextPos(row, col), "unittest failure 3", __FILE__, line); 6194 } 6195 { 6196 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 6197 auto text = testParser!(makeConfig(toer))(haystack); 6198 text.pos.line += 3; 6199 text.pos.col += 7; 6200 enforce!AssertError(equal(text.takeAttValue(), adjExpected), 6201 "unittest failure 4", __FILE__, line); 6202 enforce!AssertError(equal(text.input, remainder), "unittest failure 5", __FILE__, line); 6203 enforce!AssertError(text.pos == pos, "unittest failure 6", __FILE__, line); 6204 } 6205 } 6206 6207 static void testFail(alias func, ThrowOnEntityRef toer)(string origHaystack, 6208 int row, int col, size_t line = __LINE__) 6209 { 6210 auto haystack = func(origHaystack); 6211 { 6212 auto text = testParser!(makeConfig(toer))(haystack.save); 6213 auto e = collectException!XMLParsingException(text.takeAttValue()); 6214 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 6215 enforce!AssertError(e.pos == TextPos(row, col), "unittest failure 2", __FILE__, line); 6216 } 6217 { 6218 auto pos = TextPos(row + 3, row == 1 ? 
col + 7 : col); 6219 auto text = testParser!(makeConfig(toer))(haystack); 6220 text.pos.line += 3; 6221 text.pos.col += 7; 6222 auto e = collectException!XMLParsingException(text.takeAttValue()); 6223 enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line); 6224 enforce!AssertError(e.pos == pos, "unittest failure 2", __FILE__, line); 6225 } 6226 } 6227 6228 static foreach(i, func; testRangeFuncs) 6229 { 6230 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 6231 { 6232 test!(func, toer)(`""`, "", "", 1, 3); 6233 test!(func, toer)(`"J"`, "J", "", 1, 4); 6234 test!(func, toer)(`"foo"`, "foo", "", 1, 6); 6235 test!(func, toer)(`"プログラミング"`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3); 6236 test!(func, toer)(`"foo"bar`, "foo", "bar", 1, 6); 6237 test!(func, toer)(`"プログラミング" after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3); 6238 6239 test!(func, toer)(`''`, "", "", 1, 3); 6240 test!(func, toer)(`'J'`, "J", "", 1, 4); 6241 test!(func, toer)(`'foo'`, "foo", "", 1, 6); 6242 test!(func, toer)(`'プログラミング'`, "プログラミング", "", 1, codeLen!(func, "プログラミング") + 3); 6243 test!(func, toer)(`'foo'bar`, "foo", "bar", 1, 6); 6244 test!(func, toer)(`'プログラミング' after`, "プログラミング", " after", 1, codeLen!(func, "プログラミング") + 3); 6245 6246 test!(func, toer)(`"&><"`, "&><", "", 1, 16); 6247 test!(func, toer)(`"'""`, "'"", "", 1, 15); 6248 test!(func, toer)(`"hello&><world"`, "hello&><world", "", 1, 26); 6249 test!(func, toer)(`".....&><....."`, ".....&><.....", "", 1, 26); 6250 test!(func, toer)(`"ディラン"`, "ディラン", "", 1, 35); 6251 test!(func, toer)(`"hello¯M&world"`, "hello¯M&world", "", 1, 29); 6252 6253 test!(func, toer)(`'&><'`, "&><", "", 1, 16); 6254 test!(func, toer)(`'hello&><world'`, "hello&><world", "", 1, 26); 6255 test!(func, toer)(`''"'`, "'"", "", 1, 15); 6256 test!(func, toer)(`'.....&><.....'`, ".....&><.....", "", 1, 26); 6257 test!(func, toer)(`'ディラン'`, "ディラン", "", 1, 35); 6258 test!(func, toer)(`'hello¯M&world'`, "hello¯M&world", "", 
1, 29); 6259 6260 test!(func, toer)("'hello\nworld'", "hello\nworld", "", 2, 7); 6261 test!(func, toer)("'hello\nworld\n'", "hello\nworld\n", "", 3, 2); 6262 6263 test!(func, toer)(`"'''"whatever`, "'''", "whatever", 1, 6); 6264 test!(func, toer)(`'"""'whatever`, `"""`, "whatever", 1, 6); 6265 6266 test!(func, toer)(`"*"`, "*", "", 1, 8); 6267 test!(func, toer)(`"B"`, "B", "", 1, 9); 6268 test!(func, toer)(`"%foo"`, "%foo", "", 1, 7); 6269 6270 testFail!(func, toer)(`"`, 1, 1); 6271 testFail!(func, toer)(`"foo`, 1, 1); 6272 testFail!(func, toer)(`"foo'`, 1, 1); 6273 testFail!(func, toer)(`"<"`, 1, 2); 6274 testFail!(func, toer)(`"&`, 1, 2); 6275 testFail!(func, toer)(`"&"`, 1, 2); 6276 testFail!(func, toer)(`"&x"`, 1, 2); 6277 testFail!(func, toer)(`"&.;"`, 1, 2); 6278 testFail!(func, toer)(`"&&;"`, 1, 2); 6279 testFail!(func, toer)(`"&a"`, 1, 2); 6280 testFail!(func, toer)(`"&a`, 1, 2); 6281 testFail!(func, toer)(`"hello&;"`, 1, 7); 6282 testFail!(func, toer)(`"hello&;world"`,1, 7); 6283 testFail!(func, toer)(`"hello&<;world"`,1, 7); 6284 testFail!(func, toer)(`"hello&world"`,1, 7); 6285 testFail!(func, toer)(`"hello<world"`,1, 7); 6286 testFail!(func, toer)(`"hello world&"`, 1, 13); 6287 testFail!(func, toer)(`"hello world&;"`, 1, 13); 6288 testFail!(func, toer)(`"hello world&foo"`, 1, 13); 6289 testFail!(func, toer)(`"foo<"`, 1, 5); 6290 testFail!(func, toer)(`"&#`, 1, 2); 6291 testFail!(func, toer)(`"&#"`, 1, 2); 6292 testFail!(func, toer)(`"&#;"`, 1, 2); 6293 testFail!(func, toer)(`"&#x;"`, 1, 2); 6294 testFail!(func, toer)(`"&#AF;"`, 1, 2); 6295 testFail!(func, toer)(`"&#x`, 1, 2); 6296 testFail!(func, toer)(`"M`, 1, 2); 6297 testFail!(func, toer)(`"M`, 1, 1); 6298 testFail!(func, toer)(`"�`, 1, 2); 6299 testFail!(func, toer)(`"�`, 1, 2); 6300 testFail!(func, toer)(`"�"`, 1, 2); 6301 6302 testFail!(func, toer)(`'`, 1, 1); 6303 testFail!(func, toer)(`'foo`, 1, 1); 6304 testFail!(func, toer)(`'foo"`, 1, 1); 6305 testFail!(func, toer)(`'<'`, 1, 2); 6306 
testFail!(func, toer)("'\v'", 1, 2); 6307 testFail!(func, toer)("'\uFFFE'", 1, 2); 6308 testFail!(func, toer)(`'&`, 1, 2); 6309 testFail!(func, toer)(`'&'`, 1, 2); 6310 testFail!(func, toer)(`'&x'`, 1, 2); 6311 testFail!(func, toer)(`'&.;'`, 1, 2); 6312 testFail!(func, toer)(`'&&;'`, 1, 2); 6313 testFail!(func, toer)(`'&a'`, 1, 2); 6314 testFail!(func, toer)(`'&a`, 1, 2); 6315 testFail!(func, toer)(`'hello&;'`, 1, 7); 6316 testFail!(func, toer)(`'hello&;world'`, 1, 7); 6317 testFail!(func, toer)(`'hello&<;world'`, 1, 7); 6318 testFail!(func, toer)(`'hello&world'`, 1, 7); 6319 testFail!(func, toer)(`'hello<world'`, 1, 7); 6320 testFail!(func, toer)(`'hello world&'`, 1, 13); 6321 testFail!(func, toer)(`'hello world&;'`, 1, 13); 6322 testFail!(func, toer)(`'hello world&foo'`, 1, 13); 6323 testFail!(func, toer)(`'foo<'`, 1, 5); 6324 testFail!(func, toer)(`'&#`, 1, 2); 6325 testFail!(func, toer)(`'&#'`, 1, 2); 6326 testFail!(func, toer)(`'&#;'`, 1, 2); 6327 testFail!(func, toer)(`'&#x;'`, 1, 2); 6328 testFail!(func, toer)(`'&#AF;'`, 1, 2); 6329 testFail!(func, toer)(`'&#x`, 1, 2); 6330 testFail!(func, toer)(`'M`, 1, 2); 6331 testFail!(func, toer)(`'M`, 1, 1); 6332 testFail!(func, toer)(`'�`, 1, 2); 6333 testFail!(func, toer)(`'�`, 1, 2); 6334 testFail!(func, toer)(`'�'`, 1, 2); 6335 testFail!(func, toer)("'
\nF;'", 1, 2); 6336 testFail!(func, toer)("'&\n;'", 1, 2); 6337 testFail!(func, toer)("'&\namp;'", 1, 2); 6338 testFail!(func, toer)("'\n&&;'", 2, 6); 6339 } 6340 { 6341 alias toer = ThrowOnEntityRef.yes; 6342 testFail!(func, toer)(`"&foo;"`, 1, 2); 6343 testFail!(func, toer)(`"hello world&foo;"`, 1, 13); 6344 testFail!(func, toer)(`"hello &foo; world"`, 1, 8); 6345 testFail!(func, toer)(`"&am;"`, 1, 2); 6346 testFail!(func, toer)(`"&e;"`, 1, 2); 6347 testFail!(func, toer)(`"&l;"`, 1, 2); 6348 testFail!(func, toer)(`"<e;"`, 1, 2); 6349 testFail!(func, toer)(`"&g;"`, 1, 2); 6350 testFail!(func, toer)(`">e;"`, 1, 2); 6351 testFail!(func, toer)(`"&apo;"`, 1, 2); 6352 testFail!(func, toer)(`"&aposs;"`, 1, 2); 6353 testFail!(func, toer)(`"&quo;"`, 1, 2); 6354 testFail!(func, toer)(`""e;"`, 1, 2); 6355 6356 testFail!(func, toer)(`'&foo;'`, 1, 2); 6357 testFail!(func, toer)(`'hello world&foo;'`, 1, 13); 6358 testFail!(func, toer)(`'hello &foo; world'`, 1, 8); 6359 testFail!(func, toer)(`'&am;'`, 1, 2); 6360 testFail!(func, toer)(`'&e;'`, 1, 2); 6361 testFail!(func, toer)(`'&l;'`, 1, 2); 6362 testFail!(func, toer)(`'<e;'`, 1, 2); 6363 testFail!(func, toer)(`'&g;'`, 1, 2); 6364 testFail!(func, toer)(`'>e;'`, 1, 2); 6365 testFail!(func, toer)(`'&apo;'`, 1, 2); 6366 testFail!(func, toer)(`'&aposs;'`, 1, 2); 6367 testFail!(func, toer)(`'&quo;'`, 1, 2); 6368 testFail!(func, toer)(`'"e;'`, 1, 2); 6369 } 6370 { 6371 alias toer = ThrowOnEntityRef.no; 6372 test!(func, toer)(`"&foo;"`, "&foo;", "", 1, 8); 6373 test!(func, toer)(`"hello world&foo;"`, "hello world&foo;", "", 1, 19); 6374 test!(func, toer)(`"hello &foo; world"`, "hello &foo; world", "", 1, 20); 6375 test!(func, toer)(`"&am;"`, "&am;", "", 1, 7); 6376 test!(func, toer)(`"&e;"`, "&e;", "", 1, 9); 6377 test!(func, toer)(`"&l;"`, "&l;", "", 1, 6); 6378 test!(func, toer)(`"<e;"`, "<e;", "", 1, 8); 6379 test!(func, toer)(`"&g;"`, "&g;", "", 1, 6); 6380 test!(func, toer)(`">e;"`, ">e;", "", 1, 8); 6381 test!(func, 
toer)(`"&apo;"`, "&apo;", "", 1, 8); 6382 test!(func, toer)(`"&aposs;"`, "&aposs;", "", 1, 10); 6383 test!(func, toer)(`"&quo;"`, "&quo;", "", 1, 8); 6384 test!(func, toer)(`""e;"`, ""e;", "", 1, 10); 6385 6386 test!(func, toer)(`'&foo;'`, "&foo;", "", 1, 8); 6387 test!(func, toer)(`'hello world&foo;'`, "hello world&foo;", "", 1, 19); 6388 test!(func, toer)(`'hello &foo; world'`, "hello &foo; world", "", 1, 20); 6389 test!(func, toer)(`'&am;'`, "&am;", "", 1, 7); 6390 test!(func, toer)(`'&e;'`, "&e;", "", 1, 9); 6391 test!(func, toer)(`'&l;'`, "&l;", "", 1, 6); 6392 test!(func, toer)(`'<e;'`, "<e;", "", 1, 8); 6393 test!(func, toer)(`'&g;'`, "&g;", "", 1, 6); 6394 test!(func, toer)(`'>e;'`, ">e;", "", 1, 8); 6395 test!(func, toer)(`'&apo;'`, "&apo;", "", 1, 8); 6396 test!(func, toer)(`'&aposs;'`, "&aposs;", "", 1, 10); 6397 test!(func, toer)(`'&quo;'`, "&quo;", "", 1, 8); 6398 test!(func, toer)(`'"e;'`, ""e;", "", 1, 10); 6399 } 6400 } 6401 6402 // These can't be tested with testFail, because attempting to convert 6403 // invalid Unicode results in UnicodeExceptions before parseXML even 6404 // gets called. 6405 import std.meta : AliasSeq; 6406 static foreach(str; AliasSeq!("'" ~ cast(string)[255] ~ "'", 6407 "'"w ~ cast(wstring)[0xD800] ~ "'", 6408 "'"d ~ cast(dstring)[0xD800] ~ "'")) 6409 {{ 6410 auto text = testParser(str); 6411 auto e = collectException!XMLParsingException(text.takeAttValue()); 6412 assert(e ! 
is null);
        assert(e.pos == TextPos(1, 2));
    }}
}

version(dxmlTests) @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;

    static foreach(func; testRangeFuncs)
    {
        static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)])
        {{
            auto xml = func(`'foo'`);
            // Instantiate with the config being iterated over so that each
            // configuration is actually checked for @safe and pure.
            auto text = testParser!config(xml);
            assert(equal(text.takeAttValue(), "foo"));
        }}
    }
}


// Validates an EntityType.text field to verify that it does not contain invalid
// characters.
//
// The validation is done on a copy obtained with orig.save, and nothing is
// assigned back to orig. Nothing is returned; an XMLParsingException is
// thrown at the first problem found:
//   - a character which is not legal in XML or which is invalid Unicode
//     (always checked),
// and, only if allowRestrictedChars is false,
//   - '<',
//   - "]]>",
//   - '&' which does not start a valid character reference (per
//     dxml.util.parseCharRef) or a valid entity reference. With
//     ThrowOnEntityRef.yes, only the five predefined entity references are
//     accepted; otherwise, anything of the form '&' Name ';' is accepted.
void checkText(bool allowRestrictedChars, Text)(ref Text orig)
{
    import std.format : format;
    import std.utf : decodeFront, UseReplacementDchar;

    auto text = orig.save;
    loop: while(!text.input.empty)
    {
        switch(text.input.front)
        {
            static if(!allowRestrictedChars)
            {
                case '&':
                {
                    import dxml.util : parseCharRef;

                    // Character References
                    {
                        auto temp = text.input.save;
                        auto charRef = parseCharRef(temp);
                        if(!charRef.isNull)
                        {
                            static if(hasLength!(Text.Input))
                            {
                                text.pos.col += text.input.length - temp.length;
                                text.input = temp;
                            }
                            else
                            {
                                // Without length, walk to the terminating ';'
                                // one code unit at a time.
                                while(text.input.front != ';')
                                    popFrontAndIncCol(text);
                                popFrontAndIncCol(text);
                            }
                            continue;
                        }
                    }

                    // Where to point the exception if this turns out not to
                    // be a valid reference.
                    immutable ampPos = text.pos;
                    popFrontAndIncCol(text);

                    // Std Entity References
                    static if(Text.config.throwOnEntityRef == ThrowOnEntityRef.yes)
                    {
                        static foreach(entRef; ["amp;", "apos;", "quot;", "lt;", "gt;"])
                        {
                            if(text.stripStartsWith(entRef))
                                continue loop;
                        }

                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "reference, and this parser only supports entity references if " ~
                                                      "they're predefined by the spec. This is not a valid character " ~
                                                      "reference or one of the predefined entity references.", ampPos);
                    }
                    // All Entity References
                    else
                    {
                        import dxml.internal : isNameStartChar, isNameChar;

                        if(text.input.empty)
                            goto failedEntityRef;
                        {
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameStartChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        while(true)
                        {
                            if(text.input.empty)
                                goto failedEntityRef;
                            immutable c = text.input.front;
                            if(c == ';')
                                break;
                            size_t numCodeUnits;
                            immutable decodedC = text.input.decodeFront!(UseReplacementDchar.yes)(numCodeUnits);
                            if(!isNameChar(decodedC))
                                goto failedEntityRef;
                            text.pos.col += numCodeUnits;
                        }
                        assert(text.input.front == ';');
                        popFrontAndIncCol(text);
                        continue;

                        failedEntityRef:
                        throw new XMLParsingException("& is only legal in an EntityType.text entity as part of a " ~
                                                      "character or entity reference, and this is not a valid " ~
                                                      "character or entity reference.", ampPos);
                    }
                }
                case '<': throw new XMLParsingException("< is not legal in EntityType.text", text.pos);
                case ']':
                {
                    popFrontAndIncCol(text);
                    if(text.stripStartsWith("]>"))
                    {
                        // Back up over all three characters so that the
                        // exception points at the start of the "]]>".
                        text.pos.col -= 3;
                        throw new XMLParsingException("]]> is not legal in EntityType.text", text.pos);
                    }
                    break;
                }
            }
            case '\n':
            {
                nextLine!(Text.config)(text.pos);
                text.input.popFront();
                break;
            }
            default:
            {
                import std.ascii : isASCII;
                import dxml.internal : isXMLChar;
                immutable c = text.input.front;
                if(isASCII(c))
                {
                    if(!isXMLChar(c))
                    {
                        throw new XMLParsingException(format!"Character is not legal in an XML File: 0x%0x"(c),
                                                      text.pos);
                    }
                    popFrontAndIncCol(text);
                }
                else
                {
                    import std.utf : UTFException;
                    // Annoyingly, letting decodeFront throw is the easier way to handle this, since the
                    // replacement character is considered valid XML, and if we decoded using it, then
                    // all of the invalid Unicode characters would come out as the replacement character
                    // and then be treated as valid instead of being caught, which isn't all bad, but
                    // the spec requires that they be treated as invalid instead of playing nice and
                    // using the replacement character.
                    try
                    {
                        size_t numCodeUnits;
                        immutable decodedC = text.input.decodeFront!(UseReplacementDchar.no)(numCodeUnits);
                        if(!isXMLChar(decodedC))
                        {
                            enum fmt = "Character is not legal in an XML File: 0x%0x";
                            throw new XMLParsingException(format!fmt(decodedC), text.pos);
                        }
                        text.pos.col += numCodeUnits;
                    }
                    catch(UTFException)
                        throw new XMLParsingException("Invalid Unicode character", text.pos);
                }
                break;
            }
        }
    }
}

version(dxmlTests) unittest
{
    import core.exception : AssertError;
    import std.exception : assertNotThrown, collectException, enforce;
    import dxml.internal : codeLen, testRangeFuncs;

    static void test(alias func, bool arc, ThrowOnEntityRef toer)(string text, size_t line = __LINE__)
    {
        auto xml = func(text);
        auto range = testParser!(makeConfig(toer))(xml);
        assertNotThrown(checkText!arc(range), "unittest failure", __FILE__, line);
    }

    static void testFail(alias func, bool arc, ThrowOnEntityRef toer)(string text, int row, int col, size_t line = __LINE__)
    {
        auto xml = func(text);
        {
            auto range = testParser!(makeConfig(toer))(xml.save);
            auto e = collectException!XMLParsingException(checkText!arc(range));
            enforce!AssertError(e !is null, "unittest failure 1", __FILE__, line);
            enforce!AssertError(e.pos == TextPos(row, col),
"unittest failure 2", __FILE__, line); 6609 } 6610 { 6611 auto pos = TextPos(row + 3, row == 1 ? col + 7 : col); 6612 auto range = testParser!(makeConfig(toer))(xml); 6613 range.pos.line += 3; 6614 range.pos.col += 7; 6615 auto e = collectException!XMLParsingException(checkText!arc(range)); 6616 enforce!AssertError(e !is null, "unittest failure 3", __FILE__, line); 6617 enforce!AssertError(e.pos == pos, "unittest failure 4", __FILE__, line); 6618 } 6619 } 6620 6621 static foreach(func; testRangeFuncs) 6622 { 6623 static foreach(toer; [ThrowOnEntityRef.yes, ThrowOnEntityRef.no]) 6624 { 6625 static foreach(arc; [false, true]) 6626 { 6627 test!(func, arc, toer)(""); 6628 test!(func, arc, toer)("J",); 6629 test!(func, arc, toer)("foo"); 6630 test!(func, arc, toer)("プログラミング"); 6631 6632 test!(func, arc, toer)("&><"); 6633 test!(func, arc, toer)("hello&><world"); 6634 test!(func, arc, toer)(".....'"&....."); 6635 test!(func, arc, toer)("ディラン"); 6636 test!(func, arc, toer)("hello¯*"world"); 6637 6638 test!(func, arc, toer)("]]"); 6639 test!(func, arc, toer)("]>"); 6640 test!(func, arc, toer)("foo]]bar"); 6641 test!(func, arc, toer)("foo]>bar"); 6642 test!(func, arc, toer)("]] >"); 6643 6644 testFail!(func, arc, toer)("\v", 1, 1); 6645 testFail!(func, arc, toer)("\uFFFE", 1, 1); 6646 testFail!(func, arc, toer)("hello\vworld", 1, 6); 6647 testFail!(func, arc, toer)("he\nllo\vwo\nrld", 2, 4); 6648 } 6649 6650 testFail!(func, false, toer)("<", 1, 1); 6651 testFail!(func, false, toer)("&", 1, 1); 6652 testFail!(func, false, toer)("&", 1, 1); 6653 testFail!(func, false, toer)("&x", 1, 1); 6654 testFail!(func, false, toer)("&&;", 1, 1); 6655 testFail!(func, false, toer)("&a", 1, 1); 6656 testFail!(func, false, toer)("hello&;", 1, 6); 6657 testFail!(func, false, toer)("hello&;world", 1, 6); 6658 testFail!(func, false, toer)("hello&<;world", 1, 6); 6659 testFail!(func, false, toer)("hello&world", 1, 6); 6660 testFail!(func, false, toer)("hello world&", 1, 12); 6661 testFail!(func, 
false, toer)("hello world&;", 1, 12); 6662 testFail!(func, false, toer)("hello world&foo", 1, 12); 6663 testFail!(func, false, toer)("&#;", 1, 1); 6664 testFail!(func, false, toer)("&#x;", 1, 1); 6665 testFail!(func, false, toer)("&#AF;", 1, 1); 6666 testFail!(func, false, toer)("&#x", 1, 1); 6667 testFail!(func, false, toer)("*", 1, 1); 6668 testFail!(func, false, toer)("B", 1, 1); 6669 testFail!(func, false, toer)("", 1, 1); 6670 testFail!(func, false, toer)("", 1, 1); 6671 testFail!(func, false, toer)("*foo\nbar&#;", 2, 4); 6672 testFail!(func, false, toer)("*foo\nbar&#x;", 2, 4); 6673 testFail!(func, false, toer)("*foo\nbar&#AF;", 2, 4); 6674 testFail!(func, false, toer)("*foo\nbar&#x", 2, 4); 6675 testFail!(func, false, toer)("*foo\nbar*", 2, 4); 6676 testFail!(func, false, toer)("*foo\nbarB", 2, 4); 6677 testFail!(func, false, toer)("プログラミング&", 1, codeLen!(func, "プログラミング&")); 6678 6679 static if(toer == ThrowOnEntityRef.yes) 6680 { 6681 testFail!(func, false, toer)("&a;", 1, 1); 6682 testFail!(func, false, toer)(`&am;`, 1, 1); 6683 testFail!(func, false, toer)(`&e;`, 1, 1); 6684 testFail!(func, false, toer)(`&l;`, 1, 1); 6685 testFail!(func, false, toer)(`<e;`, 1, 1); 6686 testFail!(func, false, toer)(`&g;`, 1, 1); 6687 testFail!(func, false, toer)(`>e;`, 1, 1); 6688 testFail!(func, false, toer)(`&apo;`, 1, 1); 6689 testFail!(func, false, toer)(`&aposs;`, 1, 1); 6690 testFail!(func, false, toer)(`&quo;`, 1, 1); 6691 testFail!(func, false, toer)(`"e;`, 1, 1); 6692 testFail!(func, false, toer)(`hello &foo; world`, 1, 7); 6693 testFail!(func, false, toer)("hello\n &foo; \nworld", 2, 2); 6694 } 6695 else 6696 { 6697 test!(func, false, toer)("&a;"); 6698 test!(func, false, toer)(`&am;`); 6699 test!(func, false, toer)(`&e;`); 6700 test!(func, false, toer)(`&l;`); 6701 test!(func, false, toer)(`<e;`); 6702 test!(func, false, toer)(`&g;`); 6703 test!(func, false, toer)(`>e;`); 6704 test!(func, false, toer)(`&apo;`); 6705 test!(func, false, toer)(`&aposs;`); 6706 
test!(func, false, toer)(`&quo;`); 6707 test!(func, false, toer)(`"e;`); 6708 test!(func, false, toer)(`hello &foo; world`); 6709 test!(func, false, toer)("hello\n &foo; \nworld"); 6710 } 6711 6712 testFail!(func, false, toer)("]]>", 1, 1); 6713 testFail!(func, false, toer)("foo]]>bar", 1, 4); 6714 6715 test!(func, true, toer)("]]>"); 6716 test!(func, true, toer)("foo]]>bar"); 6717 6718 test!(func, true, toer)("<"); 6719 test!(func, true, toer)("&"); 6720 test!(func, true, toer)("&x"); 6721 test!(func, true, toer)("&&;"); 6722 test!(func, true, toer)("&a"); 6723 test!(func, true, toer)("&a;"); 6724 test!(func, true, toer)(`&am;`); 6725 test!(func, true, toer)(`&e;`); 6726 test!(func, true, toer)(`&l;`); 6727 test!(func, true, toer)(`<e;`); 6728 test!(func, true, toer)(`&g;`); 6729 test!(func, true, toer)(`>e;`); 6730 test!(func, true, toer)(`&apo;`); 6731 test!(func, true, toer)(`&aposs;`); 6732 test!(func, true, toer)(`&quo;`); 6733 test!(func, true, toer)(`"e;`); 6734 test!(func, true, toer)("hello&;"); 6735 test!(func, true, toer)("hello&;world"); 6736 test!(func, true, toer)("hello&<;world"); 6737 test!(func, true, toer)("hello&world"); 6738 test!(func, true, toer)("hello world&"); 6739 test!(func, true, toer)("hello world&;"); 6740 test!(func, true, toer)("hello world&foo"); 6741 test!(func, true, toer)("&#;"); 6742 test!(func, true, toer)("&#x;"); 6743 test!(func, true, toer)("&#AF;"); 6744 test!(func, true, toer)("&#x"); 6745 test!(func, true, toer)("*"); 6746 test!(func, true, toer)("B"); 6747 test!(func, true, toer)(""); 6748 test!(func, true, toer)(""); 6749 test!(func, true, toer)("*foo\nbar&#;"); 6750 test!(func, true, toer)("*foo\nbar&#x;"); 6751 test!(func, true, toer)("*foo\nbar&#AF;"); 6752 test!(func, true, toer)("*foo\nbar&#x"); 6753 test!(func, true, toer)("*foo\nbar*"); 6754 test!(func, true, toer)("*foo\nbarB"); 6755 test!(func, true, toer)("プログラミング&"); 6756 } 6757 } 6758 6759 // These can't be tested with testFail, because attempting to 
convert 6760 // invalid Unicode results in UnicodeExceptions before parseXML even 6761 // gets called. 6762 import std.meta : AliasSeq; 6763 static foreach(str; AliasSeq!(cast(string)[255], cast(wstring)[0xD800], cast(dstring)[0xD800])) 6764 { 6765 static foreach(arc; [false, true]) 6766 {{ 6767 auto text = testParser(str); 6768 auto e = collectException!XMLParsingException(text.checkText!arc()); 6769 assert(e ! is null); 6770 assert(e.pos == TextPos(1, 1)); 6771 }} 6772 } 6773 } 6774 6775 version(dxmlTests) @safe unittest 6776 { 6777 import dxml.internal : testRangeFuncs; 6778 6779 static foreach(func; testRangeFuncs) 6780 { 6781 static foreach(arc; [false, true]) 6782 { 6783 static foreach(config; [Config.init, simpleXML, makeConfig(ThrowOnEntityRef.no)]) 6784 {{ 6785 auto xml = func("foo"); 6786 auto text = testParser!config(xml); 6787 checkText!arc(text); 6788 }} 6789 } 6790 } 6791 } 6792 6793 6794 // S := (#x20 | #x9 | #xD | #XA)+ 6795 bool isSpace(C)(C c) @safe pure nothrow @nogc 6796 if(isSomeChar!C) 6797 { 6798 switch(c) 6799 { 6800 case ' ': 6801 case '\t': 6802 case '\r': 6803 case '\n': return true; 6804 default : return false; 6805 } 6806 } 6807 6808 version(dxmlTests) pure nothrow @safe @nogc unittest 6809 { 6810 foreach(char c; char.min .. char.max) 6811 { 6812 if(c == ' ' || c == '\t' || c == '\r' || c == '\n') 6813 assert(isSpace(c)); 6814 else 6815 assert(!isSpace(c)); 6816 } 6817 foreach(wchar c; wchar.min .. wchar.max / 100) 6818 { 6819 if(c == ' ' || c == '\t' || c == '\r' || c == '\n') 6820 assert(isSpace(c)); 6821 else 6822 assert(!isSpace(c)); 6823 } 6824 foreach(dchar c; dchar.min .. 
dchar.max / 1000) 6825 { 6826 if(c == ' ' || c == '\t' || c == '\r' || c == '\n') 6827 assert(isSpace(c)); 6828 else 6829 assert(!isSpace(c)); 6830 } 6831 } 6832 6833 6834 pragma(inline, true) void popFrontAndIncCol(Text)(ref Text text) 6835 { 6836 text.input.popFront(); 6837 ++text.pos.col; 6838 } 6839 6840 pragma(inline, true) void nextLine(Config config)(ref TextPos pos) 6841 { 6842 ++pos.line; 6843 pos.col = 1; 6844 } 6845 6846 // TODO create bug report, because this function cannot be inlined 6847 /+pragma(inline, true)+/ void checkNotEmpty(Text)(ref Text text, size_t line = __LINE__) 6848 { 6849 if(text.input.empty) 6850 throw new XMLParsingException("Prematurely reached end of document", text.pos, __FILE__, line); 6851 } 6852 6853 6854 version(dxmlTests) 6855 enum someTestConfigs = [Config.init, simpleXML, makeConfig(SkipComments.yes), makeConfig(SkipPI.yes)]; 6856 6857 6858 // Fuzz-testing failures 6859 version(dxmlTests) unittest 6860 { 6861 static void parseEverything(string xml) 6862 { 6863 with(EntityType) foreach(entity; parseXML(xml)) 6864 { 6865 final switch(entity.type) 6866 { 6867 case cdata: break; 6868 case comment: break; 6869 case elementStart: auto name = entity.name; break; 6870 case elementEnd: goto case elementStart; 6871 case elementEmpty: goto case elementStart; 6872 case pi: goto case elementStart; 6873 case text: break; 6874 } 6875 6876 final switch(entity.type) 6877 { 6878 case cdata: auto text = entity.text; break; 6879 case comment: goto case cdata; 6880 case elementStart: 6881 { 6882 foreach(attr; entity.attributes) 6883 { 6884 auto name = attr.name; 6885 auto value = attr.value; 6886 } 6887 break; 6888 } 6889 case elementEnd: break; 6890 case elementEmpty: goto case elementStart; 6891 case pi: goto case cdata; 6892 case text: goto case cdata; 6893 } 6894 } 6895 } 6896 6897 static void testFail(string xml, size_t line = __LINE__) 6898 { 6899 import std.exception : assertThrown; 6900 
assertThrown!XMLParsingException(parseEverything(xml)); 6901 } 6902 6903 testFail([0x3c, 0xff, 0x3e, 0x3e, 0x3a, 0x3c, 0x2f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6904 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6905 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6906 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 6907 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x31, 0xff, 6908 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xff, 0xff, 6909 0xff]); 6910 }