1 // Written in the D programming language 2 3 /++ 4 This implements a DOM for representing an XML 1.0 document. $(LREF parseDOM) 5 uses an $(REF EntityRange, dxml, parser) to parse the document, and 6 $(LREF DOMEntity) recursively represents the DOM tree. 7 8 See the documentation for $(MREF dxml, parser) and 9 $(REF EntityRange, dxml, parser) for details on the parser and its 10 configuration options. 11 12 For convenience, $(REF EntityType, dxml, parser) and 13 $(REF simpleXML, dxml, parser) are publicly imported by this module, 14 since $(REF_ALTTEXT EntityType, EntityType, dxml, parser) is required 15 to correctly use $(LREF DOMEntity), and 16 $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) is highly likely to 17 be used when calling $(LREF parseDOM). 18 19 Copyright: Copyright 2018 - 2020 20 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 21 Authors: $(HTTPS jmdavisprog.com, Jonathan M Davis) 22 Source: $(LINK_TO_SRC dxml/_dom.d) 23 24 See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0) 25 +/ 26 module dxml.dom; 27 28 /// 29 unittest 30 { 31 import std.range.primitives : empty; 32 33 auto xml = "<!-- comment -->\n" ~ 34 "<root>\n" ~ 35 " <foo>some text<whatever/></foo>\n" ~ 36 " <bar/>\n" ~ 37 " <baz></baz>\n" ~ 38 "</root>"; 39 { 40 auto dom = parseDOM(xml); 41 assert(dom.type == EntityType.elementStart); 42 assert(dom.name.empty); 43 assert(dom.children.length == 2); 44 45 assert(dom.children[0].type == EntityType.comment); 46 assert(dom.children[0].text == " comment "); 47 48 auto root = dom.children[1]; 49 assert(root.type == EntityType.elementStart); 50 assert(root.name == "root"); 51 assert(root.children.length == 3); 52 53 auto foo = root.children[0]; 54 assert(foo.type == EntityType.elementStart); 55 assert(foo.name == "foo"); 56 assert(foo.children.length == 2); 57 58 assert(foo.children[0].type == EntityType.text); 59 assert(foo.children[0].text == "some text"); 60 61 assert(foo.children[1].type 
== EntityType.elementEmpty); 62 assert(foo.children[1].name == "whatever"); 63 64 assert(root.children[1].type == EntityType.elementEmpty); 65 assert(root.children[1].name == "bar"); 66 67 assert(root.children[2].type == EntityType.elementStart); 68 assert(root.children[2].name == "baz"); 69 assert(root.children[2].children.length == 0); 70 } 71 { 72 auto dom = parseDOM!simpleXML(xml); 73 assert(dom.type == EntityType.elementStart); 74 assert(dom.name.empty); 75 assert(dom.children.length == 1); 76 77 auto root = dom.children[0]; 78 assert(root.type == EntityType.elementStart); 79 assert(root.name == "root"); 80 assert(root.children.length == 3); 81 82 auto foo = root.children[0]; 83 assert(foo.type == EntityType.elementStart); 84 assert(foo.name == "foo"); 85 assert(foo.children.length == 2); 86 87 assert(foo.children[0].type == EntityType.text); 88 assert(foo.children[0].text == "some text"); 89 90 assert(foo.children[1].type == EntityType.elementStart); 91 assert(foo.children[1].name == "whatever"); 92 assert(foo.children[1].children.length == 0); 93 94 assert(root.children[1].type == EntityType.elementStart); 95 assert(root.children[1].name == "bar"); 96 assert(root.children[1].children.length == 0); 97 98 assert(root.children[2].type == EntityType.elementStart); 99 assert(root.children[2].name == "baz"); 100 assert(root.children[2].children.length == 0); 101 } 102 } 103 104 105 import std.range.primitives; 106 import std.traits; 107 108 public import dxml.parser : EntityType, simpleXML; 109 import dxml.parser : Config, EntityRange; 110 111 112 /++ 113 Represents an entity in an XML document as a DOM tree. 114 115 parseDOM either takes a range of characters or an 116 $(REF EntityRange, dxml, parser) and generates a DOMEntity from that XML. 117 118 When parseDOM processes the XML, it returns a DOMEntity representing the 119 entire document. 
Even though the XML document itself isn't technically an 120 entity in the XML document, it's simplest to treat it as if it were an 121 $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser) 122 with an empty $(LREF2 name, _DOMEntity.name). That DOMEntity then contains 123 child entities that recursively define the DOM tree through their children. 124 125 For DOMEntities of type 126 $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser), 127 $(LREF _DOMEntity.children) gives access to all of the child entities of 128 that start tag. Other DOMEntities have no children. 129 130 Note that the $(LREF2 type, _DOMEntity.type) determines which 131 properties of the DOMEntity can be used, and it can determine whether 132 functions which a DOMEntity is passed to are allowed to be called. Each 133 function lists which $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s 134 are allowed, and it is an error to call them with any other 135 $(REF_ALTTEXT EntityType, EntityType, dxml, parser). 136 137 If parseDOM is given a range of characters, it in turn passes that to 138 $(REF parseXML, dxml, parser) to do the actual XML parsing. As such, that 139 overload accepts an optional $(REF Config, dxml, parser) as a template 140 argument to configure the parser. 141 142 If parseDOM is given an 143 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the range does 144 not have to be at the start of the document. It can be used to create a DOM 145 for a portion of the document. When a character range is passed to it, it 146 will return a DOMEntity with the $(LREF2 type, _DOMEntity.type) 147 $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser) 148 and an empty $(LREF2 name, _DOMEntity.name). 
It will iterate the range until 149 it either reaches the end of the range, or it reaches the end tag which 150 matches the start tag which is the parent of the entity that was the 151 $(D front) of the range when it was passed to parseDOM. The 152 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) 153 is passed by $(K_REF), so if it was not at the top level when it was passed 154 to parseDOM (and thus still has elements in it when parseDOM returns), the 155 range will then be at the entity after that matching end tag, and the 156 application can continue to process the range after that if it so chooses. 157 158 Params: 159 config = The $(REF Config, dxml, parser) to use with 160 $(REF parseXML, dxml, parser) if the range passed to parseDOM 161 is a range of characters. 162 range = Either a range of characters representing an entire XML document 163 or a $(REF EntityRange, dxml, parser) which may refer to some 164 or all of an XML document. 165 166 Returns: A DOMEntity representing the DOM tree from the point in the 167 document that was passed to parseDOM (the start of the document if 168 a range of characters was passed, and wherever in the document the 169 range was if an 170 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) was passed). 171 172 Throws: $(REF_ALTTEXT XMLParsingException, XMLParsingException, dxml, parser) 173 if the parser encounters invalid XML. 174 +/ 175 struct DOMEntity(R) 176 { 177 public: 178 179 import std.algorithm.searching : canFind; 180 import std.range : only, takeExactly; 181 import std.typecons : Tuple; 182 import dxml.parser : TextPos; 183 184 private enum compileInTests = is(R == DOMCompileTests); 185 186 /++ 187 The type used when any slice of the original range of characters is 188 used. If the range was a string or supports slicing, then SliceOfR is 189 the same type as the range; otherwise, it's the result of calling 190 $(PHOBOS_REF takeExactly, std, range) on it.
191 192 --- 193 import std.algorithm : filter; 194 import std.range : takeExactly; 195 196 static assert(is(DOMEntity!string.SliceOfR == string)); 197 198 auto range = filter!(a => true)("some xml"); 199 200 static assert(is(DOMEntity!(typeof(range)).SliceOfR == 201 typeof(takeExactly(range, 42)))); 202 --- 203 +/ 204 static if(isDynamicArray!R || hasSlicing!R) 205 alias SliceOfR = R; 206 else 207 alias SliceOfR = typeof(takeExactly(R.init, 42)); 208 209 // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being 210 // a ddoc-ed unit test. 211 static if(compileInTests) @safe unittest 212 { 213 import std.algorithm : filter; 214 import std.range : takeExactly; 215 216 static assert(is(DOMEntity!string.SliceOfR == string)); 217 218 auto range = filter!(a => true)("some xml"); 219 220 static assert(is(DOMEntity!(typeof(range)).SliceOfR == 221 typeof(takeExactly(range, 42)))); 222 } 223 224 225 /++ 226 The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that 227 $(LREF2 attributes, DOMEntity) returns a range of. 228 229 See_Also: $(LREF2 attributes, DOMEntity) 230 +/ 231 alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos, "pos"); 232 233 234 /++ 235 The $(REF_ALTTEXT EntityType, EntityType, dxml, parser) for this 236 DOMEntity. 237 238 The type can never be 239 $(REF_ALTTEXT EntityType.elementEnd, EntityType.elementEnd, dxml, parser), 240 because the end of $(LREF2 children, DOMEntity.children) already 241 indicates where the contents of the start tag end. 242 243 type determines which properties of the DOMEntity can be used, and it 244 can determine whether functions which a DOMEntity is passed to are 245 allowed to be called. Each function lists which 246 $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s are allowed, and it 247 is an error to call them with any other 248 $(REF_ALTTEXT EntityType, EntityType, dxml, parser). 
249 +/ 250 @property EntityType type() @safe const pure nothrow @nogc 251 { 252 return _type; 253 } 254 255 /// 256 static if(compileInTests) unittest 257 { 258 import std.range.primitives; 259 260 auto xml = "<root>\n" ~ 261 " <!--no comment-->\n" ~ 262 " <![CDATA[cdata run]]>\n" ~ 263 " <text>I am text!</text>\n" ~ 264 " <empty/>\n" ~ 265 " <?pi?>\n" ~ 266 "</root>"; 267 268 auto dom = parseDOM(xml); 269 assert(dom.type == EntityType.elementStart); 270 assert(dom.name.empty); 271 assert(dom.children.length == 1); 272 273 auto root = dom.children[0]; 274 assert(root.type == EntityType.elementStart); 275 assert(root.name == "root"); 276 assert(root.children.length == 5); 277 278 assert(root.children[0].type == EntityType.comment); 279 assert(root.children[0].text == "no comment"); 280 281 assert(root.children[1].type == EntityType.cdata); 282 assert(root.children[1].text == "cdata run"); 283 284 auto textTag = root.children[2]; 285 assert(textTag.type == EntityType.elementStart); 286 assert(textTag.name == "text"); 287 assert(textTag.children.length == 1); 288 289 assert(textTag.children[0].type == EntityType.text); 290 assert(textTag.children[0].text == "I am text!"); 291 292 assert(root.children[3].type == EntityType.elementEmpty); 293 assert(root.children[3].name == "empty"); 294 295 assert(root.children[4].type == EntityType.pi); 296 assert(root.children[4].name == "pi"); 297 } 298 299 300 /++ 301 The position in the original text where the entity starts. 302 303 See_Also: $(REF_ALTTEXT TextPos, TextPos, dxml, parser)$(BR) 304 $(REF_ALTTEXT XMLParsingException._pos, XMLParsingException._pos, dxml, parser) 305 +/ 306 @property TextPos pos() @safe const pure nothrow @nogc 307 { 308 return _pos; 309 } 310 311 /// 312 static if(compileInTests) unittest 313 { 314 import std.range.primitives : empty; 315 import dxml.parser : TextPos; 316 import dxml.util : stripIndent; 317 318 auto xml = "<root>\n" ~ 319 " <foo>\n" ~ 320 " Foo and bar.
Always foo and bar...\n" ~ 321 " </foo>\n" ~ 322 "</root>"; 323 324 auto dom = parseDOM(xml); 325 assert(dom.type == EntityType.elementStart); 326 assert(dom.name.empty); 327 assert(dom.pos == TextPos(1, 1)); 328 329 auto root = dom.children[0]; 330 assert(root.type == EntityType.elementStart); 331 assert(root.name == "root"); 332 assert(root.pos == TextPos(1, 1)); 333 334 auto foo = root.children[0]; 335 assert(foo.type == EntityType.elementStart); 336 assert(foo.name == "foo"); 337 assert(foo.pos == TextPos(2, 5)); 338 339 auto text = foo.children[0]; 340 assert(text.type == EntityType.text); 341 assert(text.text.stripIndent() == 342 "Foo and bar. Always foo and bar..."); 343 assert(text.pos == TextPos(2, 10)); 344 } 345 346 347 /++ 348 Gives the name of this DOMEntity. 349 350 Note that this is the direct name in the XML for this entity and 351 does not contain any of the names of any of the parent entities that 352 this entity has. 353 354 $(TABLE 355 $(TR $(TH Supported $(LREF EntityType)s:)) 356 $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser))) 357 $(TR $(TD $(REF_ALTTEXT elementEnd, EntityType.elementEnd, dxml, parser))) 358 $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser))) 359 $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser))) 360 ) 361 362 See_Also: $(LREF2 path, DOMEntity.path) 363 +/ 364 @property SliceOfR name() 365 { 366 import dxml.internal : checkedSave; 367 with(EntityType) 368 { 369 import std.format : format; 370 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type), 371 format("name cannot be called with %s", _type)); 372 } 373 return checkedSave(_name); 374 } 375 376 /// 377 static if(compileInTests) unittest 378 { 379 import std.range.primitives : empty; 380 381 auto xml = "<root>\n" ~ 382 " <empty/>\n" ~ 383 " <?pi?>\n" ~ 384 "</root>"; 385 386 auto dom = parseDOM(xml); 387 assert(dom.type == EntityType.elementStart); 388 assert(dom.name.empty); 389 390 auto 
root = dom.children[0]; 391 assert(root.type == EntityType.elementStart); 392 assert(root.name == "root"); 393 394 assert(root.children[0].type == EntityType.elementEmpty); 395 assert(root.children[0].name == "empty"); 396 397 assert(root.children[1].type == EntityType.pi); 398 assert(root.children[1].name == "pi"); 399 } 400 401 402 /++ 403 Gives the list of the names of the parent start tags of this DOMEntity. 404 405 The name of the current entity (if it has one) is not included in the 406 path. 407 408 Note that if parseDOM were given an 409 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the path 410 starts where the range started. So, it doesn't necessarily contain the 411 entire path from the start of the XML document. 412 413 See_Also: $(LREF2 name, DOMEntity.name) 414 +/ 415 @property SliceOfR[] path() 416 { 417 return _path; 418 } 419 420 /// 421 static if(compileInTests) unittest 422 { 423 import std.range.primitives : empty; 424 425 auto xml = "<root>\n" ~ 426 " <bar>\n" ~ 427 " <baz>\n" ~ 428 " <xyzzy/>\n" ~ 429 " </baz>\n" ~ 430 " <frobozz>\n" ~ 431 " <!-- comment -->\n" ~ 432 " It's magic!\n" ~ 433 " </frobozz>\n" ~ 434 " </bar>\n" ~ 435 " <foo></foo>\n" ~ 436 "</root>"; 437 438 auto dom = parseDOM(xml); 439 assert(dom.type == EntityType.elementStart); 440 assert(dom.name.empty); 441 assert(dom.path.empty); 442 443 auto root = dom.children[0]; 444 assert(root.type == EntityType.elementStart); 445 assert(root.name == "root"); 446 assert(root.path.empty); 447 448 auto bar = root.children[0]; 449 assert(bar.type == EntityType.elementStart); 450 assert(bar.name == "bar"); 451 assert(bar.path == ["root"]); 452 453 auto baz = bar.children[0]; 454 assert(baz.type == EntityType.elementStart); 455 assert(baz.name == "baz"); 456 assert(baz.path == ["root", "bar"]); 457 458 auto xyzzy = baz.children[0]; 459 assert(xyzzy.type == EntityType.elementEmpty); 460 assert(xyzzy.name == "xyzzy"); 461 assert(xyzzy.path == ["root", "bar", "baz"]); 462 463 auto 
frobozz = bar.children[1]; 464 assert(frobozz.type == EntityType.elementStart); 465 assert(frobozz.name == "frobozz"); 466 assert(frobozz.path == ["root", "bar"]); 467 468 auto comment = frobozz.children[0]; 469 assert(comment.type == EntityType.comment); 470 assert(comment.text == " comment "); 471 assert(comment.path == ["root", "bar", "frobozz"]); 472 473 auto text = frobozz.children[1]; 474 assert(text.type == EntityType.text); 475 assert(text.text == "\n It's magic!\n "); 476 assert(text.path == ["root", "bar", "frobozz"]); 477 478 auto foo = root.children[1]; 479 assert(foo.type == EntityType.elementStart); 480 assert(foo.name == "foo"); 481 assert(foo.path == ["root"]); 482 } 483 484 485 /++ 486 Returns a dynamic array of attributes for a start tag where each 487 attribute is represented as a$(BR) 488 $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!( 489 $(LREF2 SliceOfR, DOMEntity), $(D_STRING "name"), 490 $(LREF2 SliceOfR, DOMEntity), $(D_STRING "value"), 491 $(REF_ALTTEXT TextPos, TextPos, dxml, parser), $(D_STRING "pos"))).
492 493 $(TABLE 494 $(TR $(TH Supported $(LREF EntityType)s:)) 495 $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser))) 496 $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser))) 497 ) 498 499 See_Also: $(LREF DOMEntity.Attribute)$(BR) 500 $(REF normalize, dxml, util)$(BR) 501 $(REF asNormalized, dxml, util) 502 +/ 503 @property auto attributes() 504 { 505 with(EntityType) 506 { 507 import std.format : format; 508 assert(_type == elementStart || _type == elementEmpty, 509 format("attributes cannot be called with %s", _type)); 510 } 511 return _attributes; 512 } 513 514 /// 515 static if(compileInTests) unittest 516 { 517 import std.algorithm.comparison : equal; 518 import std.algorithm.iteration : filter; 519 import std.range.primitives : empty; 520 import dxml.parser : TextPos; 521 522 { 523 auto xml = "<root/>"; 524 auto root = parseDOM(xml).children[0]; 525 assert(root.type == EntityType.elementEmpty); 526 assert(root.attributes.empty); 527 528 static assert(is(ElementType!(typeof(root.attributes)) == 529 typeof(root).Attribute)); 530 } 531 { 532 auto xml = "<root a='42' q='29' w='hello'/>"; 533 auto root = parseDOM(xml).children[0]; 534 assert(root.type == EntityType.elementEmpty); 535 536 auto attrs = root.attributes; 537 assert(attrs.length == 3); 538 539 assert(attrs[0].name == "a"); 540 assert(attrs[0].value == "42"); 541 assert(attrs[0].pos == TextPos(1, 7)); 542 543 assert(attrs[1].name == "q"); 544 assert(attrs[1].value == "29"); 545 assert(attrs[1].pos == TextPos(1, 14)); 546 547 assert(attrs[2].name == "w"); 548 assert(attrs[2].value == "hello"); 549 assert(attrs[2].pos == TextPos(1, 21)); 550 } 551 // Because the type of name and value is SliceOfR, == with a string 552 // only works if the range passed to parseXML was string.
553 { 554 auto xml = filter!"true"("<root a='42' q='29' w='hello'/>"); 555 auto root = parseDOM(xml).children[0]; 556 assert(root.type == EntityType.elementEmpty); 557 558 auto attrs = root.attributes; 559 assert(attrs.length == 3); 560 561 assert(equal(attrs[0].name, "a")); 562 assert(equal(attrs[0].value, "42")); 563 assert(attrs[0].pos == TextPos(1, 7)); 564 565 assert(equal(attrs[1].name, "q")); 566 assert(equal(attrs[1].value, "29")); 567 assert(attrs[1].pos == TextPos(1, 14)); 568 569 assert(equal(attrs[2].name, "w")); 570 assert(equal(attrs[2].value, "hello")); 571 assert(attrs[2].pos == TextPos(1, 21)); 572 } 573 } 574 575 576 /++ 577 Returns the textual value of this DOMEntity. 578 579 In the case of 580 $(REF_ALTTEXT EntityType.pi, EntityType.pi, dxml, parser), this is the 581 text that follows the name, whereas in the other cases, the text is the 582 entire contents of the entity (save for the delimiters on the ends if 583 that entity has them). 584 585 $(TABLE 586 $(TR $(TH Supported $(LREF EntityType)s:)) 587 $(TR $(TD $(REF_ALTTEXT cdata, EntityType.cdata, dxml, parser))) 588 $(TR $(TD $(REF_ALTTEXT comment, EntityType.comment, dxml, parser))) 589 $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser))) 590 $(TR $(TD $(REF_ALTTEXT _text, EntityType._text, dxml, parser))) 591 ) 592 593 See_Also: $(REF normalize, dxml, util)$(BR) 594 $(REF asNormalized, dxml, util)$(BR) 595 $(REF stripIndent, dxml, util)$(BR) 596 $(REF withoutIndent, dxml, util) 597 +/ 598 @property SliceOfR text() 599 { 600 import dxml.internal : checkedSave; 601 with(EntityType) 602 { 603 import std.format : format; 604 assert(only(cdata, comment, pi, text).canFind(_type), 605 format("text cannot be called with %s", _type)); 606 } 607 return checkedSave(_text); 608 } 609 610 /// 611 static if(compileInTests) unittest 612 { 613 import std.range.primitives : empty; 614 615 auto xml = "<?xml version='1.0'?>\n" ~ 616 "<?instructionName?>\n" ~ 617 "<?foo here is something to say?>\n" ~
618 "<root>\n" ~ 619 " <![CDATA[ Yay! random text >> << ]]>\n" ~ 620 " <!-- some random comment -->\n" ~ 621 " <p>something here</p>\n" ~ 622 " <p>\n" ~ 623 " something else\n" ~ 624 " here</p>\n" ~ 625 "</root>"; 626 auto dom = parseDOM(xml); 627 628 // "<?instructionName?>\n" ~ 629 auto pi1 = dom.children[0]; 630 assert(pi1.type == EntityType.pi); 631 assert(pi1.name == "instructionName"); 632 assert(pi1.text.empty); 633 634 // "<?foo here is something to say?>\n" ~ 635 auto pi2 = dom.children[1]; 636 assert(pi2.type == EntityType.pi); 637 assert(pi2.name == "foo"); 638 assert(pi2.text == "here is something to say"); 639 640 // "<root>\n" ~ 641 auto root = dom.children[2]; 642 assert(root.type == EntityType.elementStart); 643 644 // " <![CDATA[ Yay! random text >> << ]]>\n" ~ 645 auto cdata = root.children[0]; 646 assert(cdata.type == EntityType.cdata); 647 assert(cdata.text == " Yay! random text >> << "); 648 649 // " <!-- some random comment -->\n" ~ 650 auto comment = root.children[1]; 651 assert(comment.type == EntityType.comment); 652 assert(comment.text == " some random comment "); 653 654 // " <p>something here</p>\n" ~ 655 auto p1 = root.children[2]; 656 assert(p1.type == EntityType.elementStart); 657 assert(p1.name == "p"); 658 659 assert(p1.children[0].type == EntityType.text); 660 assert(p1.children[0].text == "something here"); 661 662 // " <p>\n" ~ 663 // " something else\n" ~ 664 // " here</p>\n" ~ 665 auto p2 = root.children[3]; 666 assert(p2.type == EntityType.elementStart); 667 668 assert(p2.children[0].type == EntityType.text); 669 assert(p2.children[0].text == "\n something else\n here"); 670 } 671 672 673 /++ 674 Returns the child entities of the current entity. 675 676 They are in the same order that they were in the XML document. 
677 678 $(TABLE 679 $(TR $(TH Supported $(LREF EntityType)s:)) 680 $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser))) 681 ) 682 +/ 683 @property DOMEntity[] children() 684 { 685 import std.format : format; 686 assert(_type == EntityType.elementStart, 687 format!"children cannot be called with %s"(_type)); 688 return _children; 689 } 690 691 /// 692 static if(compileInTests) unittest 693 { 694 auto xml = "<potato>\n" ~ 695 " <!--comment-->\n" ~ 696 " <foo>bar</foo>\n" ~ 697 " <tag>\n" ~ 698 " <silly>you</silly>\n" ~ 699 " <empty/>\n" ~ 700 " <nocontent></nocontent>\n" ~ 701 " </tag>\n" ~ 702 "</potato>\n" ~ 703 "<!--the end-->"; 704 auto dom = parseDOM(xml); 705 assert(dom.children.length == 2); 706 707 auto potato = dom.children[0]; 708 assert(potato.type == EntityType.elementStart); 709 assert(potato.name == "potato"); 710 assert(potato.children.length == 3); 711 712 auto comment = potato.children[0]; 713 assert(comment.type == EntityType.comment); 714 assert(comment.text == "comment"); 715 716 auto foo = potato.children[1]; 717 assert(foo.type == EntityType.elementStart); 718 assert(foo.name == "foo"); 719 assert(foo.children.length == 1); 720 721 assert(foo.children[0].type == EntityType.text); 722 assert(foo.children[0].text == "bar"); 723 724 auto tag = potato.children[2]; 725 assert(tag.type == EntityType.elementStart); 726 assert(tag.name == "tag"); 727 assert(tag.children.length == 3); 728 729 auto silly = tag.children[0]; 730 assert(silly.type == EntityType.elementStart); 731 assert(silly.name == "silly"); 732 assert(silly.children.length == 1); 733 734 assert(silly.children[0].type == EntityType.text); 735 assert(silly.children[0].text == "you"); 736 737 auto empty = tag.children[1]; 738 assert(empty.type == EntityType.elementEmpty); 739 assert(empty.name == "empty"); 740 741 auto nocontent = tag.children[2]; 742 assert(nocontent.type == EntityType.elementStart); 743 assert(nocontent.name == "nocontent"); 744
assert(nocontent.children.length == 0);

        auto endComment = dom.children[1];
        assert(endComment.type == EntityType.comment);
        assert(endComment.text == "the end");
    }


    // Postblit: re-save every stored range so that a copy of a DOMEntity does
    // not share iteration state with the original when R is a reference-type
    // range. Dynamic arrays need no such treatment, so the postblit is only
    // compiled in for other range types.
    static if(!isDynamicArray!R) this(this)
    {
        with(EntityType) final switch(_type)
        {
            // Entities which only carry a name.
            case elementStart, elementEnd, elementEmpty:
                _name = _name.save;
                break;

            // Entities which only carry text.
            case cdata, comment, text:
                _text = _text.save;
                break;

            // Processing instructions carry both.
            case pi:
                _text = _text.save;
                _name = _name.save;
                break;
        }
    }


private:

    // Creates an entity of the given type at the given position; the name
    // and text are left at their init values for the caller to fill in.
    this(EntityType type, TextPos pos)
    {
        // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945
        _name = typeof(_name).init;
        _text = typeof(_text).init;

        _type = type;
        _pos = pos;
    }

    // A default-initialized DOMEntity is an elementStart, which is how the
    // document itself is represented.
    EntityType _type = EntityType.elementStart;
    TextPos _pos;
    SliceOfR _name;
    SliceOfR[] _path;
    Attribute[] _attributes;
    SliceOfR _text;
    DOMEntity[] _children;
}

/// Ditto
DOMEntity!R parseDOM(Config config = Config.init, R)(R range)
    if(isForwardRange!R && isSomeChar!(ElementType!R))
{
    import dxml.parser : parseXML;

    // Let parseXML do the actual parsing, and build the tree from the
    // resulting range of entities.
    DOMEntity!R document;
    auto entities = parseXML!config(range);
    _parseDOM(entities, document);
    return document;
}

/// Ditto
DOMEntity!(ER.Input) parseDOM(ER)(ref ER range)
    if(isInstanceOf!(EntityRange, ER))
{
    DOMEntity!(ER.Input) document;

    if(!range.empty)
    {
        document._pos = range.front.pos;

        // If the range is sitting on an end tag, there is nothing to collect,
        // and the range is left where it is.
        if(range.front.type != EntityType.elementEnd)
            _parseDOM(range, document);
    }

    return document;
}

/++
    parseDOM with the default $(REF_ALTTEXT Config, Config, dxml, parser) and a
    range of characters.
829 +/ 830 @safe unittest 831 { 832 import std.range.primitives; 833 834 auto xml = "<root>\n" ~ 835 " <!-- no comment -->\n" ~ 836 " <foo></foo>\n" ~ 837 " <baz>\n" ~ 838 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 839 " </baz>\n" ~ 840 " <tag/>\n" ~ 841 "</root>"; 842 843 auto dom = parseDOM(xml); 844 assert(dom.type == EntityType.elementStart); 845 assert(dom.name.empty); 846 assert(dom.children.length == 1); 847 848 auto root = dom.children[0]; 849 assert(root.type == EntityType.elementStart); 850 assert(root.name == "root"); 851 assert(root.children.length == 4); 852 853 assert(root.children[0].type == EntityType.comment); 854 assert(root.children[0].text == " no comment "); 855 856 assert(root.children[1].type == EntityType.elementStart); 857 assert(root.children[1].name == "foo"); 858 assert(root.children[1].children.length == 0); 859 860 auto baz = root.children[2]; 861 assert(baz.type == EntityType.elementStart); 862 assert(baz.name == "baz"); 863 assert(baz.children.length == 1); 864 865 auto xyzzy = baz.children[0]; 866 assert(xyzzy.type == EntityType.elementStart); 867 assert(xyzzy.name == "xyzzy"); 868 assert(xyzzy.children.length == 1); 869 870 assert(xyzzy.children[0].type == EntityType.text); 871 assert(xyzzy.children[0].text == "It's an adventure!"); 872 873 assert(root.children[3].type == EntityType.elementEmpty); 874 assert(root.children[3].name == "tag"); 875 } 876 877 /++ 878 parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and a range 879 of characters. 
880 +/ 881 unittest 882 { 883 import std.range.primitives : empty; 884 885 auto xml = "<root>\n" ~ 886 " <!-- no comment -->\n" ~ 887 " <foo></foo>\n" ~ 888 " <baz>\n" ~ 889 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 890 " </baz>\n" ~ 891 " <tag/>\n" ~ 892 "</root>"; 893 894 auto dom = parseDOM!simpleXML(xml); 895 assert(dom.type == EntityType.elementStart); 896 assert(dom.name.empty); 897 assert(dom.children.length == 1); 898 899 auto root = dom.children[0]; 900 assert(root.type == EntityType.elementStart); 901 assert(root.name == "root"); 902 assert(root.children.length == 3); 903 904 assert(root.children[0].type == EntityType.elementStart); 905 assert(root.children[0].name == "foo"); 906 assert(root.children[0].children.length == 0); 907 908 auto baz = root.children[1]; 909 assert(baz.type == EntityType.elementStart); 910 assert(baz.name == "baz"); 911 assert(baz.children.length == 1); 912 913 auto xyzzy = baz.children[0]; 914 assert(xyzzy.type == EntityType.elementStart); 915 assert(xyzzy.name == "xyzzy"); 916 assert(xyzzy.children.length == 1); 917 918 assert(xyzzy.children[0].type == EntityType.text); 919 assert(xyzzy.children[0].text == "It's an adventure!"); 920 921 assert(root.children[2].type == EntityType.elementStart); 922 assert(root.children[2].name == "tag"); 923 assert(root.children[2].children.length == 0); 924 } 925 926 /++ 927 parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and an 928 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser). 
929 +/ 930 unittest 931 { 932 import std.range.primitives : empty; 933 import dxml.parser : parseXML; 934 935 auto xml = "<root>\n" ~ 936 " <!-- no comment -->\n" ~ 937 " <foo></foo>\n" ~ 938 " <baz>\n" ~ 939 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 940 " </baz>\n" ~ 941 " <tag/>\n" ~ 942 "</root>"; 943 944 auto range = parseXML!simpleXML(xml); 945 auto dom = parseDOM(range); 946 assert(range.empty); 947 948 assert(dom.type == EntityType.elementStart); 949 assert(dom.name.empty); 950 assert(dom.children.length == 1); 951 952 auto root = dom.children[0]; 953 assert(root.type == EntityType.elementStart); 954 assert(root.name == "root"); 955 assert(root.children.length == 3); 956 957 assert(root.children[0].type == EntityType.elementStart); 958 assert(root.children[0].name == "foo"); 959 assert(root.children[0].children.length == 0); 960 961 auto baz = root.children[1]; 962 assert(baz.type == EntityType.elementStart); 963 assert(baz.name == "baz"); 964 assert(baz.children.length == 1); 965 966 auto xyzzy = baz.children[0]; 967 assert(xyzzy.type == EntityType.elementStart); 968 assert(xyzzy.name == "xyzzy"); 969 assert(xyzzy.children.length == 1); 970 971 assert(xyzzy.children[0].type == EntityType.text); 972 assert(xyzzy.children[0].text == "It's an adventure!"); 973 974 assert(root.children[2].type == EntityType.elementStart); 975 assert(root.children[2].name == "tag"); 976 assert(root.children[2].children.length == 0); 977 } 978 979 /++ 980 parseDOM with an $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) 981 which is not at the start of the document. 
982 +/ 983 unittest 984 { 985 import std.range.primitives : empty; 986 import dxml.parser : parseXML, skipToPath; 987 988 auto xml = "<root>\n" ~ 989 " <!-- no comment -->\n" ~ 990 " <foo></foo>\n" ~ 991 " <baz>\n" ~ 992 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 993 " </baz>\n" ~ 994 " <tag/>\n" ~ 995 "</root>"; 996 997 auto range = parseXML!simpleXML(xml).skipToPath("baz/xyzzy"); 998 assert(range.front.type == EntityType.elementStart); 999 assert(range.front.name == "xyzzy"); 1000 1001 auto dom = parseDOM(range); 1002 assert(range.front.type == EntityType.elementStart); 1003 assert(range.front.name == "tag"); 1004 1005 assert(dom.type == EntityType.elementStart); 1006 assert(dom.name.empty); 1007 assert(dom.children.length == 1); 1008 1009 auto xyzzy = dom.children[0]; 1010 assert(xyzzy.type == EntityType.elementStart); 1011 assert(xyzzy.name == "xyzzy"); 1012 assert(xyzzy.children.length == 1); 1013 1014 assert(xyzzy.children[0].type == EntityType.text); 1015 assert(xyzzy.children[0].text == "It's an adventure!"); 1016 } 1017 1018 /// parseDOM at compile-time 1019 unittest 1020 { 1021 enum xml = "<!-- comment -->\n" ~ 1022 "<root>\n" ~ 1023 " <foo>some text<whatever/></foo>\n" ~ 1024 " <bar/>\n" ~ 1025 " <baz></baz>\n" ~ 1026 "</root>"; 1027 1028 enum dom = parseDOM(xml); 1029 static assert(dom.type == EntityType.elementStart); 1030 static assert(dom.name.empty); 1031 static assert(dom.children.length == 2); 1032 1033 static assert(dom.children[0].type == EntityType.comment); 1034 static assert(dom.children[0].text == " comment "); 1035 } 1036 1037 // This is purely to provide a way to trigger the unittest blocks in DOMEntity 1038 // without compiling them in normally. 
private struct DOMCompileTests
{
    // Range-style members over char. Every body is assert(0), so the
    // members only exist to satisfy compile-time checks.
    @property bool empty() @safe pure nothrow @nogc { assert(0); }
    @property char front() @safe pure nothrow @nogc { assert(0); }
    void popFront() @safe pure nothrow @nogc { assert(0); }
    @property typeof(this) save() @safe pure nothrow @nogc { assert(0); }
}

// Instantiates DOMEntity with DOMCompileTests.
unittest
{
    DOMEntity!DOMCompileTests _domTests;
}


private:

// Builds the children of `parent` from the entities at the front of `range`.
//
// Entities are popped off `range` one at a time until either an elementEnd
// is popped (which ends this level) or the range is exhausted. Every other
// entity becomes a DE child: elementStart entities with content recurse to
// fill in their own children, and the remaining kinds just copy over their
// name, attributes, and/or text.
//
// Params:
//     range  = The entity range to consume. It is taken by ref and is left
//              just past the elementEnd that terminated this level (or
//              empty).
//     parent = The entity whose _children is assigned the parsed children.
//     path   = Stored as the _path of every child created at this level.
//              Children of a nested element get this path extended with
//              that element's name.
void _parseDOM(ER, DE)(ref ER range, ref DE parent, ER.SliceOfR[] path = null)
{
    assert(!range.empty);
    assert(range.front.type != EntityType.elementEnd);

    import std.array : appender, array;
    auto children = appender!(DE[])();

    while(!range.empty)
    {
        auto entity = range.front;
        range.popFront();
        if(entity.type == EntityType.elementEnd)
            break;

        auto child = DE(entity.type, entity.pos);
        child._path = path;

        with(EntityType) final switch(entity.type)
        {
            case cdata: goto case text;
            case comment: goto case text;
            case elementStart:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();

                // An elementEnd directly after the start tag means that the
                // element has no children, so there is nothing to recurse
                // into; just consume the end tag.
                if(range.front.type == EntityType.elementEnd)
                    range.popFront();
                else
                {
                    // The path for the descendants is built as a separate
                    // value instead of appending to and then shrinking
                    // `path`. That keeps `path` untouched for the later
                    // siblings and avoids shrinking it when nothing was
                    // appended (i.e. when the name is empty).
                    auto childPath = entity.name.empty ? path : path ~ entity.name;
                    // TODO The explicit instantiation doesn't hurt, but it
                    // shouldn't be necessary, and if it's not there, we get
                    // a compiler error. It should be reduced and reported.
                    _parseDOM!(ER, DE)(range, child, childPath);
                }
                break;
            }
            case elementEnd: assert(0);
            case elementEmpty:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();
                break;
            }
            case text:
            {
                child._text = entity.text;
                break;
            }
            case pi:
            {
                child._name = entity.name;
                child._text = entity.text;
                break;
            }
        }

        put(children, child);
    }

    parent._children = children.data;
}

unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;
    import dxml.parser : parseXML, TextPos;

    // Runs parseDOM on a copy of entityRange and checks the resulting
    // top-level entity: it must be an elementStart located at (row, col)
    // whose children have exactly the expected types. entityRange itself is
    // then advanced by one entity (if it isn't empty) so that consecutive
    // calls test parseDOM from each position in the document. Failures are
    // reported against the caller's line.
    static void testChildren(ER, size_t line = __LINE__)(ref ER entityRange, int row, int col, EntityType[] expected...)
    {
        import core.exception : AssertError;
        import std.exception : enforce;
        auto temp = entityRange.save;
        auto dom = parseDOM(temp);
        enforce!AssertError(dom.type == EntityType.elementStart, "unittest 1", __FILE__, line);
        enforce!AssertError(dom.children.length == expected.length, "unittest 2", __FILE__, line);
        foreach(i; 0 .. dom._children.length)
            enforce!AssertError(dom._children[i].type == expected[i], "unittest 3", __FILE__, line);
        enforce!AssertError(dom.pos == TextPos(row, col), "unittest 4", __FILE__, line);
        if(!entityRange.empty)
            entityRange.popFront();
    }

    static foreach(func; testRangeFuncs)
    {{
        {
            // i == 0 uses <su></su> and i == 1 uses <su/>, so the first
            // document has one more entity to step over than the second.
            foreach(i, xml; ["<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su></su>",
                             "<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su/>"])
            {
                auto range = parseXML(func(xml));
                foreach(j; 0 .. 4 - i)
                {
                    // j is the number of entities which have already been
                    // popped off before parseDOM is called.
                    auto temp = range.save;
                    auto dom = parseDOM(temp);
                    assert(dom.type == EntityType.elementStart);
                    assert(dom.children.length == 3 - j);
                    if(j <= 2)
                    {
                        assert(dom.children[2 - j].type ==
                               (i == 0 ? EntityType.elementStart : EntityType.elementEmpty));
                        assert(equal(dom.children[2 - j].name, "su"));
                        if(j <= 1)
                        {
                            assert(dom.children[1 - j].type == EntityType.pi);
                            assert(equal(dom.children[1 - j].name, "pi"));
                            assert(equal(dom.children[1 - j].text, "foo"));
                            if(j == 0)
                            {
                                assert(dom.children[0].type == EntityType.comment);
                                assert(equal(dom.children[0].text, " comment "));
                            }
                        }
                    }
                    range.popFront();
                }
                // parseDOM on an empty range gives a nameless, childless
                // elementStart.
                assert(range.empty);
                auto dom = parseDOM(range);
                assert(dom.type == EntityType.elementStart);
                assert(dom.name is typeof(dom.name).init);
                assert(dom.children.length == 0);
            }
        }
        {
            // The indentation inside these literals is significant: the
            // TextPos values asserted below depend on it.
            auto xml = "<root>\n" ~
                       "    <foo>\n" ~
                       "        <bar>\n" ~
                       "            <baz>\n" ~
                       "                It's silly, Charley\n" ~
                       "            </baz>\n" ~
                       "            <frobozz>\n" ~
                       "                <is>the Wiz</is>\n" ~
                       "            </frobozz>\n" ~
                       "            <empty></empty>\n" ~
                       "            <xyzzy/>\n" ~
                       "        </bar>\n" ~
                       "    </foo>\n" ~
                       "    <!--This isn't the end-->\n" ~
                       "</root>\n" ~
                       "<?Poirot?>\n" ~
                       "<!--It's the end!-->";

            {
                // parseDOM starting from every entity in the document. The
                // trailing comment on each line is the entity at the front
                // of the range for that call.
                auto range = parseXML(func(xml));
                with(EntityType)
                {
                    testChildren(range, 1, 1, elementStart, pi, comment); // <root>
                    testChildren(range, 2, 5, elementStart, comment); // <foo>
                    testChildren(range, 3, 9, elementStart); // <bar>
                    testChildren(range, 4, 13, elementStart, elementStart, elementStart, elementEmpty); // <baz>
                    testChildren(range, 4, 18, text); // It's silly, Charley
                    testChildren(range, 6, 13); // </baz>
                    testChildren(range, 7, 13, elementStart, elementStart, elementEmpty); // <frobozz>
                    testChildren(range, 8, 17, elementStart); // <is>
                    testChildren(range, 8, 21, text); // the Wiz
                    testChildren(range, 8, 28); // </is>
                    testChildren(range, 9, 13); // </frobozz>
                    testChildren(range, 10, 13, elementStart, elementEmpty); // <empty>
                    testChildren(range, 10, 20); // </empty>
                    testChildren(range, 11, 13, elementEmpty); // <xyzzy/>
                    testChildren(range, 12, 9); // </bar>
                    testChildren(range, 13, 5); // </foo>
                    testChildren(range, 14, 5, comment); // <!--This isn't the end-->
                    testChildren(range, 15, 1); // </root>
                    testChildren(range, 16, 1, pi, comment); // <?Poirot?>
                    testChildren(range, 17, 1, comment); // <!--It's the end!-->
                    testChildren(range, 1, 1); // empty range
                }
            }
            {
                // Full walk of the tree produced from the whole document.
                auto dom = parseDOM(func(xml));
                assert(dom.children.length == 3);

                auto root = dom.children[0];
                assert(root.type == EntityType.elementStart);
                assert(root.pos == TextPos(1, 1));
                assert(root.children.length == 2);
                assert(equal(root.name, "root"));

                auto foo = root.children[0];
                assert(foo.type == EntityType.elementStart);
                assert(foo.pos == TextPos(2, 5));
                assert(foo.children.length == 1);
                assert(equal(foo.name, "foo"));

                auto bar = foo.children[0];
                assert(bar.type == EntityType.elementStart);
                assert(bar.pos == TextPos(3, 9));
                assert(bar.children.length == 4);
                assert(equal(bar.name, "bar"));

                auto baz = bar.children[0];
                assert(baz.type == EntityType.elementStart);
                assert(baz.pos == TextPos(4, 13));
                assert(baz.children.length == 1);
                assert(equal(baz.name, "baz"));

                auto silly = baz.children[0];
                assert(silly.type == EntityType.text);
                assert(silly.pos == TextPos(4, 18));
                assert(equal(silly.text, "\n                It's silly, Charley\n            "));

                auto frobozz = bar.children[1];
                assert(frobozz.type == EntityType.elementStart);
                assert(frobozz.pos == TextPos(7, 13));
                assert(frobozz.children.length == 1);
                assert(equal(frobozz.name, "frobozz"));

                auto is_ = frobozz.children[0];
                assert(is_.type == EntityType.elementStart);
                assert(is_.pos == TextPos(8, 17));
                assert(is_.children.length == 1);
                assert(equal(is_.name, "is"));

                auto wiz = is_.children[0];
                assert(wiz.type == EntityType.text);
                assert(wiz.pos == TextPos(8, 21));
                assert(equal(wiz.text, "the Wiz"));

                auto empty = bar.children[2];
                assert(empty.type == EntityType.elementStart);
                assert(empty.pos == TextPos(10, 13));
                assert(empty.children.length == 0);
                assert(equal(empty.name, "empty"));

                auto xyzzy = bar.children[3];
                assert(xyzzy.type == EntityType.elementEmpty);
                assert(xyzzy.pos == TextPos(11, 13));
                assert(equal(xyzzy.name, "xyzzy"));

                auto comment = root.children[1];
                assert(comment.type == EntityType.comment);
                assert(comment.pos == TextPos(14, 5));
                assert(equal(comment.text, "This isn't the end"));

                auto poirot = dom.children[1];
                assert(poirot.type == EntityType.pi);
                assert(poirot.pos == TextPos(16, 1));
                assert(equal(poirot.name, "Poirot"));
                assert(poirot.text.empty);

                auto endComment = dom.children[2];
                assert(endComment.type == EntityType.comment);
                assert(endComment.pos == TextPos(17, 1));
                assert(equal(endComment.text, "It's the end!"));
            }
        }
    }}
}