1 // Written in the D programming language 2 3 /++ 4 This implements a DOM for representing an XML 1.0 document. $(LREF parseDOM) 5 uses an $(REF EntityRange, dxml, parser) to parse the document, and 6 $(LREF DOMEntity) recursively represents the DOM tree. 7 8 See the documentation for $(MREF dxml, parser) and 9 $(REF EntityRange, dxml, parser) for details on the parser and its 10 configuration options. 11 12 For convenience, $(REF EntityType, dxml, parser) and 13 $(REF simpleXML, dxml, parser) are publicly imported by this module, 14 since $(REF_ALTTEXT EntityType, EntityType, dxml, parser) is required 15 to correctly use $(LREF DOMEntity), and 16 $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) is highly likely to 17 be used when calling $(LREF parseDOM). 18 19 Copyright: Copyright 2018 - 2023 20 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 21 Authors: $(HTTPS jmdavisprog.com, Jonathan M Davis) 22 Source: $(LINK_TO_SRC dxml/_dom.d) 23 24 See_Also: $(LINK2 http://www.w3.org/TR/REC-xml/, Official Specification for XML 1.0) 25 +/ 26 module dxml.dom; 27 28 /// 29 version(dxmlTests) unittest 30 { 31 import std.range.primitives : empty; 32 33 auto xml = "<!-- comment -->\n" ~ 34 "<root>\n" ~ 35 " <foo>some text<whatever/></foo>\n" ~ 36 " <bar/>\n" ~ 37 " <baz></baz>\n" ~ 38 "</root>"; 39 { 40 auto dom = parseDOM(xml); 41 assert(dom.type == EntityType.elementStart); 42 assert(dom.name.empty); 43 assert(dom.children.length == 2); 44 45 assert(dom.children[0].type == EntityType.comment); 46 assert(dom.children[0].text == " comment "); 47 48 auto root = dom.children[1]; 49 assert(root.type == EntityType.elementStart); 50 assert(root.name == "root"); 51 assert(root.children.length == 3); 52 53 auto foo = root.children[0]; 54 assert(foo.type == EntityType.elementStart); 55 assert(foo.name == "foo"); 56 assert(foo.children.length == 2); 57 58 assert(foo.children[0].type == EntityType.text); 59 assert(foo.children[0].text == "some text"); 60 61 
assert(foo.children[1].type == EntityType.elementEmpty); 62 assert(foo.children[1].name == "whatever"); 63 64 assert(root.children[1].type == EntityType.elementEmpty); 65 assert(root.children[1].name == "bar"); 66 67 assert(root.children[2].type == EntityType.elementStart); 68 assert(root.children[2].name == "baz"); 69 assert(root.children[2].children.length == 0); 70 } 71 { 72 auto dom = parseDOM!simpleXML(xml); 73 assert(dom.type == EntityType.elementStart); 74 assert(dom.name.empty); 75 assert(dom.children.length == 1); 76 77 auto root = dom.children[0]; 78 assert(root.type == EntityType.elementStart); 79 assert(root.name == "root"); 80 assert(root.children.length == 3); 81 82 auto foo = root.children[0]; 83 assert(foo.type == EntityType.elementStart); 84 assert(foo.name == "foo"); 85 assert(foo.children.length == 2); 86 87 assert(foo.children[0].type == EntityType.text); 88 assert(foo.children[0].text == "some text"); 89 90 assert(foo.children[1].type == EntityType.elementStart); 91 assert(foo.children[1].name == "whatever"); 92 assert(foo.children[1].children.length == 0); 93 94 assert(root.children[1].type == EntityType.elementStart); 95 assert(root.children[1].name == "bar"); 96 assert(root.children[1].children.length == 0); 97 98 assert(root.children[2].type == EntityType.elementStart); 99 assert(root.children[2].name == "baz"); 100 assert(root.children[2].children.length == 0); 101 } 102 } 103 104 105 import std.range.primitives; 106 import std.traits; 107 108 public import dxml.parser : EntityType, simpleXML; 109 import dxml.parser : Config, EntityRange; 110 111 112 /++ 113 Represents an entity in an XML document as a DOM tree. 114 115 parseDOM either takes a range of characters or an 116 $(REF EntityRange, dxml, parser) and generates a DOMEntity from that XML. 117 118 When parseDOM processes the XML, it returns a DOMEntity representing the 119 entire document. 
Even though the XML document itself isn't technically an 120 entity in the XML document, it's simplest to treat it as if it were an 121 $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser) 122 with an empty $(LREF2 name, _DOMEntity.name). That DOMEntity then contains 123 child entities that recursively define the DOM tree through their children. 124 125 For DOMEntities of type 126 $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser), 127 $(LREF _DOMEntity.children) gives access to all of the child entities of 128 that start tag. Other DOMEntities have no children. 129 130 Note that the $(LREF2 type, _DOMEntity.type) determines which 131 properties of the DOMEntity can be used, and it can determine whether 132 functions which a DOMEntity is passed to are allowed to be called. Each 133 function lists which $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s 134 are allowed, and it is an error to call them with any other 135 $(REF_ALTTEXT EntityType, EntityType, dxml, parser). 136 137 If parseDOM is given a range of characters, it in turn passes that to 138 $(REF parseXML, dxml, parser) to do the actual XML parsing. As such, that 139 overload accepts an optional $(REF Config, dxml, parser) as a template 140 argument to configure the parser. 141 142 If parseDOM is given an 143 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the range does 144 not have to be at the start of the document. It can be used to create a DOM 145 for a portion of the document. Just as when a range of characters is passed to it, parseDOM 146 will return a DOMEntity with the $(LREF2 type, _DOMEntity.type) 147 $(REF_ALTTEXT EntityType.elementStart, EntityType.elementStart, dxml, parser) 148 and an empty $(LREF2 name, _DOMEntity.name). 
It will iterate the range until 149 it either reaches the end of the range, or it reaches the end tag which 150 matches the start tag which is the parent of the entity that was the 151 $(D front) of the range when it was passed to parseDOM. The 152 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) 153 is passed by $(K_REF), so if it was not at the top level when it was passed 154 to parseDOM (and thus still has elements in it when parseDOM returns), the 155 range will then be at the entity after that matching end tag, and the 156 application can continue to process the range after that if it so chooses. 157 158 Params: 159 config = The $(REF Config, dxml, parser) to use with 160 $(REF parseXML, dxml, parser) if the range passed to parseDOM 161 is a range of characters. 162 range = Either a range of characters representing an entire XML document 163 or a $(REF EntityRange, dxml, parser) which may refer to some 164 or all of an XML document. 165 166 Returns: A DOMEntity representing the DOM tree from the point in the 167 document that was passed to parseDOM (the start of the document if 168 a range of characters was passed, and wherever in the document the 169 range was if an 170 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) was passed). 171 172 Throws: $(REF_ALTTEXT XMLParsingException, XMLParsingException, dxml, parser) 173 if the parser encounters invalid XML. 174 +/ 175 struct DOMEntity(R) 176 { 177 public: 178 179 import std.algorithm.searching : canFind; 180 import std.range : only, takeExactly; 181 import std.typecons : Tuple; 182 import dxml.parser : TextPos; 183 184 private enum compileInTests = is(R == DOMCompileTests); 185 186 /++ 187 The type used when any slice of the original range of characters is 188 used. If the range was a string or supports slicing, then SliceOfR is 189 the same type as the range; otherwise, it's the result of calling 190 $(PHOBOS_REF takeExactly, std, range) on it. 
191 192 --- 193 import std.algorithm : filter; 194 import std.range : takeExactly; 195 196 static assert(is(DOMEntity!string.SliceOfR == string)); 197 198 auto range = filter!(a => true)("some xml"); 199 200 static assert(is(DOMEntity!(typeof(range)).SliceOfR == 201 typeof(takeExactly(range, 42)))); 202 --- 203 +/ 204 static if(isDynamicArray!R || hasSlicing!R) 205 alias SliceOfR = R; 206 else 207 alias SliceOfR = typeof(takeExactly(R.init, 42)); 208 209 // https://issues.dlang.org/show_bug.cgi?id=11133 prevents this from being 210 // a ddoc-ed unit test. 211 static if(compileInTests) @safe unittest 212 { 213 import std.algorithm : filter; 214 import std.range : takeExactly; 215 216 static assert(is(DOMEntity!string.SliceOfR == string)); 217 218 auto range = filter!(a => true)("some xml"); 219 220 static assert(is(DOMEntity!(typeof(range)).SliceOfR == 221 typeof(takeExactly(range, 42)))); 222 } 223 224 225 /++ 226 The exact instantiation of $(PHOBOS_REF Tuple, std, typecons) that 227 $(LREF2 attributes, DOMEntity) returns a range of. 228 229 See_Also: $(LREF2 attributes, DOMEntity) 230 +/ 231 alias Attribute = Tuple!(SliceOfR, "name", SliceOfR, "value", TextPos, "pos"); 232 233 234 /++ 235 The $(REF_ALTTEXT EntityType, EntityType, dxml, parser) for this 236 DOMEntity. 237 238 The type can never be 239 $(REF_ALTTEXT EntityType.elementEnd, EntityType.elementEnd, dxml, parser), 240 because the end of $(LREF2 children, DOMEntity.children) already 241 indicates where the contents of the start tag end. 242 243 type determines which properties of the DOMEntity can be used, and it 244 can determine whether functions which a DOMEntity is passed to are 245 allowed to be called. Each function lists which 246 $(REF_ALTTEXT EntityType, EntityType, dxml, parser)s are allowed, and it 247 is an error to call them with any other 248 $(REF_ALTTEXT EntityType, EntityType, dxml, parser). 
249 +/ 250 @property EntityType type() @safe const pure nothrow @nogc 251 { 252 return _type; 253 } 254 255 /// 256 static if(compileInTests) unittest 257 { 258 import std.range.primitives; 259 260 auto xml = "<root>\n" ~ 261 " <!--no comment-->\n" ~ 262 " <![CDATA[cdata run]]>\n" ~ 263 " <text>I am text!</text>\n" ~ 264 " <empty/>\n" ~ 265 " <?pi?>\n" ~ 266 "</root>"; 267 268 auto dom = parseDOM(xml); 269 assert(dom.type == EntityType.elementStart); 270 assert(dom.name.empty); 271 assert(dom.children.length == 1); 272 273 auto root = dom.children[0]; 274 assert(root.type == EntityType.elementStart); 275 assert(root.name == "root"); 276 assert(root.children.length == 5); 277 278 assert(root.children[0].type == EntityType.comment); 279 assert(root.children[0].text == "no comment"); 280 281 assert(root.children[1].type == EntityType.cdata); 282 assert(root.children[1].text == "cdata run"); 283 284 auto textTag = root.children[2]; 285 assert(textTag.type == EntityType.elementStart); 286 assert(textTag.name == "text"); 287 assert(textTag.children.length == 1); 288 289 assert(textTag.children[0].type == EntityType.text); 290 assert(textTag.children[0].text == "I am text!"); 291 292 assert(root.children[3].type == EntityType.elementEmpty); 293 assert(root.children[3].name == "empty"); 294 295 assert(root.children[4].type == EntityType.pi); 296 assert(root.children[4].name == "pi"); 297 } 298 299 300 /++ 301 The position in the original text where the entity starts. 302 303 See_Also: $(REF_ALTTEXT TextPos, TextPos, dxml, parser)$(BR) 304 $(REF_ALTTEXT XMLParsingException._pos, XMLParsingException._pos, dxml, parser) 305 +/ 306 @property TextPos pos() @safe const pure nothrow @nogc 307 { 308 return _pos; 309 } 310 311 /// 312 static if(compileInTests) unittest 313 { 314 import std.range.primitives : empty; 315 import dxml.parser : TextPos; 316 import dxml.util : stripIndent; 317 318 auto xml = "<root>\n" ~ 319 " <foo>\n" ~ 320 " Foo and bar. 
Always foo and bar...\n" ~ 321 " </foo>\n" ~ 322 "</root>"; 323 324 auto dom = parseDOM(xml); 325 assert(dom.type == EntityType.elementStart); 326 assert(dom.name.empty); 327 assert(dom.pos == TextPos(1, 1)); 328 329 auto root = dom.children[0]; 330 assert(root.type == EntityType.elementStart); 331 assert(root.name == "root"); 332 assert(root.pos == TextPos(1, 1)); 333 334 auto foo = root.children[0]; 335 assert(foo.type == EntityType.elementStart); 336 assert(foo.name == "foo"); 337 assert(foo.pos == TextPos(2, 5)); 338 339 auto text = foo.children[0]; 340 assert(text.type == EntityType.text); 341 assert(text.text.stripIndent() == 342 "Foo and bar. Always foo and bar..."); 343 assert(text.pos == TextPos(2, 10)); 344 } 345 346 347 /++ 348 Gives the name of this DOMEntity. 349 350 Note that this is the direct name in the XML for this entity and 351 does not contain any of the names of any of the parent entities that 352 this entity has. 353 354 $(TABLE 355 $(TR $(TH Supported $(LREF EntityType)s:)) 356 $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser))) 357 $(TR $(TD $(REF_ALTTEXT elementEnd, EntityType.elementEnd, dxml, parser))) 358 $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser))) 359 $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser))) 360 ) 361 362 See_Also: $(LREF2 path, DOMEntity.path) 363 +/ 364 @property SliceOfR name() 365 { 366 import dxml.internal : checkedSave; 367 with(EntityType) 368 { 369 import std.format : format; 370 assert(only(elementStart, elementEnd, elementEmpty, pi).canFind(_type), 371 format("name cannot be called with %s", _type)); 372 } 373 return checkedSave(_name); 374 } 375 376 /// 377 static if(compileInTests) unittest 378 { 379 import std.range.primitives : empty; 380 381 auto xml = "<root>\n" ~ 382 " <empty/>\n" ~ 383 " <?pi?>\n" ~ 384 "</root>"; 385 386 auto dom = parseDOM(xml); 387 assert(dom.type == EntityType.elementStart); 388 assert(dom.name.empty); 389 390 auto 
root = dom.children[0]; 391 assert(root.type == EntityType.elementStart); 392 assert(root.name == "root"); 393 394 assert(root.children[0].type == EntityType.elementEmpty); 395 assert(root.children[0].name == "empty"); 396 397 assert(root.children[1].type == EntityType.pi); 398 assert(root.children[1].name == "pi"); 399 } 400 401 402 /++ 403 Gives the list of the names of the parent start tags of this DOMEntity. 404 405 The name of the current entity (if it has one) is not included in the 406 path. 407 408 Note that if parseDOM were given an 409 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser), the path 410 starts where the range started. So, it doesn't necessarily contain the 411 entire path from the start of the XML document. 412 413 See_Also: $(LREF2 name, DOMEntity.name) 414 +/ 415 @property SliceOfR[] path() 416 { 417 return _path; 418 } 419 420 /// 421 static if(compileInTests) unittest 422 { 423 import std.range.primitives : empty; 424 425 auto xml = "<root>\n" ~ 426 " <bar>\n" ~ 427 " <baz>\n" ~ 428 " <xyzzy/>\n" ~ 429 " </baz>\n" ~ 430 " <frobozz>\n" ~ 431 " <!-- comment -->\n" ~ 432 " It's magic!\n" ~ 433 " </frobozz>\n" ~ 434 " </bar>\n" ~ 435 " <foo></foo>\n" ~ 436 "</root>"; 437 438 auto dom = parseDOM(xml); 439 assert(dom.type == EntityType.elementStart); 440 assert(dom.name.empty); 441 assert(dom.path.empty); 442 443 auto root = dom.children[0]; 444 assert(root.type == EntityType.elementStart); 445 assert(root.name == "root"); 446 assert(root.path.empty); 447 448 auto bar = root.children[0]; 449 assert(bar.type == EntityType.elementStart); 450 assert(bar.name == "bar"); 451 assert(bar.path == ["root"]); 452 453 auto baz = bar.children[0]; 454 assert(baz.type == EntityType.elementStart); 455 assert(baz.name == "baz"); 456 assert(baz.path == ["root", "bar"]); 457 458 auto xyzzy = baz.children[0]; 459 assert(xyzzy.type == EntityType.elementEmpty); 460 assert(xyzzy.name == "xyzzy"); 461 assert(xyzzy.path == ["root", "bar", "baz"]); 462 463 auto 
frobozz = bar.children[1]; 464 assert(frobozz.type == EntityType.elementStart); 465 assert(frobozz.name == "frobozz"); 466 assert(frobozz.path == ["root", "bar"]); 467 468 auto comment = frobozz.children[0]; 469 assert(comment.type == EntityType.comment); 470 assert(comment.text == " comment "); 471 assert(comment.path == ["root", "bar", "frobozz"]); 472 473 auto text = frobozz.children[1]; 474 assert(text.type == EntityType.text); 475 assert(text.text == "\n It's magic!\n "); 476 assert(text.path == ["root", "bar", "frobozz"]); 477 478 auto foo = root.children[1]; 479 assert(foo.type == EntityType.elementStart); 480 assert(foo.name == "foo"); 481 assert(foo.path == ["root"]); 482 } 483 484 485 /++ 486 Returns a dynamic array of attributes for a start tag where each 487 attribute is represented as a$(BR) 488 $(D $(PHOBOS_REF_ALTTEXT Tuple, Tuple, std, typecons)!( 489 $(LREF2 SliceOfR, DOMEntity), $(D_STRING "name"), 490 $(LREF2 SliceOfR, DOMEntity), $(D_STRING "value"), 491 $(REF_ALTTEXT TextPos, TextPos, dxml, parser), $(D_STRING "pos"))). 
492 493 $(TABLE 494 $(TR $(TH Supported $(LREF EntityType)s:)) 495 $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser))) 496 $(TR $(TD $(REF_ALTTEXT elementEmpty, EntityType.elementEmpty, dxml, parser))) 497 ) 498 499 See_Also: $(LREF DOMEntity.Attribute)$(BR) 500 $(REF normalize, dxml, util)$(BR) 501 $(REF asNormalized, dxml, util) 502 +/ 503 @property auto attributes() 504 { 505 with(EntityType) 506 { 507 import std.format : format; 508 assert(_type == elementStart || _type == elementEmpty, 509 format("attributes cannot be called with %s", _type)); 510 } 511 return _attributes; 512 } 513 514 /// 515 static if(compileInTests) unittest 516 { 517 import std.algorithm.comparison : equal; 518 import std.algorithm.iteration : filter; 519 import std.range.primitives : empty; 520 import dxml.parser : TextPos; 521 522 { 523 auto xml = "<root/>"; 524 auto root = parseDOM(xml).children[0]; 525 assert(root.type == EntityType.elementEmpty); 526 assert(root.attributes.empty); 527 528 static assert(is(ElementType!(typeof(root.attributes)) == 529 typeof(root).Attribute)); 530 } 531 { 532 auto xml = "<root a='42' q='29' w='hello'/>"; 533 auto root = parseDOM(xml).children[0]; 534 assert(root.type == EntityType.elementEmpty); 535 536 auto attrs = root.attributes; 537 assert(attrs.length == 3); 538 539 assert(attrs[0].name == "a"); 540 assert(attrs[0].value == "42"); 541 assert(attrs[0].pos == TextPos(1, 7)); 542 543 assert(attrs[1].name == "q"); 544 assert(attrs[1].value == "29"); 545 assert(attrs[1].pos == TextPos(1, 14)); 546 547 assert(attrs[2].name == "w"); 548 assert(attrs[2].value == "hello"); 549 assert(attrs[2].pos == TextPos(1, 21)); 550 } 551 // Because the type of name and value is SliceOfR, == with a string 552 // only works if the range passed to parseDOM was a string. 
553 { 554 auto xml = filter!"true"("<root a='42' q='29' w='hello'/>"); 555 auto root = parseDOM(xml).children[0]; 556 assert(root.type == EntityType.elementEmpty); 557 558 auto attrs = root.attributes; 559 assert(attrs.length == 3); 560 561 assert(equal(attrs[0].name, "a")); 562 assert(equal(attrs[0].value, "42")); 563 assert(attrs[0].pos == TextPos(1, 7)); 564 565 assert(equal(attrs[1].name, "q")); 566 assert(equal(attrs[1].value, "29")); 567 assert(attrs[1].pos == TextPos(1, 14)); 568 569 assert(equal(attrs[2].name, "w")); 570 assert(equal(attrs[2].value, "hello")); 571 assert(attrs[2].pos == TextPos(1, 21)); 572 } 573 } 574 575 576 /++ 577 Returns the textual value of this DOMEntity. 578 579 In the case of 580 $(REF_ALTTEXT EntityType.pi, EntityType.pi, dxml, parser), this is the 581 text that follows the name, whereas in the other cases, the text is the 582 entire contents of the entity (save for the delimiters on the ends if 583 that entity has them). 584 585 $(TABLE 586 $(TR $(TH Supported $(LREF EntityType)s:)) 587 $(TR $(TD $(REF_ALTTEXT cdata, EntityType.cdata, dxml, parser))) 588 $(TR $(TD $(REF_ALTTEXT comment, EntityType.comment, dxml, parser))) 589 $(TR $(TD $(REF_ALTTEXT pi, EntityType.pi, dxml, parser))) 590 $(TR $(TD $(REF_ALTTEXT _text, EntityType._text, dxml, parser))) 591 ) 592 593 See_Also: $(REF normalize, dxml, util)$(BR) 594 $(REF asNormalized, dxml, util)$(BR) 595 $(REF stripIndent, dxml, util)$(BR) 596 $(REF withoutIndent, dxml, util) 597 +/ 598 @property SliceOfR text() 599 { 600 import dxml.internal : checkedSave; 601 with(EntityType) 602 { 603 import std.format : format; 604 assert(only(cdata, comment, pi, text).canFind(_type), 605 format("text cannot be called with %s", _type)); 606 } 607 return checkedSave(_text); 608 } 609 610 /// 611 static if(compileInTests) unittest 612 { 613 import std.range.primitives : empty; 614 615 auto xml = "<?xml version='1.0'?>\n" ~ 616 "<?instructionName?>\n" ~ 617 "<?foo here is something to say?>\n" ~ 
618 "<root>\n" ~ 619 " <![CDATA[ Yay! random text >> << ]]>\n" ~ 620 " <!-- some random comment -->\n" ~ 621 " <p>something here</p>\n" ~ 622 " <p>\n" ~ 623 " something else\n" ~ 624 " here</p>\n" ~ 625 "</root>"; 626 auto dom = parseDOM(xml); 627 628 // "<?instructionName?>\n" ~ 629 auto pi1 = dom.children[0]; 630 assert(pi1.type == EntityType.pi); 631 assert(pi1.name == "instructionName"); 632 assert(pi1.text.empty); 633 634 // "<?foo here is something to say?>\n" ~ 635 auto pi2 = dom.children[1]; 636 assert(pi2.type == EntityType.pi); 637 assert(pi2.name == "foo"); 638 assert(pi2.text == "here is something to say"); 639 640 // "<root>\n" ~ 641 auto root = dom.children[2]; 642 assert(root.type == EntityType.elementStart); 643 644 // " <![CDATA[ Yay! random text >> << ]]>\n" ~ 645 auto cdata = root.children[0]; 646 assert(cdata.type == EntityType.cdata); 647 assert(cdata.text == " Yay! random text >> << "); 648 649 // " <!-- some random comment -->\n" ~ 650 auto comment = root.children[1]; 651 assert(comment.type == EntityType.comment); 652 assert(comment.text == " some random comment "); 653 654 // " <p>something here</p>\n" ~ 655 auto p1 = root.children[2]; 656 assert(p1.type == EntityType.elementStart); 657 assert(p1.name == "p"); 658 659 assert(p1.children[0].type == EntityType.text); 660 assert(p1.children[0].text == "something here"); 661 662 // " <p>\n" ~ 663 // " something else\n" ~ 664 // " here</p>\n" ~ 665 auto p2 = root.children[3]; 666 assert(p2.type == EntityType.elementStart); 667 668 assert(p2.children[0].type == EntityType.text); 669 assert(p2.children[0].text == "\n something else\n here"); 670 } 671 672 673 /++ 674 Returns the child entities of the current entity. 675 676 They are in the same order that they were in the XML document. 
677 678 $(TABLE 679 $(TR $(TH Supported $(LREF EntityType)s:)) 680 $(TR $(TD $(REF_ALTTEXT elementStart, EntityType.elementStart, dxml, parser))) 681 ) 682 +/ 683 @property DOMEntity[] children() 684 { 685 import std.format : format; 686 assert(_type == EntityType.elementStart, 687 format!"children cannot be called with %s"(_type)); 688 return _children; 689 } 690 691 /// 692 static if(compileInTests) unittest 693 { 694 auto xml = "<potato>\n" ~ 695 " <!--comment-->\n" ~ 696 " <foo>bar</foo>\n" ~ 697 " <tag>\n" ~ 698 " <silly>you</silly>\n" ~ 699 " <empty/>\n" ~ 700 " <nocontent></nocontent>\n" ~ 701 " </tag>\n" ~ 702 "</potato>\n" ~ 703 "<!--the end-->"; 704 auto dom = parseDOM(xml); 705 assert(dom.children.length == 2); 706 707 auto potato = dom.children[0]; 708 assert(potato.type == EntityType.elementStart); 709 assert(potato.name == "potato"); 710 assert(potato.children.length == 3); 711 712 auto comment = potato.children[0]; 713 assert(comment.type == EntityType.comment); 714 assert(comment.text == "comment"); 715 716 auto foo = potato.children[1]; 717 assert(foo.type == EntityType.elementStart); 718 assert(foo.name == "foo"); 719 assert(foo.children.length == 1); 720 721 assert(foo.children[0].type == EntityType.text); 722 assert(foo.children[0].text == "bar"); 723 724 auto tag = potato.children[2]; 725 assert(tag.type == EntityType.elementStart); 726 assert(tag.name == "tag"); 727 assert(tag.children.length == 3); 728 729 auto silly = tag.children[0]; 730 assert(silly.type == EntityType.elementStart); 731 assert(silly.name == "silly"); 732 assert(silly.children.length == 1); 733 734 assert(silly.children[0].type == EntityType.text); 735 assert(silly.children[0].text == "you"); 736 737 auto empty = tag.children[1]; 738 assert(empty.type == EntityType.elementEmpty); 739 assert(empty.name == "empty"); 740 741 auto nocontent = tag.children[2]; 742 assert(nocontent.type == EntityType.elementStart); 743 assert(nocontent.name == "nocontent"); 744 
assert(nocontent.children.length == 0); 745 746 auto endComment = dom.children[1]; 747 assert(endComment.type == EntityType.comment); 748 assert(endComment.text == "the end"); 749 } 750 751 752 // Reduce the chance of bugs if reference-type ranges are involved. 753 static if(!isDynamicArray!R) this(this) 754 { 755 with(EntityType) final switch(_type) 756 { 757 case cdata: goto case text; 758 case comment: goto case text; 759 case elementStart: 760 { 761 _name = _name.save; 762 break; 763 } 764 case elementEnd: goto case elementStart; 765 case elementEmpty: goto case elementStart; 766 case text: 767 { 768 _text = _text.save; 769 break; 770 } 771 case pi: 772 { 773 _text = _text.save; 774 goto case elementStart; 775 } 776 } 777 } 778 779 780 private: 781 // Used by _parseDOM to create each child entity; the name, text, attributes, path and children are filled in by the caller afterwards. 782 this(EntityType type, TextPos pos) 783 { 784 _type = type; 785 _pos = pos; 786 787 // None of these initializations should be required. https://issues.dlang.org/show_bug.cgi?id=13945 788 _name = typeof(_name).init; 789 _text = typeof(_text).init; 790 } 791 // _type defaults to elementStart, so a default-initialized DOMEntity is what parseDOM hands back as the document-level entity. 792 auto _type = EntityType.elementStart; 793 TextPos _pos; 794 SliceOfR _name; 795 SliceOfR[] _path; 796 Attribute[] _attributes; 797 SliceOfR _text; 798 DOMEntity[] _children; 799 } 800 801 /// Ditto 802 DOMEntity!R parseDOM(Config config = Config.init, R)(R range) 803 if(isForwardRange!R && isSomeChar!(ElementType!R)) 804 { 805 import dxml.parser : parseXML; 806 auto entityRange = parseXML!config(range); 807 typeof(return) retval; // document-level entity; _parseDOM fills in its children 808 _parseDOM(entityRange, retval); 809 return retval; 810 } 811 812 /// Ditto 813 DOMEntity!(ER.Input) parseDOM(ER)(ref ER range) 814 if(isInstanceOf!(EntityRange, ER)) 815 { 816 typeof(return) retval; 817 if(range.empty) // nothing left to parse: return the default entity with no children 818 return retval; 819 retval._pos = range.front.pos; // the result reports the position of the first entity in the range 820 if(range.front.type == EntityType.elementEnd) // already at an end tag: no entities at this level, and the end tag is left in the range 821 return retval; 822 _parseDOM(range, retval); 823 return retval; 824 } 825 826 /++ 827 parseDOM with the default $(REF_ALTTEXT Config, Config, dxml, parser) and a 828 range of characters. 
829 +/ 830 version(dxmlTests) @safe unittest 831 { 832 import std.range.primitives; 833 834 auto xml = "<root>\n" ~ 835 " <!-- no comment -->\n" ~ 836 " <foo></foo>\n" ~ 837 " <baz>\n" ~ 838 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 839 " </baz>\n" ~ 840 " <tag/>\n" ~ 841 "</root>"; 842 843 auto dom = parseDOM(xml); 844 assert(dom.type == EntityType.elementStart); 845 assert(dom.name.empty); 846 assert(dom.children.length == 1); 847 848 auto root = dom.children[0]; 849 assert(root.type == EntityType.elementStart); 850 assert(root.name == "root"); 851 assert(root.children.length == 4); 852 853 assert(root.children[0].type == EntityType.comment); 854 assert(root.children[0].text == " no comment "); 855 856 assert(root.children[1].type == EntityType.elementStart); 857 assert(root.children[1].name == "foo"); 858 assert(root.children[1].children.length == 0); 859 860 auto baz = root.children[2]; 861 assert(baz.type == EntityType.elementStart); 862 assert(baz.name == "baz"); 863 assert(baz.children.length == 1); 864 865 auto xyzzy = baz.children[0]; 866 assert(xyzzy.type == EntityType.elementStart); 867 assert(xyzzy.name == "xyzzy"); 868 assert(xyzzy.children.length == 1); 869 870 assert(xyzzy.children[0].type == EntityType.text); 871 assert(xyzzy.children[0].text == "It's an adventure!"); 872 873 assert(root.children[3].type == EntityType.elementEmpty); 874 assert(root.children[3].name == "tag"); 875 } 876 877 /++ 878 parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and a range 879 of characters. 
880 +/ 881 version(dxmlTests) unittest 882 { 883 import std.range.primitives : empty; 884 885 auto xml = "<root>\n" ~ 886 " <!-- no comment -->\n" ~ 887 " <foo></foo>\n" ~ 888 " <baz>\n" ~ 889 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 890 " </baz>\n" ~ 891 " <tag/>\n" ~ 892 "</root>"; 893 894 auto dom = parseDOM!simpleXML(xml); 895 assert(dom.type == EntityType.elementStart); 896 assert(dom.name.empty); 897 assert(dom.children.length == 1); 898 899 auto root = dom.children[0]; 900 assert(root.type == EntityType.elementStart); 901 assert(root.name == "root"); 902 assert(root.children.length == 3); 903 904 assert(root.children[0].type == EntityType.elementStart); 905 assert(root.children[0].name == "foo"); 906 assert(root.children[0].children.length == 0); 907 908 auto baz = root.children[1]; 909 assert(baz.type == EntityType.elementStart); 910 assert(baz.name == "baz"); 911 assert(baz.children.length == 1); 912 913 auto xyzzy = baz.children[0]; 914 assert(xyzzy.type == EntityType.elementStart); 915 assert(xyzzy.name == "xyzzy"); 916 assert(xyzzy.children.length == 1); 917 918 assert(xyzzy.children[0].type == EntityType.text); 919 assert(xyzzy.children[0].text == "It's an adventure!"); 920 921 assert(root.children[2].type == EntityType.elementStart); 922 assert(root.children[2].name == "tag"); 923 assert(root.children[2].children.length == 0); 924 } 925 926 /++ 927 parseDOM with $(REF_ALTTEXT simpleXML, simpleXML, dxml, parser) and an 928 $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser). 
929 +/ 930 version(dxmlTests) unittest 931 { 932 import std.range.primitives : empty; 933 import dxml.parser : parseXML; 934 935 auto xml = "<root>\n" ~ 936 " <!-- no comment -->\n" ~ 937 " <foo></foo>\n" ~ 938 " <baz>\n" ~ 939 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 940 " </baz>\n" ~ 941 " <tag/>\n" ~ 942 "</root>"; 943 944 auto range = parseXML!simpleXML(xml); 945 auto dom = parseDOM(range); 946 assert(range.empty); 947 948 assert(dom.type == EntityType.elementStart); 949 assert(dom.name.empty); 950 assert(dom.children.length == 1); 951 952 auto root = dom.children[0]; 953 assert(root.type == EntityType.elementStart); 954 assert(root.name == "root"); 955 assert(root.children.length == 3); 956 957 assert(root.children[0].type == EntityType.elementStart); 958 assert(root.children[0].name == "foo"); 959 assert(root.children[0].children.length == 0); 960 961 auto baz = root.children[1]; 962 assert(baz.type == EntityType.elementStart); 963 assert(baz.name == "baz"); 964 assert(baz.children.length == 1); 965 966 auto xyzzy = baz.children[0]; 967 assert(xyzzy.type == EntityType.elementStart); 968 assert(xyzzy.name == "xyzzy"); 969 assert(xyzzy.children.length == 1); 970 971 assert(xyzzy.children[0].type == EntityType.text); 972 assert(xyzzy.children[0].text == "It's an adventure!"); 973 974 assert(root.children[2].type == EntityType.elementStart); 975 assert(root.children[2].name == "tag"); 976 assert(root.children[2].children.length == 0); 977 } 978 979 /++ 980 parseDOM with an $(REF_ALTTEXT EntityRange, EntityRange, dxml, parser) 981 which is not at the start of the document. 
982 +/ 983 version(dxmlTests) unittest 984 { 985 import std.range.primitives : empty; 986 import dxml.parser : parseXML, skipToPath; 987 988 auto xml = "<root>\n" ~ 989 " <!-- no comment -->\n" ~ 990 " <foo></foo>\n" ~ 991 " <baz>\n" ~ 992 " <xyzzy>It's an adventure!</xyzzy>\n" ~ 993 " </baz>\n" ~ 994 " <tag/>\n" ~ 995 "</root>"; 996 997 auto range = parseXML!simpleXML(xml).skipToPath("baz/xyzzy"); 998 assert(range.front.type == EntityType.elementStart); 999 assert(range.front.name == "xyzzy"); 1000 1001 auto dom = parseDOM(range); 1002 assert(range.front.type == EntityType.elementStart); 1003 assert(range.front.name == "tag"); 1004 1005 assert(dom.type == EntityType.elementStart); 1006 assert(dom.name.empty); 1007 assert(dom.children.length == 1); 1008 1009 auto xyzzy = dom.children[0]; 1010 assert(xyzzy.type == EntityType.elementStart); 1011 assert(xyzzy.name == "xyzzy"); 1012 assert(xyzzy.children.length == 1); 1013 1014 assert(xyzzy.children[0].type == EntityType.text); 1015 assert(xyzzy.children[0].text == "It's an adventure!"); 1016 } 1017 1018 /// parseDOM at compile-time 1019 version(dxmlTests) unittest 1020 { 1021 enum xml = "<!-- comment -->\n" ~ 1022 "<root>\n" ~ 1023 " <foo>some text<whatever/></foo>\n" ~ 1024 " <bar/>\n" ~ 1025 " <baz></baz>\n" ~ 1026 "</root>"; 1027 1028 enum dom = parseDOM(xml); 1029 static assert(dom.type == EntityType.elementStart); 1030 static assert(dom.name.empty); 1031 static assert(dom.children.length == 2); 1032 1033 static assert(dom.children[0].type == EntityType.comment); 1034 static assert(dom.children[0].text == " comment "); 1035 } 1036 1037 // This is purely to provide a way to trigger the unittest blocks in DOMEntity 1038 // without compiling them in normally. 
private struct DOMCompileTests
{
    // Range-of-char shaped stub: every primitive is just assert(0), so the
    // type can be used to instantiate DOMEntity but calling any member fails.
    @property bool empty() @safe pure nothrow @nogc { assert(0); }
    @property char front() @safe pure nothrow @nogc { assert(0); }
    void popFront() @safe pure nothrow @nogc { assert(0); }
    @property typeof(this) save() @safe pure nothrow @nogc { assert(0); }
}

version(dxmlTests)
    DOMEntity!DOMCompileTests _domTests;


private:

/+
    Recursively builds the children of parent from range.

    Entities are popped off of range one at a time and turned into DE
    values until either the range is empty or an elementEnd has been popped
    (that elementEnd is consumed but not stored). The collected entities are
    then assigned to parent._children.

    Params:
        range = The entity range to consume. It must not be empty, and its
                front must not be an elementEnd.
        parent = The entity whose _children is set to the parsed entities.
        path = The names assigned to the _path of each direct child. When
               recursing into a start tag, that tag's name is appended for
               the nested call and removed again afterwards.
 +/
void _parseDOM(ER, DE)(ref ER range, ref DE parent, ER.SliceOfR[] path = null)
{
    assert(!range.empty);
    assert(range.front.type != EntityType.elementEnd);

    import std.array : appender, array;
    auto children = appender!(DE[])();

    while(!range.empty)
    {
        auto entity = range.front;
        range.popFront();

        // The end tag which closes this level has been consumed; stop here.
        if(entity.type == EntityType.elementEnd)
            break;

        auto child = DE(entity.type, entity.pos);
        child._path = path;

        with(EntityType) final switch(entity.type)
        {
            case cdata: goto case text;
            case comment: goto case text;
            case elementStart:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();

                // NOTE(review): front is read without checking empty, so this
                // presumably relies on the parser never ending the range
                // directly after a start tag - confirm against EntityRange.
                if(range.front.type == EntityType.elementEnd)
                    range.popFront(); // <a></a>: nothing to recurse into
                else
                {
                    // Only undo the append if it actually happened. Shrinking
                    // unconditionally would drop a real ancestor from path
                    // (or underflow the length of an empty path) whenever
                    // the name was empty and nothing had been appended.
                    immutable pushed = !entity.name.empty;
                    if(pushed)
                        path ~= entity.name;
                    // TODO The explicit instantiation doesn't hurt, but it
                    // shouldn't be necessary, and if it's not there, we get
                    // a compiler error. It should be reduced and reported.
                    _parseDOM!(ER, DE)(range, child, path);
                    // Restore path for the siblings which follow.
                    if(pushed)
                        --path.length;
                }
                break;
            }
            case elementEnd: assert(0);
            case elementEmpty:
            {
                child._name = entity.name;
                child._attributes = entity.attributes.array();
                break;
            }
            case text:
            {
                child._text = entity.text;
                break;
            }
            case pi:
            {
                child._name = entity.name;
                child._text = entity.text;
                break;
            }
        }

        put(children, child);
    }

    parent._children = children.data;
}

version(dxmlTests) unittest
{
    import std.algorithm.comparison : equal;
    import dxml.internal : testRangeFuncs;
    import dxml.parser : parseXML, TextPos;

    // Parses a DOM from a copy of entityRange, checks the types of the
    // resulting top-level children and the DOM's position, and then advances
    // entityRange by one entity (if it has one) so that the next call starts
    // from the following entity. Failures are reported at the caller's line.
    static void testChildren(ER, size_t line = __LINE__)(ref ER entityRange, int row, int col, EntityType[] expected...)
    {
        import core.exception : AssertError;
        import std.exception : enforce;
        auto temp = entityRange.save;
        auto dom = parseDOM(temp);
        enforce!AssertError(dom.type == EntityType.elementStart, "unittest 1", __FILE__, line);
        enforce!AssertError(dom.children.length == expected.length, "unittest 2", __FILE__, line);
        foreach(i; 0 .. dom._children.length)
            enforce!AssertError(dom._children[i].type == expected[i], "unittest 3", __FILE__, line);
        enforce!AssertError(dom.pos == TextPos(row, col), "unittest 4", __FILE__, line);
        if(!entityRange.empty)
            entityRange.popFront();
    }

    static foreach(func; testRangeFuncs)
    {{
        {
            foreach(i, xml; ["<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su></su>",
                             "<!-- comment -->\n" ~
                             "<?pi foo?>\n" ~
                             "<su/>"])
            {
                auto range = parseXML(func(xml));
                // Start the DOM from each successive entity; every step
                // forward leaves one fewer top-level child.
                foreach(j; 0 .. 4 - i)
                {
                    auto temp = range.save;
                    auto dom = parseDOM(temp);
                    assert(dom.type == EntityType.elementStart);
                    assert(dom.children.length == 3 - j);
                    if(j <= 2)
                    {
                        assert(dom.children[2 - j].type ==
                               (i == 0 ? EntityType.elementStart : EntityType.elementEmpty));
                        assert(equal(dom.children[2 - j].name, "su"));
                        if(j <= 1)
                        {
                            assert(dom.children[1 - j].type == EntityType.pi);
                            assert(equal(dom.children[1 - j].name, "pi"));
                            assert(equal(dom.children[1 - j].text, "foo"));
                            if(j == 0)
                            {
                                assert(dom.children[0].type == EntityType.comment);
                                assert(equal(dom.children[0].text, " comment "));
                            }
                        }
                    }
                    range.popFront();
                }
                // An empty range yields an unnamed entity with no children.
                assert(range.empty);
                auto dom = parseDOM(range);
                assert(dom.type == EntityType.elementStart);
                assert(dom.name is typeof(dom.name).init);
                assert(dom.children.length == 0);
            }
        }
        {
            // The indentation inside these literals is significant: the
            // TextPos columns asserted below are derived from it.
            auto xml = "<root>\n" ~
                       "    <foo>\n" ~
                       "        <bar>\n" ~
                       "            <baz>\n" ~
                       "                It's silly, Charley\n" ~
                       "            </baz>\n" ~
                       "            <frobozz>\n" ~
                       "                <is>the Wiz</is>\n" ~
                       "            </frobozz>\n" ~
                       "            <empty></empty>\n" ~
                       "            <xyzzy/>\n" ~
                       "        </bar>\n" ~
                       "    </foo>\n" ~
                       "    <!--This isn't the end-->\n" ~
                       "</root>\n" ~
                       "<?Poirot?>\n" ~
                       "<!--It's the end!-->";

            {
                auto range = parseXML(func(xml));
                with(EntityType)
                {
                    testChildren(range, 1, 1, elementStart, pi, comment); // <root>
                    testChildren(range, 2, 5, elementStart, comment); // <foo>
                    testChildren(range, 3, 9, elementStart); // <bar>
                    testChildren(range, 4, 13, elementStart, elementStart, elementStart, elementEmpty); // <baz>
                    testChildren(range, 4, 18, text); // It's silly, Charley
                    testChildren(range, 6, 13); // </baz>
                    testChildren(range, 7, 13, elementStart, elementStart, elementEmpty); // <frobozz>
                    testChildren(range, 8, 17, elementStart); // <is>
                    testChildren(range, 8, 21, text); // the Wiz
                    testChildren(range, 8, 28); // </is>
                    testChildren(range, 9, 13); // </frobozz>
                    testChildren(range, 10, 13, elementStart, elementEmpty); // <empty>
                    testChildren(range, 10, 20); // </empty>
                    testChildren(range, 11, 13, elementEmpty); // <xyzzy/>
                    testChildren(range, 12, 9); // </bar>
                    testChildren(range, 13, 5); // </foo>
                    testChildren(range, 14, 5, comment); // <!--This isn't the end-->
                    testChildren(range, 15, 1); // </root>
                    testChildren(range, 16, 1, pi, comment); // <?Poirot?>
                    testChildren(range, 17, 1, comment); // <!--It's the end!-->
                    testChildren(range, 1, 1); // empty range
                }
            }
            {
                auto dom = parseDOM(func(xml));
                assert(dom.children.length == 3);

                auto root = dom.children[0];
                assert(root.type == EntityType.elementStart);
                assert(root.pos == TextPos(1, 1));
                assert(root.children.length == 2);
                assert(equal(root.name, "root"));

                auto foo = root.children[0];
                assert(foo.type == EntityType.elementStart);
                assert(foo.pos == TextPos(2, 5));
                assert(foo.children.length == 1);
                assert(equal(foo.name, "foo"));

                auto bar = foo.children[0];
                assert(bar.type == EntityType.elementStart);
                assert(bar.pos == TextPos(3, 9));
                assert(bar.children.length == 4);
                assert(equal(bar.name, "bar"));

                auto baz = bar.children[0];
                assert(baz.type == EntityType.elementStart);
                assert(baz.pos == TextPos(4, 13));
                assert(baz.children.length == 1);
                assert(equal(baz.name, "baz"));

                auto silly = baz.children[0];
                assert(silly.type == EntityType.text);
                assert(silly.pos == TextPos(4, 18));
                assert(equal(silly.text, "\n                It's silly, Charley\n            "));

                auto frobozz = bar.children[1];
                assert(frobozz.type == EntityType.elementStart);
                assert(frobozz.pos == TextPos(7, 13));
                assert(frobozz.children.length == 1);
                assert(equal(frobozz.name, "frobozz"));

                auto is_ = frobozz.children[0];
                assert(is_.type == EntityType.elementStart);
                assert(is_.pos == TextPos(8, 17));
                assert(is_.children.length == 1);
                assert(equal(is_.name, "is"));

                auto wiz = is_.children[0];
                assert(wiz.type == EntityType.text);
                assert(wiz.pos == TextPos(8, 21));
                assert(equal(wiz.text, "the Wiz"));

                auto empty = bar.children[2];
                assert(empty.type == EntityType.elementStart);
                assert(empty.pos == TextPos(10, 13));
                assert(empty.children.length == 0);
                assert(equal(empty.name, "empty"));

                auto xyzzy = bar.children[3];
                assert(xyzzy.type == EntityType.elementEmpty);
                assert(xyzzy.pos == TextPos(11, 13));
                assert(equal(xyzzy.name, "xyzzy"));

                auto comment = root.children[1];
                assert(comment.type == EntityType.comment);
                assert(comment.pos == TextPos(14, 5));
                assert(equal(comment.text, "This isn't the end"));

                auto poirot = dom.children[1];
                assert(poirot.type == EntityType.pi);
                assert(poirot.pos == TextPos(16, 1));
                assert(equal(poirot.name, "Poirot"));
                assert(poirot.text.empty);

                auto endComment = dom.children[2];
                assert(endComment.type == EntityType.comment);
                assert(endComment.pos == TextPos(17, 1));
                assert(equal(endComment.text, "It's the end!"));
            }
        }
    }}
}