1/* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18/* 19 * $Id: DOMBuilder.java 472634 2006-11-08 20:43:55Z jycli $ 20 */ 21package org.apache.xml.utils; 22 23import java.util.Stack; 24import java.util.Vector; 25 26import org.apache.xml.res.XMLErrorResources; 27import org.apache.xml.res.XMLMessages; 28 29import org.w3c.dom.Document; 30import org.w3c.dom.DocumentFragment; 31import org.w3c.dom.Element; 32import org.w3c.dom.Node; 33import org.w3c.dom.Text; 34import org.w3c.dom.CDATASection; 35 36import org.xml.sax.Attributes; 37import org.xml.sax.ContentHandler; 38import org.xml.sax.Locator; 39import org.xml.sax.ext.LexicalHandler; 40/** 41 * This class takes SAX events (in addition to some extra events 42 * that SAX doesn't handle yet) and adds the result to a document 43 * or document fragment. 44 * @xsl.usage general 45 */ 46public class DOMBuilder 47 implements ContentHandler, LexicalHandler 48{ 49 50 /** Root document */ 51 public Document m_doc; 52 53 /** Current node */ 54 protected Node m_currentNode = null; 55 56 /** The root node */ 57 protected Node m_root = null; 58 59 /** The next sibling node */ 60 protected Node m_nextSibling = null; 61 62 /** First node of document fragment or null if not a DocumentFragment */ 63 public DocumentFragment m_docFrag = null; 64 65 /** Vector of element nodes */ 66 protected Stack m_elemStack = new Stack(); 67 68 /** Namespace support */ 69 protected Vector m_prefixMappings = new Vector(); 70 71 /** 72 * DOMBuilder instance constructor... it will add the DOM nodes 73 * to the document fragment. 74 * 75 * @param doc Root document 76 * @param node Current node 77 */ 78 public DOMBuilder(Document doc, Node node) 79 { 80 m_doc = doc; 81 m_currentNode = m_root = node; 82 83 if (node instanceof Element) 84 m_elemStack.push(node); 85 } 86 87 /** 88 * DOMBuilder instance constructor... it will add the DOM nodes 89 * to the document fragment. 90 * 91 * @param doc Root document 92 * @param docFrag Document fragment 93 */ 94 public DOMBuilder(Document doc, DocumentFragment docFrag) 95 { 96 m_doc = doc; 97 m_docFrag = docFrag; 98 } 99 100 /** 101 * DOMBuilder instance constructor... it will add the DOM nodes 102 * to the document. 103 * 104 * @param doc Root document 105 */ 106 public DOMBuilder(Document doc) 107 { 108 m_doc = doc; 109 } 110 111 /** 112 * Get the root document or DocumentFragment of the DOM being created. 113 * 114 * @return The root document or document fragment if not null 115 */ 116 public Node getRootDocument() 117 { 118 return (null != m_docFrag) ? (Node) m_docFrag : (Node) m_doc; 119 } 120 121 /** 122 * Get the root node of the DOM tree. 123 */ 124 public Node getRootNode() 125 { 126 return m_root; 127 } 128 129 /** 130 * Get the node currently being processed. 131 * 132 * @return the current node being processed 133 */ 134 public Node getCurrentNode() 135 { 136 return m_currentNode; 137 } 138 139 /** 140 * Set the next sibling node, which is where the result nodes 141 * should be inserted before. 142 * 143 * @param nextSibling the next sibling node. 144 */ 145 public void setNextSibling(Node nextSibling) 146 { 147 m_nextSibling = nextSibling; 148 } 149 150 /** 151 * Return the next sibling node. 152 * 153 * @return the next sibling node. 154 */ 155 public Node getNextSibling() 156 { 157 return m_nextSibling; 158 } 159 160 /** 161 * Return null since there is no Writer for this class. 162 * 163 * @return null 164 */ 165 public java.io.Writer getWriter() 166 { 167 return null; 168 } 169 170 /** 171 * Append a node to the current container. 172 * 173 * @param newNode New node to append 174 */ 175 protected void append(Node newNode) throws org.xml.sax.SAXException 176 { 177 178 Node currentNode = m_currentNode; 179 180 if (null != currentNode) 181 { 182 if (currentNode == m_root && m_nextSibling != null) 183 currentNode.insertBefore(newNode, m_nextSibling); 184 else 185 currentNode.appendChild(newNode); 186 187 // System.out.println(newNode.getNodeName()); 188 } 189 else if (null != m_docFrag) 190 { 191 if (m_nextSibling != null) 192 m_docFrag.insertBefore(newNode, m_nextSibling); 193 else 194 m_docFrag.appendChild(newNode); 195 } 196 else 197 { 198 boolean ok = true; 199 short type = newNode.getNodeType(); 200 201 if (type == Node.TEXT_NODE) 202 { 203 String data = newNode.getNodeValue(); 204 205 if ((null != data) && (data.trim().length() > 0)) 206 { 207 throw new org.xml.sax.SAXException( 208 XMLMessages.createXMLMessage( 209 XMLErrorResources.ER_CANT_OUTPUT_TEXT_BEFORE_DOC, null)); //"Warning: can't output text before document element! Ignoring..."); 210 } 211 212 ok = false; 213 } 214 else if (type == Node.ELEMENT_NODE) 215 { 216 if (m_doc.getDocumentElement() != null) 217 { 218 ok = false; 219 220 throw new org.xml.sax.SAXException( 221 XMLMessages.createXMLMessage( 222 XMLErrorResources.ER_CANT_HAVE_MORE_THAN_ONE_ROOT, null)); //"Can't have more than one root on a DOM!"); 223 } 224 } 225 226 if (ok) 227 { 228 if (m_nextSibling != null) 229 m_doc.insertBefore(newNode, m_nextSibling); 230 else 231 m_doc.appendChild(newNode); 232 } 233 } 234 } 235 236 /** 237 * Receive an object for locating the origin of SAX document events. 238 * 239 * <p>SAX parsers are strongly encouraged (though not absolutely 240 * required) to supply a locator: if it does so, it must supply 241 * the locator to the application by invoking this method before 242 * invoking any of the other methods in the ContentHandler 243 * interface.</p> 244 * 245 * <p>The locator allows the application to determine the end 246 * position of any document-related event, even if the parser is 247 * not reporting an error. Typically, the application will 248 * use this information for reporting its own errors (such as 249 * character content that does not match an application's 250 * business rules). The information returned by the locator 251 * is probably not sufficient for use with a search engine.</p> 252 * 253 * <p>Note that the locator will return correct information only 254 * during the invocation of the events in this interface. The 255 * application should not attempt to use it at any other time.</p> 256 * 257 * @param locator An object that can return the location of 258 * any SAX document event. 259 * @see org.xml.sax.Locator 260 */ 261 public void setDocumentLocator(Locator locator) 262 { 263 264 // No action for the moment. 265 } 266 267 /** 268 * Receive notification of the beginning of a document. 269 * 270 * <p>The SAX parser will invoke this method only once, before any 271 * other methods in this interface or in DTDHandler (except for 272 * setDocumentLocator).</p> 273 */ 274 public void startDocument() throws org.xml.sax.SAXException 275 { 276 277 // No action for the moment. 278 } 279 280 /** 281 * Receive notification of the end of a document. 282 * 283 * <p>The SAX parser will invoke this method only once, and it will 284 * be the last method invoked during the parse. The parser shall 285 * not invoke this method until it has either abandoned parsing 286 * (because of an unrecoverable error) or reached the end of 287 * input.</p> 288 */ 289 public void endDocument() throws org.xml.sax.SAXException 290 { 291 292 // No action for the moment. 293 } 294 295 /** 296 * Receive notification of the beginning of an element. 297 * 298 * <p>The Parser will invoke this method at the beginning of every 299 * element in the XML document; there will be a corresponding 300 * endElement() event for every startElement() event (even when the 301 * element is empty). All of the element's content will be 302 * reported, in order, before the corresponding endElement() 303 * event.</p> 304 * 305 * <p>If the element name has a namespace prefix, the prefix will 306 * still be attached. Note that the attribute list provided will 307 * contain only attributes with explicit values (specified or 308 * defaulted): #IMPLIED attributes will be omitted.</p> 309 * 310 * 311 * @param ns The namespace of the node 312 * @param localName The local part of the qualified name 313 * @param name The element name. 314 * @param atts The attributes attached to the element, if any. 315 * @see #endElement 316 * @see org.xml.sax.Attributes 317 */ 318 public void startElement( 319 String ns, String localName, String name, Attributes atts) 320 throws org.xml.sax.SAXException 321 { 322 323 Element elem; 324 325 // Note that the namespace-aware call must be used to correctly 326 // construct a Level 2 DOM, even for non-namespaced nodes. 327 if ((null == ns) || (ns.length() == 0)) 328 elem = m_doc.createElementNS(null,name); 329 else 330 elem = m_doc.createElementNS(ns, name); 331 332 append(elem); 333 334 try 335 { 336 int nAtts = atts.getLength(); 337 338 if (0 != nAtts) 339 { 340 for (int i = 0; i < nAtts; i++) 341 { 342 343 //System.out.println("type " + atts.getType(i) + " name " + atts.getLocalName(i) ); 344 // First handle a possible ID attribute 345 if (atts.getType(i).equalsIgnoreCase("ID")) 346 setIDAttribute(atts.getValue(i), elem); 347 348 String attrNS = atts.getURI(i); 349 350 if("".equals(attrNS)) 351 attrNS = null; // DOM represents no-namespace as null 352 353 // System.out.println("attrNS: "+attrNS+", localName: "+atts.getQName(i) 354 // +", qname: "+atts.getQName(i)+", value: "+atts.getValue(i)); 355 // Crimson won't let us set an xmlns: attribute on the DOM. 356 String attrQName = atts.getQName(i); 357 358 // In SAX, xmlns[:] attributes have an empty namespace, while in DOM they 359 // should have the xmlns namespace 360 if (attrQName.startsWith("xmlns:") || attrQName.equals("xmlns")) { 361 attrNS = "http://www.w3.org/2000/xmlns/"; 362 } 363 364 // ALWAYS use the DOM Level 2 call! 365 elem.setAttributeNS(attrNS,attrQName, atts.getValue(i)); 366 } 367 } 368 369 /* 370 * Adding namespace nodes to the DOM tree; 371 */ 372 int nDecls = m_prefixMappings.size(); 373 374 String prefix, declURL; 375 376 for (int i = 0; i < nDecls; i += 2) 377 { 378 prefix = (String) m_prefixMappings.elementAt(i); 379 380 if (prefix == null) 381 continue; 382 383 declURL = (String) m_prefixMappings.elementAt(i + 1); 384 385 elem.setAttributeNS("http://www.w3.org/2000/xmlns/", prefix, declURL); 386 } 387 388 m_prefixMappings.clear(); 389 390 // append(elem); 391 392 m_elemStack.push(elem); 393 394 m_currentNode = elem; 395 396 // append(elem); 397 } 398 catch(java.lang.Exception de) 399 { 400 // de.printStackTrace(); 401 throw new org.xml.sax.SAXException(de); 402 } 403 404 } 405 406 /** 407 408 409 410 * Receive notification of the end of an element. 411 * 412 * <p>The SAX parser will invoke this method at the end of every 413 * element in the XML document; there will be a corresponding 414 * startElement() event for every endElement() event (even when the 415 * element is empty).</p> 416 * 417 * <p>If the element name has a namespace prefix, the prefix will 418 * still be attached to the name.</p> 419 * 420 * 421 * @param ns the namespace of the element 422 * @param localName The local part of the qualified name of the element 423 * @param name The element name 424 */ 425 public void endElement(String ns, String localName, String name) 426 throws org.xml.sax.SAXException 427 { 428 m_elemStack.pop(); 429 m_currentNode = m_elemStack.isEmpty() ? null : (Node)m_elemStack.peek(); 430 } 431 432 /** 433 * Set an ID string to node association in the ID table. 434 * 435 * @param id The ID string. 436 * @param elem The associated ID. 437 */ 438 public void setIDAttribute(String id, Element elem) 439 { 440 441 // Do nothing. This method is meant to be overiden. 442 } 443 444 /** 445 * Receive notification of character data. 446 * 447 * <p>The Parser will call this method to report each chunk of 448 * character data. SAX parsers may return all contiguous character 449 * data in a single chunk, or they may split it into several 450 * chunks; however, all of the characters in any single event 451 * must come from the same external entity, so that the Locator 452 * provides useful information.</p> 453 * 454 * <p>The application must not attempt to read from the array 455 * outside of the specified range.</p> 456 * 457 * <p>Note that some parsers will report whitespace using the 458 * ignorableWhitespace() method rather than this one (validating 459 * parsers must do so).</p> 460 * 461 * @param ch The characters from the XML document. 462 * @param start The start position in the array. 463 * @param length The number of characters to read from the array. 464 * @see #ignorableWhitespace 465 * @see org.xml.sax.Locator 466 */ 467 public void characters(char ch[], int start, int length) throws org.xml.sax.SAXException 468 { 469 if(isOutsideDocElem() 470 && org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) 471 return; // avoid DOM006 Hierarchy request error 472 473 if (m_inCData) 474 { 475 cdata(ch, start, length); 476 477 return; 478 } 479 480 String s = new String(ch, start, length); 481 Node childNode; 482 childNode = m_currentNode != null ? m_currentNode.getLastChild(): null; 483 if( childNode != null && childNode.getNodeType() == Node.TEXT_NODE ){ 484 ((Text)childNode).appendData(s); 485 } 486 else{ 487 Text text = m_doc.createTextNode(s); 488 append(text); 489 } 490 } 491 492 /** 493 * If available, when the disable-output-escaping attribute is used, 494 * output raw text without escaping. A PI will be inserted in front 495 * of the node with the name "lotusxsl-next-is-raw" and a value of 496 * "formatter-to-dom". 497 * 498 * @param ch Array containing the characters 499 * @param start Index to start of characters in the array 500 * @param length Number of characters in the array 501 */ 502 public void charactersRaw(char ch[], int start, int length) 503 throws org.xml.sax.SAXException 504 { 505 if(isOutsideDocElem() 506 && org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) 507 return; // avoid DOM006 Hierarchy request error 508 509 510 String s = new String(ch, start, length); 511 512 append(m_doc.createProcessingInstruction("xslt-next-is-raw", 513 "formatter-to-dom")); 514 append(m_doc.createTextNode(s)); 515 } 516 517 /** 518 * Report the beginning of an entity. 519 * 520 * The start and end of the document entity are not reported. 521 * The start and end of the external DTD subset are reported 522 * using the pseudo-name "[dtd]". All other events must be 523 * properly nested within start/end entity events. 524 * 525 * @param name The name of the entity. If it is a parameter 526 * entity, the name will begin with '%'. 527 * @see #endEntity 528 * @see org.xml.sax.ext.DeclHandler#internalEntityDecl 529 * @see org.xml.sax.ext.DeclHandler#externalEntityDecl 530 */ 531 public void startEntity(String name) throws org.xml.sax.SAXException 532 { 533 534 // Almost certainly the wrong behavior... 535 // entityReference(name); 536 } 537 538 /** 539 * Report the end of an entity. 540 * 541 * @param name The name of the entity that is ending. 542 * @see #startEntity 543 */ 544 public void endEntity(String name) throws org.xml.sax.SAXException{} 545 546 /** 547 * Receive notivication of a entityReference. 548 * 549 * @param name name of the entity reference 550 */ 551 public void entityReference(String name) throws org.xml.sax.SAXException 552 { 553 append(m_doc.createEntityReference(name)); 554 } 555 556 /** 557 * Receive notification of ignorable whitespace in element content. 558 * 559 * <p>Validating Parsers must use this method to report each chunk 560 * of ignorable whitespace (see the W3C XML 1.0 recommendation, 561 * section 2.10): non-validating parsers may also use this method 562 * if they are capable of parsing and using content models.</p> 563 * 564 * <p>SAX parsers may return all contiguous whitespace in a single 565 * chunk, or they may split it into several chunks; however, all of 566 * the characters in any single event must come from the same 567 * external entity, so that the Locator provides useful 568 * information.</p> 569 * 570 * <p>The application must not attempt to read from the array 571 * outside of the specified range.</p> 572 * 573 * @param ch The characters from the XML document. 574 * @param start The start position in the array. 575 * @param length The number of characters to read from the array. 576 * @see #characters 577 */ 578 public void ignorableWhitespace(char ch[], int start, int length) 579 throws org.xml.sax.SAXException 580 { 581 if(isOutsideDocElem()) 582 return; // avoid DOM006 Hierarchy request error 583 584 String s = new String(ch, start, length); 585 586 append(m_doc.createTextNode(s)); 587 } 588 589 /** 590 * Tell if the current node is outside the document element. 591 * 592 * @return true if the current node is outside the document element. 593 */ 594 private boolean isOutsideDocElem() 595 { 596 return (null == m_docFrag) && m_elemStack.size() == 0 && (null == m_currentNode || m_currentNode.getNodeType() == Node.DOCUMENT_NODE); 597 } 598 599 /** 600 * Receive notification of a processing instruction. 601 * 602 * <p>The Parser will invoke this method once for each processing 603 * instruction found: note that processing instructions may occur 604 * before or after the main document element.</p> 605 * 606 * <p>A SAX parser should never report an XML declaration (XML 1.0, 607 * section 2.8) or a text declaration (XML 1.0, section 4.3.1) 608 * using this method.</p> 609 * 610 * @param target The processing instruction target. 611 * @param data The processing instruction data, or null if 612 * none was supplied. 613 */ 614 public void processingInstruction(String target, String data) 615 throws org.xml.sax.SAXException 616 { 617 append(m_doc.createProcessingInstruction(target, data)); 618 } 619 620 /** 621 * Report an XML comment anywhere in the document. 622 * 623 * This callback will be used for comments inside or outside the 624 * document element, including comments in the external DTD 625 * subset (if read). 626 * 627 * @param ch An array holding the characters in the comment. 628 * @param start The starting position in the array. 629 * @param length The number of characters to use from the array. 630 */ 631 public void comment(char ch[], int start, int length) throws org.xml.sax.SAXException 632 { 633 append(m_doc.createComment(new String(ch, start, length))); 634 } 635 636 /** Flag indicating that we are processing a CData section */ 637 protected boolean m_inCData = false; 638 639 /** 640 * Report the start of a CDATA section. 641 * 642 * @see #endCDATA 643 */ 644 public void startCDATA() throws org.xml.sax.SAXException 645 { 646 m_inCData = true; 647 append(m_doc.createCDATASection("")); 648 } 649 650 /** 651 * Report the end of a CDATA section. 652 * 653 * @see #startCDATA 654 */ 655 public void endCDATA() throws org.xml.sax.SAXException 656 { 657 m_inCData = false; 658 } 659 660 /** 661 * Receive notification of cdata. 662 * 663 * <p>The Parser will call this method to report each chunk of 664 * character data. SAX parsers may return all contiguous character 665 * data in a single chunk, or they may split it into several 666 * chunks; however, all of the characters in any single event 667 * must come from the same external entity, so that the Locator 668 * provides useful information.</p> 669 * 670 * <p>The application must not attempt to read from the array 671 * outside of the specified range.</p> 672 * 673 * <p>Note that some parsers will report whitespace using the 674 * ignorableWhitespace() method rather than this one (validating 675 * parsers must do so).</p> 676 * 677 * @param ch The characters from the XML document. 678 * @param start The start position in the array. 679 * @param length The number of characters to read from the array. 680 * @see #ignorableWhitespace 681 * @see org.xml.sax.Locator 682 */ 683 public void cdata(char ch[], int start, int length) throws org.xml.sax.SAXException 684 { 685 if(isOutsideDocElem() 686 && org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) 687 return; // avoid DOM006 Hierarchy request error 688 689 String s = new String(ch, start, length); 690 691 CDATASection section =(CDATASection) m_currentNode.getLastChild(); 692 section.appendData(s); 693 } 694 695 /** 696 * Report the start of DTD declarations, if any. 697 * 698 * Any declarations are assumed to be in the internal subset 699 * unless otherwise indicated. 700 * 701 * @param name The document type name. 702 * @param publicId The declared public identifier for the 703 * external DTD subset, or null if none was declared. 704 * @param systemId The declared system identifier for the 705 * external DTD subset, or null if none was declared. 706 * @see #endDTD 707 * @see #startEntity 708 */ 709 public void startDTD(String name, String publicId, String systemId) 710 throws org.xml.sax.SAXException 711 { 712 713 // Do nothing for now. 714 } 715 716 /** 717 * Report the end of DTD declarations. 718 * 719 * @see #startDTD 720 */ 721 public void endDTD() throws org.xml.sax.SAXException 722 { 723 724 // Do nothing for now. 725 } 726 727 /** 728 * Begin the scope of a prefix-URI Namespace mapping. 729 * 730 * <p>The information from this event is not necessary for 731 * normal Namespace processing: the SAX XML reader will 732 * automatically replace prefixes for element and attribute 733 * names when the http://xml.org/sax/features/namespaces 734 * feature is true (the default).</p> 735 * 736 * <p>There are cases, however, when applications need to 737 * use prefixes in character data or in attribute values, 738 * where they cannot safely be expanded automatically; the 739 * start/endPrefixMapping event supplies the information 740 * to the application to expand prefixes in those contexts 741 * itself, if necessary.</p> 742 * 743 * <p>Note that start/endPrefixMapping events are not 744 * guaranteed to be properly nested relative to each-other: 745 * all startPrefixMapping events will occur before the 746 * corresponding startElement event, and all endPrefixMapping 747 * events will occur after the corresponding endElement event, 748 * but their order is not guaranteed.</p> 749 * 750 * @param prefix The Namespace prefix being declared. 751 * @param uri The Namespace URI the prefix is mapped to. 752 * @see #endPrefixMapping 753 * @see #startElement 754 */ 755 public void startPrefixMapping(String prefix, String uri) 756 throws org.xml.sax.SAXException 757 { 758 if(null == prefix || prefix.equals("")) 759 prefix = "xmlns"; 760 else prefix = "xmlns:"+prefix; 761 m_prefixMappings.addElement(prefix); 762 m_prefixMappings.addElement(uri); 763 } 764 765 /** 766 * End the scope of a prefix-URI mapping. 767 * 768 * <p>See startPrefixMapping for details. This event will 769 * always occur after the corresponding endElement event, 770 * but the order of endPrefixMapping events is not otherwise 771 * guaranteed.</p> 772 * 773 * @param prefix The prefix that was being mapping. 774 * @see #startPrefixMapping 775 * @see #endElement 776 */ 777 public void endPrefixMapping(String prefix) throws org.xml.sax.SAXException{} 778 779 /** 780 * Receive notification of a skipped entity. 781 * 782 * <p>The Parser will invoke this method once for each entity 783 * skipped. Non-validating processors may skip entities if they 784 * have not seen the declarations (because, for example, the 785 * entity was declared in an external DTD subset). All processors 786 * may skip external entities, depending on the values of the 787 * http://xml.org/sax/features/external-general-entities and the 788 * http://xml.org/sax/features/external-parameter-entities 789 * properties.</p> 790 * 791 * @param name The name of the skipped entity. If it is a 792 * parameter entity, the name will begin with '%'. 793 */ 794 public void skippedEntity(String name) throws org.xml.sax.SAXException{} 795} 796