1/* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package org.apache.harmony.xml; 18 19import org.xml.sax.Attributes; 20import org.xml.sax.ContentHandler; 21import org.xml.sax.DTDHandler; 22import org.xml.sax.EntityResolver; 23import org.xml.sax.InputSource; 24import org.xml.sax.Locator; 25import org.xml.sax.SAXException; 26import org.xml.sax.SAXParseException; 27import org.xml.sax.ext.LexicalHandler; 28 29import java.io.Reader; 30import java.io.IOException; 31import java.io.InputStream; 32import java.net.URI; 33import java.net.URLConnection; 34import java.net.URL; 35import java.util.logging.Logger; 36import java.util.logging.Level; 37 38/** 39 * Adapts SAX API to the Expat native XML parser. Not intended for reuse 40 * across documents. 41 * 42 * @see org.apache.harmony.xml.ExpatPullParser 43 * @see org.apache.harmony.xml.ExpatReader 44 */ 45class ExpatParser { 46 47 private static final int BUFFER_SIZE = 8096; // in bytes 48 49 /** Pointer to XML_Parser instance. */ 50 private int pointer; 51 52 private boolean inStartElement = false; 53 private int attributeCount = -1; 54 private int attributePointer = 0; 55 56 private final Locator locator = new ExpatLocator(); 57 58 private final ExpatReader xmlReader; 59 60 private final String publicId; 61 private final String systemId; 62 63 private final String encoding; 64 65 private final ExpatAttributes attributes = new CurrentAttributes(); 66 67 private static final String OUTSIDE_START_ELEMENT 68 = "Attributes can only be used within the scope of startElement()."; 69 70 /** We default to UTF-8 when the user doesn't specify an encoding. */ 71 private static final String DEFAULT_ENCODING = "UTF-8"; 72 73 /** Encoding used for Java chars, used to parse Readers and Strings */ 74 /*package*/ static final String CHARACTER_ENCODING = "UTF-16"; 75 76 /** Timeout for HTTP connections (in ms) */ 77 private static final int TIMEOUT = 20 * 1000; 78 79 /** 80 * Constructs a new parser with the specified encoding. 81 */ 82 /*package*/ ExpatParser(String encoding, ExpatReader xmlReader, 83 boolean processNamespaces, String publicId, String systemId) { 84 this.publicId = publicId; 85 this.systemId = systemId; 86 87 this.xmlReader = xmlReader; 88 89 /* 90 * TODO: Let Expat try to guess the encoding instead of defaulting. 91 * Unfortunately, I don't know how to tell which encoding Expat picked, 92 * so I won't know how to encode "<externalEntity>" below. The solution 93 * I think is to fix Expat to not require the "<externalEntity>" 94 * workaround. 95 */ 96 this.encoding = encoding == null ? DEFAULT_ENCODING : encoding; 97 this.pointer = initialize( 98 this.encoding, 99 processNamespaces 100 ); 101 } 102 103 /** 104 * Used by {@link EntityParser}. 105 */ 106 private ExpatParser(String encoding, ExpatReader xmlReader, int pointer, 107 String publicId, String systemId) { 108 this.encoding = encoding; 109 this.xmlReader = xmlReader; 110 this.pointer = pointer; 111 this.systemId = systemId; 112 this.publicId = publicId; 113 } 114 115 /** 116 * Initializes native resources. 117 * 118 * @return the pointer to the native parser 119 */ 120 private native int initialize(String encoding, boolean namespacesEnabled); 121 122 /** 123 * Called at the start of an element. 124 * 125 * @param uri namespace URI of element or "" if namespace processing is 126 * disabled 127 * @param localName local name of element or "" if namespace processing is 128 * disabled 129 * @param qName qualified name or "" if namespace processing is enabled 130 * @param attributePointer pointer to native attribute char*--we keep 131 * a separate pointer so we can detach it from the parser instance 132 * @param attributeCount number of attributes 133 */ 134 /*package*/ void startElement(String uri, String localName, String qName, 135 int attributePointer, int attributeCount) throws SAXException { 136 ContentHandler contentHandler = xmlReader.contentHandler; 137 if (contentHandler == null) { 138 return; 139 } 140 141 try { 142 inStartElement = true; 143 this.attributePointer = attributePointer; 144 this.attributeCount = attributeCount; 145 146 contentHandler.startElement( 147 uri, localName, qName, this.attributes); 148 } 149 finally { 150 inStartElement = false; 151 this.attributeCount = -1; 152 this.attributePointer = 0; 153 } 154 } 155 156 /*package*/ void endElement(String uri, String localName, String qName) 157 throws SAXException { 158 ContentHandler contentHandler = xmlReader.contentHandler; 159 if (contentHandler != null) { 160 contentHandler.endElement(uri, localName, qName); 161 } 162 } 163 164 /*package*/ void text(char[] text, int length) throws SAXException { 165 ContentHandler contentHandler = xmlReader.contentHandler; 166 if (contentHandler != null) { 167 contentHandler.characters(text, 0, length); 168 } 169 } 170 171 /*package*/ void comment(char[] text, int length) throws SAXException { 172 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 173 if (lexicalHandler != null) { 174 lexicalHandler.comment(text, 0, length); 175 } 176 } 177 178 /*package*/ void startCdata() throws SAXException { 179 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 180 if (lexicalHandler != null) { 181 lexicalHandler.startCDATA(); 182 } 183 } 184 185 /*package*/ void endCdata() throws SAXException { 186 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 187 if (lexicalHandler != null) { 188 lexicalHandler.endCDATA(); 189 } 190 } 191 192 /*package*/ void startNamespace(String prefix, String uri) 193 throws SAXException { 194 ContentHandler contentHandler = xmlReader.contentHandler; 195 if (contentHandler != null) { 196 contentHandler.startPrefixMapping(prefix, uri); 197 } 198 } 199 200 /*package*/ void endNamespace(String prefix) throws SAXException { 201 ContentHandler contentHandler = xmlReader.contentHandler; 202 if (contentHandler != null) { 203 contentHandler.endPrefixMapping(prefix); 204 } 205 } 206 207 /*package*/ void startDtd(String name, String publicId, String systemId) 208 throws SAXException { 209 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 210 if (lexicalHandler != null) { 211 lexicalHandler.startDTD(name, publicId, systemId); 212 } 213 } 214 215 /*package*/ void endDtd() throws SAXException { 216 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 217 if (lexicalHandler != null) { 218 lexicalHandler.endDTD(); 219 } 220 } 221 222 /*package*/ void processingInstruction(String target, String data) 223 throws SAXException { 224 ContentHandler contentHandler = xmlReader.contentHandler; 225 if (contentHandler != null) { 226 contentHandler.processingInstruction(target, data); 227 } 228 } 229 230 /*package*/ void notationDecl(String name, String publicId, String systemId) throws SAXException { 231 DTDHandler dtdHandler = xmlReader.dtdHandler; 232 if (dtdHandler != null) { 233 dtdHandler.notationDecl(name, publicId, systemId); 234 } 235 } 236 237 /*package*/ void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException { 238 DTDHandler dtdHandler = xmlReader.dtdHandler; 239 if (dtdHandler != null) { 240 dtdHandler.unparsedEntityDecl(name, publicId, systemId, notationName); 241 } 242 } 243 244 /** 245 * Handles an external entity. 246 * 247 * @param context to be passed back to Expat when we parse the entity 248 * @param publicId the publicId of the entity 249 * @param systemId the systemId of the entity 250 */ 251 /*package*/ void handleExternalEntity(String context, String publicId, 252 String systemId) throws SAXException, IOException { 253 EntityResolver entityResolver = xmlReader.entityResolver; 254 if (entityResolver == null) { 255 return; 256 } 257 258 /* 259 * The spec. is terribly under-specified here. It says that if the 260 * systemId is a URL, we should try to resolve it, but it doesn't 261 * specify how to tell whether or not the systemId is a URL let alone 262 * how to resolve it. 263 * 264 * Other implementations do various insane things. We try to keep it 265 * simple: if the systemId parses as a URI and it's relative, we try to 266 * resolve it against the parent document's systemId. If anything goes 267 * wrong, we go with the original systemId. If crazybob had designed 268 * the API, he would have left all resolving to the EntityResolver. 269 */ 270 if (this.systemId != null) { 271 try { 272 URI systemUri = new URI(systemId); 273 if (!systemUri.isAbsolute() && !systemUri.isOpaque()) { 274 // It could be relative (or it may not be a URI at all!) 275 URI baseUri = new URI(this.systemId); 276 systemUri = baseUri.resolve(systemUri); 277 278 // Replace systemId w/ resolved URI 279 systemId = systemUri.toString(); 280 } 281 } catch (Exception e) { 282 Logger.getLogger(ExpatParser.class.getName()).log(Level.INFO, 283 "Could not resolve '" + systemId + "' relative to" 284 + " '" + this.systemId + "' at " + locator, e); 285 } 286 } 287 288 InputSource inputSource = entityResolver.resolveEntity( 289 publicId, systemId); 290 if (inputSource == null) { 291 /* 292 * The spec. actually says that we should try to treat systemId 293 * as a URL and download and parse its contents here, but an 294 * entity resolver can easily accomplish the same by returning 295 * new InputSource(systemId). 296 * 297 * Downloading external entities by default would result in several 298 * unwanted DTD downloads, not to mention pose a security risk 299 * when parsing untrusted XML (http://tinyurl.com/56ggrk), 300 * so we just do nothing instead. This also enables the user to 301 * opt out of entity parsing when using 302 * {@link org.xml.sax.helpers.DefaultHandler}, something that 303 * wouldn't be possible otherwise. 304 */ 305 return; 306 } 307 308 String encoding = pickEncoding(inputSource); 309 int pointer = createEntityParser(this.pointer, context, encoding); 310 try { 311 EntityParser entityParser = new EntityParser(encoding, xmlReader, 312 pointer, inputSource.getPublicId(), 313 inputSource.getSystemId()); 314 315 parseExternalEntity(entityParser, inputSource); 316 } finally { 317 releaseParser(pointer); 318 } 319 } 320 321 /** 322 * Picks an encoding for an external entity. Defaults to UTF-8. 323 */ 324 private String pickEncoding(InputSource inputSource) { 325 Reader reader = inputSource.getCharacterStream(); 326 if (reader != null) { 327 return CHARACTER_ENCODING; 328 } 329 330 String encoding = inputSource.getEncoding(); 331 return encoding == null ? DEFAULT_ENCODING : encoding; 332 } 333 334 /** 335 * Parses the the external entity provided by the input source. 336 */ 337 private void parseExternalEntity(ExpatParser entityParser, 338 InputSource inputSource) throws IOException, SAXException { 339 /* 340 * Expat complains if the external entity isn't wrapped with a root 341 * element so we add one and ignore it later on during parsing. 342 */ 343 344 // Try the character stream. 345 Reader reader = inputSource.getCharacterStream(); 346 if (reader != null) { 347 try { 348 entityParser.append("<externalEntity>"); 349 entityParser.parseFragment(reader); 350 entityParser.append("</externalEntity>"); 351 } finally { 352 // TODO: Don't eat original exception when close() throws. 353 reader.close(); 354 } 355 return; 356 } 357 358 // Try the byte stream. 359 InputStream in = inputSource.getByteStream(); 360 if (in != null) { 361 try { 362 entityParser.append("<externalEntity>" 363 .getBytes(entityParser.encoding)); 364 entityParser.parseFragment(in); 365 entityParser.append("</externalEntity>" 366 .getBytes(entityParser.encoding)); 367 } finally { 368 // TODO: Don't eat original exception when close() throws. 369 in.close(); 370 } 371 return; 372 } 373 374 // Make sure we use the user-provided systemId. 375 String systemId = inputSource.getSystemId(); 376 if (systemId == null) { 377 // TODO: We could just try our systemId here. 378 throw new ParseException("No input specified.", locator); 379 } 380 381 // Try the system id. 382 in = openUrl(systemId); 383 try { 384 entityParser.append("<externalEntity>" 385 .getBytes(entityParser.encoding)); 386 entityParser.parseFragment(in); 387 entityParser.append("</externalEntity>" 388 .getBytes(entityParser.encoding)); 389 } finally { 390 in.close(); 391 } 392 } 393 394 /** 395 * Creates a native entity parser. 396 * 397 * @param parentPointer pointer to parent Expat parser 398 * @param context passed to {@link #handleExternalEntity} 399 * @param encoding 400 * @return pointer to native parser 401 */ 402 private static native int createEntityParser(int parentPointer, 403 String context, String encoding); 404 405 /** 406 * Appends part of an XML document. This parser will parse the given XML to 407 * the extent possible and dispatch to the appropriate methods. 408 * 409 * @param xml a whole or partial snippet of XML 410 * @throws SAXException if an error occurs during parsing 411 */ 412 /*package*/ void append(String xml) throws SAXException { 413 try { 414 append(this.pointer, xml, false); 415 } catch (ExpatException e) { 416 throw new ParseException(e.getMessage(), this.locator); 417 } 418 } 419 420 private native void append(int pointer, String xml, boolean isFinal) 421 throws SAXException, ExpatException; 422 423 /** 424 * Appends part of an XML document. This parser will parse the given XML to 425 * the extent possible and dispatch to the appropriate methods. 426 * 427 * @param xml a whole or partial snippet of XML 428 * @param offset into the char[] 429 * @param length of characters to use 430 * @throws SAXException if an error occurs during parsing 431 */ 432 /*package*/ void append(char[] xml, int offset, int length) 433 throws SAXException { 434 try { 435 append(this.pointer, xml, offset, length); 436 } catch (ExpatException e) { 437 throw new ParseException(e.getMessage(), this.locator); 438 } 439 } 440 441 private native void append(int pointer, char[] xml, int offset, 442 int length) throws SAXException, ExpatException; 443 444 /** 445 * Appends part of an XML document. This parser will parse the given XML to 446 * the extent possible and dispatch to the appropriate methods. 447 * 448 * @param xml a whole or partial snippet of XML 449 * @throws SAXException if an error occurs during parsing 450 */ 451 /*package*/ void append(byte[] xml) throws SAXException { 452 append(xml, 0, xml.length); 453 } 454 455 /** 456 * Appends part of an XML document. This parser will parse the given XML to 457 * the extent possible and dispatch to the appropriate methods. 458 * 459 * @param xml a whole or partial snippet of XML 460 * @param offset into the byte[] 461 * @param length of bytes to use 462 * @throws SAXException if an error occurs during parsing 463 */ 464 /*package*/ void append(byte[] xml, int offset, int length) 465 throws SAXException { 466 try { 467 append(this.pointer, xml, offset, length); 468 } catch (ExpatException e) { 469 throw new ParseException(e.getMessage(), this.locator); 470 } 471 } 472 473 private native void append(int pointer, byte[] xml, int offset, 474 int length) throws SAXException, ExpatException; 475 476 /** 477 * Parses an XML document from the given input stream. 478 */ 479 /*package*/ void parseDocument(InputStream in) throws IOException, 480 SAXException { 481 startDocument(); 482 parseFragment(in); 483 finish(); 484 endDocument(); 485 } 486 487 /** 488 * Parses an XML Document from the given reader. 489 */ 490 /*package*/ void parseDocument(Reader in) throws IOException, SAXException { 491 startDocument(); 492 parseFragment(in); 493 finish(); 494 endDocument(); 495 } 496 497 /** 498 * Parses XML from the given Reader. 499 */ 500 private void parseFragment(Reader in) throws IOException, SAXException { 501 char[] buffer = new char[BUFFER_SIZE / 2]; 502 int length; 503 while ((length = in.read(buffer)) != -1) { 504 try { 505 append(this.pointer, buffer, 0, length); 506 } catch (ExpatException e) { 507 throw new ParseException(e.getMessage(), locator); 508 } 509 } 510 } 511 512 /** 513 * Parses XML from the given input stream. 514 */ 515 private void parseFragment(InputStream in) 516 throws IOException, SAXException { 517 byte[] buffer = new byte[BUFFER_SIZE]; 518 int length; 519 while ((length = in.read(buffer)) != -1) { 520 try { 521 append(this.pointer, buffer, 0, length); 522 } catch (ExpatException e) { 523 throw new ParseException(e.getMessage(), this.locator); 524 } 525 } 526 } 527 528 private void startDocument() throws SAXException { 529 ContentHandler contentHandler = xmlReader.contentHandler; 530 if (contentHandler != null) { 531 contentHandler.setDocumentLocator(this.locator); 532 contentHandler.startDocument(); 533 } 534 } 535 536 private void endDocument() throws SAXException { 537 ContentHandler contentHandler; 538 contentHandler = xmlReader.contentHandler; 539 if (contentHandler != null) { 540 contentHandler.endDocument(); 541 } 542 } 543 544 /** 545 * Indicate that we're finished parsing. 546 * 547 * @throws SAXException if the xml is incomplete 548 */ 549 /*package*/ void finish() throws SAXException { 550 try { 551 append(this.pointer, "", true); 552 } catch (ExpatException e) { 553 throw new ParseException(e.getMessage(), this.locator); 554 } 555 } 556 557 @Override 558 @SuppressWarnings("FinalizeDoesntCallSuperFinalize") 559 protected synchronized void finalize() throws Throwable { 560 if (this.pointer != 0) { 561 release(this.pointer); 562 this.pointer = 0; 563 } 564 } 565 566 /** 567 * Releases all native objects. 568 */ 569 private native void release(int pointer); 570 571 /** 572 * Releases native parser only. 573 */ 574 private static native void releaseParser(int pointer); 575 576 /** 577 * Initialize static resources. 578 */ 579 private static native void staticInitialize(String emptyString); 580 581 static { 582 staticInitialize(""); 583 } 584 585 /** 586 * Gets the current line number within the XML file. 587 */ 588 private int line() { 589 return line(this.pointer); 590 } 591 592 private static native int line(int pointer); 593 594 /** 595 * Gets the current column number within the XML file. 596 */ 597 private int column() { 598 return column(this.pointer); 599 } 600 601 private static native int column(int pointer); 602 603 /** 604 * Clones the current attributes so they can be used outside of 605 * startElement(). 606 */ 607 /*package*/ Attributes cloneAttributes() { 608 if (!inStartElement) { 609 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 610 } 611 612 if (attributeCount == 0) { 613 return ClonedAttributes.EMPTY; 614 } 615 616 int clonePointer 617 = cloneAttributes(this.attributePointer, this.attributeCount); 618 return new ClonedAttributes(pointer, clonePointer, attributeCount); 619 } 620 621 private static native int cloneAttributes(int pointer, int attributeCount); 622 623 /** 624 * Used for cloned attributes. 625 */ 626 private static class ClonedAttributes extends ExpatAttributes { 627 628 private static final Attributes EMPTY = new ClonedAttributes(0, 0, 0); 629 630 private final int parserPointer; 631 private int pointer; 632 private final int length; 633 634 /** 635 * Constructs a Java wrapper for native attributes. 636 * 637 * @param parserPointer pointer to the parse, can be 0 if length is 0. 638 * @param pointer pointer to the attributes array, can be 0 if the 639 * length is 0. 640 * @param length number of attributes 641 */ 642 private ClonedAttributes(int parserPointer, int pointer, int length) { 643 this.parserPointer = parserPointer; 644 this.pointer = pointer; 645 this.length = length; 646 } 647 648 @Override 649 public int getParserPointer() { 650 return this.parserPointer; 651 } 652 653 @Override 654 public int getPointer() { 655 return pointer; 656 } 657 658 @Override 659 public int getLength() { 660 return length; 661 } 662 663 @Override 664 @SuppressWarnings("FinalizeDoesntCallSuperFinalize") 665 protected synchronized void finalize() throws Throwable { 666 if (pointer != 0) { 667 freeAttributes(pointer); 668 pointer = 0; 669 } 670 } 671 } 672 673 private class ExpatLocator implements Locator { 674 675 public String getPublicId() { 676 return publicId; 677 } 678 679 public String getSystemId() { 680 return systemId; 681 } 682 683 public int getLineNumber() { 684 return line(); 685 } 686 687 public int getColumnNumber() { 688 return column(); 689 } 690 691 @Override 692 public String toString() { 693 return "Locator[publicId: " + publicId + ", systemId: " + systemId 694 + ", line: " + getLineNumber() 695 + ", column: " + getColumnNumber() + "]"; 696 } 697 } 698 699 /** 700 * Attributes that are only valid during startElement(). 701 */ 702 private class CurrentAttributes extends ExpatAttributes { 703 704 @Override 705 public int getParserPointer() { 706 return pointer; 707 } 708 709 @Override 710 public int getPointer() { 711 if (!inStartElement) { 712 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 713 } 714 return attributePointer; 715 } 716 717 @Override 718 public int getLength() { 719 if (!inStartElement) { 720 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 721 } 722 return attributeCount; 723 } 724 } 725 726 /** 727 * Includes line and column in the message. 728 */ 729 private static class ParseException extends SAXParseException { 730 731 private ParseException(String message, Locator locator) { 732 super(makeMessage(message, locator), locator); 733 } 734 735 private static String makeMessage(String message, Locator locator) { 736 return makeMessage(message, locator.getLineNumber(), 737 locator.getColumnNumber()); 738 } 739 740 private static String makeMessage( 741 String message, int line, int column) { 742 return "At line " + line + ", column " 743 + column + ": " + message; 744 } 745 } 746 747 /** 748 * Opens an InputStream for the given URL. 749 */ 750 /*package*/ static InputStream openUrl(String url) throws IOException { 751 try { 752 URLConnection urlConnection = new URL(url).openConnection(); 753 urlConnection.setConnectTimeout(TIMEOUT); 754 urlConnection.setReadTimeout(TIMEOUT); 755 urlConnection.setDoInput(true); 756 urlConnection.setDoOutput(false); 757 return urlConnection.getInputStream(); 758 } catch (Exception e) { 759 IOException ioe = new IOException("Couldn't open " + url); 760 ioe.initCause(e); 761 throw ioe; 762 } 763 } 764 765 /** 766 * Parses an external entity. 767 */ 768 private static class EntityParser extends ExpatParser { 769 770 private int depth = 0; 771 772 private EntityParser(String encoding, ExpatReader xmlReader, 773 int pointer, String publicId, String systemId) { 774 super(encoding, xmlReader, pointer, publicId, systemId); 775 } 776 777 @Override 778 void startElement(String uri, String localName, String qName, 779 int attributePointer, int attributeCount) throws SAXException { 780 /* 781 * Skip topmost element generated by our workaround in 782 * {@link #handleExternalEntity}. 783 */ 784 if (depth++ > 0) { 785 super.startElement(uri, localName, qName, attributePointer, 786 attributeCount); 787 } 788 } 789 790 @Override 791 void endElement(String uri, String localName, String qName) 792 throws SAXException { 793 if (--depth > 0) { 794 super.endElement(uri, localName, qName); 795 } 796 } 797 798 @Override 799 @SuppressWarnings("FinalizeDoesntCallSuperFinalize") 800 protected synchronized void finalize() throws Throwable { 801 /* 802 * Don't release our native resources. We do so explicitly in 803 * {@link #handleExternalEntity} and we don't want to release the 804 * parsing context--our parent is using it. 805 */ 806 } 807 } 808} 809