1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.harmony.xml;
18
19import org.xml.sax.Attributes;
20import org.xml.sax.ContentHandler;
21import org.xml.sax.DTDHandler;
22import org.xml.sax.EntityResolver;
23import org.xml.sax.InputSource;
24import org.xml.sax.Locator;
25import org.xml.sax.SAXException;
26import org.xml.sax.SAXParseException;
27import org.xml.sax.ext.LexicalHandler;
28
29import java.io.Reader;
30import java.io.IOException;
31import java.io.InputStream;
32import java.net.URI;
33import java.net.URLConnection;
34import java.net.URL;
35import java.util.logging.Logger;
36import java.util.logging.Level;
37
38/**
39 * Adapts SAX API to the Expat native XML parser. Not intended for reuse
40 * across documents.
41 *
42 * @see org.apache.harmony.xml.ExpatPullParser
43 * @see org.apache.harmony.xml.ExpatReader
44 */
45class ExpatParser {
46
47    private static final int BUFFER_SIZE = 8096; // in bytes
48
49    /** Pointer to XML_Parser instance. */
50    private int pointer;
51
52    private boolean inStartElement = false;
53    private int attributeCount = -1;
54    private int attributePointer = 0;
55
56    private final Locator locator = new ExpatLocator();
57
58    private final ExpatReader xmlReader;
59
60    private final String publicId;
61    private final String systemId;
62
63    private final String encoding;
64
65    private final ExpatAttributes attributes = new CurrentAttributes();
66
67    private static final String OUTSIDE_START_ELEMENT
68            = "Attributes can only be used within the scope of startElement().";
69
70    /** We default to UTF-8 when the user doesn't specify an encoding. */
71    private static final String DEFAULT_ENCODING = "UTF-8";
72
73    /** Encoding used for Java chars, used to parse Readers and Strings */
74    /*package*/ static final String CHARACTER_ENCODING = "UTF-16";
75
76    /** Timeout for HTTP connections (in ms) */
77    private static final int TIMEOUT = 20 * 1000;
78
79    /**
80     * Constructs a new parser with the specified encoding.
81     */
82    /*package*/ ExpatParser(String encoding, ExpatReader xmlReader,
83            boolean processNamespaces, String publicId, String systemId) {
84        this.publicId = publicId;
85        this.systemId = systemId;
86
87        this.xmlReader = xmlReader;
88
89        /*
90         * TODO: Let Expat try to guess the encoding instead of defaulting.
91         * Unfortunately, I don't know how to tell which encoding Expat picked,
92         * so I won't know how to encode "<externalEntity>" below. The solution
93         * I think is to fix Expat to not require the "<externalEntity>"
94         * workaround.
95         */
96        this.encoding = encoding == null ? DEFAULT_ENCODING : encoding;
97        this.pointer = initialize(
98            this.encoding,
99            processNamespaces
100        );
101    }
102
103    /**
104     * Used by {@link EntityParser}.
105     */
106    private ExpatParser(String encoding, ExpatReader xmlReader, int pointer,
107            String publicId, String systemId) {
108        this.encoding = encoding;
109        this.xmlReader = xmlReader;
110        this.pointer = pointer;
111        this.systemId = systemId;
112        this.publicId = publicId;
113    }
114
115    /**
116     * Initializes native resources.
117     *
118     * @return the pointer to the native parser
119     */
120    private native int initialize(String encoding, boolean namespacesEnabled);
121
122    /**
123     * Called at the start of an element.
124     *
125     * @param uri namespace URI of element or "" if namespace processing is
126     *  disabled
127     * @param localName local name of element or "" if namespace processing is
128     *  disabled
129     * @param qName qualified name or "" if namespace processing is enabled
130     * @param attributePointer pointer to native attribute char*--we keep
131     *  a separate pointer so we can detach it from the parser instance
132     * @param attributeCount number of attributes
133     */
134    /*package*/ void startElement(String uri, String localName, String qName,
135            int attributePointer, int attributeCount) throws SAXException {
136        ContentHandler contentHandler = xmlReader.contentHandler;
137        if (contentHandler == null) {
138            return;
139        }
140
141        try {
142            inStartElement = true;
143            this.attributePointer = attributePointer;
144            this.attributeCount = attributeCount;
145
146            contentHandler.startElement(
147                    uri, localName, qName, this.attributes);
148        }
149        finally {
150            inStartElement = false;
151            this.attributeCount = -1;
152            this.attributePointer = 0;
153        }
154    }
155
156    /*package*/ void endElement(String uri, String localName, String qName)
157            throws SAXException {
158        ContentHandler contentHandler = xmlReader.contentHandler;
159        if (contentHandler != null) {
160            contentHandler.endElement(uri, localName, qName);
161        }
162    }
163
164    /*package*/ void text(char[] text, int length) throws SAXException {
165        ContentHandler contentHandler = xmlReader.contentHandler;
166        if (contentHandler != null) {
167            contentHandler.characters(text, 0, length);
168        }
169    }
170
171    /*package*/ void comment(char[] text, int length) throws SAXException {
172        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
173        if (lexicalHandler != null) {
174            lexicalHandler.comment(text, 0, length);
175        }
176    }
177
178    /*package*/ void startCdata() throws SAXException {
179        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
180        if (lexicalHandler != null) {
181            lexicalHandler.startCDATA();
182        }
183    }
184
185    /*package*/ void endCdata() throws SAXException {
186        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
187        if (lexicalHandler != null) {
188            lexicalHandler.endCDATA();
189        }
190    }
191
192    /*package*/ void startNamespace(String prefix, String uri)
193            throws SAXException {
194        ContentHandler contentHandler = xmlReader.contentHandler;
195        if (contentHandler != null) {
196            contentHandler.startPrefixMapping(prefix, uri);
197        }
198    }
199
200    /*package*/ void endNamespace(String prefix) throws SAXException {
201        ContentHandler contentHandler = xmlReader.contentHandler;
202        if (contentHandler != null) {
203            contentHandler.endPrefixMapping(prefix);
204        }
205    }
206
207    /*package*/ void startDtd(String name, String publicId, String systemId)
208            throws SAXException {
209        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
210        if (lexicalHandler != null) {
211            lexicalHandler.startDTD(name, publicId, systemId);
212        }
213    }
214
215    /*package*/ void endDtd() throws SAXException {
216        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
217        if (lexicalHandler != null) {
218            lexicalHandler.endDTD();
219        }
220    }
221
222    /*package*/ void processingInstruction(String target, String data)
223            throws SAXException {
224        ContentHandler contentHandler = xmlReader.contentHandler;
225        if (contentHandler != null) {
226            contentHandler.processingInstruction(target, data);
227        }
228    }
229
230    /*package*/ void notationDecl(String name, String publicId, String systemId) throws SAXException {
231        DTDHandler dtdHandler = xmlReader.dtdHandler;
232        if (dtdHandler != null) {
233            dtdHandler.notationDecl(name, publicId, systemId);
234        }
235    }
236
237    /*package*/ void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException {
238        DTDHandler dtdHandler = xmlReader.dtdHandler;
239        if (dtdHandler != null) {
240            dtdHandler.unparsedEntityDecl(name, publicId, systemId, notationName);
241        }
242    }
243
244    /**
245     * Handles an external entity.
246     *
247     * @param context to be passed back to Expat when we parse the entity
248     * @param publicId the publicId of the entity
249     * @param systemId the systemId of the entity
250     */
251    /*package*/ void handleExternalEntity(String context, String publicId,
252            String systemId) throws SAXException, IOException {
253        EntityResolver entityResolver = xmlReader.entityResolver;
254        if (entityResolver == null) {
255            return;
256        }
257
258        /*
259         * The spec. is terribly under-specified here. It says that if the
260         * systemId is a URL, we should try to resolve it, but it doesn't
261         * specify how to tell whether or not the systemId is a URL let alone
262         * how to resolve it.
263         *
264         * Other implementations do various insane things. We try to keep it
265         * simple: if the systemId parses as a URI and it's relative, we try to
266         * resolve it against the parent document's systemId. If anything goes
267         * wrong, we go with the original systemId. If crazybob had designed
268         * the API, he would have left all resolving to the EntityResolver.
269         */
270        if (this.systemId != null) {
271            try {
272                URI systemUri = new URI(systemId);
273                if (!systemUri.isAbsolute() && !systemUri.isOpaque()) {
274                    // It could be relative (or it may not be a URI at all!)
275                    URI baseUri = new URI(this.systemId);
276                    systemUri = baseUri.resolve(systemUri);
277
278                    // Replace systemId w/ resolved URI
279                    systemId = systemUri.toString();
280                }
281            } catch (Exception e) {
282                Logger.getLogger(ExpatParser.class.getName()).log(Level.INFO,
283                        "Could not resolve '" + systemId + "' relative to"
284                        + " '" + this.systemId + "' at " + locator, e);
285            }
286        }
287
288        InputSource inputSource = entityResolver.resolveEntity(
289                publicId, systemId);
290        if (inputSource == null) {
291            /*
292             * The spec. actually says that we should try to treat systemId
293             * as a URL and download and parse its contents here, but an
294             * entity resolver can easily accomplish the same by returning
295             * new InputSource(systemId).
296             *
297             * Downloading external entities by default would result in several
298             * unwanted DTD downloads, not to mention pose a security risk
299             * when parsing untrusted XML (http://tinyurl.com/56ggrk),
300             * so we just do nothing instead. This also enables the user to
301             * opt out of entity parsing when using
302             * {@link org.xml.sax.helpers.DefaultHandler}, something that
303             * wouldn't be possible otherwise.
304             */
305            return;
306        }
307
308        String encoding = pickEncoding(inputSource);
309        int pointer = createEntityParser(this.pointer, context, encoding);
310        try {
311            EntityParser entityParser = new EntityParser(encoding, xmlReader,
312                    pointer, inputSource.getPublicId(),
313                    inputSource.getSystemId());
314
315            parseExternalEntity(entityParser, inputSource);
316        } finally {
317            releaseParser(pointer);
318        }
319    }
320
321    /**
322     * Picks an encoding for an external entity. Defaults to UTF-8.
323     */
324    private String pickEncoding(InputSource inputSource) {
325        Reader reader = inputSource.getCharacterStream();
326        if (reader != null) {
327            return CHARACTER_ENCODING;
328        }
329
330        String encoding = inputSource.getEncoding();
331        return encoding == null ? DEFAULT_ENCODING : encoding;
332    }
333
334    /**
335     * Parses the the external entity provided by the input source.
336     */
337    private void parseExternalEntity(ExpatParser entityParser,
338            InputSource inputSource) throws IOException, SAXException {
339        /*
340         * Expat complains if the external entity isn't wrapped with a root
341         * element so we add one and ignore it later on during parsing.
342         */
343
344        // Try the character stream.
345        Reader reader = inputSource.getCharacterStream();
346        if (reader != null) {
347            try {
348                entityParser.append("<externalEntity>");
349                entityParser.parseFragment(reader);
350                entityParser.append("</externalEntity>");
351            } finally {
352                // TODO: Don't eat original exception when close() throws.
353                reader.close();
354            }
355            return;
356        }
357
358        // Try the byte stream.
359        InputStream in = inputSource.getByteStream();
360        if (in != null) {
361            try {
362                entityParser.append("<externalEntity>"
363                        .getBytes(entityParser.encoding));
364                entityParser.parseFragment(in);
365                entityParser.append("</externalEntity>"
366                        .getBytes(entityParser.encoding));
367            } finally {
368                // TODO: Don't eat original exception when close() throws.
369                in.close();
370            }
371            return;
372        }
373
374        // Make sure we use the user-provided systemId.
375        String systemId = inputSource.getSystemId();
376        if (systemId == null) {
377            // TODO: We could just try our systemId here.
378            throw new ParseException("No input specified.", locator);
379        }
380
381        // Try the system id.
382        in = openUrl(systemId);
383        try {
384            entityParser.append("<externalEntity>"
385                    .getBytes(entityParser.encoding));
386            entityParser.parseFragment(in);
387            entityParser.append("</externalEntity>"
388                    .getBytes(entityParser.encoding));
389        } finally {
390            in.close();
391        }
392    }
393
394    /**
395     * Creates a native entity parser.
396     *
397     * @param parentPointer pointer to parent Expat parser
398     * @param context passed to {@link #handleExternalEntity}
399     * @param encoding
400     * @return pointer to native parser
401     */
402    private static native int createEntityParser(int parentPointer,
403            String context, String encoding);
404
405    /**
406     * Appends part of an XML document. This parser will parse the given XML to
407     * the extent possible and dispatch to the appropriate methods.
408     *
409     * @param xml a whole or partial snippet of XML
410     * @throws SAXException if an error occurs during parsing
411     */
412    /*package*/ void append(String xml) throws SAXException {
413        try {
414            append(this.pointer, xml, false);
415        } catch (ExpatException e) {
416            throw new ParseException(e.getMessage(), this.locator);
417        }
418    }
419
420    private native void append(int pointer, String xml, boolean isFinal)
421            throws SAXException, ExpatException;
422
423    /**
424     * Appends part of an XML document. This parser will parse the given XML to
425     * the extent possible and dispatch to the appropriate methods.
426     *
427     * @param xml a whole or partial snippet of XML
428     * @param offset into the char[]
429     * @param length of characters to use
430     * @throws SAXException if an error occurs during parsing
431     */
432    /*package*/ void append(char[] xml, int offset, int length)
433            throws SAXException {
434        try {
435            append(this.pointer, xml, offset, length);
436        } catch (ExpatException e) {
437            throw new ParseException(e.getMessage(), this.locator);
438        }
439    }
440
441    private native void append(int pointer, char[] xml, int offset,
442            int length) throws SAXException, ExpatException;
443
444    /**
445     * Appends part of an XML document. This parser will parse the given XML to
446     * the extent possible and dispatch to the appropriate methods.
447     *
448     * @param xml a whole or partial snippet of XML
449     * @throws SAXException if an error occurs during parsing
450     */
451    /*package*/ void append(byte[] xml) throws SAXException {
452        append(xml, 0, xml.length);
453    }
454
455    /**
456     * Appends part of an XML document. This parser will parse the given XML to
457     * the extent possible and dispatch to the appropriate methods.
458     *
459     * @param xml a whole or partial snippet of XML
460     * @param offset into the byte[]
461     * @param length of bytes to use
462     * @throws SAXException if an error occurs during parsing
463     */
464    /*package*/ void append(byte[] xml, int offset, int length)
465            throws SAXException {
466        try {
467            append(this.pointer, xml, offset, length);
468        } catch (ExpatException e) {
469            throw new ParseException(e.getMessage(), this.locator);
470        }
471    }
472
473    private native void append(int pointer, byte[] xml, int offset,
474            int length) throws SAXException, ExpatException;
475
476    /**
477     * Parses an XML document from the given input stream.
478     */
479    /*package*/ void parseDocument(InputStream in) throws IOException,
480            SAXException {
481        startDocument();
482        parseFragment(in);
483        finish();
484        endDocument();
485    }
486
487    /**
488     * Parses an XML Document from the given reader.
489     */
490    /*package*/ void parseDocument(Reader in) throws IOException, SAXException {
491        startDocument();
492        parseFragment(in);
493        finish();
494        endDocument();
495    }
496
497    /**
498     * Parses XML from the given Reader.
499     */
500    private void parseFragment(Reader in) throws IOException, SAXException {
501        char[] buffer = new char[BUFFER_SIZE / 2];
502        int length;
503        while ((length = in.read(buffer)) != -1) {
504            try {
505                append(this.pointer, buffer, 0, length);
506            } catch (ExpatException e) {
507                throw new ParseException(e.getMessage(), locator);
508            }
509        }
510    }
511
512    /**
513     * Parses XML from the given input stream.
514     */
515    private void parseFragment(InputStream in)
516            throws IOException, SAXException {
517        byte[] buffer = new byte[BUFFER_SIZE];
518        int length;
519        while ((length = in.read(buffer)) != -1) {
520            try {
521                append(this.pointer, buffer, 0, length);
522            } catch (ExpatException e) {
523                throw new ParseException(e.getMessage(), this.locator);
524            }
525        }
526    }
527
528    private void startDocument() throws SAXException {
529        ContentHandler contentHandler = xmlReader.contentHandler;
530        if (contentHandler != null) {
531            contentHandler.setDocumentLocator(this.locator);
532            contentHandler.startDocument();
533        }
534    }
535
536    private void endDocument() throws SAXException {
537        ContentHandler contentHandler;
538        contentHandler = xmlReader.contentHandler;
539        if (contentHandler != null) {
540            contentHandler.endDocument();
541        }
542    }
543
544    /**
545     * Indicate that we're finished parsing.
546     *
547     * @throws SAXException if the xml is incomplete
548     */
549    /*package*/ void finish() throws SAXException {
550        try {
551            append(this.pointer, "", true);
552        } catch (ExpatException e) {
553            throw new ParseException(e.getMessage(), this.locator);
554        }
555    }
556
557    @Override
558    @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
559    protected synchronized void finalize() throws Throwable {
560        if (this.pointer != 0) {
561            release(this.pointer);
562            this.pointer = 0;
563        }
564    }
565
566    /**
567     * Releases all native objects.
568     */
569    private native void release(int pointer);
570
571    /**
572     * Releases native parser only.
573     */
574    private static native void releaseParser(int pointer);
575
576    /**
577     * Initialize static resources.
578     */
579    private static native void staticInitialize(String emptyString);
580
581    static {
582        staticInitialize("");
583    }
584
585    /**
586     * Gets the current line number within the XML file.
587     */
588    private int line() {
589        return line(this.pointer);
590    }
591
592    private static native int line(int pointer);
593
594    /**
595     * Gets the current column number within the XML file.
596     */
597    private int column() {
598        return column(this.pointer);
599    }
600
601    private static native int column(int pointer);
602
603    /**
604     * Clones the current attributes so they can be used outside of
605     * startElement().
606     */
607    /*package*/ Attributes cloneAttributes() {
608        if (!inStartElement) {
609            throw new IllegalStateException(OUTSIDE_START_ELEMENT);
610        }
611
612        if (attributeCount == 0) {
613            return ClonedAttributes.EMPTY;
614        }
615
616        int clonePointer
617                = cloneAttributes(this.attributePointer, this.attributeCount);
618        return new ClonedAttributes(pointer, clonePointer, attributeCount);
619    }
620
621    private static native int cloneAttributes(int pointer, int attributeCount);
622
623    /**
624     * Used for cloned attributes.
625     */
626    private static class ClonedAttributes extends ExpatAttributes {
627
628        private static final Attributes EMPTY = new ClonedAttributes(0, 0, 0);
629
630        private final int parserPointer;
631        private int pointer;
632        private final int length;
633
634        /**
635         * Constructs a Java wrapper for native attributes.
636         *
637         * @param parserPointer pointer to the parse, can be 0 if length is 0.
638         * @param pointer pointer to the attributes array, can be 0 if the
639         *  length is 0.
640         * @param length number of attributes
641         */
642        private ClonedAttributes(int parserPointer, int pointer, int length) {
643            this.parserPointer = parserPointer;
644            this.pointer = pointer;
645            this.length = length;
646        }
647
648        @Override
649        public int getParserPointer() {
650            return this.parserPointer;
651        }
652
653        @Override
654        public int getPointer() {
655            return pointer;
656        }
657
658        @Override
659        public int getLength() {
660            return length;
661        }
662
663        @Override
664        @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
665        protected synchronized void finalize() throws Throwable {
666            if (pointer != 0) {
667                freeAttributes(pointer);
668                pointer = 0;
669            }
670        }
671    }
672
673    private class ExpatLocator implements Locator {
674
675        public String getPublicId() {
676            return publicId;
677        }
678
679        public String getSystemId() {
680            return systemId;
681        }
682
683        public int getLineNumber() {
684            return line();
685        }
686
687        public int getColumnNumber() {
688            return column();
689        }
690
691        @Override
692        public String toString() {
693            return "Locator[publicId: " + publicId + ", systemId: " + systemId
694                + ", line: " + getLineNumber()
695                + ", column: " + getColumnNumber() + "]";
696        }
697    }
698
699    /**
700     * Attributes that are only valid during startElement().
701     */
702    private class CurrentAttributes extends ExpatAttributes {
703
704        @Override
705        public int getParserPointer() {
706            return pointer;
707        }
708
709        @Override
710        public int getPointer() {
711            if (!inStartElement) {
712                throw new IllegalStateException(OUTSIDE_START_ELEMENT);
713            }
714            return attributePointer;
715        }
716
717        @Override
718        public int getLength() {
719            if (!inStartElement) {
720                throw new IllegalStateException(OUTSIDE_START_ELEMENT);
721            }
722            return attributeCount;
723        }
724    }
725
726    /**
727     * Includes line and column in the message.
728     */
729    private static class ParseException extends SAXParseException {
730
731        private ParseException(String message, Locator locator) {
732            super(makeMessage(message, locator), locator);
733        }
734
735        private static String makeMessage(String message, Locator locator) {
736            return makeMessage(message, locator.getLineNumber(),
737                    locator.getColumnNumber());
738        }
739
740        private static String makeMessage(
741                String message, int line, int column) {
742            return "At line " + line + ", column "
743                    + column + ": " + message;
744        }
745    }
746
747    /**
748     * Opens an InputStream for the given URL.
749     */
750    /*package*/ static InputStream openUrl(String url) throws IOException {
751        try {
752            URLConnection urlConnection = new URL(url).openConnection();
753            urlConnection.setConnectTimeout(TIMEOUT);
754            urlConnection.setReadTimeout(TIMEOUT);
755            urlConnection.setDoInput(true);
756            urlConnection.setDoOutput(false);
757            return urlConnection.getInputStream();
758        } catch (Exception e) {
759            IOException ioe = new IOException("Couldn't open " + url);
760            ioe.initCause(e);
761            throw ioe;
762        }
763    }
764
765    /**
766     * Parses an external entity.
767     */
768    private static class EntityParser extends ExpatParser {
769
770        private int depth = 0;
771
772        private EntityParser(String encoding, ExpatReader xmlReader,
773                int pointer, String publicId, String systemId) {
774            super(encoding, xmlReader, pointer, publicId, systemId);
775        }
776
777        @Override
778        void startElement(String uri, String localName, String qName,
779                int attributePointer, int attributeCount) throws SAXException {
780            /*
781             * Skip topmost element generated by our workaround in
782             * {@link #handleExternalEntity}.
783             */
784            if (depth++ > 0) {
785                super.startElement(uri, localName, qName, attributePointer,
786                        attributeCount);
787            }
788        }
789
790        @Override
791        void endElement(String uri, String localName, String qName)
792                throws SAXException {
793            if (--depth > 0) {
794                super.endElement(uri, localName, qName);
795            }
796        }
797
798        @Override
799        @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
800        protected synchronized void finalize() throws Throwable {
801            /*
802             * Don't release our native resources. We do so explicitly in
803             * {@link #handleExternalEntity} and we don't want to release the
804             * parsing context--our parent is using it.
805             */
806        }
807    }
808}
809