1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.harmony.xml;
18
19import java.io.IOException;
20import java.io.InputStream;
21import java.io.Reader;
22import java.net.URI;
23import java.net.URL;
24import java.net.URLConnection;
25import libcore.io.IoUtils;
26import org.xml.sax.Attributes;
27import org.xml.sax.ContentHandler;
28import org.xml.sax.DTDHandler;
29import org.xml.sax.EntityResolver;
30import org.xml.sax.InputSource;
31import org.xml.sax.Locator;
32import org.xml.sax.SAXException;
33import org.xml.sax.SAXParseException;
34import org.xml.sax.ext.LexicalHandler;
35
36/**
37 * Adapts SAX API to the Expat native XML parser. Not intended for reuse
38 * across documents.
39 *
40 * @see org.apache.harmony.xml.ExpatReader
41 */
42class ExpatParser {
43
44    private static final int BUFFER_SIZE = 8096; // in bytes
45
46    /** Pointer to XML_Parser instance. */
47    private long pointer;
48
49    private boolean inStartElement = false;
50    private int attributeCount = -1;
51    private long attributePointer = 0;
52
53    private final Locator locator = new ExpatLocator();
54
55    private final ExpatReader xmlReader;
56
57    private final String publicId;
58    private final String systemId;
59
60    private final String encoding;
61
62    private final ExpatAttributes attributes = new CurrentAttributes();
63
64    private static final String OUTSIDE_START_ELEMENT
65            = "Attributes can only be used within the scope of startElement().";
66
67    /** We default to UTF-8 when the user doesn't specify an encoding. */
68    private static final String DEFAULT_ENCODING = "UTF-8";
69
70    /** Encoding used for Java chars, used to parse Readers and Strings */
71    /*package*/ static final String CHARACTER_ENCODING = "UTF-16";
72
73    /** Timeout for HTTP connections (in ms) */
74    private static final int TIMEOUT = 20 * 1000;
75
76    /**
77     * Constructs a new parser with the specified encoding.
78     */
79    /*package*/ ExpatParser(String encoding, ExpatReader xmlReader,
80            boolean processNamespaces, String publicId, String systemId) {
81        this.publicId = publicId;
82        this.systemId = systemId;
83
84        this.xmlReader = xmlReader;
85
86        /*
87         * TODO: Let Expat try to guess the encoding instead of defaulting.
88         * Unfortunately, I don't know how to tell which encoding Expat picked,
89         * so I won't know how to encode "<externalEntity>" below. The solution
90         * I think is to fix Expat to not require the "<externalEntity>"
91         * workaround.
92         */
93        this.encoding = encoding == null ? DEFAULT_ENCODING : encoding;
94        this.pointer = initialize(
95            this.encoding,
96            processNamespaces
97        );
98    }
99
100    /**
101     * Used by {@link EntityParser}.
102     */
103    private ExpatParser(String encoding, ExpatReader xmlReader, long pointer,
104            String publicId, String systemId) {
105        this.encoding = encoding;
106        this.xmlReader = xmlReader;
107        this.pointer = pointer;
108        this.systemId = systemId;
109        this.publicId = publicId;
110    }
111
112    /**
113     * Initializes native resources.
114     *
115     * @return the pointer to the native parser
116     */
117    private native long initialize(String encoding, boolean namespacesEnabled);
118
119    /**
120     * Called at the start of an element.
121     *
122     * @param uri namespace URI of element or "" if namespace processing is
123     *  disabled
124     * @param localName local name of element or "" if namespace processing is
125     *  disabled
126     * @param qName qualified name or "" if namespace processing is enabled
127     * @param attributePointer pointer to native attribute char*--we keep
128     *  a separate pointer so we can detach it from the parser instance
129     * @param attributeCount number of attributes
130     */
131    /*package*/ void startElement(String uri, String localName, String qName,
132            long attributePointer, int attributeCount) throws SAXException {
133        ContentHandler contentHandler = xmlReader.contentHandler;
134        if (contentHandler == null) {
135            return;
136        }
137
138        try {
139            inStartElement = true;
140            this.attributePointer = attributePointer;
141            this.attributeCount = attributeCount;
142
143            contentHandler.startElement(
144                    uri, localName, qName, this.attributes);
145        } finally {
146            inStartElement = false;
147            this.attributeCount = -1;
148            this.attributePointer = 0;
149        }
150    }
151
152    /*package*/ void endElement(String uri, String localName, String qName)
153            throws SAXException {
154        ContentHandler contentHandler = xmlReader.contentHandler;
155        if (contentHandler != null) {
156            contentHandler.endElement(uri, localName, qName);
157        }
158    }
159
160    /*package*/ void text(char[] text, int length) throws SAXException {
161        ContentHandler contentHandler = xmlReader.contentHandler;
162        if (contentHandler != null) {
163            contentHandler.characters(text, 0, length);
164        }
165    }
166
167    /*package*/ void comment(char[] text, int length) throws SAXException {
168        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
169        if (lexicalHandler != null) {
170            lexicalHandler.comment(text, 0, length);
171        }
172    }
173
174    /*package*/ void startCdata() throws SAXException {
175        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
176        if (lexicalHandler != null) {
177            lexicalHandler.startCDATA();
178        }
179    }
180
181    /*package*/ void endCdata() throws SAXException {
182        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
183        if (lexicalHandler != null) {
184            lexicalHandler.endCDATA();
185        }
186    }
187
188    /*package*/ void startNamespace(String prefix, String uri)
189            throws SAXException {
190        ContentHandler contentHandler = xmlReader.contentHandler;
191        if (contentHandler != null) {
192            contentHandler.startPrefixMapping(prefix, uri);
193        }
194    }
195
196    /*package*/ void endNamespace(String prefix) throws SAXException {
197        ContentHandler contentHandler = xmlReader.contentHandler;
198        if (contentHandler != null) {
199            contentHandler.endPrefixMapping(prefix);
200        }
201    }
202
203    /*package*/ void startDtd(String name, String publicId, String systemId)
204            throws SAXException {
205        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
206        if (lexicalHandler != null) {
207            lexicalHandler.startDTD(name, publicId, systemId);
208        }
209    }
210
211    /*package*/ void endDtd() throws SAXException {
212        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
213        if (lexicalHandler != null) {
214            lexicalHandler.endDTD();
215        }
216    }
217
218    /*package*/ void processingInstruction(String target, String data)
219            throws SAXException {
220        ContentHandler contentHandler = xmlReader.contentHandler;
221        if (contentHandler != null) {
222            contentHandler.processingInstruction(target, data);
223        }
224    }
225
226    /*package*/ void notationDecl(String name, String publicId, String systemId) throws SAXException {
227        DTDHandler dtdHandler = xmlReader.dtdHandler;
228        if (dtdHandler != null) {
229            dtdHandler.notationDecl(name, publicId, systemId);
230        }
231    }
232
233    /*package*/ void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException {
234        DTDHandler dtdHandler = xmlReader.dtdHandler;
235        if (dtdHandler != null) {
236            dtdHandler.unparsedEntityDecl(name, publicId, systemId, notationName);
237        }
238    }
239
240    /**
241     * Handles an external entity.
242     *
243     * @param context to be passed back to Expat when we parse the entity
244     * @param publicId the publicId of the entity
245     * @param systemId the systemId of the entity
246     */
247    /*package*/ void handleExternalEntity(String context, String publicId,
248            String systemId) throws SAXException, IOException {
249        EntityResolver entityResolver = xmlReader.entityResolver;
250        if (entityResolver == null) {
251            return;
252        }
253
254        /*
255         * The spec. is terribly under-specified here. It says that if the
256         * systemId is a URL, we should try to resolve it, but it doesn't
257         * specify how to tell whether or not the systemId is a URL let alone
258         * how to resolve it.
259         *
260         * Other implementations do various insane things. We try to keep it
261         * simple: if the systemId parses as a URI and it's relative, we try to
262         * resolve it against the parent document's systemId. If anything goes
263         * wrong, we go with the original systemId. If crazybob had designed
264         * the API, he would have left all resolving to the EntityResolver.
265         */
266        if (this.systemId != null) {
267            try {
268                URI systemUri = new URI(systemId);
269                if (!systemUri.isAbsolute() && !systemUri.isOpaque()) {
270                    // It could be relative (or it may not be a URI at all!)
271                    URI baseUri = new URI(this.systemId);
272                    systemUri = baseUri.resolve(systemUri);
273
274                    // Replace systemId w/ resolved URI
275                    systemId = systemUri.toString();
276                }
277            } catch (Exception e) {
278                System.logI("Could not resolve '" + systemId + "' relative to"
279                        + " '" + this.systemId + "' at " + locator, e);
280            }
281        }
282
283        InputSource inputSource = entityResolver.resolveEntity(
284                publicId, systemId);
285        if (inputSource == null) {
286            /*
287             * The spec. actually says that we should try to treat systemId
288             * as a URL and download and parse its contents here, but an
289             * entity resolver can easily accomplish the same by returning
290             * new InputSource(systemId).
291             *
292             * Downloading external entities by default would result in several
293             * unwanted DTD downloads, not to mention pose a security risk
294             * when parsing untrusted XML -- see for example
295             * http://archive.cert.uni-stuttgart.de/bugtraq/2002/10/msg00421.html --
296             * so we just do nothing instead. This also enables the user to
297             * opt out of entity parsing when using
298             * {@link org.xml.sax.helpers.DefaultHandler}, something that
299             * wouldn't be possible otherwise.
300             */
301            return;
302        }
303
304        String encoding = pickEncoding(inputSource);
305        long pointer = createEntityParser(this.pointer, context);
306        try {
307            EntityParser entityParser = new EntityParser(encoding, xmlReader,
308                    pointer, inputSource.getPublicId(),
309                    inputSource.getSystemId());
310
311            parseExternalEntity(entityParser, inputSource);
312        } finally {
313            releaseParser(pointer);
314        }
315    }
316
317    /**
318     * Picks an encoding for an external entity. Defaults to UTF-8.
319     */
320    private String pickEncoding(InputSource inputSource) {
321        Reader reader = inputSource.getCharacterStream();
322        if (reader != null) {
323            return CHARACTER_ENCODING;
324        }
325
326        String encoding = inputSource.getEncoding();
327        return encoding == null ? DEFAULT_ENCODING : encoding;
328    }
329
330    /**
331     * Parses the the external entity provided by the input source.
332     */
333    private void parseExternalEntity(ExpatParser entityParser,
334            InputSource inputSource) throws IOException, SAXException {
335        /*
336         * Expat complains if the external entity isn't wrapped with a root
337         * element so we add one and ignore it later on during parsing.
338         */
339
340        // Try the character stream.
341        Reader reader = inputSource.getCharacterStream();
342        if (reader != null) {
343            try {
344                entityParser.append("<externalEntity>");
345                entityParser.parseFragment(reader);
346                entityParser.append("</externalEntity>");
347            } finally {
348                IoUtils.closeQuietly(reader);
349            }
350            return;
351        }
352
353        // Try the byte stream.
354        InputStream in = inputSource.getByteStream();
355        if (in != null) {
356            try {
357                entityParser.append("<externalEntity>"
358                        .getBytes(entityParser.encoding));
359                entityParser.parseFragment(in);
360                entityParser.append("</externalEntity>"
361                        .getBytes(entityParser.encoding));
362            } finally {
363                IoUtils.closeQuietly(in);
364            }
365            return;
366        }
367
368        // Make sure we use the user-provided systemId.
369        String systemId = inputSource.getSystemId();
370        if (systemId == null) {
371            // TODO: We could just try our systemId here.
372            throw new ParseException("No input specified.", locator);
373        }
374
375        // Try the system id.
376        in = openUrl(systemId);
377        try {
378            entityParser.append("<externalEntity>"
379                    .getBytes(entityParser.encoding));
380            entityParser.parseFragment(in);
381            entityParser.append("</externalEntity>"
382                    .getBytes(entityParser.encoding));
383        } finally {
384            IoUtils.closeQuietly(in);
385        }
386    }
387
388    /**
389     * Creates a native entity parser.
390     *
391     * @param parentPointer pointer to parent Expat parser
392     * @param context passed to {@link #handleExternalEntity}
393     * @return pointer to native parser
394     */
395    private static native long createEntityParser(long parentPointer, String context);
396
397    /**
398     * Appends part of an XML document. This parser will parse the given XML to
399     * the extent possible and dispatch to the appropriate methods.
400     *
401     * @param xml a whole or partial snippet of XML
402     * @throws SAXException if an error occurs during parsing
403     */
404    /*package*/ void append(String xml) throws SAXException {
405        try {
406            appendString(this.pointer, xml, false);
407        } catch (ExpatException e) {
408            throw new ParseException(e.getMessage(), this.locator);
409        }
410    }
411
412    private native void appendString(long pointer, String xml, boolean isFinal)
413            throws SAXException, ExpatException;
414
415    /**
416     * Appends part of an XML document. This parser will parse the given XML to
417     * the extent possible and dispatch to the appropriate methods.
418     *
419     * @param xml a whole or partial snippet of XML
420     * @param offset into the char[]
421     * @param length of characters to use
422     * @throws SAXException if an error occurs during parsing
423     */
424    /*package*/ void append(char[] xml, int offset, int length)
425            throws SAXException {
426        try {
427            appendChars(this.pointer, xml, offset, length);
428        } catch (ExpatException e) {
429            throw new ParseException(e.getMessage(), this.locator);
430        }
431    }
432
433    private native void appendChars(long pointer, char[] xml, int offset,
434            int length) throws SAXException, ExpatException;
435
436    /**
437     * Appends part of an XML document. This parser will parse the given XML to
438     * the extent possible and dispatch to the appropriate methods.
439     *
440     * @param xml a whole or partial snippet of XML
441     * @throws SAXException if an error occurs during parsing
442     */
443    /*package*/ void append(byte[] xml) throws SAXException {
444        append(xml, 0, xml.length);
445    }
446
447    /**
448     * Appends part of an XML document. This parser will parse the given XML to
449     * the extent possible and dispatch to the appropriate methods.
450     *
451     * @param xml a whole or partial snippet of XML
452     * @param offset into the byte[]
453     * @param length of bytes to use
454     * @throws SAXException if an error occurs during parsing
455     */
456    /*package*/ void append(byte[] xml, int offset, int length)
457            throws SAXException {
458        try {
459            appendBytes(this.pointer, xml, offset, length);
460        } catch (ExpatException e) {
461            throw new ParseException(e.getMessage(), this.locator);
462        }
463    }
464
465    private native void appendBytes(long pointer, byte[] xml, int offset,
466            int length) throws SAXException, ExpatException;
467
468    /**
469     * Parses an XML document from the given input stream.
470     */
471    /*package*/ void parseDocument(InputStream in) throws IOException,
472            SAXException {
473        startDocument();
474        parseFragment(in);
475        finish();
476        endDocument();
477    }
478
479    /**
480     * Parses an XML Document from the given reader.
481     */
482    /*package*/ void parseDocument(Reader in) throws IOException, SAXException {
483        startDocument();
484        parseFragment(in);
485        finish();
486        endDocument();
487    }
488
489    /**
490     * Parses XML from the given Reader.
491     */
492    private void parseFragment(Reader in) throws IOException, SAXException {
493        char[] buffer = new char[BUFFER_SIZE / 2];
494        int length;
495        while ((length = in.read(buffer)) != -1) {
496            try {
497                appendChars(this.pointer, buffer, 0, length);
498            } catch (ExpatException e) {
499                throw new ParseException(e.getMessage(), locator);
500            }
501        }
502    }
503
504    /**
505     * Parses XML from the given input stream.
506     */
507    private void parseFragment(InputStream in)
508            throws IOException, SAXException {
509        byte[] buffer = new byte[BUFFER_SIZE];
510        int length;
511        while ((length = in.read(buffer)) != -1) {
512            try {
513                appendBytes(this.pointer, buffer, 0, length);
514            } catch (ExpatException e) {
515                throw new ParseException(e.getMessage(), this.locator);
516            }
517        }
518    }
519
520    private void startDocument() throws SAXException {
521        ContentHandler contentHandler = xmlReader.contentHandler;
522        if (contentHandler != null) {
523            contentHandler.setDocumentLocator(this.locator);
524            contentHandler.startDocument();
525        }
526    }
527
528    private void endDocument() throws SAXException {
529        ContentHandler contentHandler;
530        contentHandler = xmlReader.contentHandler;
531        if (contentHandler != null) {
532            contentHandler.endDocument();
533        }
534    }
535
536    /**
537     * Indicate that we're finished parsing.
538     *
539     * @throws SAXException if the xml is incomplete
540     */
541    /*package*/ void finish() throws SAXException {
542        try {
543            appendString(this.pointer, "", true);
544        } catch (ExpatException e) {
545            throw new ParseException(e.getMessage(), this.locator);
546        }
547    }
548
549    @Override protected synchronized void finalize() throws Throwable {
550        try {
551            if (this.pointer != 0) {
552                release(this.pointer);
553                this.pointer = 0;
554            }
555        } finally {
556            super.finalize();
557        }
558    }
559
560    /**
561     * Releases all native objects.
562     */
563    private native void release(long pointer);
564
565    /**
566     * Releases native parser only.
567     */
568    private static native void releaseParser(long pointer);
569
570    /**
571     * Initialize static resources.
572     */
573    private static native void staticInitialize(String emptyString);
574
575    static {
576        staticInitialize("");
577    }
578
579    /**
580     * Gets the current line number within the XML file.
581     */
582    private int line() {
583        return line(this.pointer);
584    }
585
586    private static native int line(long pointer);
587
588    /**
589     * Gets the current column number within the XML file.
590     */
591    private int column() {
592        return column(this.pointer);
593    }
594
595    private static native int column(long pointer);
596
597    /**
598     * Clones the current attributes so they can be used outside of
599     * startElement().
600     */
601    /*package*/ Attributes cloneAttributes() {
602        if (!inStartElement) {
603            throw new IllegalStateException(OUTSIDE_START_ELEMENT);
604        }
605
606        if (attributeCount == 0) {
607            return ClonedAttributes.EMPTY;
608        }
609
610        long clonePointer
611                = cloneAttributes(this.attributePointer, this.attributeCount);
612        return new ClonedAttributes(pointer, clonePointer, attributeCount);
613    }
614
615    private static native long cloneAttributes(long pointer, int attributeCount);
616
617    /**
618     * Used for cloned attributes.
619     */
620    private static class ClonedAttributes extends ExpatAttributes {
621
622        private static final Attributes EMPTY = new ClonedAttributes(0, 0, 0);
623
624        private final long parserPointer;
625        private long pointer;
626        private final int length;
627
628        /**
629         * Constructs a Java wrapper for native attributes.
630         *
631         * @param parserPointer pointer to the parse, can be 0 if length is 0.
632         * @param pointer pointer to the attributes array, can be 0 if the
633         *  length is 0.
634         * @param length number of attributes
635         */
636        private ClonedAttributes(long parserPointer, long pointer, int length) {
637            this.parserPointer = parserPointer;
638            this.pointer = pointer;
639            this.length = length;
640        }
641
642        @Override
643        public long getParserPointer() {
644            return this.parserPointer;
645        }
646
647        @Override
648        public long getPointer() {
649            return pointer;
650        }
651
652        @Override
653        public int getLength() {
654            return length;
655        }
656
657        @Override protected synchronized void finalize() throws Throwable {
658            try {
659                if (pointer != 0) {
660                    freeAttributes(pointer);
661                    pointer = 0;
662                }
663            } finally {
664                super.finalize();
665            }
666        }
667    }
668
669    private class ExpatLocator implements Locator {
670
671        public String getPublicId() {
672            return publicId;
673        }
674
675        public String getSystemId() {
676            return systemId;
677        }
678
679        public int getLineNumber() {
680            return line();
681        }
682
683        public int getColumnNumber() {
684            return column();
685        }
686
687        @Override
688        public String toString() {
689            return "Locator[publicId: " + publicId + ", systemId: " + systemId
690                + ", line: " + getLineNumber()
691                + ", column: " + getColumnNumber() + "]";
692        }
693    }
694
695    /**
696     * Attributes that are only valid during startElement().
697     */
698    private class CurrentAttributes extends ExpatAttributes {
699
700        @Override
701        public long getParserPointer() {
702            return pointer;
703        }
704
705        @Override
706        public long getPointer() {
707            if (!inStartElement) {
708                throw new IllegalStateException(OUTSIDE_START_ELEMENT);
709            }
710            return attributePointer;
711        }
712
713        @Override
714        public int getLength() {
715            if (!inStartElement) {
716                throw new IllegalStateException(OUTSIDE_START_ELEMENT);
717            }
718            return attributeCount;
719        }
720    }
721
722    /**
723     * Includes line and column in the message.
724     */
725    private static class ParseException extends SAXParseException {
726
727        private ParseException(String message, Locator locator) {
728            super(makeMessage(message, locator), locator);
729        }
730
731        private static String makeMessage(String message, Locator locator) {
732            return makeMessage(message, locator.getLineNumber(),
733                    locator.getColumnNumber());
734        }
735
736        private static String makeMessage(
737                String message, int line, int column) {
738            return "At line " + line + ", column "
739                    + column + ": " + message;
740        }
741    }
742
743    /**
744     * Opens an InputStream for the given URL.
745     */
746    /*package*/ static InputStream openUrl(String url) throws IOException {
747        try {
748            URLConnection urlConnection = new URL(url).openConnection();
749            urlConnection.setConnectTimeout(TIMEOUT);
750            urlConnection.setReadTimeout(TIMEOUT);
751            urlConnection.setDoInput(true);
752            urlConnection.setDoOutput(false);
753            return urlConnection.getInputStream();
754        } catch (Exception e) {
755            IOException ioe = new IOException("Couldn't open " + url);
756            ioe.initCause(e);
757            throw ioe;
758        }
759    }
760
761    /**
762     * Parses an external entity.
763     */
764    private static class EntityParser extends ExpatParser {
765
766        private int depth = 0;
767
768        private EntityParser(String encoding, ExpatReader xmlReader,
769                long pointer, String publicId, String systemId) {
770            super(encoding, xmlReader, pointer, publicId, systemId);
771        }
772
773        @Override
774        void startElement(String uri, String localName, String qName,
775                long attributePointer, int attributeCount) throws SAXException {
776            /*
777             * Skip topmost element generated by our workaround in
778             * {@link #handleExternalEntity}.
779             */
780            if (depth++ > 0) {
781                super.startElement(uri, localName, qName, attributePointer,
782                        attributeCount);
783            }
784        }
785
786        @Override
787        void endElement(String uri, String localName, String qName)
788                throws SAXException {
789            if (--depth > 0) {
790                super.endElement(uri, localName, qName);
791            }
792        }
793
794        @Override
795        @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
796        protected synchronized void finalize() throws Throwable {
797            /*
798             * Don't release our native resources. We do so explicitly in
799             * {@link #handleExternalEntity} and we don't want to release the
800             * parsing context--our parent is using it.
801             */
802        }
803    }
804}
805