1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.harmony.xml;
18
19import org.xml.sax.Attributes;
20import org.xml.sax.SAXException;
21import org.xml.sax.ContentHandler;
22import org.xml.sax.Locator;
23import org.xmlpull.v1.XmlPullParser;
24import org.xmlpull.v1.XmlPullParserException;
25
26import java.io.IOException;
27import java.io.InputStream;
28import java.io.Reader;
29
30/**
31 * Fast, partial XmlPullParser implementation based upon Expat. Does not
32 * support validation or {@code DOCTYPE} processing.
33 */
34public class ExpatPullParser implements XmlPullParser {
35    /**
36     * This feature is identified by http://xmlpull.org/v1/doc/features.html#relaxed
37     * If this feature is supported that means that XmlPull parser will be
38     * lenient when checking XML well formedness.
39     * NOTE: use it only if XML input is not well-formed and in general usage
40     * if this feature is discouraged
41     * NOTE: as there is no definition of what is relaxed XML parsing
42     * therefore what parser will do completely depends on implementation used
43     */
44    public static final String FEATURE_RELAXED =
45            "http://xmlpull.org/v1/doc/features.html#relaxed";
46
47    private static final int BUFFER_SIZE = 8096;
48
49    private static final String NOT_A_START_TAG = "This is not a start tag.";
50
51    private Document document;
52    private boolean processNamespaces = false;
53    private boolean relaxed = false;
54
55    public void setFeature(String name, boolean state)
56            throws XmlPullParserException {
57        if (name == null) {
58            // Required by API.
59            throw new IllegalArgumentException("Null feature name");
60        }
61
62        if (name.equals(FEATURE_PROCESS_NAMESPACES)) {
63            processNamespaces = state;
64            return;
65        }
66
67        if (name.equals(FEATURE_RELAXED)) {
68            relaxed = true;
69            return;
70        }
71
72        // You're free to turn these features off because we don't support them.
73        if (!state && (name.equals(FEATURE_REPORT_NAMESPACE_ATTRIBUTES)
74                || name.equals(FEATURE_PROCESS_DOCDECL)
75                || name.equals(FEATURE_VALIDATION))) {
76            return;
77        }
78
79        throw new XmlPullParserException("Unsupported feature: " + name);
80    }
81
82    public boolean getFeature(String name) {
83        if (name == null) {
84            // Required by API.
85            throw new IllegalArgumentException("Null feature name");
86        }
87
88        // We always support namespaces, but no other features.
89        return name.equals(FEATURE_PROCESS_NAMESPACES) && processNamespaces;
90    }
91
92    /**
93     * Returns true if this parser processes namespaces.
94     *
95     * @see #setNamespaceProcessingEnabled(boolean)
96     */
97    public boolean isNamespaceProcessingEnabled() {
98        return processNamespaces;
99    }
100
101    /**
102     * Enables or disables namespace processing. Set to false by default.
103     *
104     * @see #isNamespaceProcessingEnabled()
105     */
106    public void setNamespaceProcessingEnabled(boolean processNamespaces) {
107        this.processNamespaces = processNamespaces;
108    }
109
110    public void setProperty(String name, Object value)
111            throws XmlPullParserException {
112        if (name == null) {
113            // Required by API.
114            throw new IllegalArgumentException("Null feature name");
115        }
116
117        // We don't support any properties.
118        throw new XmlPullParserException("Properties aren't supported.");
119    }
120
121    public Object getProperty(String name) {
122        return null;
123    }
124
125    public void setInput(Reader in) throws XmlPullParserException {
126        this.document = new CharDocument(in, processNamespaces);
127    }
128
129    public void setInput(InputStream in, String encodingName)
130            throws XmlPullParserException {
131        this.document = new ByteDocument(in, encodingName, processNamespaces);
132    }
133
134    public String getInputEncoding() {
135        return this.document.getEncoding();
136    }
137
138    /**
139     * Not supported.
140     *
141     * @throws UnsupportedOperationException always
142     */
143    public void defineEntityReplacementText(String entityName,
144            String replacementText) throws XmlPullParserException {
145        throw new UnsupportedOperationException();
146    }
147
148    public int getNamespaceCount(int depth) throws XmlPullParserException {
149        return document.currentEvent.namespaceStack.countAt(depth);
150    }
151
152    public String getNamespacePrefix(int pos) throws XmlPullParserException {
153        String prefix = document.currentEvent.namespaceStack.prefixAt(pos);
154        @SuppressWarnings("StringEquality")
155        boolean hasPrefix = prefix != "";
156        return hasPrefix ? prefix : null;
157    }
158
159    public String getNamespaceUri(int pos) throws XmlPullParserException {
160        return document.currentEvent.namespaceStack.uriAt(pos);
161    }
162
163    public String getNamespace(String prefix) {
164        // In XmlPullParser API, null == default namespace.
165        if (prefix == null) {
166            // Internally, we use empty string instead of null.
167            prefix = "";
168        }
169
170        return document.currentEvent.namespaceStack.uriFor(prefix);
171    }
172
173    public int getDepth() {
174        return this.document.getDepth();
175    }
176
177    public String getPositionDescription() {
178        return "line " + getLineNumber() + ", column " + getColumnNumber();
179    }
180
181    /**
182     * Not supported.
183     *
184     * @return {@literal -1} always
185     */
186    public int getLineNumber() {
187        // We would have to record the line number in each event.
188        return -1;
189    }
190
191    /**
192     * Not supported.
193     *
194     * @return {@literal -1} always
195     */
196    public int getColumnNumber() {
197        // We would have to record the column number in each event.
198        return -1;
199    }
200
201    public boolean isWhitespace() throws XmlPullParserException {
202        if (getEventType() != TEXT) {
203            throw new XmlPullParserException("Not on text.");
204        }
205
206        String text = getText();
207
208        if (text.length() == 0) {
209            return true;
210        }
211
212        int length = text.length();
213        for (int i = 0; i < length; i++) {
214            if (!Character.isWhitespace(text.charAt(i))) {
215                return false;
216            }
217        }
218
219        return true;
220    }
221
222    public String getText() {
223        final StringBuilder builder = this.document.currentEvent.getText();
224        return builder == null ? null : builder.toString();
225    }
226
227    public char[] getTextCharacters(int[] holderForStartAndLength) {
228        final StringBuilder builder = this.document.currentEvent.getText();
229
230        final int length = builder.length();
231        char[] characters = new char[length];
232        builder.getChars(0, length, characters, 0);
233
234        holderForStartAndLength[0] = 0;
235        holderForStartAndLength[1] = length;
236
237        return characters;
238    }
239
240    public String getNamespace() {
241        return this.document.currentEvent.getNamespace();
242    }
243
244    public String getName() {
245        return this.document.currentEvent.getName();
246    }
247
248    /**
249     * Not supported.
250     *
251     * @throws UnsupportedOperationException always
252     */
253    public String getPrefix() {
254        throw new UnsupportedOperationException();
255    }
256
257    public boolean isEmptyElementTag() throws XmlPullParserException {
258        return this.document.isCurrentElementEmpty();
259    }
260
261    public int getAttributeCount() {
262        return this.document.currentEvent.getAttributeCount();
263    }
264
265    public String getAttributeNamespace(int index) {
266        return this.document.currentEvent.getAttributeNamespace(index);
267    }
268
269    public String getAttributeName(int index) {
270        return this.document.currentEvent.getAttributeName(index);
271    }
272
273    /**
274     * Not supported.
275     *
276     * @throws UnsupportedOperationException always
277     */
278    public String getAttributePrefix(int index) {
279        throw new UnsupportedOperationException();
280    }
281
282    public String getAttributeType(int index) {
283        return "CDATA";
284    }
285
286    public boolean isAttributeDefault(int index) {
287        return false;
288    }
289
290    public String getAttributeValue(int index) {
291        return this.document.currentEvent.getAttributeValue(index);
292    }
293
294    public String getAttributeValue(String namespace, String name) {
295        return this.document.currentEvent.getAttributeValue(namespace, name);
296    }
297
298    public int getEventType() throws XmlPullParserException {
299        return this.document.currentEvent.getType();
300    }
301
302    public int next() throws XmlPullParserException, IOException {
303        return this.document.dequeue();
304    }
305
306    /**
307     * Not supported.
308     *
309     * @throws UnsupportedOperationException always
310     */
311    public int nextToken() throws XmlPullParserException, IOException {
312        throw new UnsupportedOperationException();
313    }
314
315    public void require(int type, String namespace, String name)
316            throws XmlPullParserException, IOException {
317        if (type != getEventType()
318                || (namespace != null && !namespace.equals(getNamespace()))
319                || (name != null && !name.equals(getName()))) {
320            throw new XmlPullParserException("expected "
321                    + TYPES[type] + getPositionDescription());
322        }
323    }
324
325    public String nextText() throws XmlPullParserException, IOException {
326        if (this.document.currentEvent.getType() != START_TAG)
327            throw new XmlPullParserException("Not on start tag.");
328
329        int next = this.document.dequeue();
330        switch (next) {
331            case TEXT: return getText();
332            case END_TAG: return "";
333            default: throw new XmlPullParserException(
334                "Unexpected event type: " + TYPES[next]);
335        }
336    }
337
338    public int nextTag() throws XmlPullParserException, IOException {
339        int eventType = next();
340        if (eventType == TEXT && isWhitespace()) {
341            eventType = next();
342        }
343        if (eventType != START_TAG && eventType != END_TAG) {
344            throw new XmlPullParserException(
345                "Expected start or end tag", this, null);
346        }
347        return eventType;
348    }
349
350    /**
351     * Immutable namespace stack. Pushing a new namespace on to the stack
352     * only results in one object allocation. Most operations are O(N) where
353     * N is the stack size. Accessing recently pushed namespaces, like those
354     * for the current element, is significantly faster.
355     */
356    static class NamespaceStack {
357
358        /** An empty stack. */
359        static final NamespaceStack EMPTY = new NamespaceStack();
360
361        private final NamespaceStack parent;
362        private final String prefix;
363        private final String uri;
364        private final int index;
365        private final int depth;
366
367        /**
368         * Constructs an actual namespace stack node. Internally, the nodes
369         * and the stack are one in the same making for a very efficient
370         * implementation. The user just sees an immutable stack and the
371         * builder.
372         */
373        private NamespaceStack(NamespaceStack parent, String prefix,
374                String uri, int depth) {
375            this.parent = parent;
376            this.prefix = prefix;
377            this.uri = uri;
378            this.index = parent.index + 1;
379            this.depth = depth;
380        }
381
382        /**
383         * Constructs a dummy node which only serves to point to the bottom
384         * of the stack. Using an actual node instead of null simplifies the
385         * code.
386         */
387        private NamespaceStack() {
388            this.parent = null;
389            this.prefix = null;
390            this.uri = null;
391
392            // This node has an index of -1 since the actual first node in the
393            // stack has index 0.
394            this.index = -1;
395
396            // The actual first node will have a depth of 1.
397            this.depth = 0;
398        }
399
400        String uriFor(String prefix) {
401            for (NamespaceStack node = this; node.index >= 0;
402                    node = node.parent) {
403                if (node.prefix.equals(prefix)) {
404                    return node.uri;
405                }
406            }
407
408            // Not found.
409            return null;
410        }
411
412        /**
413         * Gets the prefix at the given index in the stack.
414         */
415        String prefixAt(int index) {
416            return nodeAt(index).prefix;
417        }
418
419        /**
420         * Gets the URI at the given index in the stack.
421         */
422        String uriAt(int index) {
423            return nodeAt(index).uri;
424        }
425
426        private NamespaceStack nodeAt(int index) {
427            if (index > this.index) {
428                throw new IndexOutOfBoundsException("Index > size.");
429            }
430            if (index < 0) {
431                throw new IndexOutOfBoundsException("Index < 0.");
432            }
433
434            NamespaceStack node = this;
435            while (index != node.index) {
436                node = node.parent;
437            }
438            return node;
439        }
440
441        /**
442         * Gets the size of the stack at the given element depth.
443         */
444        int countAt(int depth) {
445            if (depth > this.depth) {
446                throw new IndexOutOfBoundsException("Depth > maximum.");
447            }
448            if (depth < 0) {
449                throw new IndexOutOfBoundsException("Depth < 0.");
450            }
451
452            NamespaceStack node = this;
453            while (depth < node.depth) {
454                node = node.parent;
455            }
456            return node.index + 1;
457        }
458
459        /** Builds a NamespaceStack. */
460        static class Builder {
461
462            NamespaceStack top = EMPTY;
463
464            /**
465             * Pushes a namespace onto the stack.
466             *
467             * @param depth of the element upon which the namespace was
468             *  declared
469             */
470            void push(String prefix, String uri, int depth) {
471                top = new NamespaceStack(top, prefix, uri, depth);
472            }
473
474            /**
475             * Pops all namespaces from the given element depth.
476             */
477            void pop(int depth) {
478                // Remove all nodes at the specified depth.
479                while (top != null && top.depth == depth) {
480                    top = top.parent;
481                }
482            }
483
484            /** Returns the current stack. */
485            NamespaceStack build() {
486                return top;
487            }
488        }
489    }
490
491    /**
492     * Base class for events. Implements event chaining and defines event API
493     * along with common implementations which can be overridden.
494     */
495    static abstract class Event {
496
497        /** Element depth at the time of this event. */
498        final int depth;
499
500        /** The namespace stack at the time of this event. */
501        final NamespaceStack namespaceStack;
502
503        /** Next event in the queue. */
504        Event next = null;
505
506        Event(int depth, NamespaceStack namespaceStack) {
507            this.depth = depth;
508            this.namespaceStack = namespaceStack;
509        }
510
511        void setNext(Event next) {
512            this.next = next;
513        }
514
515        Event getNext() {
516            return next;
517        }
518
519        StringBuilder getText() {
520            return null;
521        }
522
523        String getNamespace() {
524            return null;
525        }
526
527        String getName() {
528            return null;
529        }
530
531        int getAttributeCount() {
532            return -1;
533        }
534
535        String getAttributeNamespace(int index) {
536            throw new IndexOutOfBoundsException(NOT_A_START_TAG);
537        }
538
539        String getAttributeName(int index) {
540            throw new IndexOutOfBoundsException(NOT_A_START_TAG);
541        }
542
543        String getAttributeValue(int index) {
544            throw new IndexOutOfBoundsException(NOT_A_START_TAG);
545        }
546
547        abstract int getType();
548
549        String getAttributeValue(String namespace, String name) {
550            throw new IndexOutOfBoundsException(NOT_A_START_TAG);
551        }
552
553        public int getDepth() {
554            return this.depth;
555        }
556    }
557
558    static class StartDocumentEvent extends Event {
559
560        public StartDocumentEvent() {
561            super(0, NamespaceStack.EMPTY);
562        }
563
564        @Override
565        int getType() {
566            return START_DOCUMENT;
567        }
568    }
569
570    static class StartTagEvent extends Event {
571
572        final String name;
573        final String namespace;
574        final Attributes attributes;
575        final boolean processNamespaces;
576
577        StartTagEvent(String namespace,
578                String name,
579                ExpatParser expatParser,
580                int depth,
581                NamespaceStack namespaceStack,
582                boolean processNamespaces) {
583            super(depth, namespaceStack);
584            this.namespace = namespace;
585            this.name = name;
586            this.attributes = expatParser.cloneAttributes();
587            this.processNamespaces = processNamespaces;
588        }
589
590        @Override
591        String getNamespace() {
592            return namespace;
593        }
594
595        @Override
596        String getName() {
597            return name;
598        }
599
600        @Override
601        int getAttributeCount() {
602            return attributes.getLength();
603        }
604
605        @Override
606        String getAttributeNamespace(int index) {
607            return attributes.getURI(index);
608        }
609
610        @Override
611        String getAttributeName(int index) {
612            return processNamespaces ? attributes.getLocalName(index)
613                    : attributes.getQName(index);
614        }
615
616        @Override
617        String getAttributeValue(int index) {
618            return attributes.getValue(index);
619        }
620
621        @Override
622        String getAttributeValue(String namespace, String name) {
623            if (namespace == null) {
624                namespace = "";
625            }
626
627            return attributes.getValue(namespace, name);
628        }
629
630        @Override
631        int getType() {
632            return START_TAG;
633        }
634    }
635
636    static class EndTagEvent extends Event {
637
638        final String namespace;
639        final String localName;
640
641        EndTagEvent(String namespace, String localName, int depth,
642                NamespaceStack namespaceStack) {
643            super(depth, namespaceStack);
644            this.namespace = namespace;
645            this.localName = localName;
646        }
647
648        @Override
649        String getName() {
650            return this.localName;
651        }
652
653        @Override
654        String getNamespace() {
655            return this.namespace;
656        }
657
658        @Override
659        int getType() {
660            return END_TAG;
661        }
662    }
663
664    static class TextEvent extends Event {
665
666        final StringBuilder builder;
667
668        public TextEvent(int initialCapacity, int depth,
669                NamespaceStack namespaceStack) {
670            super(depth, namespaceStack);
671            this.builder = new StringBuilder(initialCapacity);
672        }
673
674        @Override
675        int getType() {
676            return TEXT;
677        }
678
679        @Override
680        StringBuilder getText() {
681            return this.builder;
682        }
683
684        void append(char[] text, int start, int length) {
685            builder.append(text, start, length);
686        }
687    }
688
689    static class EndDocumentEvent extends Event {
690
691        EndDocumentEvent() {
692            super(0, NamespaceStack.EMPTY);
693        }
694
695        @Override
696        Event getNext() {
697            throw new IllegalStateException("End of document.");
698        }
699
700        @Override
701        void setNext(Event next) {
702            throw new IllegalStateException("End of document.");
703        }
704
705        @Override
706        int getType() {
707            return END_DOCUMENT;
708        }
709    }
710
711    /**
712     * Encapsulates the parsing context of the current document.
713     */
714    abstract class Document {
715
716        final String encoding;
717        final ExpatParser parser;
718        final boolean processNamespaces;
719
720        TextEvent textEvent = null;
721        boolean finished = false;
722
723        Document(String encoding, boolean processNamespaces) {
724            this.encoding = encoding;
725            this.processNamespaces = processNamespaces;
726
727            ExpatReader xmlReader = new ExpatReader();
728            xmlReader.setContentHandler(new SaxHandler());
729
730            this.parser = new ExpatParser(
731                    encoding, xmlReader, processNamespaces, null, null);
732        }
733
734        /** Namespace stack builder. */
735        NamespaceStack.Builder namespaceStackBuilder
736                = new NamespaceStack.Builder();
737
738        Event currentEvent = new StartDocumentEvent();
739        Event last = currentEvent;
740
741        /**
742         * Sends some more XML to the parser.
743         */
744        void pump() throws IOException, XmlPullParserException {
745            if (this.finished) {
746                return;
747            }
748
749            int length = buffer();
750
751            // End of document.
752            if (length == -1) {
753                this.finished = true;
754                if (!relaxed) {
755                    try {
756                        parser.finish();
757                    } catch (SAXException e) {
758                        throw new XmlPullParserException(
759                            "Premature end of document.", ExpatPullParser.this, e);
760                    }
761                }
762                add(new EndDocumentEvent());
763                return;
764            }
765
766            if (length == 0) {
767                return;
768            }
769
770            flush(parser, length);
771        }
772
773        /**
774         * Reads data into the buffer.
775         *
776         * @return the length of data buffered or {@code -1} if we've reached
777         *  the end of the data.
778         */
779        abstract int buffer() throws IOException;
780
781        /**
782         * Sends buffered data to the parser.
783         *
784         * @param parser the parser to flush to
785         * @param length of data buffered
786         */
787        abstract void flush(ExpatParser parser, int length)
788                throws XmlPullParserException;
789
790        /**
791         * Adds an event.
792         */
793        void add(Event event) {
794            // Flush pre-exising text event if necessary.
795            if (textEvent != null) {
796                last.setNext(textEvent);
797                last = textEvent;
798                textEvent = null;
799            }
800
801            last.setNext(event);
802            last = event;
803        }
804
805        /**
806         * Moves to the next event in the queue.
807         *
808         * @return type of next event
809         */
810        int dequeue() throws XmlPullParserException, IOException {
811            Event next;
812
813            while ((next = currentEvent.getNext()) == null) {
814                pump();
815            }
816
817            currentEvent.next = null;
818            currentEvent = next;
819
820            return currentEvent.getType();
821        }
822
823        String getEncoding() {
824            return this.encoding;
825        }
826
827        int getDepth() {
828            return currentEvent.getDepth();
829        }
830
831        /**
832         * Returns true if we're on a start element and the next event is
833         * its corresponding end element.
834         *
835         * @throws XmlPullParserException if we aren't on a start element
836         */
837        boolean isCurrentElementEmpty() throws XmlPullParserException {
838            if (currentEvent.getType() != START_TAG) {
839                throw new XmlPullParserException(NOT_A_START_TAG);
840            }
841
842            Event next;
843
844            try {
845                while ((next = currentEvent.getNext()) == null) {
846                    pump();
847                }
848            } catch (IOException ex) {
849                throw new XmlPullParserException(ex.toString());
850            }
851
852            return next.getType() == END_TAG;
853        }
854
855        private class SaxHandler implements ContentHandler {
856
857            int depth = 0;
858
859            public void startPrefixMapping(String prefix, String uri)
860                    throws SAXException {
861                // Depth + 1--we aren't actually in the element yet.
862                namespaceStackBuilder.push(prefix, uri, depth + 1);
863            }
864
865            public void startElement(String uri, String localName, String qName,
866                    Attributes attributes) {
867                String name = processNamespaces ? localName : qName;
868
869                add(new StartTagEvent(uri, name, parser, ++this.depth,
870                        namespaceStackBuilder.build(), processNamespaces));
871            }
872
873            public void endElement(String uri, String localName, String qName) {
874                String name = processNamespaces ? localName : qName;
875
876                int depth = this.depth--;
877                add(new EndTagEvent(uri, name, depth,
878                        namespaceStackBuilder.build()));
879                namespaceStackBuilder.pop(depth);
880            }
881
882            public void characters(char ch[], int start, int length) {
883                // Ignore empty strings.
884                if (length == 0) {
885                    return;
886                }
887
888                // Start a new text event if necessary.
889                if (textEvent == null) {
890                    textEvent = new TextEvent(length, this.depth,
891                            namespaceStackBuilder.build());
892                }
893
894                // Append to an existing text event.
895                textEvent.append(ch, start, length);
896            }
897
898            public void setDocumentLocator(Locator locator) {}
899            public void startDocument() throws SAXException {}
900            public void endDocument() throws SAXException {}
901            public void endPrefixMapping(String prefix) throws SAXException {}
902            public void ignorableWhitespace(char ch[], int start, int length)
903                    throws SAXException {}
904            public void processingInstruction(String target, String data)
905                    throws SAXException {}
906            public void skippedEntity(String name) throws SAXException {}
907        }
908    }
909
910    class CharDocument extends Document {
911
912        final char[] buffer = new char[BUFFER_SIZE / 2];
913        final Reader in;
914
915        CharDocument(Reader in, boolean processNamespaces) {
916            super("UTF-16", processNamespaces);
917            this.in = in;
918        }
919
920        @Override
921        int buffer() throws IOException {
922            return in.read(buffer);
923        }
924
925        @Override
926        void flush(ExpatParser parser, int length)
927                throws XmlPullParserException {
928            try {
929                parser.append(buffer, 0, length);
930            } catch (SAXException e) {
931                throw new XmlPullParserException(
932                        "Error parsing document.", ExpatPullParser.this, e);
933            }
934        }
935    }
936
937    class ByteDocument extends Document {
938
939        final byte[] buffer = new byte[BUFFER_SIZE];
940        final InputStream in;
941
942        ByteDocument(InputStream in, String encoding,
943                boolean processNamespaces) {
944            super(encoding, processNamespaces);
945            this.in = in;
946        }
947
948        @Override
949        int buffer() throws IOException {
950            return in.read(buffer);
951        }
952
953        @Override
954        void flush(ExpatParser parser, int length)
955                throws XmlPullParserException {
956            try {
957                parser.append(buffer, 0, length);
958            } catch (SAXException e) {
959                throw new XmlPullParserException(
960                        "Error parsing document.", ExpatPullParser.this, e);
961            }
962        }
963    }
964}
965