1/* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The  above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE. */
20
21// Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode)
22
23package org.kxml2.io;
24
25import java.io.Closeable;
26import java.io.IOException;
27import java.io.InputStream;
28import java.io.InputStreamReader;
29import java.io.Reader;
30import java.util.HashMap;
31import java.util.Map;
32import libcore.internal.StringPool;
33import org.xmlpull.v1.XmlPullParser;
34import org.xmlpull.v1.XmlPullParserException;
35
36/**
37 * An XML pull parser with limited support for parsing internal DTDs.
38 */
39public class KXmlParser implements XmlPullParser, Closeable {
40
    // Identifiers of the XmlPull properties and the feature this parser knows
    // by name. They are only declared in this part of the file.
    // NOTE(review): presumably compared against in getProperty()/getFeature()/
    // setFeature() further down - confirm.
    private static final String PROPERTY_XMLDECL_VERSION
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
    private static final String PROPERTY_XMLDECL_STANDALONE
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
    private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
    private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed";

    /**
     * Replacement text for the five entities that XML predefines, keyed by
     * the entity name without the surrounding {@code &} and {@code ;}.
     */
    private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>();
    static {
        DEFAULT_ENTITIES.put("lt", "<");
        DEFAULT_ENTITIES.put("gt", ">");
        DEFAULT_ENTITIES.put("amp", "&");
        DEFAULT_ENTITIES.put("apos", "'");
        DEFAULT_ENTITIES.put("quot", "\"");
    }
56
    // Token kinds that only occur inside a DTD's internal subset.
    // readInternalSubset() calls peekType(true) and dispatches on these values.
    private static final int ELEMENTDECL = 11;
    private static final int ENTITYDECL = 12;
    private static final int ATTLISTDECL = 13;
    private static final int NOTATIONDECL = 14;
    private static final int PARAMETER_ENTITY_REF = 15;

    // Literal character sequences. They are passed to read(char[]) to consume
    // an expected keyword, or to readUntil(char[], boolean) as the terminator
    // to scan for.
    private static final char[] START_COMMENT = { '<', '!', '-', '-' };
    private static final char[] END_COMMENT = { '-', '-', '>' };
    private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
    private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
    private static final char[] END_CDATA = { ']', ']', '>' };
    private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
    private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
    private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
    private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' };
    private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' };
    private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' };
    private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' };
    private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' };
    private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' };
    private static final char[] ANY = new char[]{ 'A', 'N', 'Y' };
    private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' };
    private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' };
    private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' };
    private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' };

    // Exception messages.
    static final private String UNEXPECTED_EOF = "Unexpected EOF";
    static final private String ILLEGAL_TYPE = "Wrong event type";

    // Internal token type for the "<?xml ...?>" declaration. When peekType()
    // reports it, next() calls readXmlDeclaration() and then peeks again.
    static final private int XML_DECLARATION = 998;
87
    // general
    // NOTE(review): not assigned in this part of the file; presumably the
    // value behind PROPERTY_LOCATION - confirm.
    private String location;

    // Taken from the XML declaration by readXmlDeclaration(). standalone is
    // Boolean.TRUE for "yes" and Boolean.FALSE for "no".
    private String version;
    private Boolean standalone;
    // Recorded while reading <!DOCTYPE>: the root element name by
    // readDoctype(), the identifiers by readExternalId() when it is asked to
    // assign fields.
    private String rootElementName;
    private String systemId;
    private String publicId;

    /**
     * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines
     * entity values and default attribute values. These values are parsed at
     * inclusion time and may contain both tags and entity references.
     *
     * <p>If this is false, the user must {@link #defineEntityReplacementText
     * define entity values manually}. Such entity values are literal strings
     * and will not be parsed. There is no API to define default attributes
     * manually.
     */
    private boolean processDocDecl;
    // NOTE(review): presumably enables namespace processing; it is neither
    // read nor written in this part of the file - confirm.
    private boolean processNsp;
    // When true, checkRelaxed() records problems instead of throwing.
    private boolean relaxed;
    // When true, adjustNsp() leaves xmlns attributes in the attribute array.
    private boolean keepNamespaceAttributes;

    /**
     * If non-null, the contents of the read buffer must be copied into this
     * string builder before the read buffer is overwritten. This is used to
     * capture the raw DTD text while parsing the DTD.
     */
    private StringBuilder bufferCapture;

    /**
     * Entities defined in or for this document. This map is created lazily.
     */
    private Map<String, char[]> documentEntities;

    /**
     * Default attributes in this document. The outer map's key is the element
     * name; the inner map's key is the attribute name. Both keys should be
     * without namespace adjustments. This map is created lazily.
     */
    private Map<String, Map<String, String>> defaultAttributes;


    // Element nesting depth. next() decrements it at the start of the call
    // that follows an END_TAG.
    private int depth;
    // NOTE(review): not touched in this part of the file; presumably one group
    // of slots per open element - confirm the layout where it is written.
    private String[] elementStack = new String[16];
    // Namespace declarations as consecutive (prefix, URI) pairs. adjustNsp()
    // stores a null prefix for a plain xmlns="..." declaration.
    private String[] nspStack = new String[8];
    // Indexed by depth. adjustNsp() increments the entry for the current depth
    // once per declaration and uses the previous value, doubled, as the
    // nspStack index of the new pair.
    private int[] nspCounts = new int[4];

    // source

    // Input source; next() throws until this is non-null.
    private Reader reader;
    // Set by readXmlDeclaration() from the declaration's encoding attribute.
    // NOTE(review): may also be set by setInput() outside this view.
    private String encoding;
    private ContentSource nextContentSource;
    // Read buffer. The code in this file treats position as the index of the
    // next unread character and limit as the end of the valid data.
    private char[] buffer = new char[8192];
    private int position = 0;
    private int limit = 0;

    /*
     * Track the number of newlines and columns preceding the current buffer. To
     * compute the line and column of a position in the buffer, compute the line
     * and column in the buffer and add the preceding values.
     */
    private int bufferStartLine;
    private int bufferStartColumn;

    // the current token

    // Event type of the current token; this is the value next() returns.
    private int type;
    // Reset to true by next() before a token is read. Whitespace-only text at
    // depth 0 is reported as IGNORABLE_WHITESPACE.
    private boolean isWhitespace;
    private String namespace;
    private String prefix;
    private String name;
    private String text;

    // When true, the following call to next() reports END_TAG without reading
    // any input and clears the flag.
    // NOTE(review): presumably set for empty-element tags such as <a/> - confirm.
    private boolean degenerated;
    // Number of attributes on the current start tag; next() resets it to -1.
    private int attributeCount;

    /*
     * The current element's attributes arranged in groups of 4:
     * i + 0 = attribute namespace URI
     * i + 1 = attribute namespace prefix
     * i + 2 = attribute qualified name (may contain ":", as in "html:h1")
     * i + 3 = attribute value
     */
    private String[] attributes = new String[16];

    // First problem tolerated by checkRelaxed() that has not been reported
    // yet. nextToken() surfaces it as a COMMENT token; next() discards it.
    private String error;

    // NOTE(review): not referenced in this part of the file; presumably marks
    // an entity reference that could not be resolved - confirm.
    private boolean unresolved;

    // Supplies the String instances that readUntil() returns for buffer slices.
    public final StringPool stringPool = new StringPool();
180
181    /**
182     * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"}
183     * in pulled elements. Most applications will only be interested in the effective namespaces of
184     * their elements, so these attributes aren't useful. But for structure preserving wrappers like
185     * DOM, it is necessary to keep the namespace data around.
186     */
187    public void keepNamespaceAttributes() {
188        this.keepNamespaceAttributes = true;
189    }
190
    /**
     * Applies namespace processing to the current start tag, whose qualified
     * name is in {@code name} and whose attributes are in {@code attributes}.
     *
     * <ol>
     *   <li>Each {@code xmlns} or {@code xmlns:prefix} attribute is recorded
     *       on {@code nspStack} for the current depth. Unless
     *       {@code keepNamespaceAttributes} is set, it is then removed from
     *       the attribute array.
     *   <li>If any attribute needs it, qualified attribute names are split
     *       into prefix and local name and the prefix is resolved.
     *   <li>The element name itself is split and its namespace resolved,
     *       falling back to {@code NO_NAMESPACE}.
     * </ol>
     *
     * @return true if an attribute had a prefix other than {@code xmlns}, or
     *     a namespace declaration was kept as an attribute.
     * @throws XmlPullParserException in strict mode, for an empty namespace
     *     bound to a prefix, an element name starting with ':', or an
     *     undefined element prefix.
     * @throws RuntimeException in strict mode, for an attribute name starting
     *     with ':' or an undefined attribute prefix.
     */
    private boolean adjustNsp() throws XmlPullParserException {
        boolean any = false;

        // Pass 1: collect namespace declarations.
        for (int i = 0; i < attributeCount << 2; i += 4) {
            String attrName = attributes[i + 2];
            int cut = attrName.indexOf(':');
            // Local to this loop; it hides the field of the same name.
            String prefix;

            if (cut != -1) {
                prefix = attrName.substring(0, cut);
                attrName = attrName.substring(cut + 1);
            } else if (attrName.equals("xmlns")) {
                // Default namespace declaration: recorded with a null prefix.
                prefix = attrName;
                attrName = null;
            } else {
                continue;
            }

            if (!prefix.equals("xmlns")) {
                // An ordinary prefixed attribute; resolved in pass 2.
                any = true;
            } else {
                // Claim the next (prefix, URI) pair for this depth.
                int j = (nspCounts[depth]++) << 1;

                nspStack = ensureCapacity(nspStack, j + 2);
                nspStack[j] = attrName;
                nspStack[j + 1] = attributes[i + 3];

                // Only a prefixed declaration is rejected for being empty;
                // xmlns="" passes this check.
                if (attrName != null && attributes[i + 3].isEmpty()) {
                    checkRelaxed("illegal empty namespace");
                }

                if (keepNamespaceAttributes) {
                    // explicitly set the namespace for unprefixed attributes
                    // such as xmlns="http://foo"
                    attributes[i] = "http://www.w3.org/2000/xmlns/";
                    any = true;
                } else {
                    // Drop this attribute by shifting the later groups down
                    // over it.
                    System.arraycopy(
                            attributes,
                            i + 4,
                            attributes,
                            i,
                            ((--attributeCount) << 2) - i);

                    // Revisit the same slot, which now holds the next attribute.
                    i -= 4;
                }
            }
        }

        // Pass 2: split and resolve qualified attribute names.
        if (any) {
            for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {

                String attrName = attributes[i + 2];
                int cut = attrName.indexOf(':');

                if (cut == 0 && !relaxed) {
                    throw new RuntimeException(
                            "illegal attribute name: " + attrName + " at " + this);
                } else if (cut != -1) {
                    String attrPrefix = attrName.substring(0, cut);

                    attrName = attrName.substring(cut + 1);

                    String attrNs = getNamespace(attrPrefix);

                    if (attrNs == null && !relaxed) {
                        throw new RuntimeException(
                                "Undefined Prefix: " + attrPrefix + " in " + this);
                    }

                    attributes[i] = attrNs;
                    attributes[i + 1] = attrPrefix;
                    attributes[i + 2] = attrName;
                }
            }
        }

        // Finally resolve the element's own name.
        int cut = name.indexOf(':');

        if (cut == 0) {
            checkRelaxed("illegal tag name: " + name);
        }

        if (cut != -1) {
            prefix = name.substring(0, cut);
            name = name.substring(cut + 1);
        }

        // When the name has no colon, the prefix field is left as it was and
        // is used for the lookup unchanged.
        this.namespace = getNamespace(prefix);

        if (this.namespace == null) {
            if (prefix != null) {
                checkRelaxed("undefined prefix: " + prefix);
            }
            this.namespace = NO_NAMESPACE;
        }

        return any;
    }
290
291    private String[] ensureCapacity(String[] arr, int required) {
292        if (arr.length >= required) {
293            return arr;
294        }
295        String[] bigger = new String[required + 16];
296        System.arraycopy(arr, 0, bigger, 0, arr.length);
297        return bigger;
298    }
299
300    private void checkRelaxed(String errorMessage) throws XmlPullParserException {
301        if (!relaxed) {
302            throw new XmlPullParserException(errorMessage, this, null);
303        }
304        if (error == null) {
305            error = "Error: " + errorMessage;
306        }
307    }
308
    /**
     * Advances to the next event. This delegates to {@code next(false)}, which
     * skips comments, processing instructions and the document declaration,
     * and merges adjacent text, CDATA and entity references into one TEXT
     * event.
     *
     * @return the type of the new current event.
     * @throws XmlPullParserException if no input has been set or the input is
     *     not acceptable.
     * @throws IOException if reading the input fails.
     */
    public int next() throws XmlPullParserException, IOException {
        return next(false);
    }
312
    /**
     * Advances to the next token. This delegates to {@code next(true)}, which
     * returns after every individual token, including comments, processing
     * instructions, CDATA sections, entity references and the document
     * declaration.
     *
     * @return the type of the new current token.
     * @throws XmlPullParserException if no input has been set or the input is
     *     not acceptable.
     * @throws IOException if reading the input fails.
     */
    public int nextToken() throws XmlPullParserException, IOException {
        return next(true);
    }
316
    /**
     * Shared implementation of {@link #next()} and {@link #nextToken()}.
     *
     * @param justOneToken true to return after each individual token; false
     *     to skip comments, processing instructions and the document
     *     declaration, and to merge consecutive text-like tokens into a single
     *     TEXT event. When false, entity resolution failures are also asked to
     *     throw.
     * @return the type of the new current token, which is also stored in
     *     {@code type}.
     * @throws XmlPullParserException if no reader has been set, an unexpected
     *     token type is peeked, or text, CDATA or an entity reference other
     *     than ignorable whitespace occurs at depth 0.
     */
    private int next(boolean justOneToken) throws IOException, XmlPullParserException {
        if (reader == null) {
            throw new XmlPullParserException("setInput() must be called first.", this, null);
        }

        // The previous call reported an end tag, so that element is now closed.
        if (type == END_TAG) {
            depth--;
        }

        // degenerated needs to be handled before error because of possible
        // processor expectations(!)

        if (degenerated) {
            degenerated = false;
            type = END_TAG;
            return type;
        }

        // A problem recorded by checkRelaxed() is reported as a COMMENT token
        // in token mode and silently dropped otherwise.
        if (error != null) {
            if (justOneToken) {
                text = error;
                type = COMMENT;
                error = null;
                return type;
            } else {
                error = null;
            }
        }

        type = peekType(false);

        // The XML declaration is consumed here and not reported as a token.
        if (type == XML_DECLARATION) {
            readXmlDeclaration();
            type = peekType(false);
        }

        // Reset the per-token state before reading the new token.
        text = null;
        isWhitespace = true;
        prefix = null;
        name = null;
        namespace = null;
        attributeCount = -1;
        boolean throwOnResolveFailure = !justOneToken;

        while (true) {
            switch (type) {

            /*
             * Return immediately after encountering a start tag, end tag, or
             * the end of the document.
             */
            case START_TAG:
                parseStartTag(false, throwOnResolveFailure);
                return type;
            case END_TAG:
                readEndTag();
                return type;
            case END_DOCUMENT:
                return type;

            /*
             * Return after any text token when we're looking for a single
             * token. Otherwise concatenate all text between tags.
             */
            case ENTITY_REF:
                if (justOneToken) {
                    StringBuilder entityTextBuilder = new StringBuilder();
                    readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT);
                    text = entityTextBuilder.toString();
                    break;
                }
                // fall-through
            case TEXT:
                text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT);
                // Whitespace-only text outside the root element is ignorable.
                if (depth == 0 && isWhitespace) {
                    type = IGNORABLE_WHITESPACE;
                }
                break;
            case CDSECT:
                read(START_CDATA);
                // readUntil() prepends any text already accumulated in 'text'.
                text = readUntil(END_CDATA, true);
                break;

            /*
             * Comments, processing instructions and declarations are returned
             * when we're looking for a single token. Otherwise they're skipped.
             */
            case COMMENT:
                String commentText = readComment(justOneToken);
                if (justOneToken) {
                    text = commentText;
                }
                break;
            case PROCESSING_INSTRUCTION:
                read(START_PROCESSING_INSTRUCTION);
                String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
                if (justOneToken) {
                    text = processingInstruction;
                }
                break;
            case DOCDECL:
                readDoctype(justOneToken);
                break;

            default:
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            // Character data is not allowed outside the root element.
            // Whitespace was already reclassified as IGNORABLE_WHITESPACE above.
            if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) {
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            if (justOneToken) {
                return type;
            }

            if (type == IGNORABLE_WHITESPACE) {
                text = null;
            }

            /*
             * We've read all that we can of a non-empty text block. Always
             * report this as text, even if it was a CDATA block or entity
             * reference.
             */
            int peek = peekType(false);
            if (text != null && !text.isEmpty() && peek < TEXT) {
                type = TEXT;
                return type;
            }

            // Nothing to report yet; keep going with the token just peeked.
            type = peek;
        }
    }
451
452    /**
453     * Reads text until the specified delimiter is encountered. Consumes the
454     * text and the delimiter.
455     *
456     * @param returnText true to return the read text excluding the delimiter;
457     *     false to return null.
458     */
459    private String readUntil(char[] delimiter, boolean returnText)
460            throws IOException, XmlPullParserException {
461        int start = position;
462        StringBuilder result = null;
463
464        if (returnText && text != null) {
465            result = new StringBuilder();
466            result.append(text);
467        }
468
469        search:
470        while (true) {
471            if (position + delimiter.length >= limit) {
472                if (start < position && returnText) {
473                    if (result == null) {
474                        result = new StringBuilder();
475                    }
476                    result.append(buffer, start, position - start);
477                }
478                if (!fillBuffer(delimiter.length)) {
479                    checkRelaxed(UNEXPECTED_EOF);
480                    type = COMMENT;
481                    return null;
482                }
483                start = position;
484            }
485
486            // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
487            // when the VM has better method inlining
488            for (int i = 0; i < delimiter.length; i++) {
489                if (buffer[position + i] != delimiter[i]) {
490                    position++;
491                    continue search;
492                }
493            }
494
495            break;
496        }
497
498        int end = position;
499        position += delimiter.length;
500
501        if (!returnText) {
502            return null;
503        } else if (result == null) {
504            return stringPool.get(buffer, start, end - start);
505        } else {
506            result.append(buffer, start, end - start);
507            return result.toString();
508        }
509    }
510
511    /**
512     * Returns true if an XML declaration was read.
513     */
514    private void readXmlDeclaration() throws IOException, XmlPullParserException {
515        if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) {
516            checkRelaxed("processing instructions must not start with xml");
517        }
518
519        read(START_PROCESSING_INSTRUCTION);
520        parseStartTag(true, true);
521
522        if (attributeCount < 1 || !"version".equals(attributes[2])) {
523            checkRelaxed("version expected");
524        }
525
526        version = attributes[3];
527
528        int pos = 1;
529
530        if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) {
531            encoding = attributes[3 + 4];
532            pos++;
533        }
534
535        if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) {
536            String st = attributes[3 + 4 * pos];
537            if ("yes".equals(st)) {
538                standalone = Boolean.TRUE;
539            } else if ("no".equals(st)) {
540                standalone = Boolean.FALSE;
541            } else {
542                checkRelaxed("illegal standalone value: " + st);
543            }
544            pos++;
545        }
546
547        if (pos != attributeCount) {
548            checkRelaxed("unexpected attributes in XML declaration");
549        }
550
551        isWhitespace = true;
552        text = null;
553    }
554
555    private String readComment(boolean returnText) throws IOException, XmlPullParserException {
556        read(START_COMMENT);
557
558        if (relaxed) {
559            return readUntil(END_COMMENT, returnText);
560        }
561
562        String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText);
563        if (peekCharacter() != '>') {
564            throw new XmlPullParserException("Comments may not contain --", this, null);
565        }
566        position++;
567        return commentText;
568    }
569
570    /**
571     * Read the document's DTD. Although this parser is non-validating, the DTD
572     * must be parsed to capture entity values and default attribute values.
573     */
574    private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException {
575        read(START_DOCTYPE);
576
577        int startPosition = -1;
578        if (saveDtdText) {
579            bufferCapture = new StringBuilder();
580            startPosition = position;
581        }
582        try {
583            skip();
584            rootElementName = readName();
585            readExternalId(true, true);
586            skip();
587            if (peekCharacter() == '[') {
588                readInternalSubset();
589            }
590            skip();
591        } finally {
592            if (saveDtdText) {
593                bufferCapture.append(buffer, 0, position);
594                bufferCapture.delete(0, startPosition);
595                text = bufferCapture.toString();
596                bufferCapture = null;
597            }
598        }
599
600        read('>');
601    }
602
603    /**
604     * Reads an external ID of one of these two forms:
605     *   SYSTEM "quoted system name"
606     *   PUBLIC "quoted public id" "quoted system name"
607     *
608     * If the system name is not required, this also supports lone public IDs of
609     * this form:
610     *   PUBLIC "quoted public id"
611     *
612     * Returns true if any ID was read.
613     */
614    private boolean readExternalId(boolean requireSystemName, boolean assignFields)
615            throws IOException, XmlPullParserException {
616        skip();
617        int c = peekCharacter();
618
619        if (c == 'S') {
620            read(SYSTEM);
621        } else if (c == 'P') {
622            read(PUBLIC);
623            skip();
624            if (assignFields) {
625                publicId = readQuotedId(true);
626            } else {
627                readQuotedId(false);
628            }
629        } else {
630            return false;
631        }
632
633        skip();
634
635        if (!requireSystemName) {
636            int delimiter = peekCharacter();
637            if (delimiter != '"' && delimiter != '\'') {
638                return true; // no system name!
639            }
640        }
641
642        if (assignFields) {
643            systemId = readQuotedId(true);
644        } else {
645            readQuotedId(false);
646        }
647        return true;
648    }
649
    // One-character delimiters that readQuotedId() hands to readUntil() to
    // find the closing quote matching the opening one.
    private static final char[] SINGLE_QUOTE = new char[] { '\'' };
    private static final char[] DOUBLE_QUOTE = new char[] { '"' };
652
653    /**
654     * Reads a quoted string, performing no entity escaping of the contents.
655     */
656    private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException {
657        int quote = peekCharacter();
658        char[] delimiter;
659        if (quote == '"') {
660            delimiter = DOUBLE_QUOTE;
661        } else if (quote == '\'') {
662            delimiter = SINGLE_QUOTE;
663        } else {
664            throw new XmlPullParserException("Expected a quoted string", this, null);
665        }
666        position++;
667        return readUntil(delimiter, returnText);
668    }
669
670    private void readInternalSubset() throws IOException, XmlPullParserException {
671        read('[');
672
673        while (true) {
674            skip();
675            if (peekCharacter() == ']') {
676                position++;
677                return;
678            }
679
680            int declarationType = peekType(true);
681            switch (declarationType) {
682            case ELEMENTDECL:
683                readElementDeclaration();
684                break;
685
686            case ATTLISTDECL:
687                readAttributeListDeclaration();
688                break;
689
690            case ENTITYDECL:
691                readEntityDeclaration();
692                break;
693
694            case NOTATIONDECL:
695                readNotationDeclaration();
696                break;
697
698            case PROCESSING_INSTRUCTION:
699                read(START_PROCESSING_INSTRUCTION);
700                readUntil(END_PROCESSING_INSTRUCTION, false);
701                break;
702
703            case COMMENT:
704                readComment(false);
705                break;
706
707            case PARAMETER_ENTITY_REF:
708                throw new XmlPullParserException(
709                        "Parameter entity references are not supported", this, null);
710
711            default:
712                throw new XmlPullParserException("Unexpected token", this, null);
713            }
714        }
715    }
716
717    /**
718     * Read an element declaration. This contains a name and a content spec.
719     *   <!ELEMENT foo EMPTY >
720     *   <!ELEMENT foo (bar?,(baz|quux)) >
721     *   <!ELEMENT foo (#PCDATA|bar)* >
722     */
723    private void readElementDeclaration() throws IOException, XmlPullParserException {
724        read(START_ELEMENT);
725        skip();
726        readName();
727        readContentSpec();
728        skip();
729        read('>');
730    }
731
732    /**
733     * Read an element content spec. This is a regular expression-like pattern
734     * of names or other content specs. The following operators are supported:
735     *   sequence:    (a,b,c)
736     *   choice:      (a|b|c)
737     *   optional:    a?
738     *   one or more: a+
739     *   any number:  a*
740     *
741     * The special name '#PCDATA' is permitted but only if it is the first
742     * element of the first group:
743     *   (#PCDATA|a|b)
744     *
745     * The top-level element must be either a choice, a sequence, or one of the
746     * special names EMPTY and ANY.
747     */
748    private void readContentSpec() throws IOException, XmlPullParserException {
749        // this implementation is very lenient; it scans for balanced parens only
750        skip();
751        int c = peekCharacter();
752        if (c == '(') {
753            int depth = 0;
754            do {
755                if (c == '(') {
756                    depth++;
757                } else if (c == ')') {
758                    depth--;
759                }
760                position++;
761                c = peekCharacter();
762            } while (depth > 0);
763
764            if (c == '*' || c == '?' || c == '+') {
765                position++;
766            }
767        } else if (c == EMPTY[0]) {
768            read(EMPTY);
769        } else if (c == ANY[0]) {
770            read(ANY);
771        } else {
772            throw new XmlPullParserException("Expected element content spec", this, null);
773        }
774    }
775
776    /**
777     * Reads an attribute list declaration such as the following:
778     *   <!ATTLIST foo
779     *       bar CDATA #IMPLIED
780     *       quux (a|b|c) "c"
781     *       baz NOTATION (a|b|c) #FIXED "c">
782     *
783     * Each attribute has a name, type and default.
784     *
785     * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY,
786     * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)"
787     * or NOTATION followed by an enumerated type.
788     *
789     * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or
790     * #FIXED with a quoted value.
791     */
792    private void readAttributeListDeclaration() throws IOException, XmlPullParserException {
793        read(START_ATTLIST);
794        skip();
795        String elementName = readName();
796
797        while (true) {
798            skip();
799            int c = peekCharacter();
800            if (c == '>') {
801                position++;
802                return;
803            }
804
805            // attribute name
806            String attributeName = readName();
807
808            // attribute type
809            skip();
810            if (position + 1 >= limit && !fillBuffer(2)) {
811                throw new XmlPullParserException("Malformed attribute list", this, null);
812            }
813            if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) {
814                read(NOTATION);
815                skip();
816            }
817            c = peekCharacter();
818            if (c == '(') {
819                position++;
820                while (true) {
821                    skip();
822                    readName();
823                    skip();
824                    c = peekCharacter();
825                    if (c == ')') {
826                        position++;
827                        break;
828                    } else if (c == '|') {
829                        position++;
830                    } else {
831                        throw new XmlPullParserException("Malformed attribute type", this, null);
832                    }
833                }
834            } else {
835                readName();
836            }
837
838            // default value
839            skip();
840            c = peekCharacter();
841            if (c == '#') {
842                position++;
843                c = peekCharacter();
844                if (c == 'R') {
845                    read(REQUIRED);
846                } else if (c == 'I') {
847                    read(IMPLIED);
848                } else if (c == 'F') {
849                    read(FIXED);
850                } else {
851                    throw new XmlPullParserException("Malformed attribute type", this, null);
852                }
853                skip();
854                c = peekCharacter();
855            }
856            if (c == '"' || c == '\'') {
857                position++;
858                // TODO: does this do escaping correctly?
859                String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE);
860                position++;
861                defineAttributeDefault(elementName, attributeName, value);
862            }
863        }
864    }
865
866    private void defineAttributeDefault(String elementName, String attributeName, String value) {
867        if (defaultAttributes == null) {
868            defaultAttributes = new HashMap<String, Map<String, String>>();
869        }
870        Map<String, String> elementAttributes = defaultAttributes.get(elementName);
871        if (elementAttributes == null) {
872            elementAttributes = new HashMap<String, String>();
873            defaultAttributes.put(elementName, elementAttributes);
874        }
875        elementAttributes.put(attributeName, value);
876    }
877
878    /**
879     * Read an entity declaration. The value of internal entities are inline:
880     *   <!ENTITY foo "bar">
881     *
882     * The values of external entities must be retrieved by URL or path:
883     *   <!ENTITY foo SYSTEM "http://host/file">
884     *   <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file">
885     *   <!ENTITY foo SYSTEM "../file.png" NDATA png>
886     *
887     * Entities may be general or parameterized. Parameterized entities are
888     * marked by a percent sign. Such entities may only be used in the DTD:
889     *   <!ENTITY % foo "bar">
890     */
891    private void readEntityDeclaration() throws IOException, XmlPullParserException {
892        read(START_ENTITY);
893        boolean generalEntity = true;
894
895        skip();
896        if (peekCharacter() == '%') {
897            generalEntity = false;
898            position++;
899            skip();
900        }
901
902        String name = readName();
903
904        skip();
905        int quote = peekCharacter();
906        String entityValue;
907        if (quote == '"' || quote == '\'') {
908            position++;
909            entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION);
910            position++;
911        } else if (readExternalId(true, false)) {
912            /*
913             * Map external entities to the empty string. This is dishonest,
914             * but it's consistent with Android's Expat pull parser.
915             */
916            entityValue = "";
917            skip();
918            if (peekCharacter() == NDATA[0]) {
919                read(NDATA);
920                skip();
921                readName();
922            }
923        } else {
924            throw new XmlPullParserException("Expected entity value or external ID", this, null);
925        }
926
927        if (generalEntity && processDocDecl) {
928            if (documentEntities == null) {
929                documentEntities = new HashMap<String, char[]>();
930            }
931            documentEntities.put(name, entityValue.toCharArray());
932        }
933
934        skip();
935        read('>');
936    }
937
938    private void readNotationDeclaration() throws IOException, XmlPullParserException {
939        read(START_NOTATION);
940        skip();
941        readName();
942        if (!readExternalId(false, false)) {
943            throw new XmlPullParserException(
944                    "Expected external ID or public ID for notation", this, null);
945        }
946        skip();
947        read('>');
948    }
949
950    private void readEndTag() throws IOException, XmlPullParserException {
951        read('<');
952        read('/');
953        name = readName(); // TODO: pass the expected name in as a hint?
954        skip();
955        read('>');
956
957        int sp = (depth - 1) * 4;
958
959        if (depth == 0) {
960            checkRelaxed("read end tag " + name + " with no tags open");
961            type = COMMENT;
962            return;
963        }
964
965        if (name.equals(elementStack[sp + 3])) {
966            namespace = elementStack[sp];
967            prefix = elementStack[sp + 1];
968            name = elementStack[sp + 2];
969        } else if (!relaxed) {
970            throw new XmlPullParserException(
971                    "expected: /" + elementStack[sp + 3] + " read: " + name, this, null);
972        }
973    }
974
975    /**
976     * Returns the type of the next token.
977     */
978    private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException {
979        if (position >= limit && !fillBuffer(1)) {
980            return END_DOCUMENT;
981        }
982
983        switch (buffer[position]) {
984        case '&':
985            return ENTITY_REF; // &
986        case '<':
987            if (position + 3 >= limit && !fillBuffer(4)) {
988                throw new XmlPullParserException("Dangling <", this, null);
989            }
990
991            switch (buffer[position + 1]) {
992            case '/':
993                return END_TAG; // </
994            case '?':
995                // we're looking for "<?xml " with case insensitivity
996                if ((position + 5 < limit || fillBuffer(6))
997                        && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
998                        && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
999                        && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
1000                        && (buffer[position + 5] == ' ')) {
1001                    return XML_DECLARATION; // <?xml
1002                } else {
1003                    return PROCESSING_INSTRUCTION; // <?
1004                }
1005            case '!':
1006                switch (buffer[position + 2]) {
1007                case 'D':
1008                    return DOCDECL; // <!D
1009                case '[':
1010                    return CDSECT; // <![
1011                case '-':
1012                    return COMMENT; // <!-
1013                case 'E':
1014                    switch (buffer[position + 3]) {
1015                    case 'L':
1016                        return ELEMENTDECL; // <!EL
1017                    case 'N':
1018                        return ENTITYDECL; // <!EN
1019                    }
1020                    break;
1021                case 'A':
1022                    return ATTLISTDECL;  // <!A
1023                case 'N':
1024                    return NOTATIONDECL; // <!N
1025                }
1026                throw new XmlPullParserException("Unexpected <!", this, null);
1027            default:
1028                return START_TAG; // <
1029            }
1030        case '%':
1031            return inDeclaration ? PARAMETER_ENTITY_REF : TEXT;
1032        default:
1033            return TEXT;
1034        }
1035    }
1036
1037    /**
1038     * Sets name and attributes
1039     */
1040    private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)
1041            throws IOException, XmlPullParserException {
1042        if (!xmldecl) {
1043            read('<');
1044        }
1045        name = readName();
1046        attributeCount = 0;
1047
1048        while (true) {
1049            skip();
1050
1051            if (position >= limit && !fillBuffer(1)) {
1052                checkRelaxed(UNEXPECTED_EOF);
1053                return;
1054            }
1055
1056            int c = buffer[position];
1057
1058            if (xmldecl) {
1059                if (c == '?') {
1060                    position++;
1061                    read('>');
1062                    return;
1063                }
1064            } else {
1065                if (c == '/') {
1066                    degenerated = true;
1067                    position++;
1068                    skip();
1069                    read('>');
1070                    break;
1071                } else if (c == '>') {
1072                    position++;
1073                    break;
1074                }
1075            }
1076
1077            String attrName = readName();
1078
1079            int i = (attributeCount++) * 4;
1080            attributes = ensureCapacity(attributes, i + 4);
1081            attributes[i] = "";
1082            attributes[i + 1] = null;
1083            attributes[i + 2] = attrName;
1084
1085            skip();
1086            if (position >= limit && !fillBuffer(1)) {
1087                checkRelaxed(UNEXPECTED_EOF);
1088                return;
1089            }
1090
1091            if (buffer[position] == '=') {
1092                position++;
1093
1094                skip();
1095                if (position >= limit && !fillBuffer(1)) {
1096                    checkRelaxed(UNEXPECTED_EOF);
1097                    return;
1098                }
1099                char delimiter = buffer[position];
1100
1101                if (delimiter == '\'' || delimiter == '"') {
1102                    position++;
1103                } else if (relaxed) {
1104                    delimiter = ' ';
1105                } else {
1106                    throw new XmlPullParserException("attr value delimiter missing!", this, null);
1107                }
1108
1109                attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure,
1110                        ValueContext.ATTRIBUTE);
1111
1112                if (delimiter != ' ') {
1113                    position++; // end quote
1114                }
1115            } else if (relaxed) {
1116                attributes[i + 3] = attrName;
1117            } else {
1118                checkRelaxed("Attr.value missing f. " + attrName);
1119                attributes[i + 3] = attrName;
1120            }
1121        }
1122
1123        int sp = depth++ * 4;
1124        elementStack = ensureCapacity(elementStack, sp + 4);
1125        elementStack[sp + 3] = name;
1126
1127        if (depth >= nspCounts.length) {
1128            int[] bigger = new int[depth + 4];
1129            System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
1130            nspCounts = bigger;
1131        }
1132
1133        nspCounts[depth] = nspCounts[depth - 1];
1134
1135        if (processNsp) {
1136            adjustNsp();
1137        } else {
1138            namespace = "";
1139        }
1140
1141        // For consistency with Expat, add default attributes after fixing namespaces.
1142        if (defaultAttributes != null) {
1143            Map<String, String> elementDefaultAttributes = defaultAttributes.get(name);
1144            if (elementDefaultAttributes != null) {
1145                for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) {
1146                    if (getAttributeValue(null, entry.getKey()) != null) {
1147                        continue; // an explicit value overrides the default
1148                    }
1149
1150                    int i = (attributeCount++) * 4;
1151                    attributes = ensureCapacity(attributes, i + 4);
1152                    attributes[i] = "";
1153                    attributes[i + 1] = null;
1154                    attributes[i + 2] = entry.getKey();
1155                    attributes[i + 3] = entry.getValue();
1156                }
1157            }
1158        }
1159
1160        elementStack[sp] = namespace;
1161        elementStack[sp + 1] = prefix;
1162        elementStack[sp + 2] = name;
1163    }
1164
1165    /**
1166     * Reads an entity reference from the buffer, resolves it, and writes the
1167     * resolved entity to {@code out}. If the entity cannot be read or resolved,
1168     * {@code out} will contain the partial entity reference.
1169     */
1170    private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure,
1171            ValueContext valueContext) throws IOException, XmlPullParserException {
1172        int start = out.length();
1173
1174        if (buffer[position++] != '&') {
1175            throw new AssertionError();
1176        }
1177
1178        out.append('&');
1179
1180        while (true) {
1181            int c = peekCharacter();
1182
1183            if (c == ';') {
1184                out.append(';');
1185                position++;
1186                break;
1187
1188            } else if (c >= 128
1189                    || (c >= '0' && c <= '9')
1190                    || (c >= 'a' && c <= 'z')
1191                    || (c >= 'A' && c <= 'Z')
1192                    || c == '_'
1193                    || c == '-'
1194                    || c == '#') {
1195                position++;
1196                out.append((char) c);
1197
1198            } else if (relaxed) {
1199                // intentionally leave the partial reference in 'out'
1200                return;
1201
1202            } else {
1203                throw new XmlPullParserException("unterminated entity ref", this, null);
1204            }
1205        }
1206
1207        String code = out.substring(start + 1, out.length() - 1);
1208
1209        if (isEntityToken) {
1210            name = code;
1211        }
1212
1213        if (code.startsWith("#")) {
1214            try {
1215                int c = code.startsWith("#x")
1216                        ? Integer.parseInt(code.substring(2), 16)
1217                        : Integer.parseInt(code.substring(1));
1218                out.delete(start, out.length());
1219                out.appendCodePoint(c);
1220                unresolved = false;
1221                return;
1222            } catch (NumberFormatException notANumber) {
1223                throw new XmlPullParserException("Invalid character reference: &" + code);
1224            } catch (IllegalArgumentException invalidCodePoint) {
1225                throw new XmlPullParserException("Invalid character reference: &" + code);
1226            }
1227        }
1228
1229        if (valueContext == ValueContext.ENTITY_DECLARATION) {
1230            // keep the unresolved &code; in the text to resolve later
1231            return;
1232        }
1233
1234        String defaultEntity = DEFAULT_ENTITIES.get(code);
1235        if (defaultEntity != null) {
1236            out.delete(start, out.length());
1237            unresolved = false;
1238            out.append(defaultEntity);
1239            return;
1240        }
1241
1242        char[] resolved;
1243        if (documentEntities != null && (resolved = documentEntities.get(code)) != null) {
1244            out.delete(start, out.length());
1245            unresolved = false;
1246            if (processDocDecl) {
1247                pushContentSource(resolved); // parse the entity as XML
1248            } else {
1249                out.append(resolved); // include the entity value as text
1250            }
1251            return;
1252        }
1253
1254        /*
1255         * The parser skipped an external DTD, and now we've encountered an
1256         * unknown entity that could have been declared there. Map it to the
1257         * empty string. This is dishonest, but it's consistent with Android's
1258         * old ExpatPullParser.
1259         */
1260        if (systemId != null) {
1261            out.delete(start, out.length());
1262            return;
1263        }
1264
1265        // keep the unresolved entity "&code;" in the text for relaxed clients
1266        unresolved = true;
1267        if (throwOnResolveFailure) {
1268            checkRelaxed("unresolved: &" + code + ";");
1269        }
1270    }
1271
    /**
     * Where a value is found impacts how that value is interpreted. For
     * example, in attributes, "\n" must be replaced with a space character. In
     * text, "]]>" is forbidden. In entity declarations, named references are
     * not resolved.
     */
    enum ValueContext {
        /**
         * An attribute value. Line breaks ("\r", "\r\n" and "\n") are replaced
         * with a single space, and "<" is reported as illegal.
         */
        ATTRIBUTE,

        /**
         * Character data between tags. A lone "\r" or a "\r\n" pair becomes
         * "\n", and the sequence "]]>" is reported as illegal.
         */
        TEXT,

        /**
         * The replacement text of an entity declaration. Named entity
         * references are kept as-is rather than expanded, and "%" is rejected
         * because parameter entities are not supported.
         */
        ENTITY_DECLARATION
    }
1283
    /**
     * Returns the current text or attribute value, reading from the current
     * position up to (but not including) the terminating delimiter. This also
     * has the side effect of setting isWhitespace to false if a non-whitespace
     * character is encountered.
     *
     * <p>The delimiter is not consumed: on return, {@code position} points at
     * it, so callers that expect a closing quote must skip it themselves. If
     * the input ends before a delimiter is seen, the characters read so far
     * are returned (or the empty string if there were none).
     *
     * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted
     *     attributes, or a space for unquoted attributes. With a space, any
     *     character {@code <= ' '} or a {@code >} ends the value.
     * @param resolveEntities true to expand entity references in place; false
     *     to stop reading at the first {@code &}.
     * @param throwOnResolveFailure passed through to {@code readEntity} for
     *     each reference encountered.
     * @param valueContext where the value occurs; selects the line-break,
     *     {@code ]]>} and {@code %} handling described below.
     * @return the value with entities expanded and line breaks normalized.
     */
    private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure,
            ValueContext valueContext) throws IOException, XmlPullParserException {

        /*
         * This method returns all of the characters from the current position
         * through to an appropriate delimiter.
         *
         * If we're lucky (which we usually are), we'll return a single slice of
         * the buffer. This fast path avoids allocating a string builder.
         *
         * There are 6 unlucky characters we could encounter:
         *  - "&":  entities must be resolved.
         *  - "%":  parameter entities are unsupported in entity values.
         *  - "<":  this isn't permitted in attributes unless relaxed.
         *  - "]":  this requires a lookahead to defend against the forbidden
         *          CDATA section delimiter "]]>".
         *  - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it
         *          isn't followed by "\n", we replace "\r" with either a "\n"
         *          in text nodes or a space in attribute values.
         *  - "\n": In attribute values, "\n" must be replaced with a space.
         *
         * We could also get unlucky by needing to refill the buffer midway
         * through the text.
         */

        // 'start' marks the first buffered character not yet copied to 'result'
        int start = position;
        // stays null for as long as the fast path is still possible
        StringBuilder result = null;

        // if a text section was already started, prefix the start
        if (valueContext == ValueContext.TEXT && text != null) {
            result = new StringBuilder();
            result.append(text);
        }

        while (true) {

            /*
             * Make sure we have at least a single character to read from the
             * buffer. This mutates the buffer, so save the partial result
             * to the slow path string builder first.
             */
            if (position >= limit) {
                if (start < position) {
                    if (result == null) {
                        result = new StringBuilder();
                    }
                    result.append(buffer, start, position - start);
                }
                if (!fillBuffer(1)) {
                    // end of input: return whatever was accumulated
                    return result != null ? result.toString() : "";
                }
                start = position;
            }

            char c = buffer[position];

            // stop at the delimiter, or at '&' when entities are not being resolved
            if (c == delimiter
                    || (delimiter == ' ' && (c <= ' ' || c == '>'))
                    || c == '&' && !resolveEntities) {
                break;
            }

            // ordinary character for this context: stay on the fast path
            if (c != '\r'
                    && (c != '\n' || valueContext != ValueContext.ATTRIBUTE)
                    && c != '&'
                    && c != '<'
                    && (c != ']' || valueContext != ValueContext.TEXT)
                    && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) {
                isWhitespace &= (c <= ' ');
                position++;
                continue;
            }

            /*
             * We've encountered an unlucky character! Convert from fast
             * path to slow path if we haven't done so already.
             */
            if (result == null) {
                result = new StringBuilder();
            }
            // flush the pending slice before any lookahead can refill the buffer
            result.append(buffer, start, position - start);

            if (c == '\r') {
                // swallow the "\n" of a "\r\n" pair so only one character is emitted
                if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') {
                    position++;
                }
                c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n';

            } else if (c == '\n') {
                // only reachable for attribute values (see the fast-path test above)
                c = ' ';

            } else if (c == '&') {
                isWhitespace = false; // TODO: what if the entity resolves to whitespace?
                // readEntity consumes the reference and appends to 'result' itself
                readEntity(result, false, throwOnResolveFailure, valueContext);
                start = position;
                continue;

            } else if (c == '<') {
                if (valueContext == ValueContext.ATTRIBUTE) {
                    checkRelaxed("Illegal: \"<\" inside attribute value");
                }
                isWhitespace = false;

            } else if (c == ']') {
                // only reachable for text; look ahead for the forbidden "]]>"
                if ((position + 2 < limit || fillBuffer(3))
                        && buffer[position + 1] == ']' && buffer[position + 2] == '>') {
                    checkRelaxed("Illegal: \"]]>\" outside CDATA section");
                }
                isWhitespace = false;

            } else if (c == '%') {
                // only reachable inside entity declarations
                throw new XmlPullParserException("This parser doesn't support parameter entities",
                        this, null);

            } else {
                throw new AssertionError();
            }

            // emit the (possibly replaced) character and restart the pending slice after it
            position++;
            result.append(c);
            start = position;
        }

        if (result == null) {
            // fast path: the whole value is one contiguous slice of the buffer
            return stringPool.get(buffer, start, position - start);
        } else {
            result.append(buffer, start, position - start);
            return result.toString();
        }
    }
1422
1423    private void read(char expected) throws IOException, XmlPullParserException {
1424        int c = peekCharacter();
1425        if (c != expected) {
1426            checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'");
1427        }
1428        position++;
1429    }
1430
1431    private void read(char[] chars) throws IOException, XmlPullParserException {
1432        if (position + chars.length >= limit && !fillBuffer(chars.length)) {
1433            checkRelaxed("expected: '" + new String(chars) + "' but was EOF");
1434            return;
1435        }
1436
1437        // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
1438        // when the VM has better method inlining
1439        for (int i = 0; i < chars.length; i++) {
1440            if (buffer[position + i] != chars[i]) {
1441                checkRelaxed("expected: \"" + new String(chars) + "\" but was \""
1442                        + new String(buffer, position, chars.length) + "...\"");
1443            }
1444        }
1445
1446        position += chars.length;
1447    }
1448
1449    private int peekCharacter() throws IOException, XmlPullParserException {
1450        if (position < limit || fillBuffer(1)) {
1451            return buffer[position];
1452        }
1453        return -1;
1454    }
1455
1456    /**
1457     * Returns true once {@code limit - position >= minimum}. If the data is
1458     * exhausted before that many characters are available, this returns
1459     * false.
1460     */
1461    private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException {
1462        // If we've exhausted the current content source, remove it
1463        while (nextContentSource != null) {
1464            if (position < limit) {
1465                throw new XmlPullParserException("Unbalanced entity!", this, null);
1466            }
1467            popContentSource();
1468            if (limit - position >= minimum) {
1469                return true;
1470            }
1471        }
1472
1473        // Before clobbering the old characters, update where buffer starts
1474        for (int i = 0; i < position; i++) {
1475            if (buffer[i] == '\n') {
1476                bufferStartLine++;
1477                bufferStartColumn = 0;
1478            } else {
1479                bufferStartColumn++;
1480            }
1481        }
1482
1483        if (bufferCapture != null) {
1484            bufferCapture.append(buffer, 0, position);
1485        }
1486
1487        if (limit != position) {
1488            limit -= position;
1489            System.arraycopy(buffer, position, buffer, 0, limit);
1490        } else {
1491            limit = 0;
1492        }
1493
1494        position = 0;
1495        int total;
1496        while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) {
1497            limit += total;
1498            if (limit >= minimum) {
1499                return true;
1500            }
1501        }
1502        return false;
1503    }
1504
1505    /**
1506     * Returns an element or attribute name. This is always non-empty for
1507     * non-relaxed parsers.
1508     */
1509    private String readName() throws IOException, XmlPullParserException {
1510        if (position >= limit && !fillBuffer(1)) {
1511            checkRelaxed("name expected");
1512            return "";
1513        }
1514
1515        int start = position;
1516        StringBuilder result = null;
1517
1518        // read the first character
1519        char c = buffer[position];
1520        if ((c >= 'a' && c <= 'z')
1521                || (c >= 'A' && c <= 'Z')
1522                || c == '_'
1523                || c == ':'
1524                || c >= '\u00c0' // TODO: check the XML spec
1525                || relaxed) {
1526            position++;
1527        } else {
1528            checkRelaxed("name expected");
1529            return "";
1530        }
1531
1532        while (true) {
1533            /*
1534             * Make sure we have at least a single character to read from the
1535             * buffer. This mutates the buffer, so save the partial result
1536             * to the slow path string builder first.
1537             */
1538            if (position >= limit) {
1539                if (result == null) {
1540                    result = new StringBuilder();
1541                }
1542                result.append(buffer, start, position - start);
1543                if (!fillBuffer(1)) {
1544                    return result.toString();
1545                }
1546                start = position;
1547            }
1548
1549            // read another character
1550            c = buffer[position];
1551            if ((c >= 'a' && c <= 'z')
1552                    || (c >= 'A' && c <= 'Z')
1553                    || (c >= '0' && c <= '9')
1554                    || c == '_'
1555                    || c == '-'
1556                    || c == ':'
1557                    || c == '.'
1558                    || c >= '\u00b7') {  // TODO: check the XML spec
1559                position++;
1560                continue;
1561            }
1562
1563            // we encountered a non-name character. done!
1564            if (result == null) {
1565                return stringPool.get(buffer, start, position - start);
1566            } else {
1567                result.append(buffer, start, position - start);
1568                return result.toString();
1569            }
1570        }
1571    }
1572
1573    private void skip() throws IOException, XmlPullParserException {
1574        while (position < limit || fillBuffer(1)) {
1575            int c = buffer[position];
1576            if (c > ' ') {
1577                break;
1578            }
1579            position++;
1580        }
1581    }
1582
1583    //  public part starts here...
1584
1585    public void setInput(Reader reader) throws XmlPullParserException {
1586        this.reader = reader;
1587
1588        type = START_DOCUMENT;
1589        name = null;
1590        namespace = null;
1591        degenerated = false;
1592        attributeCount = -1;
1593        encoding = null;
1594        version = null;
1595        standalone = null;
1596
1597        if (reader == null) {
1598            return;
1599        }
1600
1601        position = 0;
1602        limit = 0;
1603        bufferStartLine = 0;
1604        bufferStartColumn = 0;
1605        depth = 0;
1606        documentEntities = null;
1607    }
1608
    /**
     * Sets the input to the given byte stream, decoding it with
     * {@code charset} or, when {@code charset} is null, with an encoding
     * sniffed from the first bytes of the stream.
     *
     * <p>Detection reads up to four bytes and recognizes UTF-32, UTF-16 and
     * UTF-8 byte order marks, a bare {@code <} in UTF-32, a bare {@code <?}
     * in UTF-16, and an ASCII-compatible {@code <?xm} prefix. In the last
     * case the declaration is read through its closing {@code >} and the
     * quoted value following "encoding", if present, names the charset. If
     * nothing matches, or the stream holds fewer than four bytes, UTF-8 is
     * used. Characters consumed during detection, other than a byte order
     * mark, are left at the start of the buffer so the parser still sees them.
     *
     * @param is the stream to read; must not be null
     * @param charset the charset name, or null to detect it
     * @throws IllegalArgumentException if {@code is} is null
     * @throws XmlPullParserException if anything fails while sniffing the
     *     stream or creating the decoder; the cause is wrapped
     */
    public void setInput(InputStream is, String charset) throws XmlPullParserException {
        position = 0;
        limit = 0;
        boolean detectCharset = (charset == null);

        if (is == null) {
            throw new IllegalArgumentException();
        }

        try {
            if (detectCharset) {
                // read the four bytes looking for an indication of the encoding in use
                int firstFourBytes = 0;
                while (limit < 4) {
                    int i = is.read();
                    if (i == -1) {
                        break;
                    }
                    // pack the bytes big-endian; also keep each one in the buffer
                    firstFourBytes = (firstFourBytes << 8) | i;
                    buffer[limit++] = (char) i;
                }

                if (limit == 4) {
                    switch (firstFourBytes) {
                    case 0x00000FEFF: // UTF-32BE BOM
                        charset = "UTF-32BE";
                        limit = 0; // drop the BOM
                        break;

                    case 0x0FFFE0000: // UTF-32LE BOM
                        charset = "UTF-32LE";
                        limit = 0; // drop the BOM
                        break;

                    case 0x0000003c: // '<' in UTF-32BE
                        charset = "UTF-32BE";
                        // the four bytes were one character; keep it decoded
                        buffer[0] = '<';
                        limit = 1;
                        break;

                    case 0x03c000000: // '<' in UTF-32LE
                        charset = "UTF-32LE";
                        buffer[0] = '<';
                        limit = 1;
                        break;

                    case 0x0003c003f: // "<?" in UTF-16BE
                        charset = "UTF-16BE";
                        // the four bytes were two characters; keep them decoded
                        buffer[0] = '<';
                        buffer[1] = '?';
                        limit = 2;
                        break;

                    case 0x03c003f00: // "<?" in UTF-16LE
                        charset = "UTF-16LE";
                        buffer[0] = '<';
                        buffer[1] = '?';
                        limit = 2;
                        break;

                    case 0x03c3f786d: // "<?xm" in ASCII etc.
                        // read the rest of the declaration, one byte per char, up to '>'
                        while (true) {
                            int i = is.read();
                            if (i == -1) {
                                break;
                            }
                            buffer[limit++] = (char) i;
                            if (i == '>') {
                                String s = new String(buffer, 0, limit);
                                int i0 = s.indexOf("encoding");
                                if (i0 != -1) {
                                    // advance to the opening quote of the encoding value
                                    while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') {
                                        i0++;
                                    }
                                    char deli = s.charAt(i0++);
                                    int i1 = s.indexOf(deli, i0);
                                    charset = s.substring(i0, i1);
                                }
                                break;
                            }
                        }
                        break;

                    default:
                        // handle a byte order mark followed by something other than <?
                        if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) {
                            charset = "UTF-16BE";
                            // bytes 2 and 3 form the first character after the BOM
                            buffer[0] = (char) ((buffer[2] << 8) | buffer[3]);
                            limit = 1;
                        } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) {
                            charset = "UTF-16LE";
                            buffer[0] = (char) ((buffer[3] << 8) | buffer[2]);
                            limit = 1;
                        } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) {
                            charset = "UTF-8";
                            // the byte after the three-byte BOM is kept as one character
                            buffer[0] = buffer[3];
                            limit = 1;
                        }
                    }
                }
            }

            if (charset == null) {
                charset = "UTF-8";
            }

            // setInput(Reader) resets limit, so preserve the characters buffered above
            int savedLimit = limit;
            setInput(new InputStreamReader(is, charset));
            encoding = charset;
            limit = savedLimit;

            /*
             * Skip the optional BOM if we didn't above. This decrements limit
             * rather than incrementing position so that <?xml version='1.0'?>
             * is still at character 0.
             */
            if (!detectCharset && peekCharacter() == 0xfeff) {
                limit--;
                System.arraycopy(buffer, 1, buffer, 0, limit);
            }
        } catch (Exception e) {
            throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e);
        }
    }
1733
1734    public void close() throws IOException {
1735        if (reader != null) {
1736            reader.close();
1737        }
1738    }
1739
1740    public boolean getFeature(String feature) {
1741        if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
1742            return processNsp;
1743        } else if (FEATURE_RELAXED.equals(feature)) {
1744            return relaxed;
1745        } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) {
1746            return processDocDecl;
1747        } else {
1748            return false;
1749        }
1750    }
1751
1752    public String getInputEncoding() {
1753        return encoding;
1754    }
1755
1756    public void defineEntityReplacementText(String entity, String value)
1757            throws XmlPullParserException {
1758        if (processDocDecl) {
1759            throw new IllegalStateException(
1760                    "Entity replacement text may not be defined with DOCTYPE processing enabled.");
1761        }
1762        if (reader == null) {
1763            throw new IllegalStateException(
1764                    "Entity replacement text must be defined after setInput()");
1765        }
1766        if (documentEntities == null) {
1767            documentEntities = new HashMap<String, char[]>();
1768        }
1769        documentEntities.put(entity, value.toCharArray());
1770    }
1771
1772    public Object getProperty(String property) {
1773        if (property.equals(PROPERTY_XMLDECL_VERSION)) {
1774            return version;
1775        } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) {
1776            return standalone;
1777        } else if (property.equals(PROPERTY_LOCATION)) {
1778            return location != null ? location : reader.toString();
1779        } else {
1780            return null;
1781        }
1782    }
1783
1784    /**
1785     * Returns the root element's name if it was declared in the DTD. This
1786     * equals the first tag's name for valid documents.
1787     */
1788    public String getRootElementName() {
1789        return rootElementName;
1790    }
1791
1792    /**
1793     * Returns the document's system ID if it was declared. This is typically a
1794     * string like {@code http://www.w3.org/TR/html4/strict.dtd}.
1795     */
1796    public String getSystemId() {
1797        return systemId;
1798    }
1799
1800    /**
1801     * Returns the document's public ID if it was declared. This is typically a
1802     * string like {@code -//W3C//DTD HTML 4.01//EN}.
1803     */
1804    public String getPublicId() {
1805        return publicId;
1806    }
1807
1808    public int getNamespaceCount(int depth) {
1809        if (depth > this.depth) {
1810            throw new IndexOutOfBoundsException();
1811        }
1812        return nspCounts[depth];
1813    }
1814
1815    public String getNamespacePrefix(int pos) {
1816        return nspStack[pos * 2];
1817    }
1818
1819    public String getNamespaceUri(int pos) {
1820        return nspStack[(pos * 2) + 1];
1821    }
1822
1823    public String getNamespace(String prefix) {
1824        if ("xml".equals(prefix)) {
1825            return "http://www.w3.org/XML/1998/namespace";
1826        }
1827        if ("xmlns".equals(prefix)) {
1828            return "http://www.w3.org/2000/xmlns/";
1829        }
1830
1831        for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) {
1832            if (prefix == null) {
1833                if (nspStack[i] == null) {
1834                    return nspStack[i + 1];
1835                }
1836            } else if (prefix.equals(nspStack[i])) {
1837                return nspStack[i + 1];
1838            }
1839        }
1840        return null;
1841    }
1842
1843    public int getDepth() {
1844        return depth;
1845    }
1846
1847    public String getPositionDescription() {
1848        StringBuilder buf = new StringBuilder(type < TYPES.length ? TYPES[type] : "unknown");
1849        buf.append(' ');
1850
1851        if (type == START_TAG || type == END_TAG) {
1852            if (degenerated) {
1853                buf.append("(empty) ");
1854            }
1855            buf.append('<');
1856            if (type == END_TAG) {
1857                buf.append('/');
1858            }
1859
1860            if (prefix != null) {
1861                buf.append("{" + namespace + "}" + prefix + ":");
1862            }
1863            buf.append(name);
1864
1865            int cnt = attributeCount * 4;
1866            for (int i = 0; i < cnt; i += 4) {
1867                buf.append(' ');
1868                if (attributes[i + 1] != null) {
1869                    buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":");
1870                }
1871                buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'");
1872            }
1873
1874            buf.append('>');
1875        } else if (type == IGNORABLE_WHITESPACE) {
1876            ;
1877        } else if (type != TEXT) {
1878            buf.append(getText());
1879        } else if (isWhitespace) {
1880            buf.append("(whitespace)");
1881        } else {
1882            String text = getText();
1883            if (text.length() > 16) {
1884                text = text.substring(0, 16) + "...";
1885            }
1886            buf.append(text);
1887        }
1888
1889        buf.append("@" + getLineNumber() + ":" + getColumnNumber());
1890        if (location != null) {
1891            buf.append(" in ");
1892            buf.append(location);
1893        } else if (reader != null) {
1894            buf.append(" in ");
1895            buf.append(reader.toString());
1896        }
1897        return buf.toString();
1898    }
1899
1900    public int getLineNumber() {
1901        int result = bufferStartLine;
1902        for (int i = 0; i < position; i++) {
1903            if (buffer[i] == '\n') {
1904                result++;
1905            }
1906        }
1907        return result + 1; // the first line is '1'
1908    }
1909
1910    public int getColumnNumber() {
1911        int result = bufferStartColumn;
1912        for (int i = 0; i < position; i++) {
1913            if (buffer[i] == '\n') {
1914                result = 0;
1915            } else {
1916                result++;
1917            }
1918        }
1919        return result + 1; // the first column is '1'
1920    }
1921
1922    public boolean isWhitespace() throws XmlPullParserException {
1923        if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) {
1924            throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1925        }
1926        return isWhitespace;
1927    }
1928
1929    public String getText() {
1930        if (type < TEXT || (type == ENTITY_REF && unresolved)) {
1931            return null;
1932        } else if (text == null) {
1933            return "";
1934        } else {
1935            return text;
1936        }
1937    }
1938
1939    public char[] getTextCharacters(int[] poslen) {
1940        String text = getText();
1941        if (text == null) {
1942            poslen[0] = -1;
1943            poslen[1] = -1;
1944            return null;
1945        }
1946        char[] result = text.toCharArray();
1947        poslen[0] = 0;
1948        poslen[1] = result.length;
1949        return result;
1950    }
1951
1952    public String getNamespace() {
1953        return namespace;
1954    }
1955
1956    public String getName() {
1957        return name;
1958    }
1959
1960    public String getPrefix() {
1961        return prefix;
1962    }
1963
1964    public boolean isEmptyElementTag() throws XmlPullParserException {
1965        if (type != START_TAG) {
1966            throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1967        }
1968        return degenerated;
1969    }
1970
1971    public int getAttributeCount() {
1972        return attributeCount;
1973    }
1974
1975    public String getAttributeType(int index) {
1976        return "CDATA";
1977    }
1978
1979    public boolean isAttributeDefault(int index) {
1980        return false;
1981    }
1982
1983    public String getAttributeNamespace(int index) {
1984        if (index >= attributeCount) {
1985            throw new IndexOutOfBoundsException();
1986        }
1987        return attributes[index * 4];
1988    }
1989
1990    public String getAttributeName(int index) {
1991        if (index >= attributeCount) {
1992            throw new IndexOutOfBoundsException();
1993        }
1994        return attributes[(index * 4) + 2];
1995    }
1996
1997    public String getAttributePrefix(int index) {
1998        if (index >= attributeCount) {
1999            throw new IndexOutOfBoundsException();
2000        }
2001        return attributes[(index * 4) + 1];
2002    }
2003
2004    public String getAttributeValue(int index) {
2005        if (index >= attributeCount) {
2006            throw new IndexOutOfBoundsException();
2007        }
2008        return attributes[(index * 4) + 3];
2009    }
2010
2011    public String getAttributeValue(String namespace, String name) {
2012        for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) {
2013            if (attributes[i + 2].equals(name)
2014                    && (namespace == null || attributes[i].equals(namespace))) {
2015                return attributes[i + 3];
2016            }
2017        }
2018
2019        return null;
2020    }
2021
2022    public int getEventType() throws XmlPullParserException {
2023        return type;
2024    }
2025
2026    // utility methods to make XML parsing easier ...
2027
2028    public int nextTag() throws XmlPullParserException, IOException {
2029        next();
2030        if (type == TEXT && isWhitespace) {
2031            next();
2032        }
2033
2034        if (type != END_TAG && type != START_TAG) {
2035            throw new XmlPullParserException("unexpected type", this, null);
2036        }
2037
2038        return type;
2039    }
2040
2041    public void require(int type, String namespace, String name)
2042            throws XmlPullParserException, IOException {
2043        if (type != this.type
2044                || (namespace != null && !namespace.equals(getNamespace()))
2045                || (name != null && !name.equals(getName()))) {
2046            throw new XmlPullParserException(
2047                    "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null);
2048        }
2049    }
2050
2051    public String nextText() throws XmlPullParserException, IOException {
2052        if (type != START_TAG) {
2053            throw new XmlPullParserException("precondition: START_TAG", this, null);
2054        }
2055
2056        next();
2057
2058        String result;
2059        if (type == TEXT) {
2060            result = getText();
2061            next();
2062        } else {
2063            result = "";
2064        }
2065
2066        if (type != END_TAG) {
2067            throw new XmlPullParserException("END_TAG expected", this, null);
2068        }
2069
2070        return result;
2071    }
2072
2073    public void setFeature(String feature, boolean value) throws XmlPullParserException {
2074        if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
2075            processNsp = value;
2076        } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) {
2077            processDocDecl = value;
2078        } else if (FEATURE_RELAXED.equals(feature)) {
2079            relaxed = value;
2080        } else {
2081            throw new XmlPullParserException("unsupported feature: " + feature, this, null);
2082        }
2083    }
2084
2085    public void setProperty(String property, Object value) throws XmlPullParserException {
2086        if (property.equals(PROPERTY_LOCATION)) {
2087            location = String.valueOf(value);
2088        } else {
2089            throw new XmlPullParserException("unsupported property: " + property);
2090        }
2091    }
2092
2093    /**
2094     * A chain of buffers containing XML content. Each content source contains
2095     * the parser's primary read buffer or the characters of entities actively
2096     * being parsed.
2097     *
2098     * <p>For example, note the buffers needed to parse this document:
2099     * <pre>   {@code
2100     *   <!DOCTYPE foo [
2101     *       <!ENTITY baz "ghi">
2102     *       <!ENTITY bar "def &baz; jkl">
2103     *   ]>
2104     *   <foo>abc &bar; mno</foo>
2105     * }</pre>
2106     *
2107     * <p>Things get interesting when the bar entity is encountered. At that
2108     * point two buffers are active:
2109     * <ol>
2110     * <li>The value for the bar entity, containing {@code "def &baz; jkl"}
2111     * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2112     * </ol>
2113     * <p>The parser will return the characters {@code "def "} from the bar
2114     * entity's buffer, and then it will encounter the baz entity. To handle
2115     * that, three buffers will be active:
2116     * <ol>
2117     * <li>The value for the baz entity, containing {@code "ghi"}
2118     * <li>The remaining value for the bar entity, containing {@code " jkl"}
2119     * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2120     * </ol>
2121     * <p>The parser will then return the characters {@code ghi jkl mno} in that
2122     * sequence by reading each buffer in sequence.
2123     */
2124    static class ContentSource {
2125        private final ContentSource next;
2126        private final char[] buffer;
2127        private final int position;
2128        private final int limit;
2129        ContentSource(ContentSource next, char[] buffer, int position, int limit) {
2130            this.next = next;
2131            this.buffer = buffer;
2132            this.position = position;
2133            this.limit = limit;
2134        }
2135    }
2136
2137    /**
2138     * Prepends the characters of {@code newBuffer} to be read before the
2139     * current buffer.
2140     */
2141    private void pushContentSource(char[] newBuffer) {
2142        nextContentSource = new ContentSource(nextContentSource, buffer, position, limit);
2143        buffer = newBuffer;
2144        position = 0;
2145        limit = newBuffer.length;
2146    }
2147
2148    /**
2149     * Replaces the current exhausted buffer with the next buffer in the chain.
2150     */
2151    private void popContentSource() {
2152        buffer = nextContentSource.buffer;
2153        position = nextContentSource.position;
2154        limit = nextContentSource.limit;
2155        nextContentSource = nextContentSource.next;
2156    }
2157}
2158