1/* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The  above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE. */
20
21// Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode)
22
23package org.kxml2.io;
24
25import java.io.Closeable;
26import java.io.IOException;
27import java.io.InputStream;
28import java.io.InputStreamReader;
29import java.io.Reader;
30import java.util.HashMap;
31import java.util.Map;
32import libcore.internal.StringPool;
33import org.xmlpull.v1.XmlPullParser;
34import org.xmlpull.v1.XmlPullParserException;
35
36/**
37 * An XML pull parser with limited support for parsing internal DTDs.
38 */
39public class KXmlParser implements XmlPullParser, Closeable {
40
    // URIs of the XmlPull properties and features named by these constants.
    // NOTE(review): the accessors that consult them are outside this chunk.
    private static final String PROPERTY_XMLDECL_VERSION
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
    private static final String PROPERTY_XMLDECL_STANDALONE
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
    private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
    private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed";

    // Replacement text of the five predefined XML entities, keyed by the
    // entity name without the surrounding '&' and ';'.
    private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>();
    static {
        DEFAULT_ENTITIES.put("lt", "<");
        DEFAULT_ENTITIES.put("gt", ">");
        DEFAULT_ENTITIES.put("amp", "&");
        DEFAULT_ENTITIES.put("apos", "'");
        DEFAULT_ENTITIES.put("quot", "\"");
    }

    // Token types for the declarations of a DTD internal subset.
    // readInternalSubset() dispatches on these after calling peekType(true).
    private static final int ELEMENTDECL = 11;
    private static final int ENTITYDECL = 12;
    private static final int ATTLISTDECL = 13;
    private static final int NOTATIONDECL = 14;
    private static final int PARAMETER_ENTITY_REF = 15;

    // Literal character sequences that are matched against the input through
    // read(char[]) and readUntil(char[], boolean).
    private static final char[] START_COMMENT = { '<', '!', '-', '-' };
    private static final char[] END_COMMENT = { '-', '-', '>' };
    private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
    private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
    private static final char[] END_CDATA = { ']', ']', '>' };
    private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
    private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
    private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
    private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' };
    private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' };
    private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' };
    private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' };
    private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' };
    private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' };
    private static final char[] ANY = new char[]{ 'A', 'N', 'Y' };
    private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' };
    private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' };
    private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' };
    private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' };

    // Error message that readUntil() passes to checkRelaxed() when the input
    // ends before the delimiter is found.
    static final private String UNEXPECTED_EOF = "Unexpected EOF";
    // NOTE(review): not referenced in the visible part of the file.
    static final private String ILLEGAL_TYPE = "Wrong event type";
    // Internal pseudo token type: when peekType(false) returns this value,
    // next() consumes the XML declaration through readXmlDeclaration() and
    // then peeks again.
    static final private int XML_DECLARATION = 998;
87
    // general

    // NOTE(review): not used in the visible part of the file; presumably the
    // value behind PROPERTY_LOCATION - confirm against the accessors.
    private String location;

    // Values taken from the XML declaration by readXmlDeclaration(). That
    // method assigns standalone only for the values "yes" and "no".
    private String version;
    private Boolean standalone;

    // Values taken from the <!DOCTYPE> by readDoctype() and readExternalId().
    private String rootElementName;
    private String systemId;
    private String publicId;

    /**
     * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines
     * entity values and default attribute values. These values are parsed at
     * inclusion time and may contain both tags and entity references.
     *
     * <p>If this is false, the user must {@link #defineEntityReplacementText
     * define entity values manually}. Such entity values are literal strings
     * and will not be parsed. There is no API to define default attributes
     * manually.
     */
    private boolean processDocDecl;
    // NOTE(review): presumably enables namespace processing; the code that
    // reads this flag is outside this chunk.
    private boolean processNsp;
    // When true, checkRelaxed() records an error message instead of throwing.
    private boolean relaxed;
    // Set by keepNamespaceAttributes(); read by adjustNsp() to decide whether
    // xmlns declarations stay in the attribute list.
    private boolean keepNamespaceAttributes;

    /**
     * If non-null, the contents of the read buffer must be copied into this
     * string builder before the read buffer is overwritten. This is used to
     * capture the raw DTD text while parsing the DTD.
     */
    private StringBuilder bufferCapture;

    /**
     * Entities defined in or for this document. This map is created lazily.
     */
    private Map<String, char[]> documentEntities;

    /**
     * Default attributes in this document. The outer map's key is the element
     * name; the inner map's key is the attribute name. Both keys should be
     * without namespace adjustments. This map is created lazily.
     */
    private Map<String, Map<String, String>> defaultAttributes;


    // Element nesting depth. next() decrements it on the call that follows an
    // END_TAG event and treats depth == 0 as "outside any element".
    private int depth;
    // NOTE(review): not used in the visible part of the file; presumably the
    // stack of open element names - confirm.
    private String[] elementStack = new String[16];
    // Namespace declarations stored as pairs by adjustNsp(): slot 2*k holds the
    // declared prefix (null for a default namespace declaration) and slot
    // 2*k + 1 holds the namespace URI.
    private String[] nspStack = new String[8];
    // nspCounts[depth] is the index of the next free pair in nspStack for the
    // current depth; adjustNsp() increments it for every declaration it stores.
    private int[] nspCounts = new int[4];

    // source

    // The input; next() fails with "setInput() must be called first." while
    // this is null.
    private Reader reader;
    // Assigned by readXmlDeclaration() when the declaration has an encoding
    // attribute. NOTE(review): other writers may exist outside this chunk.
    private String encoding;
    private ContentSource nextContentSource;
    // Read buffer: position is the index of the next unread character and limit
    // is compared against position to decide when fillBuffer() must be called.
    private char[] buffer = new char[8192];
    private int position = 0;
    private int limit = 0;

    /*
     * Track the number of newlines and columns preceding the current buffer. To
     * compute the line and column of a position in the buffer, compute the line
     * and column in the buffer and add the preceding values.
     */
    private int bufferStartLine;
    private int bufferStartColumn;

    // the current token

    // Event type of the current token, as returned by next() and nextToken().
    private int type;
    // Reset to true by next() before each token is read. NOTE(review): cleared
    // by code outside this chunk, presumably when non-whitespace text is seen.
    private boolean isWhitespace;
    private String namespace;
    private String prefix;
    private String name;
    private String text;

    // When true, the next call to next() reports END_TAG without reading any
    // input and clears the flag. NOTE(review): presumably set for empty-element
    // tags by parseStartTag(), which is outside this chunk.
    private boolean degenerated;
    // Number of attributes of the current start tag; next() resets it to -1
    // before reading a token.
    private int attributeCount;

    // True if and only if we've encountered the START_TAG of an XML element at depth == 0.
    private boolean parsedTopLevelStartTag;

    /*
     * The current element's attributes arranged in groups of 4:
     * i + 0 = attribute namespace URI
     * i + 1 = attribute namespace prefix
     * i + 2 = attribute qualified name (may contain ":", as in "html:h1")
     * i + 3 = attribute value
     */
    private String[] attributes = new String[16];

    // Pending relaxed-mode error message recorded by checkRelaxed(). next()
    // reports it as a COMMENT token in token mode and discards it otherwise.
    private String error;

    // NOTE(review): not used in the visible part of the file; presumably marks
    // an entity reference that could not be resolved - confirm.
    private boolean unresolved;

    // Supplies the String instances that readUntil() returns for text taken
    // directly from the read buffer.
    public final StringPool stringPool = new StringPool();
183
    /**
     * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http://foo"}
     * in pulled elements. Most applications will only be interested in the effective namespaces of
     * their elements, so these attributes aren't useful. But for structure preserving wrappers like
     * DOM, it is necessary to keep the namespace data around.
     *
     * <p>Once enabled, {@code adjustNsp()} leaves namespace declarations in the attribute list and
     * reports {@code http://www.w3.org/2000/xmlns/} as their namespace instead of removing them.
     * This class offers no visible way to switch the behavior off again.
     */
    public void keepNamespaceAttributes() {
        this.keepNamespaceAttributes = true;
    }
193
194    private boolean adjustNsp() throws XmlPullParserException {
195        boolean any = false;
196
197        for (int i = 0; i < attributeCount << 2; i += 4) {
198            String attrName = attributes[i + 2];
199            int cut = attrName.indexOf(':');
200            String prefix;
201
202            if (cut != -1) {
203                prefix = attrName.substring(0, cut);
204                attrName = attrName.substring(cut + 1);
205            } else if (attrName.equals("xmlns")) {
206                prefix = attrName;
207                attrName = null;
208            } else {
209                continue;
210            }
211
212            if (!prefix.equals("xmlns")) {
213                any = true;
214            } else {
215                int j = (nspCounts[depth]++) << 1;
216
217                nspStack = ensureCapacity(nspStack, j + 2);
218                nspStack[j] = attrName;
219                nspStack[j + 1] = attributes[i + 3];
220
221                if (attrName != null && attributes[i + 3].isEmpty()) {
222                    checkRelaxed("illegal empty namespace");
223                }
224
225                if (keepNamespaceAttributes) {
226                    // explicitly set the namespace for unprefixed attributes
227                    // such as xmlns="http://foo"
228                    attributes[i] = "http://www.w3.org/2000/xmlns/";
229                    any = true;
230                } else {
231                    System.arraycopy(
232                            attributes,
233                            i + 4,
234                            attributes,
235                            i,
236                            ((--attributeCount) << 2) - i);
237
238                    i -= 4;
239                }
240            }
241        }
242
243        if (any) {
244            for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {
245
246                String attrName = attributes[i + 2];
247                int cut = attrName.indexOf(':');
248
249                if (cut == 0 && !relaxed) {
250                    throw new RuntimeException(
251                            "illegal attribute name: " + attrName + " at " + this);
252                } else if (cut != -1) {
253                    String attrPrefix = attrName.substring(0, cut);
254
255                    attrName = attrName.substring(cut + 1);
256
257                    String attrNs = getNamespace(attrPrefix);
258
259                    if (attrNs == null && !relaxed) {
260                        throw new RuntimeException(
261                                "Undefined Prefix: " + attrPrefix + " in " + this);
262                    }
263
264                    attributes[i] = attrNs;
265                    attributes[i + 1] = attrPrefix;
266                    attributes[i + 2] = attrName;
267                }
268            }
269        }
270
271        int cut = name.indexOf(':');
272
273        if (cut == 0) {
274            checkRelaxed("illegal tag name: " + name);
275        }
276
277        if (cut != -1) {
278            prefix = name.substring(0, cut);
279            name = name.substring(cut + 1);
280        }
281
282        this.namespace = getNamespace(prefix);
283
284        if (this.namespace == null) {
285            if (prefix != null) {
286                checkRelaxed("undefined prefix: " + prefix);
287            }
288            this.namespace = NO_NAMESPACE;
289        }
290
291        return any;
292    }
293
294    private String[] ensureCapacity(String[] arr, int required) {
295        if (arr.length >= required) {
296            return arr;
297        }
298        String[] bigger = new String[required + 16];
299        System.arraycopy(arr, 0, bigger, 0, arr.length);
300        return bigger;
301    }
302
303    private void checkRelaxed(String errorMessage) throws XmlPullParserException {
304        if (!relaxed) {
305            throw new XmlPullParserException(errorMessage, this, null);
306        }
307        if (error == null) {
308            error = "Error: " + errorMessage;
309        }
310    }
311
    /**
     * Advances to the next event with adjacent text tokens merged; comments,
     * processing instructions and the document declaration are skipped.
     * Delegates to {@code next(false)}.
     *
     * @return the type of the event the parser is now positioned on
     */
    public int next() throws XmlPullParserException, IOException {
        return next(false);
    }
315
    /**
     * Advances to the next single token, including comments, processing
     * instructions, the document declaration and individual entity
     * references. Delegates to {@code next(true)}.
     *
     * @return the type of the token the parser is now positioned on
     */
    public int nextToken() throws XmlPullParserException, IOException {
        return next(true);
    }
319
    /**
     * Shared implementation of {@link #next()} and {@link #nextToken()}.
     *
     * @param justOneToken true to return after every individual token
     *     (token mode); false to merge consecutive text, CDATA and entity
     *     tokens into one TEXT event and to skip comments, processing
     *     instructions and the document declaration
     * @return the new value of {@code type}
     * @throws XmlPullParserException if no input has been set, if character
     *     data appears at depth 0, if a document declaration follows the top
     *     level start tag, or if an unexpected token type is peeked
     */
    private int next(boolean justOneToken) throws IOException, XmlPullParserException {
        if (reader == null) {
            throw new XmlPullParserException("setInput() must be called first.", this, null);
        }

        // The depth of an end tag is only left once the caller moves past it.
        if (type == END_TAG) {
            depth--;
        }

        // degenerated needs to be handled before error because of possible
        // processor expectations(!)

        // A pending degenerated flag produces an END_TAG without reading input.
        if (degenerated) {
            degenerated = false;
            type = END_TAG;
            return type;
        }

        // A pending relaxed-mode error is surfaced as a COMMENT token in token
        // mode and silently dropped otherwise.
        if (error != null) {
            if (justOneToken) {
                text = error;
                type = COMMENT;
                error = null;
                return type;
            } else {
                error = null;
            }
        }

        type = peekType(false);

        // The XML declaration is never reported as an event: consume it and
        // look at whatever follows.
        if (type == XML_DECLARATION) {
            readXmlDeclaration();
            type = peekType(false);
        }

        // Reset the per-token state before reading.
        text = null;
        isWhitespace = true;
        prefix = null;
        name = null;
        namespace = null;
        attributeCount = -1;
        // Resolution failures are only fatal when tokens are being merged.
        boolean throwOnResolveFailure = !justOneToken;

        while (true) {
            switch (type) {

            /*
             * Return immediately after encountering a start tag, end tag, or
             * the end of the document.
             */
            case START_TAG:
                parseStartTag(false, throwOnResolveFailure);
                return type;
            case END_TAG:
                readEndTag();
                return type;
            case END_DOCUMENT:
                return type;

            /*
             * Return after any text token when we're looking for a single
             * token. Otherwise concatenate all text between tags.
             */
            case ENTITY_REF:
                if (justOneToken) {
                    StringBuilder entityTextBuilder = new StringBuilder();
                    readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT);
                    text = entityTextBuilder.toString();
                    break;
                }
                // fall-through
            case TEXT:
                text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT);
                // whitespace-only text outside of any element is not content
                if (depth == 0 && isWhitespace) {
                    type = IGNORABLE_WHITESPACE;
                }
                break;
            case CDSECT:
                read(START_CDATA);
                text = readUntil(END_CDATA, true);
                break;

            /*
             * Comments, processing instructions and declarations are returned
             * when we're looking for a single token. Otherwise they're skipped.
             */
            case COMMENT:
                String commentText = readComment(justOneToken);
                if (justOneToken) {
                    text = commentText;
                }
                break;
            case PROCESSING_INSTRUCTION:
                read(START_PROCESSING_INSTRUCTION);
                String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
                if (justOneToken) {
                    text = processingInstruction;
                }
                break;
            case DOCDECL:
                readDoctype(justOneToken);
                // a document declaration is rejected once the top level start
                // tag has been parsed
                if (parsedTopLevelStartTag) {
                    throw new XmlPullParserException("Unexpected token", this, null);
                }
                break;

            default:
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            // Character data is not allowed outside of any element; whitespace
            // was already reclassified as IGNORABLE_WHITESPACE above.
            if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) {
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            if (justOneToken) {
                return type;
            }

            // Ignorable whitespace contributes nothing to merged text.
            if (type == IGNORABLE_WHITESPACE) {
                text = null;
            }

            /*
             * We've read all that we can of a non-empty text block. Always
             * report this as text, even if it was a CDATA block or entity
             * reference.
             */
            // NOTE(review): "peek < TEXT" relies on the numeric order of the
            // XmlPullParser event constants; presumably only the document and
            // tag boundary events sort below TEXT - confirm.
            int peek = peekType(false);
            if (text != null && !text.isEmpty() && peek < TEXT) {
                type = TEXT;
                return type;
            }

            // Otherwise keep going with the token that was just peeked.
            type = peek;
        }
    }
457
458    /**
459     * Reads text until the specified delimiter is encountered. Consumes the
460     * text and the delimiter.
461     *
462     * @param returnText true to return the read text excluding the delimiter;
463     *     false to return null.
464     */
465    private String readUntil(char[] delimiter, boolean returnText)
466            throws IOException, XmlPullParserException {
467        int start = position;
468        StringBuilder result = null;
469
470        if (returnText && text != null) {
471            result = new StringBuilder();
472            result.append(text);
473        }
474
475        search:
476        while (true) {
477            if (position + delimiter.length > limit) {
478                if (start < position && returnText) {
479                    if (result == null) {
480                        result = new StringBuilder();
481                    }
482                    result.append(buffer, start, position - start);
483                }
484                if (!fillBuffer(delimiter.length)) {
485                    checkRelaxed(UNEXPECTED_EOF);
486                    type = COMMENT;
487                    return null;
488                }
489                start = position;
490            }
491
492            // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
493            // when the VM has better method inlining
494            for (int i = 0; i < delimiter.length; i++) {
495                if (buffer[position + i] != delimiter[i]) {
496                    position++;
497                    continue search;
498                }
499            }
500
501            break;
502        }
503
504        int end = position;
505        position += delimiter.length;
506
507        if (!returnText) {
508            return null;
509        } else if (result == null) {
510            return stringPool.get(buffer, start, end - start);
511        } else {
512            result.append(buffer, start, end - start);
513            return result.toString();
514        }
515    }
516
517    /**
518     * Returns true if an XML declaration was read.
519     */
520    private void readXmlDeclaration() throws IOException, XmlPullParserException {
521        if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) {
522            checkRelaxed("processing instructions must not start with xml");
523        }
524
525        read(START_PROCESSING_INSTRUCTION);
526        parseStartTag(true, true);
527
528        if (attributeCount < 1 || !"version".equals(attributes[2])) {
529            checkRelaxed("version expected");
530        }
531
532        version = attributes[3];
533
534        int pos = 1;
535
536        if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) {
537            encoding = attributes[3 + 4];
538            pos++;
539        }
540
541        if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) {
542            String st = attributes[3 + 4 * pos];
543            if ("yes".equals(st)) {
544                standalone = Boolean.TRUE;
545            } else if ("no".equals(st)) {
546                standalone = Boolean.FALSE;
547            } else {
548                checkRelaxed("illegal standalone value: " + st);
549            }
550            pos++;
551        }
552
553        if (pos != attributeCount) {
554            checkRelaxed("unexpected attributes in XML declaration");
555        }
556
557        isWhitespace = true;
558        text = null;
559    }
560
561    private String readComment(boolean returnText) throws IOException, XmlPullParserException {
562        read(START_COMMENT);
563
564        if (relaxed) {
565            return readUntil(END_COMMENT, returnText);
566        }
567
568        String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText);
569        if (peekCharacter() != '>') {
570            throw new XmlPullParserException("Comments may not contain --", this, null);
571        }
572        position++;
573        return commentText;
574    }
575
    /**
     * Read the document's DTD. Although this parser is non-validating, the DTD
     * must be parsed to capture entity values and default attribute values.
     *
     * <p>The root element name and the external ID are stored in
     * {@code rootElementName}, {@code publicId} and {@code systemId}.
     *
     * @param saveDtdText true to store the raw text between {@code <!DOCTYPE}
     *     and the closing {@code >} in the {@code text} field
     */
    private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException {
        read(START_DOCTYPE);

        int startPosition = -1;
        if (saveDtdText) {
            // While bufferCapture is non-null, buffer contents are copied into
            // it before the buffer is overwritten (see the field's
            // documentation).
            bufferCapture = new StringBuilder();
            startPosition = position;
        }
        try {
            skip();
            rootElementName = readName();
            readExternalId(true, true);
            skip();
            // the internal subset is optional
            if (peekCharacter() == '[') {
                readInternalSubset();
            }
            skip();
        } finally {
            if (saveDtdText) {
                // Append the part of the buffer that has been consumed so far,
                // then drop the characters that precede the declaration's text.
                bufferCapture.append(buffer, 0, position);
                bufferCapture.delete(0, startPosition);
                text = bufferCapture.toString();
                bufferCapture = null;
            }
        }

        // The closing '>' is consumed after capturing, so it is not part of
        // the saved text.
        read('>');
    }
608
609    /**
610     * Reads an external ID of one of these two forms:
611     *   SYSTEM "quoted system name"
612     *   PUBLIC "quoted public id" "quoted system name"
613     *
614     * If the system name is not required, this also supports lone public IDs of
615     * this form:
616     *   PUBLIC "quoted public id"
617     *
618     * Returns true if any ID was read.
619     */
620    private boolean readExternalId(boolean requireSystemName, boolean assignFields)
621            throws IOException, XmlPullParserException {
622        skip();
623        int c = peekCharacter();
624
625        if (c == 'S') {
626            read(SYSTEM);
627        } else if (c == 'P') {
628            read(PUBLIC);
629            skip();
630            if (assignFields) {
631                publicId = readQuotedId(true);
632            } else {
633                readQuotedId(false);
634            }
635        } else {
636            return false;
637        }
638
639        skip();
640
641        if (!requireSystemName) {
642            int delimiter = peekCharacter();
643            if (delimiter != '"' && delimiter != '\'') {
644                return true; // no system name!
645            }
646        }
647
648        if (assignFields) {
649            systemId = readQuotedId(true);
650        } else {
651            readQuotedId(false);
652        }
653        return true;
654    }
655
    // One-character delimiters that readQuotedId() hands to readUntil() to
    // find the closing quote of a quoted ID.
    private static final char[] SINGLE_QUOTE = new char[] { '\'' };
    private static final char[] DOUBLE_QUOTE = new char[] { '"' };
658
659    /**
660     * Reads a quoted string, performing no entity escaping of the contents.
661     */
662    private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException {
663        int quote = peekCharacter();
664        char[] delimiter;
665        if (quote == '"') {
666            delimiter = DOUBLE_QUOTE;
667        } else if (quote == '\'') {
668            delimiter = SINGLE_QUOTE;
669        } else {
670            throw new XmlPullParserException("Expected a quoted string", this, null);
671        }
672        position++;
673        return readUntil(delimiter, returnText);
674    }
675
676    private void readInternalSubset() throws IOException, XmlPullParserException {
677        read('[');
678
679        while (true) {
680            skip();
681            if (peekCharacter() == ']') {
682                position++;
683                return;
684            }
685
686            int declarationType = peekType(true);
687            switch (declarationType) {
688            case ELEMENTDECL:
689                readElementDeclaration();
690                break;
691
692            case ATTLISTDECL:
693                readAttributeListDeclaration();
694                break;
695
696            case ENTITYDECL:
697                readEntityDeclaration();
698                break;
699
700            case NOTATIONDECL:
701                readNotationDeclaration();
702                break;
703
704            case PROCESSING_INSTRUCTION:
705                read(START_PROCESSING_INSTRUCTION);
706                readUntil(END_PROCESSING_INSTRUCTION, false);
707                break;
708
709            case COMMENT:
710                readComment(false);
711                break;
712
713            case PARAMETER_ENTITY_REF:
714                throw new XmlPullParserException(
715                        "Parameter entity references are not supported", this, null);
716
717            default:
718                throw new XmlPullParserException("Unexpected token", this, null);
719            }
720        }
721    }
722
    /**
     * Read an element declaration. This contains a name and a content spec.
     *   <!ELEMENT foo EMPTY >
     *   <!ELEMENT foo (bar?,(baz|quux)) >
     *   <!ELEMENT foo (#PCDATA|bar)* >
     *
     * The declaration is only consumed: the element name returned by
     * readName() is discarded and nothing is recorded.
     */
    private void readElementDeclaration() throws IOException, XmlPullParserException {
        read(START_ELEMENT);
        skip();
        // the declared element name is not needed
        readName();
        readContentSpec();
        skip();
        read('>');
    }
737
738    /**
739     * Read an element content spec. This is a regular expression-like pattern
740     * of names or other content specs. The following operators are supported:
741     *   sequence:    (a,b,c)
742     *   choice:      (a|b|c)
743     *   optional:    a?
744     *   one or more: a+
745     *   any number:  a*
746     *
747     * The special name '#PCDATA' is permitted but only if it is the first
748     * element of the first group:
749     *   (#PCDATA|a|b)
750     *
751     * The top-level element must be either a choice, a sequence, or one of the
752     * special names EMPTY and ANY.
753     */
754    private void readContentSpec() throws IOException, XmlPullParserException {
755        // this implementation is very lenient; it scans for balanced parens only
756        skip();
757        int c = peekCharacter();
758        if (c == '(') {
759            int depth = 0;
760            do {
761                if (c == '(') {
762                    depth++;
763                } else if (c == ')') {
764                    depth--;
765                } else if (c == -1) {
766                    throw new XmlPullParserException(
767                            "Unterminated element content spec", this, null);
768                }
769                position++;
770                c = peekCharacter();
771            } while (depth > 0);
772
773            if (c == '*' || c == '?' || c == '+') {
774                position++;
775            }
776        } else if (c == EMPTY[0]) {
777            read(EMPTY);
778        } else if (c == ANY[0]) {
779            read(ANY);
780        } else {
781            throw new XmlPullParserException("Expected element content spec", this, null);
782        }
783    }
784
    /**
     * Reads an attribute list declaration such as the following:
     *   <!ATTLIST foo
     *       bar CDATA #IMPLIED
     *       quux (a|b|c) "c"
     *       baz NOTATION (a|b|c) #FIXED "c">
     *
     * Each attribute has a name, type and default.
     *
     * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY,
     * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)"
     * or NOTATION followed by an enumerated type.
     *
     * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or
     * #FIXED with a quoted value.
     *
     * Only quoted default values are recorded, through
     * defineAttributeDefault(); the attribute types are parsed and discarded.
     */
    private void readAttributeListDeclaration() throws IOException, XmlPullParserException {
        read(START_ATTLIST);
        skip();
        String elementName = readName();

        // one iteration per attribute definition, until the closing '>'
        while (true) {
            skip();
            int c = peekCharacter();
            if (c == '>') {
                position++;
                return;
            }

            // attribute name
            String attributeName = readName();

            // attribute type
            skip();
            // two characters must be available for the look-ahead below
            if (position + 1 >= limit && !fillBuffer(2)) {
                throw new XmlPullParserException("Malformed attribute list", this, null);
            }
            // Only the first two characters ("NO") are compared; this is
            // presumably enough to tell NOTATION apart from the other type
            // names starting with 'N' (NMTOKEN, NMTOKENS).
            if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) {
                read(NOTATION);
                skip();
            }
            c = peekCharacter();
            if (c == '(') {
                // enumerated type: names separated by '|' and closed by ')'
                position++;
                while (true) {
                    skip();
                    readName();
                    skip();
                    c = peekCharacter();
                    if (c == ')') {
                        position++;
                        break;
                    } else if (c == '|') {
                        position++;
                    } else {
                        throw new XmlPullParserException("Malformed attribute type", this, null);
                    }
                }
            } else {
                // built-in type name; its value is not needed
                readName();
            }

            // default value
            skip();
            c = peekCharacter();
            if (c == '#') {
                position++;
                c = peekCharacter();
                // the keyword is chosen by its first character; read() then
                // consumes the whole keyword
                if (c == 'R') {
                    read(REQUIRED);
                } else if (c == 'I') {
                    read(IMPLIED);
                } else if (c == 'F') {
                    read(FIXED);
                } else {
                    throw new XmlPullParserException("Malformed attribute type", this, null);
                }
                skip();
                c = peekCharacter();
            }
            // A quoted value may follow any of the keywords above or stand
            // alone; this lenient parser does not restrict it to #FIXED.
            if (c == '"' || c == '\'') {
                position++;
                // TODO: does this do escaping correctly?
                String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE);
                // step over the closing quote if it is present
                if (peekCharacter() == c) {
                    position++;
                }
                defineAttributeDefault(elementName, attributeName, value);
            }
        }
    }
876
877    private void defineAttributeDefault(String elementName, String attributeName, String value) {
878        if (defaultAttributes == null) {
879            defaultAttributes = new HashMap<String, Map<String, String>>();
880        }
881        Map<String, String> elementAttributes = defaultAttributes.get(elementName);
882        if (elementAttributes == null) {
883            elementAttributes = new HashMap<String, String>();
884            defaultAttributes.put(elementName, elementAttributes);
885        }
886        elementAttributes.put(attributeName, value);
887    }
888
889    /**
890     * Read an entity declaration. The value of internal entities are inline:
891     *   <!ENTITY foo "bar">
892     *
893     * The values of external entities must be retrieved by URL or path:
894     *   <!ENTITY foo SYSTEM "http://host/file">
895     *   <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file">
896     *   <!ENTITY foo SYSTEM "../file.png" NDATA png>
897     *
898     * Entities may be general or parameterized. Parameterized entities are
899     * marked by a percent sign. Such entities may only be used in the DTD:
900     *   <!ENTITY % foo "bar">
901     */
902    private void readEntityDeclaration() throws IOException, XmlPullParserException {
903        read(START_ENTITY);
904        boolean generalEntity = true;
905
906        skip();
907        if (peekCharacter() == '%') {
908            generalEntity = false;
909            position++;
910            skip();
911        }
912
913        String name = readName();
914
915        skip();
916        int quote = peekCharacter();
917        String entityValue;
918        if (quote == '"' || quote == '\'') {
919            position++;
920            entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION);
921            if (peekCharacter() == quote) {
922                position++;
923            }
924        } else if (readExternalId(true, false)) {
925            /*
926             * Map external entities to the empty string. This is dishonest,
927             * but it's consistent with Android's Expat pull parser.
928             */
929            entityValue = "";
930            skip();
931            if (peekCharacter() == NDATA[0]) {
932                read(NDATA);
933                skip();
934                readName();
935            }
936        } else {
937            throw new XmlPullParserException("Expected entity value or external ID", this, null);
938        }
939
940        if (generalEntity && processDocDecl) {
941            if (documentEntities == null) {
942                documentEntities = new HashMap<String, char[]>();
943            }
944            documentEntities.put(name, entityValue.toCharArray());
945        }
946
947        skip();
948        read('>');
949    }
950
951    private void readNotationDeclaration() throws IOException, XmlPullParserException {
952        read(START_NOTATION);
953        skip();
954        readName();
955        if (!readExternalId(false, false)) {
956            throw new XmlPullParserException(
957                    "Expected external ID or public ID for notation", this, null);
958        }
959        skip();
960        read('>');
961    }
962
963    private void readEndTag() throws IOException, XmlPullParserException {
964        read('<');
965        read('/');
966        name = readName(); // TODO: pass the expected name in as a hint?
967        skip();
968        read('>');
969
970        int sp = (depth - 1) * 4;
971
972        if (depth == 0) {
973            checkRelaxed("read end tag " + name + " with no tags open");
974            type = COMMENT;
975            return;
976        }
977
978        if (name.equals(elementStack[sp + 3])) {
979            namespace = elementStack[sp];
980            prefix = elementStack[sp + 1];
981            name = elementStack[sp + 2];
982        } else if (!relaxed) {
983            throw new XmlPullParserException(
984                    "expected: /" + elementStack[sp + 3] + " read: " + name, this, null);
985        }
986    }
987
988    /**
989     * Returns the type of the next token.
990     */
991    private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException {
992        if (position >= limit && !fillBuffer(1)) {
993            return END_DOCUMENT;
994        }
995
996        switch (buffer[position]) {
997        case '&':
998            return ENTITY_REF; // &
999        case '<':
1000            if (position + 3 >= limit && !fillBuffer(4)) {
1001                throw new XmlPullParserException("Dangling <", this, null);
1002            }
1003
1004            switch (buffer[position + 1]) {
1005            case '/':
1006                return END_TAG; // </
1007            case '?':
1008                // we're looking for "<?xml " with case insensitivity
1009                if ((position + 5 < limit || fillBuffer(6))
1010                        && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
1011                        && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
1012                        && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
1013                        && (buffer[position + 5] == ' ')) {
1014                    return XML_DECLARATION; // <?xml
1015                } else {
1016                    return PROCESSING_INSTRUCTION; // <?
1017                }
1018            case '!':
1019                switch (buffer[position + 2]) {
1020                case 'D':
1021                    return DOCDECL; // <!D
1022                case '[':
1023                    return CDSECT; // <![
1024                case '-':
1025                    return COMMENT; // <!-
1026                case 'E':
1027                    switch (buffer[position + 3]) {
1028                    case 'L':
1029                        return ELEMENTDECL; // <!EL
1030                    case 'N':
1031                        return ENTITYDECL; // <!EN
1032                    }
1033                    break;
1034                case 'A':
1035                    return ATTLISTDECL;  // <!A
1036                case 'N':
1037                    return NOTATIONDECL; // <!N
1038                }
1039                throw new XmlPullParserException("Unexpected <!", this, null);
1040            default:
1041                return START_TAG; // <
1042            }
1043        case '%':
1044            return inDeclaration ? PARAMETER_ENTITY_REF : TEXT;
1045        default:
1046            return TEXT;
1047        }
1048    }
1049
    /**
     * Parses a start tag, or the body of an XML declaration, and sets
     * {@code name} and the attributes.
     *
     * <p>For a regular start tag this also pushes a new frame onto the element
     * stack, extends the namespace counts, applies namespace processing when
     * enabled and appends any DTD-declared default attributes that were not
     * given explicitly.
     *
     * @param xmldecl true when parsing an XML declaration: no leading
     *     {@code <} is read, the construct ends at {@code ?>}, and the method
     *     returns without touching the element stack.
     * @param throwOnResolveFailure passed through to {@code readValue} for
     *     attribute values.
     */
    private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)
            throws IOException, XmlPullParserException {
        if (!xmldecl) {
            read('<');
        }
        name = readName();
        attributeCount = 0;

        // Read attributes until the tag is closed.
        while (true) {
            skip();

            if (position >= limit && !fillBuffer(1)) {
                checkRelaxed(UNEXPECTED_EOF);
                return;
            }

            int c = buffer[position];

            if (xmldecl) {
                if (c == '?') {
                    position++;
                    read('>');
                    return;
                }
            } else {
                if (c == '/') {
                    // self-closing tag: "/>"
                    degenerated = true;
                    position++;
                    skip();
                    read('>');
                    break;
                } else if (c == '>') {
                    position++;
                    break;
                }
            }

            String attrName = readName();

            // Each attribute occupies four consecutive slots; slots 2 and 3
            // hold the name as read and the value. Slots 0 and 1 start out as
            // "" and null (presumably namespace and prefix, filled in during
            // namespace processing; confirm against adjustNsp()).
            int i = (attributeCount++) * 4;
            attributes = ensureCapacity(attributes, i + 4);
            attributes[i] = "";
            attributes[i + 1] = null;
            attributes[i + 2] = attrName;

            skip();
            if (position >= limit && !fillBuffer(1)) {
                checkRelaxed(UNEXPECTED_EOF);
                return;
            }

            if (buffer[position] == '=') {
                position++;

                skip();
                if (position >= limit && !fillBuffer(1)) {
                    checkRelaxed(UNEXPECTED_EOF);
                    return;
                }
                char delimiter = buffer[position];

                if (delimiter == '\'' || delimiter == '"') {
                    position++;
                } else if (relaxed) {
                    // unquoted value: readValue treats ' ' as "stop at whitespace or '>'"
                    delimiter = ' ';
                } else {
                    throw new XmlPullParserException("attr value delimiter missing!", this, null);
                }

                attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure,
                        ValueContext.ATTRIBUTE);

                if (delimiter != ' ' && peekCharacter() == delimiter) {
                    position++; // end quote
                }
            } else if (relaxed) {
                // a valueless attribute takes its own name as its value
                attributes[i + 3] = attrName;
            } else {
                checkRelaxed("Attr.value missing f. " + attrName);
                attributes[i + 3] = attrName;
            }
        }

        // Push a four-slot frame for this element: namespace, prefix, name
        // and (slot 3) the name exactly as read, which readEndTag compares
        // against the end tag's name.
        int sp = depth++ * 4;
        if (depth == 1) {
            parsedTopLevelStartTag = true;
        }
        elementStack = ensureCapacity(elementStack, sp + 4);
        elementStack[sp + 3] = name;

        if (depth >= nspCounts.length) {
            int[] bigger = new int[depth + 4];
            System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
            nspCounts = bigger;
        }

        // the new depth starts with the same count as its parent
        nspCounts[depth] = nspCounts[depth - 1];

        if (processNsp) {
            adjustNsp();
        } else {
            namespace = "";
        }

        // For consistency with Expat, add default attributes after fixing namespaces.
        if (defaultAttributes != null) {
            Map<String, String> elementDefaultAttributes = defaultAttributes.get(name);
            if (elementDefaultAttributes != null) {
                for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) {
                    if (getAttributeValue(null, entry.getKey()) != null) {
                        continue; // an explicit value overrides the default
                    }

                    int i = (attributeCount++) * 4;
                    attributes = ensureCapacity(attributes, i + 4);
                    attributes[i] = "";
                    attributes[i + 1] = null;
                    attributes[i + 2] = entry.getKey();
                    attributes[i + 3] = entry.getValue();
                }
            }
        }

        elementStack[sp] = namespace;
        elementStack[sp + 1] = prefix;
        elementStack[sp + 2] = name;
    }
1180
1181    /**
1182     * Reads an entity reference from the buffer, resolves it, and writes the
1183     * resolved entity to {@code out}. If the entity cannot be read or resolved,
1184     * {@code out} will contain the partial entity reference.
1185     */
1186    private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure,
1187            ValueContext valueContext) throws IOException, XmlPullParserException {
1188        int start = out.length();
1189
1190        if (buffer[position++] != '&') {
1191            throw new AssertionError();
1192        }
1193
1194        out.append('&');
1195
1196        while (true) {
1197            int c = peekCharacter();
1198
1199            if (c == ';') {
1200                out.append(';');
1201                position++;
1202                break;
1203
1204            } else if (c >= 128
1205                    || (c >= '0' && c <= '9')
1206                    || (c >= 'a' && c <= 'z')
1207                    || (c >= 'A' && c <= 'Z')
1208                    || c == '_'
1209                    || c == '-'
1210                    || c == '#') {
1211                position++;
1212                out.append((char) c);
1213
1214            } else if (relaxed) {
1215                // intentionally leave the partial reference in 'out'
1216                return;
1217
1218            } else {
1219                throw new XmlPullParserException("unterminated entity ref", this, null);
1220            }
1221        }
1222
1223        String code = out.substring(start + 1, out.length() - 1);
1224
1225        if (isEntityToken) {
1226            name = code;
1227        }
1228
1229        if (code.startsWith("#")) {
1230            try {
1231                int c = code.startsWith("#x")
1232                        ? Integer.parseInt(code.substring(2), 16)
1233                        : Integer.parseInt(code.substring(1));
1234                out.delete(start, out.length());
1235                out.appendCodePoint(c);
1236                unresolved = false;
1237                return;
1238            } catch (NumberFormatException notANumber) {
1239                throw new XmlPullParserException("Invalid character reference: &" + code);
1240            } catch (IllegalArgumentException invalidCodePoint) {
1241                throw new XmlPullParserException("Invalid character reference: &" + code);
1242            }
1243        }
1244
1245        if (valueContext == ValueContext.ENTITY_DECLARATION) {
1246            // keep the unresolved &code; in the text to resolve later
1247            return;
1248        }
1249
1250        String defaultEntity = DEFAULT_ENTITIES.get(code);
1251        if (defaultEntity != null) {
1252            out.delete(start, out.length());
1253            unresolved = false;
1254            out.append(defaultEntity);
1255            return;
1256        }
1257
1258        char[] resolved;
1259        if (documentEntities != null && (resolved = documentEntities.get(code)) != null) {
1260            out.delete(start, out.length());
1261            unresolved = false;
1262            if (processDocDecl) {
1263                pushContentSource(resolved); // parse the entity as XML
1264            } else {
1265                out.append(resolved); // include the entity value as text
1266            }
1267            return;
1268        }
1269
1270        /*
1271         * The parser skipped an external DTD, and now we've encountered an
1272         * unknown entity that could have been declared there. Map it to the
1273         * empty string. This is dishonest, but it's consistent with Android's
1274         * old ExpatPullParser.
1275         */
1276        if (systemId != null) {
1277            out.delete(start, out.length());
1278            return;
1279        }
1280
1281        // keep the unresolved entity "&code;" in the text for relaxed clients
1282        unresolved = true;
1283        if (throwOnResolveFailure) {
1284            checkRelaxed("unresolved: &" + code + ";");
1285        }
1286    }
1287
    /**
     * Where a value is found impacts how that value is interpreted. For
     * example, in attributes, "\n" must be replaced with a space character. In
     * text, "]]>" is forbidden. In entity declarations, named references are
     * not resolved.
     */
    enum ValueContext {
        /**
         * An attribute value: line breaks become spaces and {@code <} is
         * reported as illegal unless the parser is relaxed.
         */
        ATTRIBUTE,

        /**
         * Character data between tags: {@code ]]>} is reported as illegal
         * unless the parser is relaxed.
         */
        TEXT,

        /**
         * The quoted value of an entity declaration: named references are
         * kept unresolved and {@code %} is rejected.
         */
        ENTITY_DECLARATION
    }
1299
    /**
     * Returns the current text or attribute value. This also has the side
     * effect of setting isWhitespace to false if a non-whitespace character is
     * encountered.
     *
     * <p>On return, {@code position} is at the character that ended the value
     * (it is not consumed), or at the end of the input.
     *
     * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted
     *     attributes, or a space for unquoted attributes.
     * @param resolveEntities if false, an {@code &} ends the value instead of
     *     being resolved.
     * @param throwOnResolveFailure passed through to {@code readEntity}.
     * @param valueContext where the value occurs; selects which characters
     *     need special handling.
     * @return the value; possibly empty if the input ends immediately.
     */
    private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure,
            ValueContext valueContext) throws IOException, XmlPullParserException {

        /*
         * This method returns all of the characters from the current position
         * through to an appropriate delimiter.
         *
         * If we're lucky (which we usually are), we'll return a single slice of
         * the buffer. This fast path avoids allocating a string builder.
         *
         * There are 6 unlucky characters we could encounter:
         *  - "&":  entities must be resolved.
         *  - "%":  parameter entities are unsupported in entity values.
         *  - "<":  this isn't permitted in attributes unless relaxed.
         *  - "]":  this requires a lookahead to defend against the forbidden
         *          CDATA section delimiter "]]>".
         *  - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it
         *          isn't followed by "\n", we replace "\r" with either a "\n"
         *          in text nodes or a space in attribute values.
         *  - "\n": In attribute values, "\n" must be replaced with a space.
         *
         * We could also get unlucky by needing to refill the buffer midway
         * through the text.
         */

        // 'start' marks the first buffer character not yet copied to 'result'
        int start = position;
        StringBuilder result = null;

        // if a text section was already started, prefix the start
        if (valueContext == ValueContext.TEXT && text != null) {
            result = new StringBuilder();
            result.append(text);
        }

        while (true) {

            /*
             * Make sure we have at least a single character to read from the
             * buffer. This mutates the buffer, so save the partial result
             * to the slow path string builder first.
             */
            if (position >= limit) {
                if (start < position) {
                    if (result == null) {
                        result = new StringBuilder();
                    }
                    result.append(buffer, start, position - start);
                }
                if (!fillBuffer(1)) {
                    // end of input: return whatever was collected
                    return result != null ? result.toString() : "";
                }
                start = position;
            }

            char c = buffer[position];

            // Stop (without consuming) at the delimiter, at whitespace or '>'
            // for unquoted values, or at '&' when entities aren't resolved.
            if (c == delimiter
                    || (delimiter == ' ' && (c <= ' ' || c == '>'))
                    || c == '&' && !resolveEntities) {
                break;
            }

            // Fast path: an ordinary character for this context.
            if (c != '\r'
                    && (c != '\n' || valueContext != ValueContext.ATTRIBUTE)
                    && c != '&'
                    && c != '<'
                    && (c != ']' || valueContext != ValueContext.TEXT)
                    && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) {
                isWhitespace &= (c <= ' ');
                position++;
                continue;
            }

            /*
             * We've encountered an unlucky character! Convert from fast
             * path to slow path if we haven't done so already.
             */
            if (result == null) {
                result = new StringBuilder();
            }
            result.append(buffer, start, position - start);

            if (c == '\r') {
                // skip the '\r' of a "\r\n" pair; the shared position++ below
                // then consumes the '\n'
                if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') {
                    position++;
                }
                c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n';

            } else if (c == '\n') {
                // only reached for attribute values (see the fast path test)
                c = ' ';

            } else if (c == '&') {
                isWhitespace = false; // TODO: what if the entity resolves to whitespace?
                readEntity(result, false, throwOnResolveFailure, valueContext);
                start = position;
                continue;

            } else if (c == '<') {
                if (valueContext == ValueContext.ATTRIBUTE) {
                    checkRelaxed("Illegal: \"<\" inside attribute value");
                }
                isWhitespace = false;

            } else if (c == ']') {
                if ((position + 2 < limit || fillBuffer(3))
                        && buffer[position + 1] == ']' && buffer[position + 2] == '>') {
                    checkRelaxed("Illegal: \"]]>\" outside CDATA section");
                }
                isWhitespace = false;

            } else if (c == '%') {
                throw new XmlPullParserException("This parser doesn't support parameter entities",
                        this, null);

            } else {
                throw new AssertionError();
            }

            // consume the special character and append its (possibly replaced) value
            position++;
            result.append(c);
            start = position;
        }

        if (result == null) {
            return stringPool.get(buffer, start, position - start);
        } else {
            result.append(buffer, start, position - start);
            return result.toString();
        }
    }
1438
1439    private void read(char expected) throws IOException, XmlPullParserException {
1440        int c = peekCharacter();
1441        if (c != expected) {
1442            checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'");
1443            if (c == -1) {
1444                return; // On EOF, don't move position beyond limit
1445            }
1446        }
1447        position++;
1448    }
1449
1450    private void read(char[] chars) throws IOException, XmlPullParserException {
1451        if (position + chars.length > limit && !fillBuffer(chars.length)) {
1452            checkRelaxed("expected: '" + new String(chars) + "' but was EOF");
1453            return;
1454        }
1455
1456        // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
1457        // when the VM has better method inlining
1458        for (int i = 0; i < chars.length; i++) {
1459            if (buffer[position + i] != chars[i]) {
1460                checkRelaxed("expected: \"" + new String(chars) + "\" but was \""
1461                        + new String(buffer, position, chars.length) + "...\"");
1462            }
1463        }
1464
1465        position += chars.length;
1466    }
1467
1468    private int peekCharacter() throws IOException, XmlPullParserException {
1469        if (position < limit || fillBuffer(1)) {
1470            return buffer[position];
1471        }
1472        return -1;
1473    }
1474
1475    /**
1476     * Returns true once {@code limit - position >= minimum}. If the data is
1477     * exhausted before that many characters are available, this returns
1478     * false.
1479     */
1480    private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException {
1481        // If we've exhausted the current content source, remove it
1482        while (nextContentSource != null) {
1483            if (position < limit) {
1484                throw new XmlPullParserException("Unbalanced entity!", this, null);
1485            }
1486            popContentSource();
1487            if (limit - position >= minimum) {
1488                return true;
1489            }
1490        }
1491
1492        // Before clobbering the old characters, update where buffer starts
1493        for (int i = 0; i < position; i++) {
1494            if (buffer[i] == '\n') {
1495                bufferStartLine++;
1496                bufferStartColumn = 0;
1497            } else {
1498                bufferStartColumn++;
1499            }
1500        }
1501
1502        if (bufferCapture != null) {
1503            bufferCapture.append(buffer, 0, position);
1504        }
1505
1506        if (limit != position) {
1507            limit -= position;
1508            System.arraycopy(buffer, position, buffer, 0, limit);
1509        } else {
1510            limit = 0;
1511        }
1512
1513        position = 0;
1514        int total;
1515        while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) {
1516            limit += total;
1517            if (limit >= minimum) {
1518                return true;
1519            }
1520        }
1521        return false;
1522    }
1523
1524    /**
1525     * Returns an element or attribute name. This is always non-empty for
1526     * non-relaxed parsers.
1527     */
1528    private String readName() throws IOException, XmlPullParserException {
1529        if (position >= limit && !fillBuffer(1)) {
1530            checkRelaxed("name expected");
1531            return "";
1532        }
1533
1534        int start = position;
1535        StringBuilder result = null;
1536
1537        // read the first character
1538        char c = buffer[position];
1539        if ((c >= 'a' && c <= 'z')
1540                || (c >= 'A' && c <= 'Z')
1541                || c == '_'
1542                || c == ':'
1543                || c >= '\u00c0' // TODO: check the XML spec
1544                || relaxed) {
1545            position++;
1546        } else {
1547            checkRelaxed("name expected");
1548            return "";
1549        }
1550
1551        while (true) {
1552            /*
1553             * Make sure we have at least a single character to read from the
1554             * buffer. This mutates the buffer, so save the partial result
1555             * to the slow path string builder first.
1556             */
1557            if (position >= limit) {
1558                if (result == null) {
1559                    result = new StringBuilder();
1560                }
1561                result.append(buffer, start, position - start);
1562                if (!fillBuffer(1)) {
1563                    return result.toString();
1564                }
1565                start = position;
1566            }
1567
1568            // read another character
1569            c = buffer[position];
1570            if ((c >= 'a' && c <= 'z')
1571                    || (c >= 'A' && c <= 'Z')
1572                    || (c >= '0' && c <= '9')
1573                    || c == '_'
1574                    || c == '-'
1575                    || c == ':'
1576                    || c == '.'
1577                    || c >= '\u00b7') {  // TODO: check the XML spec
1578                position++;
1579                continue;
1580            }
1581
1582            // we encountered a non-name character. done!
1583            if (result == null) {
1584                return stringPool.get(buffer, start, position - start);
1585            } else {
1586                result.append(buffer, start, position - start);
1587                return result.toString();
1588            }
1589        }
1590    }
1591
1592    private void skip() throws IOException, XmlPullParserException {
1593        while (position < limit || fillBuffer(1)) {
1594            int c = buffer[position];
1595            if (c > ' ') {
1596                break;
1597            }
1598            position++;
1599        }
1600    }
1601
1602    //  public part starts here...
1603
1604    public void setInput(Reader reader) throws XmlPullParserException {
1605        this.reader = reader;
1606
1607        type = START_DOCUMENT;
1608        name = null;
1609        namespace = null;
1610        degenerated = false;
1611        attributeCount = -1;
1612        encoding = null;
1613        version = null;
1614        standalone = null;
1615
1616        if (reader == null) {
1617            return;
1618        }
1619
1620        position = 0;
1621        limit = 0;
1622        bufferStartLine = 0;
1623        bufferStartColumn = 0;
1624        depth = 0;
1625        documentEntities = null;
1626    }
1627
1628    public void setInput(InputStream is, String charset) throws XmlPullParserException {
1629        position = 0;
1630        limit = 0;
1631        boolean detectCharset = (charset == null);
1632
1633        if (is == null) {
1634            throw new IllegalArgumentException("is == null");
1635        }
1636
1637        try {
1638            if (detectCharset) {
1639                // read the four bytes looking for an indication of the encoding in use
1640                int firstFourBytes = 0;
1641                while (limit < 4) {
1642                    int i = is.read();
1643                    if (i == -1) {
1644                        break;
1645                    }
1646                    firstFourBytes = (firstFourBytes << 8) | i;
1647                    buffer[limit++] = (char) i;
1648                }
1649
1650                if (limit == 4) {
1651                    switch (firstFourBytes) {
1652                    case 0x00000FEFF: // UTF-32BE BOM
1653                        charset = "UTF-32BE";
1654                        limit = 0;
1655                        break;
1656
1657                    case 0x0FFFE0000: // UTF-32LE BOM
1658                        charset = "UTF-32LE";
1659                        limit = 0;
1660                        break;
1661
1662                    case 0x0000003c: // '<' in UTF-32BE
1663                        charset = "UTF-32BE";
1664                        buffer[0] = '<';
1665                        limit = 1;
1666                        break;
1667
1668                    case 0x03c000000: // '<' in UTF-32LE
1669                        charset = "UTF-32LE";
1670                        buffer[0] = '<';
1671                        limit = 1;
1672                        break;
1673
1674                    case 0x0003c003f: // "<?" in UTF-16BE
1675                        charset = "UTF-16BE";
1676                        buffer[0] = '<';
1677                        buffer[1] = '?';
1678                        limit = 2;
1679                        break;
1680
1681                    case 0x03c003f00: // "<?" in UTF-16LE
1682                        charset = "UTF-16LE";
1683                        buffer[0] = '<';
1684                        buffer[1] = '?';
1685                        limit = 2;
1686                        break;
1687
1688                    case 0x03c3f786d: // "<?xm" in ASCII etc.
1689                        while (true) {
1690                            int i = is.read();
1691                            if (i == -1) {
1692                                break;
1693                            }
1694                            buffer[limit++] = (char) i;
1695                            if (i == '>') {
1696                                String s = new String(buffer, 0, limit);
1697                                int i0 = s.indexOf("encoding");
1698                                if (i0 != -1) {
1699                                    while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') {
1700                                        i0++;
1701                                    }
1702                                    char deli = s.charAt(i0++);
1703                                    int i1 = s.indexOf(deli, i0);
1704                                    charset = s.substring(i0, i1);
1705                                }
1706                                break;
1707                            }
1708                        }
1709                        break;
1710
1711                    default:
1712                        // handle a byte order mark followed by something other than <?
1713                        if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) {
1714                            charset = "UTF-16BE";
1715                            buffer[0] = (char) ((buffer[2] << 8) | buffer[3]);
1716                            limit = 1;
1717                        } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) {
1718                            charset = "UTF-16LE";
1719                            buffer[0] = (char) ((buffer[3] << 8) | buffer[2]);
1720                            limit = 1;
1721                        } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) {
1722                            charset = "UTF-8";
1723                            buffer[0] = buffer[3];
1724                            limit = 1;
1725                        }
1726                    }
1727                }
1728            }
1729
1730            if (charset == null) {
1731                charset = "UTF-8";
1732            }
1733
1734            int savedLimit = limit;
1735            setInput(new InputStreamReader(is, charset));
1736            encoding = charset;
1737            limit = savedLimit;
1738
1739            /*
1740             * Skip the optional BOM if we didn't above. This decrements limit
1741             * rather than incrementing position so that <?xml version='1.0'?>
1742             * is still at character 0.
1743             */
1744            if (!detectCharset && peekCharacter() == 0xfeff) {
1745                limit--;
1746                System.arraycopy(buffer, 1, buffer, 0, limit);
1747            }
1748        } catch (Exception e) {
1749            throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e);
1750        }
1751    }
1752
1753    public void close() throws IOException {
1754        if (reader != null) {
1755            reader.close();
1756        }
1757    }
1758
1759    public boolean getFeature(String feature) {
1760        if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
1761            return processNsp;
1762        } else if (FEATURE_RELAXED.equals(feature)) {
1763            return relaxed;
1764        } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) {
1765            return processDocDecl;
1766        } else {
1767            return false;
1768        }
1769    }
1770
1771    public String getInputEncoding() {
1772        return encoding;
1773    }
1774
1775    public void defineEntityReplacementText(String entity, String value)
1776            throws XmlPullParserException {
1777        if (processDocDecl) {
1778            throw new IllegalStateException(
1779                    "Entity replacement text may not be defined with DOCTYPE processing enabled.");
1780        }
1781        if (reader == null) {
1782            throw new IllegalStateException(
1783                    "Entity replacement text must be defined after setInput()");
1784        }
1785        if (documentEntities == null) {
1786            documentEntities = new HashMap<String, char[]>();
1787        }
1788        documentEntities.put(entity, value.toCharArray());
1789    }
1790
1791    public Object getProperty(String property) {
1792        if (property.equals(PROPERTY_XMLDECL_VERSION)) {
1793            return version;
1794        } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) {
1795            return standalone;
1796        } else if (property.equals(PROPERTY_LOCATION)) {
1797            return location != null ? location : reader.toString();
1798        } else {
1799            return null;
1800        }
1801    }
1802
1803    /**
1804     * Returns the root element's name if it was declared in the DTD. This
1805     * equals the first tag's name for valid documents.
1806     */
1807    public String getRootElementName() {
1808        return rootElementName;
1809    }
1810
1811    /**
1812     * Returns the document's system ID if it was declared. This is typically a
1813     * string like {@code http://www.w3.org/TR/html4/strict.dtd}.
1814     */
1815    public String getSystemId() {
1816        return systemId;
1817    }
1818
1819    /**
1820     * Returns the document's public ID if it was declared. This is typically a
1821     * string like {@code -//W3C//DTD HTML 4.01//EN}.
1822     */
1823    public String getPublicId() {
1824        return publicId;
1825    }
1826
1827    public int getNamespaceCount(int depth) {
1828        if (depth > this.depth) {
1829            throw new IndexOutOfBoundsException();
1830        }
1831        return nspCounts[depth];
1832    }
1833
1834    public String getNamespacePrefix(int pos) {
1835        return nspStack[pos * 2];
1836    }
1837
1838    public String getNamespaceUri(int pos) {
1839        return nspStack[(pos * 2) + 1];
1840    }
1841
1842    public String getNamespace(String prefix) {
1843        if ("xml".equals(prefix)) {
1844            return "http://www.w3.org/XML/1998/namespace";
1845        }
1846        if ("xmlns".equals(prefix)) {
1847            return "http://www.w3.org/2000/xmlns/";
1848        }
1849
1850        for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) {
1851            if (prefix == null) {
1852                if (nspStack[i] == null) {
1853                    return nspStack[i + 1];
1854                }
1855            } else if (prefix.equals(nspStack[i])) {
1856                return nspStack[i + 1];
1857            }
1858        }
1859        return null;
1860    }
1861
1862    public int getDepth() {
1863        return depth;
1864    }
1865
1866    public String getPositionDescription() {
1867        StringBuilder buf = new StringBuilder(type < TYPES.length ? TYPES[type] : "unknown");
1868        buf.append(' ');
1869
1870        if (type == START_TAG || type == END_TAG) {
1871            if (degenerated) {
1872                buf.append("(empty) ");
1873            }
1874            buf.append('<');
1875            if (type == END_TAG) {
1876                buf.append('/');
1877            }
1878
1879            if (prefix != null) {
1880                buf.append("{" + namespace + "}" + prefix + ":");
1881            }
1882            buf.append(name);
1883
1884            int cnt = attributeCount * 4;
1885            for (int i = 0; i < cnt; i += 4) {
1886                buf.append(' ');
1887                if (attributes[i + 1] != null) {
1888                    buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":");
1889                }
1890                buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'");
1891            }
1892
1893            buf.append('>');
1894        } else if (type == IGNORABLE_WHITESPACE) {
1895            ;
1896        } else if (type != TEXT) {
1897            buf.append(getText());
1898        } else if (isWhitespace) {
1899            buf.append("(whitespace)");
1900        } else {
1901            String text = getText();
1902            if (text.length() > 16) {
1903                text = text.substring(0, 16) + "...";
1904            }
1905            buf.append(text);
1906        }
1907
1908        buf.append("@" + getLineNumber() + ":" + getColumnNumber());
1909        if (location != null) {
1910            buf.append(" in ");
1911            buf.append(location);
1912        } else if (reader != null) {
1913            buf.append(" in ");
1914            buf.append(reader.toString());
1915        }
1916        return buf.toString();
1917    }
1918
1919    public int getLineNumber() {
1920        int result = bufferStartLine;
1921        for (int i = 0; i < position; i++) {
1922            if (buffer[i] == '\n') {
1923                result++;
1924            }
1925        }
1926        return result + 1; // the first line is '1'
1927    }
1928
1929    public int getColumnNumber() {
1930        int result = bufferStartColumn;
1931        for (int i = 0; i < position; i++) {
1932            if (buffer[i] == '\n') {
1933                result = 0;
1934            } else {
1935                result++;
1936            }
1937        }
1938        return result + 1; // the first column is '1'
1939    }
1940
1941    public boolean isWhitespace() throws XmlPullParserException {
1942        if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) {
1943            throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1944        }
1945        return isWhitespace;
1946    }
1947
1948    public String getText() {
1949        if (type < TEXT || (type == ENTITY_REF && unresolved)) {
1950            return null;
1951        } else if (text == null) {
1952            return "";
1953        } else {
1954            return text;
1955        }
1956    }
1957
1958    public char[] getTextCharacters(int[] poslen) {
1959        String text = getText();
1960        if (text == null) {
1961            poslen[0] = -1;
1962            poslen[1] = -1;
1963            return null;
1964        }
1965        char[] result = text.toCharArray();
1966        poslen[0] = 0;
1967        poslen[1] = result.length;
1968        return result;
1969    }
1970
1971    public String getNamespace() {
1972        return namespace;
1973    }
1974
1975    public String getName() {
1976        return name;
1977    }
1978
1979    public String getPrefix() {
1980        return prefix;
1981    }
1982
1983    public boolean isEmptyElementTag() throws XmlPullParserException {
1984        if (type != START_TAG) {
1985            throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1986        }
1987        return degenerated;
1988    }
1989
1990    public int getAttributeCount() {
1991        return attributeCount;
1992    }
1993
    /**
     * Returns the attribute type, which this parser always reports as
     * {@code "CDATA"}.
     *
     * @param index ignored; the result does not depend on it and it is not
     *     range-checked
     */
    public String getAttributeType(int index) {
        return "CDATA";
    }
1997
    /**
     * Reports whether the attribute is a default attribute; this parser
     * always answers {@code false}.
     *
     * @param index ignored; the result does not depend on it and it is not
     *     range-checked
     */
    public boolean isAttributeDefault(int index) {
        return false;
    }
2001
2002    public String getAttributeNamespace(int index) {
2003        if (index >= attributeCount) {
2004            throw new IndexOutOfBoundsException();
2005        }
2006        return attributes[index * 4];
2007    }
2008
2009    public String getAttributeName(int index) {
2010        if (index >= attributeCount) {
2011            throw new IndexOutOfBoundsException();
2012        }
2013        return attributes[(index * 4) + 2];
2014    }
2015
2016    public String getAttributePrefix(int index) {
2017        if (index >= attributeCount) {
2018            throw new IndexOutOfBoundsException();
2019        }
2020        return attributes[(index * 4) + 1];
2021    }
2022
2023    public String getAttributeValue(int index) {
2024        if (index >= attributeCount) {
2025            throw new IndexOutOfBoundsException();
2026        }
2027        return attributes[(index * 4) + 3];
2028    }
2029
2030    public String getAttributeValue(String namespace, String name) {
2031        for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) {
2032            if (attributes[i + 2].equals(name)
2033                    && (namespace == null || attributes[i].equals(namespace))) {
2034                return attributes[i + 3];
2035            }
2036        }
2037
2038        return null;
2039    }
2040
2041    public int getEventType() throws XmlPullParserException {
2042        return type;
2043    }
2044
2045    // utility methods to make XML parsing easier ...
2046
2047    public int nextTag() throws XmlPullParserException, IOException {
2048        next();
2049        if (type == TEXT && isWhitespace) {
2050            next();
2051        }
2052
2053        if (type != END_TAG && type != START_TAG) {
2054            throw new XmlPullParserException("unexpected type", this, null);
2055        }
2056
2057        return type;
2058    }
2059
2060    public void require(int type, String namespace, String name)
2061            throws XmlPullParserException, IOException {
2062        if (type != this.type
2063                || (namespace != null && !namespace.equals(getNamespace()))
2064                || (name != null && !name.equals(getName()))) {
2065            throw new XmlPullParserException(
2066                    "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null);
2067        }
2068    }
2069
2070    public String nextText() throws XmlPullParserException, IOException {
2071        if (type != START_TAG) {
2072            throw new XmlPullParserException("precondition: START_TAG", this, null);
2073        }
2074
2075        next();
2076
2077        String result;
2078        if (type == TEXT) {
2079            result = getText();
2080            next();
2081        } else {
2082            result = "";
2083        }
2084
2085        if (type != END_TAG) {
2086            throw new XmlPullParserException("END_TAG expected", this, null);
2087        }
2088
2089        return result;
2090    }
2091
2092    public void setFeature(String feature, boolean value) throws XmlPullParserException {
2093        if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
2094            processNsp = value;
2095        } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) {
2096            processDocDecl = value;
2097        } else if (FEATURE_RELAXED.equals(feature)) {
2098            relaxed = value;
2099        } else {
2100            throw new XmlPullParserException("unsupported feature: " + feature, this, null);
2101        }
2102    }
2103
2104    public void setProperty(String property, Object value) throws XmlPullParserException {
2105        if (property.equals(PROPERTY_LOCATION)) {
2106            location = String.valueOf(value);
2107        } else {
2108            throw new XmlPullParserException("unsupported property: " + property);
2109        }
2110    }
2111
2112    /**
2113     * A chain of buffers containing XML content. Each content source contains
2114     * the parser's primary read buffer or the characters of entities actively
2115     * being parsed.
2116     *
2117     * <p>For example, note the buffers needed to parse this document:
2118     * <pre>   {@code
2119     *   <!DOCTYPE foo [
2120     *       <!ENTITY baz "ghi">
2121     *       <!ENTITY bar "def &baz; jkl">
2122     *   ]>
2123     *   <foo>abc &bar; mno</foo>
2124     * }</pre>
2125     *
2126     * <p>Things get interesting when the bar entity is encountered. At that
2127     * point two buffers are active:
2128     * <ol>
2129     * <li>The value for the bar entity, containing {@code "def &baz; jkl"}
2130     * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2131     * </ol>
2132     * <p>The parser will return the characters {@code "def "} from the bar
2133     * entity's buffer, and then it will encounter the baz entity. To handle
2134     * that, three buffers will be active:
2135     * <ol>
2136     * <li>The value for the baz entity, containing {@code "ghi"}
2137     * <li>The remaining value for the bar entity, containing {@code " jkl"}
2138     * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2139     * </ol>
2140     * <p>The parser will then return the characters {@code ghi jkl mno} in that
2141     * sequence by reading each buffer in sequence.
2142     */
2143    static class ContentSource {
2144        private final ContentSource next;
2145        private final char[] buffer;
2146        private final int position;
2147        private final int limit;
2148        ContentSource(ContentSource next, char[] buffer, int position, int limit) {
2149            this.next = next;
2150            this.buffer = buffer;
2151            this.position = position;
2152            this.limit = limit;
2153        }
2154    }
2155
2156    /**
2157     * Prepends the characters of {@code newBuffer} to be read before the
2158     * current buffer.
2159     */
2160    private void pushContentSource(char[] newBuffer) {
2161        nextContentSource = new ContentSource(nextContentSource, buffer, position, limit);
2162        buffer = newBuffer;
2163        position = 0;
2164        limit = newBuffer.length;
2165    }
2166
2167    /**
2168     * Replaces the current exhausted buffer with the next buffer in the chain.
2169     */
2170    private void popContentSource() {
2171        buffer = nextContentSource.buffer;
2172        position = nextContentSource.position;
2173        limit = nextContentSource.limit;
2174        nextContentSource = nextContentSource.next;
2175    }
2176}
2177