1/* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The  above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE. */
20
21// Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode)
22
23package org.kxml2.io;
24
25import java.io.Closeable;
26import java.io.IOException;
27import java.io.InputStream;
28import java.io.InputStreamReader;
29import java.io.Reader;
30import java.util.HashMap;
31import java.util.Map;
32import libcore.internal.StringPool;
33import org.xmlpull.v1.XmlPullParser;
34import org.xmlpull.v1.XmlPullParserException;
35
36/**
37 * An XML pull parser with limited support for parsing internal DTDs.
38 */
39public class KXmlParser implements XmlPullParser, Closeable {
40
    // Property and feature identifiers in the xmlpull.org URI namespace. The
    // code that matches against them is outside this excerpt (presumably the
    // getProperty()/getFeature()/setFeature() implementations -- confirm there).
    private static final String PROPERTY_XMLDECL_VERSION
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
    private static final String PROPERTY_XMLDECL_STANDALONE
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
    private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
    private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed";

    // Maps the five entity names lt, gt, amp, apos and quot to the single
    // character each one stands for. Populated once by the static initializer
    // below.
    private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>();
    static {
        DEFAULT_ENTITIES.put("lt", "<");
        DEFAULT_ENTITIES.put("gt", ">");
        DEFAULT_ENTITIES.put("amp", "&");
        DEFAULT_ENTITIES.put("apos", "'");
        DEFAULT_ENTITIES.put("quot", "\"");
    }
56
    // Private token types for declarations inside a DTD's internal subset.
    // readInternalSubset() switches on these after calling peekType(true).
    private static final int ELEMENTDECL = 11;
    private static final int ENTITYDECL = 12;
    private static final int ATTLISTDECL = 13;
    private static final int NOTATIONDECL = 14;
    private static final int PARAMETER_ENTITY_REF = 15;

    // Literal character sequences that open or close the various markup
    // constructs. They are passed to read(char[]) to consume an expected
    // sequence, or to readUntil(char[], boolean) as the terminating delimiter.
    private static final char[] START_COMMENT = { '<', '!', '-', '-' };
    private static final char[] END_COMMENT = { '-', '-', '>' };
    private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
    private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
    private static final char[] END_CDATA = { ']', ']', '>' };
    private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
    private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
    private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
    private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' };
    private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' };
    private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' };
    private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' };
    private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' };
    private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    // Keywords that may appear inside DTD declarations.
    private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' };
    private static final char[] ANY = new char[]{ 'A', 'N', 'Y' };
    private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' };
    private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' };
    private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' };
    private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' };

    // Shared exception messages.
    static final private String UNEXPECTED_EOF = "Unexpected EOF";
    static final private String ILLEGAL_TYPE = "Wrong event type";
    // Private token type: next(boolean) consumes the XML declaration via
    // readXmlDeclaration() when peekType(false) yields this value.
    static final private int XML_DECLARATION = 998;
87
    // general
    private String location;

    // version and standalone are filled in by readXmlDeclaration();
    // rootElementName, systemId and publicId by readDoctype() and
    // readExternalId().
    private String version;
    private Boolean standalone;
    private String rootElementName;
    private String systemId;
    private String publicId;

    /**
     * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines
     * entity values and default attribute values. These values are parsed at
     * inclusion time and may contain both tags and entity references.
     *
     * <p>If this is false, the user must {@link #defineEntityReplacementText
     * define entity values manually}. Such entity values are literal strings
     * and will not be parsed. There is no API to define default attributes
     * manually.
     */
    private boolean processDocDecl;
    private boolean processNsp;
    // When true, checkRelaxed() records the first problem in 'error' instead
    // of throwing.
    private boolean relaxed;
    // When true, adjustNsp() leaves xmlns declarations in the attribute list.
    private boolean keepNamespaceAttributes;

    /**
     * If non-null, the contents of the read buffer must be copied into this
     * string builder before the read buffer is overwritten. This is used to
     * capture the raw DTD text while parsing the DTD.
     */
    private StringBuilder bufferCapture;

    /**
     * Entities defined in or for this document. This map is created lazily.
     */
    private Map<String, char[]> documentEntities;

    /**
     * Default attributes in this document. The outer map's key is the element
     * name; the inner map's key is the attribute name. Both keys should be
     * without namespace adjustments. This map is created lazily.
     */
    private Map<String, Map<String, String>> defaultAttributes;


    // Element nesting state. next(boolean) decrements depth after an END_TAG
    // has been reported. adjustNsp() stores namespace declarations in nspStack
    // as (prefix, URI) pairs, with a null prefix for a plain "xmlns"
    // declaration, and counts them in nspCounts[depth].
    private int depth;
    private String[] elementStack = new String[16];
    private String[] nspStack = new String[8];
    private int[] nspCounts = new int[4];

    // source

    private Reader reader;
    // Overwritten by readXmlDeclaration() when the declaration names an encoding.
    private String encoding;
    private ContentSource nextContentSource;
    // Read buffer. 'position' is the index of the next character to consume;
    // readUntil() refills the buffer when a read would reach past 'limit'.
    private char[] buffer = new char[8192];
    private int position = 0;
    private int limit = 0;

    /*
     * Track the number of newlines and columns preceding the current buffer. To
     * compute the line and column of a position in the buffer, compute the line
     * and column in the buffer and add the preceding values.
     */
    private int bufferStartLine;
    private int bufferStartColumn;

    // the current token

    private int type;
    private boolean isWhitespace;
    private String namespace;
    private String prefix;
    private String name;
    private String text;

    // When set, the next call to next(boolean) reports END_TAG without reading
    // any further input, then clears the flag.
    private boolean degenerated;
    // Number of 4-slot groups in use in 'attributes'; next(boolean) resets it
    // to -1 before reading a new token.
    private int attributeCount;

    // True iff we have encountered the START_TAG of an XML element at
    // depth == 0. next(boolean) rejects a DOCTYPE read while this is set.
    private boolean parsedTopLevelStartTag;

    /*
     * The current element's attributes arranged in groups of 4:
     * i + 0 = attribute namespace URI
     * i + 1 = attribute namespace prefix
     * i + 2 = attribute qualified name (may contain ":", as in "html:h1")
     * i + 3 = attribute value
     */
    private String[] attributes = new String[16];

    // First problem recorded by checkRelaxed() in relaxed mode. nextToken()
    // reports it as a COMMENT token; next() discards it.
    private String error;

    private boolean unresolved;

    // Used by readUntil() to obtain the String for a slice of 'buffer'.
    public final StringPool stringPool = new StringPool();
183
184    /**
185     * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"}
186     * in pulled elements. Most applications will only be interested in the effective namespaces of
187     * their elements, so these attributes aren't useful. But for structure preserving wrappers like
188     * DOM, it is necessary to keep the namespace data around.
189     */
190    public void keepNamespaceAttributes() {
191        this.keepNamespaceAttributes = true;
192    }
193
    /**
     * Applies namespace processing to the current start tag.
     *
     * <p>The first pass records every {@code xmlns} / {@code xmlns:prefix}
     * attribute on the namespace stack for the current depth and, unless
     * namespace attributes are being kept, removes it from the attribute list.
     * If required, a second pass splits each remaining prefixed attribute name
     * into prefix and local name and resolves its namespace. Finally the
     * element's own name is split and resolved, falling back to
     * {@code NO_NAMESPACE} when nothing is bound.
     *
     * @return true if any attribute carried a prefix other than {@code xmlns},
     *     or if a namespace declaration was retained in the attribute list
     * @throws XmlPullParserException in strict mode, for an empty prefixed
     *     namespace declaration, a tag name starting with ':' or an undefined
     *     tag prefix
     * @throws RuntimeException in strict mode, for an attribute name starting
     *     with ':' or an attribute prefix that is not bound
     */
    private boolean adjustNsp() throws XmlPullParserException {
        boolean any = false;

        // Pass 1: collect namespace declarations. Each attribute occupies four
        // slots, hence the stride of 4 and the "<< 2" on the count.
        for (int i = 0; i < attributeCount << 2; i += 4) {
            String attrName = attributes[i + 2];
            int cut = attrName.indexOf(':');
            String prefix;

            if (cut != -1) {
                prefix = attrName.substring(0, cut);
                attrName = attrName.substring(cut + 1);
            } else if (attrName.equals("xmlns")) {
                // default namespace declaration: recorded with a null prefix
                prefix = attrName;
                attrName = null;
            } else {
                // unprefixed ordinary attribute: nothing to do
                continue;
            }

            if (!prefix.equals("xmlns")) {
                // an ordinary prefixed attribute; resolve it in pass 2
                any = true;
            } else {
                // push (prefix, uri) onto the namespace stack for this depth
                int j = (nspCounts[depth]++) << 1;

                nspStack = ensureCapacity(nspStack, j + 2);
                nspStack[j] = attrName;
                nspStack[j + 1] = attributes[i + 3];

                if (attrName != null && attributes[i + 3].isEmpty()) {
                    checkRelaxed("illegal empty namespace");
                }

                if (keepNamespaceAttributes) {
                    // explicitly set the namespace for unprefixed attributes
                    // such as xmlns="http://foo"
                    attributes[i] = "http://www.w3.org/2000/xmlns/";
                    any = true;
                } else {
                    // drop this attribute by shifting the following ones down
                    System.arraycopy(
                            attributes,
                            i + 4,
                            attributes,
                            i,
                            ((--attributeCount) << 2) - i);

                    // re-examine the slot that now holds the next attribute
                    i -= 4;
                }
            }
        }

        // Pass 2: split and resolve the prefixes of the remaining attributes.
        if (any) {
            for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {

                String attrName = attributes[i + 2];
                int cut = attrName.indexOf(':');

                if (cut == 0 && !relaxed) {
                    throw new RuntimeException(
                            "illegal attribute name: " + attrName + " at " + this);
                } else if (cut != -1) {
                    String attrPrefix = attrName.substring(0, cut);

                    attrName = attrName.substring(cut + 1);

                    String attrNs = getNamespace(attrPrefix);

                    if (attrNs == null && !relaxed) {
                        throw new RuntimeException(
                                "Undefined Prefix: " + attrPrefix + " in " + this);
                    }

                    attributes[i] = attrNs;
                    attributes[i + 1] = attrPrefix;
                    attributes[i + 2] = attrName;
                }
            }
        }

        // Finally split and resolve the element's own qualified name.
        int cut = name.indexOf(':');

        if (cut == 0) {
            checkRelaxed("illegal tag name: " + name);
        }

        if (cut != -1) {
            prefix = name.substring(0, cut);
            name = name.substring(cut + 1);
        }

        this.namespace = getNamespace(prefix);

        if (this.namespace == null) {
            if (prefix != null) {
                checkRelaxed("undefined prefix: " + prefix);
            }
            this.namespace = NO_NAMESPACE;
        }

        return any;
    }
293
294    private String[] ensureCapacity(String[] arr, int required) {
295        if (arr.length >= required) {
296            return arr;
297        }
298        String[] bigger = new String[required + 16];
299        System.arraycopy(arr, 0, bigger, 0, arr.length);
300        return bigger;
301    }
302
303    private void checkRelaxed(String errorMessage) throws XmlPullParserException {
304        if (!relaxed) {
305            throw new XmlPullParserException(errorMessage, this, null);
306        }
307        if (error == null) {
308            error = "Error: " + errorMessage;
309        }
310    }
311
312    public int next() throws XmlPullParserException, IOException {
313        return next(false);
314    }
315
316    public int nextToken() throws XmlPullParserException, IOException {
317        return next(true);
318    }
319
    /**
     * Shared implementation of {@link #next()} and {@link #nextToken()}.
     *
     * @param justOneToken true to return after every individual token
     *     (comments, processing instructions, declarations and each piece of
     *     text included); false to skip comments, processing instructions and
     *     declarations and to merge adjacent text, CDATA and entity
     *     references into one TEXT event
     * @return the type of the event now current, also stored in {@code type}
     * @throws XmlPullParserException if no input has been set or the document
     *     is malformed
     */
    private int next(boolean justOneToken) throws IOException, XmlPullParserException {
        if (reader == null) {
            throw new XmlPullParserException("setInput() must be called first.", this, null);
        }

        // leaving the element whose END_TAG was reported by the previous call
        if (type == END_TAG) {
            depth--;
        }

        // degenerated needs to be handled before error because of possible
        // processor expectations(!)

        if (degenerated) {
            degenerated = false;
            type = END_TAG;
            return type;
        }

        // A problem recorded in relaxed mode is surfaced as a COMMENT token
        // when reading token by token; otherwise it is silently dropped.
        if (error != null) {
            if (justOneToken) {
                text = error;
                type = COMMENT;
                error = null;
                return type;
            } else {
                error = null;
            }
        }

        type = peekType(false);

        // the XML declaration is consumed here and never reported as an event
        if (type == XML_DECLARATION) {
            readXmlDeclaration();
            type = peekType(false);
        }

        // reset the per-token state
        text = null;
        isWhitespace = true;
        prefix = null;
        name = null;
        namespace = null;
        attributeCount = -1;
        boolean throwOnResolveFailure = !justOneToken;

        while (true) {
            switch (type) {

            /*
             * Return immediately after encountering a start tag, end tag, or
             * the end of the document.
             */
            case START_TAG:
                parseStartTag(false, throwOnResolveFailure);
                return type;
            case END_TAG:
                readEndTag();
                return type;
            case END_DOCUMENT:
                return type;

            /*
             * Return after any text token when we're looking for a single
             * token. Otherwise concatenate all text between tags.
             */
            case ENTITY_REF:
                if (justOneToken) {
                    StringBuilder entityTextBuilder = new StringBuilder();
                    readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT);
                    text = entityTextBuilder.toString();
                    break;
                }
                // fall-through
            case TEXT:
                text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT);
                // whitespace-only text outside the root element
                if (depth == 0 && isWhitespace) {
                    type = IGNORABLE_WHITESPACE;
                }
                break;
            case CDSECT:
                read(START_CDATA);
                text = readUntil(END_CDATA, true);
                break;

            /*
             * Comments, processing instructions and declarations are returned
             * when we're looking for a single token. Otherwise they're skipped.
             */
            case COMMENT:
                String commentText = readComment(justOneToken);
                if (justOneToken) {
                    text = commentText;
                }
                break;
            case PROCESSING_INSTRUCTION:
                read(START_PROCESSING_INSTRUCTION);
                String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
                if (justOneToken) {
                    text = processingInstruction;
                }
                break;
            case DOCDECL:
                readDoctype(justOneToken);
                // a DOCTYPE is rejected once the root start tag has been seen
                if (parsedTopLevelStartTag) {
                    throw new XmlPullParserException("Unexpected token", this, null);
                }
                break;

            default:
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            // character data is not allowed outside the root element
            if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) {
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            if (justOneToken) {
                return type;
            }

            if (type == IGNORABLE_WHITESPACE) {
                text = null;
            }

            /*
             * We've read all that we can of a non-empty text block. Always
             * report this as text, even if it was a CDATA block or entity
             * reference.
             */
            int peek = peekType(false);
            if (text != null && !text.isEmpty() && peek < TEXT) {
                type = TEXT;
                return type;
            }

            // otherwise keep going with the token that follows
            type = peek;
        }
    }
457
    /**
     * Reads text until the specified delimiter is encountered. Consumes the
     * text and the delimiter.
     *
     * <p>If {@code returnText} is true and the {@code text} field is already
     * non-null, the returned string starts with that existing text.
     *
     * <p>If the input ends before the delimiter is found, strict mode throws.
     * Relaxed mode records the error, sets the current token type to
     * {@code COMMENT} and returns null.
     *
     * @param delimiter the character sequence that terminates the text
     * @param returnText true to return the read text excluding the delimiter;
     *     false to return null.
     */
    private String readUntil(char[] delimiter, boolean returnText)
            throws IOException, XmlPullParserException {
        // start of the not-yet-saved run of characters within the buffer
        int start = position;
        StringBuilder result = null;

        if (returnText && text != null) {
            result = new StringBuilder();
            result.append(text);
        }

        search:
        while (true) {
            if (position + delimiter.length > limit) {
                // save what was scanned so far before the buffer is refilled
                if (start < position && returnText) {
                    if (result == null) {
                        result = new StringBuilder();
                    }
                    result.append(buffer, start, position - start);
                }
                if (!fillBuffer(delimiter.length)) {
                    checkRelaxed(UNEXPECTED_EOF);
                    type = COMMENT;
                    return null;
                }
                start = position;
            }

            // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
            // when the VM has better method inlining
            for (int i = 0; i < delimiter.length; i++) {
                if (buffer[position + i] != delimiter[i]) {
                    // mismatch: retry one character further along
                    position++;
                    continue search;
                }
            }

            break;
        }

        // the delimiter starts at 'end'; step over it
        int end = position;
        position += delimiter.length;

        if (!returnText) {
            return null;
        } else if (result == null) {
            // everything is in the buffer: build the string from it directly
            return stringPool.get(buffer, start, end - start);
        } else {
            result.append(buffer, start, end - start);
            return result.toString();
        }
    }
516
517    /**
518     * Returns true if an XML declaration was read.
519     */
520    private void readXmlDeclaration() throws IOException, XmlPullParserException {
521        if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) {
522            checkRelaxed("processing instructions must not start with xml");
523        }
524
525        read(START_PROCESSING_INSTRUCTION);
526        parseStartTag(true, true);
527
528        if (attributeCount < 1 || !"version".equals(attributes[2])) {
529            checkRelaxed("version expected");
530        }
531
532        version = attributes[3];
533
534        int pos = 1;
535
536        if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) {
537            encoding = attributes[3 + 4];
538            pos++;
539        }
540
541        if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) {
542            String st = attributes[3 + 4 * pos];
543            if ("yes".equals(st)) {
544                standalone = Boolean.TRUE;
545            } else if ("no".equals(st)) {
546                standalone = Boolean.FALSE;
547            } else {
548                checkRelaxed("illegal standalone value: " + st);
549            }
550            pos++;
551        }
552
553        if (pos != attributeCount) {
554            checkRelaxed("unexpected attributes in XML declaration");
555        }
556
557        isWhitespace = true;
558        text = null;
559    }
560
561    private String readComment(boolean returnText) throws IOException, XmlPullParserException {
562        read(START_COMMENT);
563
564        if (relaxed) {
565            return readUntil(END_COMMENT, returnText);
566        }
567
568        String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText);
569        if (peekCharacter() != '>') {
570            throw new XmlPullParserException("Comments may not contain --", this, null);
571        }
572        position++;
573        return commentText;
574    }
575
576    /**
577     * Read the document's DTD. Although this parser is non-validating, the DTD
578     * must be parsed to capture entity values and default attribute values.
579     */
580    private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException {
581        read(START_DOCTYPE);
582
583        int startPosition = -1;
584        if (saveDtdText) {
585            bufferCapture = new StringBuilder();
586            startPosition = position;
587        }
588        try {
589            skip();
590            rootElementName = readName();
591            readExternalId(true, true);
592            skip();
593            if (peekCharacter() == '[') {
594                readInternalSubset();
595            }
596            skip();
597        } finally {
598            if (saveDtdText) {
599                bufferCapture.append(buffer, 0, position);
600                bufferCapture.delete(0, startPosition);
601                text = bufferCapture.toString();
602                bufferCapture = null;
603            }
604        }
605
606        read('>');
607        skip();
608    }
609
610    /**
611     * Reads an external ID of one of these two forms:
612     *   SYSTEM "quoted system name"
613     *   PUBLIC "quoted public id" "quoted system name"
614     *
615     * If the system name is not required, this also supports lone public IDs of
616     * this form:
617     *   PUBLIC "quoted public id"
618     *
619     * Returns true if any ID was read.
620     */
621    private boolean readExternalId(boolean requireSystemName, boolean assignFields)
622            throws IOException, XmlPullParserException {
623        skip();
624        int c = peekCharacter();
625
626        if (c == 'S') {
627            read(SYSTEM);
628        } else if (c == 'P') {
629            read(PUBLIC);
630            skip();
631            if (assignFields) {
632                publicId = readQuotedId(true);
633            } else {
634                readQuotedId(false);
635            }
636        } else {
637            return false;
638        }
639
640        skip();
641
642        if (!requireSystemName) {
643            int delimiter = peekCharacter();
644            if (delimiter != '"' && delimiter != '\'') {
645                return true; // no system name!
646            }
647        }
648
649        if (assignFields) {
650            systemId = readQuotedId(true);
651        } else {
652            readQuotedId(false);
653        }
654        return true;
655    }
656
    // One-character delimiters handed to readUntil() by readQuotedId() to find
    // the closing quote of a quoted string.
    private static final char[] SINGLE_QUOTE = new char[] { '\'' };
    private static final char[] DOUBLE_QUOTE = new char[] { '"' };
659
660    /**
661     * Reads a quoted string, performing no entity escaping of the contents.
662     */
663    private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException {
664        int quote = peekCharacter();
665        char[] delimiter;
666        if (quote == '"') {
667            delimiter = DOUBLE_QUOTE;
668        } else if (quote == '\'') {
669            delimiter = SINGLE_QUOTE;
670        } else {
671            throw new XmlPullParserException("Expected a quoted string", this, null);
672        }
673        position++;
674        return readUntil(delimiter, returnText);
675    }
676
677    private void readInternalSubset() throws IOException, XmlPullParserException {
678        read('[');
679
680        while (true) {
681            skip();
682            if (peekCharacter() == ']') {
683                position++;
684                return;
685            }
686
687            int declarationType = peekType(true);
688            switch (declarationType) {
689            case ELEMENTDECL:
690                readElementDeclaration();
691                break;
692
693            case ATTLISTDECL:
694                readAttributeListDeclaration();
695                break;
696
697            case ENTITYDECL:
698                readEntityDeclaration();
699                break;
700
701            case NOTATIONDECL:
702                readNotationDeclaration();
703                break;
704
705            case PROCESSING_INSTRUCTION:
706                read(START_PROCESSING_INSTRUCTION);
707                readUntil(END_PROCESSING_INSTRUCTION, false);
708                break;
709
710            case COMMENT:
711                readComment(false);
712                break;
713
714            case PARAMETER_ENTITY_REF:
715                throw new XmlPullParserException(
716                        "Parameter entity references are not supported", this, null);
717
718            default:
719                throw new XmlPullParserException("Unexpected token", this, null);
720            }
721        }
722    }
723
    /**
     * Read an element declaration. This contains a name and a content spec.
     *   <!ELEMENT foo EMPTY >
     *   <!ELEMENT foo (bar?,(baz|quux)) >
     *   <!ELEMENT foo (#PCDATA|bar)* >
     *
     * The declaration is only consumed; neither the name nor the content spec
     * is stored.
     */
    private void readElementDeclaration() throws IOException, XmlPullParserException {
        read(START_ELEMENT);
        skip();
        // the element name is read and discarded
        readName();
        readContentSpec();
        skip();
        read('>');
    }
738
739    /**
740     * Read an element content spec. This is a regular expression-like pattern
741     * of names or other content specs. The following operators are supported:
742     *   sequence:    (a,b,c)
743     *   choice:      (a|b|c)
744     *   optional:    a?
745     *   one or more: a+
746     *   any number:  a*
747     *
748     * The special name '#PCDATA' is permitted but only if it is the first
749     * element of the first group:
750     *   (#PCDATA|a|b)
751     *
752     * The top-level element must be either a choice, a sequence, or one of the
753     * special names EMPTY and ANY.
754     */
755    private void readContentSpec() throws IOException, XmlPullParserException {
756        // this implementation is very lenient; it scans for balanced parens only
757        skip();
758        int c = peekCharacter();
759        if (c == '(') {
760            int depth = 0;
761            do {
762                if (c == '(') {
763                    depth++;
764                } else if (c == ')') {
765                    depth--;
766                } else if (c == -1) {
767                    throw new XmlPullParserException(
768                            "Unterminated element content spec", this, null);
769                }
770                position++;
771                c = peekCharacter();
772            } while (depth > 0);
773
774            if (c == '*' || c == '?' || c == '+') {
775                position++;
776            }
777        } else if (c == EMPTY[0]) {
778            read(EMPTY);
779        } else if (c == ANY[0]) {
780            read(ANY);
781        } else {
782            throw new XmlPullParserException("Expected element content spec", this, null);
783        }
784    }
785
    /**
     * Reads an attribute list declaration such as the following:
     *   <!ATTLIST foo
     *       bar CDATA #IMPLIED
     *       quux (a|b|c) "c"
     *       baz NOTATION (a|b|c) #FIXED "c">
     *
     * Each attribute has a name, type and default.
     *
     * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY,
     * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)"
     * or NOTATION followed by an enumerated type.
     *
     * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or
     * #FIXED with a quoted value.
     *
     * Only quoted default values are retained, via defineAttributeDefault().
     * Attribute types and the #REQUIRED/#IMPLIED/#FIXED keywords are parsed
     * and discarded.
     */
    private void readAttributeListDeclaration() throws IOException, XmlPullParserException {
        read(START_ATTLIST);
        skip();
        String elementName = readName();

        // one iteration per attribute definition, until the closing '>'
        while (true) {
            skip();
            int c = peekCharacter();
            if (c == '>') {
                position++;
                return;
            }

            // attribute name
            String attributeName = readName();

            // attribute type
            skip();
            // two characters are needed to tell NOTATION from a built-in type
            if (position + 1 >= limit && !fillBuffer(2)) {
                throw new XmlPullParserException("Malformed attribute list", this, null);
            }
            if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) {
                read(NOTATION);
                skip();
            }
            c = peekCharacter();
            if (c == '(') {
                // enumerated type: names separated by '|' and closed by ')'
                position++;
                while (true) {
                    skip();
                    readName();
                    skip();
                    c = peekCharacter();
                    if (c == ')') {
                        position++;
                        break;
                    } else if (c == '|') {
                        position++;
                    } else {
                        throw new XmlPullParserException("Malformed attribute type", this, null);
                    }
                }
            } else {
                // built-in type name
                readName();
            }

            // default value
            skip();
            c = peekCharacter();
            if (c == '#') {
                position++;
                c = peekCharacter();
                if (c == 'R') {
                    read(REQUIRED);
                } else if (c == 'I') {
                    read(IMPLIED);
                } else if (c == 'F') {
                    read(FIXED);
                } else {
                    throw new XmlPullParserException("Malformed attribute type", this, null);
                }
                skip();
                c = peekCharacter();
            }
            if (c == '"' || c == '\'') {
                position++;
                // TODO: does this do escaping correctly?
                String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE);
                // step over the closing quote if it is present
                if (peekCharacter() == c) {
                    position++;
                }
                defineAttributeDefault(elementName, attributeName, value);
            }
        }
    }
877
878    private void defineAttributeDefault(String elementName, String attributeName, String value) {
879        if (defaultAttributes == null) {
880            defaultAttributes = new HashMap<String, Map<String, String>>();
881        }
882        Map<String, String> elementAttributes = defaultAttributes.get(elementName);
883        if (elementAttributes == null) {
884            elementAttributes = new HashMap<String, String>();
885            defaultAttributes.put(elementName, elementAttributes);
886        }
887        elementAttributes.put(attributeName, value);
888    }
889
890    /**
891     * Read an entity declaration. The value of internal entities are inline:
892     *   <!ENTITY foo "bar">
893     *
894     * The values of external entities must be retrieved by URL or path:
895     *   <!ENTITY foo SYSTEM "http://host/file">
896     *   <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file">
897     *   <!ENTITY foo SYSTEM "../file.png" NDATA png>
898     *
899     * Entities may be general or parameterized. Parameterized entities are
900     * marked by a percent sign. Such entities may only be used in the DTD:
901     *   <!ENTITY % foo "bar">
902     */
903    private void readEntityDeclaration() throws IOException, XmlPullParserException {
904        read(START_ENTITY);
905        boolean generalEntity = true;
906
907        skip();
908        if (peekCharacter() == '%') {
909            generalEntity = false;
910            position++;
911            skip();
912        }
913
914        String name = readName();
915
916        skip();
917        int quote = peekCharacter();
918        String entityValue;
919        if (quote == '"' || quote == '\'') {
920            position++;
921            entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION);
922            if (peekCharacter() == quote) {
923                position++;
924            }
925        } else if (readExternalId(true, false)) {
926            /*
927             * Map external entities to the empty string. This is dishonest,
928             * but it's consistent with Android's Expat pull parser.
929             */
930            entityValue = "";
931            skip();
932            if (peekCharacter() == NDATA[0]) {
933                read(NDATA);
934                skip();
935                readName();
936            }
937        } else {
938            throw new XmlPullParserException("Expected entity value or external ID", this, null);
939        }
940
941        if (generalEntity && processDocDecl) {
942            if (documentEntities == null) {
943                documentEntities = new HashMap<String, char[]>();
944            }
945            documentEntities.put(name, entityValue.toCharArray());
946        }
947
948        skip();
949        read('>');
950    }
951
952    private void readNotationDeclaration() throws IOException, XmlPullParserException {
953        read(START_NOTATION);
954        skip();
955        readName();
956        if (!readExternalId(false, false)) {
957            throw new XmlPullParserException(
958                    "Expected external ID or public ID for notation", this, null);
959        }
960        skip();
961        read('>');
962    }
963
964    private void readEndTag() throws IOException, XmlPullParserException {
965        read('<');
966        read('/');
967        name = readName(); // TODO: pass the expected name in as a hint?
968        skip();
969        read('>');
970
971        int sp = (depth - 1) * 4;
972
973        if (depth == 0) {
974            checkRelaxed("read end tag " + name + " with no tags open");
975            type = COMMENT;
976            return;
977        }
978
979        if (name.equals(elementStack[sp + 3])) {
980            namespace = elementStack[sp];
981            prefix = elementStack[sp + 1];
982            name = elementStack[sp + 2];
983        } else if (!relaxed) {
984            throw new XmlPullParserException(
985                    "expected: /" + elementStack[sp + 3] + " read: " + name, this, null);
986        }
987    }
988
989    /**
990     * Returns the type of the next token.
991     */
992    private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException {
993        if (position >= limit && !fillBuffer(1)) {
994            return END_DOCUMENT;
995        }
996
997        switch (buffer[position]) {
998        case '&':
999            return ENTITY_REF; // &
1000        case '<':
1001            if (position + 3 >= limit && !fillBuffer(4)) {
1002                throw new XmlPullParserException("Dangling <", this, null);
1003            }
1004
1005            switch (buffer[position + 1]) {
1006            case '/':
1007                return END_TAG; // </
1008            case '?':
1009                // we're looking for "<?xml " with case insensitivity
1010                if ((position + 5 < limit || fillBuffer(6))
1011                        && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
1012                        && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
1013                        && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
1014                        && (buffer[position + 5] == ' ')) {
1015                    return XML_DECLARATION; // <?xml
1016                } else {
1017                    return PROCESSING_INSTRUCTION; // <?
1018                }
1019            case '!':
1020                switch (buffer[position + 2]) {
1021                case 'D':
1022                    return DOCDECL; // <!D
1023                case '[':
1024                    return CDSECT; // <![
1025                case '-':
1026                    return COMMENT; // <!-
1027                case 'E':
1028                    switch (buffer[position + 3]) {
1029                    case 'L':
1030                        return ELEMENTDECL; // <!EL
1031                    case 'N':
1032                        return ENTITYDECL; // <!EN
1033                    }
1034                    break;
1035                case 'A':
1036                    return ATTLISTDECL;  // <!A
1037                case 'N':
1038                    return NOTATIONDECL; // <!N
1039                }
1040                throw new XmlPullParserException("Unexpected <!", this, null);
1041            default:
1042                return START_TAG; // <
1043            }
1044        case '%':
1045            return inDeclaration ? PARAMETER_ENTITY_REF : TEXT;
1046        default:
1047            return TEXT;
1048        }
1049    }
1050
1051    /**
1052     * Sets name and attributes
1053     */
1054    private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)
1055            throws IOException, XmlPullParserException {
1056        if (!xmldecl) {
1057            read('<');
1058        }
1059        name = readName();
1060        attributeCount = 0;
1061
1062        while (true) {
1063            skip();
1064
1065            if (position >= limit && !fillBuffer(1)) {
1066                checkRelaxed(UNEXPECTED_EOF);
1067                return;
1068            }
1069
1070            int c = buffer[position];
1071
1072            if (xmldecl) {
1073                if (c == '?') {
1074                    position++;
1075                    read('>');
1076                    return;
1077                }
1078            } else {
1079                if (c == '/') {
1080                    degenerated = true;
1081                    position++;
1082                    skip();
1083                    read('>');
1084                    break;
1085                } else if (c == '>') {
1086                    position++;
1087                    break;
1088                }
1089            }
1090
1091            String attrName = readName();
1092
1093            int i = (attributeCount++) * 4;
1094            attributes = ensureCapacity(attributes, i + 4);
1095            attributes[i] = "";
1096            attributes[i + 1] = null;
1097            attributes[i + 2] = attrName;
1098
1099            skip();
1100            if (position >= limit && !fillBuffer(1)) {
1101                checkRelaxed(UNEXPECTED_EOF);
1102                return;
1103            }
1104
1105            if (buffer[position] == '=') {
1106                position++;
1107
1108                skip();
1109                if (position >= limit && !fillBuffer(1)) {
1110                    checkRelaxed(UNEXPECTED_EOF);
1111                    return;
1112                }
1113                char delimiter = buffer[position];
1114
1115                if (delimiter == '\'' || delimiter == '"') {
1116                    position++;
1117                } else if (relaxed) {
1118                    delimiter = ' ';
1119                } else {
1120                    throw new XmlPullParserException("attr value delimiter missing!", this, null);
1121                }
1122
1123                attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure,
1124                        ValueContext.ATTRIBUTE);
1125
1126                if (delimiter != ' ' && peekCharacter() == delimiter) {
1127                    position++; // end quote
1128                }
1129            } else if (relaxed) {
1130                attributes[i + 3] = attrName;
1131            } else {
1132                checkRelaxed("Attr.value missing f. " + attrName);
1133                attributes[i + 3] = attrName;
1134            }
1135        }
1136
1137        int sp = depth++ * 4;
1138        if (depth == 1) {
1139            parsedTopLevelStartTag = true;
1140        }
1141        elementStack = ensureCapacity(elementStack, sp + 4);
1142        elementStack[sp + 3] = name;
1143
1144        if (depth >= nspCounts.length) {
1145            int[] bigger = new int[depth + 4];
1146            System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
1147            nspCounts = bigger;
1148        }
1149
1150        nspCounts[depth] = nspCounts[depth - 1];
1151
1152        if (processNsp) {
1153            adjustNsp();
1154        } else {
1155            namespace = "";
1156        }
1157
1158        // For consistency with Expat, add default attributes after fixing namespaces.
1159        if (defaultAttributes != null) {
1160            Map<String, String> elementDefaultAttributes = defaultAttributes.get(name);
1161            if (elementDefaultAttributes != null) {
1162                for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) {
1163                    if (getAttributeValue(null, entry.getKey()) != null) {
1164                        continue; // an explicit value overrides the default
1165                    }
1166
1167                    int i = (attributeCount++) * 4;
1168                    attributes = ensureCapacity(attributes, i + 4);
1169                    attributes[i] = "";
1170                    attributes[i + 1] = null;
1171                    attributes[i + 2] = entry.getKey();
1172                    attributes[i + 3] = entry.getValue();
1173                }
1174            }
1175        }
1176
1177        elementStack[sp] = namespace;
1178        elementStack[sp + 1] = prefix;
1179        elementStack[sp + 2] = name;
1180    }
1181
1182    /**
1183     * Reads an entity reference from the buffer, resolves it, and writes the
1184     * resolved entity to {@code out}. If the entity cannot be read or resolved,
1185     * {@code out} will contain the partial entity reference.
1186     */
1187    private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure,
1188            ValueContext valueContext) throws IOException, XmlPullParserException {
1189        int start = out.length();
1190
1191        if (buffer[position++] != '&') {
1192            throw new AssertionError();
1193        }
1194
1195        out.append('&');
1196
1197        while (true) {
1198            int c = peekCharacter();
1199
1200            if (c == ';') {
1201                out.append(';');
1202                position++;
1203                break;
1204
1205            } else if (c >= 128
1206                    || (c >= '0' && c <= '9')
1207                    || (c >= 'a' && c <= 'z')
1208                    || (c >= 'A' && c <= 'Z')
1209                    || c == '_'
1210                    || c == '-'
1211                    || c == '#') {
1212                position++;
1213                out.append((char) c);
1214
1215            } else if (relaxed) {
1216                // intentionally leave the partial reference in 'out'
1217                return;
1218
1219            } else {
1220                throw new XmlPullParserException("unterminated entity ref", this, null);
1221            }
1222        }
1223
1224        String code = out.substring(start + 1, out.length() - 1);
1225
1226        if (isEntityToken) {
1227            name = code;
1228        }
1229
1230        if (code.startsWith("#")) {
1231            try {
1232                int c = code.startsWith("#x")
1233                        ? Integer.parseInt(code.substring(2), 16)
1234                        : Integer.parseInt(code.substring(1));
1235                out.delete(start, out.length());
1236                out.appendCodePoint(c);
1237                unresolved = false;
1238                return;
1239            } catch (NumberFormatException notANumber) {
1240                throw new XmlPullParserException("Invalid character reference: &" + code);
1241            } catch (IllegalArgumentException invalidCodePoint) {
1242                throw new XmlPullParserException("Invalid character reference: &" + code);
1243            }
1244        }
1245
1246        if (valueContext == ValueContext.ENTITY_DECLARATION) {
1247            // keep the unresolved &code; in the text to resolve later
1248            return;
1249        }
1250
1251        String defaultEntity = DEFAULT_ENTITIES.get(code);
1252        if (defaultEntity != null) {
1253            out.delete(start, out.length());
1254            unresolved = false;
1255            out.append(defaultEntity);
1256            return;
1257        }
1258
1259        char[] resolved;
1260        if (documentEntities != null && (resolved = documentEntities.get(code)) != null) {
1261            out.delete(start, out.length());
1262            unresolved = false;
1263            if (processDocDecl) {
1264                pushContentSource(resolved); // parse the entity as XML
1265            } else {
1266                out.append(resolved); // include the entity value as text
1267            }
1268            return;
1269        }
1270
1271        /*
1272         * The parser skipped an external DTD, and now we've encountered an
1273         * unknown entity that could have been declared there. Map it to the
1274         * empty string. This is dishonest, but it's consistent with Android's
1275         * old ExpatPullParser.
1276         */
1277        if (systemId != null) {
1278            out.delete(start, out.length());
1279            return;
1280        }
1281
1282        // keep the unresolved entity "&code;" in the text for relaxed clients
1283        unresolved = true;
1284        if (throwOnResolveFailure) {
1285            checkRelaxed("unresolved: &" + code + ";");
1286        }
1287    }
1288
/**
 * The place in a document where a value was found. The same characters are
 * treated differently depending on where they occur, so value-reading code
 * is told which of these contexts it is working in.
 */
enum ValueContext {
    /** An attribute value: "\n" is replaced with a space character. */
    ATTRIBUTE,

    /** Character data between tags: "]]>" is forbidden. */
    TEXT,

    /** The literal value of an entity declaration: named references are not resolved. */
    ENTITY_DECLARATION
}
1300
1301    /**
1302     * Returns the current text or attribute value. This also has the side
1303     * effect of setting isWhitespace to false if a non-whitespace character is
1304     * encountered.
1305     *
1306     * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted
1307     *     attributes, or a space for unquoted attributes.
1308     */
1309    private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure,
1310            ValueContext valueContext) throws IOException, XmlPullParserException {
1311
1312        /*
1313         * This method returns all of the characters from the current position
1314         * through to an appropriate delimiter.
1315         *
1316         * If we're lucky (which we usually are), we'll return a single slice of
1317         * the buffer. This fast path avoids allocating a string builder.
1318         *
1319         * There are 6 unlucky characters we could encounter:
1320         *  - "&":  entities must be resolved.
1321         *  - "%":  parameter entities are unsupported in entity values.
1322         *  - "<":  this isn't permitted in attributes unless relaxed.
1323         *  - "]":  this requires a lookahead to defend against the forbidden
1324         *          CDATA section delimiter "]]>".
1325         *  - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it
1326         *          isn't followed by "\n", we replace "\r" with either a "\n"
1327         *          in text nodes or a space in attribute values.
1328         *  - "\n": In attribute values, "\n" must be replaced with a space.
1329         *
1330         * We could also get unlucky by needing to refill the buffer midway
1331         * through the text.
1332         */
1333
1334        int start = position;
1335        StringBuilder result = null;
1336
1337        // if a text section was already started, prefix the start
1338        if (valueContext == ValueContext.TEXT && text != null) {
1339            result = new StringBuilder();
1340            result.append(text);
1341        }
1342
1343        while (true) {
1344
1345            /*
1346             * Make sure we have at least a single character to read from the
1347             * buffer. This mutates the buffer, so save the partial result
1348             * to the slow path string builder first.
1349             */
1350            if (position >= limit) {
1351                if (start < position) {
1352                    if (result == null) {
1353                        result = new StringBuilder();
1354                    }
1355                    result.append(buffer, start, position - start);
1356                }
1357                if (!fillBuffer(1)) {
1358                    return result != null ? result.toString() : "";
1359                }
1360                start = position;
1361            }
1362
1363            char c = buffer[position];
1364
1365            if (c == delimiter
1366                    || (delimiter == ' ' && (c <= ' ' || c == '>'))
1367                    || c == '&' && !resolveEntities) {
1368                break;
1369            }
1370
1371            if (c != '\r'
1372                    && (c != '\n' || valueContext != ValueContext.ATTRIBUTE)
1373                    && c != '&'
1374                    && c != '<'
1375                    && (c != ']' || valueContext != ValueContext.TEXT)
1376                    && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) {
1377                isWhitespace &= (c <= ' ');
1378                position++;
1379                continue;
1380            }
1381
1382            /*
1383             * We've encountered an unlucky character! Convert from fast
1384             * path to slow path if we haven't done so already.
1385             */
1386            if (result == null) {
1387                result = new StringBuilder();
1388            }
1389            result.append(buffer, start, position - start);
1390
1391            if (c == '\r') {
1392                if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') {
1393                    position++;
1394                }
1395                c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n';
1396
1397            } else if (c == '\n') {
1398                c = ' ';
1399
1400            } else if (c == '&') {
1401                isWhitespace = false; // TODO: what if the entity resolves to whitespace?
1402                readEntity(result, false, throwOnResolveFailure, valueContext);
1403                start = position;
1404                continue;
1405
1406            } else if (c == '<') {
1407                if (valueContext == ValueContext.ATTRIBUTE) {
1408                    checkRelaxed("Illegal: \"<\" inside attribute value");
1409                }
1410                isWhitespace = false;
1411
1412            } else if (c == ']') {
1413                if ((position + 2 < limit || fillBuffer(3))
1414                        && buffer[position + 1] == ']' && buffer[position + 2] == '>') {
1415                    checkRelaxed("Illegal: \"]]>\" outside CDATA section");
1416                }
1417                isWhitespace = false;
1418
1419            } else if (c == '%') {
1420                throw new XmlPullParserException("This parser doesn't support parameter entities",
1421                        this, null);
1422
1423            } else {
1424                throw new AssertionError();
1425            }
1426
1427            position++;
1428            result.append(c);
1429            start = position;
1430        }
1431
1432        if (result == null) {
1433            return stringPool.get(buffer, start, position - start);
1434        } else {
1435            result.append(buffer, start, position - start);
1436            return result.toString();
1437        }
1438    }
1439
1440    private void read(char expected) throws IOException, XmlPullParserException {
1441        int c = peekCharacter();
1442        if (c != expected) {
1443            checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'");
1444            if (c == -1) {
1445                return; // On EOF, don't move position beyond limit
1446            }
1447        }
1448        position++;
1449    }
1450
1451    private void read(char[] chars) throws IOException, XmlPullParserException {
1452        if (position + chars.length > limit && !fillBuffer(chars.length)) {
1453            checkRelaxed("expected: '" + new String(chars) + "' but was EOF");
1454            return;
1455        }
1456
1457        // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
1458        // when the VM has better method inlining
1459        for (int i = 0; i < chars.length; i++) {
1460            if (buffer[position + i] != chars[i]) {
1461                checkRelaxed("expected: \"" + new String(chars) + "\" but was \""
1462                        + new String(buffer, position, chars.length) + "...\"");
1463            }
1464        }
1465
1466        position += chars.length;
1467    }
1468
1469    private int peekCharacter() throws IOException, XmlPullParserException {
1470        if (position < limit || fillBuffer(1)) {
1471            return buffer[position];
1472        }
1473        return -1;
1474    }
1475
1476    /**
1477     * Returns true once {@code limit - position >= minimum}. If the data is
1478     * exhausted before that many characters are available, this returns
1479     * false.
1480     */
1481    private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException {
1482        // If we've exhausted the current content source, remove it
1483        while (nextContentSource != null) {
1484            if (position < limit) {
1485                throw new XmlPullParserException("Unbalanced entity!", this, null);
1486            }
1487            popContentSource();
1488            if (limit - position >= minimum) {
1489                return true;
1490            }
1491        }
1492
1493        // Before clobbering the old characters, update where buffer starts
1494        for (int i = 0; i < position; i++) {
1495            if (buffer[i] == '\n') {
1496                bufferStartLine++;
1497                bufferStartColumn = 0;
1498            } else {
1499                bufferStartColumn++;
1500            }
1501        }
1502
1503        if (bufferCapture != null) {
1504            bufferCapture.append(buffer, 0, position);
1505        }
1506
1507        if (limit != position) {
1508            limit -= position;
1509            System.arraycopy(buffer, position, buffer, 0, limit);
1510        } else {
1511            limit = 0;
1512        }
1513
1514        position = 0;
1515        int total;
1516        while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) {
1517            limit += total;
1518            if (limit >= minimum) {
1519                return true;
1520            }
1521        }
1522        return false;
1523    }
1524
1525    /**
1526     * Returns an element or attribute name. This is always non-empty for
1527     * non-relaxed parsers.
1528     */
1529    private String readName() throws IOException, XmlPullParserException {
1530        if (position >= limit && !fillBuffer(1)) {
1531            checkRelaxed("name expected");
1532            return "";
1533        }
1534
1535        int start = position;
1536        StringBuilder result = null;
1537
1538        // read the first character
1539        char c = buffer[position];
1540        if ((c >= 'a' && c <= 'z')
1541                || (c >= 'A' && c <= 'Z')
1542                || c == '_'
1543                || c == ':'
1544                || c >= '\u00c0' // TODO: check the XML spec
1545                || relaxed) {
1546            position++;
1547        } else {
1548            checkRelaxed("name expected");
1549            return "";
1550        }
1551
1552        while (true) {
1553            /*
1554             * Make sure we have at least a single character to read from the
1555             * buffer. This mutates the buffer, so save the partial result
1556             * to the slow path string builder first.
1557             */
1558            if (position >= limit) {
1559                if (result == null) {
1560                    result = new StringBuilder();
1561                }
1562                result.append(buffer, start, position - start);
1563                if (!fillBuffer(1)) {
1564                    return result.toString();
1565                }
1566                start = position;
1567            }
1568
1569            // read another character
1570            c = buffer[position];
1571            if ((c >= 'a' && c <= 'z')
1572                    || (c >= 'A' && c <= 'Z')
1573                    || (c >= '0' && c <= '9')
1574                    || c == '_'
1575                    || c == '-'
1576                    || c == ':'
1577                    || c == '.'
1578                    || c >= '\u00b7') {  // TODO: check the XML spec
1579                position++;
1580                continue;
1581            }
1582
1583            // we encountered a non-name character. done!
1584            if (result == null) {
1585                return stringPool.get(buffer, start, position - start);
1586            } else {
1587                result.append(buffer, start, position - start);
1588                return result.toString();
1589            }
1590        }
1591    }
1592
1593    private void skip() throws IOException, XmlPullParserException {
1594        while (position < limit || fillBuffer(1)) {
1595            int c = buffer[position];
1596            if (c > ' ') {
1597                break;
1598            }
1599            position++;
1600        }
1601    }
1602
1603    //  public part starts here...
1604
1605    public void setInput(Reader reader) throws XmlPullParserException {
1606        this.reader = reader;
1607
1608        type = START_DOCUMENT;
1609        parsedTopLevelStartTag = false;
1610        name = null;
1611        namespace = null;
1612        degenerated = false;
1613        attributeCount = -1;
1614        encoding = null;
1615        version = null;
1616        standalone = null;
1617
1618        if (reader == null) {
1619            return;
1620        }
1621
1622        position = 0;
1623        limit = 0;
1624        bufferStartLine = 0;
1625        bufferStartColumn = 0;
1626        depth = 0;
1627        documentEntities = null;
1628    }
1629
1630    public void setInput(InputStream is, String charset) throws XmlPullParserException {
1631        position = 0;
1632        limit = 0;
1633        boolean detectCharset = (charset == null);
1634
1635        if (is == null) {
1636            throw new IllegalArgumentException("is == null");
1637        }
1638
1639        try {
1640            if (detectCharset) {
1641                // read the four bytes looking for an indication of the encoding in use
1642                int firstFourBytes = 0;
1643                while (limit < 4) {
1644                    int i = is.read();
1645                    if (i == -1) {
1646                        break;
1647                    }
1648                    firstFourBytes = (firstFourBytes << 8) | i;
1649                    buffer[limit++] = (char) i;
1650                }
1651
1652                if (limit == 4) {
1653                    switch (firstFourBytes) {
1654                    case 0x00000FEFF: // UTF-32BE BOM
1655                        charset = "UTF-32BE";
1656                        limit = 0;
1657                        break;
1658
1659                    case 0x0FFFE0000: // UTF-32LE BOM
1660                        charset = "UTF-32LE";
1661                        limit = 0;
1662                        break;
1663
1664                    case 0x0000003c: // '<' in UTF-32BE
1665                        charset = "UTF-32BE";
1666                        buffer[0] = '<';
1667                        limit = 1;
1668                        break;
1669
1670                    case 0x03c000000: // '<' in UTF-32LE
1671                        charset = "UTF-32LE";
1672                        buffer[0] = '<';
1673                        limit = 1;
1674                        break;
1675
1676                    case 0x0003c003f: // "<?" in UTF-16BE
1677                        charset = "UTF-16BE";
1678                        buffer[0] = '<';
1679                        buffer[1] = '?';
1680                        limit = 2;
1681                        break;
1682
1683                    case 0x03c003f00: // "<?" in UTF-16LE
1684                        charset = "UTF-16LE";
1685                        buffer[0] = '<';
1686                        buffer[1] = '?';
1687                        limit = 2;
1688                        break;
1689
1690                    case 0x03c3f786d: // "<?xm" in ASCII etc.
1691                        while (true) {
1692                            int i = is.read();
1693                            if (i == -1) {
1694                                break;
1695                            }
1696                            buffer[limit++] = (char) i;
1697                            if (i == '>') {
1698                                String s = new String(buffer, 0, limit);
1699                                int i0 = s.indexOf("encoding");
1700                                if (i0 != -1) {
1701                                    while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') {
1702                                        i0++;
1703                                    }
1704                                    char deli = s.charAt(i0++);
1705                                    int i1 = s.indexOf(deli, i0);
1706                                    charset = s.substring(i0, i1);
1707                                }
1708                                break;
1709                            }
1710                        }
1711                        break;
1712
1713                    default:
1714                        // handle a byte order mark followed by something other than <?
1715                        if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) {
1716                            charset = "UTF-16BE";
1717                            buffer[0] = (char) ((buffer[2] << 8) | buffer[3]);
1718                            limit = 1;
1719                        } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) {
1720                            charset = "UTF-16LE";
1721                            buffer[0] = (char) ((buffer[3] << 8) | buffer[2]);
1722                            limit = 1;
1723                        } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) {
1724                            charset = "UTF-8";
1725                            buffer[0] = buffer[3];
1726                            limit = 1;
1727                        }
1728                    }
1729                }
1730            }
1731
1732            if (charset == null) {
1733                charset = "UTF-8";
1734            }
1735
1736            int savedLimit = limit;
1737            setInput(new InputStreamReader(is, charset));
1738            encoding = charset;
1739            limit = savedLimit;
1740
1741            /*
1742             * Skip the optional BOM if we didn't above. This decrements limit
1743             * rather than incrementing position so that <?xml version='1.0'?>
1744             * is still at character 0.
1745             */
1746            if (!detectCharset && peekCharacter() == 0xfeff) {
1747                limit--;
1748                System.arraycopy(buffer, 1, buffer, 0, limit);
1749            }
1750        } catch (Exception e) {
1751            throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e);
1752        }
1753    }
1754
1755    public void close() throws IOException {
1756        if (reader != null) {
1757            reader.close();
1758        }
1759    }
1760
1761    public boolean getFeature(String feature) {
1762        if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
1763            return processNsp;
1764        } else if (FEATURE_RELAXED.equals(feature)) {
1765            return relaxed;
1766        } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) {
1767            return processDocDecl;
1768        } else {
1769            return false;
1770        }
1771    }
1772
1773    public String getInputEncoding() {
1774        return encoding;
1775    }
1776
1777    public void defineEntityReplacementText(String entity, String value)
1778            throws XmlPullParserException {
1779        if (processDocDecl) {
1780            throw new IllegalStateException(
1781                    "Entity replacement text may not be defined with DOCTYPE processing enabled.");
1782        }
1783        if (reader == null) {
1784            throw new IllegalStateException(
1785                    "Entity replacement text must be defined after setInput()");
1786        }
1787        if (documentEntities == null) {
1788            documentEntities = new HashMap<String, char[]>();
1789        }
1790        documentEntities.put(entity, value.toCharArray());
1791    }
1792
1793    public Object getProperty(String property) {
1794        if (property.equals(PROPERTY_XMLDECL_VERSION)) {
1795            return version;
1796        } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) {
1797            return standalone;
1798        } else if (property.equals(PROPERTY_LOCATION)) {
1799            return location != null ? location : reader.toString();
1800        } else {
1801            return null;
1802        }
1803    }
1804
1805    /**
1806     * Returns the root element's name if it was declared in the DTD. This
1807     * equals the first tag's name for valid documents.
1808     */
1809    public String getRootElementName() {
1810        return rootElementName;
1811    }
1812
1813    /**
1814     * Returns the document's system ID if it was declared. This is typically a
1815     * string like {@code http://www.w3.org/TR/html4/strict.dtd}.
1816     */
1817    public String getSystemId() {
1818        return systemId;
1819    }
1820
1821    /**
1822     * Returns the document's public ID if it was declared. This is typically a
1823     * string like {@code -//W3C//DTD HTML 4.01//EN}.
1824     */
1825    public String getPublicId() {
1826        return publicId;
1827    }
1828
1829    public int getNamespaceCount(int depth) {
1830        if (depth > this.depth) {
1831            throw new IndexOutOfBoundsException();
1832        }
1833        return nspCounts[depth];
1834    }
1835
1836    public String getNamespacePrefix(int pos) {
1837        return nspStack[pos * 2];
1838    }
1839
1840    public String getNamespaceUri(int pos) {
1841        return nspStack[(pos * 2) + 1];
1842    }
1843
1844    public String getNamespace(String prefix) {
1845        if ("xml".equals(prefix)) {
1846            return "http://www.w3.org/XML/1998/namespace";
1847        }
1848        if ("xmlns".equals(prefix)) {
1849            return "http://www.w3.org/2000/xmlns/";
1850        }
1851
1852        for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) {
1853            if (prefix == null) {
1854                if (nspStack[i] == null) {
1855                    return nspStack[i + 1];
1856                }
1857            } else if (prefix.equals(nspStack[i])) {
1858                return nspStack[i + 1];
1859            }
1860        }
1861        return null;
1862    }
1863
1864    public int getDepth() {
1865        return depth;
1866    }
1867
1868    public String getPositionDescription() {
1869        StringBuilder buf = new StringBuilder(type < TYPES.length ? TYPES[type] : "unknown");
1870        buf.append(' ');
1871
1872        if (type == START_TAG || type == END_TAG) {
1873            if (degenerated) {
1874                buf.append("(empty) ");
1875            }
1876            buf.append('<');
1877            if (type == END_TAG) {
1878                buf.append('/');
1879            }
1880
1881            if (prefix != null) {
1882                buf.append("{" + namespace + "}" + prefix + ":");
1883            }
1884            buf.append(name);
1885
1886            int cnt = attributeCount * 4;
1887            for (int i = 0; i < cnt; i += 4) {
1888                buf.append(' ');
1889                if (attributes[i + 1] != null) {
1890                    buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":");
1891                }
1892                buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'");
1893            }
1894
1895            buf.append('>');
1896        } else if (type == IGNORABLE_WHITESPACE) {
1897            ;
1898        } else if (type != TEXT) {
1899            buf.append(getText());
1900        } else if (isWhitespace) {
1901            buf.append("(whitespace)");
1902        } else {
1903            String text = getText();
1904            if (text.length() > 16) {
1905                text = text.substring(0, 16) + "...";
1906            }
1907            buf.append(text);
1908        }
1909
1910        buf.append("@" + getLineNumber() + ":" + getColumnNumber());
1911        if (location != null) {
1912            buf.append(" in ");
1913            buf.append(location);
1914        } else if (reader != null) {
1915            buf.append(" in ");
1916            buf.append(reader.toString());
1917        }
1918        return buf.toString();
1919    }
1920
1921    public int getLineNumber() {
1922        int result = bufferStartLine;
1923        for (int i = 0; i < position; i++) {
1924            if (buffer[i] == '\n') {
1925                result++;
1926            }
1927        }
1928        return result + 1; // the first line is '1'
1929    }
1930
1931    public int getColumnNumber() {
1932        int result = bufferStartColumn;
1933        for (int i = 0; i < position; i++) {
1934            if (buffer[i] == '\n') {
1935                result = 0;
1936            } else {
1937                result++;
1938            }
1939        }
1940        return result + 1; // the first column is '1'
1941    }
1942
1943    public boolean isWhitespace() throws XmlPullParserException {
1944        if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) {
1945            throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1946        }
1947        return isWhitespace;
1948    }
1949
1950    public String getText() {
1951        if (type < TEXT || (type == ENTITY_REF && unresolved)) {
1952            return null;
1953        } else if (text == null) {
1954            return "";
1955        } else {
1956            return text;
1957        }
1958    }
1959
1960    public char[] getTextCharacters(int[] poslen) {
1961        String text = getText();
1962        if (text == null) {
1963            poslen[0] = -1;
1964            poslen[1] = -1;
1965            return null;
1966        }
1967        char[] result = text.toCharArray();
1968        poslen[0] = 0;
1969        poslen[1] = result.length;
1970        return result;
1971    }
1972
1973    public String getNamespace() {
1974        return namespace;
1975    }
1976
1977    public String getName() {
1978        return name;
1979    }
1980
1981    public String getPrefix() {
1982        return prefix;
1983    }
1984
1985    public boolean isEmptyElementTag() throws XmlPullParserException {
1986        if (type != START_TAG) {
1987            throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1988        }
1989        return degenerated;
1990    }
1991
1992    public int getAttributeCount() {
1993        return attributeCount;
1994    }
1995
    /**
     * Returns the type of the attribute at {@code index}. This
     * implementation reports {@code "CDATA"} for every attribute; the index
     * is neither read nor validated.
     *
     * @param index the attribute index (ignored)
     * @return always {@code "CDATA"}
     */
    public String getAttributeType(int index) {
        return "CDATA";
    }
1999
    /**
     * Reports whether the attribute at {@code index} is a default value.
     * This implementation answers {@code false} for every attribute; the
     * index is neither read nor validated.
     *
     * @param index the attribute index (ignored)
     * @return always {@code false}
     */
    public boolean isAttributeDefault(int index) {
        return false;
    }
2003
2004    public String getAttributeNamespace(int index) {
2005        if (index >= attributeCount) {
2006            throw new IndexOutOfBoundsException();
2007        }
2008        return attributes[index * 4];
2009    }
2010
2011    public String getAttributeName(int index) {
2012        if (index >= attributeCount) {
2013            throw new IndexOutOfBoundsException();
2014        }
2015        return attributes[(index * 4) + 2];
2016    }
2017
2018    public String getAttributePrefix(int index) {
2019        if (index >= attributeCount) {
2020            throw new IndexOutOfBoundsException();
2021        }
2022        return attributes[(index * 4) + 1];
2023    }
2024
2025    public String getAttributeValue(int index) {
2026        if (index >= attributeCount) {
2027            throw new IndexOutOfBoundsException();
2028        }
2029        return attributes[(index * 4) + 3];
2030    }
2031
2032    public String getAttributeValue(String namespace, String name) {
2033        for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) {
2034            if (attributes[i + 2].equals(name)
2035                    && (namespace == null || attributes[i].equals(namespace))) {
2036                return attributes[i + 3];
2037            }
2038        }
2039
2040        return null;
2041    }
2042
2043    public int getEventType() throws XmlPullParserException {
2044        return type;
2045    }
2046
2047    // utility methods to make XML parsing easier ...
2048
2049    public int nextTag() throws XmlPullParserException, IOException {
2050        next();
2051        if (type == TEXT && isWhitespace) {
2052            next();
2053        }
2054
2055        if (type != END_TAG && type != START_TAG) {
2056            throw new XmlPullParserException("unexpected type", this, null);
2057        }
2058
2059        return type;
2060    }
2061
2062    public void require(int type, String namespace, String name)
2063            throws XmlPullParserException, IOException {
2064        if (type != this.type
2065                || (namespace != null && !namespace.equals(getNamespace()))
2066                || (name != null && !name.equals(getName()))) {
2067            throw new XmlPullParserException(
2068                    "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null);
2069        }
2070    }
2071
2072    public String nextText() throws XmlPullParserException, IOException {
2073        if (type != START_TAG) {
2074            throw new XmlPullParserException("precondition: START_TAG", this, null);
2075        }
2076
2077        next();
2078
2079        String result;
2080        if (type == TEXT) {
2081            result = getText();
2082            next();
2083        } else {
2084            result = "";
2085        }
2086
2087        if (type != END_TAG) {
2088            throw new XmlPullParserException("END_TAG expected", this, null);
2089        }
2090
2091        return result;
2092    }
2093
2094    public void setFeature(String feature, boolean value) throws XmlPullParserException {
2095        if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
2096            processNsp = value;
2097        } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) {
2098            processDocDecl = value;
2099        } else if (FEATURE_RELAXED.equals(feature)) {
2100            relaxed = value;
2101        } else {
2102            throw new XmlPullParserException("unsupported feature: " + feature, this, null);
2103        }
2104    }
2105
2106    public void setProperty(String property, Object value) throws XmlPullParserException {
2107        if (property.equals(PROPERTY_LOCATION)) {
2108            location = String.valueOf(value);
2109        } else {
2110            throw new XmlPullParserException("unsupported property: " + property);
2111        }
2112    }
2113
2114    /**
2115     * A chain of buffers containing XML content. Each content source contains
2116     * the parser's primary read buffer or the characters of entities actively
2117     * being parsed.
2118     *
2119     * <p>For example, note the buffers needed to parse this document:
2120     * <pre>   {@code
2121     *   <!DOCTYPE foo [
2122     *       <!ENTITY baz "ghi">
2123     *       <!ENTITY bar "def &baz; jkl">
2124     *   ]>
2125     *   <foo>abc &bar; mno</foo>
2126     * }</pre>
2127     *
2128     * <p>Things get interesting when the bar entity is encountered. At that
2129     * point two buffers are active:
2130     * <ol>
2131     * <li>The value for the bar entity, containing {@code "def &baz; jkl"}
2132     * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2133     * </ol>
2134     * <p>The parser will return the characters {@code "def "} from the bar
2135     * entity's buffer, and then it will encounter the baz entity. To handle
2136     * that, three buffers will be active:
2137     * <ol>
2138     * <li>The value for the baz entity, containing {@code "ghi"}
2139     * <li>The remaining value for the bar entity, containing {@code " jkl"}
2140     * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2141     * </ol>
2142     * <p>The parser will then return the characters {@code ghi jkl mno} in that
2143     * sequence by reading each buffer in sequence.
2144     */
2145    static class ContentSource {
2146        private final ContentSource next;
2147        private final char[] buffer;
2148        private final int position;
2149        private final int limit;
2150        ContentSource(ContentSource next, char[] buffer, int position, int limit) {
2151            this.next = next;
2152            this.buffer = buffer;
2153            this.position = position;
2154            this.limit = limit;
2155        }
2156    }
2157
2158    /**
2159     * Prepends the characters of {@code newBuffer} to be read before the
2160     * current buffer.
2161     */
2162    private void pushContentSource(char[] newBuffer) {
2163        nextContentSource = new ContentSource(nextContentSource, buffer, position, limit);
2164        buffer = newBuffer;
2165        position = 0;
2166        limit = newBuffer.length;
2167    }
2168
2169    /**
2170     * Replaces the current exhausted buffer with the next buffer in the chain.
2171     */
2172    private void popContentSource() {
2173        buffer = nextContentSource.buffer;
2174        position = nextContentSource.position;
2175        limit = nextContentSource.limit;
2176        nextContentSource = nextContentSource.next;
2177    }
2178}
2179