1package com.google.polo.json;
2
3/*
4Copyright (c) 2002 JSON.org
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16The Software shall be used for Good, not Evil.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24SOFTWARE.
25*/
26
27/**
28 * The XMLTokener extends the JSONTokener to provide additional methods
29 * for the parsing of XML texts.
30 * @author JSON.org
31 * @version 2008-09-18
32 */
33public class XMLTokener extends JSONTokener {
34
35
36   /** The table of entity values. It initially contains Character values for
37    * amp, apos, gt, lt, quot.
38    */
39   public static final java.util.HashMap entity;
40
41   static {
42       entity = new java.util.HashMap(8);
43       entity.put("amp",  XML.AMP);
44       entity.put("apos", XML.APOS);
45       entity.put("gt",   XML.GT);
46       entity.put("lt",   XML.LT);
47       entity.put("quot", XML.QUOT);
48   }
49
50    /**
51     * Construct an XMLTokener from a string.
52     * @param s A source string.
53     */
54    public XMLTokener(String s) {
55        super(s);
56    }
57
58    /**
59     * Get the text in the CDATA block.
60     * @return The string up to the <code>]]&gt;</code>.
61     * @throws JSONException If the <code>]]&gt;</code> is not found.
62     */
63    public String nextCDATA() throws JSONException {
64        char         c;
65        int          i;
66        StringBuffer sb = new StringBuffer();
67        for (;;) {
68            c = next();
69            if (c == 0) {
70                throw syntaxError("Unclosed CDATA");
71            }
72            sb.append(c);
73            i = sb.length() - 3;
74            if (i >= 0 && sb.charAt(i) == ']' &&
75                          sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') {
76                sb.setLength(i);
77                return sb.toString();
78            }
79        }
80    }
81
82
83    /**
84     * Get the next XML outer token, trimming whitespace. There are two kinds
85     * of tokens: the '<' character which begins a markup tag, and the content
86     * text between markup tags.
87     *
88     * @return  A string, or a '<' Character, or null if there is no more
89     * source text.
90     * @throws JSONException
91     */
92    public Object nextContent() throws JSONException {
93        char         c;
94        StringBuffer sb;
95        do {
96            c = next();
97        } while (Character.isWhitespace(c));
98        if (c == 0) {
99            return null;
100        }
101        if (c == '<') {
102            return XML.LT;
103        }
104        sb = new StringBuffer();
105        for (;;) {
106            if (c == '<' || c == 0) {
107                back();
108                return sb.toString().trim();
109            }
110            if (c == '&') {
111                sb.append(nextEntity(c));
112            } else {
113                sb.append(c);
114            }
115            c = next();
116        }
117    }
118
119
120    /**
121     * Return the next entity. These entities are translated to Characters:
122     *     <code>&amp;  &apos;  &gt;  &lt;  &quot;</code>.
123     * @param a An ampersand character.
124     * @return  A Character or an entity String if the entity is not recognized.
125     * @throws JSONException If missing ';' in XML entity.
126     */
127    public Object nextEntity(char a) throws JSONException {
128        StringBuffer sb = new StringBuffer();
129        for (;;) {
130            char c = next();
131            if (Character.isLetterOrDigit(c) || c == '#') {
132                sb.append(Character.toLowerCase(c));
133            } else if (c == ';') {
134                break;
135            } else {
136                throw syntaxError("Missing ';' in XML entity: &" + sb);
137            }
138        }
139        String s = sb.toString();
140        Object e = entity.get(s);
141        return e != null ? e : a + s + ";";
142    }
143
144
145    /**
146     * Returns the next XML meta token. This is used for skipping over <!...>
147     * and <?...?> structures.
148     * @return Syntax characters (<code>< > / = ! ?</code>) are returned as
149     *  Character, and strings and names are returned as Boolean. We don't care
150     *  what the values actually are.
151     * @throws JSONException If a string is not properly closed or if the XML
152     *  is badly structured.
153     */
154    public Object nextMeta() throws JSONException {
155        char c;
156        char q;
157        do {
158            c = next();
159        } while (Character.isWhitespace(c));
160        switch (c) {
161        case 0:
162            throw syntaxError("Misshaped meta tag");
163        case '<':
164            return XML.LT;
165        case '>':
166            return XML.GT;
167        case '/':
168            return XML.SLASH;
169        case '=':
170            return XML.EQ;
171        case '!':
172            return XML.BANG;
173        case '?':
174            return XML.QUEST;
175        case '"':
176        case '\'':
177            q = c;
178            for (;;) {
179                c = next();
180                if (c == 0) {
181                    throw syntaxError("Unterminated string");
182                }
183                if (c == q) {
184                    return Boolean.TRUE;
185                }
186            }
187        default:
188            for (;;) {
189                c = next();
190                if (Character.isWhitespace(c)) {
191                    return Boolean.TRUE;
192                }
193                switch (c) {
194                case 0:
195                case '<':
196                case '>':
197                case '/':
198                case '=':
199                case '!':
200                case '?':
201                case '"':
202                case '\'':
203                    back();
204                    return Boolean.TRUE;
205                }
206            }
207        }
208    }
209
210
211    /**
212     * Get the next XML Token. These tokens are found inside of angle
213     * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it
214     * may be a string wrapped in single quotes or double quotes, or it may be a
215     * name.
216     * @return a String or a Character.
217     * @throws JSONException If the XML is not well formed.
218     */
219    public Object nextToken() throws JSONException {
220        char c;
221        char q;
222        StringBuffer sb;
223        do {
224            c = next();
225        } while (Character.isWhitespace(c));
226        switch (c) {
227        case 0:
228            throw syntaxError("Misshaped element");
229        case '<':
230            throw syntaxError("Misplaced '<'");
231        case '>':
232            return XML.GT;
233        case '/':
234            return XML.SLASH;
235        case '=':
236            return XML.EQ;
237        case '!':
238            return XML.BANG;
239        case '?':
240            return XML.QUEST;
241
242// Quoted string
243
244        case '"':
245        case '\'':
246            q = c;
247            sb = new StringBuffer();
248            for (;;) {
249                c = next();
250                if (c == 0) {
251                    throw syntaxError("Unterminated string");
252                }
253                if (c == q) {
254                    return sb.toString();
255                }
256                if (c == '&') {
257                    sb.append(nextEntity(c));
258                } else {
259                    sb.append(c);
260                }
261            }
262        default:
263
264// Name
265
266            sb = new StringBuffer();
267            for (;;) {
268                sb.append(c);
269                c = next();
270                if (Character.isWhitespace(c)) {
271                    return sb.toString();
272                }
273                switch (c) {
274                case 0:
275                	return sb.toString();
276                case '>':
277                case '/':
278                case '=':
279                case '!':
280                case '?':
281                case '[':
282                case ']':
283                    back();
284                    return sb.toString();
285                case '<':
286                case '"':
287                case '\'':
288                    throw syntaxError("Bad character in a name");
289                }
290            }
291        }
292    }
293
294
295    /**
296     * Skip characters until past the requested string.
297     * If it is not found, we are left at the end of the source with a result of false.
298     * @param to A string to skip past.
299     * @throws JSONException
300     */
301    public boolean skipPast(String to) throws JSONException {
302    	boolean b;
303    	char c;
304    	int i;
305    	int j;
306    	int offset = 0;
307    	int n = to.length();
308        char[] circle = new char[n];
309
310        /*
311         * First fill the circle buffer with as many characters as are in the
312         * to string. If we reach an early end, bail.
313         */
314
315    	for (i = 0; i < n; i += 1) {
316    		c = next();
317    		if (c == 0) {
318    			return false;
319    		}
320    		circle[i] = c;
321    	}
322    	/*
323    	 * We will loop, possibly for all of the remaining characters.
324    	 */
325    	for (;;) {
326    		j = offset;
327    		b = true;
328    		/*
329    		 * Compare the circle buffer with the to string.
330    		 */
331    		for (i = 0; i < n; i += 1) {
332    			if (circle[j] != to.charAt(i)) {
333    				b = false;
334    				break;
335    			}
336    			j += 1;
337    			if (j >= n) {
338    				j -= n;
339    			}
340    		}
341    		/*
342    		 * If we exit the loop with b intact, then victory is ours.
343    		 */
344    		if (b) {
345    			return true;
346    		}
347    		/*
348    		 * Get the next character. If there isn't one, then defeat is ours.
349    		 */
350    		c = next();
351    		if (c == 0) {
352    			return false;
353    		}
354    		/*
355    		 * Shove the character in the circle buffer and advance the
356    		 * circle offset. The offset is mod n.
357    		 */
358    		circle[offset] = c;
359    		offset += 1;
360    		if (offset >= n) {
361    			offset -= n;
362    		}
363    	}
364    }
365}
366