JSONTokener.java revision d79a9eede731ac48cfeb152ca59f8dd574ae1284
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package org.json; 18 19// Note: this class was written without inspecting the non-free org.json sourcecode. 20 21/** 22 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 23 * encoded string into the corresponding object. Most clients of 24 * this class will use only need the {@link #JSONTokener(String) constructor} 25 * and {@link #nextValue} method. Example usage: <pre> 26 * String json = "{" 27 * + " \"query\": \"Pizza\", " 28 * + " \"locations\": [ 94043, 90210 ] " 29 * + "}"; 30 * 31 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue(); 32 * String query = object.getString("query"); 33 * JSONArray locations = object.getJSONArray("locations");</pre> 34 * 35 * <p>This parser is lenient. A successful parse does not necessarily indicate 36 * that the input string is valid JSON. 37 * 38 * <p>Each tokener may be used to parse a single JSON string. Instances of this 39 * class are not thread safe. Although this class is nonfinal, it was not 40 * designed for inheritance and should not be subclassed. In particular, 41 * self-use by overridable methods is not specified. See <i>Effective Java</i> 42 * Item 17, "Design and Document or inheritance or else prohibit it" for further 43 * information. 44 */ 45public class JSONTokener { 46 47 /** The input JSON. */ 48 private final String in; 49 50 /** 51 * The index of the next character to be returned by {@link #next()}. When 52 * the input is exhausted, this equals the input's length. 53 */ 54 private int pos; 55 56 /** 57 * @param in JSON encoded string. Null is not permitted and will yield a 58 * tokener that throws {@code NullPointerExceptions} when methods are 59 * called. 60 */ 61 public JSONTokener(String in) { 62 this.in = in; 63 } 64 65 /** 66 * Returns the next value from the input. 67 * 68 * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, 69 * Integer, Long, Double or {@link JSONObject#NULL}. 70 * @throws JSONException if the input is malformed. 71 */ 72 public Object nextValue() throws JSONException { 73 int c = nextCleanInternal(); 74 switch (c) { 75 case -1: 76 throw syntaxError("End of input"); 77 78 case '{': 79 return readObject(); 80 81 case '[': 82 return readArray(); 83 84 case '\'': 85 case '"': 86 return nextString((char) c); 87 88 default: 89 pos--; 90 return readLiteral(); 91 } 92 } 93 94 private int nextCleanInternal() throws JSONException { 95 while (pos < in.length()) { 96 int c = in.charAt(pos++); 97 switch (c) { 98 case '\t': 99 case ' ': 100 case '\n': 101 case '\r': 102 continue; 103 104 case '/': 105 if (pos == in.length()) { 106 return c; 107 } 108 109 char peek = in.charAt(pos); 110 if (peek != '*' && peek != '/') { 111 return c; 112 } 113 114 skipComment(); 115 continue; 116 117 default: 118 return c; 119 } 120 } 121 122 return -1; 123 } 124 125 /** 126 * Advances the position until it is beyond the current comment. The opening 127 * slash '/' should have already been read, and character at the current 128 * position be an asterisk '*' for a C-style comment or a slash '/' for an 129 * end-of-line comment. 130 * 131 * @throws JSONException if a C-style comment was not terminated. 132 */ 133 private void skipComment() throws JSONException { 134 if (in.charAt(pos++) == '*') { 135 int commentEnd = in.indexOf("*/", pos); 136 if (commentEnd == -1) { 137 throw syntaxError("Unterminated comment"); 138 } 139 pos = commentEnd + 2; 140 141 } else { 142 /* 143 * Skip to the next newline character. If the line is terminated by 144 * "\r\n", the '\n' will be consumed as whitespace by the caller. 145 */ 146 for (; pos < in.length(); pos++) { 147 char c = in.charAt(pos); 148 if (c == '\r' || c == '\n') { 149 pos++; 150 break; 151 } 152 } 153 } 154 } 155 156 /** 157 * Returns the string up to but not including {@code quote}, unescaping any 158 * character escape sequences encountered along the way. The opening quote 159 * should have already been read. This consumes the closing quote, but does 160 * not include it in the returned string. 161 * 162 * @param quote either ' or ". 163 * @throws NumberFormatException if any unicode escape sequences are 164 * malformed. 165 */ 166 public String nextString(char quote) throws JSONException { 167 /* 168 * For strings that are free of escape sequences, we can just extract 169 * the result as a substring of the input. But if we encounter an escape 170 * sequence, we need to use a StringBuilder to compose the result. 171 */ 172 StringBuilder builder = null; 173 174 /* the index of the first character not yet appended to the builder. */ 175 int start = pos; 176 177 while (pos < in.length()) { 178 int c = in.charAt(pos++); 179 if (c == quote) { 180 if (builder == null) { 181 // a new string avoids leaking memory 182 return new String(in.substring(start, pos - 1)); 183 } else { 184 builder.append(in, start, pos - 1); 185 return builder.toString(); 186 } 187 } 188 189 if (c == '\\') { 190 if (pos == in.length()) { 191 throw syntaxError("Unterminated escape sequence"); 192 } 193 if (builder == null) { 194 builder = new StringBuilder(); 195 } 196 builder.append(in, start, pos - 1); 197 builder.append(readEscapeCharacter()); 198 start = pos; 199 } 200 } 201 202 throw syntaxError("Unterminated string"); 203 } 204 205 /** 206 * Unescapes the character identified by the character or characters that 207 * immediately follow a backslash. The backslash '\' should have already 208 * been read. This supports both unicode escapes "u000A" and two-character 209 * escapes "\n". 210 * 211 * @throws NumberFormatException if any unicode escape sequences are 212 * malformed. 213 */ 214 private char readEscapeCharacter() throws JSONException { 215 char escaped = in.charAt(pos++); 216 switch (escaped) { 217 case 'u': 218 if (pos + 4 > in.length()) { 219 throw syntaxError("Unterminated escape sequence"); 220 } 221 String hex = in.substring(pos, pos + 4); 222 pos += 4; 223 return (char) Integer.parseInt(hex, 16); 224 225 case 't': 226 return '\t'; 227 228 case 'b': 229 return '\b'; 230 231 case 'n': 232 return '\n'; 233 234 case 'r': 235 return '\r'; 236 237 case 'f': 238 return '\f'; 239 240 case '\'': 241 case '"': 242 case '\\': 243 default: 244 return escaped; 245 } 246 } 247 248 /** 249 * Reads a null, boolean, numeric or unquoted string literal value. Numeric 250 * values will be returned as an Integer, Long, or Double, in that order of 251 * preference. 252 */ 253 private Object readLiteral() throws JSONException { 254 String literal = nextToInternal("{}[]/\\:,=;# \t\f"); 255 256 if (literal.length() == 0) { 257 throw syntaxError("Expected literal value"); 258 } else if ("null".equalsIgnoreCase(literal)) { 259 return JSONObject.NULL; 260 } else if ("true".equalsIgnoreCase(literal)) { 261 return Boolean.TRUE; 262 } else if ("false".equalsIgnoreCase(literal)) { 263 return Boolean.FALSE; 264 } 265 266 /* try to parse as an integral type... */ 267 if (literal.indexOf('.') == -1) { 268 int base = 10; 269 String number = literal; 270 if (number.startsWith("0x") || number.startsWith("0X")) { 271 number = number.substring(2); 272 base = 16; 273 } else if (number.startsWith("0") && number.length() > 1) { 274 number = number.substring(1); 275 base = 8; 276 } 277 try { 278 long longValue = Long.parseLong(number, base); 279 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { 280 return (int) longValue; 281 } else { 282 return longValue; 283 } 284 } catch (NumberFormatException e) { 285 /* 286 * This only happens for integral numbers greater than 287 * Long.MAX_VALUE, numbers in exponential form (5e-10) and 288 * unquoted strings. Fall through to try floating point. 289 */ 290 } 291 } 292 293 /* ...next try to parse as a floating point... */ 294 try { 295 return Double.valueOf(literal); 296 } catch (NumberFormatException e) { 297 } 298 299 /* ... finally give up. We have an unquoted string */ 300 return new String(literal); // a new string avoids leaking memory 301 } 302 303 /** 304 * Returns the string up to but not including any of the given characters or 305 * a newline character. This does not consume the excluded character. 306 */ 307 private String nextToInternal(String excluded) { 308 int start = pos; 309 for (; pos < in.length(); pos++) { 310 char c = in.charAt(pos); 311 if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { 312 return in.substring(start, pos); 313 } 314 } 315 return in.substring(start); 316 } 317 318 /** 319 * Reads a sequence of key/value pairs and the trailing closing brace '}' of 320 * an object. The opening brace '{' should have already been read. 321 */ 322 private JSONObject readObject() throws JSONException { 323 JSONObject result = new JSONObject(); 324 325 /* Peek to see if this is the empty object. */ 326 int first = nextCleanInternal(); 327 if (first == '}') { 328 return result; 329 } else if (first != -1) { 330 pos--; 331 } 332 333 while (true) { 334 Object name = nextValue(); 335 if (!(name instanceof String)) { 336 if (name == null) { 337 throw syntaxError("Names cannot be null"); 338 } else { 339 throw syntaxError("Names must be strings, but " + name 340 + " is of type " + name.getClass().getName()); 341 } 342 } 343 344 /* 345 * Expect the name/value separator to be either a colon ':', an 346 * equals sign '=', or an arrow "=>". The last two are bogus but we 347 * include them because that's what the original implementation did. 348 */ 349 int separator = nextCleanInternal(); 350 if (separator != ':' && separator != '=') { 351 throw syntaxError("Expected ':' after " + name); 352 } 353 if (pos < in.length() && in.charAt(pos) == '>') { 354 pos++; 355 } 356 357 result.put((String) name, nextValue()); 358 359 switch (nextCleanInternal()) { 360 case '}': 361 return result; 362 case ';': 363 case ',': 364 continue; 365 default: 366 throw syntaxError("Unterminated object"); 367 } 368 } 369 } 370 371 /** 372 * Reads a sequence of values and the trailing closing brace ']' of an 373 * array. The opening brace '[' should have already been read. Note that 374 * "[]" yields an empty array, but "[,]" returns a two-element array 375 * equivalent to "[null,null]". 376 */ 377 private JSONArray readArray() throws JSONException { 378 JSONArray result = new JSONArray(); 379 380 /* to cover input that ends with ",]". */ 381 boolean hasTrailingSeparator = false; 382 383 while (true) { 384 switch (nextCleanInternal()) { 385 case -1: 386 throw syntaxError("Unterminated array"); 387 case ']': 388 if (hasTrailingSeparator) { 389 result.put(null); 390 } 391 return result; 392 case ',': 393 case ';': 394 /* A separator without a value first means "null". */ 395 result.put(null); 396 hasTrailingSeparator = true; 397 continue; 398 default: 399 pos--; 400 } 401 402 result.put(nextValue()); 403 404 switch (nextCleanInternal()) { 405 case ']': 406 return result; 407 case ',': 408 case ';': 409 hasTrailingSeparator = true; 410 continue; 411 default: 412 throw syntaxError("Unterminated array"); 413 } 414 } 415 } 416 417 /** 418 * Returns an exception containing the given message plus the current 419 * position and the entire input string. 420 */ 421 public JSONException syntaxError(String message) { 422 return new JSONException(message + this); 423 } 424 425 /** 426 * Returns the current position and the entire input string. 427 */ 428 @Override public String toString() { 429 // consistent with the original implementation 430 return " at character " + pos + " of " + in; 431 } 432 433 /* 434 * Legacy APIs. 435 * 436 * None of the methods below are on the critical path of parsing JSON 437 * documents. They exist only because they were exposed by the original 438 * implementation and may be used by some clients. 439 */ 440 441 /** 442 * Returns true until the input has been exhausted. 443 */ 444 public boolean more() { 445 return pos < in.length(); 446 } 447 448 /** 449 * Returns the next available character, or the null character '\0' if all 450 * input has been exhausted. The return value of this method is ambiguous 451 * for JSON strings that contain the character '\0'. 452 */ 453 public char next() { 454 return pos < in.length() ? in.charAt(pos++) : '\0'; 455 } 456 457 /** 458 * Returns the next available character if it equals {@code c}. Otherwise an 459 * exception is thrown. 460 */ 461 public char next(char c) throws JSONException { 462 char result = next(); 463 if (result != c) { 464 throw syntaxError("Expected " + c + " but was " + result); 465 } 466 return result; 467 } 468 469 /** 470 * Returns the next character that is not whitespace and does not belong to 471 * a comment. If the input is exhausted before such a character can be 472 * found, the null character '\0' is returned. The return value of this 473 * method is ambiguous for JSON strings that contain the character '\0'. 474 */ 475 public char nextClean() throws JSONException { 476 int nextCleanInt = nextCleanInternal(); 477 return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; 478 } 479 480 /** 481 * Returns the next {@code length} characters of the input. 482 * 483 * <p>The returned string shares its backing character array with this 484 * tokener's input string. If a reference to the returned string may be held 485 * indefinitely, you should use {@code new String(result)} to copy it first 486 * to avoid memory leaks. 487 * 488 * @throws JSONException if the remaining input is not long enough to 489 * satisfy this request. 490 */ 491 public String next(int length) throws JSONException { 492 if (pos + length > in.length()) { 493 throw syntaxError(length + " is out of bounds"); 494 } 495 String result = in.substring(pos, pos + length); 496 pos += length; 497 return result; 498 } 499 500 /** 501 * Returns the {@link String#trim trimmed} string holding the characters up 502 * to but not including the first of: 503 * <ul> 504 * <li>any character in {@code excluded} 505 * <li>a newline character '\n' 506 * <li>a carriage return '\r' 507 * </ul> 508 * 509 * <p>The returned string shares its backing character array with this 510 * tokener's input string. If a reference to the returned string may be held 511 * indefinitely, you should use {@code new String(result)} to copy it first 512 * to avoid memory leaks. 513 * 514 * @return a possibly-empty string 515 */ 516 public String nextTo(String excluded) { 517 if (excluded == null) { 518 throw new NullPointerException(); 519 } 520 return nextToInternal(excluded).trim(); 521 } 522 523 /** 524 * Equivalent to {@code nextTo(String.valueOf(excluded))}. 525 */ 526 public String nextTo(char excluded) { 527 return nextToInternal(String.valueOf(excluded)).trim(); 528 } 529 530 /** 531 * Advances past all input up to and including the next occurrence of 532 * {@code thru}. If the remaining input doesn't contain {@code thru}, the 533 * input is exhausted. 534 */ 535 public void skipPast(String thru) { 536 int thruStart = in.indexOf(thru, pos); 537 pos = thruStart == -1 ? in.length() : (thruStart + thru.length()); 538 } 539 540 /** 541 * Advances past all input up to but not including the next occurrence of 542 * {@code to}. If the remaining input doesn't contain {@code to}, the input 543 * is unchanged. 544 */ 545 public char skipTo(char to) { 546 int index = in.indexOf(to, pos); 547 if (index != -1) { 548 pos = index; 549 return to; 550 } else { 551 return '\0'; 552 } 553 } 554 555 /** 556 * Unreads the most recent character of input. If no input characters have 557 * been read, the input is unchanged. 558 */ 559 public void back() { 560 if (--pos == -1) { 561 pos = 0; 562 } 563 } 564 565 /** 566 * Returns the integer [0..15] value for the given hex character, or -1 567 * for non-hex input. 568 * 569 * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other 570 * character will yield a -1 result. 571 */ 572 public static int dehexchar(char hex) { 573 if (hex >= '0' && hex <= '9') { 574 return hex - '0'; 575 } else if (hex >= 'A' && hex <= 'F') { 576 return hex - 'A' + 10; 577 } else if (hex >= 'a' && hex <= 'f') { 578 return hex - 'a' + 10; 579 } else { 580 return -1; 581 } 582 } 583} 584