JSONTokener.java revision 661054f5a2f7f8f5f3ceffb97e803211b546e7fc
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package org.json; 18 19// Note: this class was written without inspecting the non-free org.json sourcecode. 20 21/** 22 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 23 * encoded string into the corresponding object. Most clients of 24 * this class will use only need the {@link #JSONTokener(String) constructor} 25 * and {@link #nextValue} method. Example usage: <pre> 26 * String json = "{" 27 * + " \"query\": \"Pizza\", " 28 * + " \"locations\": [ 94043, 90210 ] " 29 * + "}"; 30 * 31 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue(); 32 * String query = object.getString("query"); 33 * JSONArray locations = object.getJSONArray("locations");</pre> 34 * 35 * <p>For best interoperability and performance use JSON that complies with 36 * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons 37 * this parser is lenient, so a successful parse does not indicate that the 38 * input string was valid JSON. All of the following syntax errors will be 39 * ignored: 40 * <ul> 41 * <li>End of line comments starting with {@code //} or {@code #} and ending 42 * with a newline character. 43 * <li>C-style comments starting with {@code /*} and ending with 44 * {@code *}{@code /}. Such comments may not be nested. 45 * <li>Strings that are unquoted or {@code 'single quoted'}. 46 * <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}. 47 * <li>Octal integers prefixed with {@code 0}. 48 * <li>Array elements separated by {@code ;}. 49 * <li>Unnecessary array separators. These are interpreted as if null was the 50 * omitted value. 51 * <li>Key-value pairs separated by {@code =} or {@code =>}. 52 * <li>Key-value pairs separated by {@code ;}. 53 * </ul> 54 * 55 * <p>Each tokener may be used to parse a single JSON string. Instances of this 56 * class are not thread safe. Although this class is nonfinal, it was not 57 * designed for inheritance and should not be subclassed. In particular, 58 * self-use by overrideable methods is not specified. See <i>Effective Java</i> 59 * Item 17, "Design and Document or inheritance or else prohibit it" for further 60 * information. 61 */ 62public class JSONTokener { 63 64 /** The input JSON. */ 65 private final String in; 66 67 /** 68 * The index of the next character to be returned by {@link #next}. When 69 * the input is exhausted, this equals the input's length. 70 */ 71 private int pos; 72 73 /** 74 * @param in JSON encoded string. Null is not permitted and will yield a 75 * tokener that throws {@code NullPointerExceptions} when methods are 76 * called. 77 */ 78 public JSONTokener(String in) { 79 this.in = in; 80 } 81 82 /** 83 * Returns the next value from the input. 84 * 85 * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, 86 * Integer, Long, Double or {@link JSONObject#NULL}. 87 * @throws JSONException if the input is malformed. 88 */ 89 public Object nextValue() throws JSONException { 90 int c = nextCleanInternal(); 91 switch (c) { 92 case -1: 93 throw syntaxError("End of input"); 94 95 case '{': 96 return readObject(); 97 98 case '[': 99 return readArray(); 100 101 case '\'': 102 case '"': 103 return nextString((char) c); 104 105 default: 106 pos--; 107 return readLiteral(); 108 } 109 } 110 111 private int nextCleanInternal() throws JSONException { 112 while (pos < in.length()) { 113 int c = in.charAt(pos++); 114 switch (c) { 115 case '\t': 116 case ' ': 117 case '\n': 118 case '\r': 119 continue; 120 121 case '/': 122 if (pos == in.length()) { 123 return c; 124 } 125 126 char peek = in.charAt(pos); 127 switch (peek) { 128 case '*': 129 // skip a /* c-style comment */ 130 pos++; 131 int commentEnd = in.indexOf("*/", pos); 132 if (commentEnd == -1) { 133 throw syntaxError("Unterminated comment"); 134 } 135 pos = commentEnd + 2; 136 continue; 137 138 case '/': 139 // skip a // end-of-line comment 140 pos++; 141 skipToEndOfLine(); 142 continue; 143 144 default: 145 return c; 146 } 147 148 case '#': 149 /* 150 * Skip a # hash end-of-line comment. The JSON RFC doesn't 151 * specify this behavior, but it's required to parse 152 * existing documents. See http://b/2571423. 153 */ 154 skipToEndOfLine(); 155 continue; 156 157 default: 158 return c; 159 } 160 } 161 162 return -1; 163 } 164 165 /** 166 * Advances the position until after the next newline character. If the line 167 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the 168 * caller. 169 */ 170 private void skipToEndOfLine() { 171 for (; pos < in.length(); pos++) { 172 char c = in.charAt(pos); 173 if (c == '\r' || c == '\n') { 174 pos++; 175 break; 176 } 177 } 178 } 179 180 /** 181 * Returns the string up to but not including {@code quote}, unescaping any 182 * character escape sequences encountered along the way. The opening quote 183 * should have already been read. This consumes the closing quote, but does 184 * not include it in the returned string. 185 * 186 * @param quote either ' or ". 187 * @throws NumberFormatException if any unicode escape sequences are 188 * malformed. 189 */ 190 public String nextString(char quote) throws JSONException { 191 /* 192 * For strings that are free of escape sequences, we can just extract 193 * the result as a substring of the input. But if we encounter an escape 194 * sequence, we need to use a StringBuilder to compose the result. 195 */ 196 StringBuilder builder = null; 197 198 /* the index of the first character not yet appended to the builder. */ 199 int start = pos; 200 201 while (pos < in.length()) { 202 int c = in.charAt(pos++); 203 if (c == quote) { 204 if (builder == null) { 205 // a new string avoids leaking memory 206 return new String(in.substring(start, pos - 1)); 207 } else { 208 builder.append(in, start, pos - 1); 209 return builder.toString(); 210 } 211 } 212 213 if (c == '\\') { 214 if (pos == in.length()) { 215 throw syntaxError("Unterminated escape sequence"); 216 } 217 if (builder == null) { 218 builder = new StringBuilder(); 219 } 220 builder.append(in, start, pos - 1); 221 builder.append(readEscapeCharacter()); 222 start = pos; 223 } 224 } 225 226 throw syntaxError("Unterminated string"); 227 } 228 229 /** 230 * Unescapes the character identified by the character or characters that 231 * immediately follow a backslash. The backslash '\' should have already 232 * been read. This supports both unicode escapes "u000A" and two-character 233 * escapes "\n". 234 * 235 * @throws NumberFormatException if any unicode escape sequences are 236 * malformed. 237 */ 238 private char readEscapeCharacter() throws JSONException { 239 char escaped = in.charAt(pos++); 240 switch (escaped) { 241 case 'u': 242 if (pos + 4 > in.length()) { 243 throw syntaxError("Unterminated escape sequence"); 244 } 245 String hex = in.substring(pos, pos + 4); 246 pos += 4; 247 return (char) Integer.parseInt(hex, 16); 248 249 case 't': 250 return '\t'; 251 252 case 'b': 253 return '\b'; 254 255 case 'n': 256 return '\n'; 257 258 case 'r': 259 return '\r'; 260 261 case 'f': 262 return '\f'; 263 264 case '\'': 265 case '"': 266 case '\\': 267 default: 268 return escaped; 269 } 270 } 271 272 /** 273 * Reads a null, boolean, numeric or unquoted string literal value. Numeric 274 * values will be returned as an Integer, Long, or Double, in that order of 275 * preference. 276 */ 277 private Object readLiteral() throws JSONException { 278 String literal = nextToInternal("{}[]/\\:,=;# \t\f"); 279 280 if (literal.length() == 0) { 281 throw syntaxError("Expected literal value"); 282 } else if ("null".equalsIgnoreCase(literal)) { 283 return JSONObject.NULL; 284 } else if ("true".equalsIgnoreCase(literal)) { 285 return Boolean.TRUE; 286 } else if ("false".equalsIgnoreCase(literal)) { 287 return Boolean.FALSE; 288 } 289 290 /* try to parse as an integral type... */ 291 if (literal.indexOf('.') == -1) { 292 int base = 10; 293 String number = literal; 294 if (number.startsWith("0x") || number.startsWith("0X")) { 295 number = number.substring(2); 296 base = 16; 297 } else if (number.startsWith("0") && number.length() > 1) { 298 number = number.substring(1); 299 base = 8; 300 } 301 try { 302 long longValue = Long.parseLong(number, base); 303 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { 304 return (int) longValue; 305 } else { 306 return longValue; 307 } 308 } catch (NumberFormatException e) { 309 /* 310 * This only happens for integral numbers greater than 311 * Long.MAX_VALUE, numbers in exponential form (5e-10) and 312 * unquoted strings. Fall through to try floating point. 313 */ 314 } 315 } 316 317 /* ...next try to parse as a floating point... */ 318 try { 319 return Double.valueOf(literal); 320 } catch (NumberFormatException ignored) { 321 } 322 323 /* ... finally give up. We have an unquoted string */ 324 return new String(literal); // a new string avoids leaking memory 325 } 326 327 /** 328 * Returns the string up to but not including any of the given characters or 329 * a newline character. This does not consume the excluded character. 330 */ 331 private String nextToInternal(String excluded) { 332 int start = pos; 333 for (; pos < in.length(); pos++) { 334 char c = in.charAt(pos); 335 if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { 336 return in.substring(start, pos); 337 } 338 } 339 return in.substring(start); 340 } 341 342 /** 343 * Reads a sequence of key/value pairs and the trailing closing brace '}' of 344 * an object. The opening brace '{' should have already been read. 345 */ 346 private JSONObject readObject() throws JSONException { 347 JSONObject result = new JSONObject(); 348 349 /* Peek to see if this is the empty object. */ 350 int first = nextCleanInternal(); 351 if (first == '}') { 352 return result; 353 } else if (first != -1) { 354 pos--; 355 } 356 357 while (true) { 358 Object name = nextValue(); 359 if (!(name instanceof String)) { 360 if (name == null) { 361 throw syntaxError("Names cannot be null"); 362 } else { 363 throw syntaxError("Names must be strings, but " + name 364 + " is of type " + name.getClass().getName()); 365 } 366 } 367 368 /* 369 * Expect the name/value separator to be either a colon ':', an 370 * equals sign '=', or an arrow "=>". The last two are bogus but we 371 * include them because that's what the original implementation did. 372 */ 373 int separator = nextCleanInternal(); 374 if (separator != ':' && separator != '=') { 375 throw syntaxError("Expected ':' after " + name); 376 } 377 if (pos < in.length() && in.charAt(pos) == '>') { 378 pos++; 379 } 380 381 result.put((String) name, nextValue()); 382 383 switch (nextCleanInternal()) { 384 case '}': 385 return result; 386 case ';': 387 case ',': 388 continue; 389 default: 390 throw syntaxError("Unterminated object"); 391 } 392 } 393 } 394 395 /** 396 * Reads a sequence of values and the trailing closing brace ']' of an 397 * array. The opening brace '[' should have already been read. Note that 398 * "[]" yields an empty array, but "[,]" returns a two-element array 399 * equivalent to "[null,null]". 400 */ 401 private JSONArray readArray() throws JSONException { 402 JSONArray result = new JSONArray(); 403 404 /* to cover input that ends with ",]". */ 405 boolean hasTrailingSeparator = false; 406 407 while (true) { 408 switch (nextCleanInternal()) { 409 case -1: 410 throw syntaxError("Unterminated array"); 411 case ']': 412 if (hasTrailingSeparator) { 413 result.put(null); 414 } 415 return result; 416 case ',': 417 case ';': 418 /* A separator without a value first means "null". */ 419 result.put(null); 420 hasTrailingSeparator = true; 421 continue; 422 default: 423 pos--; 424 } 425 426 result.put(nextValue()); 427 428 switch (nextCleanInternal()) { 429 case ']': 430 return result; 431 case ',': 432 case ';': 433 hasTrailingSeparator = true; 434 continue; 435 default: 436 throw syntaxError("Unterminated array"); 437 } 438 } 439 } 440 441 /** 442 * Returns an exception containing the given message plus the current 443 * position and the entire input string. 444 */ 445 public JSONException syntaxError(String message) { 446 return new JSONException(message + this); 447 } 448 449 /** 450 * Returns the current position and the entire input string. 451 */ 452 @Override public String toString() { 453 // consistent with the original implementation 454 return " at character " + pos + " of " + in; 455 } 456 457 /* 458 * Legacy APIs. 459 * 460 * None of the methods below are on the critical path of parsing JSON 461 * documents. They exist only because they were exposed by the original 462 * implementation and may be used by some clients. 463 */ 464 465 /** 466 * Returns true until the input has been exhausted. 467 */ 468 public boolean more() { 469 return pos < in.length(); 470 } 471 472 /** 473 * Returns the next available character, or the null character '\0' if all 474 * input has been exhausted. The return value of this method is ambiguous 475 * for JSON strings that contain the character '\0'. 476 */ 477 public char next() { 478 return pos < in.length() ? in.charAt(pos++) : '\0'; 479 } 480 481 /** 482 * Returns the next available character if it equals {@code c}. Otherwise an 483 * exception is thrown. 484 */ 485 public char next(char c) throws JSONException { 486 char result = next(); 487 if (result != c) { 488 throw syntaxError("Expected " + c + " but was " + result); 489 } 490 return result; 491 } 492 493 /** 494 * Returns the next character that is not whitespace and does not belong to 495 * a comment. If the input is exhausted before such a character can be 496 * found, the null character '\0' is returned. The return value of this 497 * method is ambiguous for JSON strings that contain the character '\0'. 498 */ 499 public char nextClean() throws JSONException { 500 int nextCleanInt = nextCleanInternal(); 501 return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; 502 } 503 504 /** 505 * Returns the next {@code length} characters of the input. 506 * 507 * <p>The returned string shares its backing character array with this 508 * tokener's input string. If a reference to the returned string may be held 509 * indefinitely, you should use {@code new String(result)} to copy it first 510 * to avoid memory leaks. 511 * 512 * @throws JSONException if the remaining input is not long enough to 513 * satisfy this request. 514 */ 515 public String next(int length) throws JSONException { 516 if (pos + length > in.length()) { 517 throw syntaxError(length + " is out of bounds"); 518 } 519 String result = in.substring(pos, pos + length); 520 pos += length; 521 return result; 522 } 523 524 /** 525 * Returns the {@link String#trim trimmed} string holding the characters up 526 * to but not including the first of: 527 * <ul> 528 * <li>any character in {@code excluded} 529 * <li>a newline character '\n' 530 * <li>a carriage return '\r' 531 * </ul> 532 * 533 * <p>The returned string shares its backing character array with this 534 * tokener's input string. If a reference to the returned string may be held 535 * indefinitely, you should use {@code new String(result)} to copy it first 536 * to avoid memory leaks. 537 * 538 * @return a possibly-empty string 539 */ 540 public String nextTo(String excluded) { 541 if (excluded == null) { 542 throw new NullPointerException(); 543 } 544 return nextToInternal(excluded).trim(); 545 } 546 547 /** 548 * Equivalent to {@code nextTo(String.valueOf(excluded))}. 549 */ 550 public String nextTo(char excluded) { 551 return nextToInternal(String.valueOf(excluded)).trim(); 552 } 553 554 /** 555 * Advances past all input up to and including the next occurrence of 556 * {@code thru}. If the remaining input doesn't contain {@code thru}, the 557 * input is exhausted. 558 */ 559 public void skipPast(String thru) { 560 int thruStart = in.indexOf(thru, pos); 561 pos = thruStart == -1 ? in.length() : (thruStart + thru.length()); 562 } 563 564 /** 565 * Advances past all input up to but not including the next occurrence of 566 * {@code to}. If the remaining input doesn't contain {@code to}, the input 567 * is unchanged. 568 */ 569 public char skipTo(char to) { 570 int index = in.indexOf(to, pos); 571 if (index != -1) { 572 pos = index; 573 return to; 574 } else { 575 return '\0'; 576 } 577 } 578 579 /** 580 * Unreads the most recent character of input. If no input characters have 581 * been read, the input is unchanged. 582 */ 583 public void back() { 584 if (--pos == -1) { 585 pos = 0; 586 } 587 } 588 589 /** 590 * Returns the integer [0..15] value for the given hex character, or -1 591 * for non-hex input. 592 * 593 * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other 594 * character will yield a -1 result. 595 */ 596 public static int dehexchar(char hex) { 597 if (hex >= '0' && hex <= '9') { 598 return hex - '0'; 599 } else if (hex >= 'A' && hex <= 'F') { 600 return hex - 'A' + 10; 601 } else if (hex >= 'a' && hex <= 'f') { 602 return hex - 'a' + 10; 603 } else { 604 return -1; 605 } 606 } 607} 608