// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31package com.google.protobuf; 32 33import com.google.protobuf.Descriptors.Descriptor; 34import com.google.protobuf.Descriptors.EnumDescriptor; 35import com.google.protobuf.Descriptors.EnumValueDescriptor; 36import com.google.protobuf.Descriptors.FieldDescriptor; 37 38import java.io.IOException; 39import java.math.BigInteger; 40import java.nio.CharBuffer; 41import java.util.ArrayList; 42import java.util.List; 43import java.util.Locale; 44import java.util.Map; 45import java.util.logging.Logger; 46import java.util.regex.Matcher; 47import java.util.regex.Pattern; 48 49/** 50 * Provide text parsing and formatting support for proto2 instances. 51 * The implementation largely follows google/protobuf/text_format.cc. 52 * 53 * @author wenboz@google.com Wenbo Zhu 54 * @author kenton@google.com Kenton Varda 55 */ 56public final class TextFormat { 57 private TextFormat() {} 58 59 private static final Logger logger = 60 Logger.getLogger(TextFormat.class.getName()); 61 62 private static final Printer DEFAULT_PRINTER = new Printer(); 63 private static final Printer SINGLE_LINE_PRINTER = 64 (new Printer()).setSingleLineMode(true); 65 private static final Printer UNICODE_PRINTER = 66 (new Printer()).setEscapeNonAscii(false); 67 68 /** 69 * Outputs a textual representation of the Protocol Message supplied into 70 * the parameter output. (This representation is the new version of the 71 * classic "ProtocolPrinter" output from the original Protocol Buffer system) 72 */ 73 public static void print( 74 final MessageOrBuilder message, final Appendable output) 75 throws IOException { 76 DEFAULT_PRINTER.print(message, new TextGenerator(output)); 77 } 78 79 /** Outputs a textual representation of {@code fields} to {@code output}. 
*/ 80 public static void print(final UnknownFieldSet fields, 81 final Appendable output) 82 throws IOException { 83 DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output)); 84 } 85 86 /** 87 * Same as {@code print()}, except that non-ASCII characters are not 88 * escaped. 89 */ 90 public static void printUnicode( 91 final MessageOrBuilder message, final Appendable output) 92 throws IOException { 93 UNICODE_PRINTER.print(message, new TextGenerator(output)); 94 } 95 96 /** 97 * Same as {@code print()}, except that non-ASCII characters are not 98 * escaped. 99 */ 100 public static void printUnicode(final UnknownFieldSet fields, 101 final Appendable output) 102 throws IOException { 103 UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output)); 104 } 105 106 /** 107 * Generates a human readable form of this message, useful for debugging and 108 * other purposes, with no newline characters. 109 */ 110 public static String shortDebugString(final MessageOrBuilder message) { 111 try { 112 final StringBuilder sb = new StringBuilder(); 113 SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb)); 114 // Single line mode currently might have an extra space at the end. 115 return sb.toString().trim(); 116 } catch (IOException e) { 117 throw new IllegalStateException(e); 118 } 119 } 120 121 /** 122 * Generates a human readable form of the unknown fields, useful for debugging 123 * and other purposes, with no newline characters. 124 */ 125 public static String shortDebugString(final UnknownFieldSet fields) { 126 try { 127 final StringBuilder sb = new StringBuilder(); 128 SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb)); 129 // Single line mode currently might have an extra space at the end. 130 return sb.toString().trim(); 131 } catch (IOException e) { 132 throw new IllegalStateException(e); 133 } 134 } 135 136 /** 137 * Like {@code print()}, but writes directly to a {@code String} and 138 * returns it. 
139 */ 140 public static String printToString(final MessageOrBuilder message) { 141 try { 142 final StringBuilder text = new StringBuilder(); 143 print(message, text); 144 return text.toString(); 145 } catch (IOException e) { 146 throw new IllegalStateException(e); 147 } 148 } 149 150 /** 151 * Like {@code print()}, but writes directly to a {@code String} and 152 * returns it. 153 */ 154 public static String printToString(final UnknownFieldSet fields) { 155 try { 156 final StringBuilder text = new StringBuilder(); 157 print(fields, text); 158 return text.toString(); 159 } catch (IOException e) { 160 throw new IllegalStateException(e); 161 } 162 } 163 164 /** 165 * Same as {@code printToString()}, except that non-ASCII characters 166 * in string type fields are not escaped in backslash+octals. 167 */ 168 public static String printToUnicodeString(final MessageOrBuilder message) { 169 try { 170 final StringBuilder text = new StringBuilder(); 171 UNICODE_PRINTER.print(message, new TextGenerator(text)); 172 return text.toString(); 173 } catch (IOException e) { 174 throw new IllegalStateException(e); 175 } 176 } 177 178 /** 179 * Same as {@code printToString()}, except that non-ASCII characters 180 * in string type fields are not escaped in backslash+octals. 
181 */ 182 public static String printToUnicodeString(final UnknownFieldSet fields) { 183 try { 184 final StringBuilder text = new StringBuilder(); 185 UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text)); 186 return text.toString(); 187 } catch (IOException e) { 188 throw new IllegalStateException(e); 189 } 190 } 191 192 public static void printField(final FieldDescriptor field, 193 final Object value, 194 final Appendable output) 195 throws IOException { 196 DEFAULT_PRINTER.printField(field, value, new TextGenerator(output)); 197 } 198 199 public static String printFieldToString(final FieldDescriptor field, 200 final Object value) { 201 try { 202 final StringBuilder text = new StringBuilder(); 203 printField(field, value, text); 204 return text.toString(); 205 } catch (IOException e) { 206 throw new IllegalStateException(e); 207 } 208 } 209 210 /** 211 * Outputs a textual representation of the value of given field value. 212 * 213 * @param field the descriptor of the field 214 * @param value the value of the field 215 * @param output the output to which to append the formatted value 216 * @throws ClassCastException if the value is not appropriate for the 217 * given field descriptor 218 * @throws IOException if there is an exception writing to the output 219 */ 220 public static void printFieldValue(final FieldDescriptor field, 221 final Object value, 222 final Appendable output) 223 throws IOException { 224 DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output)); 225 } 226 227 /** 228 * Outputs a textual representation of the value of an unknown field. 
229 * 230 * @param tag the field's tag number 231 * @param value the value of the field 232 * @param output the output to which to append the formatted value 233 * @throws ClassCastException if the value is not appropriate for the 234 * given field descriptor 235 * @throws IOException if there is an exception writing to the output 236 */ 237 public static void printUnknownFieldValue(final int tag, 238 final Object value, 239 final Appendable output) 240 throws IOException { 241 printUnknownFieldValue(tag, value, new TextGenerator(output)); 242 } 243 244 private static void printUnknownFieldValue(final int tag, 245 final Object value, 246 final TextGenerator generator) 247 throws IOException { 248 switch (WireFormat.getTagWireType(tag)) { 249 case WireFormat.WIRETYPE_VARINT: 250 generator.print(unsignedToString((Long) value)); 251 break; 252 case WireFormat.WIRETYPE_FIXED32: 253 generator.print( 254 String.format((Locale) null, "0x%08x", (Integer) value)); 255 break; 256 case WireFormat.WIRETYPE_FIXED64: 257 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 258 break; 259 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 260 generator.print("\""); 261 generator.print(escapeBytes((ByteString) value)); 262 generator.print("\""); 263 break; 264 case WireFormat.WIRETYPE_START_GROUP: 265 DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator); 266 break; 267 default: 268 throw new IllegalArgumentException("Bad tag: " + tag); 269 } 270 } 271 272 /** Helper class for converting protobufs to text. */ 273 private static final class Printer { 274 /** Whether to omit newlines from the output. */ 275 boolean singleLineMode = false; 276 277 /** Whether to escape non ASCII characters with backslash and octal. 
*/ 278 boolean escapeNonAscii = true; 279 280 private Printer() {} 281 282 /** Setter of singleLineMode */ 283 private Printer setSingleLineMode(boolean singleLineMode) { 284 this.singleLineMode = singleLineMode; 285 return this; 286 } 287 288 /** Setter of escapeNonAscii */ 289 private Printer setEscapeNonAscii(boolean escapeNonAscii) { 290 this.escapeNonAscii = escapeNonAscii; 291 return this; 292 } 293 294 private void print( 295 final MessageOrBuilder message, final TextGenerator generator) 296 throws IOException { 297 for (Map.Entry<FieldDescriptor, Object> field 298 : message.getAllFields().entrySet()) { 299 printField(field.getKey(), field.getValue(), generator); 300 } 301 printUnknownFields(message.getUnknownFields(), generator); 302 } 303 304 private void printField(final FieldDescriptor field, final Object value, 305 final TextGenerator generator) throws IOException { 306 if (field.isRepeated()) { 307 // Repeated field. Print each element. 308 for (Object element : (List<?>) value) { 309 printSingleField(field, element, generator); 310 } 311 } else { 312 printSingleField(field, value, generator); 313 } 314 } 315 316 private void printSingleField(final FieldDescriptor field, 317 final Object value, 318 final TextGenerator generator) 319 throws IOException { 320 if (field.isExtension()) { 321 generator.print("["); 322 // We special-case MessageSet elements for compatibility with proto1. 323 if (field.getContainingType().getOptions().getMessageSetWireFormat() 324 && (field.getType() == FieldDescriptor.Type.MESSAGE) 325 && (field.isOptional()) 326 // object equality 327 && (field.getExtensionScope() == field.getMessageType())) { 328 generator.print(field.getMessageType().getFullName()); 329 } else { 330 generator.print(field.getFullName()); 331 } 332 generator.print("]"); 333 } else { 334 if (field.getType() == FieldDescriptor.Type.GROUP) { 335 // Groups must be serialized with their original capitalization. 
336 generator.print(field.getMessageType().getName()); 337 } else { 338 generator.print(field.getName()); 339 } 340 } 341 342 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 343 if (singleLineMode) { 344 generator.print(" { "); 345 } else { 346 generator.print(" {\n"); 347 generator.indent(); 348 } 349 } else { 350 generator.print(": "); 351 } 352 353 printFieldValue(field, value, generator); 354 355 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 356 if (singleLineMode) { 357 generator.print("} "); 358 } else { 359 generator.outdent(); 360 generator.print("}\n"); 361 } 362 } else { 363 if (singleLineMode) { 364 generator.print(" "); 365 } else { 366 generator.print("\n"); 367 } 368 } 369 } 370 371 private void printFieldValue(final FieldDescriptor field, 372 final Object value, 373 final TextGenerator generator) 374 throws IOException { 375 switch (field.getType()) { 376 case INT32: 377 case SINT32: 378 case SFIXED32: 379 generator.print(((Integer) value).toString()); 380 break; 381 382 case INT64: 383 case SINT64: 384 case SFIXED64: 385 generator.print(((Long) value).toString()); 386 break; 387 388 case BOOL: 389 generator.print(((Boolean) value).toString()); 390 break; 391 392 case FLOAT: 393 generator.print(((Float) value).toString()); 394 break; 395 396 case DOUBLE: 397 generator.print(((Double) value).toString()); 398 break; 399 400 case UINT32: 401 case FIXED32: 402 generator.print(unsignedToString((Integer) value)); 403 break; 404 405 case UINT64: 406 case FIXED64: 407 generator.print(unsignedToString((Long) value)); 408 break; 409 410 case STRING: 411 generator.print("\""); 412 generator.print(escapeNonAscii ? 
413 escapeText((String) value) : 414 escapeDoubleQuotesAndBackslashes((String) value)); 415 generator.print("\""); 416 break; 417 418 case BYTES: 419 generator.print("\""); 420 if (value instanceof ByteString) { 421 generator.print(escapeBytes((ByteString) value)); 422 } else { 423 generator.print(escapeBytes((byte[]) value)); 424 } 425 generator.print("\""); 426 break; 427 428 case ENUM: 429 generator.print(((EnumValueDescriptor) value).getName()); 430 break; 431 432 case MESSAGE: 433 case GROUP: 434 print((Message) value, generator); 435 break; 436 } 437 } 438 439 private void printUnknownFields(final UnknownFieldSet unknownFields, 440 final TextGenerator generator) 441 throws IOException { 442 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : 443 unknownFields.asMap().entrySet()) { 444 final int number = entry.getKey(); 445 final UnknownFieldSet.Field field = entry.getValue(); 446 printUnknownField(number, WireFormat.WIRETYPE_VARINT, 447 field.getVarintList(), generator); 448 printUnknownField(number, WireFormat.WIRETYPE_FIXED32, 449 field.getFixed32List(), generator); 450 printUnknownField(number, WireFormat.WIRETYPE_FIXED64, 451 field.getFixed64List(), generator); 452 printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED, 453 field.getLengthDelimitedList(), generator); 454 for (final UnknownFieldSet value : field.getGroupList()) { 455 generator.print(entry.getKey().toString()); 456 if (singleLineMode) { 457 generator.print(" { "); 458 } else { 459 generator.print(" {\n"); 460 generator.indent(); 461 } 462 printUnknownFields(value, generator); 463 if (singleLineMode) { 464 generator.print("} "); 465 } else { 466 generator.outdent(); 467 generator.print("}\n"); 468 } 469 } 470 } 471 } 472 473 private void printUnknownField(final int number, 474 final int wireType, 475 final List<?> values, 476 final TextGenerator generator) 477 throws IOException { 478 for (final Object value : values) { 479 generator.print(String.valueOf(number)); 480 
generator.print(": "); 481 printUnknownFieldValue(wireType, value, generator); 482 generator.print(singleLineMode ? " " : "\n"); 483 } 484 } 485 } 486 487 /** Convert an unsigned 32-bit integer to a string. */ 488 public static String unsignedToString(final int value) { 489 if (value >= 0) { 490 return Integer.toString(value); 491 } else { 492 return Long.toString(value & 0x00000000FFFFFFFFL); 493 } 494 } 495 496 /** Convert an unsigned 64-bit integer to a string. */ 497 public static String unsignedToString(final long value) { 498 if (value >= 0) { 499 return Long.toString(value); 500 } else { 501 // Pull off the most-significant bit so that BigInteger doesn't think 502 // the number is negative, then set it again using setBit(). 503 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL) 504 .setBit(63).toString(); 505 } 506 } 507 508 /** 509 * An inner class for writing text to the output stream. 510 */ 511 private static final class TextGenerator { 512 private final Appendable output; 513 private final StringBuilder indent = new StringBuilder(); 514 private boolean atStartOfLine = true; 515 516 private TextGenerator(final Appendable output) { 517 this.output = output; 518 } 519 520 /** 521 * Indent text by two spaces. After calling Indent(), two spaces will be 522 * inserted at the beginning of each line of text. Indent() may be called 523 * multiple times to produce deeper indents. 524 */ 525 public void indent() { 526 indent.append(" "); 527 } 528 529 /** 530 * Reduces the current indent level by two spaces, or crashes if the indent 531 * level is zero. 532 */ 533 public void outdent() { 534 final int length = indent.length(); 535 if (length == 0) { 536 throw new IllegalArgumentException( 537 " Outdent() without matching Indent()."); 538 } 539 indent.delete(length - 2, length); 540 } 541 542 /** 543 * Print text to the output stream. 
544 */ 545 public void print(final CharSequence text) throws IOException { 546 final int size = text.length(); 547 int pos = 0; 548 549 for (int i = 0; i < size; i++) { 550 if (text.charAt(i) == '\n') { 551 write(text.subSequence(pos, i + 1)); 552 pos = i + 1; 553 atStartOfLine = true; 554 } 555 } 556 write(text.subSequence(pos, size)); 557 } 558 559 private void write(final CharSequence data) throws IOException { 560 if (data.length() == 0) { 561 return; 562 } 563 if (atStartOfLine) { 564 atStartOfLine = false; 565 output.append(indent); 566 } 567 output.append(data); 568 } 569 } 570 571 // ================================================================= 572 // Parsing 573 574 /** 575 * Represents a stream of tokens parsed from a {@code String}. 576 * 577 * <p>The Java standard library provides many classes that you might think 578 * would be useful for implementing this, but aren't. For example: 579 * 580 * <ul> 581 * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, 582 * at least, something that would get us close to what we want -- except 583 * for one fatal flaw: It automatically un-escapes strings using Java 584 * escape sequences, which do not include all the escape sequences we 585 * need to support (e.g. '\x'). 586 * <li>{@code java.util.Scanner}: This seems like a great way at least to 587 * parse regular expressions out of a stream (so we wouldn't have to load 588 * the entire input into a single string before parsing). Sadly, 589 * {@code Scanner} requires that tokens be delimited with some delimiter. 590 * Thus, although the text "foo:" should parse to two tokens ("foo" and 591 * ":"), {@code Scanner} would recognize it only as a single token. 592 * Furthermore, {@code Scanner} provides no way to inspect the contents 593 * of delimiters, making it impossible to keep track of line and column 594 * numbers. 595 * </ul> 596 * 597 * <p>Luckily, Java's regular expression support does manage to be useful to 598 * us. 
/**
 * Represents a stream of tokens parsed from a {@code String}.
 *
 * <p>The Java standard library provides many classes that you might think
 * would be useful for implementing this, but aren't.  For example:
 *
 * <ul>
 * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
 *   at least, something that would get us close to what we want -- except
 *   for one fatal flaw:  It automatically un-escapes strings using Java
 *   escape sequences, which do not include all the escape sequences we
 *   need to support (e.g. '\x').
 * <li>{@code java.util.Scanner}:  This seems like a great way at least to
 *   parse regular expressions out of a stream (so we wouldn't have to load
 *   the entire input into a single string before parsing).  Sadly,
 *   {@code Scanner} requires that tokens be delimited with some delimiter.
 *   Thus, although the text "foo:" should parse to two tokens ("foo" and
 *   ":"), {@code Scanner} would recognize it only as a single token.
 *   Furthermore, {@code Scanner} provides no way to inspect the contents
 *   of delimiters, making it impossible to keep track of line and column
 *   numbers.
 * </ul>
 *
 * <p>Luckily, Java's regular expression support does manage to be useful to
 * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
 * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
 * that we need to have the entire input in one contiguous string.
 *
 * <p>The tokenizer always holds one token of lookahead in
 * {@code currentToken}; an empty string there means end of input.  A single
 * {@link Matcher} is reused for the whole input: its region start marks
 * where the text after the current token (and any following whitespace or
 * comments) begins.
 */
private static final class Tokenizer {
  private final CharSequence text;
  private final Matcher matcher;
  private String currentToken;

  // The character index within this.text at which the current token begins.
  private int pos = 0;

  // The line and column numbers of the current token (zero-based; they are
  // converted to one-based values when an exception is built).
  private int line = 0;
  private int column = 0;

  // The line and column numbers of the previous token (allows throwing
  // errors *after* consuming).
  private int previousLine = 0;
  private int previousColumn = 0;

  // We use possessive quantifiers (*+ and ++) because otherwise the Java
  // regex matcher has stack overflows on large inputs.
  private static final Pattern WHITESPACE =
    Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
  private static final Pattern TOKEN = Pattern.compile(
    "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
    "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
    "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
    "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
    Pattern.MULTILINE);

  // Spellings accepted for the special floating-point values; matching is
  // case-insensitive and the float variants allow a trailing "f".
  private static final Pattern DOUBLE_INFINITY = Pattern.compile(
    "-?inf(inity)?",
    Pattern.CASE_INSENSITIVE);
  private static final Pattern FLOAT_INFINITY = Pattern.compile(
    "-?inf(inity)?f?",
    Pattern.CASE_INSENSITIVE);
  private static final Pattern FLOAT_NAN = Pattern.compile(
    "nanf?",
    Pattern.CASE_INSENSITIVE);

  /**
   * Construct a tokenizer that parses tokens from the given text.  Leading
   * whitespace and comments are skipped and the first token is loaded
   * immediately.
   */
  private Tokenizer(final CharSequence text) {
    this.text = text;
    this.matcher = WHITESPACE.matcher(text);
    skipWhitespace();
    nextToken();
  }

  /** Are we at the end of the input? */
  public boolean atEnd() {
    return currentToken.length() == 0;
  }

  /**
   * Advance to the next token.  The position of the token being left is
   * remembered as the "previous" position, the line/column counters are
   * moved up to the start of the new token, and the whitespace following
   * the new token is skipped.
   */
  public void nextToken() {
    previousLine = line;
    previousColumn = column;

    // Advance the line counter to the current position.
    while (pos < matcher.regionStart()) {
      if (text.charAt(pos) == '\n') {
        ++line;
        column = 0;
      } else {
        ++column;
      }
      ++pos;
    }

    // Match the next token.
    if (matcher.regionStart() == matcher.regionEnd()) {
      // EOF
      currentToken = "";
    } else {
      matcher.usePattern(TOKEN);
      if (matcher.lookingAt()) {
        currentToken = matcher.group();
        matcher.region(matcher.end(), matcher.regionEnd());
      } else {
        // Take one character.  Anything the TOKEN pattern does not
        // recognize becomes a single-character token.
        currentToken = String.valueOf(text.charAt(pos));
        matcher.region(pos + 1, matcher.regionEnd());
      }

      skipWhitespace();
    }
  }

  /**
   * Skip over any whitespace so that the matcher region starts at the next
   * token.  The WHITESPACE pattern also covers {@code #} comments running
   * to the end of a line.
   */
  private void skipWhitespace() {
    matcher.usePattern(WHITESPACE);
    if (matcher.lookingAt()) {
      matcher.region(matcher.end(), matcher.regionEnd());
    }
  }

  /**
   * If the next token exactly matches {@code token}, consume it and return
   * {@code true}.  Otherwise, return {@code false} without doing anything.
   */
  public boolean tryConsume(final String token) {
    if (currentToken.equals(token)) {
      nextToken();
      return true;
    } else {
      return false;
    }
  }

  /**
   * If the next token exactly matches {@code token}, consume it.  Otherwise,
   * throw a {@link ParseException}.
   */
  public void consume(final String token) throws ParseException {
    if (!tryConsume(token)) {
      throw parseException("Expected \"" + token + "\".");
    }
  }

  /**
   * Returns {@code true} if the next token looks like an integer, but does
   * not consume it.  Only the first character is inspected: it must be a
   * decimal digit, {@code '-'} or {@code '+'}.
   */
  public boolean lookingAtInteger() {
    if (currentToken.length() == 0) {
      return false;
    }

    final char c = currentToken.charAt(0);
    return ('0' <= c && c <= '9') ||
           c == '-' || c == '+';
  }

  /**
   * Returns {@code true} if the current token's text is equal to that
   * specified.
   */
  public boolean lookingAt(String text) {
    return currentToken.equals(text);
  }

  /**
   * If the next token is an identifier, consume it and return its value.
   * Otherwise, throw a {@link ParseException}.  A token counts as an
   * identifier when every character is an ASCII letter, a digit,
   * {@code '_'} or {@code '.'}.
   */
  public String consumeIdentifier() throws ParseException {
    // NOTE: an empty token (end of input) never enters this loop, so it is
    // accepted and returned as an empty identifier.
    for (int i = 0; i < currentToken.length(); i++) {
      final char c = currentToken.charAt(i);
      if (('a' <= c && c <= 'z') ||
          ('A' <= c && c <= 'Z') ||
          ('0' <= c && c <= '9') ||
          (c == '_') || (c == '.')) {
        // OK
      } else {
        throw parseException(
            "Expected identifier. Found '" + currentToken + "'");
      }
    }

    final String result = currentToken;
    nextToken();
    return result;
  }

  /**
   * If the next token is an identifier, consume it and return {@code true}.
   * Otherwise, return {@code false} without doing anything.
   */
  public boolean tryConsumeIdentifier() {
    try {
      consumeIdentifier();
      return true;
    } catch (ParseException e) {
      return false;
    }
  }

  /**
   * If the next token is a 32-bit signed integer, consume it and return its
   * value.  Otherwise, throw a {@link ParseException}.
   */
  public int consumeInt32() throws ParseException {
    try {
      final int result = parseInt32(currentToken);
      nextToken();
      return result;
    } catch (NumberFormatException e) {
      throw integerParseException(e);
    }
  }

  /**
   * If the next token is a 32-bit unsigned integer, consume it and return its
   * value.  Otherwise, throw a {@link ParseException}.
   */
  public int consumeUInt32() throws ParseException {
    try {
      final int result = parseUInt32(currentToken);
      nextToken();
      return result;
    } catch (NumberFormatException e) {
      throw integerParseException(e);
    }
  }

  /**
   * If the next token is a 64-bit signed integer, consume it and return its
   * value.  Otherwise, throw a {@link ParseException}.
   */
  public long consumeInt64() throws ParseException {
    try {
      final long result = parseInt64(currentToken);
      nextToken();
      return result;
    } catch (NumberFormatException e) {
      throw integerParseException(e);
    }
  }

  /**
   * If the next token is a 64-bit signed integer, consume it and return
   * {@code true}.  Otherwise, return {@code false} without doing anything.
   */
  public boolean tryConsumeInt64() {
    try {
      consumeInt64();
      return true;
    } catch (ParseException e) {
      return false;
    }
  }

  /**
   * If the next token is a 64-bit unsigned integer, consume it and return its
   * value.  Otherwise, throw a {@link ParseException}.
   */
  public long consumeUInt64() throws ParseException {
    try {
      final long result = parseUInt64(currentToken);
      nextToken();
      return result;
    } catch (NumberFormatException e) {
      throw integerParseException(e);
    }
  }

  /**
   * If the next token is a 64-bit unsigned integer, consume it and return
   * {@code true}.  Otherwise, return {@code false} without doing anything.
   */
  public boolean tryConsumeUInt64() {
    try {
      consumeUInt64();
      return true;
    } catch (ParseException e) {
      return false;
    }
  }

  /**
   * If the next token is a double, consume it and return its value.
   * Otherwise, throw a {@link ParseException}.
   */
  public double consumeDouble() throws ParseException {
    // We need to parse infinity and nan separately because
    // Double.parseDouble() does not accept "inf", "infinity", or "nan".
    if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
      final boolean negative = currentToken.startsWith("-");
      nextToken();
      return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
    }
    if (currentToken.equalsIgnoreCase("nan")) {
      nextToken();
      return Double.NaN;
    }
    try {
      final double result = Double.parseDouble(currentToken);
      nextToken();
      return result;
    } catch (NumberFormatException e) {
      throw floatParseException(e);
    }
  }

  /**
   * If the next token is a double, consume it and return {@code true}.
   * Otherwise, return {@code false} without doing anything.
   */
  public boolean tryConsumeDouble() {
    try {
      consumeDouble();
      return true;
    } catch (ParseException e) {
      return false;
    }
  }

  /**
   * If the next token is a float, consume it and return its value.
   * Otherwise, throw a {@link ParseException}.
   */
  public float consumeFloat() throws ParseException {
    // We need to parse infinity and nan separately because
    // Float.parseFloat() does not accept "inf", "infinity", or "nan".
    if (FLOAT_INFINITY.matcher(currentToken).matches()) {
      final boolean negative = currentToken.startsWith("-");
      nextToken();
      return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
    }
    if (FLOAT_NAN.matcher(currentToken).matches()) {
      nextToken();
      return Float.NaN;
    }
    try {
      final float result = Float.parseFloat(currentToken);
      nextToken();
      return result;
    } catch (NumberFormatException e) {
      throw floatParseException(e);
    }
  }

  /**
   * If the next token is a float, consume it and return {@code true}.
   * Otherwise, return {@code false} without doing anything.
   */
  public boolean tryConsumeFloat() {
    try {
      consumeFloat();
      return true;
    } catch (ParseException e) {
      return false;
    }
  }

  /**
   * If the next token is a boolean, consume it and return its value.
   * Otherwise, throw a {@link ParseException}.  The accepted spellings are
   * {@code true}, {@code t} and {@code 1} for true and {@code false},
   * {@code f} and {@code 0} for false (case-sensitive).
   */
  public boolean consumeBoolean() throws ParseException {
    if (currentToken.equals("true") ||
        currentToken.equals("t") ||
        currentToken.equals("1")) {
      nextToken();
      return true;
    } else if (currentToken.equals("false") ||
               currentToken.equals("f") ||
               currentToken.equals("0")) {
      nextToken();
      return false;
    } else {
      throw parseException("Expected \"true\" or \"false\".");
    }
  }

  /**
   * If the next token is a string, consume it and return its (unescaped)
   * value.  Otherwise, throw a {@link ParseException}.  The unescaped bytes
   * are decoded as UTF-8.
   */
  public String consumeString() throws ParseException {
    return consumeByteString().toStringUtf8();
  }

  /**
   * If the next token is a string, consume it and return true.  Otherwise,
   * return false.
   */
  public boolean tryConsumeString() {
    try {
      consumeString();
      return true;
    } catch (ParseException e) {
      return false;
    }
  }

  /**
   * If the next token is a string, consume it, unescape it as a
   * {@link ByteString}, and return it.  Otherwise, throw a
   * {@link ParseException}.  Any directly following string tokens are
   * consumed as well and concatenated onto the result.
   */
  public ByteString consumeByteString() throws ParseException {
    List<ByteString> list = new ArrayList<ByteString>();
    consumeByteString(list);
    while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
      consumeByteString(list);
    }
    return ByteString.copyFrom(list);
  }

  /**
   * Like {@link #consumeByteString()} but adds each token of the string to
   * the given list.  String literals (whether bytes or text) may come in
   * multiple adjacent tokens which are automatically concatenated, like in
   * C or Python.
   */
  private void consumeByteString(List<ByteString> list)
      throws ParseException {
    // '\0' stands in for "no first character" when the token is empty.
    final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
                                                 : '\0';
    if (quote != '\"' && quote != '\'') {
      throw parseException("Expected string.");
    }

    // The closing quote must be the same character as the opening one.
    if (currentToken.length() < 2 ||
        currentToken.charAt(currentToken.length() - 1) != quote) {
      throw parseException("String missing ending quote.");
    }

    try {
      final String escaped =
          currentToken.substring(1, currentToken.length() - 1);
      final ByteString result = unescapeBytes(escaped);
      nextToken();
      list.add(result);
    } catch (InvalidEscapeSequenceException e) {
      throw parseException(e.getMessage());
    }
  }

  /**
   * Returns a {@link ParseException} with the current line and column
   * numbers in the description, suitable for throwing.
   */
  public ParseException parseException(final String description) {
    // Note:  People generally prefer one-based line and column numbers.
    return new ParseException(
      line + 1, column + 1, description);
  }

  /**
   * Returns a {@link ParseException} with the line and column numbers of
   * the previous token in the description, suitable for throwing.
   */
  public ParseException parseExceptionPreviousToken(
      final String description) {
    // Note:  People generally prefer one-based line and column numbers.
    return new ParseException(
      previousLine + 1, previousColumn + 1, description);
  }

  /**
   * Constructs an appropriate {@link ParseException} for the given
   * {@code NumberFormatException} when trying to parse an integer.
   */
  private ParseException integerParseException(
      final NumberFormatException e) {
    return parseException("Couldn't parse integer: " + e.getMessage());
  }

  /**
   * Constructs an appropriate {@link ParseException} for the given
   * {@code NumberFormatException} when trying to parse a float or double.
   */
  private ParseException floatParseException(final NumberFormatException e) {
    return parseException("Couldn't parse number: " + e.getMessage());
  }
}
1047 */ 1048 private ParseException integerParseException( 1049 final NumberFormatException e) { 1050 return parseException("Couldn't parse integer: " + e.getMessage()); 1051 } 1052 1053 /** 1054 * Constructs an appropriate {@link ParseException} for the given 1055 * {@code NumberFormatException} when trying to parse a float or double. 1056 */ 1057 private ParseException floatParseException(final NumberFormatException e) { 1058 return parseException("Couldn't parse number: " + e.getMessage()); 1059 } 1060 } 1061 1062 /** Thrown when parsing an invalid text format message. */ 1063 public static class ParseException extends IOException { 1064 private static final long serialVersionUID = 3196188060225107702L; 1065 1066 private final int line; 1067 private final int column; 1068 1069 /** Create a new instance, with -1 as the line and column numbers. */ 1070 public ParseException(final String message) { 1071 this(-1, -1, message); 1072 } 1073 1074 /** 1075 * Create a new instance 1076 * 1077 * @param line the line number where the parse error occurred, 1078 * using 1-offset. 1079 * @param column the column number where the parser error occurred, 1080 * using 1-offset. 1081 */ 1082 public ParseException(final int line, final int column, 1083 final String message) { 1084 super(Integer.toString(line) + ":" + column + ": " + message); 1085 this.line = line; 1086 this.column = column; 1087 } 1088 1089 /** 1090 * Return the line where the parse exception occurred, or -1 when 1091 * none is provided. The value is specified as 1-offset, so the first 1092 * line is line 1. 1093 */ 1094 public int getLine() { 1095 return line; 1096 } 1097 1098 /** 1099 * Return the column where the parse exception occurred, or -1 when 1100 * none is provided. The value is specified as 1-offset, so the first 1101 * line is line 1. 
1102 */ 1103 public int getColumn() { 1104 return column; 1105 } 1106 } 1107 1108 private static final Parser PARSER = Parser.newBuilder().build(); 1109 1110 /** 1111 * Return a {@link Parser} instance which can parse text-format 1112 * messages. The returned instance is thread-safe. 1113 */ 1114 public static Parser getParser() { 1115 return PARSER; 1116 } 1117 1118 /** 1119 * Parse a text-format message from {@code input} and merge the contents 1120 * into {@code builder}. 1121 */ 1122 public static void merge(final Readable input, 1123 final Message.Builder builder) 1124 throws IOException { 1125 PARSER.merge(input, builder); 1126 } 1127 1128 /** 1129 * Parse a text-format message from {@code input} and merge the contents 1130 * into {@code builder}. 1131 */ 1132 public static void merge(final CharSequence input, 1133 final Message.Builder builder) 1134 throws ParseException { 1135 PARSER.merge(input, builder); 1136 } 1137 1138 /** 1139 * Parse a text-format message from {@code input} and merge the contents 1140 * into {@code builder}. Extensions will be recognized if they are 1141 * registered in {@code extensionRegistry}. 1142 */ 1143 public static void merge(final Readable input, 1144 final ExtensionRegistry extensionRegistry, 1145 final Message.Builder builder) 1146 throws IOException { 1147 PARSER.merge(input, extensionRegistry, builder); 1148 } 1149 1150 1151 /** 1152 * Parse a text-format message from {@code input} and merge the contents 1153 * into {@code builder}. Extensions will be recognized if they are 1154 * registered in {@code extensionRegistry}. 1155 */ 1156 public static void merge(final CharSequence input, 1157 final ExtensionRegistry extensionRegistry, 1158 final Message.Builder builder) 1159 throws ParseException { 1160 PARSER.merge(input, extensionRegistry, builder); 1161 } 1162 1163 1164 /** 1165 * Parser for text-format proto2 instances. This class is thread-safe. 1166 * The implementation largely follows google/protobuf/text_format.cc. 
1167 * 1168 * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or 1169 * {@link Builder} to control the parser behavior. 1170 */ 1171 public static class Parser { 1172 /** 1173 * Determines if repeated values for non-repeated fields and 1174 * oneofs are permitted. For example, given required/optional field "foo" 1175 * and a oneof containing "baz" and "qux": 1176 * <li> 1177 * <ul>"foo: 1 foo: 2" 1178 * <ul>"baz: 1 qux: 2" 1179 * <ul>merging "foo: 2" into a proto in which foo is already set, or 1180 * <ul>merging "qux: 2" into a proto in which baz is already set. 1181 * </li> 1182 */ 1183 public enum SingularOverwritePolicy { 1184 /** The last value is retained. */ 1185 ALLOW_SINGULAR_OVERWRITES, 1186 /** An error is issued. */ 1187 FORBID_SINGULAR_OVERWRITES 1188 } 1189 1190 private final boolean allowUnknownFields; 1191 private final SingularOverwritePolicy singularOverwritePolicy; 1192 1193 private Parser(boolean allowUnknownFields, 1194 SingularOverwritePolicy singularOverwritePolicy) { 1195 this.allowUnknownFields = allowUnknownFields; 1196 this.singularOverwritePolicy = singularOverwritePolicy; 1197 } 1198 1199 /** 1200 * Returns a new instance of {@link Builder}. 1201 */ 1202 public static Builder newBuilder() { 1203 return new Builder(); 1204 } 1205 1206 /** 1207 * Builder that can be used to obtain new instances of {@link Parser}. 1208 */ 1209 public static class Builder { 1210 private boolean allowUnknownFields = false; 1211 private SingularOverwritePolicy singularOverwritePolicy = 1212 SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES; 1213 1214 /** 1215 * Sets parser behavior when a non-repeated field appears more than once. 
1216 */ 1217 public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { 1218 this.singularOverwritePolicy = p; 1219 return this; 1220 } 1221 1222 public Parser build() { 1223 return new Parser(allowUnknownFields, singularOverwritePolicy); 1224 } 1225 } 1226 1227 /** 1228 * Parse a text-format message from {@code input} and merge the contents 1229 * into {@code builder}. 1230 */ 1231 public void merge(final Readable input, 1232 final Message.Builder builder) 1233 throws IOException { 1234 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1235 } 1236 1237 /** 1238 * Parse a text-format message from {@code input} and merge the contents 1239 * into {@code builder}. 1240 */ 1241 public void merge(final CharSequence input, 1242 final Message.Builder builder) 1243 throws ParseException { 1244 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1245 } 1246 1247 /** 1248 * Parse a text-format message from {@code input} and merge the contents 1249 * into {@code builder}. Extensions will be recognized if they are 1250 * registered in {@code extensionRegistry}. 1251 */ 1252 public void merge(final Readable input, 1253 final ExtensionRegistry extensionRegistry, 1254 final Message.Builder builder) 1255 throws IOException { 1256 // Read the entire input to a String then parse that. 1257 1258 // If StreamTokenizer were not quite so crippled, or if there were a kind 1259 // of Reader that could read in chunks that match some particular regex, 1260 // or if we wanted to write a custom Reader to tokenize our stream, then 1261 // we would not have to read to one big String. Alas, none of these is 1262 // the case. Oh well. 
1263 1264 merge(toStringBuilder(input), extensionRegistry, builder); 1265 } 1266 1267 1268 private static final int BUFFER_SIZE = 4096; 1269 1270 // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) 1271 // overhead is worthwhile 1272 private static StringBuilder toStringBuilder(final Readable input) 1273 throws IOException { 1274 final StringBuilder text = new StringBuilder(); 1275 final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); 1276 while (true) { 1277 final int n = input.read(buffer); 1278 if (n == -1) { 1279 break; 1280 } 1281 buffer.flip(); 1282 text.append(buffer, 0, n); 1283 } 1284 return text; 1285 } 1286 1287 /** 1288 * Parse a text-format message from {@code input} and merge the contents 1289 * into {@code builder}. Extensions will be recognized if they are 1290 * registered in {@code extensionRegistry}. 1291 */ 1292 public void merge(final CharSequence input, 1293 final ExtensionRegistry extensionRegistry, 1294 final Message.Builder builder) 1295 throws ParseException { 1296 final Tokenizer tokenizer = new Tokenizer(input); 1297 MessageReflection.BuilderAdapter target = 1298 new MessageReflection.BuilderAdapter(builder); 1299 1300 while (!tokenizer.atEnd()) { 1301 mergeField(tokenizer, extensionRegistry, target); 1302 } 1303 } 1304 1305 1306 /** 1307 * Parse a single field from {@code tokenizer} and merge it into 1308 * {@code builder}. 1309 */ 1310 private void mergeField(final Tokenizer tokenizer, 1311 final ExtensionRegistry extensionRegistry, 1312 final MessageReflection.MergeTarget target) 1313 throws ParseException { 1314 FieldDescriptor field = null; 1315 final Descriptor type = target.getDescriptorForType(); 1316 ExtensionRegistry.ExtensionInfo extension = null; 1317 1318 if (tokenizer.tryConsume("[")) { 1319 // An extension. 
1320 final StringBuilder name = 1321 new StringBuilder(tokenizer.consumeIdentifier()); 1322 while (tokenizer.tryConsume(".")) { 1323 name.append('.'); 1324 name.append(tokenizer.consumeIdentifier()); 1325 } 1326 1327 extension = target.findExtensionByName( 1328 extensionRegistry, name.toString()); 1329 1330 if (extension == null) { 1331 if (!allowUnknownFields) { 1332 throw tokenizer.parseExceptionPreviousToken( 1333 "Extension \"" + name + "\" not found in the ExtensionRegistry."); 1334 } else { 1335 logger.warning( 1336 "Extension \"" + name + "\" not found in the ExtensionRegistry."); 1337 } 1338 } else { 1339 if (extension.descriptor.getContainingType() != type) { 1340 throw tokenizer.parseExceptionPreviousToken( 1341 "Extension \"" + name + "\" does not extend message type \"" + 1342 type.getFullName() + "\"."); 1343 } 1344 field = extension.descriptor; 1345 } 1346 1347 tokenizer.consume("]"); 1348 } else { 1349 final String name = tokenizer.consumeIdentifier(); 1350 field = type.findFieldByName(name); 1351 1352 // Group names are expected to be capitalized as they appear in the 1353 // .proto file, which actually matches their type names, not their field 1354 // names. 1355 if (field == null) { 1356 // Explicitly specify US locale so that this code does not break when 1357 // executing in Turkey. 1358 final String lowerName = name.toLowerCase(Locale.US); 1359 field = type.findFieldByName(lowerName); 1360 // If the case-insensitive match worked but the field is NOT a group, 1361 if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { 1362 field = null; 1363 } 1364 } 1365 // Again, special-case group names as described above. 
1366 if (field != null && field.getType() == FieldDescriptor.Type.GROUP && 1367 !field.getMessageType().getName().equals(name)) { 1368 field = null; 1369 } 1370 1371 if (field == null) { 1372 if (!allowUnknownFields) { 1373 throw tokenizer.parseExceptionPreviousToken( 1374 "Message type \"" + type.getFullName() + 1375 "\" has no field named \"" + name + "\"."); 1376 } else { 1377 logger.warning( 1378 "Message type \"" + type.getFullName() + 1379 "\" has no field named \"" + name + "\"."); 1380 } 1381 } 1382 } 1383 1384 // Skips unknown fields. 1385 if (field == null) { 1386 // Try to guess the type of this field. 1387 // If this field is not a message, there should be a ":" between the 1388 // field name and the field value and also the field value should not 1389 // start with "{" or "<" which indicates the begining of a message body. 1390 // If there is no ":" or there is a "{" or "<" after ":", this field has 1391 // to be a message or the input is ill-formed. 1392 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && 1393 !tokenizer.lookingAt("<")) { 1394 skipFieldValue(tokenizer); 1395 } else { 1396 skipFieldMessage(tokenizer); 1397 } 1398 return; 1399 } 1400 1401 // Handle potential ':'. 1402 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1403 tokenizer.tryConsume(":"); // optional 1404 } else { 1405 tokenizer.consume(":"); // required 1406 } 1407 // Support specifying repeated field values as a comma-separated list. 1408 // Ex."foo: [1, 2, 3]" 1409 if (field.isRepeated() && tokenizer.tryConsume("[")) { 1410 while (true) { 1411 consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); 1412 if (tokenizer.tryConsume("]")) { 1413 // End of list. 1414 break; 1415 } 1416 tokenizer.consume(","); 1417 } 1418 } else { 1419 consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); 1420 } 1421 } 1422 1423 /** 1424 * Parse a single field value from {@code tokenizer} and merge it into 1425 * {@code builder}. 
1426 */ 1427 private void consumeFieldValue( 1428 final Tokenizer tokenizer, 1429 final ExtensionRegistry extensionRegistry, 1430 final MessageReflection.MergeTarget target, 1431 final FieldDescriptor field, 1432 final ExtensionRegistry.ExtensionInfo extension) 1433 throws ParseException { 1434 Object value = null; 1435 1436 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1437 final String endToken; 1438 if (tokenizer.tryConsume("<")) { 1439 endToken = ">"; 1440 } else { 1441 tokenizer.consume("{"); 1442 endToken = "}"; 1443 } 1444 1445 final MessageReflection.MergeTarget subField; 1446 subField = target.newMergeTargetForField(field, 1447 (extension == null) ? null : extension.defaultInstance); 1448 1449 while (!tokenizer.tryConsume(endToken)) { 1450 if (tokenizer.atEnd()) { 1451 throw tokenizer.parseException( 1452 "Expected \"" + endToken + "\"."); 1453 } 1454 mergeField(tokenizer, extensionRegistry, subField); 1455 } 1456 1457 value = subField.finish(); 1458 1459 } else { 1460 switch (field.getType()) { 1461 case INT32: 1462 case SINT32: 1463 case SFIXED32: 1464 value = tokenizer.consumeInt32(); 1465 break; 1466 1467 case INT64: 1468 case SINT64: 1469 case SFIXED64: 1470 value = tokenizer.consumeInt64(); 1471 break; 1472 1473 case UINT32: 1474 case FIXED32: 1475 value = tokenizer.consumeUInt32(); 1476 break; 1477 1478 case UINT64: 1479 case FIXED64: 1480 value = tokenizer.consumeUInt64(); 1481 break; 1482 1483 case FLOAT: 1484 value = tokenizer.consumeFloat(); 1485 break; 1486 1487 case DOUBLE: 1488 value = tokenizer.consumeDouble(); 1489 break; 1490 1491 case BOOL: 1492 value = tokenizer.consumeBoolean(); 1493 break; 1494 1495 case STRING: 1496 value = tokenizer.consumeString(); 1497 break; 1498 1499 case BYTES: 1500 value = tokenizer.consumeByteString(); 1501 break; 1502 1503 case ENUM: 1504 final EnumDescriptor enumType = field.getEnumType(); 1505 1506 if (tokenizer.lookingAtInteger()) { 1507 final int number = tokenizer.consumeInt32(); 1508 
value = enumType.findValueByNumber(number); 1509 if (value == null) { 1510 throw tokenizer.parseExceptionPreviousToken( 1511 "Enum type \"" + enumType.getFullName() + 1512 "\" has no value with number " + number + '.'); 1513 } 1514 } else { 1515 final String id = tokenizer.consumeIdentifier(); 1516 value = enumType.findValueByName(id); 1517 if (value == null) { 1518 throw tokenizer.parseExceptionPreviousToken( 1519 "Enum type \"" + enumType.getFullName() + 1520 "\" has no value named \"" + id + "\"."); 1521 } 1522 } 1523 1524 break; 1525 1526 case MESSAGE: 1527 case GROUP: 1528 throw new RuntimeException("Can't get here."); 1529 } 1530 } 1531 1532 if (field.isRepeated()) { 1533 target.addRepeatedField(field, value); 1534 } else if ((singularOverwritePolicy 1535 == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES) 1536 && target.hasField(field)) { 1537 throw tokenizer.parseExceptionPreviousToken("Non-repeated field \"" 1538 + field.getFullName() + "\" cannot be overwritten."); 1539 } else if ((singularOverwritePolicy 1540 == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES) 1541 && field.getContainingOneof() != null 1542 && target.hasOneof(field.getContainingOneof())) { 1543 Descriptors.OneofDescriptor oneof = field.getContainingOneof(); 1544 throw tokenizer.parseExceptionPreviousToken("Field \"" 1545 + field.getFullName() + "\" is specified along with field \"" 1546 + target.getOneofFieldDescriptor(oneof).getFullName() 1547 + "\", another member of oneof \"" + oneof.getName() + "\"."); 1548 } else { 1549 target.setField(field, value); 1550 } 1551 } 1552 1553 /** 1554 * Skips the next field including the field's name and value. 1555 */ 1556 private void skipField(Tokenizer tokenizer) throws ParseException { 1557 if (tokenizer.tryConsume("[")) { 1558 // Extension name. 
1559 do { 1560 tokenizer.consumeIdentifier(); 1561 } while (tokenizer.tryConsume(".")); 1562 tokenizer.consume("]"); 1563 } else { 1564 tokenizer.consumeIdentifier(); 1565 } 1566 1567 // Try to guess the type of this field. 1568 // If this field is not a message, there should be a ":" between the 1569 // field name and the field value and also the field value should not 1570 // start with "{" or "<" which indicates the begining of a message body. 1571 // If there is no ":" or there is a "{" or "<" after ":", this field has 1572 // to be a message or the input is ill-formed. 1573 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && 1574 !tokenizer.lookingAt("{")) { 1575 skipFieldValue(tokenizer); 1576 } else { 1577 skipFieldMessage(tokenizer); 1578 } 1579 // For historical reasons, fields may optionally be separated by commas or 1580 // semicolons. 1581 if (!tokenizer.tryConsume(";")) { 1582 tokenizer.tryConsume(","); 1583 } 1584 } 1585 1586 /** 1587 * Skips the whole body of a message including the beginning delimeter and 1588 * the ending delimeter. 1589 */ 1590 private void skipFieldMessage(Tokenizer tokenizer) throws ParseException { 1591 final String delimiter; 1592 if (tokenizer.tryConsume("<")) { 1593 delimiter = ">"; 1594 } else { 1595 tokenizer.consume("{"); 1596 delimiter = "}"; 1597 } 1598 while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { 1599 skipField(tokenizer); 1600 } 1601 tokenizer.consume(delimiter); 1602 } 1603 1604 /** 1605 * Skips a field value. 
1606 */ 1607 private void skipFieldValue(Tokenizer tokenizer) throws ParseException { 1608 if (tokenizer.tryConsumeString()) { 1609 while (tokenizer.tryConsumeString()) {} 1610 return; 1611 } 1612 if (!tokenizer.tryConsumeIdentifier() && // includes enum & boolean 1613 !tokenizer.tryConsumeInt64() && // includes int32 1614 !tokenizer.tryConsumeUInt64() && // includes uint32 1615 !tokenizer.tryConsumeDouble() && 1616 !tokenizer.tryConsumeFloat()) { 1617 throw tokenizer.parseException( 1618 "Invalid field value: " + tokenizer.currentToken); 1619 } 1620 } 1621 } 1622 1623 // ================================================================= 1624 // Utility functions 1625 // 1626 // Some of these methods are package-private because Descriptors.java uses 1627 // them. 1628 1629 private interface ByteSequence { 1630 int size(); 1631 byte byteAt(int offset); 1632 } 1633 1634 /** 1635 * Escapes bytes in the format used in protocol buffer text format, which 1636 * is the same as the format used for C string literals. All bytes 1637 * that are not printable 7-bit ASCII characters are escaped, as well as 1638 * backslash, single-quote, and double-quote characters. Characters for 1639 * which no defined short-hand escape sequence is defined will be escaped 1640 * using 3-digit octal sequences. 1641 */ 1642 private static String escapeBytes(final ByteSequence input) { 1643 final StringBuilder builder = new StringBuilder(input.size()); 1644 for (int i = 0; i < input.size(); i++) { 1645 final byte b = input.byteAt(i); 1646 switch (b) { 1647 // Java does not recognize \a or \v, apparently. 
1648 case 0x07: builder.append("\\a" ); break; 1649 case '\b': builder.append("\\b" ); break; 1650 case '\f': builder.append("\\f" ); break; 1651 case '\n': builder.append("\\n" ); break; 1652 case '\r': builder.append("\\r" ); break; 1653 case '\t': builder.append("\\t" ); break; 1654 case 0x0b: builder.append("\\v" ); break; 1655 case '\\': builder.append("\\\\"); break; 1656 case '\'': builder.append("\\\'"); break; 1657 case '"' : builder.append("\\\""); break; 1658 default: 1659 // Note: Bytes with the high-order bit set should be escaped. Since 1660 // bytes are signed, such bytes will compare less than 0x20, hence 1661 // the following line is correct. 1662 if (b >= 0x20) { 1663 builder.append((char) b); 1664 } else { 1665 builder.append('\\'); 1666 builder.append((char) ('0' + ((b >>> 6) & 3))); 1667 builder.append((char) ('0' + ((b >>> 3) & 7))); 1668 builder.append((char) ('0' + (b & 7))); 1669 } 1670 break; 1671 } 1672 } 1673 return builder.toString(); 1674 } 1675 1676 /** 1677 * Escapes bytes in the format used in protocol buffer text format, which 1678 * is the same as the format used for C string literals. All bytes 1679 * that are not printable 7-bit ASCII characters are escaped, as well as 1680 * backslash, single-quote, and double-quote characters. Characters for 1681 * which no defined short-hand escape sequence is defined will be escaped 1682 * using 3-digit octal sequences. 1683 */ 1684 static String escapeBytes(final ByteString input) { 1685 return escapeBytes(new ByteSequence() { 1686 public int size() { 1687 return input.size(); 1688 } 1689 public byte byteAt(int offset) { 1690 return input.byteAt(offset); 1691 } 1692 }); 1693 } 1694 1695 /** 1696 * Like {@link #escapeBytes(ByteString)}, but used for byte array. 
1697 */ 1698 static String escapeBytes(final byte[] input) { 1699 return escapeBytes(new ByteSequence() { 1700 public int size() { 1701 return input.length; 1702 } 1703 public byte byteAt(int offset) { 1704 return input[offset]; 1705 } 1706 }); 1707 } 1708 1709 /** 1710 * Un-escape a byte sequence as escaped using 1711 * {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with 1712 * "\x") are also recognized. 1713 */ 1714 static ByteString unescapeBytes(final CharSequence charString) 1715 throws InvalidEscapeSequenceException { 1716 // First convert the Java character sequence to UTF-8 bytes. 1717 ByteString input = ByteString.copyFromUtf8(charString.toString()); 1718 // Then unescape certain byte sequences introduced by ASCII '\\'. The valid 1719 // escapes can all be expressed with ASCII characters, so it is safe to 1720 // operate on bytes here. 1721 // 1722 // Unescaping the input byte array will result in a byte sequence that's no 1723 // longer than the input. That's because each escape sequence is between 1724 // two and four bytes long and stands for a single byte. 1725 final byte[] result = new byte[input.size()]; 1726 int pos = 0; 1727 for (int i = 0; i < input.size(); i++) { 1728 byte c = input.byteAt(i); 1729 if (c == '\\') { 1730 if (i + 1 < input.size()) { 1731 ++i; 1732 c = input.byteAt(i); 1733 if (isOctal(c)) { 1734 // Octal escape. 1735 int code = digitValue(c); 1736 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 1737 ++i; 1738 code = code * 8 + digitValue(input.byteAt(i)); 1739 } 1740 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 1741 ++i; 1742 code = code * 8 + digitValue(input.byteAt(i)); 1743 } 1744 // TODO: Check that 0 <= code && code <= 0xFF. 
1745 result[pos++] = (byte)code; 1746 } else { 1747 switch (c) { 1748 case 'a' : result[pos++] = 0x07; break; 1749 case 'b' : result[pos++] = '\b'; break; 1750 case 'f' : result[pos++] = '\f'; break; 1751 case 'n' : result[pos++] = '\n'; break; 1752 case 'r' : result[pos++] = '\r'; break; 1753 case 't' : result[pos++] = '\t'; break; 1754 case 'v' : result[pos++] = 0x0b; break; 1755 case '\\': result[pos++] = '\\'; break; 1756 case '\'': result[pos++] = '\''; break; 1757 case '"' : result[pos++] = '\"'; break; 1758 1759 case 'x': 1760 // hex escape 1761 int code = 0; 1762 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 1763 ++i; 1764 code = digitValue(input.byteAt(i)); 1765 } else { 1766 throw new InvalidEscapeSequenceException( 1767 "Invalid escape sequence: '\\x' with no digits"); 1768 } 1769 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 1770 ++i; 1771 code = code * 16 + digitValue(input.byteAt(i)); 1772 } 1773 result[pos++] = (byte)code; 1774 break; 1775 1776 default: 1777 throw new InvalidEscapeSequenceException( 1778 "Invalid escape sequence: '\\" + (char)c + '\''); 1779 } 1780 } 1781 } else { 1782 throw new InvalidEscapeSequenceException( 1783 "Invalid escape sequence: '\\' at end of string."); 1784 } 1785 } else { 1786 result[pos++] = c; 1787 } 1788 } 1789 1790 return ByteString.copyFrom(result, 0, pos); 1791 } 1792 1793 /** 1794 * Thrown by {@link TextFormat#unescapeBytes} and 1795 * {@link TextFormat#unescapeText} when an invalid escape sequence is seen. 1796 */ 1797 static class InvalidEscapeSequenceException extends IOException { 1798 private static final long serialVersionUID = -8164033650142593304L; 1799 1800 InvalidEscapeSequenceException(final String description) { 1801 super(description); 1802 } 1803 } 1804 1805 /** 1806 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. 1807 * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped 1808 * individually as a 3-digit octal escape. 
Yes, it's weird. 1809 */ 1810 static String escapeText(final String input) { 1811 return escapeBytes(ByteString.copyFromUtf8(input)); 1812 } 1813 1814 /** 1815 * Escape double quotes and backslashes in a String for unicode output of a message. 1816 */ 1817 public static String escapeDoubleQuotesAndBackslashes(final String input) { 1818 return input.replace("\\", "\\\\").replace("\"", "\\\""); 1819 } 1820 1821 /** 1822 * Un-escape a text string as escaped using {@link #escapeText(String)}. 1823 * Two-digit hex escapes (starting with "\x") are also recognized. 1824 */ 1825 static String unescapeText(final String input) 1826 throws InvalidEscapeSequenceException { 1827 return unescapeBytes(input).toStringUtf8(); 1828 } 1829 1830 /** Is this an octal digit? */ 1831 private static boolean isOctal(final byte c) { 1832 return '0' <= c && c <= '7'; 1833 } 1834 1835 /** Is this a hex digit? */ 1836 private static boolean isHex(final byte c) { 1837 return ('0' <= c && c <= '9') || 1838 ('a' <= c && c <= 'f') || 1839 ('A' <= c && c <= 'F'); 1840 } 1841 1842 /** 1843 * Interpret a character as a digit (in any base up to 36) and return the 1844 * numeric value. This is like {@code Character.digit()} but we don't accept 1845 * non-ASCII digits. 1846 */ 1847 private static int digitValue(final byte c) { 1848 if ('0' <= c && c <= '9') { 1849 return c - '0'; 1850 } else if ('a' <= c && c <= 'z') { 1851 return c - 'a' + 10; 1852 } else { 1853 return c - 'A' + 10; 1854 } 1855 } 1856 1857 /** 1858 * Parse a 32-bit signed integer from the text. Unlike the Java standard 1859 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1860 * and "0" to signify hexadecimal and octal numbers, respectively. 1861 */ 1862 static int parseInt32(final String text) throws NumberFormatException { 1863 return (int) parseInteger(text, true, false); 1864 } 1865 1866 /** 1867 * Parse a 32-bit unsigned integer from the text. 
Unlike the Java standard 1868 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1869 * and "0" to signify hexadecimal and octal numbers, respectively. The 1870 * result is coerced to a (signed) {@code int} when returned since Java has 1871 * no unsigned integer type. 1872 */ 1873 static int parseUInt32(final String text) throws NumberFormatException { 1874 return (int) parseInteger(text, false, false); 1875 } 1876 1877 /** 1878 * Parse a 64-bit signed integer from the text. Unlike the Java standard 1879 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1880 * and "0" to signify hexadecimal and octal numbers, respectively. 1881 */ 1882 static long parseInt64(final String text) throws NumberFormatException { 1883 return parseInteger(text, true, true); 1884 } 1885 1886 /** 1887 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard 1888 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1889 * and "0" to signify hexadecimal and octal numbers, respectively. The 1890 * result is coerced to a (signed) {@code long} when returned since Java has 1891 * no unsigned long type. 
1892 */ 1893 static long parseUInt64(final String text) throws NumberFormatException { 1894 return parseInteger(text, false, true); 1895 } 1896 1897 private static long parseInteger(final String text, 1898 final boolean isSigned, 1899 final boolean isLong) 1900 throws NumberFormatException { 1901 int pos = 0; 1902 1903 boolean negative = false; 1904 if (text.startsWith("-", pos)) { 1905 if (!isSigned) { 1906 throw new NumberFormatException("Number must be positive: " + text); 1907 } 1908 ++pos; 1909 negative = true; 1910 } 1911 1912 int radix = 10; 1913 if (text.startsWith("0x", pos)) { 1914 pos += 2; 1915 radix = 16; 1916 } else if (text.startsWith("0", pos)) { 1917 radix = 8; 1918 } 1919 1920 final String numberText = text.substring(pos); 1921 1922 long result = 0; 1923 if (numberText.length() < 16) { 1924 // Can safely assume no overflow. 1925 result = Long.parseLong(numberText, radix); 1926 if (negative) { 1927 result = -result; 1928 } 1929 1930 // Check bounds. 1931 // No need to check for 64-bit numbers since they'd have to be 16 chars 1932 // or longer to overflow. 1933 if (!isLong) { 1934 if (isSigned) { 1935 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 1936 throw new NumberFormatException( 1937 "Number out of range for 32-bit signed integer: " + text); 1938 } 1939 } else { 1940 if (result >= (1L << 32) || result < 0) { 1941 throw new NumberFormatException( 1942 "Number out of range for 32-bit unsigned integer: " + text); 1943 } 1944 } 1945 } 1946 } else { 1947 BigInteger bigValue = new BigInteger(numberText, radix); 1948 if (negative) { 1949 bigValue = bigValue.negate(); 1950 } 1951 1952 // Check bounds. 
1953 if (!isLong) { 1954 if (isSigned) { 1955 if (bigValue.bitLength() > 31) { 1956 throw new NumberFormatException( 1957 "Number out of range for 32-bit signed integer: " + text); 1958 } 1959 } else { 1960 if (bigValue.bitLength() > 32) { 1961 throw new NumberFormatException( 1962 "Number out of range for 32-bit unsigned integer: " + text); 1963 } 1964 } 1965 } else { 1966 if (isSigned) { 1967 if (bigValue.bitLength() > 63) { 1968 throw new NumberFormatException( 1969 "Number out of range for 64-bit signed integer: " + text); 1970 } 1971 } else { 1972 if (bigValue.bitLength() > 64) { 1973 throw new NumberFormatException( 1974 "Number out of range for 64-bit unsigned integer: " + text); 1975 } 1976 } 1977 } 1978 1979 result = bigValue.longValue(); 1980 } 1981 1982 return result; 1983 } 1984} 1985