1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <algorithm>
36#include <float.h>
37#include <math.h>
38#include <stdio.h>
39#include <stack>
40#include <limits>
41#include <vector>
42
43#include <google/protobuf/text_format.h>
44
45#include <google/protobuf/descriptor.h>
46#include <google/protobuf/dynamic_message.h>
47#include <google/protobuf/repeated_field.h>
48#include <google/protobuf/wire_format_lite.h>
49#include <google/protobuf/io/strtod.h>
50#include <google/protobuf/io/coded_stream.h>
51#include <google/protobuf/io/zero_copy_stream.h>
52#include <google/protobuf/io/zero_copy_stream_impl.h>
53#include <google/protobuf/unknown_field_set.h>
54#include <google/protobuf/descriptor.pb.h>
55#include <google/protobuf/io/tokenizer.h>
56#include <google/protobuf/any.h>
57#include <google/protobuf/stubs/stringprintf.h>
58#include <google/protobuf/stubs/strutil.h>
59#include <google/protobuf/stubs/map_util.h>
60#include <google/protobuf/stubs/stl_util.h>
61
62namespace google {
63namespace protobuf {
64
65namespace {
66
// Returns true when `str` begins with a hexadecimal prefix, i.e. "0x" or "0X".
// Only the two-character prefix is inspected; any characters after it are not
// validated.
inline bool IsHexNumber(const string& str) {
  if (str.length() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
71
// Returns true when `str` looks like an octal literal: a leading '0' followed
// by at least one octal digit ('0' through '7').  Characters beyond the second
// one are not examined.
inline bool IsOctNumber(const string& str) {
  if (str.length() < 2 || str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return '0' <= digit && digit <= '7';
}
76
77inline bool GetAnyFieldDescriptors(const Message& message,
78                                   const FieldDescriptor** type_url_field,
79                                   const FieldDescriptor** value_field) {
80    const Descriptor* descriptor = message.GetDescriptor();
81    *type_url_field = descriptor->FindFieldByNumber(1);
82    *value_field = descriptor->FindFieldByNumber(2);
83    return (*type_url_field != NULL &&
84            (*type_url_field)->type() == FieldDescriptor::TYPE_STRING &&
85            *value_field != NULL &&
86            (*value_field)->type() == FieldDescriptor::TYPE_BYTES);
87}
88
89}  // namespace
90
91string Message::DebugString() const {
92  string debug_string;
93
94  TextFormat::Printer printer;
95  printer.SetExpandAny(true);
96
97  printer.PrintToString(*this, &debug_string);
98
99  return debug_string;
100}
101
102string Message::ShortDebugString() const {
103  string debug_string;
104
105  TextFormat::Printer printer;
106  printer.SetSingleLineMode(true);
107  printer.SetExpandAny(true);
108
109  printer.PrintToString(*this, &debug_string);
110  // Single line mode currently might have an extra space at the end.
111  if (debug_string.size() > 0 &&
112      debug_string[debug_string.size() - 1] == ' ') {
113    debug_string.resize(debug_string.size() - 1);
114  }
115
116  return debug_string;
117}
118
119string Message::Utf8DebugString() const {
120  string debug_string;
121
122  TextFormat::Printer printer;
123  printer.SetUseUtf8StringEscaping(true);
124  printer.SetExpandAny(true);
125
126  printer.PrintToString(*this, &debug_string);
127
128  return debug_string;
129}
130
131void Message::PrintDebugString() const {
132  printf("%s", DebugString().c_str());
133}
134
135
136// ===========================================================================
137// Implementation of the parse information tree class.
138TextFormat::ParseInfoTree::ParseInfoTree() { }
139
140TextFormat::ParseInfoTree::~ParseInfoTree() {
141  // Remove any nested information trees, as they are owned by this tree.
142  for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
143    STLDeleteElements(&(it->second));
144  }
145}
146
147void TextFormat::ParseInfoTree::RecordLocation(
148    const FieldDescriptor* field,
149    TextFormat::ParseLocation location) {
150  locations_[field].push_back(location);
151}
152
153TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
154    const FieldDescriptor* field) {
155  // Owned by us in the map.
156  TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
157  vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
158  GOOGLE_CHECK(trees);
159  trees->push_back(instance);
160  return instance;
161}
162
163void CheckFieldIndex(const FieldDescriptor* field, int index) {
164  if (field == NULL) { return; }
165
166  if (field->is_repeated() && index == -1) {
167    GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
168                << "Field: " << field->name();
169  } else if (!field->is_repeated() && index != -1) {
170    GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
171                << "Field: " << field->name();
172  }
173}
174
175TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
176    const FieldDescriptor* field, int index) const {
177  CheckFieldIndex(field, index);
178  if (index == -1) { index = 0; }
179
180  const vector<TextFormat::ParseLocation>* locations =
181      FindOrNull(locations_, field);
182  if (locations == NULL || index >= locations->size()) {
183    return TextFormat::ParseLocation();
184  }
185
186  return (*locations)[index];
187}
188
189TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
190    const FieldDescriptor* field, int index) const {
191  CheckFieldIndex(field, index);
192  if (index == -1) { index = 0; }
193
194  const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
195  if (trees == NULL || index >= trees->size()) {
196    return NULL;
197  }
198
199  return (*trees)[index];
200}
201
202
203// ===========================================================================
204// Internal class for parsing an ASCII representation of a Protocol Message.
205// This class makes use of the Protocol Message compiler's tokenizer found
206// in //google/protobuf/io/tokenizer.h. Note that class's Parse
207// method is *not* thread-safe and should only be used in a single thread at
208// a time.
209
// Readability shorthand: "DO(foo)" means "evaluate foo and, if it yields
// false, return false from the enclosing function".  The empty-then / else
// shape lets the macro be used as one statement followed by a semicolon.
// Borrowed from parser.cc (Thanks Kenton!).
#define DO(EXPRESSION) if (EXPRESSION) {} else return false
214
215class TextFormat::Parser::ParserImpl {
216 public:
217
// Policy for input that assigns a non-repeated field, or a oneof, more than
// once -- e.g. "foo: 1 foo: 2" for a required/optional field named "foo", or
// "baz: 1 qux: 2" where "baz" and "qux" belong to the same oneof.
enum SingularOverwritePolicy {
  // Repeated assignments are accepted; the last value is retained.
  ALLOW_SINGULAR_OVERWRITES = 0,
  // Repeated assignments are rejected; an error is issued.
  FORBID_SINGULAR_OVERWRITES = 1,
};
226
227  ParserImpl(const Descriptor* root_message_type,
228             io::ZeroCopyInputStream* input_stream,
229             io::ErrorCollector* error_collector,
230             TextFormat::Finder* finder,
231             ParseInfoTree* parse_info_tree,
232             SingularOverwritePolicy singular_overwrite_policy,
233             bool allow_case_insensitive_field,
234             bool allow_unknown_field,
235             bool allow_unknown_enum,
236             bool allow_field_number,
237             bool allow_relaxed_whitespace)
238    : error_collector_(error_collector),
239      finder_(finder),
240      parse_info_tree_(parse_info_tree),
241      tokenizer_error_collector_(this),
242      tokenizer_(input_stream, &tokenizer_error_collector_),
243      root_message_type_(root_message_type),
244      singular_overwrite_policy_(singular_overwrite_policy),
245      allow_case_insensitive_field_(allow_case_insensitive_field),
246      allow_unknown_field_(allow_unknown_field),
247      allow_unknown_enum_(allow_unknown_enum),
248      allow_field_number_(allow_field_number),
249      had_errors_(false) {
250    // For backwards-compatibility with proto1, we need to allow the 'f' suffix
251    // for floats.
252    tokenizer_.set_allow_f_after_float(true);
253
254    // '#' starts a comment.
255    tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
256
257    if (allow_relaxed_whitespace) {
258      tokenizer_.set_require_space_after_number(false);
259      tokenizer_.set_allow_multiline_strings(true);
260    }
261
262    // Consume the starting token.
263    tokenizer_.Next();
264  }
265  ~ParserImpl() { }
266
267  // Parses the ASCII representation specified in input and saves the
268  // information into the output pointer (a Message). Returns
269  // false if an error occurs (an error will also be logged to
270  // GOOGLE_LOG(ERROR)).
271  bool Parse(Message* output) {
272    // Consume fields until we cannot do so anymore.
273    while (true) {
274      if (LookingAtType(io::Tokenizer::TYPE_END)) {
275        return !had_errors_;
276      }
277
278      DO(ConsumeField(output));
279    }
280  }
281
282  bool ParseField(const FieldDescriptor* field, Message* output) {
283    bool suc;
284    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
285      suc = ConsumeFieldMessage(output, output->GetReflection(), field);
286    } else {
287      suc = ConsumeFieldValue(output, output->GetReflection(), field);
288    }
289    return suc && LookingAtType(io::Tokenizer::TYPE_END);
290  }
291
292  void ReportError(int line, int col, const string& message) {
293    had_errors_ = true;
294    if (error_collector_ == NULL) {
295      if (line >= 0) {
296        GOOGLE_LOG(ERROR) << "Error parsing text-format "
297                   << root_message_type_->full_name()
298                   << ": " << (line + 1) << ":"
299                   << (col + 1) << ": " << message;
300      } else {
301        GOOGLE_LOG(ERROR) << "Error parsing text-format "
302                   << root_message_type_->full_name()
303                   << ": " << message;
304      }
305    } else {
306      error_collector_->AddError(line, col, message);
307    }
308  }
309
310  void ReportWarning(int line, int col, const string& message) {
311    if (error_collector_ == NULL) {
312      if (line >= 0) {
313        GOOGLE_LOG(WARNING) << "Warning parsing text-format "
314                     << root_message_type_->full_name()
315                     << ": " << (line + 1) << ":"
316                     << (col + 1) << ": " << message;
317      } else {
318        GOOGLE_LOG(WARNING) << "Warning parsing text-format "
319                     << root_message_type_->full_name()
320                     << ": " << message;
321      }
322    } else {
323      error_collector_->AddWarning(line, col, message);
324    }
325  }
326
327 private:
328  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
329
330  // Reports an error with the given message with information indicating
331  // the position (as derived from the current token).
332  void ReportError(const string& message) {
333    ReportError(tokenizer_.current().line, tokenizer_.current().column,
334                message);
335  }
336
337  // Reports a warning with the given message with information indicating
338  // the position (as derived from the current token).
339  void ReportWarning(const string& message) {
340    ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
341                  message);
342  }
343
344  // Consumes the specified message with the given starting delimiter.
345  // This method checks to see that the end delimiter at the conclusion of
346  // the consumption matches the starting delimiter passed in here.
347  bool ConsumeMessage(Message* message, const string delimiter) {
348    while (!LookingAt(">") &&  !LookingAt("}")) {
349      DO(ConsumeField(message));
350    }
351
352    // Confirm that we have a valid ending delimiter.
353    DO(Consume(delimiter));
354    return true;
355  }
356
357  // Consume either "<" or "{".
358  bool ConsumeMessageDelimiter(string* delimiter) {
359    if (TryConsume("<")) {
360      *delimiter = ">";
361    } else {
362      DO(Consume("{"));
363      *delimiter = "}";
364    }
365    return true;
366  }
367
368
369  // Consumes the current field (as returned by the tokenizer) on the
370  // passed in message.
371  bool ConsumeField(Message* message) {
372    const Reflection* reflection = message->GetReflection();
373    const Descriptor* descriptor = message->GetDescriptor();
374
375    string field_name;
376
377    const FieldDescriptor* field = NULL;
378    int start_line = tokenizer_.current().line;
379    int start_column = tokenizer_.current().column;
380
381    const FieldDescriptor* any_type_url_field;
382    const FieldDescriptor* any_value_field;
383    if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field,
384                                         &any_value_field) &&
385        TryConsume("[")) {
386      string full_type_name, prefix;
387      DO(ConsumeAnyTypeUrl(&full_type_name, &prefix));
388      DO(Consume("]"));
389      TryConsume(":");  // ':' is optional between message labels and values.
390      string serialized_value;
391      DO(ConsumeAnyValue(full_type_name,
392                         message->GetDescriptor()->file()->pool(),
393                         &serialized_value));
394      reflection->SetString(
395          message, any_type_url_field,
396          string(prefix + full_type_name));
397      reflection->SetString(message, any_value_field, serialized_value);
398      return true;
399    }
400    if (TryConsume("[")) {
401      // Extension.
402      DO(ConsumeFullTypeName(&field_name));
403      DO(Consume("]"));
404
405      field = (finder_ != NULL
406               ? finder_->FindExtension(message, field_name)
407               : reflection->FindKnownExtensionByName(field_name));
408
409      if (field == NULL) {
410        if (!allow_unknown_field_) {
411          ReportError("Extension \"" + field_name + "\" is not defined or "
412                      "is not an extension of \"" +
413                      descriptor->full_name() + "\".");
414          return false;
415        } else {
416          ReportWarning("Extension \"" + field_name + "\" is not defined or "
417                        "is not an extension of \"" +
418                        descriptor->full_name() + "\".");
419        }
420      }
421    } else {
422      DO(ConsumeIdentifier(&field_name));
423
424      int32 field_number;
425      if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
426        if (descriptor->IsExtensionNumber(field_number)) {
427          field = reflection->FindKnownExtensionByNumber(field_number);
428        } else {
429          field = descriptor->FindFieldByNumber(field_number);
430        }
431      } else {
432        field = descriptor->FindFieldByName(field_name);
433        // Group names are expected to be capitalized as they appear in the
434        // .proto file, which actually matches their type names, not their
435        // field names.
436        if (field == NULL) {
437          string lower_field_name = field_name;
438          LowerString(&lower_field_name);
439          field = descriptor->FindFieldByName(lower_field_name);
440          // If the case-insensitive match worked but the field is NOT a group,
441          if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
442            field = NULL;
443          }
444        }
445        // Again, special-case group names as described above.
446        if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
447            && field->message_type()->name() != field_name) {
448          field = NULL;
449        }
450
451        if (field == NULL && allow_case_insensitive_field_) {
452          string lower_field_name = field_name;
453          LowerString(&lower_field_name);
454          field = descriptor->FindFieldByLowercaseName(lower_field_name);
455        }
456      }
457
458      if (field == NULL) {
459        if (!allow_unknown_field_) {
460          ReportError("Message type \"" + descriptor->full_name() +
461                      "\" has no field named \"" + field_name + "\".");
462          return false;
463        } else {
464          ReportWarning("Message type \"" + descriptor->full_name() +
465                        "\" has no field named \"" + field_name + "\".");
466        }
467      }
468    }
469
470    // Skips unknown field.
471    if (field == NULL) {
472      GOOGLE_CHECK(allow_unknown_field_);
473      // Try to guess the type of this field.
474      // If this field is not a message, there should be a ":" between the
475      // field name and the field value and also the field value should not
476      // start with "{" or "<" which indicates the beginning of a message body.
477      // If there is no ":" or there is a "{" or "<" after ":", this field has
478      // to be a message or the input is ill-formed.
479      if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
480        return SkipFieldValue();
481      } else {
482        return SkipFieldMessage();
483      }
484    }
485
486    if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
487      // Fail if the field is not repeated and it has already been specified.
488      if (!field->is_repeated() && reflection->HasField(*message, field)) {
489        ReportError("Non-repeated field \"" + field_name +
490                    "\" is specified multiple times.");
491        return false;
492      }
493      // Fail if the field is a member of a oneof and another member has already
494      // been specified.
495      const OneofDescriptor* oneof = field->containing_oneof();
496      if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
497        const FieldDescriptor* other_field =
498            reflection->GetOneofFieldDescriptor(*message, oneof);
499        ReportError("Field \"" + field_name + "\" is specified along with "
500                    "field \"" + other_field->name() + "\", another member "
501                    "of oneof \"" + oneof->name() + "\".");
502        return false;
503      }
504    }
505
506    // Perform special handling for embedded message types.
507    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
508      // ':' is optional here.
509      TryConsume(":");
510    } else {
511      // ':' is required here.
512      DO(Consume(":"));
513    }
514
515    if (field->is_repeated() && TryConsume("[")) {
516      // Short repeated format, e.g.  "foo: [1, 2, 3]"
517      while (true) {
518        if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
519          // Perform special handling for embedded message types.
520          DO(ConsumeFieldMessage(message, reflection, field));
521        } else {
522          DO(ConsumeFieldValue(message, reflection, field));
523        }
524        if (TryConsume("]")) {
525          break;
526        }
527        DO(Consume(","));
528      }
529    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
530      DO(ConsumeFieldMessage(message, reflection, field));
531    } else {
532      DO(ConsumeFieldValue(message, reflection, field));
533    }
534
535    // For historical reasons, fields may optionally be separated by commas or
536    // semicolons.
537    TryConsume(";") || TryConsume(",");
538
539    if (field->options().deprecated()) {
540      ReportWarning("text format contains deprecated field \""
541                    + field_name + "\"");
542    }
543
544    // If a parse info tree exists, add the location for the parsed
545    // field.
546    if (parse_info_tree_ != NULL) {
547      RecordLocation(parse_info_tree_, field,
548                     ParseLocation(start_line, start_column));
549    }
550
551    return true;
552  }
553
554  // Skips the next field including the field's name and value.
555  bool SkipField() {
556    string field_name;
557    if (TryConsume("[")) {
558      // Extension name.
559      DO(ConsumeFullTypeName(&field_name));
560      DO(Consume("]"));
561    } else {
562      DO(ConsumeIdentifier(&field_name));
563    }
564
565    // Try to guess the type of this field.
566    // If this field is not a message, there should be a ":" between the
567    // field name and the field value and also the field value should not
568    // start with "{" or "<" which indicates the beginning of a message body.
569    // If there is no ":" or there is a "{" or "<" after ":", this field has
570    // to be a message or the input is ill-formed.
571    if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
572      DO(SkipFieldValue());
573    } else {
574      DO(SkipFieldMessage());
575    }
576    // For historical reasons, fields may optionally be separated by commas or
577    // semicolons.
578    TryConsume(";") || TryConsume(",");
579    return true;
580  }
581
582  bool ConsumeFieldMessage(Message* message,
583                           const Reflection* reflection,
584                           const FieldDescriptor* field) {
585
586    // If the parse information tree is not NULL, create a nested one
587    // for the nested message.
588    ParseInfoTree* parent = parse_info_tree_;
589    if (parent != NULL) {
590      parse_info_tree_ = CreateNested(parent, field);
591    }
592
593    string delimiter;
594    DO(ConsumeMessageDelimiter(&delimiter));
595    if (field->is_repeated()) {
596      DO(ConsumeMessage(reflection->AddMessage(message, field), delimiter));
597    } else {
598      DO(ConsumeMessage(reflection->MutableMessage(message, field),
599                        delimiter));
600    }
601
602    // Reset the parse information tree.
603    parse_info_tree_ = parent;
604    return true;
605  }
606
607  // Skips the whole body of a message including the beginning delimiter and
608  // the ending delimiter.
609  bool SkipFieldMessage() {
610    string delimiter;
611    DO(ConsumeMessageDelimiter(&delimiter));
612    while (!LookingAt(">") &&  !LookingAt("}")) {
613      DO(SkipField());
614    }
615    DO(Consume(delimiter));
616    return true;
617  }
618
619  bool ConsumeFieldValue(Message* message,
620                         const Reflection* reflection,
621                         const FieldDescriptor* field) {
622
623// Define an easy to use macro for setting fields. This macro checks
624// to see if the field is repeated (in which case we need to use the Add
625// methods or not (in which case we need to use the Set methods).
626#define SET_FIELD(CPPTYPE, VALUE)                                  \
627        if (field->is_repeated()) {                                \
628          reflection->Add##CPPTYPE(message, field, VALUE);         \
629        } else {                                                   \
630          reflection->Set##CPPTYPE(message, field, VALUE);         \
631        }                                                          \
632
633    switch(field->cpp_type()) {
634      case FieldDescriptor::CPPTYPE_INT32: {
635        int64 value;
636        DO(ConsumeSignedInteger(&value, kint32max));
637        SET_FIELD(Int32, static_cast<int32>(value));
638        break;
639      }
640
641      case FieldDescriptor::CPPTYPE_UINT32: {
642        uint64 value;
643        DO(ConsumeUnsignedInteger(&value, kuint32max));
644        SET_FIELD(UInt32, static_cast<uint32>(value));
645        break;
646      }
647
648      case FieldDescriptor::CPPTYPE_INT64: {
649        int64 value;
650        DO(ConsumeSignedInteger(&value, kint64max));
651        SET_FIELD(Int64, value);
652        break;
653      }
654
655      case FieldDescriptor::CPPTYPE_UINT64: {
656        uint64 value;
657        DO(ConsumeUnsignedInteger(&value, kuint64max));
658        SET_FIELD(UInt64, value);
659        break;
660      }
661
662      case FieldDescriptor::CPPTYPE_FLOAT: {
663        double value;
664        DO(ConsumeDouble(&value));
665        SET_FIELD(Float, io::SafeDoubleToFloat(value));
666        break;
667      }
668
669      case FieldDescriptor::CPPTYPE_DOUBLE: {
670        double value;
671        DO(ConsumeDouble(&value));
672        SET_FIELD(Double, value);
673        break;
674      }
675
676      case FieldDescriptor::CPPTYPE_STRING: {
677        string value;
678        DO(ConsumeString(&value));
679        SET_FIELD(String, value);
680        break;
681      }
682
683      case FieldDescriptor::CPPTYPE_BOOL: {
684        if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
685          uint64 value;
686          DO(ConsumeUnsignedInteger(&value, 1));
687          SET_FIELD(Bool, value);
688        } else {
689          string value;
690          DO(ConsumeIdentifier(&value));
691          if (value == "true" || value == "True" || value == "t") {
692            SET_FIELD(Bool, true);
693          } else if (value == "false" || value == "False" || value == "f") {
694            SET_FIELD(Bool, false);
695          } else {
696            ReportError("Invalid value for boolean field \"" + field->name()
697                        + "\". Value: \"" + value  + "\".");
698            return false;
699          }
700        }
701        break;
702      }
703
704      case FieldDescriptor::CPPTYPE_ENUM: {
705        string value;
706        const EnumDescriptor* enum_type = field->enum_type();
707        const EnumValueDescriptor* enum_value = NULL;
708
709        if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
710          DO(ConsumeIdentifier(&value));
711          // Find the enumeration value.
712          enum_value = enum_type->FindValueByName(value);
713
714        } else if (LookingAt("-") ||
715                   LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
716          int64 int_value;
717          DO(ConsumeSignedInteger(&int_value, kint32max));
718          value = SimpleItoa(int_value);        // for error reporting
719          enum_value = enum_type->FindValueByNumber(int_value);
720        } else {
721          ReportError("Expected integer or identifier.");
722          return false;
723        }
724
725        if (enum_value == NULL) {
726          if (!allow_unknown_enum_) {
727            ReportError("Unknown enumeration value of \"" + value  + "\" for "
728                        "field \"" + field->name() + "\".");
729            return false;
730          } else {
731            ReportWarning("Unknown enumeration value of \"" + value  + "\" for "
732                          "field \"" + field->name() + "\".");
733            return true;
734          }
735        }
736
737        SET_FIELD(Enum, enum_value);
738        break;
739      }
740
741      case FieldDescriptor::CPPTYPE_MESSAGE: {
742        // We should never get here. Put here instead of a default
743        // so that if new types are added, we get a nice compiler warning.
744        GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
745        break;
746      }
747    }
748#undef SET_FIELD
749    return true;
750  }
751
752  bool SkipFieldValue() {
753    if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
754      while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
755        tokenizer_.Next();
756      }
757      return true;
758    }
759    // Possible field values other than string:
760    //   12345        => TYPE_INTEGER
761    //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
762    //   1.2345       => TYPE_FLOAT
763    //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
764    //   inf          => TYPE_IDENTIFIER
765    //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
766    //   TYPE_INTEGER => TYPE_IDENTIFIER
767    // Divides them into two group, one with TYPE_SYMBOL
768    // and the other without:
769    //   Group one:
770    //     12345        => TYPE_INTEGER
771    //     1.2345       => TYPE_FLOAT
772    //     inf          => TYPE_IDENTIFIER
773    //     TYPE_INTEGER => TYPE_IDENTIFIER
774    //   Group two:
775    //     -12345       => TYPE_SYMBOL + TYPE_INTEGER
776    //     -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
777    //     -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
778    // As we can see, the field value consists of an optional '-' and one of
779    // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
780    bool has_minus = TryConsume("-");
781    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
782        !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
783        !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
784      return false;
785    }
786    // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
787    // value while other combinations all generate valid values.
788    // We check if the value of this combination is valid here.
789    // TYPE_IDENTIFIER after a '-' should be one of the float values listed
790    // below:
791    //   inf, inff, infinity, nan
792    if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
793      string text = tokenizer_.current().text;
794      LowerString(&text);
795      if (text != "inf" &&
796          text != "infinity" &&
797          text != "nan") {
798        ReportError("Invalid float number: " + text);
799        return false;
800      }
801    }
802    tokenizer_.Next();
803    return true;
804  }
805
806  // Returns true if the current token's text is equal to that specified.
807  bool LookingAt(const string& text) {
808    return tokenizer_.current().text == text;
809  }
810
811  // Returns true if the current token's type is equal to that specified.
812  bool LookingAtType(io::Tokenizer::TokenType token_type) {
813    return tokenizer_.current().type == token_type;
814  }
815
816  // Consumes an identifier and saves its value in the identifier parameter.
817  // Returns false if the token is not of type IDENTFIER.
818  bool ConsumeIdentifier(string* identifier) {
819    if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
820      *identifier = tokenizer_.current().text;
821      tokenizer_.Next();
822      return true;
823    }
824
825    // If allow_field_numer_ or allow_unknown_field_ is true, we should able
826    // to parse integer identifiers.
827    if ((allow_field_number_ || allow_unknown_field_)
828        && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
829      *identifier = tokenizer_.current().text;
830      tokenizer_.Next();
831      return true;
832    }
833
834    ReportError("Expected identifier.");
835    return false;
836  }
837
838  // Consume a string of form "<id1>.<id2>....<idN>".
839  bool ConsumeFullTypeName(string* name) {
840    DO(ConsumeIdentifier(name));
841    while (TryConsume(".")) {
842      string part;
843      DO(ConsumeIdentifier(&part));
844      *name += ".";
845      *name += part;
846    }
847    return true;
848  }
849
850  // Consumes a string and saves its value in the text parameter.
851  // Returns false if the token is not of type STRING.
852  bool ConsumeString(string* text) {
853    if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
854      ReportError("Expected string.");
855      return false;
856    }
857
858    text->clear();
859    while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
860      io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
861
862      tokenizer_.Next();
863    }
864
865    return true;
866  }
867
868  // Consumes a uint64 and saves its value in the value parameter.
869  // Returns false if the token is not of type INTEGER.
870  bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
871    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
872      ReportError("Expected integer.");
873      return false;
874    }
875
876    if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
877                                     max_value, value)) {
878      ReportError("Integer out of range.");
879      return false;
880    }
881
882    tokenizer_.Next();
883    return true;
884  }
885
886  // Consumes an int64 and saves its value in the value parameter.
887  // Note that since the tokenizer does not support negative numbers,
888  // we actually may consume an additional token (for the minus sign) in this
889  // method. Returns false if the token is not an integer
890  // (signed or otherwise).
891  bool ConsumeSignedInteger(int64* value, uint64 max_value) {
892    bool negative = false;
893
894    if (TryConsume("-")) {
895      negative = true;
896      // Two's complement always allows one more negative integer than
897      // positive.
898      ++max_value;
899    }
900
901    uint64 unsigned_value;
902
903    DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
904
905    *value = static_cast<int64>(unsigned_value);
906
907    if (negative) {
908      *value = -*value;
909    }
910
911    return true;
912  }
913
914  // Consumes a uint64 and saves its value in the value parameter.
915  // Accepts decimal numbers only, rejects hex or oct numbers.
916  bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
917    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
918      ReportError("Expected integer.");
919      return false;
920    }
921
922    const string& text = tokenizer_.current().text;
923    if (IsHexNumber(text) || IsOctNumber(text)) {
924      ReportError("Expect a decimal number.");
925      return false;
926    }
927
928    if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
929      ReportError("Integer out of range.");
930      return false;
931    }
932
933    tokenizer_.Next();
934    return true;
935  }
936
937  // Consumes a double and saves its value in the value parameter.
938  // Note that since the tokenizer does not support negative numbers,
939  // we actually may consume an additional token (for the minus sign) in this
940  // method. Returns false if the token is not a double
941  // (signed or otherwise).
942  bool ConsumeDouble(double* value) {
943    bool negative = false;
944
945    if (TryConsume("-")) {
946      negative = true;
947    }
948
949    // A double can actually be an integer, according to the tokenizer.
950    // Therefore, we must check both cases here.
951    if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
952      // We have found an integer value for the double.
953      uint64 integer_value;
954      DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
955
956      *value = static_cast<double>(integer_value);
957    } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
958      // We have found a float value for the double.
959      *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
960
961      // Mark the current token as consumed.
962      tokenizer_.Next();
963    } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
964      string text = tokenizer_.current().text;
965      LowerString(&text);
966      if (text == "inf" ||
967          text == "infinity") {
968        *value = std::numeric_limits<double>::infinity();
969        tokenizer_.Next();
970      } else if (text == "nan") {
971        *value = std::numeric_limits<double>::quiet_NaN();
972        tokenizer_.Next();
973      } else {
974        ReportError("Expected double.");
975        return false;
976      }
977    } else {
978      ReportError("Expected double.");
979      return false;
980    }
981
982    if (negative) {
983      *value = -*value;
984    }
985
986    return true;
987  }
988
989  // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name"
990  // or "type.googleprod.com/full.type.Name"
991  bool ConsumeAnyTypeUrl(string* full_type_name, string* prefix) {
992    // TODO(saito) Extend Consume() to consume multiple tokens at once, so that
993    // this code can be written as just DO(Consume(kGoogleApisTypePrefix)).
994    string url1, url2, url3;
995    DO(ConsumeIdentifier(&url1));  // type
996    DO(Consume("."));
997    DO(ConsumeIdentifier(&url2));  // googleapis
998    DO(Consume("."));
999    DO(ConsumeIdentifier(&url3));  // com
1000    DO(Consume("/"));
1001    DO(ConsumeFullTypeName(full_type_name));
1002
1003    *prefix = url1 + "." + url2 + "." + url3 + "/";
1004    if (*prefix != internal::kTypeGoogleApisComPrefix &&
1005        *prefix != internal::kTypeGoogleProdComPrefix) {
1006      ReportError("TextFormat::Parser for Any supports only "
1007                  "type.googleapis.com and type.googleprod.com, "
1008                  "but found \"" + *prefix + "\"");
1009      return false;
1010    }
1011    return true;
1012  }
1013
1014  // A helper function for reconstructing Any::value. Consumes a text of
1015  // full_type_name, then serializes it into serialized_value. "pool" is used to
1016  // look up and create a temporary object with full_type_name.
1017  bool ConsumeAnyValue(const string& full_type_name, const DescriptorPool* pool,
1018                       string* serialized_value) {
1019    const Descriptor* value_descriptor =
1020        pool->FindMessageTypeByName(full_type_name);
1021    if (value_descriptor == NULL) {
1022      ReportError("Could not find type \"" + full_type_name +
1023                  "\" stored in google.protobuf.Any.");
1024      return false;
1025    }
1026    DynamicMessageFactory factory;
1027    const Message* value_prototype = factory.GetPrototype(value_descriptor);
1028    if (value_prototype == NULL) {
1029      return false;
1030    }
1031    google::protobuf::scoped_ptr<Message> value(value_prototype->New());
1032    string sub_delimiter;
1033    DO(ConsumeMessageDelimiter(&sub_delimiter));
1034    DO(ConsumeMessage(value.get(), sub_delimiter));
1035
1036    value->AppendToString(serialized_value);
1037    return true;
1038  }
1039
1040  // Consumes a token and confirms that it matches that specified in the
1041  // value parameter. Returns false if the token found does not match that
1042  // which was specified.
1043  bool Consume(const string& value) {
1044    const string& current_value = tokenizer_.current().text;
1045
1046    if (current_value != value) {
1047      ReportError("Expected \"" + value + "\", found \"" + current_value
1048                  + "\".");
1049      return false;
1050    }
1051
1052    tokenizer_.Next();
1053
1054    return true;
1055  }
1056
  // Attempts to consume the supplied value. Returns false if the
  // token found does not match the value specified.
1059  bool TryConsume(const string& value) {
1060    if (tokenizer_.current().text == value) {
1061      tokenizer_.Next();
1062      return true;
1063    } else {
1064      return false;
1065    }
1066  }
1067
1068  // An internal instance of the Tokenizer's error collector, used to
1069  // collect any base-level parse errors and feed them to the ParserImpl.
1070  class ParserErrorCollector : public io::ErrorCollector {
1071   public:
1072    explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
1073        parser_(parser) { }
1074
1075    virtual ~ParserErrorCollector() { }
1076
1077    virtual void AddError(int line, int column, const string& message) {
1078      parser_->ReportError(line, column, message);
1079    }
1080
1081    virtual void AddWarning(int line, int column, const string& message) {
1082      parser_->ReportWarning(line, column, message);
1083    }
1084
1085   private:
1086    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
1087    TextFormat::Parser::ParserImpl* parser_;
1088  };
1089
  // Parser state.  These members are initialized by the constructor, which is
  // not part of this section of the file.
  io::ErrorCollector* error_collector_;
  TextFormat::Finder* finder_;
  ParseInfoTree* parse_info_tree_;
  // Forwards errors and warnings it receives to a ParserImpl's
  // ReportError()/ReportWarning().
  // NOTE(review): presumably handed to tokenizer_ and bound to this parser;
  // confirm against the constructor.
  ParserErrorCollector tokenizer_error_collector_;
  // Token source: the LookingAt*/Consume* helpers read tokenizer_.current()
  // and advance with tokenizer_.Next().
  io::Tokenizer tokenizer_;
  const Descriptor* root_message_type_;
  SingularOverwritePolicy singular_overwrite_policy_;
  const bool allow_case_insensitive_field_;
  // When this or allow_field_number_ is set, ConsumeIdentifier() also accepts
  // an integer token as an identifier.
  const bool allow_unknown_field_;
  const bool allow_unknown_enum_;
  const bool allow_field_number_;
  // NOTE(review): presumably set once an error has been reported; confirm
  // against ReportError().
  bool had_errors_;
1102};
1103
1104#undef DO
1105
1106// ===========================================================================
1107// Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1108// from the Printer found in //google/protobuf/io/printer.h
1109class TextFormat::Printer::TextGenerator {
1110 public:
1111  explicit TextGenerator(io::ZeroCopyOutputStream* output,
1112                         int initial_indent_level)
1113    : output_(output),
1114      buffer_(NULL),
1115      buffer_size_(0),
1116      at_start_of_line_(true),
1117      failed_(false),
1118      indent_(""),
1119      initial_indent_level_(initial_indent_level) {
1120    indent_.resize(initial_indent_level_ * 2, ' ');
1121  }
1122
1123  ~TextGenerator() {
1124    // Only BackUp() if we're sure we've successfully called Next() at least
1125    // once.
1126    if (!failed_ && buffer_size_ > 0) {
1127      output_->BackUp(buffer_size_);
1128    }
1129  }
1130
1131  // Indent text by two spaces.  After calling Indent(), two spaces will be
1132  // inserted at the beginning of each line of text.  Indent() may be called
1133  // multiple times to produce deeper indents.
1134  void Indent() {
1135    indent_ += "  ";
1136  }
1137
1138  // Reduces the current indent level by two spaces, or crashes if the indent
1139  // level is zero.
1140  void Outdent() {
1141    if (indent_.empty() ||
1142        indent_.size() < initial_indent_level_ * 2) {
1143      GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
1144      return;
1145    }
1146
1147    indent_.resize(indent_.size() - 2);
1148  }
1149
1150  // Print text to the output stream.
1151  void Print(const string& str) {
1152    Print(str.data(), str.size());
1153  }
1154
1155  // Print text to the output stream.
1156  void Print(const char* text) {
1157    Print(text, strlen(text));
1158  }
1159
1160  // Print text to the output stream.
1161  void Print(const char* text, size_t size) {
1162    size_t pos = 0;  // The number of bytes we've written so far.
1163
1164    for (size_t i = 0; i < size; i++) {
1165      if (text[i] == '\n') {
1166        // Saw newline.  If there is more text, we may need to insert an indent
1167        // here.  So, write what we have so far, including the '\n'.
1168        Write(text + pos, i - pos + 1);
1169        pos = i + 1;
1170
1171        // Setting this true will cause the next Write() to insert an indent
1172        // first.
1173        at_start_of_line_ = true;
1174      }
1175    }
1176
1177    // Write the rest.
1178    Write(text + pos, size - pos);
1179  }
1180
1181  // True if any write to the underlying stream failed.  (We don't just
1182  // crash in this case because this is an I/O failure, not a programming
1183  // error.)
1184  bool failed() const { return failed_; }
1185
1186 private:
1187  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
1188
1189  void Write(const char* data, size_t size) {
1190    if (failed_) return;
1191    if (size == 0) return;
1192
1193    if (at_start_of_line_) {
1194      // Insert an indent.
1195      at_start_of_line_ = false;
1196      Write(indent_.data(), indent_.size());
1197      if (failed_) return;
1198    }
1199
1200    while (size > buffer_size_) {
1201      // Data exceeds space in the buffer.  Copy what we can and request a
1202      // new buffer.
1203      memcpy(buffer_, data, buffer_size_);
1204      data += buffer_size_;
1205      size -= buffer_size_;
1206      void* void_buffer;
1207      failed_ = !output_->Next(&void_buffer, &buffer_size_);
1208      if (failed_) return;
1209      buffer_ = reinterpret_cast<char*>(void_buffer);
1210    }
1211
1212    // Buffer is big enough to receive the data; copy it.
1213    memcpy(buffer_, data, size);
1214    buffer_ += size;
1215    buffer_size_ -= size;
1216  }
1217
1218  io::ZeroCopyOutputStream* const output_;
1219  char* buffer_;
1220  int buffer_size_;
1221  bool at_start_of_line_;
1222  bool failed_;
1223
1224  string indent_;
1225  int initial_indent_level_;
1226};
1227
1228// ===========================================================================
1229
1230TextFormat::Finder::~Finder() {
1231}
1232
1233TextFormat::Parser::Parser()
1234  : error_collector_(NULL),
1235    finder_(NULL),
1236    parse_info_tree_(NULL),
1237    allow_partial_(false),
1238    allow_case_insensitive_field_(false),
1239    allow_unknown_field_(false),
1240    allow_unknown_enum_(false),
1241    allow_field_number_(false),
1242    allow_relaxed_whitespace_(false),
1243    allow_singular_overwrites_(false) {
1244}
1245
1246TextFormat::Parser::~Parser() {}
1247
1248bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1249                               Message* output) {
1250  output->Clear();
1251
1252  ParserImpl::SingularOverwritePolicy overwrites_policy =
1253      allow_singular_overwrites_
1254      ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1255      : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1256
1257  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1258                    finder_, parse_info_tree_,
1259                    overwrites_policy,
1260                    allow_case_insensitive_field_, allow_unknown_field_,
1261                    allow_unknown_enum_, allow_field_number_,
1262                    allow_relaxed_whitespace_);
1263  return MergeUsingImpl(input, output, &parser);
1264}
1265
1266bool TextFormat::Parser::ParseFromString(const string& input,
1267                                         Message* output) {
1268  io::ArrayInputStream input_stream(input.data(), input.size());
1269  return Parse(&input_stream, output);
1270}
1271
1272bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1273                               Message* output) {
1274  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1275                    finder_, parse_info_tree_,
1276                    ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1277                    allow_case_insensitive_field_, allow_unknown_field_,
1278                    allow_unknown_enum_, allow_field_number_,
1279                    allow_relaxed_whitespace_);
1280  return MergeUsingImpl(input, output, &parser);
1281}
1282
1283bool TextFormat::Parser::MergeFromString(const string& input,
1284                                         Message* output) {
1285  io::ArrayInputStream input_stream(input.data(), input.size());
1286  return Merge(&input_stream, output);
1287}
1288
1289bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1290                                        Message* output,
1291                                        ParserImpl* parser_impl) {
1292  if (!parser_impl->Parse(output)) return false;
1293  if (!allow_partial_ && !output->IsInitialized()) {
1294    vector<string> missing_fields;
1295    output->FindInitializationErrors(&missing_fields);
1296    parser_impl->ReportError(-1, 0, "Message missing required fields: " +
1297                                        Join(missing_fields, ", "));
1298    return false;
1299  }
1300  return true;
1301}
1302
1303bool TextFormat::Parser::ParseFieldValueFromString(
1304    const string& input,
1305    const FieldDescriptor* field,
1306    Message* output) {
1307  io::ArrayInputStream input_stream(input.data(), input.size());
1308  ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
1309                    finder_, parse_info_tree_,
1310                    ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1311                    allow_case_insensitive_field_, allow_unknown_field_,
1312                    allow_unknown_enum_, allow_field_number_,
1313                    allow_relaxed_whitespace_);
1314  return parser.ParseField(field, output);
1315}
1316
1317/* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1318                                    Message* output) {
1319  return Parser().Parse(input, output);
1320}
1321
1322/* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1323                                    Message* output) {
1324  return Parser().Merge(input, output);
1325}
1326
1327/* static */ bool TextFormat::ParseFromString(const string& input,
1328                                              Message* output) {
1329  return Parser().ParseFromString(input, output);
1330}
1331
1332/* static */ bool TextFormat::MergeFromString(const string& input,
1333                                              Message* output) {
1334  return Parser().MergeFromString(input, output);
1335}
1336
1337// ===========================================================================
1338
// The default implementation for FieldValuePrinter. The base class just
// does simple formatting. That way, deriving classes can decide to fall back
// to that behavior.
1342TextFormat::FieldValuePrinter::FieldValuePrinter() {}
1343TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
1344string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1345  return val ? "true" : "false";
1346}
1347string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
1348  return SimpleItoa(val);
1349}
1350string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
1351  return SimpleItoa(val);
1352}
1353string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
1354  return SimpleItoa(val);
1355}
1356string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
1357  return SimpleItoa(val);
1358}
1359string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1360  return SimpleFtoa(val);
1361}
1362string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1363  return SimpleDtoa(val);
1364}
1365string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
1366  string printed("\"");
1367  CEscapeAndAppend(val, &printed);
1368  printed.push_back('\"');
1369  return printed;
1370}
1371string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
1372  return PrintString(val);
1373}
1374string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
1375                                                const string& name) const {
1376  return name;
1377}
1378string TextFormat::FieldValuePrinter::PrintFieldName(
1379    const Message& message,
1380    const Reflection* reflection,
1381    const FieldDescriptor* field) const {
1382  if (field->is_extension()) {
1383    // We special-case MessageSet elements for compatibility with proto1.
1384    if (field->containing_type()->options().message_set_wire_format()
1385        && field->type() == FieldDescriptor::TYPE_MESSAGE
1386        && field->is_optional()
1387        && field->extension_scope() == field->message_type()) {
1388      return StrCat("[", field->message_type()->full_name(), "]");
1389    } else {
1390      return StrCat("[", field->full_name(), "]");
1391    }
1392  } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
1393    // Groups must be serialized with their original capitalization.
1394    return field->message_type()->name();
1395  } else {
1396    return field->name();
1397  }
1398}
1399string TextFormat::FieldValuePrinter::PrintMessageStart(
1400    const Message& message,
1401    int field_index,
1402    int field_count,
1403    bool single_line_mode) const {
1404  return single_line_mode ? " { " : " {\n";
1405}
1406string TextFormat::FieldValuePrinter::PrintMessageEnd(
1407    const Message& message,
1408    int field_index,
1409    int field_count,
1410    bool single_line_mode) const {
1411  return single_line_mode ? "} " : "}\n";
1412}
1413
1414namespace {
1415// Our own specialization: for UTF8 escaped strings.
1416class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
1417 public:
1418  virtual string PrintString(const string& val) const {
1419    return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
1420  }
1421  virtual string PrintBytes(const string& val) const {
1422    return TextFormat::FieldValuePrinter::PrintString(val);
1423  }
1424};
1425
1426}  // namespace
1427
1428TextFormat::Printer::Printer()
1429  : initial_indent_level_(0),
1430    single_line_mode_(false),
1431    use_field_number_(false),
1432    use_short_repeated_primitives_(false),
1433    hide_unknown_fields_(false),
1434    print_message_fields_in_index_order_(false),
1435    expand_any_(false),
1436    truncate_string_field_longer_than_(0LL) {
1437  SetUseUtf8StringEscaping(false);
1438}
1439
1440TextFormat::Printer::~Printer() {
1441  STLDeleteValues(&custom_printers_);
1442}
1443
1444void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
1445  SetDefaultFieldValuePrinter(as_utf8
1446                              ? new FieldValuePrinterUtf8Escaping()
1447                              : new FieldValuePrinter());
1448}
1449
1450void TextFormat::Printer::SetDefaultFieldValuePrinter(
1451    const FieldValuePrinter* printer) {
1452  default_field_value_printer_.reset(printer);
1453}
1454
1455bool TextFormat::Printer::RegisterFieldValuePrinter(
1456    const FieldDescriptor* field,
1457    const FieldValuePrinter* printer) {
1458  return field != NULL && printer != NULL &&
1459         custom_printers_.insert(std::make_pair(field, printer)).second;
1460}
1461
1462bool TextFormat::Printer::PrintToString(const Message& message,
1463                                        string* output) const {
1464  GOOGLE_DCHECK(output) << "output specified is NULL";
1465
1466  output->clear();
1467  io::StringOutputStream output_stream(output);
1468
1469  return Print(message, &output_stream);
1470}
1471
1472bool TextFormat::Printer::PrintUnknownFieldsToString(
1473    const UnknownFieldSet& unknown_fields,
1474    string* output) const {
1475  GOOGLE_DCHECK(output) << "output specified is NULL";
1476
1477  output->clear();
1478  io::StringOutputStream output_stream(output);
1479  return PrintUnknownFields(unknown_fields, &output_stream);
1480}
1481
1482bool TextFormat::Printer::Print(const Message& message,
1483                                io::ZeroCopyOutputStream* output) const {
1484  TextGenerator generator(output, initial_indent_level_);
1485
1486  Print(message, generator);
1487
1488  // Output false if the generator failed internally.
1489  return !generator.failed();
1490}
1491
1492bool TextFormat::Printer::PrintUnknownFields(
1493    const UnknownFieldSet& unknown_fields,
1494    io::ZeroCopyOutputStream* output) const {
1495  TextGenerator generator(output, initial_indent_level_);
1496
1497  PrintUnknownFields(unknown_fields, generator);
1498
1499  // Output false if the generator failed internally.
1500  return !generator.failed();
1501}
1502
1503namespace {
1504// Comparison functor for sorting FieldDescriptors by field index.
1505struct FieldIndexSorter {
1506  bool operator()(const FieldDescriptor* left,
1507                  const FieldDescriptor* right) const {
1508    return left->index() < right->index();
1509  }
1510};
1511
1512}  // namespace
1513
1514bool TextFormat::Printer::PrintAny(const Message& message,
1515                                   TextGenerator& generator) const {
1516  const FieldDescriptor* type_url_field;
1517  const FieldDescriptor* value_field;
1518  if (!internal::GetAnyFieldDescriptors(message, &type_url_field,
1519                                        &value_field)) {
1520    return false;
1521  }
1522
1523  const Reflection* reflection = message.GetReflection();
1524
1525  // Extract the full type name from the type_url field.
1526  const string& type_url = reflection->GetString(message, type_url_field);
1527  string full_type_name;
1528  if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1529    return false;
1530  }
1531
1532  // Print the "value" in text.
1533  const google::protobuf::Descriptor* value_descriptor =
1534      message.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1535          full_type_name);
1536  if (value_descriptor == NULL) {
1537    GOOGLE_LOG(WARNING) << "Proto type " << type_url << " not found";
1538    return false;
1539  }
1540  DynamicMessageFactory factory;
1541  google::protobuf::scoped_ptr<google::protobuf::Message> value_message(
1542      factory.GetPrototype(value_descriptor)->New());
1543  string serialized_value = reflection->GetString(message, value_field);
1544  if (!value_message->ParseFromString(serialized_value)) {
1545    GOOGLE_LOG(WARNING) << type_url << ": failed to parse contents";
1546    return false;
1547  }
1548  generator.Print(StrCat("[", type_url, "]"));
1549  const FieldValuePrinter* printer = FindWithDefault(
1550      custom_printers_, value_field, default_field_value_printer_.get());
1551  generator.Print(
1552      printer->PrintMessageStart(message, -1, 0, single_line_mode_));
1553  generator.Indent();
1554  Print(*value_message, generator);
1555  generator.Outdent();
1556  generator.Print(printer->PrintMessageEnd(message, -1, 0, single_line_mode_));
1557  return true;
1558}
1559
1560void TextFormat::Printer::Print(const Message& message,
1561                                TextGenerator& generator) const {
1562  const Descriptor* descriptor = message.GetDescriptor();
1563  const Reflection* reflection = message.GetReflection();
1564  if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ &&
1565      PrintAny(message, generator)) {
1566    return;
1567  }
1568  vector<const FieldDescriptor*> fields;
1569  reflection->ListFields(message, &fields);
1570  if (print_message_fields_in_index_order_) {
1571    std::sort(fields.begin(), fields.end(), FieldIndexSorter());
1572  }
1573  for (int i = 0; i < fields.size(); i++) {
1574    PrintField(message, reflection, fields[i], generator);
1575  }
1576  if (!hide_unknown_fields_) {
1577    PrintUnknownFields(reflection->GetUnknownFields(message), generator);
1578  }
1579}
1580
1581void TextFormat::Printer::PrintFieldValueToString(
1582    const Message& message,
1583    const FieldDescriptor* field,
1584    int index,
1585    string* output) const {
1586
1587  GOOGLE_DCHECK(output) << "output specified is NULL";
1588
1589  output->clear();
1590  io::StringOutputStream output_stream(output);
1591  TextGenerator generator(&output_stream, initial_indent_level_);
1592
1593  PrintFieldValue(message, message.GetReflection(), field, index, generator);
1594}
1595
1596class MapEntryMessageComparator {
1597 public:
1598  explicit MapEntryMessageComparator(const Descriptor* descriptor)
1599      : field_(descriptor->field(0)) {}
1600
1601  bool operator()(const Message* a, const Message* b) {
1602    const Reflection* reflection = a->GetReflection();
1603    switch (field_->cpp_type()) {
1604      case FieldDescriptor::CPPTYPE_BOOL: {
1605          bool first = reflection->GetBool(*a, field_);
1606          bool second = reflection->GetBool(*b, field_);
1607          return first < second;
1608      }
1609      case FieldDescriptor::CPPTYPE_INT32: {
1610          int32 first = reflection->GetInt32(*a, field_);
1611          int32 second = reflection->GetInt32(*b, field_);
1612          return first < second;
1613      }
1614      case FieldDescriptor::CPPTYPE_INT64: {
1615          int64 first = reflection->GetInt64(*a, field_);
1616          int64 second = reflection->GetInt64(*b, field_);
1617          return first < second;
1618      }
1619      case FieldDescriptor::CPPTYPE_UINT32: {
1620          uint32 first = reflection->GetUInt32(*a, field_);
1621          uint32 second = reflection->GetUInt32(*b, field_);
1622          return first < second;
1623      }
1624      case FieldDescriptor::CPPTYPE_UINT64: {
1625          uint64 first = reflection->GetUInt64(*a, field_);
1626          uint64 second = reflection->GetUInt64(*b, field_);
1627          return first < second;
1628      }
1629      case FieldDescriptor::CPPTYPE_STRING: {
1630          string first = reflection->GetString(*a, field_);
1631          string second = reflection->GetString(*b, field_);
1632          return first < second;
1633      }
1634      default:
1635        GOOGLE_LOG(DFATAL) << "Invalid key for map field.";
1636        return true;
1637    }
1638  }
1639
1640 private:
1641  const FieldDescriptor* field_;
1642};
1643
1644void TextFormat::Printer::PrintField(const Message& message,
1645                                     const Reflection* reflection,
1646                                     const FieldDescriptor* field,
1647                                     TextGenerator& generator) const {
1648  if (use_short_repeated_primitives_ &&
1649      field->is_repeated() &&
1650      field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
1651      field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1652    PrintShortRepeatedField(message, reflection, field, generator);
1653    return;
1654  }
1655
1656  int count = 0;
1657
1658  if (field->is_repeated()) {
1659    count = reflection->FieldSize(message, field);
1660  } else if (reflection->HasField(message, field)) {
1661    count = 1;
1662  }
1663
1664  std::vector<const Message*> sorted_map_field;
1665  if (field->is_map()) {
1666    const RepeatedPtrField<Message>& map_field =
1667        reflection->GetRepeatedPtrField<Message>(message, field);
1668    for (RepeatedPtrField<Message>::const_pointer_iterator it =
1669             map_field.pointer_begin();
1670         it != map_field.pointer_end(); ++it) {
1671      sorted_map_field.push_back(*it);
1672    }
1673
1674    MapEntryMessageComparator comparator(field->message_type());
1675    std::stable_sort(sorted_map_field.begin(), sorted_map_field.end(),
1676                     comparator);
1677  }
1678
1679  for (int j = 0; j < count; ++j) {
1680    const int field_index = field->is_repeated() ? j : -1;
1681
1682    PrintFieldName(message, reflection, field, generator);
1683
1684    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1685      const FieldValuePrinter* printer = FindWithDefault(
1686          custom_printers_, field, default_field_value_printer_.get());
1687      const Message& sub_message =
1688          field->is_repeated()
1689              ? (field->is_map()
1690                     ? *sorted_map_field[j]
1691                     : reflection->GetRepeatedMessage(message, field, j))
1692              : reflection->GetMessage(message, field);
1693      generator.Print(
1694          printer->PrintMessageStart(
1695              sub_message, field_index, count, single_line_mode_));
1696      generator.Indent();
1697      Print(sub_message, generator);
1698      generator.Outdent();
1699      generator.Print(
1700          printer->PrintMessageEnd(
1701              sub_message, field_index, count, single_line_mode_));
1702    } else {
1703      generator.Print(": ");
1704      // Write the field value.
1705      PrintFieldValue(message, reflection, field, field_index, generator);
1706      if (single_line_mode_) {
1707        generator.Print(" ");
1708      } else {
1709        generator.Print("\n");
1710      }
1711    }
1712  }
1713}
1714
1715void TextFormat::Printer::PrintShortRepeatedField(
1716    const Message& message,
1717    const Reflection* reflection,
1718    const FieldDescriptor* field,
1719    TextGenerator& generator) const {
1720  // Print primitive repeated field in short form.
1721  PrintFieldName(message, reflection, field, generator);
1722
1723  int size = reflection->FieldSize(message, field);
1724  generator.Print(": [");
1725  for (int i = 0; i < size; i++) {
1726    if (i > 0) generator.Print(", ");
1727    PrintFieldValue(message, reflection, field, i, generator);
1728  }
1729  if (single_line_mode_) {
1730    generator.Print("] ");
1731  } else {
1732    generator.Print("]\n");
1733  }
1734}
1735
1736void TextFormat::Printer::PrintFieldName(const Message& message,
1737                                         const Reflection* reflection,
1738                                         const FieldDescriptor* field,
1739                                         TextGenerator& generator) const {
1740  // if use_field_number_ is true, prints field number instead
1741  // of field name.
1742  if (use_field_number_) {
1743    generator.Print(SimpleItoa(field->number()));
1744    return;
1745  }
1746
1747  const FieldValuePrinter* printer = FindWithDefault(
1748      custom_printers_, field, default_field_value_printer_.get());
1749  generator.Print(printer->PrintFieldName(message, reflection, field));
1750}
1751
1752void TextFormat::Printer::PrintFieldValue(
1753    const Message& message,
1754    const Reflection* reflection,
1755    const FieldDescriptor* field,
1756    int index,
1757    TextGenerator& generator) const {
1758  GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1759      << "Index must be -1 for non-repeated fields";
1760
1761  const FieldValuePrinter* printer
1762      = FindWithDefault(custom_printers_, field,
1763                        default_field_value_printer_.get());
1764
1765  switch (field->cpp_type()) {
1766#define OUTPUT_FIELD(CPPTYPE, METHOD)                                   \
1767    case FieldDescriptor::CPPTYPE_##CPPTYPE:                            \
1768      generator.Print(printer->Print##METHOD(field->is_repeated()       \
1769               ? reflection->GetRepeated##METHOD(message, field, index) \
1770               : reflection->Get##METHOD(message, field)));             \
1771        break
1772
1773    OUTPUT_FIELD( INT32,  Int32);
1774    OUTPUT_FIELD( INT64,  Int64);
1775    OUTPUT_FIELD(UINT32, UInt32);
1776    OUTPUT_FIELD(UINT64, UInt64);
1777    OUTPUT_FIELD( FLOAT,  Float);
1778    OUTPUT_FIELD(DOUBLE, Double);
1779    OUTPUT_FIELD(  BOOL,   Bool);
1780#undef OUTPUT_FIELD
1781
1782    case FieldDescriptor::CPPTYPE_STRING: {
1783      string scratch;
1784      const string& value = field->is_repeated()
1785          ? reflection->GetRepeatedStringReference(
1786              message, field, index, &scratch)
1787          : reflection->GetStringReference(message, field, &scratch);
1788      const string* value_to_print = &value;
1789      string truncated_value;
1790      if (truncate_string_field_longer_than_ > 0 &&
1791          truncate_string_field_longer_than_ < value.size()) {
1792        truncated_value = value.substr(0, truncate_string_field_longer_than_) +
1793                          "...<truncated>...";
1794        value_to_print = &truncated_value;
1795      }
1796      if (field->type() == FieldDescriptor::TYPE_STRING) {
1797        generator.Print(printer->PrintString(*value_to_print));
1798      } else {
1799        GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
1800        generator.Print(printer->PrintBytes(*value_to_print));
1801      }
1802      break;
1803    }
1804
1805    case FieldDescriptor::CPPTYPE_ENUM: {
1806      int enum_value = field->is_repeated()
1807          ? reflection->GetRepeatedEnumValue(message, field, index)
1808          : reflection->GetEnumValue(message, field);
1809      const EnumValueDescriptor* enum_desc =
1810          field->enum_type()->FindValueByNumber(enum_value);
1811      if (enum_desc != NULL) {
1812        generator.Print(printer->PrintEnum(enum_value, enum_desc->name()));
1813      } else {
1814        // Ordinarily, enum_desc should not be null, because proto2 has the
1815        // invariant that set enum field values must be in-range, but with the
1816        // new integer-based API for enums (or the RepeatedField<int> loophole),
1817        // it is possible for the user to force an unknown integer value.  So we
1818        // simply use the integer value itself as the enum value name in this
1819        // case.
1820        generator.Print(printer->PrintEnum(enum_value,
1821                                           StringPrintf("%d", enum_value)));
1822      }
1823      break;
1824    }
1825
1826    case FieldDescriptor::CPPTYPE_MESSAGE:
1827      Print(field->is_repeated()
1828            ? reflection->GetRepeatedMessage(message, field, index)
1829            : reflection->GetMessage(message, field),
1830            generator);
1831      break;
1832  }
1833}
1834
1835/* static */ bool TextFormat::Print(const Message& message,
1836                                    io::ZeroCopyOutputStream* output) {
1837  return Printer().Print(message, output);
1838}
1839
1840/* static */ bool TextFormat::PrintUnknownFields(
1841    const UnknownFieldSet& unknown_fields,
1842    io::ZeroCopyOutputStream* output) {
1843  return Printer().PrintUnknownFields(unknown_fields, output);
1844}
1845
1846/* static */ bool TextFormat::PrintToString(
1847    const Message& message, string* output) {
1848  return Printer().PrintToString(message, output);
1849}
1850
1851/* static */ bool TextFormat::PrintUnknownFieldsToString(
1852    const UnknownFieldSet& unknown_fields, string* output) {
1853  return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1854}
1855
1856/* static */ void TextFormat::PrintFieldValueToString(
1857    const Message& message,
1858    const FieldDescriptor* field,
1859    int index,
1860    string* output) {
1861  return Printer().PrintFieldValueToString(message, field, index, output);
1862}
1863
1864/* static */ bool TextFormat::ParseFieldValueFromString(
1865    const string& input,
1866    const FieldDescriptor* field,
1867    Message* message) {
1868  return Parser().ParseFieldValueFromString(input, field, message);
1869}
1870
1871// Prints an integer as hex with a fixed number of digits dependent on the
1872// integer type.
1873template<typename IntType>
1874static string PaddedHex(IntType value) {
1875  string result;
1876  result.reserve(sizeof(value) * 2);
1877  for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1878    result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1879  }
1880  return result;
1881}
1882
1883void TextFormat::Printer::PrintUnknownFields(
1884    const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
1885  for (int i = 0; i < unknown_fields.field_count(); i++) {
1886    const UnknownField& field = unknown_fields.field(i);
1887    string field_number = SimpleItoa(field.number());
1888
1889    switch (field.type()) {
1890      case UnknownField::TYPE_VARINT:
1891        generator.Print(field_number);
1892        generator.Print(": ");
1893        generator.Print(SimpleItoa(field.varint()));
1894        if (single_line_mode_) {
1895          generator.Print(" ");
1896        } else {
1897          generator.Print("\n");
1898        }
1899        break;
1900      case UnknownField::TYPE_FIXED32: {
1901        generator.Print(field_number);
1902        generator.Print(": 0x");
1903        generator.Print(
1904            StrCat(strings::Hex(field.fixed32(), strings::ZERO_PAD_8)));
1905        if (single_line_mode_) {
1906          generator.Print(" ");
1907        } else {
1908          generator.Print("\n");
1909        }
1910        break;
1911      }
1912      case UnknownField::TYPE_FIXED64: {
1913        generator.Print(field_number);
1914        generator.Print(": 0x");
1915        generator.Print(
1916            StrCat(strings::Hex(field.fixed64(), strings::ZERO_PAD_16)));
1917        if (single_line_mode_) {
1918          generator.Print(" ");
1919        } else {
1920          generator.Print("\n");
1921        }
1922        break;
1923      }
1924      case UnknownField::TYPE_LENGTH_DELIMITED: {
1925        generator.Print(field_number);
1926        const string& value = field.length_delimited();
1927        UnknownFieldSet embedded_unknown_fields;
1928        if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1929          // This field is parseable as a Message.
1930          // So it is probably an embedded message.
1931          if (single_line_mode_) {
1932            generator.Print(" { ");
1933          } else {
1934            generator.Print(" {\n");
1935            generator.Indent();
1936          }
1937          PrintUnknownFields(embedded_unknown_fields, generator);
1938          if (single_line_mode_) {
1939            generator.Print("} ");
1940          } else {
1941            generator.Outdent();
1942            generator.Print("}\n");
1943          }
1944        } else {
1945          // This field is not parseable as a Message.
1946          // So it is probably just a plain string.
1947          string printed(": \"");
1948          CEscapeAndAppend(value, &printed);
1949          printed.append(single_line_mode_ ? "\" " : "\"\n");
1950          generator.Print(printed);
1951        }
1952        break;
1953      }
1954      case UnknownField::TYPE_GROUP:
1955        generator.Print(field_number);
1956        if (single_line_mode_) {
1957          generator.Print(" { ");
1958        } else {
1959          generator.Print(" {\n");
1960          generator.Indent();
1961        }
1962        PrintUnknownFields(field.group(), generator);
1963        if (single_line_mode_) {
1964          generator.Print("} ");
1965        } else {
1966          generator.Outdent();
1967          generator.Print("}\n");
1968        }
1969        break;
1970    }
1971  }
1972}
1973
1974}  // namespace protobuf
1975}  // namespace google
1976