text_format.cc revision fbaaef999ba563838ebd00874ed8a1c01fbf286d
1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <float.h>
36#include <math.h>
37#include <stdio.h>
38#include <stack>
39#include <limits>
40
41#include <google/protobuf/text_format.h>
42
43#include <google/protobuf/descriptor.h>
44#include <google/protobuf/io/coded_stream.h>
45#include <google/protobuf/io/zero_copy_stream.h>
46#include <google/protobuf/io/zero_copy_stream_impl.h>
47#include <google/protobuf/unknown_field_set.h>
48#include <google/protobuf/descriptor.pb.h>
49#include <google/protobuf/io/tokenizer.h>
50#include <google/protobuf/stubs/strutil.h>
51
52namespace google {
53namespace protobuf {
54
55string Message::DebugString() const {
56  string debug_string;
57  io::StringOutputStream output_stream(&debug_string);
58
59  TextFormat::Print(*this, &output_stream);
60
61  return debug_string;
62}
63
64string Message::ShortDebugString() const {
65  string debug_string;
66  io::StringOutputStream output_stream(&debug_string);
67
68  TextFormat::Printer printer;
69  printer.SetSingleLineMode(true);
70
71  printer.Print(*this, &output_stream);
72  // Single line mode currently might have an extra space at the end.
73  if (debug_string.size() > 0 &&
74      debug_string[debug_string.size() - 1] == ' ') {
75    debug_string.resize(debug_string.size() - 1);
76  }
77
78  return debug_string;
79}
80
81void Message::PrintDebugString() const {
82  printf("%s", DebugString().c_str());
83}
84
85// ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //google/protobuf/io/tokenizer.h. Note that this class's Parse
// method is *not* thread-safe and should only be used in a single thread at
// a time.
91
92// Makes code slightly more readable.  The meaning of "DO(foo)" is
93// "Execute foo and fail if it fails.", where failure is indicated by
94// returning false. Borrowed from parser.cc (Thanks Kenton!).
95#define DO(STATEMENT) if (STATEMENT) {} else return false
96
97class TextFormat::Parser::ParserImpl {
98 public:
99
100  // Determines if repeated values for a non-repeated field are
101  // permitted, e.g., the string "foo: 1 foo: 2" for a
102  // required/optional field named "foo".
103  enum SingularOverwritePolicy {
104    ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
105    FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
106  };
107
108  ParserImpl(const Descriptor* root_message_type,
109             io::ZeroCopyInputStream* input_stream,
110             io::ErrorCollector* error_collector,
111             SingularOverwritePolicy singular_overwrite_policy)
112    : error_collector_(error_collector),
113      tokenizer_error_collector_(this),
114      tokenizer_(input_stream, &tokenizer_error_collector_),
115      root_message_type_(root_message_type),
116      singular_overwrite_policy_(singular_overwrite_policy),
117      had_errors_(false) {
118    // For backwards-compatibility with proto1, we need to allow the 'f' suffix
119    // for floats.
120    tokenizer_.set_allow_f_after_float(true);
121
122    // '#' starts a comment.
123    tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
124
125    // Consume the starting token.
126    tokenizer_.Next();
127  }
128  ~ParserImpl() { }
129
130  // Parses the ASCII representation specified in input and saves the
131  // information into the output pointer (a Message). Returns
132  // false if an error occurs (an error will also be logged to
133  // GOOGLE_LOG(ERROR)).
134  bool Parse(Message* output) {
135    // Consume fields until we cannot do so anymore.
136    while(true) {
137      if (LookingAtType(io::Tokenizer::TYPE_END)) {
138        return !had_errors_;
139      }
140
141      DO(ConsumeField(output));
142    }
143  }
144
145  bool ParseField(const FieldDescriptor* field, Message* output) {
146    bool suc;
147    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
148      suc = ConsumeFieldMessage(output, output->GetReflection(), field);
149    } else {
150      suc = ConsumeFieldValue(output, output->GetReflection(), field);
151    }
152    return suc && LookingAtType(io::Tokenizer::TYPE_END);
153  }
154
155  void ReportError(int line, int col, const string& message) {
156    had_errors_ = true;
157    if (error_collector_ == NULL) {
158      if (line >= 0) {
159        GOOGLE_LOG(ERROR) << "Error parsing text-format "
160                   << root_message_type_->full_name()
161                   << ": " << (line + 1) << ":"
162                   << (col + 1) << ": " << message;
163      } else {
164        GOOGLE_LOG(ERROR) << "Error parsing text-format "
165                   << root_message_type_->full_name()
166                   << ": " << message;
167      }
168    } else {
169      error_collector_->AddError(line, col, message);
170    }
171  }
172
173 private:
174  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
175
176  // Reports an error with the given message with information indicating
177  // the position (as derived from the current token).
178  void ReportError(const string& message) {
179    ReportError(tokenizer_.current().line, tokenizer_.current().column,
180                message);
181  }
182
183  // Consumes the specified message with the given starting delimeter.
184  // This method checks to see that the end delimeter at the conclusion of
185  // the consumption matches the starting delimeter passed in here.
186  bool ConsumeMessage(Message* message, const string delimeter) {
187    while (!LookingAt(">") &&  !LookingAt("}")) {
188      DO(ConsumeField(message));
189    }
190
191    // Confirm that we have a valid ending delimeter.
192    DO(Consume(delimeter));
193
194    return true;
195  }
196
197  // Consumes the current field (as returned by the tokenizer) on the
198  // passed in message.
199  bool ConsumeField(Message* message) {
200    const Reflection* reflection = message->GetReflection();
201    const Descriptor* descriptor = message->GetDescriptor();
202
203    string field_name;
204
205    const FieldDescriptor* field = NULL;
206
207    if (TryConsume("[")) {
208      // Extension.
209      DO(ConsumeIdentifier(&field_name));
210      while (TryConsume(".")) {
211        string part;
212        DO(ConsumeIdentifier(&part));
213        field_name += ".";
214        field_name += part;
215      }
216      DO(Consume("]"));
217
218      field = reflection->FindKnownExtensionByName(field_name);
219
220      if (field == NULL) {
221        ReportError("Extension \"" + field_name + "\" is not defined or "
222                    "is not an extension of \"" +
223                    descriptor->full_name() + "\".");
224        return false;
225      }
226    } else {
227      DO(ConsumeIdentifier(&field_name));
228
229      field = descriptor->FindFieldByName(field_name);
230      // Group names are expected to be capitalized as they appear in the
231      // .proto file, which actually matches their type names, not their field
232      // names.
233      if (field == NULL) {
234        string lower_field_name = field_name;
235        LowerString(&lower_field_name);
236        field = descriptor->FindFieldByName(lower_field_name);
237        // If the case-insensitive match worked but the field is NOT a group,
238        if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
239          field = NULL;
240        }
241      }
242      // Again, special-case group names as described above.
243      if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
244          && field->message_type()->name() != field_name) {
245        field = NULL;
246      }
247
248      if (field == NULL) {
249        ReportError("Message type \"" + descriptor->full_name() +
250                    "\" has no field named \"" + field_name + "\".");
251        return false;
252      }
253    }
254
255    // Fail if the field is not repeated and it has already been specified.
256    if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
257        !field->is_repeated() && reflection->HasField(*message, field)) {
258      ReportError("Non-repeated field \"" + field_name +
259                  "\" is specified multiple times.");
260      return false;
261    }
262
263    // Perform special handling for embedded message types.
264    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
265      // ':' is optional here.
266      TryConsume(":");
267      DO(ConsumeFieldMessage(message, reflection, field));
268    } else {
269      DO(Consume(":"));
270      DO(ConsumeFieldValue(message, reflection, field));
271    }
272
273    return true;
274  }
275
276  bool ConsumeFieldMessage(Message* message,
277                           const Reflection* reflection,
278                           const FieldDescriptor* field) {
279    string delimeter;
280    if (TryConsume("<")) {
281      delimeter = ">";
282    } else {
283      DO(Consume("{"));
284      delimeter = "}";
285    }
286
287    if (field->is_repeated()) {
288      DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
289    } else {
290      DO(ConsumeMessage(reflection->MutableMessage(message, field),
291                        delimeter));
292    }
293    return true;
294  }
295
296  bool ConsumeFieldValue(Message* message,
297                         const Reflection* reflection,
298                         const FieldDescriptor* field) {
299
300// Define an easy to use macro for setting fields. This macro checks
301// to see if the field is repeated (in which case we need to use the Add
302// methods or not (in which case we need to use the Set methods).
303#define SET_FIELD(CPPTYPE, VALUE)                                  \
304        if (field->is_repeated()) {                                \
305          reflection->Add##CPPTYPE(message, field, VALUE);         \
306        } else {                                                   \
307          reflection->Set##CPPTYPE(message, field, VALUE);         \
308        }                                                          \
309
310    switch(field->cpp_type()) {
311      case FieldDescriptor::CPPTYPE_INT32: {
312        int64 value;
313        DO(ConsumeSignedInteger(&value, kint32max));
314        SET_FIELD(Int32, static_cast<int32>(value));
315        break;
316      }
317
318      case FieldDescriptor::CPPTYPE_UINT32: {
319        uint64 value;
320        DO(ConsumeUnsignedInteger(&value, kuint32max));
321        SET_FIELD(UInt32, static_cast<uint32>(value));
322        break;
323      }
324
325      case FieldDescriptor::CPPTYPE_INT64: {
326        int64 value;
327        DO(ConsumeSignedInteger(&value, kint64max));
328        SET_FIELD(Int64, value);
329        break;
330      }
331
332      case FieldDescriptor::CPPTYPE_UINT64: {
333        uint64 value;
334        DO(ConsumeUnsignedInteger(&value, kuint64max));
335        SET_FIELD(UInt64, value);
336        break;
337      }
338
339      case FieldDescriptor::CPPTYPE_FLOAT: {
340        double value;
341        DO(ConsumeDouble(&value));
342        SET_FIELD(Float, static_cast<float>(value));
343        break;
344      }
345
346      case FieldDescriptor::CPPTYPE_DOUBLE: {
347        double value;
348        DO(ConsumeDouble(&value));
349        SET_FIELD(Double, value);
350        break;
351      }
352
353      case FieldDescriptor::CPPTYPE_STRING: {
354        string value;
355        DO(ConsumeString(&value));
356        SET_FIELD(String, value);
357        break;
358      }
359
360      case FieldDescriptor::CPPTYPE_BOOL: {
361        string value;
362        DO(ConsumeIdentifier(&value));
363
364        if (value == "true") {
365          SET_FIELD(Bool, true);
366        } else if (value == "false") {
367          SET_FIELD(Bool, false);
368        } else {
369          ReportError("Invalid value for boolean field \"" + field->name()
370                      + "\". Value: \"" + value  + "\".");
371          return false;
372        }
373        break;
374      }
375
376      case FieldDescriptor::CPPTYPE_ENUM: {
377        string value;
378        DO(ConsumeIdentifier(&value));
379
380        // Find the enumeration value.
381        const EnumDescriptor* enum_type = field->enum_type();
382        const EnumValueDescriptor* enum_value
383            = enum_type->FindValueByName(value);
384
385        if (enum_value == NULL) {
386          ReportError("Unknown enumeration value of \"" + value  + "\" for "
387                      "field \"" + field->name() + "\".");
388          return false;
389        }
390
391        SET_FIELD(Enum, enum_value);
392        break;
393      }
394
395      case FieldDescriptor::CPPTYPE_MESSAGE: {
396        // We should never get here. Put here instead of a default
397        // so that if new types are added, we get a nice compiler warning.
398        GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
399        break;
400      }
401    }
402#undef SET_FIELD
403    return true;
404  }
405
406  // Returns true if the current token's text is equal to that specified.
407  bool LookingAt(const string& text) {
408    return tokenizer_.current().text == text;
409  }
410
411  // Returns true if the current token's type is equal to that specified.
412  bool LookingAtType(io::Tokenizer::TokenType token_type) {
413    return tokenizer_.current().type == token_type;
414  }
415
416  // Consumes an identifier and saves its value in the identifier parameter.
417  // Returns false if the token is not of type IDENTFIER.
418  bool ConsumeIdentifier(string* identifier) {
419    if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
420      ReportError("Expected identifier.");
421      return false;
422    }
423
424    *identifier = tokenizer_.current().text;
425
426    tokenizer_.Next();
427    return true;
428  }
429
430  // Consumes a string and saves its value in the text parameter.
431  // Returns false if the token is not of type STRING.
432  bool ConsumeString(string* text) {
433    if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
434      ReportError("Expected string.");
435      return false;
436    }
437
438    text->clear();
439    while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
440      io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
441
442      tokenizer_.Next();
443    }
444
445    return true;
446  }
447
448  // Consumes a uint64 and saves its value in the value parameter.
449  // Returns false if the token is not of type INTEGER.
450  bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
451    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
452      ReportError("Expected integer.");
453      return false;
454    }
455
456    if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
457                                     max_value, value)) {
458      ReportError("Integer out of range.");
459      return false;
460    }
461
462    tokenizer_.Next();
463    return true;
464  }
465
466  // Consumes an int64 and saves its value in the value parameter.
467  // Note that since the tokenizer does not support negative numbers,
468  // we actually may consume an additional token (for the minus sign) in this
469  // method. Returns false if the token is not an integer
470  // (signed or otherwise).
471  bool ConsumeSignedInteger(int64* value, uint64 max_value) {
472    bool negative = false;
473
474    if (TryConsume("-")) {
475      negative = true;
476      // Two's complement always allows one more negative integer than
477      // positive.
478      ++max_value;
479    }
480
481    uint64 unsigned_value;
482
483    DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
484
485    *value = static_cast<int64>(unsigned_value);
486
487    if (negative) {
488      *value = -*value;
489    }
490
491    return true;
492  }
493
494  // Consumes a double and saves its value in the value parameter.
495  // Note that since the tokenizer does not support negative numbers,
496  // we actually may consume an additional token (for the minus sign) in this
497  // method. Returns false if the token is not a double
498  // (signed or otherwise).
499  bool ConsumeDouble(double* value) {
500    bool negative = false;
501
502    if (TryConsume("-")) {
503      negative = true;
504    }
505
506    // A double can actually be an integer, according to the tokenizer.
507    // Therefore, we must check both cases here.
508    if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
509      // We have found an integer value for the double.
510      uint64 integer_value;
511      DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
512
513      *value = static_cast<double>(integer_value);
514    } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
515      // We have found a float value for the double.
516      *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
517
518      // Mark the current token as consumed.
519      tokenizer_.Next();
520    } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
521      string text = tokenizer_.current().text;
522      LowerString(&text);
523      if (text == "inf" || text == "infinity") {
524        *value = std::numeric_limits<double>::infinity();
525        tokenizer_.Next();
526      } else if (text == "nan") {
527        *value = std::numeric_limits<double>::quiet_NaN();
528        tokenizer_.Next();
529      } else {
530        ReportError("Expected double.");
531        return false;
532      }
533    } else {
534      ReportError("Expected double.");
535      return false;
536    }
537
538    if (negative) {
539      *value = -*value;
540    }
541
542    return true;
543  }
544
545  // Consumes a token and confirms that it matches that specified in the
546  // value parameter. Returns false if the token found does not match that
547  // which was specified.
548  bool Consume(const string& value) {
549    const string& current_value = tokenizer_.current().text;
550
551    if (current_value != value) {
552      ReportError("Expected \"" + value + "\", found \"" + current_value
553                  + "\".");
554      return false;
555    }
556
557    tokenizer_.Next();
558
559    return true;
560  }
561
562  // Attempts to consume the supplied value. Returns false if a the
563  // token found does not match the value specified.
564  bool TryConsume(const string& value) {
565    if (tokenizer_.current().text == value) {
566      tokenizer_.Next();
567      return true;
568    } else {
569      return false;
570    }
571  }
572
573  // An internal instance of the Tokenizer's error collector, used to
574  // collect any base-level parse errors and feed them to the ParserImpl.
575  class ParserErrorCollector : public io::ErrorCollector {
576   public:
577    explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
578        parser_(parser) { }
579
580    virtual ~ParserErrorCollector() { };
581
582    virtual void AddError(int line, int column, const string& message) {
583      parser_->ReportError(line, column, message);
584    }
585
586   private:
587    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
588    TextFormat::Parser::ParserImpl* parser_;
589  };
590
591  io::ErrorCollector* error_collector_;
592  ParserErrorCollector tokenizer_error_collector_;
593  io::Tokenizer tokenizer_;
594  const Descriptor* root_message_type_;
595  SingularOverwritePolicy singular_overwrite_policy_;
596  bool had_errors_;
597};
598
599#undef DO
600
601// ===========================================================================
602// Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
603// from the Printer found in //google/protobuf/io/printer.h
604class TextFormat::Printer::TextGenerator {
605 public:
606  explicit TextGenerator(io::ZeroCopyOutputStream* output,
607                         int initial_indent_level)
608    : output_(output),
609      buffer_(NULL),
610      buffer_size_(0),
611      at_start_of_line_(true),
612      failed_(false),
613      indent_(""),
614      initial_indent_level_(initial_indent_level) {
615    indent_.resize(initial_indent_level_ * 2, ' ');
616  }
617
618  ~TextGenerator() {
619    // Only BackUp() if we're sure we've successfully called Next() at least
620    // once.
621    if (buffer_size_ > 0) {
622      output_->BackUp(buffer_size_);
623    }
624  }
625
626  // Indent text by two spaces.  After calling Indent(), two spaces will be
627  // inserted at the beginning of each line of text.  Indent() may be called
628  // multiple times to produce deeper indents.
629  void Indent() {
630    indent_ += "  ";
631  }
632
633  // Reduces the current indent level by two spaces, or crashes if the indent
634  // level is zero.
635  void Outdent() {
636    if (indent_.empty() ||
637        indent_.size() < initial_indent_level_ * 2) {
638      GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
639      return;
640    }
641
642    indent_.resize(indent_.size() - 2);
643  }
644
645  // Print text to the output stream.
646  void Print(const string& str) {
647    Print(str.c_str());
648  }
649
650  // Print text to the output stream.
651  void Print(const char* text) {
652    int size = strlen(text);
653    int pos = 0;  // The number of bytes we've written so far.
654
655    for (int i = 0; i < size; i++) {
656      if (text[i] == '\n') {
657        // Saw newline.  If there is more text, we may need to insert an indent
658        // here.  So, write what we have so far, including the '\n'.
659        Write(text + pos, i - pos + 1);
660        pos = i + 1;
661
662        // Setting this true will cause the next Write() to insert an indent
663        // first.
664        at_start_of_line_ = true;
665      }
666    }
667
668    // Write the rest.
669    Write(text + pos, size - pos);
670  }
671
672  // True if any write to the underlying stream failed.  (We don't just
673  // crash in this case because this is an I/O failure, not a programming
674  // error.)
675  bool failed() const { return failed_; }
676
677 private:
678  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
679
680  void Write(const char* data, int size) {
681    if (failed_) return;
682    if (size == 0) return;
683
684    if (at_start_of_line_) {
685      // Insert an indent.
686      at_start_of_line_ = false;
687      Write(indent_.data(), indent_.size());
688      if (failed_) return;
689    }
690
691    while (size > buffer_size_) {
692      // Data exceeds space in the buffer.  Copy what we can and request a
693      // new buffer.
694      memcpy(buffer_, data, buffer_size_);
695      data += buffer_size_;
696      size -= buffer_size_;
697      void* void_buffer;
698      failed_ = !output_->Next(&void_buffer, &buffer_size_);
699      if (failed_) return;
700      buffer_ = reinterpret_cast<char*>(void_buffer);
701    }
702
703    // Buffer is big enough to receive the data; copy it.
704    memcpy(buffer_, data, size);
705    buffer_ += size;
706    buffer_size_ -= size;
707  }
708
709  io::ZeroCopyOutputStream* const output_;
710  char* buffer_;
711  int buffer_size_;
712  bool at_start_of_line_;
713  bool failed_;
714
715  string indent_;
716  int initial_indent_level_;
717};
718
719// ===========================================================================
720
721TextFormat::Parser::Parser()
722  : error_collector_(NULL),
723    allow_partial_(false) {}
724
725TextFormat::Parser::~Parser() {}
726
727bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
728                               Message* output) {
729  output->Clear();
730  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
731                    ParserImpl::FORBID_SINGULAR_OVERWRITES);
732  return MergeUsingImpl(input, output, &parser);
733}
734
735bool TextFormat::Parser::ParseFromString(const string& input,
736                                         Message* output) {
737  io::ArrayInputStream input_stream(input.data(), input.size());
738  return Parse(&input_stream, output);
739}
740
741bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
742                               Message* output) {
743  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
744                    ParserImpl::ALLOW_SINGULAR_OVERWRITES);
745  return MergeUsingImpl(input, output, &parser);
746}
747
748bool TextFormat::Parser::MergeFromString(const string& input,
749                                         Message* output) {
750  io::ArrayInputStream input_stream(input.data(), input.size());
751  return Merge(&input_stream, output);
752}
753
754bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
755                                        Message* output,
756                                        ParserImpl* parser_impl) {
757  if (!parser_impl->Parse(output)) return false;
758  if (!allow_partial_ && !output->IsInitialized()) {
759    vector<string> missing_fields;
760    output->FindInitializationErrors(&missing_fields);
761    parser_impl->ReportError(-1, 0, "Message missing required fields: " +
762                                    JoinStrings(missing_fields, ", "));
763    return false;
764  }
765  return true;
766}
767
768bool TextFormat::Parser::ParseFieldValueFromString(
769    const string& input,
770    const FieldDescriptor* field,
771    Message* output) {
772  io::ArrayInputStream input_stream(input.data(), input.size());
773  ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
774                    ParserImpl::ALLOW_SINGULAR_OVERWRITES);
775  return parser.ParseField(field, output);
776}
777
778/* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
779                                    Message* output) {
780  return Parser().Parse(input, output);
781}
782
783/* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
784                                    Message* output) {
785  return Parser().Merge(input, output);
786}
787
788/* static */ bool TextFormat::ParseFromString(const string& input,
789                                              Message* output) {
790  return Parser().ParseFromString(input, output);
791}
792
793/* static */ bool TextFormat::MergeFromString(const string& input,
794                                              Message* output) {
795  return Parser().MergeFromString(input, output);
796}
797
798// ===========================================================================
799
800TextFormat::Printer::Printer()
801  : initial_indent_level_(0),
802    single_line_mode_(false) {}
803
804TextFormat::Printer::~Printer() {}
805
806bool TextFormat::Printer::PrintToString(const Message& message,
807                                        string* output) {
808  GOOGLE_DCHECK(output) << "output specified is NULL";
809
810  output->clear();
811  io::StringOutputStream output_stream(output);
812
813  bool result = Print(message, &output_stream);
814
815  return result;
816}
817
818bool TextFormat::Printer::PrintUnknownFieldsToString(
819    const UnknownFieldSet& unknown_fields,
820    string* output) {
821  GOOGLE_DCHECK(output) << "output specified is NULL";
822
823  output->clear();
824  io::StringOutputStream output_stream(output);
825  return PrintUnknownFields(unknown_fields, &output_stream);
826}
827
828bool TextFormat::Printer::Print(const Message& message,
829                                io::ZeroCopyOutputStream* output) {
830  TextGenerator generator(output, initial_indent_level_);
831
832  Print(message, generator);
833
834  // Output false if the generator failed internally.
835  return !generator.failed();
836}
837
838bool TextFormat::Printer::PrintUnknownFields(
839    const UnknownFieldSet& unknown_fields,
840    io::ZeroCopyOutputStream* output) {
841  TextGenerator generator(output, initial_indent_level_);
842
843  PrintUnknownFields(unknown_fields, generator);
844
845  // Output false if the generator failed internally.
846  return !generator.failed();
847}
848
849void TextFormat::Printer::Print(const Message& message,
850                                TextGenerator& generator) {
851  const Reflection* reflection = message.GetReflection();
852  vector<const FieldDescriptor*> fields;
853  reflection->ListFields(message, &fields);
854  for (int i = 0; i < fields.size(); i++) {
855    PrintField(message, reflection, fields[i], generator);
856  }
857  PrintUnknownFields(reflection->GetUnknownFields(message), generator);
858}
859
860void TextFormat::Printer::PrintFieldValueToString(
861    const Message& message,
862    const FieldDescriptor* field,
863    int index,
864    string* output) {
865
866  GOOGLE_DCHECK(output) << "output specified is NULL";
867
868  output->clear();
869  io::StringOutputStream output_stream(output);
870  TextGenerator generator(&output_stream, initial_indent_level_);
871
872  PrintFieldValue(message, message.GetReflection(), field, index, generator);
873}
874
875void TextFormat::Printer::PrintField(const Message& message,
876                                     const Reflection* reflection,
877                                     const FieldDescriptor* field,
878                                     TextGenerator& generator) {
879  int count = 0;
880
881  if (field->is_repeated()) {
882    count = reflection->FieldSize(message, field);
883  } else if (reflection->HasField(message, field)) {
884    count = 1;
885  }
886
887  for (int j = 0; j < count; ++j) {
888    if (field->is_extension()) {
889      generator.Print("[");
890      // We special-case MessageSet elements for compatibility with proto1.
891      if (field->containing_type()->options().message_set_wire_format()
892          && field->type() == FieldDescriptor::TYPE_MESSAGE
893          && field->is_optional()
894          && field->extension_scope() == field->message_type()) {
895        generator.Print(field->message_type()->full_name());
896      } else {
897        generator.Print(field->full_name());
898      }
899      generator.Print("]");
900    } else {
901      if (field->type() == FieldDescriptor::TYPE_GROUP) {
902        // Groups must be serialized with their original capitalization.
903        generator.Print(field->message_type()->name());
904      } else {
905        generator.Print(field->name());
906      }
907    }
908
909    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
910      if (single_line_mode_) {
911        generator.Print(" { ");
912      } else {
913        generator.Print(" {\n");
914        generator.Indent();
915      }
916    } else {
917      generator.Print(": ");
918    }
919
920    // Write the field value.
921    int field_index = j;
922    if (!field->is_repeated()) {
923      field_index = -1;
924    }
925
926    PrintFieldValue(message, reflection, field, field_index, generator);
927
928    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
929      if (!single_line_mode_) {
930        generator.Outdent();
931      }
932      generator.Print("}");
933    }
934
935    if (single_line_mode_) {
936      generator.Print(" ");
937    } else {
938      generator.Print("\n");
939    }
940  }
941}
942
943void TextFormat::Printer::PrintFieldValue(
944    const Message& message,
945    const Reflection* reflection,
946    const FieldDescriptor* field,
947    int index,
948    TextGenerator& generator) {
949  GOOGLE_DCHECK(field->is_repeated() || (index == -1))
950      << "Index must be -1 for non-repeated fields";
951
952  switch (field->cpp_type()) {
953#define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING)                             \
954      case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
955        generator.Print(TO_STRING(field->is_repeated() ?                     \
956          reflection->GetRepeated##METHOD(message, field, index) :           \
957          reflection->Get##METHOD(message, field)));                         \
958        break;                                                               \
959
960      OUTPUT_FIELD( INT32,  Int32, SimpleItoa);
961      OUTPUT_FIELD( INT64,  Int64, SimpleItoa);
962      OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
963      OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
964      OUTPUT_FIELD( FLOAT,  Float, SimpleFtoa);
965      OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
966#undef OUTPUT_FIELD
967
968      case FieldDescriptor::CPPTYPE_STRING: {
969        string scratch;
970        const string& value = field->is_repeated() ?
971            reflection->GetRepeatedStringReference(
972              message, field, index, &scratch) :
973            reflection->GetStringReference(message, field, &scratch);
974
975        generator.Print("\"");
976        generator.Print(CEscape(value));
977        generator.Print("\"");
978
979        break;
980      }
981
982      case FieldDescriptor::CPPTYPE_BOOL:
983        if (field->is_repeated()) {
984          generator.Print(reflection->GetRepeatedBool(message, field, index)
985                          ? "true" : "false");
986        } else {
987          generator.Print(reflection->GetBool(message, field)
988                          ? "true" : "false");
989        }
990        break;
991
992      case FieldDescriptor::CPPTYPE_ENUM:
993        generator.Print(field->is_repeated() ?
994          reflection->GetRepeatedEnum(message, field, index)->name() :
995          reflection->GetEnum(message, field)->name());
996        break;
997
998      case FieldDescriptor::CPPTYPE_MESSAGE:
999        Print(field->is_repeated() ?
1000                reflection->GetRepeatedMessage(message, field, index) :
1001                reflection->GetMessage(message, field),
1002              generator);
1003        break;
1004  }
1005}
1006
1007/* static */ bool TextFormat::Print(const Message& message,
1008                                    io::ZeroCopyOutputStream* output) {
1009  return Printer().Print(message, output);
1010}
1011
1012/* static */ bool TextFormat::PrintUnknownFields(
1013    const UnknownFieldSet& unknown_fields,
1014    io::ZeroCopyOutputStream* output) {
1015  return Printer().PrintUnknownFields(unknown_fields, output);
1016}
1017
1018/* static */ bool TextFormat::PrintToString(
1019    const Message& message, string* output) {
1020  return Printer().PrintToString(message, output);
1021}
1022
1023/* static */ bool TextFormat::PrintUnknownFieldsToString(
1024    const UnknownFieldSet& unknown_fields, string* output) {
1025  return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1026}
1027
1028/* static */ void TextFormat::PrintFieldValueToString(
1029    const Message& message,
1030    const FieldDescriptor* field,
1031    int index,
1032    string* output) {
1033  return Printer().PrintFieldValueToString(message, field, index, output);
1034}
1035
1036/* static */ bool TextFormat::ParseFieldValueFromString(
1037    const string& input,
1038    const FieldDescriptor* field,
1039    Message* message) {
1040  return Parser().ParseFieldValueFromString(input, field, message);
1041}
1042
1043// Prints an integer as hex with a fixed number of digits dependent on the
1044// integer type.
1045template<typename IntType>
1046static string PaddedHex(IntType value) {
1047  string result;
1048  result.reserve(sizeof(value) * 2);
1049  for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1050    result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1051  }
1052  return result;
1053}
1054
1055void TextFormat::Printer::PrintUnknownFields(
1056    const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
1057  for (int i = 0; i < unknown_fields.field_count(); i++) {
1058    const UnknownField& field = unknown_fields.field(i);
1059    string field_number = SimpleItoa(field.number());
1060
1061    switch (field.type()) {
1062      case UnknownField::TYPE_VARINT:
1063        generator.Print(field_number);
1064        generator.Print(": ");
1065        generator.Print(SimpleItoa(field.varint()));
1066        if (single_line_mode_) {
1067          generator.Print(" ");
1068        } else {
1069          generator.Print("\n");
1070        }
1071        break;
1072      case UnknownField::TYPE_FIXED32: {
1073        generator.Print(field_number);
1074        generator.Print(": 0x");
1075        char buffer[kFastToBufferSize];
1076        generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
1077        if (single_line_mode_) {
1078          generator.Print(" ");
1079        } else {
1080          generator.Print("\n");
1081        }
1082        break;
1083      }
1084      case UnknownField::TYPE_FIXED64: {
1085        generator.Print(field_number);
1086        generator.Print(": 0x");
1087        char buffer[kFastToBufferSize];
1088        generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
1089        if (single_line_mode_) {
1090          generator.Print(" ");
1091        } else {
1092          generator.Print("\n");
1093        }
1094        break;
1095      }
1096      case UnknownField::TYPE_LENGTH_DELIMITED: {
1097        generator.Print(field_number);
1098        const string& value = field.length_delimited();
1099        UnknownFieldSet embedded_unknown_fields;
1100        if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1101          // This field is parseable as a Message.
1102          // So it is probably an embedded message.
1103          if (single_line_mode_) {
1104            generator.Print(" { ");
1105          } else {
1106            generator.Print(" {\n");
1107            generator.Indent();
1108          }
1109          PrintUnknownFields(embedded_unknown_fields, generator);
1110          if (single_line_mode_) {
1111            generator.Print("} ");
1112          } else {
1113            generator.Outdent();
1114            generator.Print("}\n");
1115          }
1116        } else {
1117          // This field is not parseable as a Message.
1118          // So it is probably just a plain string.
1119          generator.Print(": \"");
1120          generator.Print(CEscape(value));
1121          generator.Print("\"");
1122          if (single_line_mode_) {
1123            generator.Print(" ");
1124          } else {
1125            generator.Print("\n");
1126          }
1127        }
1128        break;
1129      }
1130      case UnknownField::TYPE_GROUP:
1131        generator.Print(field_number);
1132        if (single_line_mode_) {
1133          generator.Print(" { ");
1134        } else {
1135          generator.Print(" {\n");
1136          generator.Indent();
1137        }
1138        PrintUnknownFields(field.group(), generator);
1139        if (single_line_mode_) {
1140          generator.Print("} ");
1141        } else {
1142          generator.Outdent();
1143          generator.Print("}\n");
1144        }
1145        break;
1146    }
1147  }
1148}
1149
1150}  // namespace protobuf
1151}  // namespace google
1152