1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <float.h>
36#include <math.h>
37#include <stdio.h>
38#include <stack>
39#include <limits>
40
41#include <google/protobuf/text_format.h>
42
43#include <google/protobuf/descriptor.h>
44#include <google/protobuf/io/coded_stream.h>
45#include <google/protobuf/io/zero_copy_stream.h>
46#include <google/protobuf/io/zero_copy_stream_impl.h>
47#include <google/protobuf/unknown_field_set.h>
48#include <google/protobuf/descriptor.pb.h>
49#include <google/protobuf/io/tokenizer.h>
50#include <google/protobuf/stubs/strutil.h>
51
52namespace google {
53namespace protobuf {
54
55string Message::DebugString() const {
56  string debug_string;
57
58  TextFormat::PrintToString(*this, &debug_string);
59
60  return debug_string;
61}
62
63string Message::ShortDebugString() const {
64  string debug_string;
65
66  TextFormat::Printer printer;
67  printer.SetSingleLineMode(true);
68
69  printer.PrintToString(*this, &debug_string);
70  // Single line mode currently might have an extra space at the end.
71  if (debug_string.size() > 0 &&
72      debug_string[debug_string.size() - 1] == ' ') {
73    debug_string.resize(debug_string.size() - 1);
74  }
75
76  return debug_string;
77}
78
79string Message::Utf8DebugString() const {
80  string debug_string;
81
82  TextFormat::Printer printer;
83  printer.SetUseUtf8StringEscaping(true);
84
85  printer.PrintToString(*this, &debug_string);
86
87  return debug_string;
88}
89
90void Message::PrintDebugString() const {
91  printf("%s", DebugString().c_str());
92}
93
94
95// ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //google/protobuf/io/tokenizer.h. Note that this class's Parse
// method is *not* thread-safe and should only be used in a single thread at
// a time.
101
// Readability helper.  "DO(foo);" evaluates foo and, when foo yields false,
// makes the enclosing function return false immediately; otherwise execution
// continues with the next statement.  Borrowed from parser.cc (Thanks
// Kenton!).  The do/while wrapper makes every use a single statement that
// must be terminated with a semicolon.
#define DO(STATEMENT) do { if (!(STATEMENT)) return false; } while (false)
106
107class TextFormat::Parser::ParserImpl {
108 public:
109
110  // Determines if repeated values for a non-repeated field are
111  // permitted, e.g., the string "foo: 1 foo: 2" for a
112  // required/optional field named "foo".
113  enum SingularOverwritePolicy {
114    ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
115    FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
116  };
117
118  ParserImpl(const Descriptor* root_message_type,
119             io::ZeroCopyInputStream* input_stream,
120             io::ErrorCollector* error_collector,
121             SingularOverwritePolicy singular_overwrite_policy)
122    : error_collector_(error_collector),
123      tokenizer_error_collector_(this),
124      tokenizer_(input_stream, &tokenizer_error_collector_),
125      root_message_type_(root_message_type),
126      singular_overwrite_policy_(singular_overwrite_policy),
127      had_errors_(false) {
128    // For backwards-compatibility with proto1, we need to allow the 'f' suffix
129    // for floats.
130    tokenizer_.set_allow_f_after_float(true);
131
132    // '#' starts a comment.
133    tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
134
135    // Consume the starting token.
136    tokenizer_.Next();
137  }
138  ~ParserImpl() { }
139
140  // Parses the ASCII representation specified in input and saves the
141  // information into the output pointer (a Message). Returns
142  // false if an error occurs (an error will also be logged to
143  // GOOGLE_LOG(ERROR)).
144  bool Parse(Message* output) {
145    // Consume fields until we cannot do so anymore.
146    while(true) {
147      if (LookingAtType(io::Tokenizer::TYPE_END)) {
148        return !had_errors_;
149      }
150
151      DO(ConsumeField(output));
152    }
153  }
154
155  bool ParseField(const FieldDescriptor* field, Message* output) {
156    bool suc;
157    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
158      suc = ConsumeFieldMessage(output, output->GetReflection(), field);
159    } else {
160      suc = ConsumeFieldValue(output, output->GetReflection(), field);
161    }
162    return suc && LookingAtType(io::Tokenizer::TYPE_END);
163  }
164
165  void ReportError(int line, int col, const string& message) {
166    had_errors_ = true;
167    if (error_collector_ == NULL) {
168      if (line >= 0) {
169        GOOGLE_LOG(ERROR) << "Error parsing text-format "
170                   << root_message_type_->full_name()
171                   << ": " << (line + 1) << ":"
172                   << (col + 1) << ": " << message;
173      } else {
174        GOOGLE_LOG(ERROR) << "Error parsing text-format "
175                   << root_message_type_->full_name()
176                   << ": " << message;
177      }
178    } else {
179      error_collector_->AddError(line, col, message);
180    }
181  }
182
183  void ReportWarning(int line, int col, const string& message) {
184    if (error_collector_ == NULL) {
185      if (line >= 0) {
186        GOOGLE_LOG(WARNING) << "Warning parsing text-format "
187                     << root_message_type_->full_name()
188                     << ": " << (line + 1) << ":"
189                     << (col + 1) << ": " << message;
190      } else {
191        GOOGLE_LOG(WARNING) << "Warning parsing text-format "
192                     << root_message_type_->full_name()
193                     << ": " << message;
194      }
195    } else {
196      error_collector_->AddWarning(line, col, message);
197    }
198  }
199
200 private:
201  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
202
203  // Reports an error with the given message with information indicating
204  // the position (as derived from the current token).
205  void ReportError(const string& message) {
206    ReportError(tokenizer_.current().line, tokenizer_.current().column,
207                message);
208  }
209
210  // Reports a warning with the given message with information indicating
211  // the position (as derived from the current token).
212  void ReportWarning(const string& message) {
213    ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
214                  message);
215  }
216
217  // Consumes the specified message with the given starting delimeter.
218  // This method checks to see that the end delimeter at the conclusion of
219  // the consumption matches the starting delimeter passed in here.
220  bool ConsumeMessage(Message* message, const string delimeter) {
221    while (!LookingAt(">") &&  !LookingAt("}")) {
222      DO(ConsumeField(message));
223    }
224
225    // Confirm that we have a valid ending delimeter.
226    DO(Consume(delimeter));
227
228    return true;
229  }
230
231  // Consumes the current field (as returned by the tokenizer) on the
232  // passed in message.
233  bool ConsumeField(Message* message) {
234    const Reflection* reflection = message->GetReflection();
235    const Descriptor* descriptor = message->GetDescriptor();
236
237    string field_name;
238
239    const FieldDescriptor* field = NULL;
240
241    if (TryConsume("[")) {
242      // Extension.
243      DO(ConsumeIdentifier(&field_name));
244      while (TryConsume(".")) {
245        string part;
246        DO(ConsumeIdentifier(&part));
247        field_name += ".";
248        field_name += part;
249      }
250      DO(Consume("]"));
251
252      field = reflection->FindKnownExtensionByName(field_name);
253
254      if (field == NULL) {
255        ReportError("Extension \"" + field_name + "\" is not defined or "
256                    "is not an extension of \"" +
257                    descriptor->full_name() + "\".");
258        return false;
259      }
260    } else {
261      DO(ConsumeIdentifier(&field_name));
262
263      field = descriptor->FindFieldByName(field_name);
264      // Group names are expected to be capitalized as they appear in the
265      // .proto file, which actually matches their type names, not their field
266      // names.
267      if (field == NULL) {
268        string lower_field_name = field_name;
269        LowerString(&lower_field_name);
270        field = descriptor->FindFieldByName(lower_field_name);
271        // If the case-insensitive match worked but the field is NOT a group,
272        if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
273          field = NULL;
274        }
275      }
276      // Again, special-case group names as described above.
277      if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
278          && field->message_type()->name() != field_name) {
279        field = NULL;
280      }
281
282      if (field == NULL) {
283        ReportError("Message type \"" + descriptor->full_name() +
284                    "\" has no field named \"" + field_name + "\".");
285        return false;
286      }
287    }
288
289    // Fail if the field is not repeated and it has already been specified.
290    if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
291        !field->is_repeated() && reflection->HasField(*message, field)) {
292      ReportError("Non-repeated field \"" + field_name +
293                  "\" is specified multiple times.");
294      return false;
295    }
296
297    // Perform special handling for embedded message types.
298    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
299      // ':' is optional here.
300      TryConsume(":");
301      DO(ConsumeFieldMessage(message, reflection, field));
302    } else {
303      DO(Consume(":"));
304      DO(ConsumeFieldValue(message, reflection, field));
305    }
306
307    if (field->options().deprecated()) {
308      ReportWarning("text format contains deprecated field \""
309                    + field_name + "\"");
310    }
311
312    return true;
313  }
314
315  bool ConsumeFieldMessage(Message* message,
316                           const Reflection* reflection,
317                           const FieldDescriptor* field) {
318    string delimeter;
319    if (TryConsume("<")) {
320      delimeter = ">";
321    } else {
322      DO(Consume("{"));
323      delimeter = "}";
324    }
325
326    if (field->is_repeated()) {
327      DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
328    } else {
329      DO(ConsumeMessage(reflection->MutableMessage(message, field),
330                        delimeter));
331    }
332    return true;
333  }
334
335  bool ConsumeFieldValue(Message* message,
336                         const Reflection* reflection,
337                         const FieldDescriptor* field) {
338
339// Define an easy to use macro for setting fields. This macro checks
340// to see if the field is repeated (in which case we need to use the Add
341// methods or not (in which case we need to use the Set methods).
342#define SET_FIELD(CPPTYPE, VALUE)                                  \
343        if (field->is_repeated()) {                                \
344          reflection->Add##CPPTYPE(message, field, VALUE);         \
345        } else {                                                   \
346          reflection->Set##CPPTYPE(message, field, VALUE);         \
347        }                                                          \
348
349    switch(field->cpp_type()) {
350      case FieldDescriptor::CPPTYPE_INT32: {
351        int64 value;
352        DO(ConsumeSignedInteger(&value, kint32max));
353        SET_FIELD(Int32, static_cast<int32>(value));
354        break;
355      }
356
357      case FieldDescriptor::CPPTYPE_UINT32: {
358        uint64 value;
359        DO(ConsumeUnsignedInteger(&value, kuint32max));
360        SET_FIELD(UInt32, static_cast<uint32>(value));
361        break;
362      }
363
364      case FieldDescriptor::CPPTYPE_INT64: {
365        int64 value;
366        DO(ConsumeSignedInteger(&value, kint64max));
367        SET_FIELD(Int64, value);
368        break;
369      }
370
371      case FieldDescriptor::CPPTYPE_UINT64: {
372        uint64 value;
373        DO(ConsumeUnsignedInteger(&value, kuint64max));
374        SET_FIELD(UInt64, value);
375        break;
376      }
377
378      case FieldDescriptor::CPPTYPE_FLOAT: {
379        double value;
380        DO(ConsumeDouble(&value));
381        SET_FIELD(Float, static_cast<float>(value));
382        break;
383      }
384
385      case FieldDescriptor::CPPTYPE_DOUBLE: {
386        double value;
387        DO(ConsumeDouble(&value));
388        SET_FIELD(Double, value);
389        break;
390      }
391
392      case FieldDescriptor::CPPTYPE_STRING: {
393        string value;
394        DO(ConsumeString(&value));
395        SET_FIELD(String, value);
396        break;
397      }
398
399      case FieldDescriptor::CPPTYPE_BOOL: {
400        string value;
401        DO(ConsumeIdentifier(&value));
402
403        if (value == "true") {
404          SET_FIELD(Bool, true);
405        } else if (value == "false") {
406          SET_FIELD(Bool, false);
407        } else {
408          ReportError("Invalid value for boolean field \"" + field->name()
409                      + "\". Value: \"" + value  + "\".");
410          return false;
411        }
412        break;
413      }
414
415      case FieldDescriptor::CPPTYPE_ENUM: {
416        string value;
417        DO(ConsumeIdentifier(&value));
418
419        // Find the enumeration value.
420        const EnumDescriptor* enum_type = field->enum_type();
421        const EnumValueDescriptor* enum_value
422            = enum_type->FindValueByName(value);
423
424        if (enum_value == NULL) {
425          ReportError("Unknown enumeration value of \"" + value  + "\" for "
426                      "field \"" + field->name() + "\".");
427          return false;
428        }
429
430        SET_FIELD(Enum, enum_value);
431        break;
432      }
433
434      case FieldDescriptor::CPPTYPE_MESSAGE: {
435        // We should never get here. Put here instead of a default
436        // so that if new types are added, we get a nice compiler warning.
437        GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
438        break;
439      }
440    }
441#undef SET_FIELD
442    return true;
443  }
444
445  // Returns true if the current token's text is equal to that specified.
446  bool LookingAt(const string& text) {
447    return tokenizer_.current().text == text;
448  }
449
450  // Returns true if the current token's type is equal to that specified.
451  bool LookingAtType(io::Tokenizer::TokenType token_type) {
452    return tokenizer_.current().type == token_type;
453  }
454
455  // Consumes an identifier and saves its value in the identifier parameter.
456  // Returns false if the token is not of type IDENTFIER.
457  bool ConsumeIdentifier(string* identifier) {
458    if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
459      ReportError("Expected identifier.");
460      return false;
461    }
462
463    *identifier = tokenizer_.current().text;
464
465    tokenizer_.Next();
466    return true;
467  }
468
469  // Consumes a string and saves its value in the text parameter.
470  // Returns false if the token is not of type STRING.
471  bool ConsumeString(string* text) {
472    if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
473      ReportError("Expected string.");
474      return false;
475    }
476
477    text->clear();
478    while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
479      io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
480
481      tokenizer_.Next();
482    }
483
484    return true;
485  }
486
487  // Consumes a uint64 and saves its value in the value parameter.
488  // Returns false if the token is not of type INTEGER.
489  bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
490    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
491      ReportError("Expected integer.");
492      return false;
493    }
494
495    if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
496                                     max_value, value)) {
497      ReportError("Integer out of range.");
498      return false;
499    }
500
501    tokenizer_.Next();
502    return true;
503  }
504
505  // Consumes an int64 and saves its value in the value parameter.
506  // Note that since the tokenizer does not support negative numbers,
507  // we actually may consume an additional token (for the minus sign) in this
508  // method. Returns false if the token is not an integer
509  // (signed or otherwise).
510  bool ConsumeSignedInteger(int64* value, uint64 max_value) {
511    bool negative = false;
512
513    if (TryConsume("-")) {
514      negative = true;
515      // Two's complement always allows one more negative integer than
516      // positive.
517      ++max_value;
518    }
519
520    uint64 unsigned_value;
521
522    DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
523
524    *value = static_cast<int64>(unsigned_value);
525
526    if (negative) {
527      *value = -*value;
528    }
529
530    return true;
531  }
532
533  // Consumes a double and saves its value in the value parameter.
534  // Note that since the tokenizer does not support negative numbers,
535  // we actually may consume an additional token (for the minus sign) in this
536  // method. Returns false if the token is not a double
537  // (signed or otherwise).
538  bool ConsumeDouble(double* value) {
539    bool negative = false;
540
541    if (TryConsume("-")) {
542      negative = true;
543    }
544
545    // A double can actually be an integer, according to the tokenizer.
546    // Therefore, we must check both cases here.
547    if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
548      // We have found an integer value for the double.
549      uint64 integer_value;
550      DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
551
552      *value = static_cast<double>(integer_value);
553    } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
554      // We have found a float value for the double.
555      *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
556
557      // Mark the current token as consumed.
558      tokenizer_.Next();
559    } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
560      string text = tokenizer_.current().text;
561      LowerString(&text);
562      if (text == "inf" || text == "infinity") {
563        *value = std::numeric_limits<double>::infinity();
564        tokenizer_.Next();
565      } else if (text == "nan") {
566        *value = std::numeric_limits<double>::quiet_NaN();
567        tokenizer_.Next();
568      } else {
569        ReportError("Expected double.");
570        return false;
571      }
572    } else {
573      ReportError("Expected double.");
574      return false;
575    }
576
577    if (negative) {
578      *value = -*value;
579    }
580
581    return true;
582  }
583
584  // Consumes a token and confirms that it matches that specified in the
585  // value parameter. Returns false if the token found does not match that
586  // which was specified.
587  bool Consume(const string& value) {
588    const string& current_value = tokenizer_.current().text;
589
590    if (current_value != value) {
591      ReportError("Expected \"" + value + "\", found \"" + current_value
592                  + "\".");
593      return false;
594    }
595
596    tokenizer_.Next();
597
598    return true;
599  }
600
601  // Attempts to consume the supplied value. Returns false if a the
602  // token found does not match the value specified.
603  bool TryConsume(const string& value) {
604    if (tokenizer_.current().text == value) {
605      tokenizer_.Next();
606      return true;
607    } else {
608      return false;
609    }
610  }
611
612  // An internal instance of the Tokenizer's error collector, used to
613  // collect any base-level parse errors and feed them to the ParserImpl.
614  class ParserErrorCollector : public io::ErrorCollector {
615   public:
616    explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
617        parser_(parser) { }
618
619    virtual ~ParserErrorCollector() { };
620
621    virtual void AddError(int line, int column, const string& message) {
622      parser_->ReportError(line, column, message);
623    }
624
625    virtual void AddWarning(int line, int column, const string& message) {
626      parser_->ReportWarning(line, column, message);
627    }
628
629   private:
630    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
631    TextFormat::Parser::ParserImpl* parser_;
632  };
633
634  io::ErrorCollector* error_collector_;
635  ParserErrorCollector tokenizer_error_collector_;
636  io::Tokenizer tokenizer_;
637  const Descriptor* root_message_type_;
638  SingularOverwritePolicy singular_overwrite_policy_;
639  bool had_errors_;
640};
641
642#undef DO
643
644// ===========================================================================
645// Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
646// from the Printer found in //google/protobuf/io/printer.h
647class TextFormat::Printer::TextGenerator {
648 public:
649  explicit TextGenerator(io::ZeroCopyOutputStream* output,
650                         int initial_indent_level)
651    : output_(output),
652      buffer_(NULL),
653      buffer_size_(0),
654      at_start_of_line_(true),
655      failed_(false),
656      indent_(""),
657      initial_indent_level_(initial_indent_level) {
658    indent_.resize(initial_indent_level_ * 2, ' ');
659  }
660
661  ~TextGenerator() {
662    // Only BackUp() if we're sure we've successfully called Next() at least
663    // once.
664    if (buffer_size_ > 0) {
665      output_->BackUp(buffer_size_);
666    }
667  }
668
669  // Indent text by two spaces.  After calling Indent(), two spaces will be
670  // inserted at the beginning of each line of text.  Indent() may be called
671  // multiple times to produce deeper indents.
672  void Indent() {
673    indent_ += "  ";
674  }
675
676  // Reduces the current indent level by two spaces, or crashes if the indent
677  // level is zero.
678  void Outdent() {
679    if (indent_.empty() ||
680        indent_.size() < initial_indent_level_ * 2) {
681      GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
682      return;
683    }
684
685    indent_.resize(indent_.size() - 2);
686  }
687
688  // Print text to the output stream.
689  void Print(const string& str) {
690    Print(str.data(), str.size());
691  }
692
693  // Print text to the output stream.
694  void Print(const char* text) {
695    Print(text, strlen(text));
696  }
697
698  // Print text to the output stream.
699  void Print(const char* text, int size) {
700    int pos = 0;  // The number of bytes we've written so far.
701
702    for (int i = 0; i < size; i++) {
703      if (text[i] == '\n') {
704        // Saw newline.  If there is more text, we may need to insert an indent
705        // here.  So, write what we have so far, including the '\n'.
706        Write(text + pos, i - pos + 1);
707        pos = i + 1;
708
709        // Setting this true will cause the next Write() to insert an indent
710        // first.
711        at_start_of_line_ = true;
712      }
713    }
714
715    // Write the rest.
716    Write(text + pos, size - pos);
717  }
718
719  // True if any write to the underlying stream failed.  (We don't just
720  // crash in this case because this is an I/O failure, not a programming
721  // error.)
722  bool failed() const { return failed_; }
723
724 private:
725  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
726
727  void Write(const char* data, int size) {
728    if (failed_) return;
729    if (size == 0) return;
730
731    if (at_start_of_line_) {
732      // Insert an indent.
733      at_start_of_line_ = false;
734      Write(indent_.data(), indent_.size());
735      if (failed_) return;
736    }
737
738    while (size > buffer_size_) {
739      // Data exceeds space in the buffer.  Copy what we can and request a
740      // new buffer.
741      memcpy(buffer_, data, buffer_size_);
742      data += buffer_size_;
743      size -= buffer_size_;
744      void* void_buffer;
745      failed_ = !output_->Next(&void_buffer, &buffer_size_);
746      if (failed_) return;
747      buffer_ = reinterpret_cast<char*>(void_buffer);
748    }
749
750    // Buffer is big enough to receive the data; copy it.
751    memcpy(buffer_, data, size);
752    buffer_ += size;
753    buffer_size_ -= size;
754  }
755
756  io::ZeroCopyOutputStream* const output_;
757  char* buffer_;
758  int buffer_size_;
759  bool at_start_of_line_;
760  bool failed_;
761
762  string indent_;
763  int initial_indent_level_;
764};
765
766// ===========================================================================
767
768TextFormat::Parser::Parser()
769  : error_collector_(NULL),
770    allow_partial_(false) {}
771
772TextFormat::Parser::~Parser() {}
773
774bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
775                               Message* output) {
776  output->Clear();
777  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
778                    ParserImpl::FORBID_SINGULAR_OVERWRITES);
779  return MergeUsingImpl(input, output, &parser);
780}
781
782bool TextFormat::Parser::ParseFromString(const string& input,
783                                         Message* output) {
784  io::ArrayInputStream input_stream(input.data(), input.size());
785  return Parse(&input_stream, output);
786}
787
788bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
789                               Message* output) {
790  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
791                    ParserImpl::ALLOW_SINGULAR_OVERWRITES);
792  return MergeUsingImpl(input, output, &parser);
793}
794
795bool TextFormat::Parser::MergeFromString(const string& input,
796                                         Message* output) {
797  io::ArrayInputStream input_stream(input.data(), input.size());
798  return Merge(&input_stream, output);
799}
800
801bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
802                                        Message* output,
803                                        ParserImpl* parser_impl) {
804  if (!parser_impl->Parse(output)) return false;
805  if (!allow_partial_ && !output->IsInitialized()) {
806    vector<string> missing_fields;
807    output->FindInitializationErrors(&missing_fields);
808    parser_impl->ReportError(-1, 0, "Message missing required fields: " +
809                                    JoinStrings(missing_fields, ", "));
810    return false;
811  }
812  return true;
813}
814
815bool TextFormat::Parser::ParseFieldValueFromString(
816    const string& input,
817    const FieldDescriptor* field,
818    Message* output) {
819  io::ArrayInputStream input_stream(input.data(), input.size());
820  ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
821                    ParserImpl::ALLOW_SINGULAR_OVERWRITES);
822  return parser.ParseField(field, output);
823}
824
825/* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
826                                    Message* output) {
827  return Parser().Parse(input, output);
828}
829
830/* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
831                                    Message* output) {
832  return Parser().Merge(input, output);
833}
834
835/* static */ bool TextFormat::ParseFromString(const string& input,
836                                              Message* output) {
837  return Parser().ParseFromString(input, output);
838}
839
840/* static */ bool TextFormat::MergeFromString(const string& input,
841                                              Message* output) {
842  return Parser().MergeFromString(input, output);
843}
844
845// ===========================================================================
846
847TextFormat::Printer::Printer()
848  : initial_indent_level_(0),
849    single_line_mode_(false),
850    use_short_repeated_primitives_(false),
851    utf8_string_escaping_(false) {}
852
853TextFormat::Printer::~Printer() {}
854
855bool TextFormat::Printer::PrintToString(const Message& message,
856                                        string* output) {
857  GOOGLE_DCHECK(output) << "output specified is NULL";
858
859  output->clear();
860  io::StringOutputStream output_stream(output);
861
862  bool result = Print(message, &output_stream);
863
864  return result;
865}
866
867bool TextFormat::Printer::PrintUnknownFieldsToString(
868    const UnknownFieldSet& unknown_fields,
869    string* output) {
870  GOOGLE_DCHECK(output) << "output specified is NULL";
871
872  output->clear();
873  io::StringOutputStream output_stream(output);
874  return PrintUnknownFields(unknown_fields, &output_stream);
875}
876
877bool TextFormat::Printer::Print(const Message& message,
878                                io::ZeroCopyOutputStream* output) {
879  TextGenerator generator(output, initial_indent_level_);
880
881  Print(message, generator);
882
883  // Output false if the generator failed internally.
884  return !generator.failed();
885}
886
887bool TextFormat::Printer::PrintUnknownFields(
888    const UnknownFieldSet& unknown_fields,
889    io::ZeroCopyOutputStream* output) {
890  TextGenerator generator(output, initial_indent_level_);
891
892  PrintUnknownFields(unknown_fields, generator);
893
894  // Output false if the generator failed internally.
895  return !generator.failed();
896}
897
898void TextFormat::Printer::Print(const Message& message,
899                                TextGenerator& generator) {
900  const Reflection* reflection = message.GetReflection();
901  vector<const FieldDescriptor*> fields;
902  reflection->ListFields(message, &fields);
903  for (int i = 0; i < fields.size(); i++) {
904    PrintField(message, reflection, fields[i], generator);
905  }
906  PrintUnknownFields(reflection->GetUnknownFields(message), generator);
907}
908
909void TextFormat::Printer::PrintFieldValueToString(
910    const Message& message,
911    const FieldDescriptor* field,
912    int index,
913    string* output) {
914
915  GOOGLE_DCHECK(output) << "output specified is NULL";
916
917  output->clear();
918  io::StringOutputStream output_stream(output);
919  TextGenerator generator(&output_stream, initial_indent_level_);
920
921  PrintFieldValue(message, message.GetReflection(), field, index, generator);
922}
923
924void TextFormat::Printer::PrintField(const Message& message,
925                                     const Reflection* reflection,
926                                     const FieldDescriptor* field,
927                                     TextGenerator& generator) {
928  if (use_short_repeated_primitives_ &&
929      field->is_repeated() &&
930      field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
931      field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
932    PrintShortRepeatedField(message, reflection, field, generator);
933    return;
934  }
935
936  int count = 0;
937
938  if (field->is_repeated()) {
939    count = reflection->FieldSize(message, field);
940  } else if (reflection->HasField(message, field)) {
941    count = 1;
942  }
943
944  for (int j = 0; j < count; ++j) {
945    PrintFieldName(message, reflection, field, generator);
946
947    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
948      if (single_line_mode_) {
949        generator.Print(" { ");
950      } else {
951        generator.Print(" {\n");
952        generator.Indent();
953      }
954    } else {
955      generator.Print(": ");
956    }
957
958    // Write the field value.
959    int field_index = j;
960    if (!field->is_repeated()) {
961      field_index = -1;
962    }
963
964    PrintFieldValue(message, reflection, field, field_index, generator);
965
966    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
967      if (single_line_mode_) {
968        generator.Print("} ");
969      } else {
970        generator.Outdent();
971        generator.Print("}\n");
972      }
973    } else {
974      if (single_line_mode_) {
975        generator.Print(" ");
976      } else {
977        generator.Print("\n");
978      }
979    }
980  }
981}
982
983void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
984                                                  const Reflection* reflection,
985                                                  const FieldDescriptor* field,
986                                                  TextGenerator& generator) {
987  // Print primitive repeated field in short form.
988  PrintFieldName(message, reflection, field, generator);
989
990  int size = reflection->FieldSize(message, field);
991  generator.Print(": [");
992  for (int i = 0; i < size; i++) {
993    if (i > 0) generator.Print(", ");
994    PrintFieldValue(message, reflection, field, i, generator);
995  }
996  if (single_line_mode_) {
997    generator.Print("] ");
998  } else {
999    generator.Print("]\n");
1000  }
1001}
1002
1003void TextFormat::Printer::PrintFieldName(const Message& message,
1004                                         const Reflection* reflection,
1005                                         const FieldDescriptor* field,
1006                                         TextGenerator& generator) {
1007  if (field->is_extension()) {
1008    generator.Print("[");
1009    // We special-case MessageSet elements for compatibility with proto1.
1010    if (field->containing_type()->options().message_set_wire_format()
1011        && field->type() == FieldDescriptor::TYPE_MESSAGE
1012        && field->is_optional()
1013        && field->extension_scope() == field->message_type()) {
1014      generator.Print(field->message_type()->full_name());
1015    } else {
1016      generator.Print(field->full_name());
1017    }
1018    generator.Print("]");
1019  } else {
1020    if (field->type() == FieldDescriptor::TYPE_GROUP) {
1021      // Groups must be serialized with their original capitalization.
1022      generator.Print(field->message_type()->name());
1023    } else {
1024      generator.Print(field->name());
1025    }
1026  }
1027}
1028
1029void TextFormat::Printer::PrintFieldValue(
1030    const Message& message,
1031    const Reflection* reflection,
1032    const FieldDescriptor* field,
1033    int index,
1034    TextGenerator& generator) {
1035  GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1036      << "Index must be -1 for non-repeated fields";
1037
1038  switch (field->cpp_type()) {
1039#define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING)                             \
1040      case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
1041        generator.Print(TO_STRING(field->is_repeated() ?                     \
1042          reflection->GetRepeated##METHOD(message, field, index) :           \
1043          reflection->Get##METHOD(message, field)));                         \
1044        break;                                                               \
1045
1046      OUTPUT_FIELD( INT32,  Int32, SimpleItoa);
1047      OUTPUT_FIELD( INT64,  Int64, SimpleItoa);
1048      OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
1049      OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
1050      OUTPUT_FIELD( FLOAT,  Float, SimpleFtoa);
1051      OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
1052#undef OUTPUT_FIELD
1053
1054      case FieldDescriptor::CPPTYPE_STRING: {
1055        string scratch;
1056        const string& value = field->is_repeated() ?
1057            reflection->GetRepeatedStringReference(
1058              message, field, index, &scratch) :
1059            reflection->GetStringReference(message, field, &scratch);
1060
1061        generator.Print("\"");
1062        if (utf8_string_escaping_) {
1063          generator.Print(strings::Utf8SafeCEscape(value));
1064        } else {
1065          generator.Print(CEscape(value));
1066        }
1067        generator.Print("\"");
1068
1069        break;
1070      }
1071
1072      case FieldDescriptor::CPPTYPE_BOOL:
1073        if (field->is_repeated()) {
1074          generator.Print(reflection->GetRepeatedBool(message, field, index)
1075                          ? "true" : "false");
1076        } else {
1077          generator.Print(reflection->GetBool(message, field)
1078                          ? "true" : "false");
1079        }
1080        break;
1081
1082      case FieldDescriptor::CPPTYPE_ENUM:
1083        generator.Print(field->is_repeated() ?
1084          reflection->GetRepeatedEnum(message, field, index)->name() :
1085          reflection->GetEnum(message, field)->name());
1086        break;
1087
1088      case FieldDescriptor::CPPTYPE_MESSAGE:
1089        Print(field->is_repeated() ?
1090                reflection->GetRepeatedMessage(message, field, index) :
1091                reflection->GetMessage(message, field),
1092              generator);
1093        break;
1094  }
1095}
1096
1097/* static */ bool TextFormat::Print(const Message& message,
1098                                    io::ZeroCopyOutputStream* output) {
1099  return Printer().Print(message, output);
1100}
1101
1102/* static */ bool TextFormat::PrintUnknownFields(
1103    const UnknownFieldSet& unknown_fields,
1104    io::ZeroCopyOutputStream* output) {
1105  return Printer().PrintUnknownFields(unknown_fields, output);
1106}
1107
1108/* static */ bool TextFormat::PrintToString(
1109    const Message& message, string* output) {
1110  return Printer().PrintToString(message, output);
1111}
1112
1113/* static */ bool TextFormat::PrintUnknownFieldsToString(
1114    const UnknownFieldSet& unknown_fields, string* output) {
1115  return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1116}
1117
1118/* static */ void TextFormat::PrintFieldValueToString(
1119    const Message& message,
1120    const FieldDescriptor* field,
1121    int index,
1122    string* output) {
1123  return Printer().PrintFieldValueToString(message, field, index, output);
1124}
1125
1126/* static */ bool TextFormat::ParseFieldValueFromString(
1127    const string& input,
1128    const FieldDescriptor* field,
1129    Message* message) {
1130  return Parser().ParseFieldValueFromString(input, field, message);
1131}
1132
1133// Prints an integer as hex with a fixed number of digits dependent on the
1134// integer type.
1135template<typename IntType>
1136static string PaddedHex(IntType value) {
1137  string result;
1138  result.reserve(sizeof(value) * 2);
1139  for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1140    result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1141  }
1142  return result;
1143}
1144
1145void TextFormat::Printer::PrintUnknownFields(
1146    const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
1147  for (int i = 0; i < unknown_fields.field_count(); i++) {
1148    const UnknownField& field = unknown_fields.field(i);
1149    string field_number = SimpleItoa(field.number());
1150
1151    switch (field.type()) {
1152      case UnknownField::TYPE_VARINT:
1153        generator.Print(field_number);
1154        generator.Print(": ");
1155        generator.Print(SimpleItoa(field.varint()));
1156        if (single_line_mode_) {
1157          generator.Print(" ");
1158        } else {
1159          generator.Print("\n");
1160        }
1161        break;
1162      case UnknownField::TYPE_FIXED32: {
1163        generator.Print(field_number);
1164        generator.Print(": 0x");
1165        char buffer[kFastToBufferSize];
1166        generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
1167        if (single_line_mode_) {
1168          generator.Print(" ");
1169        } else {
1170          generator.Print("\n");
1171        }
1172        break;
1173      }
1174      case UnknownField::TYPE_FIXED64: {
1175        generator.Print(field_number);
1176        generator.Print(": 0x");
1177        char buffer[kFastToBufferSize];
1178        generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
1179        if (single_line_mode_) {
1180          generator.Print(" ");
1181        } else {
1182          generator.Print("\n");
1183        }
1184        break;
1185      }
1186      case UnknownField::TYPE_LENGTH_DELIMITED: {
1187        generator.Print(field_number);
1188        const string& value = field.length_delimited();
1189        UnknownFieldSet embedded_unknown_fields;
1190        if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1191          // This field is parseable as a Message.
1192          // So it is probably an embedded message.
1193          if (single_line_mode_) {
1194            generator.Print(" { ");
1195          } else {
1196            generator.Print(" {\n");
1197            generator.Indent();
1198          }
1199          PrintUnknownFields(embedded_unknown_fields, generator);
1200          if (single_line_mode_) {
1201            generator.Print("} ");
1202          } else {
1203            generator.Outdent();
1204            generator.Print("}\n");
1205          }
1206        } else {
1207          // This field is not parseable as a Message.
1208          // So it is probably just a plain string.
1209          generator.Print(": \"");
1210          generator.Print(CEscape(value));
1211          generator.Print("\"");
1212          if (single_line_mode_) {
1213            generator.Print(" ");
1214          } else {
1215            generator.Print("\n");
1216          }
1217        }
1218        break;
1219      }
1220      case UnknownField::TYPE_GROUP:
1221        generator.Print(field_number);
1222        if (single_line_mode_) {
1223          generator.Print(" { ");
1224        } else {
1225          generator.Print(" {\n");
1226          generator.Indent();
1227        }
1228        PrintUnknownFields(field.group(), generator);
1229        if (single_line_mode_) {
1230          generator.Print("} ");
1231        } else {
1232          generator.Outdent();
1233          generator.Print("}\n");
1234        }
1235        break;
1236    }
1237  }
1238}
1239
1240}  // namespace protobuf
1241}  // namespace google
1242