1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// Utilities for printing and parsing protocol messages in a human-readable,
36// text-based format.
37
38#ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39#define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40
41#include <map>
42#include <memory>
43#ifndef _SHARED_PTR_H
44#include <google/protobuf/stubs/shared_ptr.h>
45#endif
46#include <string>
47#include <vector>
48
49#include <google/protobuf/stubs/common.h>
50#include <google/protobuf/descriptor.h>
51#include <google/protobuf/message.h>
52
53namespace google {
54namespace protobuf {
55
namespace io {
// Forward declaration only; the full definition is in tokenizer.h.
class ErrorCollector;
}  // namespace io
59
60// This class implements protocol buffer text format.  Printing and parsing
61// protocol messages in text format is useful for debugging and human editing
62// of messages.
63//
64// This class is really a namespace that contains only static methods.
65class LIBPROTOBUF_EXPORT TextFormat {
66 public:
67  // Outputs a textual representation of the given message to the given
68  // output stream.
69  static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
70
71  // Print the fields in an UnknownFieldSet.  They are printed by tag number
72  // only.  Embedded messages are heuristically identified by attempting to
73  // parse them.
74  static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
75                                 io::ZeroCopyOutputStream* output);
76
77  // Like Print(), but outputs directly to a string.
78  static bool PrintToString(const Message& message, string* output);
79
80  // Like PrintUnknownFields(), but outputs directly to a string.
81  static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
82                                         string* output);
83
84  // Outputs a textual representation of the value of the field supplied on
85  // the message supplied. For non-repeated fields, an index of -1 must
86  // be supplied. Note that this method will print the default value for a
87  // field if it is not set.
88  static void PrintFieldValueToString(const Message& message,
89                                      const FieldDescriptor* field,
90                                      int index,
91                                      string* output);
92
93  // The default printer that converts scalar values from fields into
94  // their string representation.
95  // You can derive from this FieldValuePrinter if you want to have
96  // fields to be printed in a different way and register it at the
97  // Printer.
98  class LIBPROTOBUF_EXPORT FieldValuePrinter {
99   public:
100    FieldValuePrinter();
101    virtual ~FieldValuePrinter();
102    virtual string PrintBool(bool val) const;
103    virtual string PrintInt32(int32 val) const;
104    virtual string PrintUInt32(uint32 val) const;
105    virtual string PrintInt64(int64 val) const;
106    virtual string PrintUInt64(uint64 val) const;
107    virtual string PrintFloat(float val) const;
108    virtual string PrintDouble(double val) const;
109    virtual string PrintString(const string& val) const;
110    virtual string PrintBytes(const string& val) const;
111    virtual string PrintEnum(int32 val, const string& name) const;
112    virtual string PrintFieldName(const Message& message,
113                                  const Reflection* reflection,
114                                  const FieldDescriptor* field) const;
115    virtual string PrintMessageStart(const Message& message,
116                                     int field_index,
117                                     int field_count,
118                                     bool single_line_mode) const;
119    virtual string PrintMessageEnd(const Message& message,
120                                   int field_index,
121                                   int field_count,
122                                   bool single_line_mode) const;
123
124   private:
125    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
126  };
127
128  // Class for those users which require more fine-grained control over how
129  // a protobuffer message is printed out.
130  class LIBPROTOBUF_EXPORT Printer {
131   public:
132    Printer();
133    ~Printer();
134
135    // Like TextFormat::Print
136    bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
137    // Like TextFormat::PrintUnknownFields
138    bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
139                            io::ZeroCopyOutputStream* output) const;
140    // Like TextFormat::PrintToString
141    bool PrintToString(const Message& message, string* output) const;
142    // Like TextFormat::PrintUnknownFieldsToString
143    bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
144                                    string* output) const;
145    // Like TextFormat::PrintFieldValueToString
146    void PrintFieldValueToString(const Message& message,
147                                 const FieldDescriptor* field,
148                                 int index,
149                                 string* output) const;
150
151    // Adjust the initial indent level of all output.  Each indent level is
152    // equal to two spaces.
153    void SetInitialIndentLevel(int indent_level) {
154      initial_indent_level_ = indent_level;
155    }
156
157    // If printing in single line mode, then the entire message will be output
158    // on a single line with no line breaks.
159    void SetSingleLineMode(bool single_line_mode) {
160      single_line_mode_ = single_line_mode;
161    }
162
163    bool IsInSingleLineMode() {
164      return single_line_mode_;
165    }
166
167    // If use_field_number is true, uses field number instead of field name.
168    void SetUseFieldNumber(bool use_field_number) {
169      use_field_number_ = use_field_number;
170    }
171
172    // Set true to print repeated primitives in a format like:
173    //   field_name: [1, 2, 3, 4]
174    // instead of printing each value on its own line.  Short format applies
175    // only to primitive values -- i.e. everything except strings and
176    // sub-messages/groups.
177    void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
178      use_short_repeated_primitives_ = use_short_repeated_primitives;
179    }
180
181    // Set true to output UTF-8 instead of ASCII.  The only difference
182    // is that bytes >= 0x80 in string fields will not be escaped,
183    // because they are assumed to be part of UTF-8 multi-byte
184    // sequences. This will change the default FieldValuePrinter.
185    void SetUseUtf8StringEscaping(bool as_utf8);
186
187    // Set the default FieldValuePrinter that is used for all fields that
188    // don't have a field-specific printer registered.
189    // Takes ownership of the printer.
190    void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
191
192    // Sets whether we want to hide unknown fields or not.
193    // Usually unknown fields are printed in a generic way that includes the
194    // tag number of the field instead of field name. However, sometimes it
195    // is useful to be able to print the message without unknown fields (e.g.
196    // for the python protobuf version to maintain consistency between its pure
197    // python and c++ implementations).
198    void SetHideUnknownFields(bool hide) {
199      hide_unknown_fields_ = hide;
200    }
201
202    // If print_message_fields_in_index_order is true, print fields of a proto
203    // message using the order defined in source code instead of the field
204    // number. By default, use the field number order.
205    void SetPrintMessageFieldsInIndexOrder(
206        bool print_message_fields_in_index_order) {
207      print_message_fields_in_index_order_ =
208          print_message_fields_in_index_order;
209    }
210
211    // If expand==true, expand google.protobuf.Any payloads. The output
212    // will be of form
213    //    [type_url] { <value_printed_in_text> }
214    //
215    // If expand==false, print Any using the default printer. The output will
216    // look like
217    //    type_url: "<type_url>"  value: "serialized_content"
218    void SetExpandAny(bool expand) {
219      expand_any_ = expand;
220    }
221
222    // If non-zero, we truncate all string fields that are  longer than this
223    // threshold.  This is useful when the proto message has very long strings,
224    // e.g., dump of encoded image file.
225    //
226    // NOTE(hfgong):  Setting a non-zero value breaks round-trip safe
227    // property of TextFormat::Printer.  That is, from the printed message, we
228    // cannot fully recover the original string field any more.
229    void SetTruncateStringFieldLongerThan(
230        const int64 truncate_string_field_longer_than) {
231      truncate_string_field_longer_than_ = truncate_string_field_longer_than;
232    }
233
234    // Register a custom field-specific FieldValuePrinter for fields
235    // with a particular FieldDescriptor.
236    // Returns "true" if the registration succeeded, or "false", if there is
237    // already a printer for that FieldDescriptor.
238    // Takes ownership of the printer on successful registration.
239    bool RegisterFieldValuePrinter(const FieldDescriptor* field,
240                                   const FieldValuePrinter* printer);
241
242   private:
243    // Forward declaration of an internal class used to print the text
244    // output to the OutputStream (see text_format.cc for implementation).
245    class TextGenerator;
246
247    // Internal Print method, used for writing to the OutputStream via
248    // the TextGenerator class.
249    void Print(const Message& message,
250               TextGenerator& generator) const;
251
252    // Print a single field.
253    void PrintField(const Message& message,
254                    const Reflection* reflection,
255                    const FieldDescriptor* field,
256                    TextGenerator& generator) const;
257
258    // Print a repeated primitive field in short form.
259    void PrintShortRepeatedField(const Message& message,
260                                 const Reflection* reflection,
261                                 const FieldDescriptor* field,
262                                 TextGenerator& generator) const;
263
264    // Print the name of a field -- i.e. everything that comes before the
265    // ':' for a single name/value pair.
266    void PrintFieldName(const Message& message,
267                        const Reflection* reflection,
268                        const FieldDescriptor* field,
269                        TextGenerator& generator) const;
270
271    // Outputs a textual representation of the value of the field supplied on
272    // the message supplied or the default value if not set.
273    void PrintFieldValue(const Message& message,
274                         const Reflection* reflection,
275                         const FieldDescriptor* field,
276                         int index,
277                         TextGenerator& generator) const;
278
279    // Print the fields in an UnknownFieldSet.  They are printed by tag number
280    // only.  Embedded messages are heuristically identified by attempting to
281    // parse them.
282    void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
283                            TextGenerator& generator) const;
284
285    bool PrintAny(const Message& message, TextGenerator& generator) const;
286
287    int initial_indent_level_;
288
289    bool single_line_mode_;
290
291    bool use_field_number_;
292
293    bool use_short_repeated_primitives_;
294
295    bool hide_unknown_fields_;
296
297    bool print_message_fields_in_index_order_;
298
299    bool expand_any_;
300
301    int64 truncate_string_field_longer_than_;
302
303    google::protobuf::scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
304    typedef map<const FieldDescriptor*,
305                const FieldValuePrinter*> CustomPrinterMap;
306    CustomPrinterMap custom_printers_;
307  };
308
309  // Parses a text-format protocol message from the given input stream to
310  // the given message object. This function parses the human-readable format
311  // written by Print(). Returns true on success. The message is cleared first,
312  // even if the function fails -- See Merge() to avoid this behavior.
313  //
314  // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
315  //
316  // One use for this function is parsing handwritten strings in test code.
317  // Another use is to parse the output from google::protobuf::Message::DebugString()
318  // (or ShortDebugString()), because these functions output using
319  // google::protobuf::TextFormat::Print().
320  //
321  // If you would like to read a protocol buffer serialized in the
322  // (non-human-readable) binary wire format, see
323  // google::protobuf::MessageLite::ParseFromString().
324  static bool Parse(io::ZeroCopyInputStream* input, Message* output);
325  // Like Parse(), but reads directly from a string.
326  static bool ParseFromString(const string& input, Message* output);
327
328  // Like Parse(), but the data is merged into the given message, as if
329  // using Message::MergeFrom().
330  static bool Merge(io::ZeroCopyInputStream* input, Message* output);
331  // Like Merge(), but reads directly from a string.
332  static bool MergeFromString(const string& input, Message* output);
333
334  // Parse the given text as a single field value and store it into the
335  // given field of the given message. If the field is a repeated field,
336  // the new value will be added to the end
337  static bool ParseFieldValueFromString(const string& input,
338                                        const FieldDescriptor* field,
339                                        Message* message);
340
341  // Interface that TextFormat::Parser can use to find extensions.
342  // This class may be extended in the future to find more information
343  // like fields, etc.
344  class LIBPROTOBUF_EXPORT Finder {
345   public:
346    virtual ~Finder();
347
348    // Try to find an extension of *message by fully-qualified field
349    // name.  Returns NULL if no extension is known for this name or number.
350    virtual const FieldDescriptor* FindExtension(
351        Message* message,
352        const string& name) const = 0;
353  };
354
355  // A location in the parsed text.
356  struct ParseLocation {
357    int line;
358    int column;
359
360    ParseLocation() : line(-1), column(-1) {}
361    ParseLocation(int line_param, int column_param)
362        : line(line_param), column(column_param) {}
363  };
364
365  // Data structure which is populated with the locations of each field
366  // value parsed from the text.
367  class LIBPROTOBUF_EXPORT ParseInfoTree {
368   public:
369    ParseInfoTree();
370    ~ParseInfoTree();
371
372    // Returns the parse location for index-th value of the field in the parsed
373    // text. If none exists, returns a location with line = -1. Index should be
374    // -1 for not-repeated fields.
375    ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
376
377    // Returns the parse info tree for the given field, which must be a message
378    // type. The nested information tree is owned by the root tree and will be
379    // deleted when it is deleted.
380    ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
381                                    int index) const;
382
383   private:
384    // Allow the text format parser to record information into the tree.
385    friend class TextFormat;
386
387    // Records the starting location of a single value for a field.
388    void RecordLocation(const FieldDescriptor* field, ParseLocation location);
389
390    // Create and records a nested tree for a nested message field.
391    ParseInfoTree* CreateNested(const FieldDescriptor* field);
392
393    // Defines the map from the index-th field descriptor to its parse location.
394    typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
395
396    // Defines the map from the index-th field descriptor to the nested parse
397    // info tree.
398    typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
399
400    LocationMap locations_;
401    NestedMap nested_;
402
403    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
404  };
405
406  // For more control over parsing, use this class.
407  class LIBPROTOBUF_EXPORT Parser {
408   public:
409    Parser();
410    ~Parser();
411
412    // Like TextFormat::Parse().
413    bool Parse(io::ZeroCopyInputStream* input, Message* output);
414    // Like TextFormat::ParseFromString().
415    bool ParseFromString(const string& input, Message* output);
416    // Like TextFormat::Merge().
417    bool Merge(io::ZeroCopyInputStream* input, Message* output);
418    // Like TextFormat::MergeFromString().
419    bool MergeFromString(const string& input, Message* output);
420
421    // Set where to report parse errors.  If NULL (the default), errors will
422    // be printed to stderr.
423    void RecordErrorsTo(io::ErrorCollector* error_collector) {
424      error_collector_ = error_collector;
425    }
426
427    // Set how parser finds extensions.  If NULL (the default), the
428    // parser will use the standard Reflection object associated with
429    // the message being parsed.
430    void SetFinder(Finder* finder) {
431      finder_ = finder;
432    }
433
434    // Sets where location information about the parse will be written. If NULL
435    // (the default), then no location will be written.
436    void WriteLocationsTo(ParseInfoTree* tree) {
437      parse_info_tree_ = tree;
438    }
439
440    // Normally parsing fails if, after parsing, output->IsInitialized()
441    // returns false.  Call AllowPartialMessage(true) to skip this check.
442    void AllowPartialMessage(bool allow) {
443      allow_partial_ = allow;
444    }
445
446    // Allow field names to be matched case-insensitively.
447    // This is not advisable if there are fields that only differ in case, or
448    // if you want to enforce writing in the canonical form.
449    // This is 'false' by default.
450    void AllowCaseInsensitiveField(bool allow) {
451      allow_case_insensitive_field_ = allow;
452    }
453
454    // Like TextFormat::ParseFieldValueFromString
455    bool ParseFieldValueFromString(const string& input,
456                                   const FieldDescriptor* field,
457                                   Message* output);
458
459
460    void AllowFieldNumber(bool allow) {
461      allow_field_number_ = allow;
462    }
463
464   private:
465    // Forward declaration of an internal class used to parse text
466    // representations (see text_format.cc for implementation).
467    class ParserImpl;
468
469    // Like TextFormat::Merge().  The provided implementation is used
470    // to do the parsing.
471    bool MergeUsingImpl(io::ZeroCopyInputStream* input,
472                        Message* output,
473                        ParserImpl* parser_impl);
474
475    io::ErrorCollector* error_collector_;
476    Finder* finder_;
477    ParseInfoTree* parse_info_tree_;
478    bool allow_partial_;
479    bool allow_case_insensitive_field_;
480    bool allow_unknown_field_;
481    bool allow_unknown_enum_;
482    bool allow_field_number_;
483    bool allow_relaxed_whitespace_;
484    bool allow_singular_overwrites_;
485  };
486
487
488 private:
489  // Hack: ParseInfoTree declares TextFormat as a friend which should extend
490  // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
491  // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
492  // helpers for ParserImpl to call methods of ParseInfoTree.
493  static inline void RecordLocation(ParseInfoTree* info_tree,
494                                    const FieldDescriptor* field,
495                                    ParseLocation location);
496  static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
497                                            const FieldDescriptor* field);
498
499  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
500};
501
502inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
503                                       const FieldDescriptor* field,
504                                       ParseLocation location) {
505  info_tree->RecordLocation(field, location);
506}
507
508
509inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
510    ParseInfoTree* info_tree, const FieldDescriptor* field) {
511  return info_tree->CreateNested(field);
512}
513
514}  // namespace protobuf
515
516}  // namespace google
517#endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
518