1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// Utilities for printing and parsing protocol messages in a human-readable,
36// text-based format.
37
38#ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39#define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40
41#include <map>
42#include <memory>
43#include <string>
44#include <vector>
45
46#include <google/protobuf/stubs/common.h>
47#include <google/protobuf/descriptor.h>
48#include <google/protobuf/message.h>
49
50namespace google {
51namespace protobuf {
52
// Forward declaration only.  This header refers to io::ErrorCollector solely
// through pointers (see TextFormat::Parser::RecordErrorsTo and the
// error_collector_ member), so the declaration below is sufficient here.
namespace io {
  class ErrorCollector;      // tokenizer.h
}
56
57// This class implements protocol buffer text format.  Printing and parsing
58// protocol messages in text format is useful for debugging and human editing
59// of messages.
60//
// This class is really a namespace: it contains only static methods and
// nested helper classes.
62class LIBPROTOBUF_EXPORT TextFormat {
63 public:
  // Outputs a textual representation of the given message to the given
  // output stream.
  //
  //   message: the message to print.
  //   output:  the stream that receives the text.
  //
  // The meaning of the bool result is defined by the implementation, which is
  // not part of this header (presumably true on success -- confirm in
  // text_format.cc).
  static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
67
  // Print the fields in an UnknownFieldSet.  They are printed by tag number
  // only.  Embedded messages are heuristically identified by attempting to
  // parse them.
  //
  //   unknown_fields: the set of unknown fields to print.
  //   output:         the stream that receives the text.
  //
  // The bool result presumably reports success -- confirm in text_format.cc.
  static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
                                 io::ZeroCopyOutputStream* output);
73
  // Like Print(), but outputs directly to a string.
  //
  //   message: the message to print.
  //   output:  the string that receives the text.  Whether previous contents
  //            are replaced or appended to is decided by the implementation
  //            (NOTE(review): confirm in text_format.cc).
  static bool PrintToString(const Message& message, string* output);
76
  // Like PrintUnknownFields(), but outputs directly to a string.
  //
  //   unknown_fields: the set of unknown fields to print.
  //   output:         the string that receives the text.
  static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
                                         string* output);
80
  // Outputs a textual representation of the value of the field supplied on
  // the message supplied. For non-repeated fields, an index of -1 must
  // be supplied. Note that this method will print the default value for a
  // field if it is not set.
  //
  //   message: the message that holds the field.
  //   field:   descriptor of the field whose value is printed.
  //   index:   -1 for non-repeated fields; for repeated fields, presumably
  //            the position of the element to print (confirm in
  //            text_format.cc).
  //   output:  the string that receives the text.
  //
  // Unlike the other Print*() helpers, this one returns void.
  static void PrintFieldValueToString(const Message& message,
                                      const FieldDescriptor* field,
                                      int index,
                                      string* output);
89
  // The default printer that converts scalar values from fields into
  // their string representation.
  // You can derive from this FieldValuePrinter if you want to have
  // fields to be printed in a different way and register it at the
  // Printer.
  //
  // Every hook is a const virtual method that returns the produced text by
  // value.  Printer takes ownership of registered printers through
  // "const FieldValuePrinter*" (see Printer::SetDefaultFieldValuePrinter and
  // Printer::RegisterFieldValuePrinter), which is why the destructor is
  // virtual.
  class LIBPROTOBUF_EXPORT FieldValuePrinter {
   public:
    FieldValuePrinter();
    virtual ~FieldValuePrinter();

    // One hook per scalar value kind.  Each receives the value and returns
    // its text form.
    virtual string PrintBool(bool val) const;
    virtual string PrintInt32(int32 val) const;
    virtual string PrintUInt32(uint32 val) const;
    virtual string PrintInt64(int64 val) const;
    virtual string PrintUInt64(uint64 val) const;
    virtual string PrintFloat(float val) const;
    virtual string PrintDouble(double val) const;
    virtual string PrintString(const string& val) const;
    virtual string PrintBytes(const string& val) const;

    // Receives both the numeric value of the enum and its name.
    virtual string PrintEnum(int32 val, const string& name) const;

    // Returns the text used for the name of |field| of |message|.
    virtual string PrintFieldName(const Message& message,
                                  const Reflection* reflection,
                                  const FieldDescriptor* field) const;

    // Return the text that opens / closes a nested message.
    // NOTE(review): field_index and field_count presumably identify the
    // position of this value within its field and the number of values in the
    // field -- confirm against the callers in text_format.cc.
    // single_line_mode tells the hook whether single line output is in use.
    virtual string PrintMessageStart(const Message& message,
                                     int field_index,
                                     int field_count,
                                     bool single_line_mode) const;
    virtual string PrintMessageEnd(const Message& message,
                                   int field_index,
                                   int field_count,
                                   bool single_line_mode) const;

   private:
    // Presumably disables copy construction and assignment (the macro is
    // defined outside this file).
    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
  };
124
125  // Class for those users which require more fine-grained control over how
126  // a protobuffer message is printed out.
127  class LIBPROTOBUF_EXPORT Printer {
128   public:
129    Printer();
130    ~Printer();
131
132    // Like TextFormat::Print
133    bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
134    // Like TextFormat::PrintUnknownFields
135    bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
136                            io::ZeroCopyOutputStream* output) const;
137    // Like TextFormat::PrintToString
138    bool PrintToString(const Message& message, string* output) const;
139    // Like TextFormat::PrintUnknownFieldsToString
140    bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
141                                    string* output) const;
142    // Like TextFormat::PrintFieldValueToString
143    void PrintFieldValueToString(const Message& message,
144                                 const FieldDescriptor* field,
145                                 int index,
146                                 string* output) const;
147
148    // Adjust the initial indent level of all output.  Each indent level is
149    // equal to two spaces.
150    void SetInitialIndentLevel(int indent_level) {
151      initial_indent_level_ = indent_level;
152    }
153
154    // If printing in single line mode, then the entire message will be output
155    // on a single line with no line breaks.
156    void SetSingleLineMode(bool single_line_mode) {
157      single_line_mode_ = single_line_mode;
158    }
159
160    bool IsInSingleLineMode() {
161      return single_line_mode_;
162    }
163
164    // If use_field_number is true, uses field number instead of field name.
165    void SetUseFieldNumber(bool use_field_number) {
166      use_field_number_ = use_field_number;
167    }
168
169    // Set true to print repeated primitives in a format like:
170    //   field_name: [1, 2, 3, 4]
171    // instead of printing each value on its own line.  Short format applies
172    // only to primitive values -- i.e. everything except strings and
173    // sub-messages/groups.
174    void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
175      use_short_repeated_primitives_ = use_short_repeated_primitives;
176    }
177
178    // Set true to output UTF-8 instead of ASCII.  The only difference
179    // is that bytes >= 0x80 in string fields will not be escaped,
180    // because they are assumed to be part of UTF-8 multi-byte
181    // sequences. This will change the default FieldValuePrinter.
182    void SetUseUtf8StringEscaping(bool as_utf8);
183
184    // Set the default FieldValuePrinter that is used for all fields that
185    // don't have a field-specific printer registered.
186    // Takes ownership of the printer.
187    void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
188
189    // Sets whether we want to hide unknown fields or not.
190    // Usually unknown fields are printed in a generic way that includes the
191    // tag number of the field instead of field name. However, sometimes it
192    // is useful to be able to print the message without unknown fields (e.g.
193    // for the python protobuf version to maintain consistency between its pure
194    // python and c++ implementations).
195    void SetHideUnknownFields(bool hide) {
196      hide_unknown_fields_ = hide;
197    }
198
199    // If print_message_fields_in_index_order is true, print fields of a proto
200    // message using the order defined in source code instead of the field
201    // number. By default, use the field number order.
202    void SetPrintMessageFieldsInIndexOrder(
203        bool print_message_fields_in_index_order) {
204      print_message_fields_in_index_order_ =
205          print_message_fields_in_index_order;
206    }
207
208    // Register a custom field-specific FieldValuePrinter for fields
209    // with a particular FieldDescriptor.
210    // Returns "true" if the registration succeeded, or "false", if there is
211    // already a printer for that FieldDescriptor.
212    // Takes ownership of the printer on successful registration.
213    bool RegisterFieldValuePrinter(const FieldDescriptor* field,
214                                   const FieldValuePrinter* printer);
215
216   private:
217    // Forward declaration of an internal class used to print the text
218    // output to the OutputStream (see text_format.cc for implementation).
219    class TextGenerator;
220
221    // Internal Print method, used for writing to the OutputStream via
222    // the TextGenerator class.
223    void Print(const Message& message,
224               TextGenerator& generator) const;
225
226    // Print a single field.
227    void PrintField(const Message& message,
228                    const Reflection* reflection,
229                    const FieldDescriptor* field,
230                    TextGenerator& generator) const;
231
232    // Print a repeated primitive field in short form.
233    void PrintShortRepeatedField(const Message& message,
234                                 const Reflection* reflection,
235                                 const FieldDescriptor* field,
236                                 TextGenerator& generator) const;
237
238    // Print the name of a field -- i.e. everything that comes before the
239    // ':' for a single name/value pair.
240    void PrintFieldName(const Message& message,
241                        const Reflection* reflection,
242                        const FieldDescriptor* field,
243                        TextGenerator& generator) const;
244
245    // Outputs a textual representation of the value of the field supplied on
246    // the message supplied or the default value if not set.
247    void PrintFieldValue(const Message& message,
248                         const Reflection* reflection,
249                         const FieldDescriptor* field,
250                         int index,
251                         TextGenerator& generator) const;
252
253    // Print the fields in an UnknownFieldSet.  They are printed by tag number
254    // only.  Embedded messages are heuristically identified by attempting to
255    // parse them.
256    void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
257                            TextGenerator& generator) const;
258
259    int initial_indent_level_;
260
261    bool single_line_mode_;
262
263    bool use_field_number_;
264
265    bool use_short_repeated_primitives_;
266
267    bool hide_unknown_fields_;
268
269    bool print_message_fields_in_index_order_;
270
271    scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
272    typedef map<const FieldDescriptor*,
273                const FieldValuePrinter*> CustomPrinterMap;
274    CustomPrinterMap custom_printers_;
275  };
276
  // Parses a text-format protocol message from the given input stream to
  // the given message object.  This function parses the format written
  // by Print().
  //
  //   input:  the stream supplying the text.
  //   output: the message that receives the parsed data.
  //
  // The bool result presumably reports whether parsing succeeded -- confirm
  // in text_format.cc.
  static bool Parse(io::ZeroCopyInputStream* input, Message* output);
  // Like Parse(), but reads directly from a string.
  //
  //   input:  the text to parse.
  //   output: the message that receives the parsed data.
  static bool ParseFromString(const string& input, Message* output);
283
  // Like Parse(), but the data is merged into the given message, as if
  // using Message::MergeFrom().
  //
  //   input:  the stream supplying the text.
  //   output: the message the parsed data is merged into.
  static bool Merge(io::ZeroCopyInputStream* input, Message* output);
  // Like Merge(), but reads directly from a string.
  //
  //   input:  the text to parse.
  //   output: the message the parsed data is merged into.
  static bool MergeFromString(const string& input, Message* output);
289
  // Parse the given text as a single field value and store it into the
  // given field of the given message. If the field is a repeated field,
  // the new value will be added to the end of that field.
  //
  //   input:   text of exactly one field value.
  //   field:   descriptor of the field that receives the value.
  //   message: the message that owns the field.
  //
  // The bool result presumably reports whether parsing succeeded -- confirm
  // in text_format.cc.
  static bool ParseFieldValueFromString(const string& input,
                                        const FieldDescriptor* field,
                                        Message* message);
296
  // Interface that TextFormat::Parser can use to find extensions.
  // This class may be extended in the future to find more information
  // like fields, etc.
  //
  // Implement FindExtension() in a subclass and hand an instance to
  // Parser::SetFinder().
  class LIBPROTOBUF_EXPORT Finder {
   public:
    // Virtual so that implementations can be destroyed through a Finder
    // pointer; the definition lives outside this header.
    virtual ~Finder();

    // Try to find an extension of *message by fully-qualified field
    // name.  Returns NULL if no extension is known for this name or number.
    //
    //   message: the message whose extension is being looked up.
    //   name:    the fully-qualified extension name.
    //
    // Pure virtual: every concrete Finder must provide it.
    virtual const FieldDescriptor* FindExtension(
        Message* message,
        const string& name) const = 0;
  };
310
311  // A location in the parsed text.
312  struct ParseLocation {
313    int line;
314    int column;
315
316    ParseLocation() : line(-1), column(-1) {}
317    ParseLocation(int line_param, int column_param)
318        : line(line_param), column(column_param) {}
319  };
320
  // Data structure which is populated with the locations of each field
  // value parsed from the text.
  //
  // Callers pass an instance to Parser::WriteLocationsTo() and query it
  // afterwards with GetLocation() / GetTreeForNested().  The recording
  // methods are private and reachable only through the TextFormat friend
  // declaration.
  class LIBPROTOBUF_EXPORT ParseInfoTree {
   public:
    ParseInfoTree();
    ~ParseInfoTree();

    // Returns the parse location for index-th value of the field in the parsed
    // text. If none exists, returns a location with line = -1. Index should be
    // -1 for not-repeated fields.
    ParseLocation GetLocation(const FieldDescriptor* field, int index) const;

    // Returns the parse info tree for the given field, which must be a message
    // type. The nested information tree is owned by the root tree and will be
    // deleted when it is deleted.
    ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
                                    int index) const;

   private:
    // Allow the text format parser to record information into the tree.
    friend class TextFormat;

    // Records the starting location of a single value for a field.
    void RecordLocation(const FieldDescriptor* field, ParseLocation location);

    // Create and records a nested tree for a nested message field.
    // Returns a pointer to the new tree.
    ParseInfoTree* CreateNested(const FieldDescriptor* field);

    // Maps a field descriptor to the parse locations recorded for that
    // field's values (one vector entry per recorded value).
    typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;

    // Maps a field descriptor to the nested parse info trees recorded for
    // that field's values.
    typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;

    LocationMap locations_;
    NestedMap nested_;

    // Presumably disables copy construction and assignment (the macro is
    // defined outside this file).
    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
  };
361
362  // For more control over parsing, use this class.
363  class LIBPROTOBUF_EXPORT Parser {
364   public:
365    Parser();
366    ~Parser();
367
368    // Like TextFormat::Parse().
369    bool Parse(io::ZeroCopyInputStream* input, Message* output);
370    // Like TextFormat::ParseFromString().
371    bool ParseFromString(const string& input, Message* output);
372    // Like TextFormat::Merge().
373    bool Merge(io::ZeroCopyInputStream* input, Message* output);
374    // Like TextFormat::MergeFromString().
375    bool MergeFromString(const string& input, Message* output);
376
377    // Set where to report parse errors.  If NULL (the default), errors will
378    // be printed to stderr.
379    void RecordErrorsTo(io::ErrorCollector* error_collector) {
380      error_collector_ = error_collector;
381    }
382
383    // Set how parser finds extensions.  If NULL (the default), the
384    // parser will use the standard Reflection object associated with
385    // the message being parsed.
386    void SetFinder(Finder* finder) {
387      finder_ = finder;
388    }
389
390    // Sets where location information about the parse will be written. If NULL
391    // (the default), then no location will be written.
392    void WriteLocationsTo(ParseInfoTree* tree) {
393      parse_info_tree_ = tree;
394    }
395
396    // Normally parsing fails if, after parsing, output->IsInitialized()
397    // returns false.  Call AllowPartialMessage(true) to skip this check.
398    void AllowPartialMessage(bool allow) {
399      allow_partial_ = allow;
400    }
401
402    // Allow field names to be matched case-insensitively.
403    // This is not advisable if there are fields that only differ in case, or
404    // if you want to enforce writing in the canonical form.
405    // This is 'false' by default.
406    void AllowCaseInsensitiveField(bool allow) {
407      allow_case_insensitive_field_ = allow;
408    }
409
410    // Like TextFormat::ParseFieldValueFromString
411    bool ParseFieldValueFromString(const string& input,
412                                   const FieldDescriptor* field,
413                                   Message* output);
414
415
416    void AllowFieldNumber(bool allow) {
417      allow_field_number_ = allow;
418    }
419
420   private:
421    // Forward declaration of an internal class used to parse text
422    // representations (see text_format.cc for implementation).
423    class ParserImpl;
424
425    // Like TextFormat::Merge().  The provided implementation is used
426    // to do the parsing.
427    bool MergeUsingImpl(io::ZeroCopyInputStream* input,
428                        Message* output,
429                        ParserImpl* parser_impl);
430
431    io::ErrorCollector* error_collector_;
432    Finder* finder_;
433    ParseInfoTree* parse_info_tree_;
434    bool allow_partial_;
435    bool allow_case_insensitive_field_;
436    bool allow_unknown_field_;
437    bool allow_unknown_enum_;
438    bool allow_field_number_;
439    bool allow_relaxed_whitespace_;
440    bool allow_singular_overwrites_;
441  };
442
443
444 private:
  // Hack: ParseInfoTree declares TextFormat as a friend which should extend
  // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
  // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
  // helpers for ParserImpl to call methods of ParseInfoTree.
  //
  // Both helpers simply forward to the private ParseInfoTree method of the
  // same name on |info_tree|; their inline definitions follow the class.
  static inline void RecordLocation(ParseInfoTree* info_tree,
                                    const FieldDescriptor* field,
                                    ParseLocation location);
  static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
                                            const FieldDescriptor* field);
454
455  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
456};
457
458inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
459                                       const FieldDescriptor* field,
460                                       ParseLocation location) {
461  info_tree->RecordLocation(field, location);
462}
463
464
465inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
466    ParseInfoTree* info_tree, const FieldDescriptor* field) {
467  return info_tree->CreateNested(field);
468}
469
470}  // namespace protobuf
471
472}  // namespace google
473#endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
474