1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// Utilities for printing and parsing protocol messages in a human-readable,
36// text-based format.
37
38#ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39#define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40
41#include <map>
42#include <string>
43#include <vector>
44#include <google/protobuf/stubs/common.h>
45#include <google/protobuf/message.h>
46#include <google/protobuf/descriptor.h>
47
48namespace google {
49namespace protobuf {
50
namespace io {
  // Forward declarations of the stream and error-reporting types that the
  // declarations in this header refer to only through pointers.  The input
  // and output stream types are declared explicitly so that this header does
  // not rely on another header happening to declare them.
  class ErrorCollector;         // tokenizer.h
  class ZeroCopyInputStream;    // zero_copy_stream.h
  class ZeroCopyOutputStream;   // zero_copy_stream.h
}
54
55// This class implements protocol buffer text format.  Printing and parsing
56// protocol messages in text format is useful for debugging and human editing
57// of messages.
58//
59// This class is really a namespace that contains only static methods.
60class LIBPROTOBUF_EXPORT TextFormat {
61 public:
62  // Outputs a textual representation of the given message to the given
63  // output stream.
64  static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
65
66  // Print the fields in an UnknownFieldSet.  They are printed by tag number
67  // only.  Embedded messages are heuristically identified by attempting to
68  // parse them.
69  static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
70                                 io::ZeroCopyOutputStream* output);
71
72  // Like Print(), but outputs directly to a string.
73  static bool PrintToString(const Message& message, string* output);
74
75  // Like PrintUnknownFields(), but outputs directly to a string.
76  static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
77                                         string* output);
78
79  // Outputs a textual representation of the value of the field supplied on
80  // the message supplied. For non-repeated fields, an index of -1 must
81  // be supplied. Note that this method will print the default value for a
82  // field if it is not set.
83  static void PrintFieldValueToString(const Message& message,
84                                      const FieldDescriptor* field,
85                                      int index,
86                                      string* output);
87
88  // Class for those users which require more fine-grained control over how
89  // a protobuffer message is printed out.
90  class LIBPROTOBUF_EXPORT Printer {
91   public:
92    Printer();
93    ~Printer();
94
95    // Like TextFormat::Print
96    bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
97    // Like TextFormat::PrintUnknownFields
98    bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
99                            io::ZeroCopyOutputStream* output) const;
100    // Like TextFormat::PrintToString
101    bool PrintToString(const Message& message, string* output) const;
102    // Like TextFormat::PrintUnknownFieldsToString
103    bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
104                                    string* output) const;
105    // Like TextFormat::PrintFieldValueToString
106    void PrintFieldValueToString(const Message& message,
107                                 const FieldDescriptor* field,
108                                 int index,
109                                 string* output) const;
110
111    // Adjust the initial indent level of all output.  Each indent level is
112    // equal to two spaces.
113    void SetInitialIndentLevel(int indent_level) {
114      initial_indent_level_ = indent_level;
115    }
116
117    // If printing in single line mode, then the entire message will be output
118    // on a single line with no line breaks.
119    void SetSingleLineMode(bool single_line_mode) {
120      single_line_mode_ = single_line_mode;
121    }
122
123    // Set true to print repeated primitives in a format like:
124    //   field_name: [1, 2, 3, 4]
125    // instead of printing each value on its own line.  Short format applies
126    // only to primitive values -- i.e. everything except strings and
127    // sub-messages/groups.
128    void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
129      use_short_repeated_primitives_ = use_short_repeated_primitives;
130    }
131
132    // Set true to output UTF-8 instead of ASCII.  The only difference
133    // is that bytes >= 0x80 in string fields will not be escaped,
134    // because they are assumed to be part of UTF-8 multi-byte
135    // sequences.
136    void SetUseUtf8StringEscaping(bool as_utf8) {
137      utf8_string_escaping_ = as_utf8;
138    }
139
140   private:
141    // Forward declaration of an internal class used to print the text
142    // output to the OutputStream (see text_format.cc for implementation).
143    class TextGenerator;
144
145    // Internal Print method, used for writing to the OutputStream via
146    // the TextGenerator class.
147    void Print(const Message& message,
148               TextGenerator& generator) const;
149
150    // Print a single field.
151    void PrintField(const Message& message,
152                    const Reflection* reflection,
153                    const FieldDescriptor* field,
154                    TextGenerator& generator) const;
155
156    // Print a repeated primitive field in short form.
157    void PrintShortRepeatedField(const Message& message,
158                                 const Reflection* reflection,
159                                 const FieldDescriptor* field,
160                                 TextGenerator& generator) const;
161
162    // Print the name of a field -- i.e. everything that comes before the
163    // ':' for a single name/value pair.
164    void PrintFieldName(const Message& message,
165                        const Reflection* reflection,
166                        const FieldDescriptor* field,
167                        TextGenerator& generator) const;
168
169    // Outputs a textual representation of the value of the field supplied on
170    // the message supplied or the default value if not set.
171    void PrintFieldValue(const Message& message,
172                         const Reflection* reflection,
173                         const FieldDescriptor* field,
174                         int index,
175                         TextGenerator& generator) const;
176
177    // Print the fields in an UnknownFieldSet.  They are printed by tag number
178    // only.  Embedded messages are heuristically identified by attempting to
179    // parse them.
180    void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
181                            TextGenerator& generator) const;
182
183    int initial_indent_level_;
184
185    bool single_line_mode_;
186
187    bool use_short_repeated_primitives_;
188
189    bool utf8_string_escaping_;
190  };
191
192  // Parses a text-format protocol message from the given input stream to
193  // the given message object.  This function parses the format written
194  // by Print().
195  static bool Parse(io::ZeroCopyInputStream* input, Message* output);
196  // Like Parse(), but reads directly from a string.
197  static bool ParseFromString(const string& input, Message* output);
198
199  // Like Parse(), but the data is merged into the given message, as if
200  // using Message::MergeFrom().
201  static bool Merge(io::ZeroCopyInputStream* input, Message* output);
202  // Like Merge(), but reads directly from a string.
203  static bool MergeFromString(const string& input, Message* output);
204
205  // Parse the given text as a single field value and store it into the
206  // given field of the given message. If the field is a repeated field,
207  // the new value will be added to the end
208  static bool ParseFieldValueFromString(const string& input,
209                                        const FieldDescriptor* field,
210                                        Message* message);
211
212  // Interface that TextFormat::Parser can use to find extensions.
213  // This class may be extended in the future to find more information
214  // like fields, etc.
215  class LIBPROTOBUF_EXPORT Finder {
216   public:
217    virtual ~Finder();
218
219    // Try to find an extension of *message by fully-qualified field
220    // name.  Returns NULL if no extension is known for this name or number.
221    virtual const FieldDescriptor* FindExtension(
222        Message* message,
223        const string& name) const = 0;
224  };
225
226  // A location in the parsed text.
227  struct ParseLocation {
228    int line;
229    int column;
230
231    ParseLocation() : line(-1), column(-1) {}
232    ParseLocation(int line_param, int column_param)
233        : line(line_param), column(column_param) {}
234  };
235
236  // Data structure which is populated with the locations of each field
237  // value parsed from the text.
238  class LIBPROTOBUF_EXPORT ParseInfoTree {
239   public:
240    ParseInfoTree();
241    ~ParseInfoTree();
242
243    // Returns the parse location for index-th value of the field in the parsed
244    // text. If none exists, returns a location with line = -1. Index should be
245    // -1 for not-repeated fields.
246    ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
247
248    // Returns the parse info tree for the given field, which must be a message
249    // type. The nested information tree is owned by the root tree and will be
250    // deleted when it is deleted.
251    ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
252                                    int index) const;
253
254   private:
255    // Allow the text format parser to record information into the tree.
256    friend class TextFormat;
257
258    // Records the starting location of a single value for a field.
259    void RecordLocation(const FieldDescriptor* field, ParseLocation location);
260
261    // Create and records a nested tree for a nested message field.
262    ParseInfoTree* CreateNested(const FieldDescriptor* field);
263
264    // Defines the map from the index-th field descriptor to its parse location.
265    typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
266
267    // Defines the map from the index-th field descriptor to the nested parse
268    // info tree.
269    typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
270
271    LocationMap locations_;
272    NestedMap nested_;
273
274    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
275  };
276
277  // For more control over parsing, use this class.
278  class LIBPROTOBUF_EXPORT Parser {
279   public:
280    Parser();
281    ~Parser();
282
283    // Like TextFormat::Parse().
284    bool Parse(io::ZeroCopyInputStream* input, Message* output);
285    // Like TextFormat::ParseFromString().
286    bool ParseFromString(const string& input, Message* output);
287    // Like TextFormat::Merge().
288    bool Merge(io::ZeroCopyInputStream* input, Message* output);
289    // Like TextFormat::MergeFromString().
290    bool MergeFromString(const string& input, Message* output);
291
292    // Set where to report parse errors.  If NULL (the default), errors will
293    // be printed to stderr.
294    void RecordErrorsTo(io::ErrorCollector* error_collector) {
295      error_collector_ = error_collector;
296    }
297
298    // Set how parser finds extensions.  If NULL (the default), the
299    // parser will use the standard Reflection object associated with
300    // the message being parsed.
301    void SetFinder(Finder* finder) {
302      finder_ = finder;
303    }
304
305    // Sets where location information about the parse will be written. If NULL
306    // (the default), then no location will be written.
307    void WriteLocationsTo(ParseInfoTree* tree) {
308      parse_info_tree_ = tree;
309    }
310
311    // Normally parsing fails if, after parsing, output->IsInitialized()
312    // returns false.  Call AllowPartialMessage(true) to skip this check.
313    void AllowPartialMessage(bool allow) {
314      allow_partial_ = allow;
315    }
316
317    // Like TextFormat::ParseFieldValueFromString
318    bool ParseFieldValueFromString(const string& input,
319                                   const FieldDescriptor* field,
320                                   Message* output);
321
322
323   private:
324    // Forward declaration of an internal class used to parse text
325    // representations (see text_format.cc for implementation).
326    class ParserImpl;
327
328    // Like TextFormat::Merge().  The provided implementation is used
329    // to do the parsing.
330    bool MergeUsingImpl(io::ZeroCopyInputStream* input,
331                        Message* output,
332                        ParserImpl* parser_impl);
333
334    io::ErrorCollector* error_collector_;
335    Finder* finder_;
336    ParseInfoTree* parse_info_tree_;
337    bool allow_partial_;
338    bool allow_unknown_field_;
339  };
340
341 private:
342  // Hack: ParseInfoTree declares TextFormat as a friend which should extend
343  // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
344  // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
345  // helpers for ParserImpl to call methods of ParseInfoTree.
346  static inline void RecordLocation(ParseInfoTree* info_tree,
347                                    const FieldDescriptor* field,
348                                    ParseLocation location);
349  static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
350                                            const FieldDescriptor* field);
351
352  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
353};
354
355inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
356                                       const FieldDescriptor* field,
357                                       ParseLocation location) {
358  info_tree->RecordLocation(field, location);
359}
360
361inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
362    ParseInfoTree* info_tree, const FieldDescriptor* field) {
363  return info_tree->CreateNested(field);
364}
365
366}  // namespace protobuf
367
368}  // namespace google
369#endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
370