parser.cc revision d0332953cda33fb4f8e24ebff9c49159b69c43d6
1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// Recursive descent FTW.
36
37#include <float.h>
38#include <google/protobuf/stubs/hash.h>
39#include <limits>
40
41
42#include <google/protobuf/compiler/parser.h>
43#include <google/protobuf/descriptor.h>
44#include <google/protobuf/descriptor.pb.h>
45#include <google/protobuf/wire_format.h>
46#include <google/protobuf/io/tokenizer.h>
47#include <google/protobuf/stubs/common.h>
48#include <google/protobuf/stubs/strutil.h>
49#include <google/protobuf/stubs/map-util.h>
50
51namespace google {
52namespace protobuf {
53namespace compiler {
54
55using internal::WireFormat;
56
57namespace {
58
59typedef hash_map<string, FieldDescriptorProto::Type> TypeNameMap;
60
61TypeNameMap MakeTypeNameTable() {
62  TypeNameMap result;
63
64  result["double"  ] = FieldDescriptorProto::TYPE_DOUBLE;
65  result["float"   ] = FieldDescriptorProto::TYPE_FLOAT;
66  result["uint64"  ] = FieldDescriptorProto::TYPE_UINT64;
67  result["fixed64" ] = FieldDescriptorProto::TYPE_FIXED64;
68  result["fixed32" ] = FieldDescriptorProto::TYPE_FIXED32;
69  result["bool"    ] = FieldDescriptorProto::TYPE_BOOL;
70  result["string"  ] = FieldDescriptorProto::TYPE_STRING;
71  result["group"   ] = FieldDescriptorProto::TYPE_GROUP;
72
73  result["bytes"   ] = FieldDescriptorProto::TYPE_BYTES;
74  result["uint32"  ] = FieldDescriptorProto::TYPE_UINT32;
75  result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
76  result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
77  result["int32"   ] = FieldDescriptorProto::TYPE_INT32;
78  result["int64"   ] = FieldDescriptorProto::TYPE_INT64;
79  result["sint32"  ] = FieldDescriptorProto::TYPE_SINT32;
80  result["sint64"  ] = FieldDescriptorProto::TYPE_SINT64;
81
82  return result;
83}
84
85const TypeNameMap kTypeNames = MakeTypeNameTable();
86
87}  // anonymous namespace
88
// Failure-propagation shorthand.  "DO(foo);" evaluates foo and, if it yields
// false, makes the enclosing function return false right away; otherwise
// execution continues with the next statement.
#define DO(STATEMENT) do { if (!(STATEMENT)) return false; } while (0)
93
94// ===================================================================
95
96Parser::Parser()
97  : input_(NULL),
98    error_collector_(NULL),
99    source_location_table_(NULL),
100    had_errors_(false),
101    require_syntax_identifier_(false),
102    stop_after_syntax_identifier_(false) {
103}
104
105Parser::~Parser() {
106}
107
108// ===================================================================
109
110inline bool Parser::LookingAt(const char* text) {
111  return input_->current().text == text;
112}
113
114inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
115  return input_->current().type == token_type;
116}
117
118inline bool Parser::AtEnd() {
119  return LookingAtType(io::Tokenizer::TYPE_END);
120}
121
122bool Parser::TryConsume(const char* text) {
123  if (LookingAt(text)) {
124    input_->Next();
125    return true;
126  } else {
127    return false;
128  }
129}
130
131bool Parser::Consume(const char* text, const char* error) {
132  if (TryConsume(text)) {
133    return true;
134  } else {
135    AddError(error);
136    return false;
137  }
138}
139
140bool Parser::Consume(const char* text) {
141  if (TryConsume(text)) {
142    return true;
143  } else {
144    AddError("Expected \"" + string(text) + "\".");
145    return false;
146  }
147}
148
149bool Parser::ConsumeIdentifier(string* output, const char* error) {
150  if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
151    *output = input_->current().text;
152    input_->Next();
153    return true;
154  } else {
155    AddError(error);
156    return false;
157  }
158}
159
160bool Parser::ConsumeInteger(int* output, const char* error) {
161  if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
162    uint64 value = 0;
163    if (!io::Tokenizer::ParseInteger(input_->current().text,
164                                     kint32max, &value)) {
165      AddError("Integer out of range.");
166      // We still return true because we did, in fact, parse an integer.
167    }
168    *output = value;
169    input_->Next();
170    return true;
171  } else {
172    AddError(error);
173    return false;
174  }
175}
176
177bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
178                              const char* error) {
179  if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
180    if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
181                                     output)) {
182      AddError("Integer out of range.");
183      // We still return true because we did, in fact, parse an integer.
184      *output = 0;
185    }
186    input_->Next();
187    return true;
188  } else {
189    AddError(error);
190    return false;
191  }
192}
193
194bool Parser::ConsumeNumber(double* output, const char* error) {
195  if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
196    *output = io::Tokenizer::ParseFloat(input_->current().text);
197    input_->Next();
198    return true;
199  } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
200    // Also accept integers.
201    uint64 value = 0;
202    if (!io::Tokenizer::ParseInteger(input_->current().text,
203                                     kuint64max, &value)) {
204      AddError("Integer out of range.");
205      // We still return true because we did, in fact, parse a number.
206    }
207    *output = value;
208    input_->Next();
209    return true;
210  } else if (LookingAt("inf")) {
211    *output = numeric_limits<double>::infinity();
212    input_->Next();
213    return true;
214  } else if (LookingAt("nan")) {
215    *output = numeric_limits<double>::quiet_NaN();
216    input_->Next();
217    return true;
218  } else {
219    AddError(error);
220    return false;
221  }
222}
223
224bool Parser::ConsumeString(string* output, const char* error) {
225  if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
226    io::Tokenizer::ParseString(input_->current().text, output);
227    input_->Next();
228    // Allow C++ like concatenation of adjacent string tokens.
229    while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
230      io::Tokenizer::ParseStringAppend(input_->current().text, output);
231      input_->Next();
232    }
233    return true;
234  } else {
235    AddError(error);
236    return false;
237  }
238}
239
240// -------------------------------------------------------------------
241
242void Parser::AddError(int line, int column, const string& error) {
243  if (error_collector_ != NULL) {
244    error_collector_->AddError(line, column, error);
245  }
246  had_errors_ = true;
247}
248
249void Parser::AddError(const string& error) {
250  AddError(input_->current().line, input_->current().column, error);
251}
252
253void Parser::RecordLocation(
254    const Message* descriptor,
255    DescriptorPool::ErrorCollector::ErrorLocation location,
256    int line, int column) {
257  if (source_location_table_ != NULL) {
258    source_location_table_->Add(descriptor, location, line, column);
259  }
260}
261
262void Parser::RecordLocation(
263    const Message* descriptor,
264    DescriptorPool::ErrorCollector::ErrorLocation location) {
265  RecordLocation(descriptor, location,
266                 input_->current().line, input_->current().column);
267}
268
269// -------------------------------------------------------------------
270
271void Parser::SkipStatement() {
272  while (true) {
273    if (AtEnd()) {
274      return;
275    } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
276      if (TryConsume(";")) {
277        return;
278      } else if (TryConsume("{")) {
279        SkipRestOfBlock();
280        return;
281      } else if (LookingAt("}")) {
282        return;
283      }
284    }
285    input_->Next();
286  }
287}
288
289void Parser::SkipRestOfBlock() {
290  while (true) {
291    if (AtEnd()) {
292      return;
293    } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
294      if (TryConsume("}")) {
295        return;
296      } else if (TryConsume("{")) {
297        SkipRestOfBlock();
298      }
299    }
300    input_->Next();
301  }
302}
303
304// ===================================================================
305
306bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
307  input_ = input;
308  had_errors_ = false;
309  syntax_identifier_.clear();
310
311  if (LookingAtType(io::Tokenizer::TYPE_START)) {
312    // Advance to first token.
313    input_->Next();
314  }
315
316  if (require_syntax_identifier_ || LookingAt("syntax")) {
317    if (!ParseSyntaxIdentifier()) {
318      // Don't attempt to parse the file if we didn't recognize the syntax
319      // identifier.
320      return false;
321    }
322  } else if (!stop_after_syntax_identifier_) {
323    syntax_identifier_ = "proto2";
324  }
325
326  if (stop_after_syntax_identifier_) return !had_errors_;
327
328  // Repeatedly parse statements until we reach the end of the file.
329  while (!AtEnd()) {
330    if (!ParseTopLevelStatement(file)) {
331      // This statement failed to parse.  Skip it, but keep looping to parse
332      // other statements.
333      SkipStatement();
334
335      if (LookingAt("}")) {
336        AddError("Unmatched \"}\".");
337        input_->Next();
338      }
339    }
340  }
341
342  input_ = NULL;
343  return !had_errors_;
344}
345
346bool Parser::ParseSyntaxIdentifier() {
347  DO(Consume("syntax", "File must begin with 'syntax = \"proto2\";'."));
348  DO(Consume("="));
349  io::Tokenizer::Token syntax_token = input_->current();
350  string syntax;
351  DO(ConsumeString(&syntax, "Expected syntax identifier."));
352  DO(Consume(";"));
353
354  syntax_identifier_ = syntax;
355
356  if (syntax != "proto2" && !stop_after_syntax_identifier_) {
357    AddError(syntax_token.line, syntax_token.column,
358      "Unrecognized syntax identifier \"" + syntax + "\".  This parser "
359      "only recognizes \"proto2\".");
360    return false;
361  }
362
363  return true;
364}
365
366bool Parser::ParseTopLevelStatement(FileDescriptorProto* file) {
367  if (TryConsume(";")) {
368    // empty statement; ignore
369    return true;
370  } else if (LookingAt("message")) {
371    return ParseMessageDefinition(file->add_message_type());
372  } else if (LookingAt("enum")) {
373    return ParseEnumDefinition(file->add_enum_type());
374  } else if (LookingAt("service")) {
375    return ParseServiceDefinition(file->add_service());
376  } else if (LookingAt("extend")) {
377    return ParseExtend(file->mutable_extension(),
378                       file->mutable_message_type());
379  } else if (LookingAt("import")) {
380    return ParseImport(file->add_dependency());
381  } else if (LookingAt("package")) {
382    return ParsePackage(file);
383  } else if (LookingAt("option")) {
384    return ParseOption(file->mutable_options());
385  } else {
386    AddError("Expected top-level statement (e.g. \"message\").");
387    return false;
388  }
389}
390
391// -------------------------------------------------------------------
392// Messages
393
394bool Parser::ParseMessageDefinition(DescriptorProto* message) {
395  DO(Consume("message"));
396  RecordLocation(message, DescriptorPool::ErrorCollector::NAME);
397  DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
398  DO(ParseMessageBlock(message));
399  return true;
400}
401
402bool Parser::ParseMessageBlock(DescriptorProto* message) {
403  DO(Consume("{"));
404
405  while (!TryConsume("}")) {
406    if (AtEnd()) {
407      AddError("Reached end of input in message definition (missing '}').");
408      return false;
409    }
410
411    if (!ParseMessageStatement(message)) {
412      // This statement failed to parse.  Skip it, but keep looping to parse
413      // other statements.
414      SkipStatement();
415    }
416  }
417
418  return true;
419}
420
421bool Parser::ParseMessageStatement(DescriptorProto* message) {
422  if (TryConsume(";")) {
423    // empty statement; ignore
424    return true;
425  } else if (LookingAt("message")) {
426    return ParseMessageDefinition(message->add_nested_type());
427  } else if (LookingAt("enum")) {
428    return ParseEnumDefinition(message->add_enum_type());
429  } else if (LookingAt("extensions")) {
430    return ParseExtensions(message);
431  } else if (LookingAt("extend")) {
432    return ParseExtend(message->mutable_extension(),
433                       message->mutable_nested_type());
434  } else if (LookingAt("option")) {
435    return ParseOption(message->mutable_options());
436  } else {
437    return ParseMessageField(message->add_field(),
438                             message->mutable_nested_type());
439  }
440}
441
442bool Parser::ParseMessageField(FieldDescriptorProto* field,
443                               RepeatedPtrField<DescriptorProto>* messages) {
444  // Parse label and type.
445  FieldDescriptorProto::Label label;
446  DO(ParseLabel(&label));
447  field->set_label(label);
448
449  RecordLocation(field, DescriptorPool::ErrorCollector::TYPE);
450  FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
451  string type_name;
452  DO(ParseType(&type, &type_name));
453  if (type_name.empty()) {
454    field->set_type(type);
455  } else {
456    field->set_type_name(type_name);
457  }
458
459  // Parse name and '='.
460  RecordLocation(field, DescriptorPool::ErrorCollector::NAME);
461  io::Tokenizer::Token name_token = input_->current();
462  DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
463  DO(Consume("=", "Missing field number."));
464
465  // Parse field number.
466  RecordLocation(field, DescriptorPool::ErrorCollector::NUMBER);
467  int number;
468  DO(ConsumeInteger(&number, "Expected field number."));
469  field->set_number(number);
470
471  // Parse options.
472  DO(ParseFieldOptions(field));
473
474  // Deal with groups.
475  if (type_name.empty() && type == FieldDescriptorProto::TYPE_GROUP) {
476    DescriptorProto* group = messages->Add();
477    group->set_name(field->name());
478    // Record name location to match the field name's location.
479    RecordLocation(group, DescriptorPool::ErrorCollector::NAME,
480                   name_token.line, name_token.column);
481
482    // As a hack for backwards-compatibility, we force the group name to start
483    // with a capital letter and lower-case the field name.  New code should
484    // not use groups; it should use nested messages.
485    if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
486      AddError(name_token.line, name_token.column,
487        "Group names must start with a capital letter.");
488    }
489    LowerString(field->mutable_name());
490
491    field->set_type_name(group->name());
492    if (LookingAt("{")) {
493      DO(ParseMessageBlock(group));
494    } else {
495      AddError("Missing group body.");
496      return false;
497    }
498  } else {
499    DO(Consume(";"));
500  }
501
502  return true;
503}
504
505bool Parser::ParseFieldOptions(FieldDescriptorProto* field) {
506  if (!TryConsume("[")) return true;
507
508  // Parse field options.
509  do {
510    if (LookingAt("default")) {
511      DO(ParseDefaultAssignment(field));
512    } else {
513      DO(ParseOptionAssignment(field->mutable_options()));
514    }
515  } while (TryConsume(","));
516
517  DO(Consume("]"));
518  return true;
519}
520
521bool Parser::ParseDefaultAssignment(FieldDescriptorProto* field) {
522  if (field->has_default_value()) {
523    AddError("Already set option \"default\".");
524    field->clear_default_value();
525  }
526
527  DO(Consume("default"));
528  DO(Consume("="));
529
530  RecordLocation(field, DescriptorPool::ErrorCollector::DEFAULT_VALUE);
531  string* default_value = field->mutable_default_value();
532
533  if (!field->has_type()) {
534    // The field has a type name, but we don't know if it is a message or an
535    // enum yet.  Assume an enum for now.
536    DO(ConsumeIdentifier(default_value, "Expected identifier."));
537    return true;
538  }
539
540  switch (field->type()) {
541    case FieldDescriptorProto::TYPE_INT32:
542    case FieldDescriptorProto::TYPE_INT64:
543    case FieldDescriptorProto::TYPE_SINT32:
544    case FieldDescriptorProto::TYPE_SINT64:
545    case FieldDescriptorProto::TYPE_SFIXED32:
546    case FieldDescriptorProto::TYPE_SFIXED64: {
547      uint64 max_value = kint64max;
548      if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
549          field->type() == FieldDescriptorProto::TYPE_SINT32 ||
550          field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
551        max_value = kint32max;
552      }
553
554      // These types can be negative.
555      if (TryConsume("-")) {
556        default_value->append("-");
557        // Two's complement always has one more negative value than positive.
558        ++max_value;
559      }
560      // Parse the integer to verify that it is not out-of-range.
561      uint64 value;
562      DO(ConsumeInteger64(max_value, &value, "Expected integer."));
563      // And stringify it again.
564      default_value->append(SimpleItoa(value));
565      break;
566    }
567
568    case FieldDescriptorProto::TYPE_UINT32:
569    case FieldDescriptorProto::TYPE_UINT64:
570    case FieldDescriptorProto::TYPE_FIXED32:
571    case FieldDescriptorProto::TYPE_FIXED64: {
572      uint64 max_value = kuint64max;
573      if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
574          field->type() == FieldDescriptorProto::TYPE_FIXED32) {
575        max_value = kuint32max;
576      }
577
578      // Numeric, not negative.
579      if (TryConsume("-")) {
580        AddError("Unsigned field can't have negative default value.");
581      }
582      // Parse the integer to verify that it is not out-of-range.
583      uint64 value;
584      DO(ConsumeInteger64(max_value, &value, "Expected integer."));
585      // And stringify it again.
586      default_value->append(SimpleItoa(value));
587      break;
588    }
589
590    case FieldDescriptorProto::TYPE_FLOAT:
591    case FieldDescriptorProto::TYPE_DOUBLE:
592      // These types can be negative.
593      if (TryConsume("-")) {
594        default_value->append("-");
595      }
596      // Parse the integer because we have to convert hex integers to decimal
597      // floats.
598      double value;
599      DO(ConsumeNumber(&value, "Expected number."));
600      // And stringify it again.
601      default_value->append(SimpleDtoa(value));
602      break;
603
604    case FieldDescriptorProto::TYPE_BOOL:
605      if (TryConsume("true")) {
606        default_value->assign("true");
607      } else if (TryConsume("false")) {
608        default_value->assign("false");
609      } else {
610        AddError("Expected \"true\" or \"false\".");
611        return false;
612      }
613      break;
614
615    case FieldDescriptorProto::TYPE_STRING:
616      DO(ConsumeString(default_value, "Expected string."));
617      break;
618
619    case FieldDescriptorProto::TYPE_BYTES:
620      DO(ConsumeString(default_value, "Expected string."));
621      *default_value = CEscape(*default_value);
622      break;
623
624    case FieldDescriptorProto::TYPE_ENUM:
625      DO(ConsumeIdentifier(default_value, "Expected identifier."));
626      break;
627
628    case FieldDescriptorProto::TYPE_MESSAGE:
629    case FieldDescriptorProto::TYPE_GROUP:
630      AddError("Messages can't have default values.");
631      return false;
632  }
633
634  return true;
635}
636
637bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option) {
638  UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
639  string identifier;  // We parse identifiers into this string.
640  if (LookingAt("(")) {  // This is an extension.
641    DO(Consume("("));
642    // An extension name consists of dot-separated identifiers, and may begin
643    // with a dot.
644    if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
645      DO(ConsumeIdentifier(&identifier, "Expected identifier."));
646      name->mutable_name_part()->append(identifier);
647    }
648    while (LookingAt(".")) {
649      DO(Consume("."));
650      name->mutable_name_part()->append(".");
651      DO(ConsumeIdentifier(&identifier, "Expected identifier."));
652      name->mutable_name_part()->append(identifier);
653    }
654    DO(Consume(")"));
655    name->set_is_extension(true);
656  } else {  // This is a regular field.
657    DO(ConsumeIdentifier(&identifier, "Expected identifier."));
658    name->mutable_name_part()->append(identifier);
659    name->set_is_extension(false);
660  }
661  return true;
662}
663
664// We don't interpret the option here. Instead we store it in an
665// UninterpretedOption, to be interpreted later.
666bool Parser::ParseOptionAssignment(Message* options) {
667  // Create an entry in the uninterpreted_option field.
668  const FieldDescriptor* uninterpreted_option_field = options->GetDescriptor()->
669      FindFieldByName("uninterpreted_option");
670  GOOGLE_CHECK(uninterpreted_option_field != NULL)
671      << "No field named \"uninterpreted_option\" in the Options proto.";
672
673  UninterpretedOption* uninterpreted_option = down_cast<UninterpretedOption*>(
674      options->GetReflection()->AddMessage(options,
675                                           uninterpreted_option_field));
676
677  // Parse dot-separated name.
678  RecordLocation(uninterpreted_option,
679                 DescriptorPool::ErrorCollector::OPTION_NAME);
680
681  DO(ParseOptionNamePart(uninterpreted_option));
682
683  while (LookingAt(".")) {
684    DO(Consume("."));
685    DO(ParseOptionNamePart(uninterpreted_option));
686  }
687
688  DO(Consume("="));
689
690  RecordLocation(uninterpreted_option,
691                 DescriptorPool::ErrorCollector::OPTION_VALUE);
692
693  // All values are a single token, except for negative numbers, which consist
694  // of a single '-' symbol, followed by a positive number.
695  bool is_negative = TryConsume("-");
696
697  switch (input_->current().type) {
698    case io::Tokenizer::TYPE_START:
699      GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
700      return false;
701
702    case io::Tokenizer::TYPE_END:
703      AddError("Unexpected end of stream while parsing option value.");
704      return false;
705
706    case io::Tokenizer::TYPE_IDENTIFIER: {
707      if (is_negative) {
708        AddError("Invalid '-' symbol before identifier.");
709        return false;
710      }
711      string value;
712      DO(ConsumeIdentifier(&value, "Expected identifier."));
713      uninterpreted_option->set_identifier_value(value);
714      break;
715    }
716
717    case io::Tokenizer::TYPE_INTEGER: {
718      uint64 value;
719      uint64 max_value =
720          is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
721      DO(ConsumeInteger64(max_value, &value, "Expected integer."));
722      if (is_negative) {
723        uninterpreted_option->set_negative_int_value(-value);
724      } else {
725        uninterpreted_option->set_positive_int_value(value);
726      }
727      break;
728    }
729
730    case io::Tokenizer::TYPE_FLOAT: {
731      double value;
732      DO(ConsumeNumber(&value, "Expected number."));
733      uninterpreted_option->set_double_value(is_negative ? -value : value);
734      break;
735    }
736
737    case io::Tokenizer::TYPE_STRING: {
738      if (is_negative) {
739        AddError("Invalid '-' symbol before string.");
740        return false;
741      }
742      string value;
743      DO(ConsumeString(&value, "Expected string."));
744      uninterpreted_option->set_string_value(value);
745      break;
746    }
747
748    case io::Tokenizer::TYPE_SYMBOL:
749      AddError("Expected option value.");
750      return false;
751  }
752
753  return true;
754}
755
756bool Parser::ParseExtensions(DescriptorProto* message) {
757  // Parse the declaration.
758  DO(Consume("extensions"));
759
760  do {
761    DescriptorProto::ExtensionRange* range = message->add_extension_range();
762    RecordLocation(range, DescriptorPool::ErrorCollector::NUMBER);
763
764    int start, end;
765    DO(ConsumeInteger(&start, "Expected field number range."));
766
767    if (TryConsume("to")) {
768      if (TryConsume("max")) {
769        end = FieldDescriptor::kMaxNumber;
770      } else {
771        DO(ConsumeInteger(&end, "Expected integer."));
772      }
773    } else {
774      end = start;
775    }
776
777    // Users like to specify inclusive ranges, but in code we like the end
778    // number to be exclusive.
779    ++end;
780
781    range->set_start(start);
782    range->set_end(end);
783  } while (TryConsume(","));
784
785  DO(Consume(";"));
786  return true;
787}
788
789bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
790                         RepeatedPtrField<DescriptorProto>* messages) {
791  DO(Consume("extend"));
792
793  // We expect to see at least one extension field defined in the extend block.
794  // We need to create it now so we can record the extendee's location.
795  FieldDescriptorProto* first_field = extensions->Add();
796
797  // Parse the extendee type.
798  RecordLocation(first_field, DescriptorPool::ErrorCollector::EXTENDEE);
799  DO(ParseUserDefinedType(first_field->mutable_extendee()));
800
801  // Parse the block.
802  DO(Consume("{"));
803
804  bool is_first = true;
805
806  do {
807    if (AtEnd()) {
808      AddError("Reached end of input in extend definition (missing '}').");
809      return false;
810    }
811
812    FieldDescriptorProto* field;
813    if (is_first) {
814      field = first_field;
815      is_first = false;
816    } else {
817      field = extensions->Add();
818      field->set_extendee(first_field->extendee());
819    }
820
821    if (!ParseMessageField(field, messages)) {
822      // This statement failed to parse.  Skip it, but keep looping to parse
823      // other statements.
824      SkipStatement();
825    }
826  } while(!TryConsume("}"));
827
828  return true;
829}
830
831// -------------------------------------------------------------------
832// Enums
833
834bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type) {
835  DO(Consume("enum"));
836  RecordLocation(enum_type, DescriptorPool::ErrorCollector::NAME);
837  DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
838  DO(ParseEnumBlock(enum_type));
839  return true;
840}
841
842bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type) {
843  DO(Consume("{"));
844
845  while (!TryConsume("}")) {
846    if (AtEnd()) {
847      AddError("Reached end of input in enum definition (missing '}').");
848      return false;
849    }
850
851    if (!ParseEnumStatement(enum_type)) {
852      // This statement failed to parse.  Skip it, but keep looping to parse
853      // other statements.
854      SkipStatement();
855    }
856  }
857
858  return true;
859}
860
861bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type) {
862  if (TryConsume(";")) {
863    // empty statement; ignore
864    return true;
865  } else if (LookingAt("option")) {
866    return ParseOption(enum_type->mutable_options());
867  } else {
868    return ParseEnumConstant(enum_type->add_value());
869  }
870}
871
872bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value) {
873  RecordLocation(enum_value, DescriptorPool::ErrorCollector::NAME);
874  DO(ConsumeIdentifier(enum_value->mutable_name(),
875                       "Expected enum constant name."));
876  DO(Consume("=", "Missing numeric value for enum constant."));
877
878  bool is_negative = TryConsume("-");
879  int number;
880  DO(ConsumeInteger(&number, "Expected integer."));
881  if (is_negative) number *= -1;
882  enum_value->set_number(number);
883
884  DO(ParseEnumConstantOptions(enum_value));
885
886  DO(Consume(";"));
887
888  return true;
889}
890
891bool Parser::ParseEnumConstantOptions(EnumValueDescriptorProto* value) {
892  if (!TryConsume("[")) return true;
893
894  do {
895    DO(ParseOptionAssignment(value->mutable_options()));
896  } while (TryConsume(","));
897
898  DO(Consume("]"));
899  return true;
900}
901
902// -------------------------------------------------------------------
903// Services
904
905bool Parser::ParseServiceDefinition(ServiceDescriptorProto* service) {
906  DO(Consume("service"));
907  RecordLocation(service, DescriptorPool::ErrorCollector::NAME);
908  DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
909  DO(ParseServiceBlock(service));
910  return true;
911}
912
913bool Parser::ParseServiceBlock(ServiceDescriptorProto* service) {
914  DO(Consume("{"));
915
916  while (!TryConsume("}")) {
917    if (AtEnd()) {
918      AddError("Reached end of input in service definition (missing '}').");
919      return false;
920    }
921
922    if (!ParseServiceStatement(service)) {
923      // This statement failed to parse.  Skip it, but keep looping to parse
924      // other statements.
925      SkipStatement();
926    }
927  }
928
929  return true;
930}
931
932bool Parser::ParseServiceStatement(ServiceDescriptorProto* service) {
933  if (TryConsume(";")) {
934    // empty statement; ignore
935    return true;
936  } else if (LookingAt("option")) {
937    return ParseOption(service->mutable_options());
938  } else {
939    return ParseServiceMethod(service->add_method());
940  }
941}
942
943bool Parser::ParseServiceMethod(MethodDescriptorProto* method) {
944  DO(Consume("rpc"));
945  RecordLocation(method, DescriptorPool::ErrorCollector::NAME);
946  DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
947
948  // Parse input type.
949  DO(Consume("("));
950  RecordLocation(method, DescriptorPool::ErrorCollector::INPUT_TYPE);
951  DO(ParseUserDefinedType(method->mutable_input_type()));
952  DO(Consume(")"));
953
954  // Parse output type.
955  DO(Consume("returns"));
956  DO(Consume("("));
957  RecordLocation(method, DescriptorPool::ErrorCollector::OUTPUT_TYPE);
958  DO(ParseUserDefinedType(method->mutable_output_type()));
959  DO(Consume(")"));
960
961  if (TryConsume("{")) {
962    // Options!
963    while (!TryConsume("}")) {
964      if (AtEnd()) {
965        AddError("Reached end of input in method options (missing '}').");
966        return false;
967      }
968
969      if (TryConsume(";")) {
970        // empty statement; ignore
971      } else {
972        if (!ParseOption(method->mutable_options())) {
973          // This statement failed to parse.  Skip it, but keep looping to
974          // parse other statements.
975          SkipStatement();
976        }
977      }
978    }
979  } else {
980    DO(Consume(";"));
981  }
982
983  return true;
984}
985
986// -------------------------------------------------------------------
987
988bool Parser::ParseLabel(FieldDescriptorProto::Label* label) {
989  if (TryConsume("optional")) {
990    *label = FieldDescriptorProto::LABEL_OPTIONAL;
991    return true;
992  } else if (TryConsume("repeated")) {
993    *label = FieldDescriptorProto::LABEL_REPEATED;
994    return true;
995  } else if (TryConsume("required")) {
996    *label = FieldDescriptorProto::LABEL_REQUIRED;
997    return true;
998  } else {
999    AddError("Expected \"required\", \"optional\", or \"repeated\".");
1000    // We can actually reasonably recover here by just assuming the user
1001    // forgot the label altogether.
1002    *label = FieldDescriptorProto::LABEL_OPTIONAL;
1003    return true;
1004  }
1005}
1006
1007bool Parser::ParseType(FieldDescriptorProto::Type* type,
1008                       string* type_name) {
1009  TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1010  if (iter != kTypeNames.end()) {
1011    *type = iter->second;
1012    input_->Next();
1013  } else {
1014    DO(ParseUserDefinedType(type_name));
1015  }
1016  return true;
1017}
1018
1019bool Parser::ParseUserDefinedType(string* type_name) {
1020  type_name->clear();
1021
1022  TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1023  if (iter != kTypeNames.end()) {
1024    // Note:  The only place enum types are allowed is for field types, but
1025    //   if we are parsing a field type then we would not get here because
1026    //   primitives are allowed there as well.  So this error message doesn't
1027    //   need to account for enums.
1028    AddError("Expected message type.");
1029
1030    // Pretend to accept this type so that we can go on parsing.
1031    *type_name = input_->current().text;
1032    input_->Next();
1033    return true;
1034  }
1035
1036  // A leading "." means the name is fully-qualified.
1037  if (TryConsume(".")) type_name->append(".");
1038
1039  // Consume the first part of the name.
1040  string identifier;
1041  DO(ConsumeIdentifier(&identifier, "Expected type name."));
1042  type_name->append(identifier);
1043
1044  // Consume more parts.
1045  while (TryConsume(".")) {
1046    type_name->append(".");
1047    DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1048    type_name->append(identifier);
1049  }
1050
1051  return true;
1052}
1053
1054// ===================================================================
1055
1056bool Parser::ParsePackage(FileDescriptorProto* file) {
1057  if (file->has_package()) {
1058    AddError("Multiple package definitions.");
1059    // Don't append the new package to the old one.  Just replace it.  Not
1060    // that it really matters since this is an error anyway.
1061    file->clear_package();
1062  }
1063
1064  DO(Consume("package"));
1065
1066  RecordLocation(file, DescriptorPool::ErrorCollector::NAME);
1067
1068  while (true) {
1069    string identifier;
1070    DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1071    file->mutable_package()->append(identifier);
1072    if (!TryConsume(".")) break;
1073    file->mutable_package()->append(".");
1074  }
1075
1076  DO(Consume(";"));
1077  return true;
1078}
1079
1080bool Parser::ParseImport(string* import_filename) {
1081  DO(Consume("import"));
1082  DO(ConsumeString(import_filename,
1083    "Expected a string naming the file to import."));
1084  DO(Consume(";"));
1085  return true;
1086}
1087
1088bool Parser::ParseOption(Message* options) {
1089  DO(Consume("option"));
1090  DO(ParseOptionAssignment(options));
1091  DO(Consume(";"));
1092  return true;
1093}
1094
1095// ===================================================================
1096
1097SourceLocationTable::SourceLocationTable() {}
1098SourceLocationTable::~SourceLocationTable() {}
1099
1100bool SourceLocationTable::Find(
1101    const Message* descriptor,
1102    DescriptorPool::ErrorCollector::ErrorLocation location,
1103    int* line, int* column) const {
1104  const pair<int, int>* result =
1105    FindOrNull(location_map_, make_pair(descriptor, location));
1106  if (result == NULL) {
1107    *line   = -1;
1108    *column = 0;
1109    return false;
1110  } else {
1111    *line   = result->first;
1112    *column = result->second;
1113    return true;
1114  }
1115}
1116
1117void SourceLocationTable::Add(
1118    const Message* descriptor,
1119    DescriptorPool::ErrorCollector::ErrorLocation location,
1120    int line, int column) {
1121  location_map_[make_pair(descriptor, location)] = make_pair(line, column);
1122}
1123
1124void SourceLocationTable::Clear() {
1125  location_map_.clear();
1126}
1127
1128}  // namespace compiler
1129}  // namespace protobuf
1130}  // namespace google
1131