idl_parser.cpp revision 07d5965c812fa5e82dc4d3eb32b37540b7c91598
1/*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <algorithm>
18
19#include "flatbuffers/flatbuffers.h"
20#include "flatbuffers/idl.h"
21#include "flatbuffers/util.h"
22
23namespace flatbuffers {
24
25const char *const kTypeNames[] = {
26  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) IDLTYPE,
27    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
28  #undef FLATBUFFERS_TD
29  nullptr
30};
31
32const char kTypeSizes[] = {
33  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
34      sizeof(CTYPE),
35    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
36  #undef FLATBUFFERS_TD
37};
38
// Reports a failure by throwing a std::string holding a copy of `msg`.
// This function never returns normally.
static void Error(const std::string &msg) {
  throw std::string(msg);
}
42
43// Ensure that integer values we parse fit inside the declared integer type.
44static void CheckBitsFit(int64_t val, size_t bits) {
45  auto mask = (1ll << bits) - 1;  // Bits we allow to be used.
46  if (bits < 64 &&
47      (val & ~mask) != 0 &&  // Positive or unsigned.
48      (val |  mask) != -1)   // Negative.
49    Error("constant does not fit in a " + NumToString(bits) + "-bit field");
50}
51
52// atot: templated version of atoi/atof: convert a string to an instance of T.
53template<typename T> inline T atot(const char *s) {
54  auto val = StringToInt(s);
55  CheckBitsFit(val, sizeof(T) * 8);
56  return (T)val;
57}
58template<> inline bool atot<bool>(const char *s) {
59  return 0 != atoi(s);
60}
61template<> inline float atot<float>(const char *s) {
62  return static_cast<float>(strtod(s, nullptr));
63}
64template<> inline double atot<double>(const char *s) {
65  return strtod(s, nullptr);
66}
67
68template<> inline Offset<void> atot<Offset<void>>(const char *s) {
69  return Offset<void>(atoi(s));
70}
71
// List of the tokens the lexer produces, as an X-macro: TOKEN is invoked once
// per token with (name, numeric value, human readable description).
// Single character tokens are represented by their ascii character code
// (e.g. '{'), so the tokens declared here start at 256.
#define FLATBUFFERS_GEN_TOKENS(TOKEN) \
  TOKEN(Eof, 256, "end of file") \
  TOKEN(StringConstant, 257, "string constant") \
  TOKEN(IntegerConstant, 258, "integer constant") \
  TOKEN(FloatConstant, 259, "float constant") \
  TOKEN(Identifier, 260, "identifier") \
  TOKEN(Table, 261, "table") \
  TOKEN(Struct, 262, "struct") \
  TOKEN(Enum, 263, "enum") \
  TOKEN(Union, 264, "union") \
  TOKEN(NameSpace, 265, "namespace") \
  TOKEN(RootType, 266, "root_type") \
  TOKEN(FileIdentifier, 267, "file_identifier") \
  TOKEN(FileExtension, 268, "file_extension") \
  TOKEN(Include, 269, "include")
89#ifdef __GNUC__
90__extension__  // Stop GCC complaining about trailing comma with -Wpendantic.
91#endif
92enum {
93  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
94    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
95  #undef FLATBUFFERS_TOKEN
96  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
97      kToken ## ENUM,
98    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
99  #undef FLATBUFFERS_TD
100};
101
102static std::string TokenToString(int t) {
103  static const char *tokens[] = {
104    #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
105      FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
106    #undef FLATBUFFERS_TOKEN
107    #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) IDLTYPE,
108      FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
109    #undef FLATBUFFERS_TD
110  };
111  if (t < 256) {  // A single ascii char token.
112    std::string s;
113    s.append(1, static_cast<char>(t));
114    return s;
115  } else {       // Other tokens.
116    return tokens[t - 256];
117  }
118}
119
120// Parses exactly nibbles worth of hex digits into a number, or error.
121int64_t Parser::ParseHexNum(int nibbles) {
122  for (int i = 0; i < nibbles; i++)
123    if (!isxdigit(cursor_[i]))
124      Error("escape code must be followed by " + NumToString(nibbles) +
125            " hex digits");
126  auto val = StringToInt(cursor_, 16);
127  cursor_ += nibbles;
128  return val;
129}
130
131void Parser::Next() {
132  doc_comment_.clear();
133  bool seen_newline = false;
134  for (;;) {
135    char c = *cursor_++;
136    token_ = c;
137    switch (c) {
138      case '\0': cursor_--; token_ = kTokenEof; return;
139      case ' ': case '\r': case '\t': break;
140      case '\n': line_++; seen_newline = true; break;
141      case '{': case '}': case '(': case ')': case '[': case ']': return;
142      case ',': case ':': case ';': case '=': return;
143      case '.':
144        if(!isdigit(*cursor_)) return;
145        Error("floating point constant can\'t start with \".\"");
146        break;
147      case '\"':
148        attribute_ = "";
149        while (*cursor_ != '\"') {
150          if (*cursor_ < ' ' && *cursor_ >= 0)
151            Error("illegal character in string constant");
152          if (*cursor_ == '\\') {
153            cursor_++;
154            switch (*cursor_) {
155              case 'n':  attribute_ += '\n'; cursor_++; break;
156              case 't':  attribute_ += '\t'; cursor_++; break;
157              case 'r':  attribute_ += '\r'; cursor_++; break;
158              case 'b':  attribute_ += '\b'; cursor_++; break;
159              case 'f':  attribute_ += '\f'; cursor_++; break;
160              case '\"': attribute_ += '\"'; cursor_++; break;
161              case '\\': attribute_ += '\\'; cursor_++; break;
162              case '/':  attribute_ += '/';  cursor_++; break;
163              case 'x': {  // Not in the JSON standard
164                cursor_++;
165                attribute_ += static_cast<char>(ParseHexNum(2));
166                break;
167              }
168              case 'u': {
169                cursor_++;
170                ToUTF8(static_cast<int>(ParseHexNum(4)), &attribute_);
171                break;
172              }
173              default: Error("unknown escape code in string constant"); break;
174            }
175          } else { // printable chars + UTF-8 bytes
176            attribute_ += *cursor_++;
177          }
178        }
179        cursor_++;
180        token_ = kTokenStringConstant;
181        return;
182      case '/':
183        if (*cursor_ == '/') {
184          const char *start = ++cursor_;
185          while (*cursor_ && *cursor_ != '\n') cursor_++;
186          if (*start == '/') {  // documentation comment
187            if (cursor_ != source_ && !seen_newline)
188              Error("a documentation comment should be on a line on its own");
189            doc_comment_.push_back(std::string(start + 1, cursor_));
190          }
191          break;
192        }
193        // fall thru
194      default:
195        if (isalpha(static_cast<unsigned char>(c))) {
196          // Collect all chars of an identifier:
197          const char *start = cursor_ - 1;
198          while (isalnum(static_cast<unsigned char>(*cursor_)) ||
199                 *cursor_ == '_')
200            cursor_++;
201          attribute_.clear();
202          attribute_.append(start, cursor_);
203          // First, see if it is a type keyword from the table of types:
204          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
205            if (attribute_ == IDLTYPE) { \
206              token_ = kToken ## ENUM; \
207              return; \
208            }
209            FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
210          #undef FLATBUFFERS_TD
211          // If it's a boolean constant keyword, turn those into integers,
212          // which simplifies our logic downstream.
213          if (attribute_ == "true" || attribute_ == "false") {
214            attribute_ = NumToString(attribute_ == "true");
215            token_ = kTokenIntegerConstant;
216            return;
217          }
218          // Check for declaration keywords:
219          if (attribute_ == "table")     { token_ = kTokenTable;     return; }
220          if (attribute_ == "struct")    { token_ = kTokenStruct;    return; }
221          if (attribute_ == "enum")      { token_ = kTokenEnum;      return; }
222          if (attribute_ == "union")     { token_ = kTokenUnion;     return; }
223          if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; }
224          if (attribute_ == "root_type") { token_ = kTokenRootType;  return; }
225          if (attribute_ == "include")   { token_ = kTokenInclude;  return; }
226          if (attribute_ == "file_identifier") {
227            token_ = kTokenFileIdentifier;
228            return;
229          }
230          if (attribute_ == "file_extension") {
231            token_ = kTokenFileExtension;
232            return;
233          }
234          // If not, it is a user-defined identifier:
235          token_ = kTokenIdentifier;
236          return;
237        } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
238          const char *start = cursor_ - 1;
239          while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
240          if (*cursor_ == '.') {
241            cursor_++;
242            while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
243            // See if this float has a scientific notation suffix. Both JSON
244            // and C++ (through strtod() we use) have the same format:
245            if (*cursor_ == 'e' || *cursor_ == 'E') {
246              cursor_++;
247              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
248              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
249            }
250            token_ = kTokenFloatConstant;
251          } else {
252            token_ = kTokenIntegerConstant;
253          }
254          attribute_.clear();
255          attribute_.append(start, cursor_);
256          return;
257        }
258        std::string ch;
259        ch = c;
260        if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
261        Error("illegal character: " + ch);
262        break;
263    }
264  }
265}
266
267// Check if a given token is next, if so, consume it as well.
268bool Parser::IsNext(int t) {
269  bool isnext = t == token_;
270  if (isnext) Next();
271  return isnext;
272}
273
274// Expect a given token to be next, consume it, or error if not present.
275void Parser::Expect(int t) {
276  if (t != token_) {
277    Error("expecting: " + TokenToString(t) + " instead got: " +
278          TokenToString(token_));
279  }
280  Next();
281}
282
283void Parser::ParseTypeIdent(Type &type) {
284  auto enum_def = enums_.Lookup(attribute_);
285  if (enum_def) {
286    type = enum_def->underlying_type;
287    if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
288  } else {
289    type.base_type = BASE_TYPE_STRUCT;
290    type.struct_def = LookupCreateStruct(attribute_);
291  }
292}
293
294// Parse any IDL type.
295void Parser::ParseType(Type &type) {
296  if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
297    type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
298  } else {
299    if (token_ == kTokenIdentifier) {
300      ParseTypeIdent(type);
301    } else if (token_ == '[') {
302      Next();
303      Type subtype;
304      ParseType(subtype);
305      if (subtype.base_type == BASE_TYPE_VECTOR) {
306        // We could support this, but it will complicate things, and it's
307        // easier to work around with a struct around the inner vector.
308        Error("nested vector types not supported (wrap in table first).");
309      }
310      if (subtype.base_type == BASE_TYPE_UNION) {
311        // We could support this if we stored a struct of 2 elements per
312        // union element.
313        Error("vector of union types not supported (wrap in table first).");
314      }
315      type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
316      type.element = subtype.base_type;
317      Expect(']');
318      return;
319    } else {
320      Error("illegal type syntax");
321    }
322  }
323  Next();
324}
325
326FieldDef &Parser::AddField(StructDef &struct_def,
327                           const std::string &name,
328                           const Type &type) {
329  auto &field = *new FieldDef();
330  field.value.offset =
331    FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
332  field.name = name;
333  field.value.type = type;
334  if (struct_def.fixed) {  // statically compute the field offset
335    auto size = InlineSize(type);
336    auto alignment = InlineAlignment(type);
337    // structs_ need to have a predictable format, so we need to align to
338    // the largest scalar
339    struct_def.minalign = std::max(struct_def.minalign, alignment);
340    struct_def.PadLastField(alignment);
341    field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
342    struct_def.bytesize += size;
343  }
344  if (struct_def.fields.Add(name, &field))
345    Error("field already exists: " + name);
346  return field;
347}
348
349void Parser::ParseField(StructDef &struct_def) {
350  std::string name = attribute_;
351  std::vector<std::string> dc = doc_comment_;
352  Expect(kTokenIdentifier);
353  Expect(':');
354  Type type;
355  ParseType(type);
356
357  if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
358    Error("structs_ may contain only scalar or struct fields");
359
360  FieldDef *typefield = nullptr;
361  if (type.base_type == BASE_TYPE_UNION) {
362    // For union fields, add a second auto-generated field to hold the type,
363    // with _type appended as the name.
364    typefield = &AddField(struct_def, name + "_type",
365                          type.enum_def->underlying_type);
366  }
367
368  auto &field = AddField(struct_def, name, type);
369
370  if (token_ == '=') {
371    Next();
372    if (!IsScalar(type.base_type))
373      Error("default values currently only supported for scalars");
374    ParseSingleValue(field.value);
375  }
376
377  if (type.enum_def &&
378      IsScalar(type.base_type) &&
379      !struct_def.fixed &&
380      !type.enum_def->attributes.Lookup("bit_flags") &&
381      !type.enum_def->ReverseLookup(static_cast<int>(
382                         StringToInt(field.value.constant.c_str()))))
383    Error("enum " + type.enum_def->name +
384          " does not have a declaration for this field\'s default of " +
385          field.value.constant);
386
387  field.doc_comment = dc;
388  ParseMetaData(field);
389  field.deprecated = field.attributes.Lookup("deprecated") != nullptr;
390  if (field.deprecated && struct_def.fixed)
391    Error("can't deprecate fields in a struct");
392  field.required = field.attributes.Lookup("required") != nullptr;
393  if (field.required && (struct_def.fixed ||
394                         IsScalar(field.value.type.base_type)))
395    Error("only non-scalar fields in tables may be 'required'");
396  auto nested = field.attributes.Lookup("nested_flatbuffer");
397  if (nested) {
398    if (nested->type.base_type != BASE_TYPE_STRING)
399      Error("nested_flatbuffer attribute must be a string (the root type)");
400    if (field.value.type.base_type != BASE_TYPE_VECTOR ||
401        field.value.type.element != BASE_TYPE_UCHAR)
402      Error("nested_flatbuffer attribute may only apply to a vector of ubyte");
403    // This will cause an error if the root type of the nested flatbuffer
404    // wasn't defined elsewhere.
405    LookupCreateStruct(nested->constant);
406  }
407
408  if (typefield) {
409    // If this field is a union, and it has a manually assigned id,
410    // the automatically added type field should have an id as well (of N - 1).
411    auto attr = field.attributes.Lookup("id");
412    if (attr) {
413      auto id = atoi(attr->constant.c_str());
414      auto val = new Value();
415      val->type = attr->type;
416      val->constant = NumToString(id - 1);
417      typefield->attributes.Add("id", val);
418    }
419  }
420
421  Expect(';');
422}
423
424void Parser::ParseAnyValue(Value &val, FieldDef *field) {
425  switch (val.type.base_type) {
426    case BASE_TYPE_UNION: {
427      assert(field);
428      if (!field_stack_.size() ||
429          field_stack_.back().second->value.type.base_type != BASE_TYPE_UTYPE)
430        Error("missing type field before this union value: " + field->name);
431      auto enum_idx = atot<unsigned char>(
432                                    field_stack_.back().first.constant.c_str());
433      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
434      if (!enum_val) Error("illegal type id for: " + field->name);
435      val.constant = NumToString(ParseTable(*enum_val->struct_def));
436      break;
437    }
438    case BASE_TYPE_STRUCT:
439      val.constant = NumToString(ParseTable(*val.type.struct_def));
440      break;
441    case BASE_TYPE_STRING: {
442      auto s = attribute_;
443      Expect(kTokenStringConstant);
444      val.constant = NumToString(builder_.CreateString(s).o);
445      break;
446    }
447    case BASE_TYPE_VECTOR: {
448      Expect('[');
449      val.constant = NumToString(ParseVector(val.type.VectorType()));
450      break;
451    }
452    default:
453      ParseSingleValue(val);
454      break;
455  }
456}
457
458void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
459  auto off = atot<uoffset_t>(val.constant.c_str());
460  assert(struct_stack_.size() - off == struct_def.bytesize);
461  builder_.Align(struct_def.minalign);
462  builder_.PushBytes(&struct_stack_[off], struct_def.bytesize);
463  struct_stack_.resize(struct_stack_.size() - struct_def.bytesize);
464  builder_.AddStructOffset(val.offset, builder_.GetSize());
465}
466
467uoffset_t Parser::ParseTable(const StructDef &struct_def) {
468  Expect('{');
469  size_t fieldn = 0;
470  if (!IsNext('}')) for (;;) {
471    std::string name = attribute_;
472    if (!IsNext(kTokenStringConstant)) Expect(kTokenIdentifier);
473    auto field = struct_def.fields.Lookup(name);
474    if (!field) Error("unknown field: " + name);
475    if (struct_def.fixed && (fieldn >= struct_def.fields.vec.size()
476                            || struct_def.fields.vec[fieldn] != field)) {
477       Error("struct field appearing out of order: " + name);
478    }
479    Expect(':');
480    Value val = field->value;
481    ParseAnyValue(val, field);
482    field_stack_.push_back(std::make_pair(val, field));
483    fieldn++;
484    if (IsNext('}')) break;
485    Expect(',');
486  }
487  for (auto it = field_stack_.rbegin();
488           it != field_stack_.rbegin() + fieldn; ++it) {
489    if (it->second->used)
490      Error("field set more than once: " + it->second->name);
491    it->second->used = true;
492  }
493  for (auto it = field_stack_.rbegin();
494           it != field_stack_.rbegin() + fieldn; ++it) {
495    it->second->used = false;
496  }
497  if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
498    Error("incomplete struct initialization: " + struct_def.name);
499  auto start = struct_def.fixed
500                 ? builder_.StartStruct(struct_def.minalign)
501                 : builder_.StartTable();
502
503  for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
504       size;
505       size /= 2) {
506    // Go through elements in reverse, since we're building the data backwards.
507    for (auto it = field_stack_.rbegin();
508             it != field_stack_.rbegin() + fieldn; ++it) {
509      auto &value = it->first;
510      auto field = it->second;
511      if (!struct_def.sortbysize || size == SizeOf(value.type.base_type)) {
512        switch (value.type.base_type) {
513          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
514            case BASE_TYPE_ ## ENUM: \
515              builder_.Pad(field->padding); \
516              if (struct_def.fixed) { \
517                builder_.PushElement(atot<CTYPE>(value.constant.c_str())); \
518              } else { \
519                builder_.AddElement(value.offset, \
520                             atot<CTYPE>(       value.constant.c_str()), \
521                             atot<CTYPE>(field->value.constant.c_str())); \
522              } \
523              break;
524            FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
525          #undef FLATBUFFERS_TD
526          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
527            case BASE_TYPE_ ## ENUM: \
528              builder_.Pad(field->padding); \
529              if (IsStruct(field->value.type)) { \
530                SerializeStruct(*field->value.type.struct_def, value); \
531              } else { \
532                builder_.AddOffset(value.offset, \
533                  atot<CTYPE>(value.constant.c_str())); \
534              } \
535              break;
536            FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
537          #undef FLATBUFFERS_TD
538        }
539      }
540    }
541  }
542  for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
543
544  if (struct_def.fixed) {
545    builder_.ClearOffsets();
546    builder_.EndStruct();
547    // Temporarily store this struct in a side buffer, since this data has to
548    // be stored in-line later in the parent object.
549    auto off = struct_stack_.size();
550    struct_stack_.insert(struct_stack_.end(),
551                         builder_.GetBufferPointer(),
552                         builder_.GetBufferPointer() + struct_def.bytesize);
553    builder_.PopBytes(struct_def.bytesize);
554    return static_cast<uoffset_t>(off);
555  } else {
556    return builder_.EndTable(
557      start,
558      static_cast<voffset_t>(struct_def.fields.vec.size()));
559  }
560}
561
562uoffset_t Parser::ParseVector(const Type &type) {
563  int count = 0;
564  if (token_ != ']') for (;;) {
565    Value val;
566    val.type = type;
567    ParseAnyValue(val, NULL);
568    field_stack_.push_back(std::make_pair(val, nullptr));
569    count++;
570    if (token_ == ']') break;
571    Expect(',');
572  }
573  Next();
574
575  builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
576                       InlineAlignment(type));
577  for (int i = 0; i < count; i++) {
578    // start at the back, since we're building the data backwards.
579    auto &val = field_stack_.back().first;
580    switch (val.type.base_type) {
581      #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
582        case BASE_TYPE_ ## ENUM: \
583          if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
584          else builder_.PushElement(atot<CTYPE>(val.constant.c_str())); \
585          break;
586        FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
587      #undef FLATBUFFERS_TD
588    }
589    field_stack_.pop_back();
590  }
591
592  builder_.ClearOffsets();
593  return builder_.EndVector(count);
594}
595
596void Parser::ParseMetaData(Definition &def) {
597  if (IsNext('(')) {
598    for (;;) {
599      auto name = attribute_;
600      Expect(kTokenIdentifier);
601      auto e = new Value();
602      def.attributes.Add(name, e);
603      if (IsNext(':')) {
604        ParseSingleValue(*e);
605      }
606      if (IsNext(')')) break;
607      Expect(',');
608    }
609  }
610}
611
612bool Parser::TryTypedValue(int dtoken,
613                           bool check,
614                           Value &e,
615                           BaseType req) {
616  bool match = dtoken == token_;
617  if (match) {
618    e.constant = attribute_;
619    if (!check) {
620      if (e.type.base_type == BASE_TYPE_NONE) {
621        e.type.base_type = req;
622      } else {
623        Error(std::string("type mismatch: expecting: ") +
624              kTypeNames[e.type.base_type] +
625              ", found: " +
626              kTypeNames[req]);
627      }
628    }
629    Next();
630  }
631  return match;
632}
633
634int64_t Parser::ParseIntegerFromString(Type &type) {
635  int64_t result = 0;
636  // Parse one or more enum identifiers, separated by spaces.
637  const char *next = attribute_.c_str();
638  do {
639    const char *divider = strchr(next, ' ');
640    std::string word;
641    if (divider) {
642      word = std::string(next, divider);
643      next = divider + strspn(divider, " ");
644    } else {
645      word = next;
646      next += word.length();
647    }
648    if (type.enum_def) {  // The field has an enum type
649      auto enum_val = type.enum_def->vals.Lookup(word);
650      if (!enum_val)
651        Error("unknown enum value: " + word +
652              ", for enum: " + type.enum_def->name);
653      result |= enum_val->value;
654    } else {  // No enum type, probably integral field.
655      if (!IsInteger(type.base_type))
656        Error("not a valid value for this field: " + word);
657      // TODO: could check if its a valid number constant here.
658      const char *dot = strchr(word.c_str(), '.');
659      if (!dot) Error("enum values need to be qualified by an enum type");
660      std::string enum_def_str(word.c_str(), dot);
661      std::string enum_val_str(dot + 1, word.c_str() + word.length());
662      auto enum_def = enums_.Lookup(enum_def_str);
663      if (!enum_def) Error("unknown enum: " + enum_def_str);
664      auto enum_val = enum_def->vals.Lookup(enum_val_str);
665      if (!enum_val) Error("unknown enum value: " + enum_val_str);
666      result |= enum_val->value;
667    }
668  } while(*next);
669  return result;
670}
671
672void Parser::ParseSingleValue(Value &e) {
673  // First check if this could be a string/identifier enum value:
674  if (e.type.base_type != BASE_TYPE_STRING &&
675      e.type.base_type != BASE_TYPE_NONE &&
676      (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
677      e.constant = NumToString(ParseIntegerFromString(e.type));
678      Next();
679  } else if (TryTypedValue(kTokenIntegerConstant,
680                    IsScalar(e.type.base_type),
681                    e,
682                    BASE_TYPE_INT) ||
683      TryTypedValue(kTokenFloatConstant,
684                    IsFloat(e.type.base_type),
685                    e,
686                    BASE_TYPE_FLOAT) ||
687      TryTypedValue(kTokenStringConstant,
688                    e.type.base_type == BASE_TYPE_STRING,
689                    e,
690                    BASE_TYPE_STRING)) {
691  } else {
692    Error("cannot parse value starting with: " + TokenToString(token_));
693  }
694}
695
696StructDef *Parser::LookupCreateStruct(const std::string &name) {
697  auto struct_def = structs_.Lookup(name);
698  if (!struct_def) {
699    // Rather than failing, we create a "pre declared" StructDef, due to
700    // circular references, and check for errors at the end of parsing.
701    struct_def = new StructDef();
702    structs_.Add(name, struct_def);
703    struct_def->name = name;
704    struct_def->predecl = true;
705    struct_def->defined_namespace = namespaces_.back();
706  }
707  return struct_def;
708}
709
710void Parser::ParseEnum(bool is_union) {
711  std::vector<std::string> dc = doc_comment_;
712  Next();
713  std::string name = attribute_;
714  Expect(kTokenIdentifier);
715  auto &enum_def = *new EnumDef();
716  enum_def.name = name;
717  enum_def.doc_comment = dc;
718  enum_def.is_union = is_union;
719  enum_def.defined_namespace = namespaces_.back();
720  if (enums_.Add(name, &enum_def)) Error("enum already exists: " + name);
721  if (is_union) {
722    enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
723    enum_def.underlying_type.enum_def = &enum_def;
724  } else {
725    if (proto_mode_) {
726      enum_def.underlying_type.base_type = BASE_TYPE_SHORT;
727    } else {
728      // Give specialized error message, since this type spec used to
729      // be optional in the first FlatBuffers release.
730      if (!IsNext(':')) Error("must specify the underlying integer type for this"
731                              " enum (e.g. \': short\', which was the default).");
732      // Specify the integer type underlying this enum.
733      ParseType(enum_def.underlying_type);
734      if (!IsInteger(enum_def.underlying_type.base_type))
735        Error("underlying enum type must be integral");
736    }
737    // Make this type refer back to the enum it was derived from.
738    enum_def.underlying_type.enum_def = &enum_def;
739  }
740  ParseMetaData(enum_def);
741  Expect('{');
742  if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
743  do {
744    std::string name = attribute_;
745    std::vector<std::string> dc = doc_comment_;
746    Expect(kTokenIdentifier);
747    auto prevsize = enum_def.vals.vec.size();
748    auto value = enum_def.vals.vec.size()
749      ? enum_def.vals.vec.back()->value + 1
750      : 0;
751    auto &ev = *new EnumVal(name, value);
752    if (enum_def.vals.Add(name, &ev))
753      Error("enum value already exists: " + name);
754    ev.doc_comment = dc;
755    if (is_union) {
756      ev.struct_def = LookupCreateStruct(name);
757    }
758    if (IsNext('=')) {
759      ev.value = atoi(attribute_.c_str());
760      Expect(kTokenIntegerConstant);
761      if (prevsize && enum_def.vals.vec[prevsize - 1]->value >= ev.value)
762        Error("enum values must be specified in ascending order");
763    }
764  } while (IsNext(proto_mode_ ? ';' : ',') && token_ != '}');
765  Expect('}');
766  if (enum_def.attributes.Lookup("bit_flags")) {
767    for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
768         ++it) {
769      if (static_cast<size_t>((*it)->value) >=
770           SizeOf(enum_def.underlying_type.base_type) * 8)
771        Error("bit flag out of range of underlying integral type");
772      (*it)->value = 1LL << (*it)->value;
773    }
774  }
775}
776
777StructDef &Parser::StartStruct() {
778  std::string name = attribute_;
779  Expect(kTokenIdentifier);
780  auto &struct_def = *LookupCreateStruct(name);
781  if (!struct_def.predecl) Error("datatype already exists: " + name);
782  struct_def.predecl = false;
783  struct_def.name = name;
784  // Move this struct to the back of the vector just in case it was predeclared,
785  // to preserve declaration order.
786  remove(structs_.vec.begin(), structs_.vec.end(), &struct_def);
787  structs_.vec.back() = &struct_def;
788  return struct_def;
789}
790
791void Parser::ParseDecl() {
792  std::vector<std::string> dc = doc_comment_;
793  bool fixed = IsNext(kTokenStruct);
794  if (!fixed) Expect(kTokenTable);
795  auto &struct_def = StartStruct();
796  struct_def.doc_comment = dc;
797  struct_def.fixed = fixed;
798  ParseMetaData(struct_def);
799  struct_def.sortbysize =
800    struct_def.attributes.Lookup("original_order") == nullptr && !fixed;
801  Expect('{');
802  while (token_ != '}') ParseField(struct_def);
803  auto force_align = struct_def.attributes.Lookup("force_align");
804  if (fixed && force_align) {
805    auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
806    if (force_align->type.base_type != BASE_TYPE_INT ||
807        align < struct_def.minalign ||
808        align > 256 ||
809        align & (align - 1))
810      Error("force_align must be a power of two integer ranging from the"
811            "struct\'s natural alignment to 256");
812    struct_def.minalign = align;
813  }
814  struct_def.PadLastField(struct_def.minalign);
815  // Check if this is a table that has manual id assignments
816  auto &fields = struct_def.fields.vec;
817  if (!struct_def.fixed && fields.size()) {
818    size_t num_id_fields = 0;
819    for (auto it = fields.begin(); it != fields.end(); ++it) {
820      if ((*it)->attributes.Lookup("id")) num_id_fields++;
821    }
822    // If any fields have ids..
823    if (num_id_fields) {
824      // Then all fields must have them.
825      if (num_id_fields != fields.size())
826        Error("either all fields or no fields must have an 'id' attribute");
827      // Simply sort by id, then the fields are the same as if no ids had
828      // been specified.
829      std::sort(fields.begin(), fields.end(),
830        [](const FieldDef *a, const FieldDef *b) -> bool {
831          auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
832          auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
833          return a_id < b_id;
834      });
835      // Verify we have a contiguous set, and reassign vtable offsets.
836      for (int i = 0; i < static_cast<int>(fields.size()); i++) {
837        if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
838          Error("field id\'s must be consecutive from 0, id " +
839                NumToString(i) + " missing or set twice");
840        fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
841      }
842    }
843  }
844  // Check that no identifiers clash with auto generated fields.
845  // This is not an ideal situation, but should occur very infrequently,
846  // and allows us to keep using very readable names for type & length fields
847  // without inducing compile errors.
848  auto CheckClash = [&fields, &struct_def](const char *suffix,
849                                           BaseType basetype) {
850    auto len = strlen(suffix);
851    for (auto it = fields.begin(); it != fields.end(); ++it) {
852      auto &name = (*it)->name;
853      if (name.length() > len &&
854          name.compare(name.length() - len, len, suffix) == 0 &&
855          (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
856        auto field = struct_def.fields.Lookup(
857                       name.substr(0, name.length() - len));
858        if (field && field->value.type.base_type == basetype)
859          Error("Field " + name +
860                " would clash with generated functions for field " +
861                field->name);
862      }
863    }
864  };
865  CheckClash("_type", BASE_TYPE_UNION);
866  CheckClash("Type", BASE_TYPE_UNION);
867  CheckClash("_length", BASE_TYPE_VECTOR);
868  CheckClash("Length", BASE_TYPE_VECTOR);
869  Expect('}');
870}
871
872bool Parser::SetRootType(const char *name) {
873  root_struct_def = structs_.Lookup(name);
874  return root_struct_def != nullptr;
875}
876
877void Parser::MarkGenerated() {
878  // Since the Parser object retains definitions across files, we must
879  // ensure we only output code for definitions once, in the file they are first
880  // declared. This function marks all existing definitions as having already
881  // been generated.
882  for (auto it = enums_.vec.begin();
883           it != enums_.vec.end(); ++it) {
884    (*it)->generated = true;
885  }
886  for (auto it = structs_.vec.begin();
887           it != structs_.vec.end(); ++it) {
888    (*it)->generated = true;
889  }
890}
891
892void Parser::ParseNamespace() {
893  Next();
894  auto ns = new Namespace();
895  namespaces_.push_back(ns);
896  for (;;) {
897    ns->components.push_back(attribute_);
898    Expect(kTokenIdentifier);
899    if (!IsNext('.')) break;
900  }
901  Expect(';');
902}
903
904// Best effort parsing of .proto declarations, with the aim to turn them
905// in the closest corresponding FlatBuffer equivalent.
906// We parse everything as identifiers instead of keywords, since we don't
907// want protobuf keywords to become invalid identifiers in FlatBuffers.
908void Parser::ParseProtoDecl() {
909  if (attribute_ == "package") {
910    // These are identical in syntax to FlatBuffer's namespace decl.
911    ParseNamespace();
912  } else if (attribute_ == "message") {
913    Next();
914    auto &struct_def = StartStruct();
915    Expect('{');
916    while (token_ != '}') {
917      // Parse the qualifier.
918      bool required = false;
919      bool repeated = false;
920      if (attribute_ == "optional") {
921        // This is the default.
922      } else if (attribute_ == "required") {
923        required = true;
924      } else if (attribute_ == "repeated") {
925        repeated = true;
926      } else {
927        Error("expecting optional/required/repeated, got: " + attribute_);
928      }
929      Type type = ParseTypeFromProtoType();
930      // Repeated elements get mapped to a vector.
931      if (repeated) {
932        type.element = type.base_type;
933        type.base_type = BASE_TYPE_VECTOR;
934      }
935      std::string name = attribute_;
936      Expect(kTokenIdentifier);
937      // Parse the field id. Since we're just translating schemas, not
938      // any kind of binary compatibility, we can safely ignore these, and
939      // assign our own.
940      Expect('=');
941      Expect(kTokenIntegerConstant);
942      auto &field = AddField(struct_def, name, type);
943      field.required = required;
944      // See if there's a default specified.
945      if (IsNext('[')) {
946        if (attribute_ != "default") Error("\'default\' expected");
947        Next();
948        Expect('=');
949        field.value.constant = attribute_;
950        Next();
951        Expect(']');
952      }
953      Expect(';');
954    }
955    Next();
956  } else if (attribute_ == "enum") {
957    // These are almost the same, just with different terminator:
958    ParseEnum(false);
959  } else if (attribute_ == "import") {
960    Next();
961    included_files_[attribute_] = true;
962    Expect(kTokenStringConstant);
963    Expect(';');
964  } else if (attribute_ == "option") {  // Skip these.
965    Next();
966    Expect(kTokenIdentifier);
967    Expect('=');
968    Next();  // Any single token.
969    Expect(';');
970  } else {
971    Error("don\'t know how to parse .proto declaration starting with " +
972          attribute_);
973  }
974}
975
976// Parse a protobuf type, and map it to the corresponding FlatBuffer one.
977Type Parser::ParseTypeFromProtoType() {
978  Expect(kTokenIdentifier);
979  struct type_lookup { const char *proto_type; BaseType fb_type; };
980  static type_lookup lookup[] = {
981    { "float", BASE_TYPE_FLOAT },  { "double", BASE_TYPE_DOUBLE },
982    { "int32", BASE_TYPE_INT },    { "int64", BASE_TYPE_LONG },
983    { "uint32", BASE_TYPE_UINT },  { "uint64", BASE_TYPE_ULONG },
984    { "sint32", BASE_TYPE_INT },   { "sint64", BASE_TYPE_LONG },
985    { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
986    { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
987    { "bool", BASE_TYPE_BOOL },
988    { "string", BASE_TYPE_STRING },
989    { "bytes", BASE_TYPE_STRING },
990    { nullptr, BASE_TYPE_NONE }
991  };
992  Type type;
993  for (auto tl = lookup; tl->proto_type; tl++) {
994    if (attribute_ == tl->proto_type) {
995      type.base_type = tl->fb_type;
996      Next();
997      return type;
998    }
999  }
1000  ParseTypeIdent(type);
1001  Expect(kTokenIdentifier);
1002  return type;
1003}
1004
1005bool Parser::Parse(const char *source, const char **include_paths,
1006                   const char *source_filename) {
1007  if (source_filename) included_files_[source_filename] = true;
1008  source_ = cursor_ = source;
1009  line_ = 1;
1010  error_.clear();
1011  builder_.Clear();
1012  try {
1013    Next();
1014    // Includes must come first:
1015    while (IsNext(kTokenInclude)) {
1016      auto name = attribute_;
1017      Expect(kTokenStringConstant);
1018      if (included_files_.find(name) == included_files_.end()) {
1019        // We found an include file that we have not parsed yet.
1020        // Load it and parse it.
1021        std::string contents;
1022        if (!include_paths) {
1023          const char *current_directory[] = { "", nullptr };
1024          include_paths = current_directory;
1025        }
1026        for (auto paths = include_paths; paths && *paths; paths++) {
1027          auto filepath = flatbuffers::ConCatPathFileName(*paths, name);
1028          if(LoadFile(filepath.c_str(), true, &contents)) break;
1029        }
1030        if (contents.empty())
1031          Error("unable to load include file: " + name);
1032        included_files_[name] = true;
1033        if (!Parse(contents.c_str(), include_paths)) {
1034          // Any errors, we're done.
1035          return false;
1036        }
1037        // We do not want to output code for any included files:
1038        MarkGenerated();
1039        // This is the easiest way to continue this file after an include:
1040        // instead of saving and restoring all the state, we simply start the
1041        // file anew. This will cause it to encounter the same include statement
1042        // again, but this time it will skip it, because it was entered into
1043        // included_files_.
1044        // This is recursive, but only go as deep as the number of include
1045        // statements.
1046        return Parse(source, include_paths, source_filename);
1047      }
1048      Expect(';');
1049    }
1050    // Now parse all other kinds of declarations:
1051    while (token_ != kTokenEof) {
1052      if (proto_mode_) {
1053        ParseProtoDecl();
1054      } else if (token_ == kTokenNameSpace) {
1055        ParseNamespace();
1056      } else if (token_ == '{') {
1057        if (!root_struct_def) Error("no root type set to parse json with");
1058        if (builder_.GetSize()) {
1059          Error("cannot have more than one json object in a file");
1060        }
1061        builder_.Finish(Offset<Table>(ParseTable(*root_struct_def)),
1062          file_identifier_.length() ? file_identifier_.c_str() : nullptr);
1063      } else if (token_ == kTokenEnum) {
1064        ParseEnum(false);
1065      } else if (token_ == kTokenUnion) {
1066        ParseEnum(true);
1067      } else if (token_ == kTokenRootType) {
1068        Next();
1069        auto root_type = attribute_;
1070        Expect(kTokenIdentifier);
1071        if (!SetRootType(root_type.c_str()))
1072          Error("unknown root type: " + root_type);
1073        if (root_struct_def->fixed)
1074          Error("root type must be a table");
1075        Expect(';');
1076      } else if (token_ == kTokenFileIdentifier) {
1077        Next();
1078        file_identifier_ = attribute_;
1079        Expect(kTokenStringConstant);
1080        if (file_identifier_.length() !=
1081            FlatBufferBuilder::kFileIdentifierLength)
1082          Error("file_identifier must be exactly " +
1083                NumToString(FlatBufferBuilder::kFileIdentifierLength) +
1084                " characters");
1085        Expect(';');
1086      } else if (token_ == kTokenFileExtension) {
1087        Next();
1088        file_extension_ = attribute_;
1089        Expect(kTokenStringConstant);
1090        Expect(';');
1091      } else if(token_ == kTokenInclude) {
1092        Error("includes must come before declarations");
1093      } else {
1094        ParseDecl();
1095      }
1096    }
1097    for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
1098      if ((*it)->predecl)
1099        Error("type referenced but not defined: " + (*it)->name);
1100    }
1101    for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
1102      auto &enum_def = **it;
1103      if (enum_def.is_union) {
1104        for (auto it = enum_def.vals.vec.begin();
1105             it != enum_def.vals.vec.end();
1106             ++it) {
1107          auto &val = **it;
1108          if (val.struct_def && val.struct_def->fixed)
1109            Error("only tables can be union elements: " + val.name);
1110        }
1111      }
1112    }
1113  } catch (const std::string &msg) {
1114    error_ = source_filename ? AbsolutePath(source_filename) : "";
1115    #ifdef _WIN32
1116      error_ += "(" + NumToString(line_) + ")";  // MSVC alike
1117    #else
1118      if (source_filename) error_ += ":";
1119      error_ += NumToString(line_) + ":0";  // gcc alike
1120    #endif
1121    error_ += ": error: " + msg;
1122    return false;
1123  }
1124  assert(!struct_stack_.size());
1125  return true;
1126}
1127
1128}  // namespace flatbuffers
1129