idl_parser.cpp revision 30af866e5ac9eb60f1998ebbe6fc77c1c3834cc1
1/*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <algorithm>
18
19#include "flatbuffers/flatbuffers.h"
20#include "flatbuffers/idl.h"
21#include "flatbuffers/util.h"
22
23namespace flatbuffers {
24
25const char *const kTypeNames[] = {
26  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) IDLTYPE,
27    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
28  #undef FLATBUFFERS_TD
29  nullptr
30};
31
32const char kTypeSizes[] = {
33  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) sizeof(CTYPE),
34    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
35  #undef FLATBUFFERS_TD
36};
37
// Aborts the current parse by throwing a std::string that carries the
// error text.
static void Error(const std::string &msg) {
  throw std::string(msg);
}
41
42// Ensure that integer values we parse fit inside the declared integer type.
43static void CheckBitsFit(int64_t val, size_t bits) {
44  auto mask = (1ll << bits) - 1;  // Bits we allow to be used.
45  if (bits < 64 &&
46      (val & ~mask) != 0 &&  // Positive or unsigned.
47      (val |  mask) != -1)   // Negative.
48    Error("constant does not fit in a " + NumToString(bits) + "-bit field");
49}
50
51// atot: templated version of atoi/atof: convert a string to an instance of T.
52template<typename T> inline T atot(const char *s) {
53  auto val = StringToInt(s);
54  CheckBitsFit(val, sizeof(T) * 8);
55  return (T)val;
56}
57template<> inline bool atot<bool>(const char *s) {
58  return 0 != atoi(s);
59}
60template<> inline float atot<float>(const char *s) {
61  return static_cast<float>(strtod(s, nullptr));
62}
63template<> inline double atot<double>(const char *s) {
64  return strtod(s, nullptr);
65}
66
67template<> inline Offset<void> atot<Offset<void>>(const char *s) {
68  return Offset<void>(atoi(s));
69}
70
71// Declare tokens we'll use. Single character tokens are represented by their
72// ascii character code (e.g. '{'), others above 256.
73#define FLATBUFFERS_GEN_TOKENS(TD) \
74  TD(Eof, 256, "end of file") \
75  TD(StringConstant, 257, "string constant") \
76  TD(IntegerConstant, 258, "integer constant") \
77  TD(FloatConstant, 259, "float constant") \
78  TD(Identifier, 260, "identifier") \
79  TD(Table, 261, "table") \
80  TD(Struct, 262, "struct") \
81  TD(Enum, 263, "enum") \
82  TD(Union, 264, "union") \
83  TD(NameSpace, 265, "namespace") \
84  TD(RootType, 266, "root_type") \
85  TD(FileIdentifier, 267, "file_identifier") \
86  TD(FileExtension, 268, "file_extension") \
87  TD(Include, 269, "include")
88#ifdef __GNUC__
89__extension__  // Stop GCC complaining about trailing comma with -Wpendantic.
90#endif
91enum {
92  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
93    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
94  #undef FLATBUFFERS_TOKEN
95  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) kToken ## ENUM,
96    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
97  #undef FLATBUFFERS_TD
98};
99
100static std::string TokenToString(int t) {
101  static const char *tokens[] = {
102    #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
103      FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
104    #undef FLATBUFFERS_TOKEN
105    #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) IDLTYPE,
106      FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
107    #undef FLATBUFFERS_TD
108  };
109  if (t < 256) {  // A single ascii char token.
110    std::string s;
111    s.append(1, t);
112    return s;
113  } else {       // Other tokens.
114    return tokens[t - 256];
115  }
116}
117
118// Parses exactly nibbles worth of hex digits into a number, or error.
119int64_t Parser::ParseHexNum(int nibbles) {
120  for (int i = 0; i < nibbles; i++)
121    if (!isxdigit(cursor_[i]))
122      Error("escape code must be followed by " + NumToString(nibbles) +
123            " hex digits");
124  auto val = StringToInt(cursor_, 16);
125  cursor_ += nibbles;
126  return val;
127}
128
129void Parser::Next() {
130  doc_comment_.clear();
131  bool seen_newline = false;
132  for (;;) {
133    char c = *cursor_++;
134    token_ = c;
135    switch (c) {
136      case '\0': cursor_--; token_ = kTokenEof; return;
137      case ' ': case '\r': case '\t': break;
138      case '\n': line_++; seen_newline = true; break;
139      case '{': case '}': case '(': case ')': case '[': case ']': return;
140      case ',': case ':': case ';': case '=': return;
141      case '.':
142        if(!isdigit(*cursor_)) return;
143        Error("floating point constant can\'t start with \".\"");
144        break;
145      case '\"':
146        attribute_ = "";
147        while (*cursor_ != '\"') {
148          if (*cursor_ < ' ' && *cursor_ >= 0)
149            Error("illegal character in string constant");
150          if (*cursor_ == '\\') {
151            cursor_++;
152            switch (*cursor_) {
153              case 'n':  attribute_ += '\n'; cursor_++; break;
154              case 't':  attribute_ += '\t'; cursor_++; break;
155              case 'r':  attribute_ += '\r'; cursor_++; break;
156              case 'b':  attribute_ += '\b'; cursor_++; break;
157              case 'f':  attribute_ += '\f'; cursor_++; break;
158              case '\"': attribute_ += '\"'; cursor_++; break;
159              case '\\': attribute_ += '\\'; cursor_++; break;
160              case '/':  attribute_ += '/';  cursor_++; break;
161              case 'x': {  // Not in the JSON standard
162                cursor_++;
163                attribute_ += static_cast<char>(ParseHexNum(2));
164                break;
165              }
166              case 'u': {
167                cursor_++;
168                ToUTF8(static_cast<int>(ParseHexNum(4)), &attribute_);
169                break;
170              }
171              default: Error("unknown escape code in string constant"); break;
172            }
173          } else { // printable chars + UTF-8 bytes
174            attribute_ += *cursor_++;
175          }
176        }
177        cursor_++;
178        token_ = kTokenStringConstant;
179        return;
180      case '/':
181        if (*cursor_ == '/') {
182          const char *start = ++cursor_;
183          while (*cursor_ && *cursor_ != '\n') cursor_++;
184          if (*start == '/') {  // documentation comment
185            if (!seen_newline)
186              Error("a documentation comment should be on a line on its own");
187            // todo: do we want to support multiline comments instead?
188            doc_comment_ += std::string(start + 1, cursor_);
189          }
190          break;
191        }
192        // fall thru
193      default:
194        if (isalpha(static_cast<unsigned char>(c))) {
195          // Collect all chars of an identifier:
196          const char *start = cursor_ - 1;
197          while (isalnum(static_cast<unsigned char>(*cursor_)) ||
198                 *cursor_ == '_')
199            cursor_++;
200          attribute_.clear();
201          attribute_.append(start, cursor_);
202          // First, see if it is a type keyword from the table of types:
203          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) \
204            if (attribute_ == IDLTYPE) { \
205              token_ = kToken ## ENUM; \
206              return; \
207            }
208            FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
209          #undef FLATBUFFERS_TD
210          // If it's a boolean constant keyword, turn those into integers,
211          // which simplifies our logic downstream.
212          if (attribute_ == "true" || attribute_ == "false") {
213            attribute_ = NumToString(attribute_ == "true");
214            token_ = kTokenIntegerConstant;
215            return;
216          }
217          // Check for declaration keywords:
218          if (attribute_ == "table")     { token_ = kTokenTable;     return; }
219          if (attribute_ == "struct")    { token_ = kTokenStruct;    return; }
220          if (attribute_ == "enum")      { token_ = kTokenEnum;      return; }
221          if (attribute_ == "union")     { token_ = kTokenUnion;     return; }
222          if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; }
223          if (attribute_ == "root_type") { token_ = kTokenRootType;  return; }
224          if (attribute_ == "include")   { token_ = kTokenInclude;  return; }
225          if (attribute_ == "file_identifier") {
226            token_ = kTokenFileIdentifier;
227            return;
228          }
229          if (attribute_ == "file_extension") {
230            token_ = kTokenFileExtension;
231            return;
232          }
233          // If not, it is a user-defined identifier:
234          token_ = kTokenIdentifier;
235          return;
236        } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
237          const char *start = cursor_ - 1;
238          while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
239          if (*cursor_ == '.') {
240            cursor_++;
241            while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
242            // See if this float has a scientific notation suffix. Both JSON
243            // and C++ (through strtod() we use) have the same format:
244            if (*cursor_ == 'e' || *cursor_ == 'E') {
245              cursor_++;
246              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
247              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
248            }
249            token_ = kTokenFloatConstant;
250          } else {
251            token_ = kTokenIntegerConstant;
252          }
253          attribute_.clear();
254          attribute_.append(start, cursor_);
255          return;
256        }
257        std::string ch;
258        ch = c;
259        if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
260        Error("illegal character: " + ch);
261        break;
262    }
263  }
264}
265
266// Check if a given token is next, if so, consume it as well.
267bool Parser::IsNext(int t) {
268  bool isnext = t == token_;
269  if (isnext) Next();
270  return isnext;
271}
272
273// Expect a given token to be next, consume it, or error if not present.
274void Parser::Expect(int t) {
275  if (t != token_) {
276    Error("expecting: " + TokenToString(t) + " instead got: " +
277          TokenToString(token_));
278  }
279  Next();
280}
281
282// Parse any IDL type.
283void Parser::ParseType(Type &type) {
284  if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
285    type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
286  } else {
287    if (token_ == kTokenIdentifier) {
288      auto enum_def = enums_.Lookup(attribute_);
289      if (enum_def) {
290        type = enum_def->underlying_type;
291        if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
292      } else {
293        type.base_type = BASE_TYPE_STRUCT;
294        type.struct_def = LookupCreateStruct(attribute_);
295      }
296    } else if (token_ == '[') {
297      Next();
298      Type subtype;
299      ParseType(subtype);
300      if (subtype.base_type == BASE_TYPE_VECTOR) {
301        // We could support this, but it will complicate things, and it's
302        // easier to work around with a struct around the inner vector.
303        Error("nested vector types not supported (wrap in table first).");
304      }
305      if (subtype.base_type == BASE_TYPE_UNION) {
306        // We could support this if we stored a struct of 2 elements per
307        // union element.
308        Error("vector of union types not supported (wrap in table first).");
309      }
310      type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
311      type.element = subtype.base_type;
312      Expect(']');
313      return;
314    } else {
315      Error("illegal type syntax");
316    }
317  }
318  Next();
319}
320
321FieldDef &Parser::AddField(StructDef &struct_def,
322                           const std::string &name,
323                           const Type &type) {
324  auto &field = *new FieldDef();
325  field.value.offset =
326    FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
327  field.name = name;
328  field.value.type = type;
329  if (struct_def.fixed) {  // statically compute the field offset
330    auto size = InlineSize(type);
331    auto alignment = InlineAlignment(type);
332    // structs_ need to have a predictable format, so we need to align to
333    // the largest scalar
334    struct_def.minalign = std::max(struct_def.minalign, alignment);
335    struct_def.PadLastField(alignment);
336    field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
337    struct_def.bytesize += size;
338  }
339  if (struct_def.fields.Add(name, &field))
340    Error("field already exists: " + name);
341  return field;
342}
343
344void Parser::ParseField(StructDef &struct_def) {
345  std::string name = attribute_;
346  std::string dc = doc_comment_;
347  Expect(kTokenIdentifier);
348  Expect(':');
349  Type type;
350  ParseType(type);
351
352  if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
353    Error("structs_ may contain only scalar or struct fields");
354
355  FieldDef *typefield = nullptr;
356  if (type.base_type == BASE_TYPE_UNION) {
357    // For union fields, add a second auto-generated field to hold the type,
358    // with _type appended as the name.
359    typefield = &AddField(struct_def, name + "_type",
360                          type.enum_def->underlying_type);
361  }
362
363  auto &field = AddField(struct_def, name, type);
364
365  if (token_ == '=') {
366    Next();
367    ParseSingleValue(field.value);
368  }
369
370  field.doc_comment = dc;
371  ParseMetaData(field);
372  field.deprecated = field.attributes.Lookup("deprecated") != nullptr;
373  if (field.deprecated && struct_def.fixed)
374    Error("can't deprecate fields in a struct");
375  auto nested = field.attributes.Lookup("nested_flatbuffer");
376  if (nested) {
377    if (nested->type.base_type != BASE_TYPE_STRING)
378      Error("nested_flatbuffer attribute must be a string (the root type)");
379    if (field.value.type.base_type != BASE_TYPE_VECTOR ||
380        field.value.type.element != BASE_TYPE_UCHAR)
381      Error("nested_flatbuffer attribute may only apply to a vector of ubyte");
382    // This will cause an error if the root type of the nested flatbuffer
383    // wasn't defined elsewhere.
384    LookupCreateStruct(nested->constant);
385  }
386
387  if (typefield) {
388    // If this field is a union, and it has a manually assigned id,
389    // the automatically added type field should have an id as well (of N - 1).
390    auto attr = field.attributes.Lookup("id");
391    if (attr) {
392      auto id = atoi(attr->constant.c_str());
393      auto val = new Value();
394      val->type = attr->type;
395      val->constant = NumToString(id - 1);
396      typefield->attributes.Add("id", val);
397    }
398  }
399
400  Expect(';');
401}
402
403void Parser::ParseAnyValue(Value &val, FieldDef *field) {
404  switch (val.type.base_type) {
405    case BASE_TYPE_UNION: {
406      assert(field);
407      if (!field_stack_.size() ||
408          field_stack_.back().second->value.type.base_type != BASE_TYPE_UTYPE)
409        Error("missing type field before this union value: " + field->name);
410      auto enum_idx = atot<unsigned char>(
411                                    field_stack_.back().first.constant.c_str());
412      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
413      if (!enum_val) Error("illegal type id for: " + field->name);
414      val.constant = NumToString(ParseTable(*enum_val->struct_def));
415      break;
416    }
417    case BASE_TYPE_STRUCT:
418      val.constant = NumToString(ParseTable(*val.type.struct_def));
419      break;
420    case BASE_TYPE_STRING: {
421      auto s = attribute_;
422      Expect(kTokenStringConstant);
423      val.constant = NumToString(builder_.CreateString(s).o);
424      break;
425    }
426    case BASE_TYPE_VECTOR: {
427      Expect('[');
428      val.constant = NumToString(ParseVector(val.type.VectorType()));
429      break;
430    }
431    default:
432      ParseSingleValue(val);
433      break;
434  }
435}
436
437void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
438  auto off = atot<uoffset_t>(val.constant.c_str());
439  assert(struct_stack_.size() - off == struct_def.bytesize);
440  builder_.Align(struct_def.minalign);
441  builder_.PushBytes(&struct_stack_[off], struct_def.bytesize);
442  struct_stack_.resize(struct_stack_.size() - struct_def.bytesize);
443  builder_.AddStructOffset(val.offset, builder_.GetSize());
444}
445
446uoffset_t Parser::ParseTable(const StructDef &struct_def) {
447  Expect('{');
448  size_t fieldn = 0;
449  if (!IsNext('}')) for (;;) {
450    std::string name = attribute_;
451    if (!IsNext(kTokenStringConstant)) Expect(kTokenIdentifier);
452    auto field = struct_def.fields.Lookup(name);
453    if (!field) Error("unknown field: " + name);
454    if (struct_def.fixed && (fieldn >= struct_def.fields.vec.size()
455                            || struct_def.fields.vec[fieldn] != field)) {
456       Error("struct field appearing out of order: " + name);
457    }
458    Expect(':');
459    Value val = field->value;
460    ParseAnyValue(val, field);
461    field_stack_.push_back(std::make_pair(val, field));
462    fieldn++;
463    if (IsNext('}')) break;
464    Expect(',');
465  }
466  if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
467    Error("incomplete struct initialization: " + struct_def.name);
468  auto start = struct_def.fixed
469                 ? builder_.StartStruct(struct_def.minalign)
470                 : builder_.StartTable();
471
472  for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
473       size;
474       size /= 2) {
475    // Go through elements in reverse, since we're building the data backwards.
476    for (auto it = field_stack_.rbegin();
477             it != field_stack_.rbegin() + fieldn; ++it) {
478      auto &value = it->first;
479      auto field = it->second;
480      if (!struct_def.sortbysize || size == SizeOf(value.type.base_type)) {
481        switch (value.type.base_type) {
482          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) \
483            case BASE_TYPE_ ## ENUM: \
484              builder_.Pad(field->padding); \
485              if (struct_def.fixed) { \
486                builder_.PushElement(atot<CTYPE>(value.constant.c_str())); \
487              } else { \
488                builder_.AddElement(value.offset, \
489                             atot<CTYPE>(       value.constant.c_str()), \
490                             atot<CTYPE>(field->value.constant.c_str())); \
491              } \
492              break;
493            FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
494          #undef FLATBUFFERS_TD
495          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) \
496            case BASE_TYPE_ ## ENUM: \
497              builder_.Pad(field->padding); \
498              if (IsStruct(field->value.type)) { \
499                SerializeStruct(*field->value.type.struct_def, value); \
500              } else { \
501                builder_.AddOffset(value.offset, \
502                  atot<CTYPE>(value.constant.c_str())); \
503              } \
504              break;
505            FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
506          #undef FLATBUFFERS_TD
507        }
508      }
509    }
510  }
511  for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
512
513  if (struct_def.fixed) {
514    builder_.ClearOffsets();
515    builder_.EndStruct();
516    // Temporarily store this struct in a side buffer, since this data has to
517    // be stored in-line later in the parent object.
518    auto off = struct_stack_.size();
519    struct_stack_.insert(struct_stack_.end(),
520                         builder_.GetBufferPointer(),
521                         builder_.GetBufferPointer() + struct_def.bytesize);
522    builder_.PopBytes(struct_def.bytesize);
523    return static_cast<uoffset_t>(off);
524  } else {
525    return builder_.EndTable(
526      start,
527      static_cast<voffset_t>(struct_def.fields.vec.size()));
528  }
529}
530
531uoffset_t Parser::ParseVector(const Type &type) {
532  int count = 0;
533  if (token_ != ']') for (;;) {
534    Value val;
535    val.type = type;
536    ParseAnyValue(val, NULL);
537    field_stack_.push_back(std::make_pair(val, nullptr));
538    count++;
539    if (token_ == ']') break;
540    Expect(',');
541  }
542  Next();
543
544  builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
545                       InlineAlignment(type));
546  for (int i = 0; i < count; i++) {
547    // start at the back, since we're building the data backwards.
548    auto &val = field_stack_.back().first;
549    switch (val.type.base_type) {
550      #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE) \
551        case BASE_TYPE_ ## ENUM: \
552          if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
553          else builder_.PushElement(atot<CTYPE>(val.constant.c_str())); \
554          break;
555        FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
556      #undef FLATBUFFERS_TD
557    }
558    field_stack_.pop_back();
559  }
560
561  builder_.ClearOffsets();
562  return builder_.EndVector(count);
563}
564
565void Parser::ParseMetaData(Definition &def) {
566  if (IsNext('(')) {
567    for (;;) {
568      auto name = attribute_;
569      Expect(kTokenIdentifier);
570      auto e = new Value();
571      def.attributes.Add(name, e);
572      if (IsNext(':')) {
573        ParseSingleValue(*e);
574      }
575      if (IsNext(')')) break;
576      Expect(',');
577    }
578  }
579}
580
581bool Parser::TryTypedValue(int dtoken,
582                           bool check,
583                           Value &e,
584                           BaseType req) {
585  bool match = dtoken == token_;
586  if (match) {
587    e.constant = attribute_;
588    if (!check) {
589      if (e.type.base_type == BASE_TYPE_NONE) {
590        e.type.base_type = req;
591      } else {
592        Error(std::string("type mismatch: expecting: ") +
593              kTypeNames[e.type.base_type] +
594              ", found: " +
595              kTypeNames[req]);
596      }
597    }
598    Next();
599  }
600  return match;
601}
602
603int64_t Parser::ParseIntegerFromString(Type &type) {
604  int64_t result = 0;
605  // Parse one or more enum identifiers, separated by spaces.
606  const char *next = attribute_.c_str();
607  do {
608    const char *divider = strchr(next, ' ');
609    std::string word;
610    if (divider) {
611      word = std::string(next, divider);
612      next = divider + strspn(divider, " ");
613    } else {
614      word = next;
615      next += word.length();
616    }
617    if (type.enum_def) {  // The field has an enum type
618      auto enum_val = type.enum_def->vals.Lookup(word);
619      if (!enum_val)
620        Error("unknown enum value: " + word +
621              ", for enum: " + type.enum_def->name);
622      result |= enum_val->value;
623    } else {  // No enum type, probably integral field.
624      if (!IsInteger(type.base_type))
625        Error("not a valid value for this field: " + word);
626      // TODO: could check if its a valid number constant here.
627      const char *dot = strchr(word.c_str(), '.');
628      if (!dot) Error("enum values need to be qualified by an enum type");
629      std::string enum_def_str(word.c_str(), dot);
630      std::string enum_val_str(dot + 1, word.c_str() + word.length());
631      auto enum_def = enums_.Lookup(enum_def_str);
632      if (!enum_def) Error("unknown enum: " + enum_def_str);
633      auto enum_val = enum_def->vals.Lookup(enum_val_str);
634      if (!enum_val) Error("unknown enum value: " + enum_val_str);
635      result |= enum_val->value;
636    }
637  } while(*next);
638  return result;
639}
640
641void Parser::ParseSingleValue(Value &e) {
642  // First check if this could be a string/identifier enum value:
643  if (e.type.base_type != BASE_TYPE_STRING &&
644      e.type.base_type != BASE_TYPE_NONE &&
645      (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
646      e.constant = NumToString(ParseIntegerFromString(e.type));
647      Next();
648  } else if (TryTypedValue(kTokenIntegerConstant,
649                    IsScalar(e.type.base_type),
650                    e,
651                    BASE_TYPE_INT) ||
652      TryTypedValue(kTokenFloatConstant,
653                    IsFloat(e.type.base_type),
654                    e,
655                    BASE_TYPE_FLOAT) ||
656      TryTypedValue(kTokenStringConstant,
657                    e.type.base_type == BASE_TYPE_STRING,
658                    e,
659                    BASE_TYPE_STRING)) {
660  } else {
661    Error("cannot parse value starting with: " + TokenToString(token_));
662  }
663}
664
665StructDef *Parser::LookupCreateStruct(const std::string &name) {
666  auto struct_def = structs_.Lookup(name);
667  if (!struct_def) {
668    // Rather than failing, we create a "pre declared" StructDef, due to
669    // circular references, and check for errors at the end of parsing.
670    struct_def = new StructDef();
671    structs_.Add(name, struct_def);
672    struct_def->name = name;
673    struct_def->predecl = true;
674    struct_def->defined_namespace = namespaces_.back();
675  }
676  return struct_def;
677}
678
679void Parser::ParseEnum(bool is_union) {
680  std::string dc = doc_comment_;
681  Next();
682  std::string name = attribute_;
683  Expect(kTokenIdentifier);
684  auto &enum_def = *new EnumDef();
685  enum_def.name = name;
686  enum_def.doc_comment = dc;
687  enum_def.is_union = is_union;
688  if (enums_.Add(name, &enum_def)) Error("enum already exists: " + name);
689  if (is_union) {
690    enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
691    enum_def.underlying_type.enum_def = &enum_def;
692  } else {
693    // Give specialized error message, since this type spec used to
694    // be optional in the first FlatBuffers release.
695    if (!IsNext(':')) Error("must specify the underlying integer type for this"
696                            " enum (e.g. \': short\', which was the default).");
697    // Specify the integer type underlying this enum.
698    ParseType(enum_def.underlying_type);
699    if (!IsInteger(enum_def.underlying_type.base_type))
700      Error("underlying enum type must be integral");
701    // Make this type refer back to the enum it was derived from.
702    enum_def.underlying_type.enum_def = &enum_def;
703  }
704  ParseMetaData(enum_def);
705  Expect('{');
706  if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
707  do {
708    std::string name = attribute_;
709    std::string dc = doc_comment_;
710    Expect(kTokenIdentifier);
711    auto prevsize = enum_def.vals.vec.size();
712    auto value = enum_def.vals.vec.size()
713      ? enum_def.vals.vec.back()->value + 1
714      : 0;
715    auto &ev = *new EnumVal(name, value);
716    if (enum_def.vals.Add(name, &ev))
717      Error("enum value already exists: " + name);
718    ev.doc_comment = dc;
719    if (is_union) {
720      ev.struct_def = LookupCreateStruct(name);
721    }
722    if (IsNext('=')) {
723      ev.value = atoi(attribute_.c_str());
724      Expect(kTokenIntegerConstant);
725      if (prevsize && enum_def.vals.vec[prevsize - 1]->value >= ev.value)
726        Error("enum values must be specified in ascending order");
727    }
728  } while (IsNext(','));
729  Expect('}');
730  if (enum_def.attributes.Lookup("bit_flags")) {
731    for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
732         ++it) {
733      if (static_cast<size_t>((*it)->value) >=
734           SizeOf(enum_def.underlying_type.base_type) * 8)
735        Error("bit flag out of range of underlying integral type");
736      (*it)->value = 1LL << (*it)->value;
737    }
738  }
739}
740
741void Parser::ParseDecl() {
742  std::string dc = doc_comment_;
743  bool fixed = IsNext(kTokenStruct);
744  if (!fixed) Expect(kTokenTable);
745  std::string name = attribute_;
746  Expect(kTokenIdentifier);
747  auto &struct_def = *LookupCreateStruct(name);
748  if (!struct_def.predecl) Error("datatype already exists: " + name);
749  struct_def.predecl = false;
750  struct_def.name = name;
751  struct_def.doc_comment = dc;
752  struct_def.fixed = fixed;
753  // Move this struct to the back of the vector just in case it was predeclared,
754  // to preserve declartion order.
755  remove(structs_.vec.begin(), structs_.vec.end(), &struct_def);
756  structs_.vec.back() = &struct_def;
757  ParseMetaData(struct_def);
758  struct_def.sortbysize =
759    struct_def.attributes.Lookup("original_order") == nullptr && !fixed;
760  Expect('{');
761  while (token_ != '}') ParseField(struct_def);
762  auto force_align = struct_def.attributes.Lookup("force_align");
763  if (fixed && force_align) {
764    auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
765    if (force_align->type.base_type != BASE_TYPE_INT ||
766        align < struct_def.minalign ||
767        align > 256 ||
768        align & (align - 1))
769      Error("force_align must be a power of two integer ranging from the"
770            "struct\'s natural alignment to 256");
771    struct_def.minalign = align;
772  }
773  struct_def.PadLastField(struct_def.minalign);
774  // Check if this is a table that has manual id assignments
775  auto &fields = struct_def.fields.vec;
776  if (!struct_def.fixed && fields.size()) {
777    size_t num_id_fields = 0;
778    for (auto it = fields.begin(); it != fields.end(); ++it) {
779      if ((*it)->attributes.Lookup("id")) num_id_fields++;
780    }
781    // If any fields have ids..
782    if (num_id_fields) {
783      // Then all fields must have them.
784      if (num_id_fields != fields.size())
785        Error("either all fields or no fields must have an 'id' attribute");
786      // Simply sort by id, then the fields are the same as if no ids had
787      // been specified.
788      std::sort(fields.begin(), fields.end(),
789        [](const FieldDef *a, const FieldDef *b) -> bool {
790          auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
791          auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
792          return a_id < b_id;
793      });
794      // Verify we have a contiguous set, and reassign vtable offsets.
795      for (int i = 0; i < static_cast<int>(fields.size()); i++) {
796        if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
797          Error("field id\'s must be consecutive from 0, id " +
798                NumToString(i) + " missing or set twice");
799        fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
800      }
801    }
802  }
803  Expect('}');
804}
805
806bool Parser::SetRootType(const char *name) {
807  root_struct_def = structs_.Lookup(name);
808  return root_struct_def != nullptr;
809}
810
811void Parser::MarkGenerated() {
812  // Since the Parser object retains definitions across files, we must
813  // ensure we only output code for definitions once, in the file they are first
814  // declared. This function marks all existing definitions as having already
815  // been generated.
816  for (auto it = enums_.vec.begin();
817           it != enums_.vec.end(); ++it) {
818    (*it)->generated = true;
819  }
820  for (auto it = structs_.vec.begin();
821           it != structs_.vec.end(); ++it) {
822    (*it)->generated = true;
823  }
824}
825
826bool Parser::Parse(const char *source, const char *filepath) {
827  included_files_[filepath] = true;
828  // This is the starting point to reset to if we interrupted our parsing
829  // to deal with an include:
830  restart_parse_after_include:
831  source_ = cursor_ = source;
832  line_ = 1;
833  error_.clear();
834  builder_.Clear();
835  try {
836    Next();
837    // Includes must come first:
838    while (IsNext(kTokenInclude)) {
839      auto name = attribute_;
840      Expect(kTokenStringConstant);
841      auto path = StripFileName(filepath);
842      if (path.length()) name = path + kPathSeparator + name;
843      if (included_files_.find(name) == included_files_.end()) {
844        // We found an include file that we have not parsed yet.
845        // Load it and parse it.
846        std::string contents;
847        if (!LoadFile(name.c_str(), true, &contents))
848          Error("unable to load include file: " + name);
849        Parse(contents.c_str(), name.c_str());
850        // Any errors, we're done.
851        if (error_.length()) return false;
852        // We do not want to output code for any included files:
853        MarkGenerated();
854        // This is the easiest way to continue this file after an include:
855        // instead of saving and restoring all the state, we simply start the
856        // file anew. This will cause it to encounter the same include statement
857        // again, but this time it will skip it, because it was entered into
858        // included_files_.
859        goto restart_parse_after_include;
860      }
861      Expect(';');
862    }
863    // Now parse all other kinds of declarations:
864    while (token_ != kTokenEof) {
865      if (token_ == kTokenNameSpace) {
866        Next();
867        auto ns = new Namespace();
868        namespaces_.push_back(ns);
869        for (;;) {
870          ns->components.push_back(attribute_);
871          Expect(kTokenIdentifier);
872          if (!IsNext('.')) break;
873        }
874        Expect(';');
875      } else if (token_ == '{') {
876        if (!root_struct_def) Error("no root type set to parse json with");
877        if (builder_.GetSize()) {
878          Error("cannot have more than one json object in a file");
879        }
880        builder_.Finish(Offset<Table>(ParseTable(*root_struct_def)));
881      } else if (token_ == kTokenEnum) {
882        ParseEnum(false);
883      } else if (token_ == kTokenUnion) {
884        ParseEnum(true);
885      } else if (token_ == kTokenRootType) {
886        Next();
887        auto root_type = attribute_;
888        Expect(kTokenIdentifier);
889        if (!SetRootType(root_type.c_str()))
890          Error("unknown root type: " + root_type);
891        if (root_struct_def->fixed)
892          Error("root type must be a table");
893        Expect(';');
894      } else if (token_ == kTokenFileIdentifier) {
895        Next();
896        file_identifier_ = attribute_;
897        Expect(kTokenStringConstant);
898        if (file_identifier_.length() !=
899            FlatBufferBuilder::kFileIdentifierLength)
900          Error("file_identifier must be exactly " +
901                NumToString(FlatBufferBuilder::kFileIdentifierLength) +
902                " characters");
903        Expect(';');
904      } else if (token_ == kTokenFileExtension) {
905        Next();
906        file_extension_ = attribute_;
907        Expect(kTokenStringConstant);
908        Expect(';');
909      } else if(token_ == kTokenInclude) {
910        Error("includes must come before declarations");
911      } else {
912        ParseDecl();
913      }
914    }
915    for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
916      if ((*it)->predecl)
917        Error("type referenced but not defined: " + (*it)->name);
918    }
919    for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
920      auto &enum_def = **it;
921      if (enum_def.is_union) {
922        for (auto it = enum_def.vals.vec.begin();
923             it != enum_def.vals.vec.end();
924             ++it) {
925          auto &val = **it;
926          if (val.struct_def && val.struct_def->fixed)
927            Error("only tables can be union elements: " + val.name);
928        }
929      }
930    }
931  } catch (const std::string &msg) {
932    error_ = "line " + NumToString(line_) + ": " + msg;
933    return false;
934  }
935  assert(!struct_stack_.size());
936  return true;
937}
938
939}  // namespace flatbuffers
940