idl_parser.cpp revision 0e85eeef2c6ed3eb9ec201aaea6caa62612a8522
1/*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <algorithm>
18#include <list>
19
20#ifdef _WIN32
21#if !defined(_USE_MATH_DEFINES)
22#define _USE_MATH_DEFINES  // For M_PI.
23#endif                     // !defined(_USE_MATH_DEFINES)
24#endif                     // _WIN32
25
26#include <math.h>
27
28#include "flatbuffers/idl.h"
29#include "flatbuffers/util.h"
30
31namespace flatbuffers {
32
// Table of IDL type spellings: one entry per line of FLATBUFFERS_GEN_TYPES,
// keeping only the IDLTYPE column, in the order the macro lists them. A
// trailing nullptr entry terminates the table.
const char *const kTypeNames[] = {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
    IDLTYPE,
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
  nullptr
};
40
// Table of sizeof(CTYPE) for every line of FLATBUFFERS_GEN_TYPES, in the
// order the macro lists them (the same order as kTypeNames, but without a
// terminating sentinel). Sizes are stored as char.
const char kTypeSizes[] = {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
      sizeof(CTYPE),
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
};
47
// The enums in the reflection schema should match the ones we use internally.
// Only the last element (Union) is compared: a compile-time check that
// BASE_TYPE_UNION and reflection::Union have the same value, which catches
// the two lists drifting out of sync by an added or removed entry.
static_assert(BASE_TYPE_UNION ==
              static_cast<BaseType>(reflection::Union),
              "enums don't match");
53
// Any parsing calls have to be wrapped in this macro, which automates
// handling of recursive error checking a bit. It evaluates `call` once, checks
// the received CheckedError object, and returns it from the enclosing
// function straight away on error. Because it expands to a `return`, it can
// only be used inside functions that themselves return CheckedError.
#define ECHECK(call) { auto ce = (call); if (ce.Check()) return ce; }

// These two functions are called hundreds of times below, so define a short
// form. Both expand to ECHECK, so they also return early on error:
#define NEXT() ECHECK(Next())
#define EXPECT(tok) ECHECK(Expect(tok))
63
64static bool ValidateUTF8(const std::string &str) {
65  const char *s = &str[0];
66  const char * const sEnd = s + str.length();
67  while (s < sEnd) {
68    if (FromUTF8(&s) < 0) {
69      return false;
70    }
71  }
72  return true;
73}
74
75CheckedError Parser::Error(const std::string &msg) {
76  error_ = file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
77  #ifdef _WIN32
78    error_ += "(" + NumToString(line_) + ")";  // MSVC alike
79  #else
80    if (file_being_parsed_.length()) error_ += ":";
81    error_ += NumToString(line_) + ":0";  // gcc alike
82  #endif
83  error_ += ": error: " + msg;
84  return CheckedError(true);
85}
86
87inline CheckedError NoError() { return CheckedError(false); }
88
89inline std::string OutOfRangeErrorMsg(int64_t val, const std::string &op,
90                                      int64_t limit) {
91  const std::string cause = NumToString(val) + op + NumToString(limit);
92  return "constant does not fit (" + cause + ")";
93}
94
95// Ensure that integer values we parse fit inside the declared integer type.
96CheckedError Parser::CheckInRange(int64_t val, int64_t min, int64_t max) {
97  if (val < min)
98    return Error(OutOfRangeErrorMsg(val, " < ", min));
99  else if (val > max)
100    return Error(OutOfRangeErrorMsg(val, " > ", max));
101  else
102    return NoError();
103}
104
105// atot: templated version of atoi/atof: convert a string to an instance of T.
106template<typename T> inline CheckedError atot(const char *s, Parser &parser,
107                                              T *val) {
108  int64_t i = StringToInt(s);
109  const int64_t min = std::numeric_limits<T>::min();
110  const int64_t max = std::numeric_limits<T>::max();
111  ECHECK(parser.CheckInRange(i, min, max));
112  *val = (T)i;
113  return NoError();
114}
115template<> inline CheckedError atot<uint64_t>(const char *s, Parser &parser,
116                                              uint64_t *val) {
117  (void)parser;
118  *val = StringToUInt(s);
119  return NoError();
120}
121template<> inline CheckedError atot<bool>(const char *s, Parser &parser,
122                                          bool *val) {
123  (void)parser;
124  *val = 0 != atoi(s);
125  return NoError();
126}
127template<> inline CheckedError atot<float>(const char *s, Parser &parser,
128                                           float *val) {
129  (void)parser;
130  *val = static_cast<float>(strtod(s, nullptr));
131  return NoError();
132}
133template<> inline CheckedError atot<double>(const char *s, Parser &parser,
134                                            double *val) {
135  (void)parser;
136  *val = strtod(s, nullptr);
137  return NoError();
138}
139
140template<> inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
141                                                  Offset<void> *val) {
142  (void)parser;
143  *val = Offset<void>(atoi(s));
144  return NoError();
145}
146
147std::string Namespace::GetFullyQualifiedName(const std::string &name,
148                                             size_t max_components) const {
149  // Early exit if we don't have a defined namespace.
150  if (components.size() == 0 || !max_components) {
151    return name;
152  }
153  std::stringstream stream;
154  for (size_t i = 0; i < std::min(components.size(), max_components);
155       i++) {
156    if (i) {
157      stream << ".";
158    }
159    stream << components[i];
160  }
161  if (name.length()) stream << "." << name;
162  return stream.str();
163}
164
165
166
// Declare tokens we'll use. Single character tokens are represented by their
// ascii character code (e.g. '{'), others start at 256.
// Each TD entry is (enumerator suffix, numeric value, human-readable name);
// the values are consecutive from 256, which TokenToString() relies on when it
// indexes its name table with `t - 256`.
#define FLATBUFFERS_GEN_TOKENS(TD) \
  TD(Eof, 256, "end of file") \
  TD(StringConstant, 257, "string constant") \
  TD(IntegerConstant, 258, "integer constant") \
  TD(FloatConstant, 259, "float constant") \
  TD(Identifier, 260, "identifier") \
  TD(Table, 261, "table") \
  TD(Struct, 262, "struct") \
  TD(Enum, 263, "enum") \
  TD(Union, 264, "union") \
  TD(NameSpace, 265, "namespace") \
  TD(RootType, 266, "root_type") \
  TD(FileIdentifier, 267, "file_identifier") \
  TD(FileExtension, 268, "file_extension") \
  TD(Include, 269, "include") \
  TD(Attribute, 270, "attribute") \
  TD(Null, 271, "null") \
  TD(Service, 272, "rpc_service") \
  TD(NativeInclude, 273, "native_include")
#ifdef __GNUC__
__extension__  // Stop GCC complaining about trailing comma with -Wpedantic.
#endif
// Token ids: first the kToken<NAME> constants listed above with their explicit
// values, then one kToken<ENUM> per base type from FLATBUFFERS_GEN_TYPES,
// numbered implicitly so they continue right after the last explicit value.
enum {
  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
  #undef FLATBUFFERS_TOKEN
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
      kToken ## ENUM,
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
};
200
201static std::string TokenToString(int t) {
202  static const char *tokens[] = {
203    #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
204      FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
205    #undef FLATBUFFERS_TOKEN
206    #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
207      IDLTYPE,
208      FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
209    #undef FLATBUFFERS_TD
210  };
211  if (t < 256) {  // A single ascii char token.
212    std::string s;
213    s.append(1, static_cast<char>(t));
214    return s;
215  } else {       // Other tokens.
216    return tokens[t - 256];
217  }
218}
219
220std::string Parser::TokenToStringId(int t) {
221  return TokenToString(t) + (t == kTokenIdentifier ? ": " + attribute_ : "");
222}
223
224// Parses exactly nibbles worth of hex digits into a number, or error.
225CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
226  for (int i = 0; i < nibbles; i++)
227    if (!isxdigit(static_cast<const unsigned char>(cursor_[i])))
228      return Error("escape code must be followed by " + NumToString(nibbles) +
229                   " hex digits");
230  std::string target(cursor_, cursor_ + nibbles);
231  *val = StringToUInt(target.c_str(), nullptr, 16);
232  cursor_ += nibbles;
233  return NoError();
234}
235
236CheckedError Parser::SkipByteOrderMark() {
237  if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
238  cursor_++;
239  if (static_cast<unsigned char>(*cursor_) != 0xbb) return Error("invalid utf-8 byte order mark");
240  cursor_++;
241  if (static_cast<unsigned char>(*cursor_) != 0xbf) return Error("invalid utf-8 byte order mark");
242  cursor_++;
243  return NoError();
244}
245
// True if `c` may begin an identifier: an underscore or an alphabetic
// character (as classified by isalpha()).
bool IsIdentifierStart(char c) {
  if (c == '_') return true;
  // Convert through unsigned char: isalpha() must not be given negative
  // values.
  return isalpha(static_cast<unsigned char>(c)) != 0;
}
249
250CheckedError Parser::Next() {
251  doc_comment_.clear();
252  bool seen_newline = false;
253  attribute_.clear();
254  for (;;) {
255    char c = *cursor_++;
256    token_ = c;
257    switch (c) {
258      case '\0': cursor_--; token_ = kTokenEof; return NoError();
259      case ' ': case '\r': case '\t': break;
260      case '\n': line_++; seen_newline = true; break;
261      case '{': case '}': case '(': case ')': case '[': case ']':
262      case ',': case ':': case ';': case '=': return NoError();
263      case '.':
264        if(!isdigit(static_cast<const unsigned char>(*cursor_))) return NoError();
265        return Error("floating point constant can\'t start with \".\"");
266      case '\"':
267      case '\'': {
268        int unicode_high_surrogate = -1;
269
270        while (*cursor_ != c) {
271          if (*cursor_ < ' ' && *cursor_ >= 0)
272            return Error("illegal character in string constant");
273          if (*cursor_ == '\\') {
274            cursor_++;
275            if (unicode_high_surrogate != -1 &&
276                *cursor_ != 'u') {
277              return Error(
278                "illegal Unicode sequence (unpaired high surrogate)");
279            }
280            switch (*cursor_) {
281              case 'n':  attribute_ += '\n'; cursor_++; break;
282              case 't':  attribute_ += '\t'; cursor_++; break;
283              case 'r':  attribute_ += '\r'; cursor_++; break;
284              case 'b':  attribute_ += '\b'; cursor_++; break;
285              case 'f':  attribute_ += '\f'; cursor_++; break;
286              case '\"': attribute_ += '\"'; cursor_++; break;
287              case '\'': attribute_ += '\''; cursor_++; break;
288              case '\\': attribute_ += '\\'; cursor_++; break;
289              case '/':  attribute_ += '/';  cursor_++; break;
290              case 'x': {  // Not in the JSON standard
291                cursor_++;
292                uint64_t val;
293                ECHECK(ParseHexNum(2, &val));
294                attribute_ += static_cast<char>(val);
295                break;
296              }
297              case 'u': {
298                cursor_++;
299                uint64_t val;
300                ECHECK(ParseHexNum(4, &val));
301                if (val >= 0xD800 && val <= 0xDBFF) {
302                  if (unicode_high_surrogate != -1) {
303                    return Error(
304                      "illegal Unicode sequence (multiple high surrogates)");
305                  } else {
306                    unicode_high_surrogate = static_cast<int>(val);
307                  }
308                } else if (val >= 0xDC00 && val <= 0xDFFF) {
309                  if (unicode_high_surrogate == -1) {
310                    return Error(
311                      "illegal Unicode sequence (unpaired low surrogate)");
312                  } else {
313                    int code_point = 0x10000 +
314                      ((unicode_high_surrogate & 0x03FF) << 10) +
315                      (val & 0x03FF);
316                    ToUTF8(code_point, &attribute_);
317                    unicode_high_surrogate = -1;
318                  }
319                } else {
320                  if (unicode_high_surrogate != -1) {
321                    return Error(
322                      "illegal Unicode sequence (unpaired high surrogate)");
323                  }
324                  ToUTF8(static_cast<int>(val), &attribute_);
325                }
326                break;
327              }
328              default: return Error("unknown escape code in string constant");
329            }
330          } else { // printable chars + UTF-8 bytes
331            if (unicode_high_surrogate != -1) {
332              return Error(
333                "illegal Unicode sequence (unpaired high surrogate)");
334            }
335            attribute_ += *cursor_++;
336          }
337        }
338        if (unicode_high_surrogate != -1) {
339          return Error(
340            "illegal Unicode sequence (unpaired high surrogate)");
341        }
342        cursor_++;
343        if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) {
344          return Error("illegal UTF-8 sequence");
345        }
346        token_ = kTokenStringConstant;
347        return NoError();
348      }
349      case '/':
350        if (*cursor_ == '/') {
351          const char *start = ++cursor_;
352          while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
353          if (*start == '/') {  // documentation comment
354            if (cursor_ != source_ && !seen_newline)
355              return Error(
356                    "a documentation comment should be on a line on its own");
357            doc_comment_.push_back(std::string(start + 1, cursor_));
358          }
359          break;
360        } else if (*cursor_ == '*') {
361          cursor_++;
362          // TODO: make nested.
363          while (*cursor_ != '*' || cursor_[1] != '/') {
364            if (*cursor_ == '\n') line_++;
365            if (!*cursor_) return Error("end of file in comment");
366            cursor_++;
367          }
368          cursor_ += 2;
369          break;
370        }
371        // fall thru
372      default:
373        if (IsIdentifierStart(c)) {
374          // Collect all chars of an identifier:
375          const char *start = cursor_ - 1;
376          while (isalnum(static_cast<unsigned char>(*cursor_)) ||
377                 *cursor_ == '_')
378            cursor_++;
379          attribute_.append(start, cursor_);
380          // First, see if it is a type keyword from the table of types:
381          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
382            PTYPE) \
383            if (attribute_ == IDLTYPE) { \
384              token_ = kToken ## ENUM; \
385              return NoError(); \
386            }
387            FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
388          #undef FLATBUFFERS_TD
389          // If it's a boolean constant keyword, turn those into integers,
390          // which simplifies our logic downstream.
391          if (attribute_ == "true" || attribute_ == "false") {
392            attribute_ = NumToString(attribute_ == "true");
393            token_ = kTokenIntegerConstant;
394            return NoError();
395          }
396          // Check for declaration keywords:
397          if (attribute_ == "table") {
398            token_ = kTokenTable;
399            return NoError();
400          }
401          if (attribute_ == "struct") {
402            token_ = kTokenStruct;
403            return NoError();
404          }
405          if (attribute_ == "enum") {
406            token_ = kTokenEnum;
407            return NoError();
408          }
409          if (attribute_ == "union") {
410            token_ = kTokenUnion;
411            return NoError();
412          }
413          if (attribute_ == "namespace") {
414            token_ = kTokenNameSpace;
415            return NoError();
416          }
417          if (attribute_ == "root_type") {
418            token_ = kTokenRootType;
419            return NoError();
420          }
421          if (attribute_ == "include") {
422            token_ = kTokenInclude;
423            return NoError();
424          }
425          if (attribute_ == "attribute") {
426            token_ = kTokenAttribute;
427            return NoError();
428          }
429          if (attribute_ == "file_identifier") {
430            token_ = kTokenFileIdentifier;
431            return NoError();
432          }
433          if (attribute_ == "file_extension") {
434            token_ = kTokenFileExtension;
435            return NoError();
436          }
437          if (attribute_ == "null") {
438            token_ = kTokenNull;
439            return NoError();
440          }
441          if (attribute_ == "rpc_service") {
442            token_ = kTokenService;
443            return NoError();
444          }
445          if (attribute_ == "native_include") {
446            token_ = kTokenNativeInclude;
447            return NoError();
448          }
449          // If not, it is a user-defined identifier:
450          token_ = kTokenIdentifier;
451          return NoError();
452        } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
453          const char *start = cursor_ - 1;
454          if (c == '-' && *cursor_ == '0' &&
455              (cursor_[1] == 'x' || cursor_[1] == 'X')) {
456            ++start;
457            ++cursor_;
458            attribute_.append(&c, &c + 1);
459            c = '0';
460          }
461          if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) {
462              cursor_++;
463              while (isxdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
464              attribute_.append(start + 2, cursor_);
465              attribute_ = NumToString(static_cast<int64_t>(
466                             StringToUInt(attribute_.c_str(), nullptr, 16)));
467              token_ = kTokenIntegerConstant;
468              return NoError();
469          }
470          while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
471          if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') {
472            if (*cursor_ == '.') {
473              cursor_++;
474              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
475            }
476            // See if this float has a scientific notation suffix. Both JSON
477            // and C++ (through strtod() we use) have the same format:
478            if (*cursor_ == 'e' || *cursor_ == 'E') {
479              cursor_++;
480              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
481              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
482            }
483            token_ = kTokenFloatConstant;
484          } else {
485            token_ = kTokenIntegerConstant;
486          }
487          attribute_.append(start, cursor_);
488          return NoError();
489        }
490        std::string ch;
491        ch = c;
492        if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
493        return Error("illegal character: " + ch);
494    }
495  }
496}
497
498// Check if a given token is next.
499bool Parser::Is(int t) {
500  return t == token_;
501}
502
503// Expect a given token to be next, consume it, or error if not present.
504CheckedError Parser::Expect(int t) {
505  if (t != token_) {
506    return Error("expecting: " + TokenToString(t) + " instead got: " +
507                 TokenToStringId(token_));
508  }
509  NEXT();
510  return NoError();
511}
512
513CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
514  while (Is('.')) {
515    NEXT();
516    *id += ".";
517    *id += attribute_;
518    if (last) *last = attribute_;
519    EXPECT(kTokenIdentifier);
520  }
521  return NoError();
522}
523
524EnumDef *Parser::LookupEnum(const std::string &id) {
525  // Search thru parent namespaces.
526  for (int components = static_cast<int>(namespaces_.back()->components.size());
527       components >= 0; components--) {
528    auto ed = enums_.Lookup(
529                namespaces_.back()->GetFullyQualifiedName(id, components));
530    if (ed) return ed;
531  }
532  return nullptr;
533}
534
535CheckedError Parser::ParseTypeIdent(Type &type) {
536  std::string id = attribute_;
537  EXPECT(kTokenIdentifier);
538  ECHECK(ParseNamespacing(&id, nullptr));
539  auto enum_def = LookupEnum(id);
540  if (enum_def) {
541    type = enum_def->underlying_type;
542    if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
543  } else {
544    type.base_type = BASE_TYPE_STRUCT;
545    type.struct_def = LookupCreateStruct(id);
546  }
547  return NoError();
548}
549
550// Parse any IDL type.
551CheckedError Parser::ParseType(Type &type) {
552  if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
553    type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
554    NEXT();
555  } else {
556    if (token_ == kTokenIdentifier) {
557      ECHECK(ParseTypeIdent(type));
558    } else if (token_ == '[') {
559      NEXT();
560      Type subtype;
561      ECHECK(ParseType(subtype));
562      if (subtype.base_type == BASE_TYPE_VECTOR) {
563        // We could support this, but it will complicate things, and it's
564        // easier to work around with a struct around the inner vector.
565        return Error(
566              "nested vector types not supported (wrap in table first).");
567      }
568      type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
569      type.element = subtype.base_type;
570      EXPECT(']');
571    } else {
572      return Error("illegal type syntax");
573    }
574  }
575  return NoError();
576}
577
578CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
579                              const Type &type, FieldDef **dest) {
580  auto &field = *new FieldDef();
581  field.value.offset =
582    FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
583  field.name = name;
584  field.file = struct_def.file;
585  field.value.type = type;
586  if (struct_def.fixed) {  // statically compute the field offset
587    auto size = InlineSize(type);
588    auto alignment = InlineAlignment(type);
589    // structs_ need to have a predictable format, so we need to align to
590    // the largest scalar
591    struct_def.minalign = std::max(struct_def.minalign, alignment);
592    struct_def.PadLastField(alignment);
593    field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
594    struct_def.bytesize += size;
595  }
596  if (struct_def.fields.Add(name, &field))
597    return Error("field already exists: " + name);
598  *dest = &field;
599  return NoError();
600}
601
602CheckedError Parser::ParseField(StructDef &struct_def) {
603  std::string name = attribute_;
604
605  if (name == struct_def.name)
606    return Error("field name can not be the same as table/struct name");
607
608  std::vector<std::string> dc = doc_comment_;
609  EXPECT(kTokenIdentifier);
610  EXPECT(':');
611  Type type;
612  ECHECK(ParseType(type));
613
614  if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
615    return Error("structs_ may contain only scalar or struct fields");
616
617  FieldDef *typefield = nullptr;
618  if (type.base_type == BASE_TYPE_UNION) {
619    // For union fields, add a second auto-generated field to hold the type,
620    // with a special suffix.
621    ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
622                    type.enum_def->underlying_type, &typefield));
623  } else if (type.base_type == BASE_TYPE_VECTOR &&
624             type.element == BASE_TYPE_UNION) {
625    // Only cpp supports the union vector feature so far.
626    if (opts.lang_to_generate != IDLOptions::kCpp) {
627      return Error("Vectors of unions are not yet supported in all "
628                   "the specified programming languages.");
629    }
630    // For vector of union fields, add a second auto-generated vector field to
631    // hold the types, with a special suffix.
632    Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
633    union_vector.element = BASE_TYPE_UTYPE;
634    ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
635                    union_vector, &typefield));
636  }
637
638  FieldDef *field;
639  ECHECK(AddField(struct_def, name, type, &field));
640
641  if (token_ == '=') {
642    NEXT();
643    if (!IsScalar(type.base_type))
644      return Error("default values currently only supported for scalars");
645    ECHECK(ParseSingleValue(field->value));
646  }
647  if (IsFloat(field->value.type.base_type)) {
648    if (!strpbrk(field->value.constant.c_str(), ".eE"))
649      field->value.constant += ".0";
650  }
651
652  if (type.enum_def &&
653      IsScalar(type.base_type) &&
654      !struct_def.fixed &&
655      !type.enum_def->attributes.Lookup("bit_flags") &&
656      !type.enum_def->ReverseLookup(static_cast<int>(
657                         StringToInt(field->value.constant.c_str()))))
658    return Error("enum " + type.enum_def->name +
659          " does not have a declaration for this field\'s default of " +
660          field->value.constant);
661
662  field->doc_comment = dc;
663  ECHECK(ParseMetaData(&field->attributes));
664  field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
665  auto hash_name = field->attributes.Lookup("hash");
666  if (hash_name) {
667    switch (type.base_type) {
668      case BASE_TYPE_INT:
669      case BASE_TYPE_UINT: {
670        if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
671          return Error("Unknown hashing algorithm for 32 bit types: " +
672                hash_name->constant);
673        break;
674      }
675      case BASE_TYPE_LONG:
676      case BASE_TYPE_ULONG: {
677        if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
678          return Error("Unknown hashing algorithm for 64 bit types: " +
679                hash_name->constant);
680        break;
681      }
682      default:
683        return Error(
684              "only int, uint, long and ulong data types support hashing.");
685    }
686  }
687  auto cpp_type = field->attributes.Lookup("cpp_type");
688  if (cpp_type) {
689    if (!hash_name)
690      return Error("cpp_type can only be used with a hashed field");
691  }
692  if (field->deprecated && struct_def.fixed)
693    return Error("can't deprecate fields in a struct");
694  field->required = field->attributes.Lookup("required") != nullptr;
695  if (field->required && (struct_def.fixed ||
696                         IsScalar(field->value.type.base_type)))
697    return Error("only non-scalar fields in tables may be 'required'");
698  field->key = field->attributes.Lookup("key") != nullptr;
699  if (field->key) {
700    if (struct_def.has_key)
701      return Error("only one field may be set as 'key'");
702    struct_def.has_key = true;
703    if (!IsScalar(field->value.type.base_type)) {
704      field->required = true;
705      if (field->value.type.base_type != BASE_TYPE_STRING)
706        return Error("'key' field must be string or scalar type");
707    }
708  }
709
710  field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
711  if (field->native_inline && !IsStruct(field->value.type))
712    return Error("native_inline can only be defined on structs'");
713
714  auto nested = field->attributes.Lookup("nested_flatbuffer");
715  if (nested) {
716    if (nested->type.base_type != BASE_TYPE_STRING)
717      return Error(
718            "nested_flatbuffer attribute must be a string (the root type)");
719    if (field->value.type.base_type != BASE_TYPE_VECTOR ||
720        field->value.type.element != BASE_TYPE_UCHAR)
721      return Error(
722            "nested_flatbuffer attribute may only apply to a vector of ubyte");
723    // This will cause an error if the root type of the nested flatbuffer
724    // wasn't defined elsewhere.
725    LookupCreateStruct(nested->constant);
726  }
727
728  if (field->attributes.Lookup("flexbuffer")) {
729    field->flexbuffer = true;
730    uses_flexbuffers_ = true;
731    if (field->value.type.base_type != BASE_TYPE_VECTOR ||
732        field->value.type.element != BASE_TYPE_UCHAR)
733      return Error(
734            "flexbuffer attribute may only apply to a vector of ubyte");
735  }
736
737  if (typefield) {
738    // If this field is a union, and it has a manually assigned id,
739    // the automatically added type field should have an id as well (of N - 1).
740    auto attr = field->attributes.Lookup("id");
741    if (attr) {
742      auto id = atoi(attr->constant.c_str());
743      auto val = new Value();
744      val->type = attr->type;
745      val->constant = NumToString(id - 1);
746      typefield->attributes.Add("id", val);
747    }
748  }
749
750  EXPECT(';');
751  return NoError();
752}
753
754CheckedError Parser::ParseString(Value &val) {
755  auto s = attribute_;
756  EXPECT(kTokenStringConstant);
757  val.constant = NumToString(builder_.CreateString(s).o);
758  return NoError();
759}
760
761CheckedError Parser::ParseComma() {
762  if (!opts.protobuf_ascii_alike) EXPECT(',');
763  return NoError();
764}
765
// Parses the value at the current token according to val.type and stores the
// result in val.constant (what it holds is produced by ParseTable,
// ParseString, ParseVector, ParseHash or ParseSingleValue as appropriate; for
// vectors and fixed structs inside unions it is a builder offset/size as
// text).
//
//   val                In: the type to parse. Out: the parsed constant.
//   field              The field being parsed; must be non-null for unions,
//                      and is consulted for the "hash" attribute on integers.
//   parent_fieldn      How many of the most recent field_stack_ entries belong
//                      to the enclosing table (searched for a union's type).
//   parent_struct_def  The enclosing table's definition; needed for unions
//                      whose type field has not been parsed yet.
CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
                                   size_t parent_fieldn,
                                   const StructDef *parent_struct_def) {
  switch (val.type.base_type) {
    case BASE_TYPE_UNION: {
      assert(field);
      // The union's type id as text; empty until we have found it.
      std::string constant;
      // Find corresponding type field we may have already parsed.
      for (auto elem = field_stack_.rbegin();
           elem != field_stack_.rbegin() + parent_fieldn; ++elem) {
        auto &type = elem->second->value.type;
        if (type.base_type == BASE_TYPE_UTYPE &&
            type.enum_def == val.type.enum_def) {
          constant = elem->first.constant;
          break;
        }
      }
      if (constant.empty()) {
        // We haven't seen the type field yet. Sadly a lot of JSON writers
        // output these in alphabetical order, meaning it comes after this
        // value. So we scan past the value to find it, then come back here.
        auto type_name = field->name + UnionTypeFieldSuffix();
        assert(parent_struct_def);
        auto type_field = parent_struct_def->fields.Lookup(type_name);
        assert(type_field);  // Guaranteed by ParseField().
        // Remember where we are in the source file, so we can come back here.
        auto backup = *static_cast<ParserState *>(this);
        ECHECK(SkipAnyJsonValue());  // The table.
        ECHECK(ParseComma());
        // The key that follows may be quoted or a bare identifier; either
        // way its text is in attribute_ before the token is consumed.
        auto next_name = attribute_;
        if (Is(kTokenStringConstant)) {
          NEXT();
        } else {
          EXPECT(kTokenIdentifier);
        }
        // Only the field immediately after the union value is accepted.
        if (next_name != type_name)
          return Error("missing type field after this union value: " +
                       type_name);
        EXPECT(':');
        Value type_val = type_field->value;
        ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr));
        constant = type_val.constant;
        // Got the information we needed, now rewind:
        *static_cast<ParserState *>(this) = backup;
      }
      // Convert the type id and map it to the union member it selects.
      uint8_t enum_idx;
      ECHECK(atot(constant.c_str(), *this, &enum_idx));
      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
      if (!enum_val) return Error("illegal type id for: " + field->name);
      if (enum_val->union_type.base_type == BASE_TYPE_STRUCT) {
        ECHECK(ParseTable(*enum_val->union_type.struct_def, &val.constant,
                          nullptr));
        if (enum_val->union_type.struct_def->fixed) {
          // All BASE_TYPE_UNION values are offsets, so turn this into one.
          SerializeStruct(*enum_val->union_type.struct_def, val);
          builder_.ClearOffsets();
          val.constant = NumToString(builder_.GetSize());
        }
      } else if (enum_val->union_type.base_type == BASE_TYPE_STRING) {
        ECHECK(ParseString(val));
      } else {
        // Union members are only handled here as structs/tables or strings.
        assert(false);
      }
      break;
    }
    case BASE_TYPE_STRUCT:
      ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
      break;
    case BASE_TYPE_STRING: {
      ECHECK(ParseString(val));
      break;
    }
    case BASE_TYPE_VECTOR: {
      uoffset_t off;
      ECHECK(ParseVector(val.type.VectorType(), &off));
      val.constant = NumToString(off);
      break;
    }
    case BASE_TYPE_INT:
    case BASE_TYPE_UINT:
    case BASE_TYPE_LONG:
    case BASE_TYPE_ULONG: {
      // A field with a "hash" attribute may be given as a name or string,
      // which is handed to ParseHash(); plain numbers are parsed as usual.
      if (field && field->attributes.Lookup("hash") &&
          (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
        ECHECK(ParseHash(val, field));
      } else {
        ECHECK(ParseSingleValue(val));
      }
      break;
    }
    default:
      ECHECK(ParseSingleValue(val));
      break;
  }
  return NoError();
}
862
863void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
864  assert(val.constant.length() == struct_def.bytesize);
865  builder_.Align(struct_def.minalign);
866  builder_.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
867                     struct_def.bytesize);
868  builder_.AddStructOffset(val.offset, builder_.GetSize());
869}
870
871CheckedError Parser::ParseTableDelimiters(size_t &fieldn,
872                                          const StructDef *struct_def,
873    const std::function<CheckedError(const std::string &name)> &body) {
874  // We allow tables both as JSON object{ .. } with field names
875  // or vector[..] with all fields in order
876  char terminator = '}';
877  bool is_nested_vector = struct_def && Is('[');
878  if (is_nested_vector) {
879    NEXT();
880    terminator = ']';
881  } else {
882    EXPECT('{');
883  }
884  for (;;) {
885    if ((!opts.strict_json || !fieldn) && Is(terminator)) break;
886    std::string name;
887    if (is_nested_vector) {
888      if (fieldn > struct_def->fields.vec.size()) {
889        return Error("too many unnamed fields in nested array");
890      }
891      name = struct_def->fields.vec[fieldn]->name;
892    } else {
893      name = attribute_;
894      if (Is(kTokenStringConstant)) {
895        NEXT();
896      } else {
897        EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
898      }
899      if (!opts.protobuf_ascii_alike || !(Is('{') || Is('['))) EXPECT(':');
900    }
901    ECHECK(body(name));
902    if (Is(terminator)) break;
903    ECHECK(ParseComma());
904  }
905  NEXT();
906  if (is_nested_vector && fieldn != struct_def->fields.vec.size()) {
907    return Error("wrong number of unnamed fields in table vector");
908  }
909  return NoError();
910}
911
// Parses one table or struct value described by struct_def and serializes it
// through builder_.
//
// Fields are collected on field_stack_ (the top `fieldn` entries belong to
// this call), kept ordered by field offset, written to the builder and then
// popped again.
//
// Results:
//  - fixed struct: the encoded bytes are stored in *value (value must be
//    non-null and ovalue must be null);
//  - table: the offset returned by EndTable is stored in *ovalue and, as a
//    decimal string, in *value, for whichever of the two is non-null.
//
// Returns an error for an unknown field (unless
// opts.skip_unexpected_fields_in_json is set), a field given twice, a missing
// required field, or a struct with the wrong number of initializers.
CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
                                uoffset_t *ovalue) {
  size_t fieldn = 0;
  auto err = ParseTableDelimiters(fieldn, &struct_def,
                                  [&](const std::string &name) -> CheckedError {
    auto field = struct_def.fields.Lookup(name);
    if (!field) {
      if (!opts.skip_unexpected_fields_in_json) {
        return Error("unknown field: " + name);
      } else {
        ECHECK(SkipAnyJsonValue());
      }
    } else {
      if (Is(kTokenNull)) {
        NEXT(); // Ignore this field.
      } else {
        // Start from the field's declared value (type, offset, default).
        Value val = field->value;
        if (field->flexbuffer) {
          // Flexbuffer fields are parsed into their own builder and stored
          // as a vector of the finished buffer's bytes.
          flexbuffers::Builder builder(1024,
                                       flexbuffers::BUILDER_FLAG_SHARE_ALL);
          ECHECK(ParseFlexBufferValue(&builder));
          builder.Finish();
          auto off = builder_.CreateVector(builder.GetBuffer());
          val.constant = NumToString(off.o);
        } else {
          ECHECK(ParseAnyValue(val, field, fieldn, &struct_def));
        }
        // Hardcoded insertion-sort with error-check.
        // If fields are specified in order, then this loop exits immediately.
        auto elem = field_stack_.rbegin();
        for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
          auto existing_field = elem->second;
          if (existing_field == field)
            return Error("field set more than once: " + field->name);
          if (existing_field->value.offset < field->value.offset) break;
        }
        // Note: elem points to before the insertion point, thus .base() points
        // to the correct spot.
        field_stack_.insert(elem.base(), std::make_pair(val, field));
        fieldn++;
      }
    }
    return NoError();
  });
  ECHECK(err);

  // Check if all required fields are parsed.
  for (auto field_it = struct_def.fields.vec.begin();
            field_it != struct_def.fields.vec.end();
            ++field_it) {
    auto required_field = *field_it;
    if (!required_field->required) {
      continue;
    }
    bool found = false;
    // Only the last `fieldn` stack entries were pushed by this call.
    for (auto pf_it = field_stack_.end() - fieldn;
         pf_it != field_stack_.end();
         ++pf_it) {
      auto parsed_field = pf_it->second;
      if (parsed_field == required_field) {
        found = true;
        break;
      }
    }
    if (!found) {
      return Error("required field is missing: " + required_field->name + " in " + struct_def.name);
    }
  }

  // A fixed struct has no optional fields: every one must be initialized.
  if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
    return Error("struct: wrong number of initializers: " + struct_def.name);

  auto start = struct_def.fixed
                 ? builder_.StartStruct(struct_def.minalign)
                 : builder_.StartTable();

  // With sortbysize, make one pass per size, starting at
  // sizeof(largest_scalar_t) and halving down to 1, emitting only the fields
  // whose type has that size. Without it, a single pass emits every field.
  for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
       size;
       size /= 2) {
    // Go through elements in reverse, since we're building the data backwards.
    for (auto it = field_stack_.rbegin();
             it != field_stack_.rbegin() + fieldn; ++it) {
      auto &field_value = it->first;
      auto field = it->second;
      if (!struct_def.sortbysize ||
          size == SizeOf(field_value.type.base_type)) {
        switch (field_value.type.base_type) {
          // Scalar types: structs push the element inline, tables add it
          // together with the field's default value.
          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
            PTYPE) \
            case BASE_TYPE_ ## ENUM: \
              builder_.Pad(field->padding); \
              if (struct_def.fixed) { \
                CTYPE val; \
                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
                builder_.PushElement(val); \
              } else { \
                CTYPE val, valdef; \
                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
                ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
                builder_.AddElement(field_value.offset, val, valdef); \
              } \
              break;
            FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
          #undef FLATBUFFERS_TD
          // Pointer types: nested structs are serialized in place, anything
          // else is added as an offset.
          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
            PTYPE) \
            case BASE_TYPE_ ## ENUM: \
              builder_.Pad(field->padding); \
              if (IsStruct(field->value.type)) { \
                SerializeStruct(*field->value.type.struct_def, field_value); \
              } else { \
                CTYPE val; \
                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
                builder_.AddOffset(field_value.offset, val); \
              } \
              break;
            FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
          #undef FLATBUFFERS_TD
        }
      }
    }
  }
  // Remove this table's entries from the shared stack.
  for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();

  if (struct_def.fixed) {
    builder_.ClearOffsets();
    builder_.EndStruct();
    assert(value);
    // Temporarily store this struct in the value string, since it is to
    // be serialized in-place elsewhere.
    value->assign(
          reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
          struct_def.bytesize);
    builder_.PopBytes(struct_def.bytesize);
    assert(!ovalue);
  } else {
    auto val = builder_.EndTable(start,
                          static_cast<voffset_t>(struct_def.fields.vec.size()));
    if (ovalue) *ovalue = val;
    if (value) *value = NumToString(val);
  }
  return NoError();
}
1055
1056CheckedError Parser::ParseVectorDelimiters(size_t &count,
1057                                    const std::function<CheckedError()> &body) {
1058  EXPECT('[');
1059  for (;;) {
1060    if ((!opts.strict_json || !count) && Is(']')) break;
1061    ECHECK(body());
1062    count++;
1063    if (Is(']')) break;
1064    ECHECK(ParseComma());
1065  }
1066  NEXT();
1067  return NoError();
1068}
1069
1070CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue) {
1071  size_t count = 0;
1072  auto err = ParseVectorDelimiters(count, [&]() -> CheckedError {
1073    Value val;
1074    val.type = type;
1075    ECHECK(ParseAnyValue(val, nullptr, 0, nullptr));
1076    field_stack_.push_back(std::make_pair(val, nullptr));
1077    return NoError();
1078  });
1079  ECHECK(err);
1080
1081  builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
1082                       InlineAlignment(type));
1083  for (size_t i = 0; i < count; i++) {
1084    // start at the back, since we're building the data backwards.
1085    auto &val = field_stack_.back().first;
1086    switch (val.type.base_type) {
1087      #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
1088        case BASE_TYPE_ ## ENUM: \
1089          if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
1090          else { \
1091             CTYPE elem; \
1092             ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1093             builder_.PushElement(elem); \
1094          } \
1095          break;
1096        FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1097      #undef FLATBUFFERS_TD
1098    }
1099    field_stack_.pop_back();
1100  }
1101
1102  builder_.ClearOffsets();
1103  *ovalue = builder_.EndVector(count);
1104  return NoError();
1105}
1106
1107CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
1108  if (Is('(')) {
1109    NEXT();
1110    for (;;) {
1111      auto name = attribute_;
1112      EXPECT(kTokenIdentifier);
1113      if (known_attributes_.find(name) == known_attributes_.end())
1114        return Error("user define attributes must be declared before use: " +
1115                     name);
1116      auto e = new Value();
1117      attributes->Add(name, e);
1118      if (Is(':')) {
1119        NEXT();
1120        ECHECK(ParseSingleValue(*e));
1121      }
1122      if (Is(')')) { NEXT(); break; }
1123      EXPECT(',');
1124    }
1125  }
1126  return NoError();
1127}
1128
1129CheckedError Parser::TryTypedValue(int dtoken, bool check, Value &e,
1130                                   BaseType req, bool *destmatch) {
1131  bool match = dtoken == token_;
1132  if (match) {
1133    *destmatch = true;
1134    e.constant = attribute_;
1135    if (!check) {
1136      if (e.type.base_type == BASE_TYPE_NONE) {
1137        e.type.base_type = req;
1138      } else {
1139        return Error(std::string("type mismatch: expecting: ") +
1140                     kTypeNames[e.type.base_type] +
1141                     ", found: " +
1142                     kTypeNames[req]);
1143      }
1144    }
1145    NEXT();
1146  }
1147  return NoError();
1148}
1149
1150CheckedError Parser::ParseEnumFromString(Type &type, int64_t *result) {
1151  *result = 0;
1152  // Parse one or more enum identifiers, separated by spaces.
1153  const char *next = attribute_.c_str();
1154  do {
1155    const char *divider = strchr(next, ' ');
1156    std::string word;
1157    if (divider) {
1158      word = std::string(next, divider);
1159      next = divider + strspn(divider, " ");
1160    } else {
1161      word = next;
1162      next += word.length();
1163    }
1164    if (type.enum_def) {  // The field has an enum type
1165      auto enum_val = type.enum_def->vals.Lookup(word);
1166      if (!enum_val)
1167        return Error("unknown enum value: " + word +
1168              ", for enum: " + type.enum_def->name);
1169      *result |= enum_val->value;
1170    } else {  // No enum type, probably integral field.
1171      if (!IsInteger(type.base_type))
1172        return Error("not a valid value for this field: " + word);
1173      // TODO: could check if its a valid number constant here.
1174      const char *dot = strrchr(word.c_str(), '.');
1175      if (!dot)
1176        return Error("enum values need to be qualified by an enum type");
1177      std::string enum_def_str(word.c_str(), dot);
1178      std::string enum_val_str(dot + 1, word.c_str() + word.length());
1179      auto enum_def = LookupEnum(enum_def_str);
1180      if (!enum_def) return Error("unknown enum: " + enum_def_str);
1181      auto enum_val = enum_def->vals.Lookup(enum_val_str);
1182      if (!enum_val) return Error("unknown enum value: " + enum_val_str);
1183      *result |= enum_val->value;
1184    }
1185  } while(*next);
1186  return NoError();
1187}
1188
1189
1190CheckedError Parser::ParseHash(Value &e, FieldDef* field) {
1191  assert(field);
1192  Value *hash_name = field->attributes.Lookup("hash");
1193  switch (e.type.base_type) {
1194    case BASE_TYPE_INT: {
1195      auto hash = FindHashFunction32(hash_name->constant.c_str());
1196      int32_t hashed_value = static_cast<int32_t>(hash(attribute_.c_str()));
1197      e.constant = NumToString(hashed_value);
1198      break;
1199    }
1200    case BASE_TYPE_UINT: {
1201      auto hash = FindHashFunction32(hash_name->constant.c_str());
1202      uint32_t hashed_value = hash(attribute_.c_str());
1203      e.constant = NumToString(hashed_value);
1204      break;
1205    }
1206    case BASE_TYPE_LONG: {
1207      auto hash = FindHashFunction64(hash_name->constant.c_str());
1208      int64_t hashed_value = static_cast<int64_t>(hash(attribute_.c_str()));
1209      e.constant = NumToString(hashed_value);
1210      break;
1211    }
1212    case BASE_TYPE_ULONG: {
1213      auto hash = FindHashFunction64(hash_name->constant.c_str());
1214      uint64_t hashed_value = hash(attribute_.c_str());
1215      e.constant = NumToString(hashed_value);
1216      break;
1217    }
1218    default:
1219      assert(0);
1220  }
1221  NEXT();
1222  return NoError();
1223}
1224
1225CheckedError Parser::TokenError() {
1226  return Error("cannot parse value starting with: " +
1227               TokenToStringId(token_));
1228}
1229
1230CheckedError Parser::ParseSingleValue(Value &e) {
1231  // First see if this could be a conversion function:
1232  if (token_ == kTokenIdentifier && *cursor_ == '(') {
1233    auto functionname = attribute_;
1234    NEXT();
1235    EXPECT('(');
1236    ECHECK(ParseSingleValue(e));
1237    EXPECT(')');
1238    #define FLATBUFFERS_FN_DOUBLE(name, op) \
1239      if (functionname == name) { \
1240        auto x = strtod(e.constant.c_str(), nullptr); \
1241        e.constant = NumToString(op); \
1242      }
1243    FLATBUFFERS_FN_DOUBLE("deg", x / M_PI * 180);
1244    FLATBUFFERS_FN_DOUBLE("rad", x * M_PI / 180);
1245    FLATBUFFERS_FN_DOUBLE("sin", sin(x));
1246    FLATBUFFERS_FN_DOUBLE("cos", cos(x));
1247    FLATBUFFERS_FN_DOUBLE("tan", tan(x));
1248    FLATBUFFERS_FN_DOUBLE("asin", asin(x));
1249    FLATBUFFERS_FN_DOUBLE("acos", acos(x));
1250    FLATBUFFERS_FN_DOUBLE("atan", atan(x));
1251    // TODO(wvo): add more useful conversion functions here.
1252    #undef FLATBUFFERS_FN_DOUBLE
1253  // Then check if this could be a string/identifier enum value:
1254  } else if (e.type.base_type != BASE_TYPE_STRING &&
1255      e.type.base_type != BASE_TYPE_NONE &&
1256      (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
1257    if (IsIdentifierStart(attribute_[0])) {  // Enum value.
1258      int64_t val;
1259      ECHECK(ParseEnumFromString(e.type, &val));
1260      e.constant = NumToString(val);
1261      NEXT();
1262    } else {  // Numeric constant in string.
1263      if (IsInteger(e.type.base_type)) {
1264        char *end;
1265        e.constant = NumToString(StringToInt(attribute_.c_str(), &end));
1266        if (*end)
1267          return Error("invalid integer: " + attribute_);
1268      } else if (IsFloat(e.type.base_type)) {
1269        char *end;
1270        e.constant = NumToString(strtod(attribute_.c_str(), &end));
1271        if (*end)
1272          return Error("invalid float: " + attribute_);
1273      } else {
1274        assert(0);  // Shouldn't happen, we covered all types.
1275        e.constant = "0";
1276      }
1277      NEXT();
1278    }
1279  } else {
1280    bool match = false;
1281    ECHECK(TryTypedValue(kTokenIntegerConstant,
1282                         IsScalar(e.type.base_type),
1283                         e,
1284                         BASE_TYPE_INT,
1285                         &match));
1286    ECHECK(TryTypedValue(kTokenFloatConstant,
1287                         IsFloat(e.type.base_type),
1288                         e,
1289                         BASE_TYPE_FLOAT,
1290                         &match));
1291    ECHECK(TryTypedValue(kTokenStringConstant,
1292                         e.type.base_type == BASE_TYPE_STRING,
1293                         e,
1294                         BASE_TYPE_STRING,
1295                         &match));
1296    if (!match) return TokenError();
1297  }
1298  return NoError();
1299}
1300
1301StructDef *Parser::LookupCreateStruct(const std::string &name,
1302                                      bool create_if_new, bool definition) {
1303  std::string qualified_name = namespaces_.back()->GetFullyQualifiedName(name);
1304  // See if it exists pre-declared by an unqualified use.
1305  auto struct_def = structs_.Lookup(name);
1306  if (struct_def && struct_def->predecl) {
1307    if (definition) {
1308      // Make sure it has the current namespace, and is registered under its
1309      // qualified name.
1310      struct_def->defined_namespace = namespaces_.back();
1311      structs_.Move(name, qualified_name);
1312    }
1313    return struct_def;
1314  }
1315  // See if it exists pre-declared by an qualified use.
1316  struct_def = structs_.Lookup(qualified_name);
1317  if (struct_def && struct_def->predecl) {
1318    if (definition) {
1319      // Make sure it has the current namespace.
1320      struct_def->defined_namespace = namespaces_.back();
1321    }
1322    return struct_def;
1323  }
1324  if (!definition) {
1325    // Search thru parent namespaces.
1326    for (size_t components = namespaces_.back()->components.size();
1327         components && !struct_def; components--) {
1328      struct_def = structs_.Lookup(
1329          namespaces_.back()->GetFullyQualifiedName(name, components - 1));
1330    }
1331  }
1332  if (!struct_def && create_if_new) {
1333    struct_def = new StructDef();
1334    if (definition) {
1335      structs_.Add(qualified_name, struct_def);
1336      struct_def->name = name;
1337      struct_def->defined_namespace = namespaces_.back();
1338    } else {
1339      // Not a definition.
1340      // Rather than failing, we create a "pre declared" StructDef, due to
1341      // circular references, and check for errors at the end of parsing.
1342      // It is defined in the root namespace, since we don't know what the
1343      // final namespace will be.
1344      // TODO: maybe safer to use special namespace?
1345      structs_.Add(name, struct_def);
1346      struct_def->name = name;
1347      struct_def->defined_namespace = new Namespace();
1348      namespaces_.insert(namespaces_.begin(), struct_def->defined_namespace);
1349    }
1350  }
1351  return struct_def;
1352}
1353
1354CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
1355  std::vector<std::string> enum_comment = doc_comment_;
1356  NEXT();
1357  std::string enum_name = attribute_;
1358  EXPECT(kTokenIdentifier);
1359  auto &enum_def = *new EnumDef();
1360  enum_def.name = enum_name;
1361  enum_def.file = file_being_parsed_;
1362  enum_def.doc_comment = enum_comment;
1363  enum_def.is_union = is_union;
1364  enum_def.defined_namespace = namespaces_.back();
1365  if (enums_.Add(namespaces_.back()->GetFullyQualifiedName(enum_name),
1366                 &enum_def))
1367    return Error("enum already exists: " + enum_name);
1368  if (is_union) {
1369    enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
1370    enum_def.underlying_type.enum_def = &enum_def;
1371  } else {
1372    if (opts.proto_mode) {
1373      enum_def.underlying_type.base_type = BASE_TYPE_INT;
1374    } else {
1375      // Give specialized error message, since this type spec used to
1376      // be optional in the first FlatBuffers release.
1377      if (!Is(':')) {
1378        return Error("must specify the underlying integer type for this"
1379              " enum (e.g. \': short\', which was the default).");
1380      } else {
1381        NEXT();
1382      }
1383      // Specify the integer type underlying this enum.
1384      ECHECK(ParseType(enum_def.underlying_type));
1385      if (!IsInteger(enum_def.underlying_type.base_type))
1386        return Error("underlying enum type must be integral");
1387    }
1388    // Make this type refer back to the enum it was derived from.
1389    enum_def.underlying_type.enum_def = &enum_def;
1390  }
1391  ECHECK(ParseMetaData(&enum_def.attributes));
1392  EXPECT('{');
1393  if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
1394  for (;;) {
1395    if (opts.proto_mode && attribute_ == "option") {
1396      ECHECK(ParseProtoOption());
1397    } else {
1398      auto value_name = attribute_;
1399      auto full_name = value_name;
1400      std::vector<std::string> value_comment = doc_comment_;
1401      EXPECT(kTokenIdentifier);
1402      if (is_union) {
1403        ECHECK(ParseNamespacing(&full_name, &value_name));
1404        if (opts.union_value_namespacing) {
1405          // Since we can't namespace the actual enum identifiers, turn
1406          // namespace parts into part of the identifier.
1407          value_name = full_name;
1408          std::replace(value_name.begin(), value_name.end(), '.', '_');
1409        }
1410      }
1411      auto prevsize = enum_def.vals.vec.size();
1412      auto value = enum_def.vals.vec.size()
1413        ? enum_def.vals.vec.back()->value + 1
1414        : 0;
1415      auto &ev = *new EnumVal(value_name, value);
1416      if (enum_def.vals.Add(value_name, &ev))
1417        return Error("enum value already exists: " + value_name);
1418      ev.doc_comment = value_comment;
1419      if (is_union) {
1420        if (Is(':')) {
1421          NEXT();
1422          ECHECK(ParseType(ev.union_type));
1423          if (ev.union_type.base_type != BASE_TYPE_STRUCT &&
1424              ev.union_type.base_type != BASE_TYPE_STRING)
1425            return Error("union value type may only be table/struct/string");
1426          enum_def.uses_type_aliases = true;
1427        } else {
1428          ev.union_type = Type(BASE_TYPE_STRUCT, LookupCreateStruct(full_name));
1429        }
1430      }
1431      if (Is('=')) {
1432        NEXT();
1433        ev.value = StringToInt(attribute_.c_str());
1434        EXPECT(kTokenIntegerConstant);
1435        if (!opts.proto_mode && prevsize &&
1436            enum_def.vals.vec[prevsize - 1]->value >= ev.value)
1437          return Error("enum values must be specified in ascending order");
1438      }
1439      if (is_union) {
1440        if (ev.value < 0 || ev.value >= 256)
1441          return Error("union enum value must fit in a ubyte");
1442      }
1443      if (opts.proto_mode && Is('[')) {
1444        NEXT();
1445        // ignore attributes on enums.
1446        while (token_ != ']') NEXT();
1447        NEXT();
1448      }
1449    }
1450    if (!Is(opts.proto_mode ? ';' : ',')) break;
1451    NEXT();
1452    if (Is('}')) break;
1453  }
1454  EXPECT('}');
1455  if (enum_def.attributes.Lookup("bit_flags")) {
1456    for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
1457         ++it) {
1458      if (static_cast<size_t>((*it)->value) >=
1459           SizeOf(enum_def.underlying_type.base_type) * 8)
1460        return Error("bit flag out of range of underlying integral type");
1461      (*it)->value = 1LL << (*it)->value;
1462    }
1463  }
1464  if (dest) *dest = &enum_def;
1465  types_.Add(namespaces_.back()->GetFullyQualifiedName(enum_def.name),
1466             new Type(BASE_TYPE_UNION, nullptr, &enum_def));
1467  return NoError();
1468}
1469
1470CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
1471  auto &struct_def = *LookupCreateStruct(name, true, true);
1472  if (!struct_def.predecl) return Error("datatype already exists: " + name);
1473  struct_def.predecl = false;
1474  struct_def.name = name;
1475  struct_def.file = file_being_parsed_;
1476  // Move this struct to the back of the vector just in case it was predeclared,
1477  // to preserve declaration order.
1478  *std::remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) = &struct_def;
1479  *dest = &struct_def;
1480  return NoError();
1481}
1482
1483CheckedError Parser::CheckClash(std::vector<FieldDef*> &fields,
1484                                StructDef *struct_def,
1485                                const char *suffix,
1486                                BaseType basetype) {
1487  auto len = strlen(suffix);
1488  for (auto it = fields.begin(); it != fields.end(); ++it) {
1489    auto &fname = (*it)->name;
1490    if (fname.length() > len &&
1491        fname.compare(fname.length() - len, len, suffix) == 0 &&
1492        (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
1493      auto field = struct_def->fields.Lookup(
1494                                             fname.substr(0, fname.length() - len));
1495      if (field && field->value.type.base_type == basetype)
1496        return Error("Field " + fname +
1497                     " would clash with generated functions for field " +
1498                     field->name);
1499    }
1500  }
1501  return NoError();
1502}
1503
1504static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
1505  auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
1506  auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
1507  return a_id < b_id;
1508}
1509
1510CheckedError Parser::ParseDecl() {
1511  std::vector<std::string> dc = doc_comment_;
1512  bool fixed = Is(kTokenStruct);
1513  if (fixed) NEXT() else EXPECT(kTokenTable);
1514  std::string name = attribute_;
1515  EXPECT(kTokenIdentifier);
1516  StructDef *struct_def;
1517  ECHECK(StartStruct(name, &struct_def));
1518  struct_def->doc_comment = dc;
1519  struct_def->fixed = fixed;
1520  ECHECK(ParseMetaData(&struct_def->attributes));
1521  struct_def->sortbysize =
1522    struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
1523  EXPECT('{');
1524  while (token_ != '}') ECHECK(ParseField(*struct_def));
1525  auto force_align = struct_def->attributes.Lookup("force_align");
1526  if (fixed && force_align) {
1527    auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
1528    if (force_align->type.base_type != BASE_TYPE_INT ||
1529        align < struct_def->minalign ||
1530        align > FLATBUFFERS_MAX_ALIGNMENT ||
1531        align & (align - 1))
1532      return Error("force_align must be a power of two integer ranging from the"
1533                   "struct\'s natural alignment to " +
1534                   NumToString(FLATBUFFERS_MAX_ALIGNMENT));
1535    struct_def->minalign = align;
1536  }
1537  struct_def->PadLastField(struct_def->minalign);
1538  // Check if this is a table that has manual id assignments
1539  auto &fields = struct_def->fields.vec;
1540  if (!struct_def->fixed && fields.size()) {
1541    size_t num_id_fields = 0;
1542    for (auto it = fields.begin(); it != fields.end(); ++it) {
1543      if ((*it)->attributes.Lookup("id")) num_id_fields++;
1544    }
1545    // If any fields have ids..
1546    if (num_id_fields) {
1547      // Then all fields must have them.
1548      if (num_id_fields != fields.size())
1549        return Error(
1550              "either all fields or no fields must have an 'id' attribute");
1551      // Simply sort by id, then the fields are the same as if no ids had
1552      // been specified.
1553      std::sort(fields.begin(), fields.end(), compareFieldDefs);
1554      // Verify we have a contiguous set, and reassign vtable offsets.
1555      for (int i = 0; i < static_cast<int>(fields.size()); i++) {
1556        if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
1557          return Error("field id\'s must be consecutive from 0, id " +
1558                NumToString(i) + " missing or set twice");
1559        fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
1560      }
1561    }
1562  }
1563
1564  ECHECK(CheckClash(fields, struct_def, UnionTypeFieldSuffix(),
1565                    BASE_TYPE_UNION));
1566  ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
1567  ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
1568  ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
1569  ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
1570  ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
1571  EXPECT('}');
1572  types_.Add(namespaces_.back()->GetFullyQualifiedName(struct_def->name),
1573             new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
1574  return NoError();
1575}
1576
1577CheckedError Parser::ParseService() {
1578  std::vector<std::string> service_comment = doc_comment_;
1579  NEXT();
1580  auto service_name = attribute_;
1581  EXPECT(kTokenIdentifier);
1582  auto &service_def = *new ServiceDef();
1583  service_def.name = service_name;
1584  service_def.file = file_being_parsed_;
1585  service_def.doc_comment = service_comment;
1586  service_def.defined_namespace = namespaces_.back();
1587  if (services_.Add(namespaces_.back()->GetFullyQualifiedName(service_name),
1588                    &service_def))
1589    return Error("service already exists: " + service_name);
1590  ECHECK(ParseMetaData(&service_def.attributes));
1591  EXPECT('{');
1592  do {
1593    auto rpc_name = attribute_;
1594    EXPECT(kTokenIdentifier);
1595    EXPECT('(');
1596    Type reqtype, resptype;
1597    ECHECK(ParseTypeIdent(reqtype));
1598    EXPECT(')');
1599    EXPECT(':');
1600    ECHECK(ParseTypeIdent(resptype));
1601    if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
1602        resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
1603        return Error("rpc request and response types must be tables");
1604    auto &rpc = *new RPCCall();
1605    rpc.name = rpc_name;
1606    rpc.request = reqtype.struct_def;
1607    rpc.response = resptype.struct_def;
1608    if (service_def.calls.Add(rpc_name, &rpc))
1609      return Error("rpc already exists: " + rpc_name);
1610    ECHECK(ParseMetaData(&rpc.attributes));
1611    EXPECT(';');
1612  } while (token_ != '}');
1613  NEXT();
1614  return NoError();
1615}
1616
1617bool Parser::SetRootType(const char *name) {
1618  root_struct_def_ = structs_.Lookup(name);
1619  if (!root_struct_def_)
1620    root_struct_def_ = structs_.Lookup(
1621                         namespaces_.back()->GetFullyQualifiedName(name));
1622  return root_struct_def_ != nullptr;
1623}
1624
1625void Parser::MarkGenerated() {
1626  // This function marks all existing definitions as having already
1627  // been generated, which signals no code for included files should be
1628  // generated.
1629  for (auto it = enums_.vec.begin();
1630           it != enums_.vec.end(); ++it) {
1631    (*it)->generated = true;
1632  }
1633  for (auto it = structs_.vec.begin();
1634           it != structs_.vec.end(); ++it) {
1635    if (!(*it)->predecl) {
1636      (*it)->generated = true;
1637    }
1638  }
1639  for (auto it = services_.vec.begin();
1640           it != services_.vec.end(); ++it) {
1641    (*it)->generated = true;
1642  }
1643}
1644
1645CheckedError Parser::ParseNamespace() {
1646  NEXT();
1647  auto ns = new Namespace();
1648  namespaces_.push_back(ns);
1649  if (token_ != ';') {
1650    for (;;) {
1651      ns->components.push_back(attribute_);
1652      EXPECT(kTokenIdentifier);
1653      if (Is('.')) NEXT() else break;
1654    }
1655  }
1656  EXPECT(';');
1657  return NoError();
1658}
1659
1660static bool compareEnumVals(const EnumVal *a, const EnumVal* b) {
1661  return a->value < b->value;
1662}
1663
1664// Best effort parsing of .proto declarations, with the aim to turn them
1665// in the closest corresponding FlatBuffer equivalent.
1666// We parse everything as identifiers instead of keywords, since we don't
1667// want protobuf keywords to become invalid identifiers in FlatBuffers.
1668CheckedError Parser::ParseProtoDecl() {
1669  bool isextend = attribute_ == "extend";
1670  if (attribute_ == "package") {
1671    // These are identical in syntax to FlatBuffer's namespace decl.
1672    ECHECK(ParseNamespace());
1673  } else if (attribute_ == "message" || isextend) {
1674    std::vector<std::string> struct_comment = doc_comment_;
1675    NEXT();
1676    StructDef *struct_def = nullptr;
1677    if (isextend) {
1678      if (Is('.')) NEXT();  // qualified names may start with a . ?
1679      auto id = attribute_;
1680      EXPECT(kTokenIdentifier);
1681      ECHECK(ParseNamespacing(&id, nullptr));
1682      struct_def = LookupCreateStruct(id, false);
1683      if (!struct_def)
1684        return Error("cannot extend unknown message type: " + id);
1685    } else {
1686      std::string name = attribute_;
1687      EXPECT(kTokenIdentifier);
1688      ECHECK(StartStruct(name, &struct_def));
1689      // Since message definitions can be nested, we create a new namespace.
1690      auto ns = new Namespace();
1691      // Copy of current namespace.
1692      *ns = *namespaces_.back();
1693      // But with current message name.
1694      ns->components.push_back(name);
1695      namespaces_.push_back(ns);
1696    }
1697    struct_def->doc_comment = struct_comment;
1698    ECHECK(ParseProtoFields(struct_def, isextend, false));
1699    if (!isextend) {
1700      // We have to remove the nested namespace, but we can't just throw it
1701      // away, so put it at the beginning of the vector.
1702      auto ns = namespaces_.back();
1703      namespaces_.pop_back();
1704      namespaces_.insert(namespaces_.begin(), ns);
1705    }
1706    if (Is(';')) NEXT();
1707  } else if (attribute_ == "enum") {
1708    // These are almost the same, just with different terminator:
1709    EnumDef *enum_def;
1710    ECHECK(ParseEnum(false, &enum_def));
1711    if (Is(';')) NEXT();
1712    // Protobuf allows them to be specified in any order, so sort afterwards.
1713    auto &v = enum_def->vals.vec;
1714    std::sort(v.begin(), v.end(), compareEnumVals);
1715
1716    // Temp: remove any duplicates, as .fbs files can't handle them.
1717    for (auto it = v.begin(); it != v.end(); ) {
1718      if (it != v.begin() && it[0]->value == it[-1]->value) it = v.erase(it);
1719      else ++it;
1720    }
1721  } else if (attribute_ == "syntax") {  // Skip these.
1722    NEXT();
1723    EXPECT('=');
1724    EXPECT(kTokenStringConstant);
1725    EXPECT(';');
1726  } else if (attribute_ == "option") {  // Skip these.
1727    ECHECK(ParseProtoOption());
1728    EXPECT(';');
1729  } else if (attribute_ == "service") {  // Skip these.
1730    NEXT();
1731    EXPECT(kTokenIdentifier);
1732    ECHECK(ParseProtoCurliesOrIdent());
1733  } else {
1734    return Error("don\'t know how to parse .proto declaration starting with " +
1735          TokenToStringId(token_));
1736  }
1737  return NoError();
1738}
1739
// Parses the brace-enclosed body of a .proto message (also used for extend,
// group and oneof bodies) and adds the fields it declares to `struct_def`.
//   struct_def   - definition that receives the parsed fields.
//   isextend     - when true, a field whose name already exists in
//                  struct_def is reused instead of being added again.
//   inside_oneof - when true, no optional/required/repeated/oneof qualifier
//                  is looked for in front of each field.
// Returns the first error encountered, or NoError() once the closing '}' has
// been consumed.
CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
                                      bool inside_oneof) {
  EXPECT('{');
  while (token_ != '}') {
    if (attribute_ == "message" || attribute_ == "extend" ||
        attribute_ == "enum") {
      // Nested declarations.
      ECHECK(ParseProtoDecl());
    } else if (attribute_ == "extensions") {  // Skip these.
      NEXT();
      EXPECT(kTokenIntegerConstant);
      // An identifier here is taken to be the "to" of a range; it and the
      // token after it are skipped without further checking.
      if (Is(kTokenIdentifier)) {
        NEXT();  // to
        NEXT();  // num
      }
      EXPECT(';');
    } else if (attribute_ == "option") {  // Skip these.
      ECHECK(ParseProtoOption());
      EXPECT(';');
    } else if (attribute_ == "reserved") {  // Skip these.
      // Only a comma-separated list of integer constants is accepted here.
      NEXT();
      EXPECT(kTokenIntegerConstant);
      while (Is(',')) { NEXT(); EXPECT(kTokenIntegerConstant); }
      EXPECT(';');
    } else {
      // Everything else is treated as a field declaration.
      std::vector<std::string> field_comment = doc_comment_;
      // Parse the qualifier.
      bool required = false;
      bool repeated = false;
      bool oneof = false;
      if (!inside_oneof) {
        // Each EXPECT below consumes the qualifier identifier itself.
        if (attribute_ == "optional") {
          // This is the default.
          EXPECT(kTokenIdentifier);
        } else if (attribute_ == "required") {
          required = true;
          EXPECT(kTokenIdentifier);
        } else if (attribute_ == "repeated") {
          repeated = true;
          EXPECT(kTokenIdentifier);
        } else if (attribute_ == "oneof") {
          oneof = true;
          EXPECT(kTokenIdentifier);
        } else {
          // can't error, proto3 allows decls without any of the above.
        }
      }
      StructDef *anonymous_struct = nullptr;
      Type type;
      if (attribute_ == "group" || oneof) {
        // Groups and oneofs have no named type of their own: a struct with a
        // generated "Anonymous<N>" name is created to hold their fields.
        if (!oneof) EXPECT(kTokenIdentifier);  // Consumes "group".
        auto name = "Anonymous" + NumToString(anonymous_counter++);
        ECHECK(StartStruct(name, &anonymous_struct));
        type = Type(BASE_TYPE_STRUCT, anonymous_struct);
      } else {
        ECHECK(ParseTypeFromProtoType(&type));
      }
      // Repeated elements get mapped to a vector.
      if (repeated) {
        type.element = type.base_type;
        type.base_type = BASE_TYPE_VECTOR;
      }
      std::string name = attribute_;
      // Protos may use our keywords "attribute" & "namespace" as an identifier.
      if (Is(kTokenAttribute) || Is(kTokenNameSpace)) {
        NEXT();
        // TODO: simpler to just not make these keywords?
        name += "_";  // Have to make it not a keyword.
      } else {
        EXPECT(kTokenIdentifier);
      }
      if (!oneof) {
        // Parse the field id. Since we're just translating schemas, not
        // any kind of binary compatibility, we can safely ignore these, and
        // assign our own.
        EXPECT('=');
        EXPECT(kTokenIntegerConstant);
      }
      FieldDef *field = nullptr;
      if (isextend) {
        // We allow a field to be re-defined when extending.
        // TODO: are there situations where that is problematic?
        field = struct_def->fields.Lookup(name);
      }
      if (!field) ECHECK(AddField(*struct_def, name, type, &field));
      field->doc_comment = field_comment;
      // The "required" qualifier is only recorded for non-scalar fields.
      if (!IsScalar(type.base_type)) field->required = required;
      // See if there's a default specified.
      if (Is('[')) {
        NEXT();
        // Comma-separated `key = value` options; only "default" and
        // "deprecated" have an effect, all others are parsed and dropped.
        for (;;) {
          auto key = attribute_;
          ECHECK(ParseProtoKey());
          EXPECT('=');
          auto val = attribute_;
          ECHECK(ParseProtoCurliesOrIdent());
          if (key == "default") {
            // Temp: skip non-numeric defaults (enums).
            // The value counts as numeric when its first character is a
            // digit, sign or '.', i.e. strpbrk matches at the very start.
            auto numeric = strpbrk(val.c_str(), "0123456789-+.");
            if (IsScalar(type.base_type) && numeric == val.c_str())
              field->value.constant = val;
          } else if (key == "deprecated") {
            field->deprecated = val == "true";
          }
          if (!Is(',')) break;
          NEXT();
        }
        EXPECT(']');
      }
      if (anonymous_struct) {
        // Group / oneof: the body follows the field header and is parsed
        // recursively into the anonymous struct; a trailing ';' is optional.
        ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
        if (Is(';')) NEXT();
      } else {
        EXPECT(';');
      }
    }
  }
  NEXT();  // Consumes the closing '}'.
  return NoError();
}
1860
1861CheckedError Parser::ParseProtoKey() {
1862  if (token_ == '(') {
1863    NEXT();
1864    // Skip "(a.b)" style custom attributes.
1865    while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
1866    EXPECT(')');
1867    while (Is('.')) { NEXT(); EXPECT(kTokenIdentifier); }
1868  } else {
1869    EXPECT(kTokenIdentifier);
1870  }
1871  return NoError();
1872}
1873
1874CheckedError Parser::ParseProtoCurliesOrIdent() {
1875  if (Is('{')) {
1876    NEXT();
1877    for (int nesting = 1; nesting; ) {
1878      if (token_ == '{') nesting++;
1879      else if (token_ == '}') nesting--;
1880      NEXT();
1881    }
1882  } else {
1883    NEXT();  // Any single token.
1884  }
1885  return NoError();
1886}
1887
1888CheckedError Parser::ParseProtoOption() {
1889  NEXT();
1890  ECHECK(ParseProtoKey());
1891  EXPECT('=');
1892  ECHECK(ParseProtoCurliesOrIdent());
1893  return NoError();
1894}
1895
1896// Parse a protobuf type, and map it to the corresponding FlatBuffer one.
1897CheckedError Parser::ParseTypeFromProtoType(Type *type) {
1898  struct type_lookup { const char *proto_type; BaseType fb_type; };
1899  static type_lookup lookup[] = {
1900    { "float", BASE_TYPE_FLOAT },  { "double", BASE_TYPE_DOUBLE },
1901    { "int32", BASE_TYPE_INT },    { "int64", BASE_TYPE_LONG },
1902    { "uint32", BASE_TYPE_UINT },  { "uint64", BASE_TYPE_ULONG },
1903    { "sint32", BASE_TYPE_INT },   { "sint64", BASE_TYPE_LONG },
1904    { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
1905    { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
1906    { "bool", BASE_TYPE_BOOL },
1907    { "string", BASE_TYPE_STRING },
1908    { "bytes", BASE_TYPE_STRING },
1909    { nullptr, BASE_TYPE_NONE }
1910  };
1911  for (auto tl = lookup; tl->proto_type; tl++) {
1912    if (attribute_ == tl->proto_type) {
1913      type->base_type = tl->fb_type;
1914      NEXT();
1915      return NoError();
1916    }
1917  }
1918  if (Is('.')) NEXT();  // qualified names may start with a . ?
1919  ECHECK(ParseTypeIdent(*type));
1920  return NoError();
1921}
1922
1923CheckedError Parser::SkipAnyJsonValue() {
1924  switch (token_) {
1925    case '{': {
1926      size_t fieldn = 0;
1927      return ParseTableDelimiters(fieldn, nullptr,
1928                                  [&](const std::string &) -> CheckedError {
1929        ECHECK(SkipAnyJsonValue());
1930        fieldn++;
1931        return NoError();
1932      });
1933    }
1934    case '[': {
1935      size_t count = 0;
1936      return ParseVectorDelimiters(count, [&]() { return SkipAnyJsonValue(); });
1937    }
1938    case kTokenStringConstant:
1939      EXPECT(kTokenStringConstant);
1940      break;
1941    case kTokenIntegerConstant:
1942      EXPECT(kTokenIntegerConstant);
1943      break;
1944    case kTokenFloatConstant:
1945      EXPECT(kTokenFloatConstant);
1946      break;
1947    default:
1948      return TokenError();
1949  }
1950  return NoError();
1951}
1952
1953CheckedError Parser::ParseFlexBufferValue(flexbuffers::Builder *builder) {
1954  switch (token_) {
1955    case '{': {
1956      auto start = builder->StartMap();
1957      size_t fieldn = 0;
1958      auto err = ParseTableDelimiters(fieldn, nullptr,
1959                                  [&](const std::string &name) -> CheckedError {
1960        builder->Key(name);
1961        ECHECK(ParseFlexBufferValue(builder));
1962        fieldn++;
1963        return NoError();
1964      });
1965      ECHECK(err);
1966      builder->EndMap(start);
1967      break;
1968    }
1969    case '[':{
1970      auto start = builder->StartVector();
1971      size_t count = 0;
1972      ECHECK(ParseVectorDelimiters(count, [&]() {
1973        return ParseFlexBufferValue(builder);
1974      }));
1975      builder->EndVector(start, false, false);
1976      break;
1977    }
1978    case kTokenStringConstant:
1979      builder->String(attribute_);
1980      EXPECT(kTokenStringConstant);
1981      break;
1982    case kTokenIntegerConstant:
1983      builder->Int(StringToInt(attribute_.c_str()));
1984      EXPECT(kTokenIntegerConstant);
1985      break;
1986    case kTokenFloatConstant:
1987      builder->Double(strtod(attribute_.c_str(), nullptr));
1988      EXPECT(kTokenFloatConstant);
1989      break;
1990    default:
1991      return TokenError();
1992  }
1993  return NoError();
1994}
1995
1996bool Parser::ParseFlexBuffer(const char *source, const char *source_filename,
1997                             flexbuffers::Builder *builder) {
1998  auto ok = !StartParseFile(source, source_filename).Check() &&
1999            !ParseFlexBufferValue(builder).Check();
2000  if (ok) builder->Finish();
2001  return ok;
2002}
2003
2004bool Parser::Parse(const char *source, const char **include_paths,
2005                   const char *source_filename) {
2006  return !ParseRoot(source, include_paths, source_filename).Check();
2007}
2008
2009CheckedError Parser::StartParseFile(const char *source, const char *source_filename) {
2010  file_being_parsed_ = source_filename ? source_filename : "";
2011  source_ = cursor_ = source;
2012  line_ = 1;
2013  error_.clear();
2014  ECHECK(SkipByteOrderMark());
2015  NEXT();
2016  if (Is(kTokenEof))
2017      return Error("input file is empty");
2018  return NoError();
2019}
2020
2021CheckedError Parser::ParseRoot(const char *source, const char **include_paths,
2022                             const char *source_filename) {
2023  ECHECK(DoParse(source, include_paths, source_filename, nullptr));
2024
2025  // Check that all types were defined.
2026  for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2027    if ((*it)->predecl) {
2028      return Error("type referenced but not defined: " + (*it)->name);
2029    }
2030  }
2031
2032  // This check has to happen here and not earlier, because only now do we
2033  // know for sure what the type of these are.
2034  for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2035    auto &enum_def = **it;
2036    if (enum_def.is_union) {
2037      for (auto val_it = enum_def.vals.vec.begin();
2038           val_it != enum_def.vals.vec.end();
2039           ++val_it) {
2040        auto &val = **val_it;
2041        if (opts.lang_to_generate != IDLOptions::kCpp &&
2042            val.union_type.struct_def && val.union_type.struct_def->fixed)
2043          return Error(
2044                "only tables can be union elements in the generated language: "
2045                + val.name);
2046      }
2047    }
2048  }
2049  return NoError();
2050}
2051
2052CheckedError Parser::DoParse(const char *source,
2053                                    const char **include_paths,
2054                                    const char *source_filename,
2055                                    const char *include_filename) {
2056  if (source_filename &&
2057      included_files_.find(source_filename) == included_files_.end()) {
2058    included_files_[source_filename] = include_filename ? include_filename : "";
2059    files_included_per_file_[source_filename] = std::set<std::string>();
2060  }
2061  if (!include_paths) {
2062    static const char *current_directory[] = { "", nullptr };
2063    include_paths = current_directory;
2064  }
2065  field_stack_.clear();
2066  builder_.Clear();
2067  // Start with a blank namespace just in case this file doesn't have one.
2068  namespaces_.push_back(new Namespace());
2069
2070  ECHECK(StartParseFile(source, source_filename));
2071
2072  // Includes must come before type declarations:
2073  for (;;) {
2074    // Parse pre-include proto statements if any:
2075    if (opts.proto_mode &&
2076        (attribute_ == "option" || attribute_ == "syntax" ||
2077         attribute_ == "package")) {
2078        ECHECK(ParseProtoDecl());
2079    } else if (Is(kTokenNativeInclude)) {
2080      NEXT();
2081      native_included_files_.emplace_back(attribute_);
2082      EXPECT(kTokenStringConstant);
2083    } else if (Is(kTokenInclude) ||
2084               (opts.proto_mode &&
2085                attribute_ == "import" &&
2086                Is(kTokenIdentifier))) {
2087      NEXT();
2088      if (opts.proto_mode && attribute_ == "public") NEXT();
2089      auto name = flatbuffers::PosixPath(attribute_.c_str());
2090      EXPECT(kTokenStringConstant);
2091      // Look for the file in include_paths.
2092      std::string filepath;
2093      for (auto paths = include_paths; paths && *paths; paths++) {
2094        filepath = flatbuffers::ConCatPathFileName(*paths, name);
2095        if(FileExists(filepath.c_str())) break;
2096      }
2097      if (filepath.empty())
2098        return Error("unable to locate include file: " + name);
2099      if (source_filename)
2100        files_included_per_file_[source_filename].insert(filepath);
2101      if (included_files_.find(filepath) == included_files_.end()) {
2102        // We found an include file that we have not parsed yet.
2103        // Load it and parse it.
2104        std::string contents;
2105        if (!LoadFile(filepath.c_str(), true, &contents))
2106          return Error("unable to load include file: " + name);
2107        ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(),
2108                       name.c_str()));
2109        // We generally do not want to output code for any included files:
2110        if (!opts.generate_all) MarkGenerated();
2111        // This is the easiest way to continue this file after an include:
2112        // instead of saving and restoring all the state, we simply start the
2113        // file anew. This will cause it to encounter the same include
2114        // statement again, but this time it will skip it, because it was
2115        // entered into included_files_.
2116        // This is recursive, but only go as deep as the number of include
2117        // statements.
2118        return DoParse(source, include_paths, source_filename, include_filename);
2119      }
2120      EXPECT(';');
2121    } else {
2122      break;
2123    }
2124  }
2125  // Now parse all other kinds of declarations:
2126  while (token_ != kTokenEof) {
2127    if (opts.proto_mode) {
2128      ECHECK(ParseProtoDecl());
2129    } else if (token_ == kTokenNameSpace) {
2130      ECHECK(ParseNamespace());
2131    } else if (token_ == '{') {
2132      if (!root_struct_def_)
2133        return Error("no root type set to parse json with");
2134      if (builder_.GetSize()) {
2135        return Error("cannot have more than one json object in a file");
2136      }
2137      uoffset_t toff;
2138      ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
2139      builder_.Finish(Offset<Table>(toff),
2140                file_identifier_.length() ? file_identifier_.c_str() : nullptr);
2141    } else if (token_ == kTokenEnum) {
2142      ECHECK(ParseEnum(false, nullptr));
2143    } else if (token_ == kTokenUnion) {
2144      ECHECK(ParseEnum(true, nullptr));
2145    } else if (token_ == kTokenRootType) {
2146      NEXT();
2147      auto root_type = attribute_;
2148      EXPECT(kTokenIdentifier);
2149      ECHECK(ParseNamespacing(&root_type, nullptr));
2150      if (!SetRootType(root_type.c_str()))
2151        return Error("unknown root type: " + root_type);
2152      if (root_struct_def_->fixed)
2153        return Error("root type must be a table");
2154      EXPECT(';');
2155    } else if (token_ == kTokenFileIdentifier) {
2156      NEXT();
2157      file_identifier_ = attribute_;
2158      EXPECT(kTokenStringConstant);
2159      if (file_identifier_.length() !=
2160          FlatBufferBuilder::kFileIdentifierLength)
2161        return Error("file_identifier must be exactly " +
2162              NumToString(FlatBufferBuilder::kFileIdentifierLength) +
2163              " characters");
2164      EXPECT(';');
2165    } else if (token_ == kTokenFileExtension) {
2166      NEXT();
2167      file_extension_ = attribute_;
2168      EXPECT(kTokenStringConstant);
2169      EXPECT(';');
2170    } else if(token_ == kTokenInclude) {
2171      return Error("includes must come before declarations");
2172    } else if(token_ == kTokenAttribute) {
2173      NEXT();
2174      auto name = attribute_;
2175      EXPECT(kTokenStringConstant);
2176      EXPECT(';');
2177      known_attributes_[name] = false;
2178    } else if (token_ == kTokenService) {
2179      ECHECK(ParseService());
2180    } else {
2181      ECHECK(ParseDecl());
2182    }
2183  }
2184  return NoError();
2185}
2186
2187std::set<std::string> Parser::GetIncludedFilesRecursive(
2188    const std::string &file_name) const {
2189  std::set<std::string> included_files;
2190  std::list<std::string> to_process;
2191
2192  if (file_name.empty()) return included_files;
2193  to_process.push_back(file_name);
2194
2195  while (!to_process.empty()) {
2196    std::string current = to_process.front();
2197    to_process.pop_front();
2198    included_files.insert(current);
2199
2200    auto new_files = files_included_per_file_.at(current);
2201    for (auto it = new_files.begin(); it != new_files.end(); ++it) {
2202      if (included_files.find(*it) == included_files.end())
2203        to_process.push_back(*it);
2204    }
2205  }
2206
2207  return included_files;
2208}
2209
2210// Schema serialization functionality:
2211
// Orders two definitions by their fully qualified name (namespace plus
// name), using plain string comparison.
template<typename T> bool compareName(const T* a, const T* b) {
  const auto lhs = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs < rhs;
}
2216
2217template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
2218  // Pre-sort these vectors, such that we can set the correct indices for them.
2219  auto vec = defvec;
2220  std::sort(vec.begin(), vec.end(), compareName<T>);
2221  for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
2222}
2223
2224void Parser::Serialize() {
2225  builder_.Clear();
2226  AssignIndices(structs_.vec);
2227  AssignIndices(enums_.vec);
2228  std::vector<Offset<reflection::Object>> object_offsets;
2229  for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2230    auto offset = (*it)->Serialize(&builder_, *this);
2231    object_offsets.push_back(offset);
2232    (*it)->serialized_location = offset.o;
2233  }
2234  std::vector<Offset<reflection::Enum>> enum_offsets;
2235  for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2236    auto offset = (*it)->Serialize(&builder_, *this);
2237    enum_offsets.push_back(offset);
2238    (*it)->serialized_location = offset.o;
2239  }
2240  auto schema_offset = reflection::CreateSchema(
2241                         builder_,
2242                         builder_.CreateVectorOfSortedTables(&object_offsets),
2243                         builder_.CreateVectorOfSortedTables(&enum_offsets),
2244                         builder_.CreateString(file_identifier_),
2245                         builder_.CreateString(file_extension_),
2246                         root_struct_def_
2247                           ? root_struct_def_->serialized_location
2248                           : 0);
2249  builder_.Finish(schema_offset, reflection::SchemaIdentifier());
2250}
2251
2252Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
2253                                                const Parser &parser) const {
2254  std::vector<Offset<reflection::Field>> field_offsets;
2255  for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
2256    field_offsets.push_back(
2257      (*it)->Serialize(builder,
2258                       static_cast<uint16_t>(it - fields.vec.begin()), parser));
2259  }
2260  auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2261  return reflection::CreateObject(*builder,
2262                                  builder->CreateString(qualified_name),
2263                                  builder->CreateVectorOfSortedTables(
2264                                    &field_offsets),
2265                                  fixed,
2266                                  static_cast<int>(minalign),
2267                                  static_cast<int>(bytesize),
2268                                  SerializeAttributes(builder, parser),
2269                                  parser.opts.binary_schema_comments
2270                                    ? builder->CreateVectorOfStrings(
2271                                        doc_comment)
2272                                    : 0);
2273}
2274
// Serializes this field as a reflection::Field.
//   builder - buffer the field (and its nested strings/tables) is written to.
//   id      - value stored as the field's id.
//   parser  - supplies the known attributes and the option that controls
//             whether doc comments are written.
// The default value is converted from its textual form: as an integer for
// integer types, as a double for float types; whichever of the two does not
// apply is written as zero.
Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
                                              uint16_t id,
                                              const Parser &parser) const {
  return reflection::CreateField(*builder,
                                 builder->CreateString(name),
                                 value.type.Serialize(builder),
                                 id,
                                 value.offset,
                                 IsInteger(value.type.base_type)
                                   ? StringToInt(value.constant.c_str())
                                   : 0,
                                 IsFloat(value.type.base_type)
                                   ? strtod(value.constant.c_str(), nullptr)
                                   : 0.0,
                                 deprecated,
                                 required,
                                 key,
                                 SerializeAttributes(builder, parser),
                                 // Doc comments are optional in the output.
                                 parser.opts.binary_schema_comments
                                   ? builder->CreateVectorOfStrings(doc_comment)
                                   : 0);
  // TODO: value.constant is almost always "0", we could save quite a bit of
  // space by sharing it. Same for common values of value.type.
}
2299
2300Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
2301                                            const Parser &parser) const {
2302  std::vector<Offset<reflection::EnumVal>> enumval_offsets;
2303  for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
2304    enumval_offsets.push_back((*it)->Serialize(builder));
2305  }
2306  auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2307  return reflection::CreateEnum(*builder,
2308                                builder->CreateString(qualified_name),
2309                                builder->CreateVector(enumval_offsets),
2310                                is_union,
2311                                underlying_type.Serialize(builder),
2312                                SerializeAttributes(builder, parser),
2313                                parser.opts.binary_schema_comments
2314                                  ? builder->CreateVectorOfStrings(doc_comment)
2315                                  : 0);
2316}
2317
// Serializes this enum value as a reflection::EnumVal: its name, its numeric
// value, the serialized location of the struct it refers to (0 when it
// refers to none) and its union type.
Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder) const
                                                                               {
  return reflection::CreateEnumVal(*builder,
                                   builder->CreateString(name),
                                   value,
                                   // Only set when a struct is attached.
                                   union_type.struct_def
                                     ? union_type.struct_def->
                                         serialized_location
                                     : 0,
                                   union_type.Serialize(builder));
}
2329
2330Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
2331  return reflection::CreateType(*builder,
2332                                static_cast<reflection::BaseType>(base_type),
2333                                static_cast<reflection::BaseType>(element),
2334                                struct_def ? struct_def->index :
2335                                             (enum_def ? enum_def->index : -1));
2336}
2337
2338flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<
2339  reflection::KeyValue>>>
2340    Definition::SerializeAttributes(FlatBufferBuilder *builder,
2341                                    const Parser &parser) const {
2342  std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
2343  for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
2344    auto it = parser.known_attributes_.find(kv->first);
2345    assert(it != parser.known_attributes_.end());
2346    if (!it->second) {  // Custom attribute.
2347      attrs.push_back(
2348          reflection::CreateKeyValue(*builder, builder->CreateString(kv->first),
2349                                     builder->CreateString(
2350                                         kv->second->constant)));
2351    }
2352  }
2353  if (attrs.size()) {
2354    return builder->CreateVectorOfSortedTables(&attrs);
2355  } else {
2356    return 0;
2357  }
2358}
2359
2360std::string Parser::ConformTo(const Parser &base) {
2361  for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
2362    auto &struct_def = **sit;
2363    auto qualified_name =
2364        struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
2365    auto struct_def_base = base.structs_.Lookup(qualified_name);
2366    if (!struct_def_base) continue;
2367    for (auto fit = struct_def.fields.vec.begin();
2368             fit != struct_def.fields.vec.end(); ++fit) {
2369      auto &field = **fit;
2370      auto field_base = struct_def_base->fields.Lookup(field.name);
2371      if (field_base) {
2372        if (field.value.offset != field_base->value.offset)
2373          return "offsets differ for field: " + field.name;
2374        if (field.value.constant != field_base->value.constant)
2375          return "defaults differ for field: " + field.name;
2376        if (!EqualByName(field.value.type, field_base->value.type))
2377          return "types differ for field: " + field.name;
2378      } else {
2379        // Doesn't have to exist, deleting fields is fine.
2380        // But we should check if there is a field that has the same offset
2381        // but is incompatible (in the case of field renaming).
2382        for (auto fbit = struct_def_base->fields.vec.begin();
2383                 fbit != struct_def_base->fields.vec.end(); ++fbit) {
2384          field_base = *fbit;
2385          if (field.value.offset == field_base->value.offset) {
2386            if (!EqualByName(field.value.type, field_base->value.type))
2387              return "field renamed to different type: " + field.name;
2388            break;
2389          }
2390        }
2391      }
2392    }
2393  }
2394  for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
2395    auto &enum_def = **eit;
2396    auto qualified_name =
2397        enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
2398    auto enum_def_base = base.enums_.Lookup(qualified_name);
2399    if (!enum_def_base) continue;
2400    for (auto evit = enum_def.vals.vec.begin();
2401             evit != enum_def.vals.vec.end(); ++evit) {
2402      auto &enum_val = **evit;
2403      auto enum_val_base = enum_def_base->vals.Lookup(enum_val.name);
2404      if (enum_val_base) {
2405        if (enum_val.value != enum_val_base->value)
2406          return "values differ for enum: " + enum_val.name;
2407      }
2408    }
2409  }
2410  return "";
2411}
2412
2413}  // namespace flatbuffers
2414