idl_parser.cpp revision aaf5598a032314767721fead8a0acf9ca37c5e09
1/*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <algorithm>
18#include <list>
19
20#ifdef _WIN32
21#if !defined(_USE_MATH_DEFINES)
22#define _USE_MATH_DEFINES  // For M_PI.
23#endif                     // !defined(_USE_MATH_DEFINES)
24#endif                     // _WIN32
25
26#include <math.h>
27
28#include "flatbuffers/idl.h"
29#include "flatbuffers/util.h"
30
31namespace flatbuffers {
32
33const char *const kTypeNames[] = {
34  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
35    IDLTYPE,
36    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
37  #undef FLATBUFFERS_TD
38  nullptr
39};
40
41const char kTypeSizes[] = {
42  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
43      sizeof(CTYPE),
44    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
45  #undef FLATBUFFERS_TD
46};
47
48// The enums in the reflection schema should match the ones we use internally.
49// Compare the last element to check if these go out of sync.
50static_assert(BASE_TYPE_UNION ==
51              static_cast<BaseType>(reflection::Union),
52              "enums don't match");
53
// Error propagation helper: every parsing call goes through ECHECK. The call
// is evaluated once, and if the CheckedError it yields reports true from
// Check(), that object is returned from the enclosing function right away.
#define ECHECK(call) \
  { \
    auto ce = (call); \
    if (ce.Check()) return ce; \
  }

// Shorthands for the two calls that appear most often below.
#define NEXT() ECHECK(Next())
#define EXPECT(tok) ECHECK(Expect(tok))
63
64static bool ValidateUTF8(const std::string &str) {
65  const char *s = &str[0];
66  const char * const sEnd = s + str.length();
67  while (s < sEnd) {
68    if (FromUTF8(&s) < 0) {
69      return false;
70    }
71  }
72  return true;
73}
74
75CheckedError Parser::Error(const std::string &msg) {
76  error_ = file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
77  #ifdef _WIN32
78    error_ += "(" + NumToString(line_) + ")";  // MSVC alike
79  #else
80    if (file_being_parsed_.length()) error_ += ":";
81    error_ += NumToString(line_) + ":0";  // gcc alike
82  #endif
83  error_ += ": error: " + msg;
84  return CheckedError(true);
85}
86
87inline CheckedError NoError() { return CheckedError(false); }
88
89inline std::string OutOfRangeErrorMsg(int64_t val, const std::string &op,
90                                      int64_t limit) {
91  const std::string cause = NumToString(val) + op + NumToString(limit);
92  return "constant does not fit (" + cause + ")";
93}
94
95// Ensure that integer values we parse fit inside the declared integer type.
96CheckedError Parser::CheckInRange(int64_t val, int64_t min, int64_t max) {
97  if (val < min)
98    return Error(OutOfRangeErrorMsg(val, " < ", min));
99  else if (val > max)
100    return Error(OutOfRangeErrorMsg(val, " > ", max));
101  else
102    return NoError();
103}
104
105// atot: templated version of atoi/atof: convert a string to an instance of T.
106template<typename T> inline CheckedError atot(const char *s, Parser &parser,
107                                              T *val) {
108  int64_t i = StringToInt(s);
109  const int64_t min = std::numeric_limits<T>::min();
110  const int64_t max = std::numeric_limits<T>::max();
111  ECHECK(parser.CheckInRange(i, min, max));
112  *val = (T)i;
113  return NoError();
114}
115template<> inline CheckedError atot<uint64_t>(const char *s, Parser &parser,
116                                              uint64_t *val) {
117  (void)parser;
118  *val = StringToUInt(s);
119  return NoError();
120}
121template<> inline CheckedError atot<bool>(const char *s, Parser &parser,
122                                          bool *val) {
123  (void)parser;
124  *val = 0 != atoi(s);
125  return NoError();
126}
127template<> inline CheckedError atot<float>(const char *s, Parser &parser,
128                                           float *val) {
129  (void)parser;
130  *val = static_cast<float>(strtod(s, nullptr));
131  return NoError();
132}
133template<> inline CheckedError atot<double>(const char *s, Parser &parser,
134                                            double *val) {
135  (void)parser;
136  *val = strtod(s, nullptr);
137  return NoError();
138}
139
140template<> inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
141                                                  Offset<void> *val) {
142  (void)parser;
143  *val = Offset<void>(atoi(s));
144  return NoError();
145}
146
147std::string Namespace::GetFullyQualifiedName(const std::string &name,
148                                             size_t max_components) const {
149  // Early exit if we don't have a defined namespace.
150  if (components.size() == 0 || !max_components) {
151    return name;
152  }
153  std::stringstream stream;
154  for (size_t i = 0; i < std::min(components.size(), max_components);
155       i++) {
156    if (i) {
157      stream << ".";
158    }
159    stream << components[i];
160  }
161  if (name.length()) stream << "." << name;
162  return stream.str();
163}
164
165
166
167// Declare tokens we'll use. Single character tokens are represented by their
168// ascii character code (e.g. '{'), others above 256.
169#define FLATBUFFERS_GEN_TOKENS(TD) \
170  TD(Eof, 256, "end of file") \
171  TD(StringConstant, 257, "string constant") \
172  TD(IntegerConstant, 258, "integer constant") \
173  TD(FloatConstant, 259, "float constant") \
174  TD(Identifier, 260, "identifier") \
175  TD(Table, 261, "table") \
176  TD(Struct, 262, "struct") \
177  TD(Enum, 263, "enum") \
178  TD(Union, 264, "union") \
179  TD(NameSpace, 265, "namespace") \
180  TD(RootType, 266, "root_type") \
181  TD(FileIdentifier, 267, "file_identifier") \
182  TD(FileExtension, 268, "file_extension") \
183  TD(Include, 269, "include") \
184  TD(Attribute, 270, "attribute") \
185  TD(Null, 271, "null") \
186  TD(Service, 272, "rpc_service") \
187  TD(NativeInclude, 273, "native_include")
188#ifdef __GNUC__
189__extension__  // Stop GCC complaining about trailing comma with -Wpendantic.
190#endif
191enum {
192  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
193    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
194  #undef FLATBUFFERS_TOKEN
195  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
196      kToken ## ENUM,
197    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
198  #undef FLATBUFFERS_TD
199};
200
201static std::string TokenToString(int t) {
202  static const char *tokens[] = {
203    #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
204      FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
205    #undef FLATBUFFERS_TOKEN
206    #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
207      IDLTYPE,
208      FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
209    #undef FLATBUFFERS_TD
210  };
211  if (t < 256) {  // A single ascii char token.
212    std::string s;
213    s.append(1, static_cast<char>(t));
214    return s;
215  } else {       // Other tokens.
216    return tokens[t - 256];
217  }
218}
219
220std::string Parser::TokenToStringId(int t) {
221  return TokenToString(t) + (t == kTokenIdentifier ? ": " + attribute_ : "");
222}
223
224// Parses exactly nibbles worth of hex digits into a number, or error.
225CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
226  for (int i = 0; i < nibbles; i++)
227    if (!isxdigit(static_cast<const unsigned char>(cursor_[i])))
228      return Error("escape code must be followed by " + NumToString(nibbles) +
229                   " hex digits");
230  std::string target(cursor_, cursor_ + nibbles);
231  *val = StringToUInt(target.c_str(), nullptr, 16);
232  cursor_ += nibbles;
233  return NoError();
234}
235
236CheckedError Parser::SkipByteOrderMark() {
237  if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
238  cursor_++;
239  if (static_cast<unsigned char>(*cursor_) != 0xbb) return Error("invalid utf-8 byte order mark");
240  cursor_++;
241  if (static_cast<unsigned char>(*cursor_) != 0xbf) return Error("invalid utf-8 byte order mark");
242  cursor_++;
243  return NoError();
244}
245
// True when `c` may begin an identifier: an underscore or a character that
// isalpha() accepts.
bool IsIdentifierStart(char c) {
  if (c == '_') return true;
  return isalpha(static_cast<unsigned char>(c)) != 0;
}
249
250CheckedError Parser::Next() {
251  doc_comment_.clear();
252  bool seen_newline = false;
253  attribute_.clear();
254  for (;;) {
255    char c = *cursor_++;
256    token_ = c;
257    switch (c) {
258      case '\0': cursor_--; token_ = kTokenEof; return NoError();
259      case ' ': case '\r': case '\t': break;
260      case '\n': line_++; seen_newline = true; break;
261      case '{': case '}': case '(': case ')': case '[': case ']':
262      case ',': case ':': case ';': case '=': return NoError();
263      case '.':
264        if(!isdigit(static_cast<const unsigned char>(*cursor_))) return NoError();
265        return Error("floating point constant can\'t start with \".\"");
266      case '\"':
267      case '\'': {
268        int unicode_high_surrogate = -1;
269
270        while (*cursor_ != c) {
271          if (*cursor_ < ' ' && *cursor_ >= 0)
272            return Error("illegal character in string constant");
273          if (*cursor_ == '\\') {
274            cursor_++;
275            if (unicode_high_surrogate != -1 &&
276                *cursor_ != 'u') {
277              return Error(
278                "illegal Unicode sequence (unpaired high surrogate)");
279            }
280            switch (*cursor_) {
281              case 'n':  attribute_ += '\n'; cursor_++; break;
282              case 't':  attribute_ += '\t'; cursor_++; break;
283              case 'r':  attribute_ += '\r'; cursor_++; break;
284              case 'b':  attribute_ += '\b'; cursor_++; break;
285              case 'f':  attribute_ += '\f'; cursor_++; break;
286              case '\"': attribute_ += '\"'; cursor_++; break;
287              case '\'': attribute_ += '\''; cursor_++; break;
288              case '\\': attribute_ += '\\'; cursor_++; break;
289              case '/':  attribute_ += '/';  cursor_++; break;
290              case 'x': {  // Not in the JSON standard
291                cursor_++;
292                uint64_t val;
293                ECHECK(ParseHexNum(2, &val));
294                attribute_ += static_cast<char>(val);
295                break;
296              }
297              case 'u': {
298                cursor_++;
299                uint64_t val;
300                ECHECK(ParseHexNum(4, &val));
301                if (val >= 0xD800 && val <= 0xDBFF) {
302                  if (unicode_high_surrogate != -1) {
303                    return Error(
304                      "illegal Unicode sequence (multiple high surrogates)");
305                  } else {
306                    unicode_high_surrogate = static_cast<int>(val);
307                  }
308                } else if (val >= 0xDC00 && val <= 0xDFFF) {
309                  if (unicode_high_surrogate == -1) {
310                    return Error(
311                      "illegal Unicode sequence (unpaired low surrogate)");
312                  } else {
313                    int code_point = 0x10000 +
314                      ((unicode_high_surrogate & 0x03FF) << 10) +
315                      (val & 0x03FF);
316                    ToUTF8(code_point, &attribute_);
317                    unicode_high_surrogate = -1;
318                  }
319                } else {
320                  if (unicode_high_surrogate != -1) {
321                    return Error(
322                      "illegal Unicode sequence (unpaired high surrogate)");
323                  }
324                  ToUTF8(static_cast<int>(val), &attribute_);
325                }
326                break;
327              }
328              default: return Error("unknown escape code in string constant");
329            }
330          } else { // printable chars + UTF-8 bytes
331            if (unicode_high_surrogate != -1) {
332              return Error(
333                "illegal Unicode sequence (unpaired high surrogate)");
334            }
335            attribute_ += *cursor_++;
336          }
337        }
338        if (unicode_high_surrogate != -1) {
339          return Error(
340            "illegal Unicode sequence (unpaired high surrogate)");
341        }
342        cursor_++;
343        if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) {
344          return Error("illegal UTF-8 sequence");
345        }
346        token_ = kTokenStringConstant;
347        return NoError();
348      }
349      case '/':
350        if (*cursor_ == '/') {
351          const char *start = ++cursor_;
352          while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
353          if (*start == '/') {  // documentation comment
354            if (cursor_ != source_ && !seen_newline)
355              return Error(
356                    "a documentation comment should be on a line on its own");
357            doc_comment_.push_back(std::string(start + 1, cursor_));
358          }
359          break;
360        } else if (*cursor_ == '*') {
361          cursor_++;
362          // TODO: make nested.
363          while (*cursor_ != '*' || cursor_[1] != '/') {
364            if (*cursor_ == '\n') line_++;
365            if (!*cursor_) return Error("end of file in comment");
366            cursor_++;
367          }
368          cursor_ += 2;
369          break;
370        }
371        // fall thru
372      default:
373        if (IsIdentifierStart(c)) {
374          // Collect all chars of an identifier:
375          const char *start = cursor_ - 1;
376          while (isalnum(static_cast<unsigned char>(*cursor_)) ||
377                 *cursor_ == '_')
378            cursor_++;
379          attribute_.append(start, cursor_);
380          // First, see if it is a type keyword from the table of types:
381          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
382            PTYPE) \
383            if (attribute_ == IDLTYPE) { \
384              token_ = kToken ## ENUM; \
385              return NoError(); \
386            }
387            FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
388          #undef FLATBUFFERS_TD
389          // If it's a boolean constant keyword, turn those into integers,
390          // which simplifies our logic downstream.
391          if (attribute_ == "true" || attribute_ == "false") {
392            attribute_ = NumToString(attribute_ == "true");
393            token_ = kTokenIntegerConstant;
394            return NoError();
395          }
396          // Check for declaration keywords:
397          if (attribute_ == "table") {
398            token_ = kTokenTable;
399            return NoError();
400          }
401          if (attribute_ == "struct") {
402            token_ = kTokenStruct;
403            return NoError();
404          }
405          if (attribute_ == "enum") {
406            token_ = kTokenEnum;
407            return NoError();
408          }
409          if (attribute_ == "union") {
410            token_ = kTokenUnion;
411            return NoError();
412          }
413          if (attribute_ == "namespace") {
414            token_ = kTokenNameSpace;
415            return NoError();
416          }
417          if (attribute_ == "root_type") {
418            token_ = kTokenRootType;
419            return NoError();
420          }
421          if (attribute_ == "include") {
422            token_ = kTokenInclude;
423            return NoError();
424          }
425          if (attribute_ == "attribute") {
426            token_ = kTokenAttribute;
427            return NoError();
428          }
429          if (attribute_ == "file_identifier") {
430            token_ = kTokenFileIdentifier;
431            return NoError();
432          }
433          if (attribute_ == "file_extension") {
434            token_ = kTokenFileExtension;
435            return NoError();
436          }
437          if (attribute_ == "null") {
438            token_ = kTokenNull;
439            return NoError();
440          }
441          if (attribute_ == "rpc_service") {
442            token_ = kTokenService;
443            return NoError();
444          }
445          if (attribute_ == "native_include") {
446            token_ = kTokenNativeInclude;
447            return NoError();
448          }
449          // If not, it is a user-defined identifier:
450          token_ = kTokenIdentifier;
451          return NoError();
452        } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
453          const char *start = cursor_ - 1;
454          if (c == '-' && *cursor_ == '0' &&
455              (cursor_[1] == 'x' || cursor_[1] == 'X')) {
456            ++start;
457            ++cursor_;
458            attribute_.append(&c, &c + 1);
459            c = '0';
460          }
461          if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) {
462              cursor_++;
463              while (isxdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
464              attribute_.append(start + 2, cursor_);
465              attribute_ = NumToString(static_cast<int64_t>(
466                             StringToUInt(attribute_.c_str(), nullptr, 16)));
467              token_ = kTokenIntegerConstant;
468              return NoError();
469          }
470          while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
471          if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') {
472            if (*cursor_ == '.') {
473              cursor_++;
474              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
475            }
476            // See if this float has a scientific notation suffix. Both JSON
477            // and C++ (through strtod() we use) have the same format:
478            if (*cursor_ == 'e' || *cursor_ == 'E') {
479              cursor_++;
480              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
481              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
482            }
483            token_ = kTokenFloatConstant;
484          } else {
485            token_ = kTokenIntegerConstant;
486          }
487          attribute_.append(start, cursor_);
488          return NoError();
489        }
490        std::string ch;
491        ch = c;
492        if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
493        return Error("illegal character: " + ch);
494    }
495  }
496}
497
498// Check if a given token is next.
499bool Parser::Is(int t) {
500  return t == token_;
501}
502
503// Expect a given token to be next, consume it, or error if not present.
504CheckedError Parser::Expect(int t) {
505  if (t != token_) {
506    return Error("expecting: " + TokenToString(t) + " instead got: " +
507                 TokenToStringId(token_));
508  }
509  NEXT();
510  return NoError();
511}
512
513CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
514  while (Is('.')) {
515    NEXT();
516    *id += ".";
517    *id += attribute_;
518    if (last) *last = attribute_;
519    EXPECT(kTokenIdentifier);
520  }
521  return NoError();
522}
523
524EnumDef *Parser::LookupEnum(const std::string &id) {
525  // Search thru parent namespaces.
526  for (int components = static_cast<int>(namespaces_.back()->components.size());
527       components >= 0; components--) {
528    auto ed = enums_.Lookup(
529                namespaces_.back()->GetFullyQualifiedName(id, components));
530    if (ed) return ed;
531  }
532  return nullptr;
533}
534
535CheckedError Parser::ParseTypeIdent(Type &type) {
536  std::string id = attribute_;
537  EXPECT(kTokenIdentifier);
538  ECHECK(ParseNamespacing(&id, nullptr));
539  auto enum_def = LookupEnum(id);
540  if (enum_def) {
541    type = enum_def->underlying_type;
542    if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
543  } else {
544    type.base_type = BASE_TYPE_STRUCT;
545    type.struct_def = LookupCreateStruct(id);
546  }
547  return NoError();
548}
549
550// Parse any IDL type.
551CheckedError Parser::ParseType(Type &type) {
552  if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
553    type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
554    NEXT();
555  } else {
556    if (token_ == kTokenIdentifier) {
557      ECHECK(ParseTypeIdent(type));
558    } else if (token_ == '[') {
559      NEXT();
560      Type subtype;
561      ECHECK(ParseType(subtype));
562      if (subtype.base_type == BASE_TYPE_VECTOR) {
563        // We could support this, but it will complicate things, and it's
564        // easier to work around with a struct around the inner vector.
565        return Error(
566              "nested vector types not supported (wrap in table first).");
567      }
568      type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
569      type.element = subtype.base_type;
570      EXPECT(']');
571    } else {
572      return Error("illegal type syntax");
573    }
574  }
575  return NoError();
576}
577
578CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
579                              const Type &type, FieldDef **dest) {
580  auto &field = *new FieldDef();
581  field.value.offset =
582    FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
583  field.name = name;
584  field.file = struct_def.file;
585  field.value.type = type;
586  if (struct_def.fixed) {  // statically compute the field offset
587    auto size = InlineSize(type);
588    auto alignment = InlineAlignment(type);
589    // structs_ need to have a predictable format, so we need to align to
590    // the largest scalar
591    struct_def.minalign = std::max(struct_def.minalign, alignment);
592    struct_def.PadLastField(alignment);
593    field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
594    struct_def.bytesize += size;
595  }
596  if (struct_def.fields.Add(name, &field))
597    return Error("field already exists: " + name);
598  *dest = &field;
599  return NoError();
600}
601
602CheckedError Parser::ParseField(StructDef &struct_def) {
603  std::string name = attribute_;
604  std::vector<std::string> dc = doc_comment_;
605  EXPECT(kTokenIdentifier);
606  EXPECT(':');
607  Type type;
608  ECHECK(ParseType(type));
609
610  if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
611    return Error("structs_ may contain only scalar or struct fields");
612
613  FieldDef *typefield = nullptr;
614  if (type.base_type == BASE_TYPE_UNION) {
615    // For union fields, add a second auto-generated field to hold the type,
616    // with a special suffix.
617    ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
618                    type.enum_def->underlying_type, &typefield));
619  } else if (type.base_type == BASE_TYPE_VECTOR &&
620             type.element == BASE_TYPE_UNION) {
621    // Only cpp supports the union vector feature so far.
622    if (opts.lang_to_generate != IDLOptions::kCpp) {
623      return Error("Vectors of unions are not yet supported in all "
624                   "the specified programming languages.");
625    }
626    // For vector of union fields, add a second auto-generated vector field to
627    // hold the types, with a special suffix.
628    Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
629    union_vector.element = BASE_TYPE_UTYPE;
630    ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
631                    union_vector, &typefield));
632  }
633
634  FieldDef *field;
635  ECHECK(AddField(struct_def, name, type, &field));
636
637  if (token_ == '=') {
638    NEXT();
639    if (!IsScalar(type.base_type))
640      return Error("default values currently only supported for scalars");
641    ECHECK(ParseSingleValue(field->value));
642  }
643  if (IsFloat(field->value.type.base_type)) {
644    if (!strpbrk(field->value.constant.c_str(), ".eE"))
645      field->value.constant += ".0";
646  }
647
648  if (type.enum_def &&
649      IsScalar(type.base_type) &&
650      !struct_def.fixed &&
651      !type.enum_def->attributes.Lookup("bit_flags") &&
652      !type.enum_def->ReverseLookup(static_cast<int>(
653                         StringToInt(field->value.constant.c_str()))))
654    return Error("enum " + type.enum_def->name +
655          " does not have a declaration for this field\'s default of " +
656          field->value.constant);
657
658  field->doc_comment = dc;
659  ECHECK(ParseMetaData(&field->attributes));
660  field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
661  auto hash_name = field->attributes.Lookup("hash");
662  if (hash_name) {
663    switch (type.base_type) {
664      case BASE_TYPE_INT:
665      case BASE_TYPE_UINT: {
666        if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
667          return Error("Unknown hashing algorithm for 32 bit types: " +
668                hash_name->constant);
669        break;
670      }
671      case BASE_TYPE_LONG:
672      case BASE_TYPE_ULONG: {
673        if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
674          return Error("Unknown hashing algorithm for 64 bit types: " +
675                hash_name->constant);
676        break;
677      }
678      default:
679        return Error(
680              "only int, uint, long and ulong data types support hashing.");
681    }
682  }
683  auto cpp_type = field->attributes.Lookup("cpp_type");
684  if (cpp_type) {
685    if (!hash_name)
686      return Error("cpp_type can only be used with a hashed field");
687  }
688  if (field->deprecated && struct_def.fixed)
689    return Error("can't deprecate fields in a struct");
690  field->required = field->attributes.Lookup("required") != nullptr;
691  if (field->required && (struct_def.fixed ||
692                         IsScalar(field->value.type.base_type)))
693    return Error("only non-scalar fields in tables may be 'required'");
694  field->key = field->attributes.Lookup("key") != nullptr;
695  if (field->key) {
696    if (struct_def.has_key)
697      return Error("only one field may be set as 'key'");
698    struct_def.has_key = true;
699    if (!IsScalar(field->value.type.base_type)) {
700      field->required = true;
701      if (field->value.type.base_type != BASE_TYPE_STRING)
702        return Error("'key' field must be string or scalar type");
703    }
704  }
705
706  field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
707  if (field->native_inline && !IsStruct(field->value.type))
708    return Error("native_inline can only be defined on structs'");
709
710  auto nested = field->attributes.Lookup("nested_flatbuffer");
711  if (nested) {
712    if (nested->type.base_type != BASE_TYPE_STRING)
713      return Error(
714            "nested_flatbuffer attribute must be a string (the root type)");
715    if (field->value.type.base_type != BASE_TYPE_VECTOR ||
716        field->value.type.element != BASE_TYPE_UCHAR)
717      return Error(
718            "nested_flatbuffer attribute may only apply to a vector of ubyte");
719    // This will cause an error if the root type of the nested flatbuffer
720    // wasn't defined elsewhere.
721    LookupCreateStruct(nested->constant);
722  }
723
724  if (typefield) {
725    // If this field is a union, and it has a manually assigned id,
726    // the automatically added type field should have an id as well (of N - 1).
727    auto attr = field->attributes.Lookup("id");
728    if (attr) {
729      auto id = atoi(attr->constant.c_str());
730      auto val = new Value();
731      val->type = attr->type;
732      val->constant = NumToString(id - 1);
733      typefield->attributes.Add("id", val);
734    }
735  }
736
737  EXPECT(';');
738  return NoError();
739}
740
741CheckedError Parser::ParseString(Value &val) {
742  auto s = attribute_;
743  EXPECT(kTokenStringConstant);
744  val.constant = NumToString(builder_.CreateString(s).o);
745  return NoError();
746}
747
748CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
749                                   size_t parent_fieldn,
750                                   const StructDef *parent_struct_def) {
751  switch (val.type.base_type) {
752    case BASE_TYPE_UNION: {
753      assert(field);
754      std::string constant;
755      // Find corresponding type field we may have already parsed.
756      for (auto elem = field_stack_.rbegin();
757           elem != field_stack_.rbegin() + parent_fieldn; ++elem) {
758        auto &type = elem->second->value.type;
759        if (type.base_type == BASE_TYPE_UTYPE &&
760            type.enum_def == val.type.enum_def) {
761          constant = elem->first.constant;
762          break;
763        }
764      }
765      if (constant.empty()) {
766        // We haven't seen the type field yet. Sadly a lot of JSON writers
767        // output these in alphabetical order, meaning it comes after this
768        // value. So we scan past the value to find it, then come back here.
769        auto type_name = field->name + UnionTypeFieldSuffix();
770        assert(parent_struct_def);
771        auto type_field = parent_struct_def->fields.Lookup(type_name);
772        assert(type_field);  // Guaranteed by ParseField().
773        // Remember where we are in the source file, so we can come back here.
774        auto backup = *static_cast<ParserState *>(this);
775        ECHECK(SkipAnyJsonValue());  // The table.
776        EXPECT(',');
777        auto next_name = attribute_;
778        if (Is(kTokenStringConstant)) {
779          NEXT();
780        } else {
781          EXPECT(kTokenIdentifier);
782        }
783        if (next_name != type_name)
784          return Error("missing type field after this union value: " +
785                       type_name);
786        EXPECT(':');
787        Value type_val = type_field->value;
788        ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr));
789        constant = type_val.constant;
790        // Got the information we needed, now rewind:
791        *static_cast<ParserState *>(this) = backup;
792      }
793      uint8_t enum_idx;
794      ECHECK(atot(constant.c_str(), *this, &enum_idx));
795      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
796      if (!enum_val) return Error("illegal type id for: " + field->name);
797      if (enum_val->union_type.base_type == BASE_TYPE_STRUCT) {
798        ECHECK(ParseTable(*enum_val->union_type.struct_def, &val.constant,
799                          nullptr));
800        if (enum_val->union_type.struct_def->fixed) {
801          // All BASE_TYPE_UNION values are offsets, so turn this into one.
802          SerializeStruct(*enum_val->union_type.struct_def, val);
803          builder_.ClearOffsets();
804          val.constant = NumToString(builder_.GetSize());
805        }
806      } else if (enum_val->union_type.base_type == BASE_TYPE_STRING) {
807        ECHECK(ParseString(val));
808      } else {
809        assert(false);
810      }
811      break;
812    }
813    case BASE_TYPE_STRUCT:
814      ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
815      break;
816    case BASE_TYPE_STRING: {
817      ECHECK(ParseString(val));
818      break;
819    }
820    case BASE_TYPE_VECTOR: {
821      EXPECT('[');
822      uoffset_t off;
823      ECHECK(ParseVector(val.type.VectorType(), &off));
824      val.constant = NumToString(off);
825      break;
826    }
827    case BASE_TYPE_INT:
828    case BASE_TYPE_UINT:
829    case BASE_TYPE_LONG:
830    case BASE_TYPE_ULONG: {
831      if (field && field->attributes.Lookup("hash") &&
832          (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
833        ECHECK(ParseHash(val, field));
834      } else {
835        ECHECK(ParseSingleValue(val));
836      }
837      break;
838    }
839    default:
840      ECHECK(ParseSingleValue(val));
841      break;
842  }
843  return NoError();
844}
845
846void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
847  assert(val.constant.length() == struct_def.bytesize);
848  builder_.Align(struct_def.minalign);
849  builder_.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
850                     struct_def.bytesize);
851  builder_.AddStructOffset(val.offset, builder_.GetSize());
852}
853
854CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
855                                uoffset_t *ovalue) {
856  EXPECT('{');
857  size_t fieldn = 0;
858  for (;;) {
859    if ((!opts.strict_json || !fieldn) && Is('}')) { NEXT(); break; }
860    std::string name = attribute_;
861    if (Is(kTokenStringConstant)) {
862      NEXT();
863    } else {
864      EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
865    }
866    auto field = struct_def.fields.Lookup(name);
867    if (!field) {
868      if (!opts.skip_unexpected_fields_in_json) {
869        return Error("unknown field: " + name);
870      } else {
871        EXPECT(':');
872        ECHECK(SkipAnyJsonValue());
873      }
874    } else {
875      EXPECT(':');
876      if (Is(kTokenNull)) {
877        NEXT(); // Ignore this field.
878      } else {
879        Value val = field->value;
880        ECHECK(ParseAnyValue(val, field, fieldn, &struct_def));
881        // Hardcoded insertion-sort with error-check.
882        // If fields are specified in order, then this loop exits immediately.
883        auto elem = field_stack_.rbegin();
884        for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
885          auto existing_field = elem->second;
886          if (existing_field == field)
887            return Error("field set more than once: " + field->name);
888          if (existing_field->value.offset < field->value.offset) break;
889        }
890        // Note: elem points to before the insertion point, thus .base() points
891        // to the correct spot.
892        field_stack_.insert(elem.base(), std::make_pair(val, field));
893        fieldn++;
894      }
895    }
896    if (Is('}')) { NEXT(); break; }
897    EXPECT(',');
898  }
899
900  // Check if all required fields are parsed.
901  for (auto field_it = struct_def.fields.vec.begin();
902            field_it != struct_def.fields.vec.end();
903            ++field_it) {
904    auto required_field = *field_it;
905    if (!required_field->required) {
906      continue;
907    }
908    bool found = false;
909    for (auto pf_it = field_stack_.end() - fieldn;
910         pf_it != field_stack_.end();
911         ++pf_it) {
912      auto parsed_field = pf_it->second;
913      if (parsed_field == required_field) {
914        found = true;
915        break;
916      }
917    }
918    if (!found) {
919      return Error("required field is missing: " + required_field->name + " in " + struct_def.name);
920    }
921  }
922
923  if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
924    return Error("struct: wrong number of initializers: " + struct_def.name);
925
926  auto start = struct_def.fixed
927                 ? builder_.StartStruct(struct_def.minalign)
928                 : builder_.StartTable();
929
930  for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
931       size;
932       size /= 2) {
933    // Go through elements in reverse, since we're building the data backwards.
934    for (auto it = field_stack_.rbegin();
935             it != field_stack_.rbegin() + fieldn; ++it) {
936      auto &field_value = it->first;
937      auto field = it->second;
938      if (!struct_def.sortbysize ||
939          size == SizeOf(field_value.type.base_type)) {
940        switch (field_value.type.base_type) {
941          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
942            PTYPE) \
943            case BASE_TYPE_ ## ENUM: \
944              builder_.Pad(field->padding); \
945              if (struct_def.fixed) { \
946                CTYPE val; \
947                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
948                builder_.PushElement(val); \
949              } else { \
950                CTYPE val, valdef; \
951                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
952                ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
953                builder_.AddElement(field_value.offset, val, valdef); \
954              } \
955              break;
956            FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
957          #undef FLATBUFFERS_TD
958          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
959            PTYPE) \
960            case BASE_TYPE_ ## ENUM: \
961              builder_.Pad(field->padding); \
962              if (IsStruct(field->value.type)) { \
963                SerializeStruct(*field->value.type.struct_def, field_value); \
964              } else { \
965                CTYPE val; \
966                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
967                builder_.AddOffset(field_value.offset, val); \
968              } \
969              break;
970            FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
971          #undef FLATBUFFERS_TD
972        }
973      }
974    }
975  }
976  for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
977
978  if (struct_def.fixed) {
979    builder_.ClearOffsets();
980    builder_.EndStruct();
981    assert(value);
982    // Temporarily store this struct in the value string, since it is to
983    // be serialized in-place elsewhere.
984    value->assign(
985          reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
986          struct_def.bytesize);
987    builder_.PopBytes(struct_def.bytesize);
988    assert(!ovalue);
989  } else {
990    auto val = builder_.EndTable(start,
991                          static_cast<voffset_t>(struct_def.fields.vec.size()));
992    if (ovalue) *ovalue = val;
993    if (value) *value = NumToString(val);
994  }
995  return NoError();
996}
997
998CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue) {
999  int count = 0;
1000  for (;;) {
1001    if ((!opts.strict_json || !count) && Is(']')) { NEXT(); break; }
1002    Value val;
1003    val.type = type;
1004    ECHECK(ParseAnyValue(val, nullptr, 0, nullptr));
1005    field_stack_.push_back(std::make_pair(val, nullptr));
1006    count++;
1007    if (Is(']')) { NEXT(); break; }
1008    EXPECT(',');
1009  }
1010
1011  builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
1012                       InlineAlignment(type));
1013  for (int i = 0; i < count; i++) {
1014    // start at the back, since we're building the data backwards.
1015    auto &val = field_stack_.back().first;
1016    switch (val.type.base_type) {
1017      #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
1018        case BASE_TYPE_ ## ENUM: \
1019          if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
1020          else { \
1021             CTYPE elem; \
1022             ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1023             builder_.PushElement(elem); \
1024          } \
1025          break;
1026        FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1027      #undef FLATBUFFERS_TD
1028    }
1029    field_stack_.pop_back();
1030  }
1031
1032  builder_.ClearOffsets();
1033  *ovalue = builder_.EndVector(count);
1034  return NoError();
1035}
1036
1037CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
1038  if (Is('(')) {
1039    NEXT();
1040    for (;;) {
1041      auto name = attribute_;
1042      EXPECT(kTokenIdentifier);
1043      if (known_attributes_.find(name) == known_attributes_.end())
1044        return Error("user define attributes must be declared before use: " +
1045                     name);
1046      auto e = new Value();
1047      attributes->Add(name, e);
1048      if (Is(':')) {
1049        NEXT();
1050        ECHECK(ParseSingleValue(*e));
1051      }
1052      if (Is(')')) { NEXT(); break; }
1053      EXPECT(',');
1054    }
1055  }
1056  return NoError();
1057}
1058
1059CheckedError Parser::TryTypedValue(int dtoken, bool check, Value &e,
1060                                   BaseType req, bool *destmatch) {
1061  bool match = dtoken == token_;
1062  if (match) {
1063    *destmatch = true;
1064    e.constant = attribute_;
1065    if (!check) {
1066      if (e.type.base_type == BASE_TYPE_NONE) {
1067        e.type.base_type = req;
1068      } else {
1069        return Error(std::string("type mismatch: expecting: ") +
1070                     kTypeNames[e.type.base_type] +
1071                     ", found: " +
1072                     kTypeNames[req]);
1073      }
1074    }
1075    NEXT();
1076  }
1077  return NoError();
1078}
1079
1080CheckedError Parser::ParseEnumFromString(Type &type, int64_t *result) {
1081  *result = 0;
1082  // Parse one or more enum identifiers, separated by spaces.
1083  const char *next = attribute_.c_str();
1084  do {
1085    const char *divider = strchr(next, ' ');
1086    std::string word;
1087    if (divider) {
1088      word = std::string(next, divider);
1089      next = divider + strspn(divider, " ");
1090    } else {
1091      word = next;
1092      next += word.length();
1093    }
1094    if (type.enum_def) {  // The field has an enum type
1095      auto enum_val = type.enum_def->vals.Lookup(word);
1096      if (!enum_val)
1097        return Error("unknown enum value: " + word +
1098              ", for enum: " + type.enum_def->name);
1099      *result |= enum_val->value;
1100    } else {  // No enum type, probably integral field.
1101      if (!IsInteger(type.base_type))
1102        return Error("not a valid value for this field: " + word);
1103      // TODO: could check if its a valid number constant here.
1104      const char *dot = strrchr(word.c_str(), '.');
1105      if (!dot)
1106        return Error("enum values need to be qualified by an enum type");
1107      std::string enum_def_str(word.c_str(), dot);
1108      std::string enum_val_str(dot + 1, word.c_str() + word.length());
1109      auto enum_def = LookupEnum(enum_def_str);
1110      if (!enum_def) return Error("unknown enum: " + enum_def_str);
1111      auto enum_val = enum_def->vals.Lookup(enum_val_str);
1112      if (!enum_val) return Error("unknown enum value: " + enum_val_str);
1113      *result |= enum_val->value;
1114    }
1115  } while(*next);
1116  return NoError();
1117}
1118
1119
1120CheckedError Parser::ParseHash(Value &e, FieldDef* field) {
1121  assert(field);
1122  Value *hash_name = field->attributes.Lookup("hash");
1123  switch (e.type.base_type) {
1124    case BASE_TYPE_INT: {
1125      auto hash = FindHashFunction32(hash_name->constant.c_str());
1126      int32_t hashed_value = static_cast<int32_t>(hash(attribute_.c_str()));
1127      e.constant = NumToString(hashed_value);
1128      break;
1129    }
1130    case BASE_TYPE_UINT: {
1131      auto hash = FindHashFunction32(hash_name->constant.c_str());
1132      uint32_t hashed_value = hash(attribute_.c_str());
1133      e.constant = NumToString(hashed_value);
1134      break;
1135    }
1136    case BASE_TYPE_LONG: {
1137      auto hash = FindHashFunction64(hash_name->constant.c_str());
1138      int64_t hashed_value = static_cast<int64_t>(hash(attribute_.c_str()));
1139      e.constant = NumToString(hashed_value);
1140      break;
1141    }
1142    case BASE_TYPE_ULONG: {
1143      auto hash = FindHashFunction64(hash_name->constant.c_str());
1144      uint64_t hashed_value = hash(attribute_.c_str());
1145      e.constant = NumToString(hashed_value);
1146      break;
1147    }
1148    default:
1149      assert(0);
1150  }
1151  NEXT();
1152  return NoError();
1153}
1154
1155CheckedError Parser::ParseSingleValue(Value &e) {
1156  // First see if this could be a conversion function:
1157  if (token_ == kTokenIdentifier && *cursor_ == '(') {
1158    auto functionname = attribute_;
1159    NEXT();
1160    EXPECT('(');
1161    ECHECK(ParseSingleValue(e));
1162    EXPECT(')');
1163    #define FLATBUFFERS_FN_DOUBLE(name, op) \
1164      if (functionname == name) { \
1165        auto x = strtod(e.constant.c_str(), nullptr); \
1166        e.constant = NumToString(op); \
1167      }
1168    FLATBUFFERS_FN_DOUBLE("deg", x / M_PI * 180);
1169    FLATBUFFERS_FN_DOUBLE("rad", x * M_PI / 180);
1170    FLATBUFFERS_FN_DOUBLE("sin", sin(x));
1171    FLATBUFFERS_FN_DOUBLE("cos", cos(x));
1172    FLATBUFFERS_FN_DOUBLE("tan", tan(x));
1173    FLATBUFFERS_FN_DOUBLE("asin", asin(x));
1174    FLATBUFFERS_FN_DOUBLE("acos", acos(x));
1175    FLATBUFFERS_FN_DOUBLE("atan", atan(x));
1176    // TODO(wvo): add more useful conversion functions here.
1177    #undef FLATBUFFERS_FN_DOUBLE
1178  // Then check if this could be a string/identifier enum value:
1179  } else if (e.type.base_type != BASE_TYPE_STRING &&
1180      e.type.base_type != BASE_TYPE_NONE &&
1181      (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
1182    if (IsIdentifierStart(attribute_[0])) {  // Enum value.
1183      int64_t val;
1184      ECHECK(ParseEnumFromString(e.type, &val));
1185      e.constant = NumToString(val);
1186      NEXT();
1187    } else {  // Numeric constant in string.
1188      if (IsInteger(e.type.base_type)) {
1189        char *end;
1190        e.constant = NumToString(StringToInt(attribute_.c_str(), &end));
1191        if (*end)
1192          return Error("invalid integer: " + attribute_);
1193      } else if (IsFloat(e.type.base_type)) {
1194        char *end;
1195        e.constant = NumToString(strtod(attribute_.c_str(), &end));
1196        if (*end)
1197          return Error("invalid float: " + attribute_);
1198      } else {
1199        assert(0);  // Shouldn't happen, we covered all types.
1200        e.constant = "0";
1201      }
1202      NEXT();
1203    }
1204  } else {
1205    bool match = false;
1206    ECHECK(TryTypedValue(kTokenIntegerConstant,
1207                         IsScalar(e.type.base_type),
1208                         e,
1209                         BASE_TYPE_INT,
1210                         &match));
1211    ECHECK(TryTypedValue(kTokenFloatConstant,
1212                         IsFloat(e.type.base_type),
1213                         e,
1214                         BASE_TYPE_FLOAT,
1215                         &match));
1216    ECHECK(TryTypedValue(kTokenStringConstant,
1217                         e.type.base_type == BASE_TYPE_STRING,
1218                         e,
1219                         BASE_TYPE_STRING,
1220                         &match));
1221    if (!match)
1222      return Error("cannot parse value starting with: " +
1223                   TokenToStringId(token_));
1224  }
1225  return NoError();
1226}
1227
1228StructDef *Parser::LookupCreateStruct(const std::string &name,
1229                                      bool create_if_new, bool definition) {
1230  std::string qualified_name = namespaces_.back()->GetFullyQualifiedName(name);
1231  // See if it exists pre-declared by an unqualified use.
1232  auto struct_def = structs_.Lookup(name);
1233  if (struct_def && struct_def->predecl) {
1234    if (definition) {
1235      // Make sure it has the current namespace, and is registered under its
1236      // qualified name.
1237      struct_def->defined_namespace = namespaces_.back();
1238      structs_.Move(name, qualified_name);
1239    }
1240    return struct_def;
1241  }
1242  // See if it exists pre-declared by an qualified use.
1243  struct_def = structs_.Lookup(qualified_name);
1244  if (struct_def && struct_def->predecl) {
1245    if (definition) {
1246      // Make sure it has the current namespace.
1247      struct_def->defined_namespace = namespaces_.back();
1248    }
1249    return struct_def;
1250  }
1251  if (!definition) {
1252    // Search thru parent namespaces.
1253    for (size_t components = namespaces_.back()->components.size();
1254         components && !struct_def; components--) {
1255      struct_def = structs_.Lookup(
1256          namespaces_.back()->GetFullyQualifiedName(name, components - 1));
1257    }
1258  }
1259  if (!struct_def && create_if_new) {
1260    struct_def = new StructDef();
1261    if (definition) {
1262      structs_.Add(qualified_name, struct_def);
1263      struct_def->name = name;
1264      struct_def->defined_namespace = namespaces_.back();
1265    } else {
1266      // Not a definition.
1267      // Rather than failing, we create a "pre declared" StructDef, due to
1268      // circular references, and check for errors at the end of parsing.
1269      // It is defined in the root namespace, since we don't know what the
1270      // final namespace will be.
1271      // TODO: maybe safer to use special namespace?
1272      structs_.Add(name, struct_def);
1273      struct_def->name = name;
1274      struct_def->defined_namespace = new Namespace();
1275      namespaces_.insert(namespaces_.begin(), struct_def->defined_namespace);
1276    }
1277  }
1278  return struct_def;
1279}
1280
1281CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
1282  std::vector<std::string> enum_comment = doc_comment_;
1283  NEXT();
1284  std::string enum_name = attribute_;
1285  EXPECT(kTokenIdentifier);
1286  auto &enum_def = *new EnumDef();
1287  enum_def.name = enum_name;
1288  enum_def.file = file_being_parsed_;
1289  enum_def.doc_comment = enum_comment;
1290  enum_def.is_union = is_union;
1291  enum_def.defined_namespace = namespaces_.back();
1292  if (enums_.Add(namespaces_.back()->GetFullyQualifiedName(enum_name),
1293                 &enum_def))
1294    return Error("enum already exists: " + enum_name);
1295  if (is_union) {
1296    enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
1297    enum_def.underlying_type.enum_def = &enum_def;
1298  } else {
1299    if (opts.proto_mode) {
1300      enum_def.underlying_type.base_type = BASE_TYPE_INT;
1301    } else {
1302      // Give specialized error message, since this type spec used to
1303      // be optional in the first FlatBuffers release.
1304      if (!Is(':')) {
1305        return Error("must specify the underlying integer type for this"
1306              " enum (e.g. \': short\', which was the default).");
1307      } else {
1308        NEXT();
1309      }
1310      // Specify the integer type underlying this enum.
1311      ECHECK(ParseType(enum_def.underlying_type));
1312      if (!IsInteger(enum_def.underlying_type.base_type))
1313        return Error("underlying enum type must be integral");
1314    }
1315    // Make this type refer back to the enum it was derived from.
1316    enum_def.underlying_type.enum_def = &enum_def;
1317  }
1318  ECHECK(ParseMetaData(&enum_def.attributes));
1319  EXPECT('{');
1320  if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
1321  for (;;) {
1322    if (opts.proto_mode && attribute_ == "option") {
1323      ECHECK(ParseProtoOption());
1324    } else {
1325      auto value_name = attribute_;
1326      auto full_name = value_name;
1327      std::vector<std::string> value_comment = doc_comment_;
1328      EXPECT(kTokenIdentifier);
1329      if (is_union) {
1330        ECHECK(ParseNamespacing(&full_name, &value_name));
1331        if (opts.union_value_namespacing) {
1332          // Since we can't namespace the actual enum identifiers, turn
1333          // namespace parts into part of the identifier.
1334          value_name = full_name;
1335          std::replace(value_name.begin(), value_name.end(), '.', '_');
1336        }
1337      }
1338      auto prevsize = enum_def.vals.vec.size();
1339      auto value = enum_def.vals.vec.size()
1340        ? enum_def.vals.vec.back()->value + 1
1341        : 0;
1342      auto &ev = *new EnumVal(value_name, value);
1343      if (enum_def.vals.Add(value_name, &ev))
1344        return Error("enum value already exists: " + value_name);
1345      ev.doc_comment = value_comment;
1346      if (is_union) {
1347        if (Is(':')) {
1348          NEXT();
1349          ECHECK(ParseType(ev.union_type));
1350          if (ev.union_type.base_type != BASE_TYPE_STRUCT &&
1351              ev.union_type.base_type != BASE_TYPE_STRING)
1352            return Error("union value type may only be table/struct/string");
1353          enum_def.uses_type_aliases = true;
1354        } else {
1355          ev.union_type = Type(BASE_TYPE_STRUCT, LookupCreateStruct(full_name));
1356        }
1357      }
1358      if (Is('=')) {
1359        NEXT();
1360        ev.value = StringToInt(attribute_.c_str());
1361        EXPECT(kTokenIntegerConstant);
1362        if (!opts.proto_mode && prevsize &&
1363            enum_def.vals.vec[prevsize - 1]->value >= ev.value)
1364          return Error("enum values must be specified in ascending order");
1365      }
1366      if (is_union) {
1367        if (ev.value < 0 || ev.value >= 256)
1368          return Error("union enum value must fit in a ubyte");
1369      }
1370      if (opts.proto_mode && Is('[')) {
1371        NEXT();
1372        // ignore attributes on enums.
1373        while (token_ != ']') NEXT();
1374        NEXT();
1375      }
1376    }
1377    if (!Is(opts.proto_mode ? ';' : ',')) break;
1378    NEXT();
1379    if (Is('}')) break;
1380  }
1381  EXPECT('}');
1382  if (enum_def.attributes.Lookup("bit_flags")) {
1383    for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
1384         ++it) {
1385      if (static_cast<size_t>((*it)->value) >=
1386           SizeOf(enum_def.underlying_type.base_type) * 8)
1387        return Error("bit flag out of range of underlying integral type");
1388      (*it)->value = 1LL << (*it)->value;
1389    }
1390  }
1391  if (dest) *dest = &enum_def;
1392  types_.Add(namespaces_.back()->GetFullyQualifiedName(enum_def.name),
1393             new Type(BASE_TYPE_UNION, nullptr, &enum_def));
1394  return NoError();
1395}
1396
1397CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
1398  auto &struct_def = *LookupCreateStruct(name, true, true);
1399  if (!struct_def.predecl) return Error("datatype already exists: " + name);
1400  struct_def.predecl = false;
1401  struct_def.name = name;
1402  struct_def.file = file_being_parsed_;
1403  // Move this struct to the back of the vector just in case it was predeclared,
1404  // to preserve declaration order.
1405  *std::remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) = &struct_def;
1406  *dest = &struct_def;
1407  return NoError();
1408}
1409
1410CheckedError Parser::CheckClash(std::vector<FieldDef*> &fields,
1411                                StructDef *struct_def,
1412                                const char *suffix,
1413                                BaseType basetype) {
1414  auto len = strlen(suffix);
1415  for (auto it = fields.begin(); it != fields.end(); ++it) {
1416    auto &fname = (*it)->name;
1417    if (fname.length() > len &&
1418        fname.compare(fname.length() - len, len, suffix) == 0 &&
1419        (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
1420      auto field = struct_def->fields.Lookup(
1421                                             fname.substr(0, fname.length() - len));
1422      if (field && field->value.type.base_type == basetype)
1423        return Error("Field " + fname +
1424                     " would clash with generated functions for field " +
1425                     field->name);
1426    }
1427  }
1428  return NoError();
1429}
1430
1431static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
1432  auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
1433  auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
1434  return a_id < b_id;
1435}
1436
1437CheckedError Parser::ParseDecl() {
1438  std::vector<std::string> dc = doc_comment_;
1439  bool fixed = Is(kTokenStruct);
1440  if (fixed) NEXT() else EXPECT(kTokenTable);
1441  std::string name = attribute_;
1442  EXPECT(kTokenIdentifier);
1443  StructDef *struct_def;
1444  ECHECK(StartStruct(name, &struct_def));
1445  struct_def->doc_comment = dc;
1446  struct_def->fixed = fixed;
1447  ECHECK(ParseMetaData(&struct_def->attributes));
1448  struct_def->sortbysize =
1449    struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
1450  EXPECT('{');
1451  while (token_ != '}') ECHECK(ParseField(*struct_def));
1452  auto force_align = struct_def->attributes.Lookup("force_align");
1453  if (fixed && force_align) {
1454    auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
1455    if (force_align->type.base_type != BASE_TYPE_INT ||
1456        align < struct_def->minalign ||
1457        align > FLATBUFFERS_MAX_ALIGNMENT ||
1458        align & (align - 1))
1459      return Error("force_align must be a power of two integer ranging from the"
1460                   "struct\'s natural alignment to " +
1461                   NumToString(FLATBUFFERS_MAX_ALIGNMENT));
1462    struct_def->minalign = align;
1463  }
1464  struct_def->PadLastField(struct_def->minalign);
1465  // Check if this is a table that has manual id assignments
1466  auto &fields = struct_def->fields.vec;
1467  if (!struct_def->fixed && fields.size()) {
1468    size_t num_id_fields = 0;
1469    for (auto it = fields.begin(); it != fields.end(); ++it) {
1470      if ((*it)->attributes.Lookup("id")) num_id_fields++;
1471    }
1472    // If any fields have ids..
1473    if (num_id_fields) {
1474      // Then all fields must have them.
1475      if (num_id_fields != fields.size())
1476        return Error(
1477              "either all fields or no fields must have an 'id' attribute");
1478      // Simply sort by id, then the fields are the same as if no ids had
1479      // been specified.
1480      std::sort(fields.begin(), fields.end(), compareFieldDefs);
1481      // Verify we have a contiguous set, and reassign vtable offsets.
1482      for (int i = 0; i < static_cast<int>(fields.size()); i++) {
1483        if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
1484          return Error("field id\'s must be consecutive from 0, id " +
1485                NumToString(i) + " missing or set twice");
1486        fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
1487      }
1488    }
1489  }
1490
1491  ECHECK(CheckClash(fields, struct_def, UnionTypeFieldSuffix(),
1492                    BASE_TYPE_UNION));
1493  ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
1494  ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
1495  ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
1496  ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
1497  ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
1498  EXPECT('}');
1499  types_.Add(namespaces_.back()->GetFullyQualifiedName(struct_def->name),
1500             new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
1501  return NoError();
1502}
1503
1504CheckedError Parser::ParseService() {
1505  std::vector<std::string> service_comment = doc_comment_;
1506  NEXT();
1507  auto service_name = attribute_;
1508  EXPECT(kTokenIdentifier);
1509  auto &service_def = *new ServiceDef();
1510  service_def.name = service_name;
1511  service_def.file = file_being_parsed_;
1512  service_def.doc_comment = service_comment;
1513  service_def.defined_namespace = namespaces_.back();
1514  if (services_.Add(namespaces_.back()->GetFullyQualifiedName(service_name),
1515                    &service_def))
1516    return Error("service already exists: " + service_name);
1517  ECHECK(ParseMetaData(&service_def.attributes));
1518  EXPECT('{');
1519  do {
1520    auto rpc_name = attribute_;
1521    EXPECT(kTokenIdentifier);
1522    EXPECT('(');
1523    Type reqtype, resptype;
1524    ECHECK(ParseTypeIdent(reqtype));
1525    EXPECT(')');
1526    EXPECT(':');
1527    ECHECK(ParseTypeIdent(resptype));
1528    if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
1529        resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
1530        return Error("rpc request and response types must be tables");
1531    auto &rpc = *new RPCCall();
1532    rpc.name = rpc_name;
1533    rpc.request = reqtype.struct_def;
1534    rpc.response = resptype.struct_def;
1535    if (service_def.calls.Add(rpc_name, &rpc))
1536      return Error("rpc already exists: " + rpc_name);
1537    ECHECK(ParseMetaData(&rpc.attributes));
1538    EXPECT(';');
1539  } while (token_ != '}');
1540  NEXT();
1541  return NoError();
1542}
1543
1544bool Parser::SetRootType(const char *name) {
1545  root_struct_def_ = structs_.Lookup(name);
1546  if (!root_struct_def_)
1547    root_struct_def_ = structs_.Lookup(
1548                         namespaces_.back()->GetFullyQualifiedName(name));
1549  return root_struct_def_ != nullptr;
1550}
1551
1552void Parser::MarkGenerated() {
1553  // This function marks all existing definitions as having already
1554  // been generated, which signals no code for included files should be
1555  // generated.
1556  for (auto it = enums_.vec.begin();
1557           it != enums_.vec.end(); ++it) {
1558    (*it)->generated = true;
1559  }
1560  for (auto it = structs_.vec.begin();
1561           it != structs_.vec.end(); ++it) {
1562    (*it)->generated = true;
1563  }
1564  for (auto it = services_.vec.begin();
1565           it != services_.vec.end(); ++it) {
1566    (*it)->generated = true;
1567  }
1568}
1569
1570CheckedError Parser::ParseNamespace() {
1571  NEXT();
1572  auto ns = new Namespace();
1573  namespaces_.push_back(ns);
1574  if (token_ != ';') {
1575    for (;;) {
1576      ns->components.push_back(attribute_);
1577      EXPECT(kTokenIdentifier);
1578      if (Is('.')) NEXT() else break;
1579    }
1580  }
1581  EXPECT(';');
1582  return NoError();
1583}
1584
1585static bool compareEnumVals(const EnumVal *a, const EnumVal* b) {
1586  return a->value < b->value;
1587}
1588
1589// Best effort parsing of .proto declarations, with the aim to turn them
1590// in the closest corresponding FlatBuffer equivalent.
1591// We parse everything as identifiers instead of keywords, since we don't
1592// want protobuf keywords to become invalid identifiers in FlatBuffers.
1593CheckedError Parser::ParseProtoDecl() {
1594  bool isextend = attribute_ == "extend";
1595  if (attribute_ == "package") {
1596    // These are identical in syntax to FlatBuffer's namespace decl.
1597    ECHECK(ParseNamespace());
1598  } else if (attribute_ == "message" || isextend) {
1599    std::vector<std::string> struct_comment = doc_comment_;
1600    NEXT();
1601    StructDef *struct_def = nullptr;
1602    if (isextend) {
1603      if (Is('.')) NEXT();  // qualified names may start with a . ?
1604      auto id = attribute_;
1605      EXPECT(kTokenIdentifier);
1606      ECHECK(ParseNamespacing(&id, nullptr));
1607      struct_def = LookupCreateStruct(id, false);
1608      if (!struct_def)
1609        return Error("cannot extend unknown message type: " + id);
1610    } else {
1611      std::string name = attribute_;
1612      EXPECT(kTokenIdentifier);
1613      ECHECK(StartStruct(name, &struct_def));
1614      // Since message definitions can be nested, we create a new namespace.
1615      auto ns = new Namespace();
1616      // Copy of current namespace.
1617      *ns = *namespaces_.back();
1618      // But with current message name.
1619      ns->components.push_back(name);
1620      namespaces_.push_back(ns);
1621    }
1622    struct_def->doc_comment = struct_comment;
1623    ECHECK(ParseProtoFields(struct_def, isextend, false));
1624    if (!isextend) {
1625      // We have to remove the nested namespace, but we can't just throw it
1626      // away, so put it at the beginning of the vector.
1627      auto ns = namespaces_.back();
1628      namespaces_.pop_back();
1629      namespaces_.insert(namespaces_.begin(), ns);
1630    }
1631    if (Is(';')) NEXT();
1632  } else if (attribute_ == "enum") {
1633    // These are almost the same, just with different terminator:
1634    EnumDef *enum_def;
1635    ECHECK(ParseEnum(false, &enum_def));
1636    if (Is(';')) NEXT();
1637    // Protobuf allows them to be specified in any order, so sort afterwards.
1638    auto &v = enum_def->vals.vec;
1639    std::sort(v.begin(), v.end(), compareEnumVals);
1640
1641    // Temp: remove any duplicates, as .fbs files can't handle them.
1642    for (auto it = v.begin(); it != v.end(); ) {
1643      if (it != v.begin() && it[0]->value == it[-1]->value) it = v.erase(it);
1644      else ++it;
1645    }
1646  } else if (attribute_ == "syntax") {  // Skip these.
1647    NEXT();
1648    EXPECT('=');
1649    EXPECT(kTokenStringConstant);
1650    EXPECT(';');
1651  } else if (attribute_ == "option") {  // Skip these.
1652    ECHECK(ParseProtoOption());
1653    EXPECT(';');
1654  } else if (attribute_ == "service") {  // Skip these.
1655    NEXT();
1656    EXPECT(kTokenIdentifier);
1657    ECHECK(ParseProtoCurliesOrIdent());
1658  } else {
1659    return Error("don\'t know how to parse .proto declaration starting with " +
1660          TokenToStringId(token_));
1661  }
1662  return NoError();
1663}
1664
1665CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
1666                                      bool inside_oneof) {
1667  EXPECT('{');
1668  while (token_ != '}') {
1669    if (attribute_ == "message" || attribute_ == "extend" ||
1670        attribute_ == "enum") {
1671      // Nested declarations.
1672      ECHECK(ParseProtoDecl());
1673    } else if (attribute_ == "extensions") {  // Skip these.
1674      NEXT();
1675      EXPECT(kTokenIntegerConstant);
1676      if (Is(kTokenIdentifier)) {
1677        NEXT();  // to
1678        NEXT();  // num
1679      }
1680      EXPECT(';');
1681    } else if (attribute_ == "option") {  // Skip these.
1682      ECHECK(ParseProtoOption());
1683      EXPECT(';');
1684    } else if (attribute_ == "reserved") {  // Skip these.
1685      NEXT();
1686      EXPECT(kTokenIntegerConstant);
1687      while (Is(',')) { NEXT(); EXPECT(kTokenIntegerConstant); }
1688      EXPECT(';');
1689    } else {
1690      std::vector<std::string> field_comment = doc_comment_;
1691      // Parse the qualifier.
1692      bool required = false;
1693      bool repeated = false;
1694      bool oneof = false;
1695      if (!inside_oneof) {
1696        if (attribute_ == "optional") {
1697          // This is the default.
1698          EXPECT(kTokenIdentifier);
1699        } else if (attribute_ == "required") {
1700          required = true;
1701          EXPECT(kTokenIdentifier);
1702        } else if (attribute_ == "repeated") {
1703          repeated = true;
1704          EXPECT(kTokenIdentifier);
1705        } else if (attribute_ == "oneof") {
1706          oneof = true;
1707          EXPECT(kTokenIdentifier);
1708        } else {
1709          // can't error, proto3 allows decls without any of the above.
1710        }
1711      }
1712      StructDef *anonymous_struct = nullptr;
1713      Type type;
1714      if (attribute_ == "group" || oneof) {
1715        if (!oneof) EXPECT(kTokenIdentifier);
1716        auto name = "Anonymous" + NumToString(anonymous_counter++);
1717        ECHECK(StartStruct(name, &anonymous_struct));
1718        type = Type(BASE_TYPE_STRUCT, anonymous_struct);
1719      } else {
1720        ECHECK(ParseTypeFromProtoType(&type));
1721      }
1722      // Repeated elements get mapped to a vector.
1723      if (repeated) {
1724        type.element = type.base_type;
1725        type.base_type = BASE_TYPE_VECTOR;
1726      }
1727      std::string name = attribute_;
1728      // Protos may use our keywords "attribute" & "namespace" as an identifier.
1729      if (Is(kTokenAttribute) || Is(kTokenNameSpace)) {
1730        NEXT();
1731        // TODO: simpler to just not make these keywords?
1732        name += "_";  // Have to make it not a keyword.
1733      } else {
1734        EXPECT(kTokenIdentifier);
1735      }
1736      if (!oneof) {
1737        // Parse the field id. Since we're just translating schemas, not
1738        // any kind of binary compatibility, we can safely ignore these, and
1739        // assign our own.
1740        EXPECT('=');
1741        EXPECT(kTokenIntegerConstant);
1742      }
1743      FieldDef *field = nullptr;
1744      if (isextend) {
1745        // We allow a field to be re-defined when extending.
1746        // TODO: are there situations where that is problematic?
1747        field = struct_def->fields.Lookup(name);
1748      }
1749      if (!field) ECHECK(AddField(*struct_def, name, type, &field));
1750      field->doc_comment = field_comment;
1751      if (!IsScalar(type.base_type)) field->required = required;
1752      // See if there's a default specified.
1753      if (Is('[')) {
1754        NEXT();
1755        for (;;) {
1756          auto key = attribute_;
1757          ECHECK(ParseProtoKey());
1758          EXPECT('=');
1759          auto val = attribute_;
1760          ECHECK(ParseProtoCurliesOrIdent());
1761          if (key == "default") {
1762            // Temp: skip non-numeric defaults (enums).
1763            auto numeric = strpbrk(val.c_str(), "0123456789-+.");
1764            if (IsScalar(type.base_type) && numeric == val.c_str())
1765              field->value.constant = val;
1766          } else if (key == "deprecated") {
1767            field->deprecated = val == "true";
1768          }
1769          if (!Is(',')) break;
1770          NEXT();
1771        }
1772        EXPECT(']');
1773      }
1774      if (anonymous_struct) {
1775        ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
1776        if (Is(';')) NEXT();
1777      } else {
1778        EXPECT(';');
1779      }
1780    }
1781  }
1782  NEXT();
1783  return NoError();
1784}
1785
1786CheckedError Parser::ParseProtoKey() {
1787  if (token_ == '(') {
1788    NEXT();
1789    // Skip "(a.b)" style custom attributes.
1790    while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
1791    EXPECT(')');
1792    while (Is('.')) { NEXT(); EXPECT(kTokenIdentifier); }
1793  } else {
1794    EXPECT(kTokenIdentifier);
1795  }
1796  return NoError();
1797}
1798
1799CheckedError Parser::ParseProtoCurliesOrIdent() {
1800  if (Is('{')) {
1801    NEXT();
1802    for (int nesting = 1; nesting; ) {
1803      if (token_ == '{') nesting++;
1804      else if (token_ == '}') nesting--;
1805      NEXT();
1806    }
1807  } else {
1808    NEXT();  // Any single token.
1809  }
1810  return NoError();
1811}
1812
1813CheckedError Parser::ParseProtoOption() {
1814  NEXT();
1815  ECHECK(ParseProtoKey());
1816  EXPECT('=');
1817  ECHECK(ParseProtoCurliesOrIdent());
1818  return NoError();
1819}
1820
1821// Parse a protobuf type, and map it to the corresponding FlatBuffer one.
1822CheckedError Parser::ParseTypeFromProtoType(Type *type) {
1823  struct type_lookup { const char *proto_type; BaseType fb_type; };
1824  static type_lookup lookup[] = {
1825    { "float", BASE_TYPE_FLOAT },  { "double", BASE_TYPE_DOUBLE },
1826    { "int32", BASE_TYPE_INT },    { "int64", BASE_TYPE_LONG },
1827    { "uint32", BASE_TYPE_UINT },  { "uint64", BASE_TYPE_ULONG },
1828    { "sint32", BASE_TYPE_INT },   { "sint64", BASE_TYPE_LONG },
1829    { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
1830    { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
1831    { "bool", BASE_TYPE_BOOL },
1832    { "string", BASE_TYPE_STRING },
1833    { "bytes", BASE_TYPE_STRING },
1834    { nullptr, BASE_TYPE_NONE }
1835  };
1836  for (auto tl = lookup; tl->proto_type; tl++) {
1837    if (attribute_ == tl->proto_type) {
1838      type->base_type = tl->fb_type;
1839      NEXT();
1840      return NoError();
1841    }
1842  }
1843  if (Is('.')) NEXT();  // qualified names may start with a . ?
1844  ECHECK(ParseTypeIdent(*type));
1845  return NoError();
1846}
1847
1848CheckedError Parser::SkipAnyJsonValue() {
1849  switch (token_) {
1850    case '{':
1851      ECHECK(SkipJsonObject());
1852      break;
1853    case kTokenStringConstant:
1854      ECHECK(SkipJsonString());
1855      break;
1856    case '[':
1857      ECHECK(SkipJsonArray());
1858      break;
1859    case kTokenIntegerConstant:
1860      EXPECT(kTokenIntegerConstant);
1861      break;
1862    case kTokenFloatConstant:
1863      EXPECT(kTokenFloatConstant);
1864      break;
1865    default:
1866      return Error(std::string("Unexpected token:") + std::string(1, static_cast<char>(token_)));
1867  }
1868  return NoError();
1869}
1870
1871CheckedError Parser::SkipJsonObject() {
1872  EXPECT('{');
1873  size_t fieldn = 0;
1874
1875  for (;;) {
1876    if ((!opts.strict_json || !fieldn) && Is('}')) break;
1877
1878    if (!Is(kTokenStringConstant)) {
1879      EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
1880    }
1881    else {
1882      NEXT();
1883    }
1884
1885    EXPECT(':');
1886    ECHECK(SkipAnyJsonValue());
1887    fieldn++;
1888
1889    if (Is('}')) break;
1890    EXPECT(',');
1891  }
1892
1893  NEXT();
1894  return NoError();
1895}
1896
1897CheckedError Parser::SkipJsonArray() {
1898  EXPECT('[');
1899
1900  for (;;) {
1901    if (Is(']')) break;
1902
1903    ECHECK(SkipAnyJsonValue());
1904
1905    if (Is(']')) break;
1906    EXPECT(',');
1907  }
1908
1909  NEXT();
1910  return NoError();
1911}
1912
1913CheckedError Parser::SkipJsonString() {
1914  EXPECT(kTokenStringConstant);
1915  return NoError();
1916}
1917
1918bool Parser::Parse(const char *source, const char **include_paths,
1919                   const char *source_filename) {
1920  return !DoParse(source, include_paths, source_filename).Check();
1921}
1922
1923CheckedError Parser::DoParse(const char *source, const char **include_paths,
1924                             const char *source_filename) {
1925  file_being_parsed_ = source_filename ? source_filename : "";
1926  if (source_filename &&
1927      included_files_.find(source_filename) == included_files_.end()) {
1928    included_files_[source_filename] = true;
1929    files_included_per_file_[source_filename] = std::set<std::string>();
1930  }
1931  if (!include_paths) {
1932    static const char *current_directory[] = { "", nullptr };
1933    include_paths = current_directory;
1934  }
1935  source_ = cursor_ = source;
1936  line_ = 1;
1937  error_.clear();
1938  field_stack_.clear();
1939  builder_.Clear();
1940  // Start with a blank namespace just in case this file doesn't have one.
1941  namespaces_.push_back(new Namespace());
1942  ECHECK(SkipByteOrderMark());
1943  NEXT();
1944
1945  if (Is(kTokenEof))
1946      return Error("input file is empty");
1947
1948  // Includes must come before type declarations:
1949  for (;;) {
1950    // Parse pre-include proto statements if any:
1951    if (opts.proto_mode &&
1952        (attribute_ == "option" || attribute_ == "syntax" ||
1953         attribute_ == "package")) {
1954        ECHECK(ParseProtoDecl());
1955    } else if (Is(kTokenNativeInclude)) {
1956      NEXT();
1957      native_included_files_.emplace_back(attribute_);
1958      EXPECT(kTokenStringConstant);
1959    } else if (Is(kTokenInclude) ||
1960               (opts.proto_mode &&
1961                attribute_ == "import" &&
1962                Is(kTokenIdentifier))) {
1963      NEXT();
1964      if (opts.proto_mode && attribute_ == "public") NEXT();
1965      auto name = flatbuffers::PosixPath(attribute_.c_str());
1966      EXPECT(kTokenStringConstant);
1967      // Look for the file in include_paths.
1968      std::string filepath;
1969      for (auto paths = include_paths; paths && *paths; paths++) {
1970        filepath = flatbuffers::ConCatPathFileName(*paths, name);
1971        if(FileExists(filepath.c_str())) break;
1972      }
1973      if (filepath.empty())
1974        return Error("unable to locate include file: " + name);
1975      if (source_filename)
1976        files_included_per_file_[source_filename].insert(filepath);
1977      if (included_files_.find(filepath) == included_files_.end()) {
1978        // We found an include file that we have not parsed yet.
1979        // Load it and parse it.
1980        std::string contents;
1981        if (!LoadFile(filepath.c_str(), true, &contents))
1982          return Error("unable to load include file: " + name);
1983        ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str()));
1984        // We generally do not want to output code for any included files:
1985        if (!opts.generate_all) MarkGenerated();
1986        // This is the easiest way to continue this file after an include:
1987        // instead of saving and restoring all the state, we simply start the
1988        // file anew. This will cause it to encounter the same include
1989        // statement again, but this time it will skip it, because it was
1990        // entered into included_files_.
1991        // This is recursive, but only go as deep as the number of include
1992        // statements.
1993        return DoParse(source, include_paths, source_filename);
1994      }
1995      EXPECT(';');
1996    } else {
1997      break;
1998    }
1999  }
2000  // Now parse all other kinds of declarations:
2001  while (token_ != kTokenEof) {
2002    if (opts.proto_mode) {
2003      ECHECK(ParseProtoDecl());
2004    } else if (token_ == kTokenNameSpace) {
2005      ECHECK(ParseNamespace());
2006    } else if (token_ == '{') {
2007      if (!root_struct_def_)
2008        return Error("no root type set to parse json with");
2009      if (builder_.GetSize()) {
2010        return Error("cannot have more than one json object in a file");
2011      }
2012      uoffset_t toff;
2013      ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
2014      builder_.Finish(Offset<Table>(toff),
2015                file_identifier_.length() ? file_identifier_.c_str() : nullptr);
2016    } else if (token_ == kTokenEnum) {
2017      ECHECK(ParseEnum(false, nullptr));
2018    } else if (token_ == kTokenUnion) {
2019      ECHECK(ParseEnum(true, nullptr));
2020    } else if (token_ == kTokenRootType) {
2021      NEXT();
2022      auto root_type = attribute_;
2023      EXPECT(kTokenIdentifier);
2024      ECHECK(ParseNamespacing(&root_type, nullptr));
2025      if (!SetRootType(root_type.c_str()))
2026        return Error("unknown root type: " + root_type);
2027      if (root_struct_def_->fixed)
2028        return Error("root type must be a table");
2029      EXPECT(';');
2030    } else if (token_ == kTokenFileIdentifier) {
2031      NEXT();
2032      file_identifier_ = attribute_;
2033      EXPECT(kTokenStringConstant);
2034      if (file_identifier_.length() !=
2035          FlatBufferBuilder::kFileIdentifierLength)
2036        return Error("file_identifier must be exactly " +
2037              NumToString(FlatBufferBuilder::kFileIdentifierLength) +
2038              " characters");
2039      EXPECT(';');
2040    } else if (token_ == kTokenFileExtension) {
2041      NEXT();
2042      file_extension_ = attribute_;
2043      EXPECT(kTokenStringConstant);
2044      EXPECT(';');
2045    } else if(token_ == kTokenInclude) {
2046      return Error("includes must come before declarations");
2047    } else if(token_ == kTokenAttribute) {
2048      NEXT();
2049      auto name = attribute_;
2050      EXPECT(kTokenStringConstant);
2051      EXPECT(';');
2052      known_attributes_[name] = false;
2053    } else if (token_ == kTokenService) {
2054      ECHECK(ParseService());
2055    } else {
2056      ECHECK(ParseDecl());
2057    }
2058  }
2059  for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2060    if ((*it)->predecl) {
2061      return Error("type referenced but not defined: " + (*it)->name);
2062    }
2063  }
2064  // This check has to happen here and not earlier, because only now do we
2065  // know for sure what the type of these are.
2066  for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2067    auto &enum_def = **it;
2068    if (enum_def.is_union) {
2069      for (auto val_it = enum_def.vals.vec.begin();
2070           val_it != enum_def.vals.vec.end();
2071           ++val_it) {
2072        auto &val = **val_it;
2073        if (opts.lang_to_generate != IDLOptions::kCpp &&
2074            val.union_type.struct_def && val.union_type.struct_def->fixed)
2075          return Error(
2076                "only tables can be union elements in the generated language: "
2077                + val.name);
2078      }
2079    }
2080  }
2081  return NoError();
2082}
2083
2084std::set<std::string> Parser::GetIncludedFilesRecursive(
2085    const std::string &file_name) const {
2086  std::set<std::string> included_files;
2087  std::list<std::string> to_process;
2088
2089  if (file_name.empty()) return included_files;
2090  to_process.push_back(file_name);
2091
2092  while (!to_process.empty()) {
2093    std::string current = to_process.front();
2094    to_process.pop_front();
2095    included_files.insert(current);
2096
2097    auto new_files = files_included_per_file_.at(current);
2098    for (auto it = new_files.begin(); it != new_files.end(); ++it) {
2099      if (included_files.find(*it) == included_files.end())
2100        to_process.push_back(*it);
2101    }
2102  }
2103
2104  return included_files;
2105}
2106
2107// Schema serialization functionality:
2108
// Ordering predicate: compares two definitions by the fully qualified name
// that each one's defined_namespace produces for its own name.
template<typename T> bool compareName(const T* a, const T* b) {
  const auto lhs_name = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs_name = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs_name < rhs_name;
}
2113
2114template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
2115  // Pre-sort these vectors, such that we can set the correct indices for them.
2116  auto vec = defvec;
2117  std::sort(vec.begin(), vec.end(), compareName<T>);
2118  for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
2119}
2120
2121void Parser::Serialize() {
2122  builder_.Clear();
2123  AssignIndices(structs_.vec);
2124  AssignIndices(enums_.vec);
2125  std::vector<Offset<reflection::Object>> object_offsets;
2126  for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2127    auto offset = (*it)->Serialize(&builder_, *this);
2128    object_offsets.push_back(offset);
2129    (*it)->serialized_location = offset.o;
2130  }
2131  std::vector<Offset<reflection::Enum>> enum_offsets;
2132  for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2133    auto offset = (*it)->Serialize(&builder_, *this);
2134    enum_offsets.push_back(offset);
2135    (*it)->serialized_location = offset.o;
2136  }
2137  auto schema_offset = reflection::CreateSchema(
2138                         builder_,
2139                         builder_.CreateVectorOfSortedTables(&object_offsets),
2140                         builder_.CreateVectorOfSortedTables(&enum_offsets),
2141                         builder_.CreateString(file_identifier_),
2142                         builder_.CreateString(file_extension_),
2143                         root_struct_def_
2144                           ? root_struct_def_->serialized_location
2145                           : 0);
2146  builder_.Finish(schema_offset, reflection::SchemaIdentifier());
2147}
2148
2149Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
2150                                                const Parser &parser) const {
2151  std::vector<Offset<reflection::Field>> field_offsets;
2152  for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
2153    field_offsets.push_back(
2154      (*it)->Serialize(builder,
2155                       static_cast<uint16_t>(it - fields.vec.begin()), parser));
2156  }
2157  auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2158  return reflection::CreateObject(*builder,
2159                                  builder->CreateString(qualified_name),
2160                                  builder->CreateVectorOfSortedTables(
2161                                    &field_offsets),
2162                                  fixed,
2163                                  static_cast<int>(minalign),
2164                                  static_cast<int>(bytesize),
2165                                  SerializeAttributes(builder, parser),
2166                                  parser.opts.binary_schema_comments
2167                                    ? builder->CreateVectorOfStrings(
2168                                        doc_comment)
2169                                    : 0);
2170}
2171
2172Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
2173                                              uint16_t id,
2174                                              const Parser &parser) const {
2175  return reflection::CreateField(*builder,
2176                                 builder->CreateString(name),
2177                                 value.type.Serialize(builder),
2178                                 id,
2179                                 value.offset,
2180                                 IsInteger(value.type.base_type)
2181                                   ? StringToInt(value.constant.c_str())
2182                                   : 0,
2183                                 IsFloat(value.type.base_type)
2184                                   ? strtod(value.constant.c_str(), nullptr)
2185                                   : 0.0,
2186                                 deprecated,
2187                                 required,
2188                                 key,
2189                                 SerializeAttributes(builder, parser),
2190                                 parser.opts.binary_schema_comments
2191                                   ? builder->CreateVectorOfStrings(doc_comment)
2192                                   : 0);
2193  // TODO: value.constant is almost always "0", we could save quite a bit of
2194  // space by sharing it. Same for common values of value.type.
2195}
2196
2197Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
2198                                            const Parser &parser) const {
2199  std::vector<Offset<reflection::EnumVal>> enumval_offsets;
2200  for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
2201    enumval_offsets.push_back((*it)->Serialize(builder));
2202  }
2203  auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2204  return reflection::CreateEnum(*builder,
2205                                builder->CreateString(qualified_name),
2206                                builder->CreateVector(enumval_offsets),
2207                                is_union,
2208                                underlying_type.Serialize(builder),
2209                                SerializeAttributes(builder, parser),
2210                                parser.opts.binary_schema_comments
2211                                  ? builder->CreateVectorOfStrings(doc_comment)
2212                                  : 0);
2213}
2214
2215Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder) const
2216                                                                               {
2217  return reflection::CreateEnumVal(*builder,
2218                                   builder->CreateString(name),
2219                                   value,
2220                                   union_type.struct_def
2221                                     ? union_type.struct_def->
2222                                         serialized_location
2223                                     : 0,
2224                                   union_type.Serialize(builder));
2225}
2226
2227Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
2228  return reflection::CreateType(*builder,
2229                                static_cast<reflection::BaseType>(base_type),
2230                                static_cast<reflection::BaseType>(element),
2231                                struct_def ? struct_def->index :
2232                                             (enum_def ? enum_def->index : -1));
2233}
2234
2235flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<
2236  reflection::KeyValue>>>
2237    Definition::SerializeAttributes(FlatBufferBuilder *builder,
2238                                    const Parser &parser) const {
2239  std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
2240  for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
2241    auto it = parser.known_attributes_.find(kv->first);
2242    assert(it != parser.known_attributes_.end());
2243    if (!it->second) {  // Custom attribute.
2244      attrs.push_back(
2245          reflection::CreateKeyValue(*builder, builder->CreateString(kv->first),
2246                                     builder->CreateString(
2247                                         kv->second->constant)));
2248    }
2249  }
2250  if (attrs.size()) {
2251    return builder->CreateVectorOfSortedTables(&attrs);
2252  } else {
2253    return 0;
2254  }
2255}
2256
2257std::string Parser::ConformTo(const Parser &base) {
2258  for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
2259    auto &struct_def = **sit;
2260    auto qualified_name =
2261        struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
2262    auto struct_def_base = base.structs_.Lookup(qualified_name);
2263    if (!struct_def_base) continue;
2264    for (auto fit = struct_def.fields.vec.begin();
2265             fit != struct_def.fields.vec.end(); ++fit) {
2266      auto &field = **fit;
2267      auto field_base = struct_def_base->fields.Lookup(field.name);
2268      if (field_base) {
2269        if (field.value.offset != field_base->value.offset)
2270          return "offsets differ for field: " + field.name;
2271        if (field.value.constant != field_base->value.constant)
2272          return "defaults differ for field: " + field.name;
2273        if (!EqualByName(field.value.type, field_base->value.type))
2274          return "types differ for field: " + field.name;
2275      } else {
2276        // Doesn't have to exist, deleting fields is fine.
2277        // But we should check if there is a field that has the same offset
2278        // but is incompatible (in the case of field renaming).
2279        for (auto fbit = struct_def_base->fields.vec.begin();
2280                 fbit != struct_def_base->fields.vec.end(); ++fbit) {
2281          field_base = *fbit;
2282          if (field.value.offset == field_base->value.offset) {
2283            if (!EqualByName(field.value.type, field_base->value.type))
2284              return "field renamed to different type: " + field.name;
2285            break;
2286          }
2287        }
2288      }
2289    }
2290  }
2291  for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
2292    auto &enum_def = **eit;
2293    auto qualified_name =
2294        enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
2295    auto enum_def_base = base.enums_.Lookup(qualified_name);
2296    if (!enum_def_base) continue;
2297    for (auto evit = enum_def.vals.vec.begin();
2298             evit != enum_def.vals.vec.end(); ++evit) {
2299      auto &enum_val = **evit;
2300      auto enum_val_base = enum_def_base->vals.Lookup(enum_val.name);
2301      if (enum_val_base) {
2302        if (enum_val.value != enum_val_base->value)
2303          return "values differ for enum: " + enum_val.name;
2304      }
2305    }
2306  }
2307  return "";
2308}
2309
2310}  // namespace flatbuffers
2311