idl_parser.cpp revision d575321eba7f83f40de5fb23685ed3cdb47bc9cc
1/*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <algorithm>
18#include <list>
19
20#include "flatbuffers/flatbuffers.h"
21#include "flatbuffers/hash.h"
22#include "flatbuffers/idl.h"
23#include "flatbuffers/util.h"
24
25namespace flatbuffers {
26
27const char *const kTypeNames[] = {
28  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) IDLTYPE,
29    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
30  #undef FLATBUFFERS_TD
31  nullptr
32};
33
34const char kTypeSizes[] = {
35  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
36      sizeof(CTYPE),
37    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
38  #undef FLATBUFFERS_TD
39};
40
// Reports a parse failure by throwing a copy of `msg` as a std::string.
// Never returns normally.
static void Error(const std::string &msg) {
  throw std::string(msg);
}
44
45// Ensure that integer values we parse fit inside the declared integer type.
46static void CheckBitsFit(int64_t val, size_t bits) {
47  auto mask = (1ll << bits) - 1;  // Bits we allow to be used.
48  if (bits < 64 &&
49      (val & ~mask) != 0 &&  // Positive or unsigned.
50      (val |  mask) != -1)   // Negative.
51    Error("constant does not fit in a " + NumToString(bits) + "-bit field");
52}
53
54// atot: templated version of atoi/atof: convert a string to an instance of T.
55template<typename T> inline T atot(const char *s) {
56  auto val = StringToInt(s);
57  CheckBitsFit(val, sizeof(T) * 8);
58  return (T)val;
59}
60template<> inline bool atot<bool>(const char *s) {
61  return 0 != atoi(s);
62}
63template<> inline float atot<float>(const char *s) {
64  return static_cast<float>(strtod(s, nullptr));
65}
66template<> inline double atot<double>(const char *s) {
67  return strtod(s, nullptr);
68}
69
70template<> inline Offset<void> atot<Offset<void>>(const char *s) {
71  return Offset<void>(atoi(s));
72}
73
74// Declare tokens we'll use. Single character tokens are represented by their
75// ascii character code (e.g. '{'), others above 256.
76#define FLATBUFFERS_GEN_TOKENS(TD) \
77  TD(Eof, 256, "end of file") \
78  TD(StringConstant, 257, "string constant") \
79  TD(IntegerConstant, 258, "integer constant") \
80  TD(FloatConstant, 259, "float constant") \
81  TD(Identifier, 260, "identifier") \
82  TD(Table, 261, "table") \
83  TD(Struct, 262, "struct") \
84  TD(Enum, 263, "enum") \
85  TD(Union, 264, "union") \
86  TD(NameSpace, 265, "namespace") \
87  TD(RootType, 266, "root_type") \
88  TD(FileIdentifier, 267, "file_identifier") \
89  TD(FileExtension, 268, "file_extension") \
90  TD(Include, 269, "include") \
91  TD(Attribute, 270, "attribute")
92#ifdef __GNUC__
93__extension__  // Stop GCC complaining about trailing comma with -Wpendantic.
94#endif
95enum {
96  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
97    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
98  #undef FLATBUFFERS_TOKEN
99  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
100      kToken ## ENUM,
101    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
102  #undef FLATBUFFERS_TD
103};
104
105static std::string TokenToString(int t) {
106  static const char *tokens[] = {
107    #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
108      FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
109    #undef FLATBUFFERS_TOKEN
110    #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) IDLTYPE,
111      FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
112    #undef FLATBUFFERS_TD
113  };
114  if (t < 256) {  // A single ascii char token.
115    std::string s;
116    s.append(1, static_cast<char>(t));
117    return s;
118  } else {       // Other tokens.
119    return tokens[t - 256];
120  }
121}
122
123// Parses exactly nibbles worth of hex digits into a number, or error.
124int64_t Parser::ParseHexNum(int nibbles) {
125  for (int i = 0; i < nibbles; i++)
126    if (!isxdigit(cursor_[i]))
127      Error("escape code must be followed by " + NumToString(nibbles) +
128            " hex digits");
129  std::string target(cursor_, cursor_ + nibbles);
130  auto val = StringToInt(target.c_str(), 16);
131  cursor_ += nibbles;
132  return val;
133}
134
135void Parser::Next() {
136  doc_comment_.clear();
137  bool seen_newline = false;
138  for (;;) {
139    char c = *cursor_++;
140    token_ = c;
141    switch (c) {
142      case '\0': cursor_--; token_ = kTokenEof; return;
143      case ' ': case '\r': case '\t': break;
144      case '\n': line_++; seen_newline = true; break;
145      case '{': case '}': case '(': case ')': case '[': case ']': return;
146      case ',': case ':': case ';': case '=': return;
147      case '.':
148        if(!isdigit(*cursor_)) return;
149        Error("floating point constant can\'t start with \".\"");
150        break;
151      case '\"':
152        attribute_ = "";
153        while (*cursor_ != '\"') {
154          if (*cursor_ < ' ' && *cursor_ >= 0)
155            Error("illegal character in string constant");
156          if (*cursor_ == '\\') {
157            cursor_++;
158            switch (*cursor_) {
159              case 'n':  attribute_ += '\n'; cursor_++; break;
160              case 't':  attribute_ += '\t'; cursor_++; break;
161              case 'r':  attribute_ += '\r'; cursor_++; break;
162              case 'b':  attribute_ += '\b'; cursor_++; break;
163              case 'f':  attribute_ += '\f'; cursor_++; break;
164              case '\"': attribute_ += '\"'; cursor_++; break;
165              case '\\': attribute_ += '\\'; cursor_++; break;
166              case '/':  attribute_ += '/';  cursor_++; break;
167              case 'x': {  // Not in the JSON standard
168                cursor_++;
169                attribute_ += static_cast<char>(ParseHexNum(2));
170                break;
171              }
172              case 'u': {
173                cursor_++;
174                ToUTF8(static_cast<int>(ParseHexNum(4)), &attribute_);
175                break;
176              }
177              default: Error("unknown escape code in string constant"); break;
178            }
179          } else { // printable chars + UTF-8 bytes
180            attribute_ += *cursor_++;
181          }
182        }
183        cursor_++;
184        token_ = kTokenStringConstant;
185        return;
186      case '/':
187        if (*cursor_ == '/') {
188          const char *start = ++cursor_;
189          while (*cursor_ && *cursor_ != '\n') cursor_++;
190          if (*start == '/') {  // documentation comment
191            if (cursor_ != source_ && !seen_newline)
192              Error("a documentation comment should be on a line on its own");
193            doc_comment_.push_back(std::string(start + 1, cursor_));
194          }
195          break;
196        }
197        // fall thru
198      default:
199        if (isalpha(static_cast<unsigned char>(c))) {
200          // Collect all chars of an identifier:
201          const char *start = cursor_ - 1;
202          while (isalnum(static_cast<unsigned char>(*cursor_)) ||
203                 *cursor_ == '_')
204            cursor_++;
205          attribute_.clear();
206          attribute_.append(start, cursor_);
207          // First, see if it is a type keyword from the table of types:
208          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
209            if (attribute_ == IDLTYPE) { \
210              token_ = kToken ## ENUM; \
211              return; \
212            }
213            FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
214          #undef FLATBUFFERS_TD
215          // If it's a boolean constant keyword, turn those into integers,
216          // which simplifies our logic downstream.
217          if (attribute_ == "true" || attribute_ == "false") {
218            attribute_ = NumToString(attribute_ == "true");
219            token_ = kTokenIntegerConstant;
220            return;
221          }
222          // Check for declaration keywords:
223          if (attribute_ == "table")     { token_ = kTokenTable;     return; }
224          if (attribute_ == "struct")    { token_ = kTokenStruct;    return; }
225          if (attribute_ == "enum")      { token_ = kTokenEnum;      return; }
226          if (attribute_ == "union")     { token_ = kTokenUnion;     return; }
227          if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; }
228          if (attribute_ == "root_type") { token_ = kTokenRootType;  return; }
229          if (attribute_ == "include")   { token_ = kTokenInclude;   return; }
230          if (attribute_ == "attribute") { token_ = kTokenAttribute; return; }
231          if (attribute_ == "file_identifier") {
232            token_ = kTokenFileIdentifier;
233            return;
234          }
235          if (attribute_ == "file_extension") {
236            token_ = kTokenFileExtension;
237            return;
238          }
239          // If not, it is a user-defined identifier:
240          token_ = kTokenIdentifier;
241          return;
242        } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
243          const char *start = cursor_ - 1;
244          while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
245          if (*cursor_ == '.') {
246            cursor_++;
247            while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
248            // See if this float has a scientific notation suffix. Both JSON
249            // and C++ (through strtod() we use) have the same format:
250            if (*cursor_ == 'e' || *cursor_ == 'E') {
251              cursor_++;
252              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
253              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
254            }
255            token_ = kTokenFloatConstant;
256          } else {
257            token_ = kTokenIntegerConstant;
258          }
259          attribute_.clear();
260          attribute_.append(start, cursor_);
261          return;
262        }
263        std::string ch;
264        ch = c;
265        if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
266        Error("illegal character: " + ch);
267        break;
268    }
269  }
270}
271
272// Check if a given token is next, if so, consume it as well.
273bool Parser::IsNext(int t) {
274  bool isnext = t == token_;
275  if (isnext) Next();
276  return isnext;
277}
278
279// Expect a given token to be next, consume it, or error if not present.
280void Parser::Expect(int t) {
281  if (t != token_) {
282    Error("expecting: " + TokenToString(t) + " instead got: " +
283          TokenToString(token_));
284  }
285  Next();
286}
287
288void Parser::ParseTypeIdent(Type &type) {
289  auto enum_def = enums_.Lookup(attribute_);
290  if (enum_def) {
291    type = enum_def->underlying_type;
292    if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
293  } else {
294    type.base_type = BASE_TYPE_STRUCT;
295    type.struct_def = LookupCreateStruct(attribute_);
296  }
297}
298
299// Parse any IDL type.
300void Parser::ParseType(Type &type) {
301  if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
302    type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
303  } else {
304    if (token_ == kTokenIdentifier) {
305      ParseTypeIdent(type);
306    } else if (token_ == '[') {
307      Next();
308      Type subtype;
309      ParseType(subtype);
310      if (subtype.base_type == BASE_TYPE_VECTOR) {
311        // We could support this, but it will complicate things, and it's
312        // easier to work around with a struct around the inner vector.
313        Error("nested vector types not supported (wrap in table first).");
314      }
315      if (subtype.base_type == BASE_TYPE_UNION) {
316        // We could support this if we stored a struct of 2 elements per
317        // union element.
318        Error("vector of union types not supported (wrap in table first).");
319      }
320      type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
321      type.element = subtype.base_type;
322      Expect(']');
323      return;
324    } else {
325      Error("illegal type syntax");
326    }
327  }
328  Next();
329}
330
331FieldDef &Parser::AddField(StructDef &struct_def,
332                           const std::string &name,
333                           const Type &type) {
334  auto &field = *new FieldDef();
335  field.value.offset =
336    FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
337  field.name = name;
338  field.file = struct_def.file;
339  field.value.type = type;
340  if (struct_def.fixed) {  // statically compute the field offset
341    auto size = InlineSize(type);
342    auto alignment = InlineAlignment(type);
343    // structs_ need to have a predictable format, so we need to align to
344    // the largest scalar
345    struct_def.minalign = std::max(struct_def.minalign, alignment);
346    struct_def.PadLastField(alignment);
347    field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
348    struct_def.bytesize += size;
349  }
350  if (struct_def.fields.Add(name, &field))
351    Error("field already exists: " + name);
352  return field;
353}
354
355void Parser::ParseField(StructDef &struct_def) {
356  std::string name = attribute_;
357  std::vector<std::string> dc = doc_comment_;
358  Expect(kTokenIdentifier);
359  Expect(':');
360  Type type;
361  ParseType(type);
362
363  if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
364    Error("structs_ may contain only scalar or struct fields");
365
366  FieldDef *typefield = nullptr;
367  if (type.base_type == BASE_TYPE_UNION) {
368    // For union fields, add a second auto-generated field to hold the type,
369    // with _type appended as the name.
370    typefield = &AddField(struct_def, name + "_type",
371                          type.enum_def->underlying_type);
372  }
373
374  auto &field = AddField(struct_def, name, type);
375
376  if (token_ == '=') {
377    Next();
378    if (!IsScalar(type.base_type))
379      Error("default values currently only supported for scalars");
380    ParseSingleValue(field.value);
381  }
382
383  if (type.enum_def &&
384      IsScalar(type.base_type) &&
385      !struct_def.fixed &&
386      !type.enum_def->attributes.Lookup("bit_flags") &&
387      !type.enum_def->ReverseLookup(static_cast<int>(
388                         StringToInt(field.value.constant.c_str()))))
389    Error("enum " + type.enum_def->name +
390          " does not have a declaration for this field\'s default of " +
391          field.value.constant);
392
393  field.doc_comment = dc;
394  ParseMetaData(field);
395  field.deprecated = field.attributes.Lookup("deprecated") != nullptr;
396  auto hash_name = field.attributes.Lookup("hash");
397  if (hash_name) {
398    switch (type.base_type) {
399      case BASE_TYPE_INT:
400      case BASE_TYPE_UINT: {
401        if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
402          Error("Unknown hashing algorithm for 32 bit types: " +
403                hash_name->constant);
404        break;
405      }
406      case BASE_TYPE_LONG:
407      case BASE_TYPE_ULONG: {
408        if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
409          Error("Unknown hashing algorithm for 64 bit types: " +
410                hash_name->constant);
411        break;
412      }
413      default:
414        Error("only int, uint, long and ulong data types support hashing.");
415    }
416  }
417  if (field.deprecated && struct_def.fixed)
418    Error("can't deprecate fields in a struct");
419  field.required = field.attributes.Lookup("required") != nullptr;
420  if (field.required && (struct_def.fixed ||
421                         IsScalar(field.value.type.base_type)))
422    Error("only non-scalar fields in tables may be 'required'");
423  field.key = field.attributes.Lookup("key") != nullptr;
424  if (field.key) {
425    if (struct_def.has_key)
426      Error("only one field may be set as 'key'");
427    struct_def.has_key = true;
428    if (!IsScalar(field.value.type.base_type)) {
429      field.required = true;
430      if (field.value.type.base_type != BASE_TYPE_STRING)
431        Error("'key' field must be string or scalar type");
432    }
433  }
434  auto nested = field.attributes.Lookup("nested_flatbuffer");
435  if (nested) {
436    if (nested->type.base_type != BASE_TYPE_STRING)
437      Error("nested_flatbuffer attribute must be a string (the root type)");
438    if (field.value.type.base_type != BASE_TYPE_VECTOR ||
439        field.value.type.element != BASE_TYPE_UCHAR)
440      Error("nested_flatbuffer attribute may only apply to a vector of ubyte");
441    // This will cause an error if the root type of the nested flatbuffer
442    // wasn't defined elsewhere.
443    LookupCreateStruct(nested->constant);
444  }
445
446  if (typefield) {
447    // If this field is a union, and it has a manually assigned id,
448    // the automatically added type field should have an id as well (of N - 1).
449    auto attr = field.attributes.Lookup("id");
450    if (attr) {
451      auto id = atoi(attr->constant.c_str());
452      auto val = new Value();
453      val->type = attr->type;
454      val->constant = NumToString(id - 1);
455      typefield->attributes.Add("id", val);
456    }
457  }
458
459  Expect(';');
460}
461
462void Parser::ParseAnyValue(Value &val, FieldDef *field) {
463  switch (val.type.base_type) {
464    case BASE_TYPE_UNION: {
465      assert(field);
466      if (!field_stack_.size() ||
467          field_stack_.back().second->value.type.base_type != BASE_TYPE_UTYPE)
468        Error("missing type field before this union value: " + field->name);
469      auto enum_idx = atot<unsigned char>(
470                                    field_stack_.back().first.constant.c_str());
471      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
472      if (!enum_val) Error("illegal type id for: " + field->name);
473      val.constant = NumToString(ParseTable(*enum_val->struct_def));
474      break;
475    }
476    case BASE_TYPE_STRUCT:
477      val.constant = NumToString(ParseTable(*val.type.struct_def));
478      break;
479    case BASE_TYPE_STRING: {
480      auto s = attribute_;
481      Expect(kTokenStringConstant);
482      val.constant = NumToString(builder_.CreateString(s).o);
483      break;
484    }
485    case BASE_TYPE_VECTOR: {
486      Expect('[');
487      val.constant = NumToString(ParseVector(val.type.VectorType()));
488      break;
489    }
490    case BASE_TYPE_INT:
491    case BASE_TYPE_UINT:
492    case BASE_TYPE_LONG:
493    case BASE_TYPE_ULONG: {
494      if (field && field->attributes.Lookup("hash") &&
495          (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
496        ParseHash(val, field);
497      } else {
498        ParseSingleValue(val);
499      }
500      break;
501    }
502    default:
503      ParseSingleValue(val);
504      break;
505  }
506}
507
508void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
509  auto off = atot<uoffset_t>(val.constant.c_str());
510  assert(struct_stack_.size() - off == struct_def.bytesize);
511  builder_.Align(struct_def.minalign);
512  builder_.PushBytes(&struct_stack_[off], struct_def.bytesize);
513  struct_stack_.resize(struct_stack_.size() - struct_def.bytesize);
514  builder_.AddStructOffset(val.offset, builder_.GetSize());
515}
516
517uoffset_t Parser::ParseTable(const StructDef &struct_def) {
518  Expect('{');
519  size_t fieldn = 0;
520  for (;;) {
521    if ((!strict_json_ || !fieldn) && IsNext('}')) break;
522    std::string name = attribute_;
523    if (!IsNext(kTokenStringConstant))
524      Expect(strict_json_ ? kTokenStringConstant : kTokenIdentifier);
525    auto field = struct_def.fields.Lookup(name);
526    if (!field) Error("unknown field: " + name);
527    if (struct_def.fixed && (fieldn >= struct_def.fields.vec.size()
528                            || struct_def.fields.vec[fieldn] != field)) {
529       Error("struct field appearing out of order: " + name);
530    }
531    Expect(':');
532    Value val = field->value;
533    ParseAnyValue(val, field);
534    field_stack_.push_back(std::make_pair(val, field));
535    fieldn++;
536    if (IsNext('}')) break;
537    Expect(',');
538  }
539  for (auto it = field_stack_.rbegin();
540           it != field_stack_.rbegin() + fieldn; ++it) {
541    if (it->second->used)
542      Error("field set more than once: " + it->second->name);
543    it->second->used = true;
544  }
545  for (auto it = field_stack_.rbegin();
546           it != field_stack_.rbegin() + fieldn; ++it) {
547    it->second->used = false;
548  }
549  if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
550    Error("incomplete struct initialization: " + struct_def.name);
551  auto start = struct_def.fixed
552                 ? builder_.StartStruct(struct_def.minalign)
553                 : builder_.StartTable();
554
555  for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
556       size;
557       size /= 2) {
558    // Go through elements in reverse, since we're building the data backwards.
559    for (auto it = field_stack_.rbegin();
560             it != field_stack_.rbegin() + fieldn; ++it) {
561      auto &value = it->first;
562      auto field = it->second;
563      if (!struct_def.sortbysize || size == SizeOf(value.type.base_type)) {
564        switch (value.type.base_type) {
565          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
566            case BASE_TYPE_ ## ENUM: \
567              builder_.Pad(field->padding); \
568              if (struct_def.fixed) { \
569                builder_.PushElement(atot<CTYPE>(value.constant.c_str())); \
570              } else { \
571                builder_.AddElement(value.offset, \
572                             atot<CTYPE>(       value.constant.c_str()), \
573                             atot<CTYPE>(field->value.constant.c_str())); \
574              } \
575              break;
576            FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
577          #undef FLATBUFFERS_TD
578          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
579            case BASE_TYPE_ ## ENUM: \
580              builder_.Pad(field->padding); \
581              if (IsStruct(field->value.type)) { \
582                SerializeStruct(*field->value.type.struct_def, value); \
583              } else { \
584                builder_.AddOffset(value.offset, \
585                  atot<CTYPE>(value.constant.c_str())); \
586              } \
587              break;
588            FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
589          #undef FLATBUFFERS_TD
590        }
591      }
592    }
593  }
594  for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
595
596  if (struct_def.fixed) {
597    builder_.ClearOffsets();
598    builder_.EndStruct();
599    // Temporarily store this struct in a side buffer, since this data has to
600    // be stored in-line later in the parent object.
601    auto off = struct_stack_.size();
602    struct_stack_.insert(struct_stack_.end(),
603                         builder_.GetBufferPointer(),
604                         builder_.GetBufferPointer() + struct_def.bytesize);
605    builder_.PopBytes(struct_def.bytesize);
606    return static_cast<uoffset_t>(off);
607  } else {
608    return builder_.EndTable(
609      start,
610      static_cast<voffset_t>(struct_def.fields.vec.size()));
611  }
612}
613
614uoffset_t Parser::ParseVector(const Type &type) {
615  int count = 0;
616  for (;;) {
617    if ((!strict_json_ || !count) && IsNext(']')) break;
618    Value val;
619    val.type = type;
620    ParseAnyValue(val, nullptr);
621    field_stack_.push_back(std::make_pair(val, nullptr));
622    count++;
623    if (IsNext(']')) break;
624    Expect(',');
625  }
626
627  builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
628                       InlineAlignment(type));
629  for (int i = 0; i < count; i++) {
630    // start at the back, since we're building the data backwards.
631    auto &val = field_stack_.back().first;
632    switch (val.type.base_type) {
633      #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \
634        case BASE_TYPE_ ## ENUM: \
635          if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
636          else builder_.PushElement(atot<CTYPE>(val.constant.c_str())); \
637          break;
638        FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
639      #undef FLATBUFFERS_TD
640    }
641    field_stack_.pop_back();
642  }
643
644  builder_.ClearOffsets();
645  return builder_.EndVector(count);
646}
647
648void Parser::ParseMetaData(Definition &def) {
649  if (IsNext('(')) {
650    for (;;) {
651      auto name = attribute_;
652      Expect(kTokenIdentifier);
653      if (known_attributes_.find(name) == known_attributes_.end())
654        Error("user define attributes must be declared before use: " + name);
655      auto e = new Value();
656      def.attributes.Add(name, e);
657      if (IsNext(':')) {
658        ParseSingleValue(*e);
659      }
660      if (IsNext(')')) break;
661      Expect(',');
662    }
663  }
664}
665
666bool Parser::TryTypedValue(int dtoken,
667                           bool check,
668                           Value &e,
669                           BaseType req) {
670  bool match = dtoken == token_;
671  if (match) {
672    e.constant = attribute_;
673    if (!check) {
674      if (e.type.base_type == BASE_TYPE_NONE) {
675        e.type.base_type = req;
676      } else {
677        Error(std::string("type mismatch: expecting: ") +
678              kTypeNames[e.type.base_type] +
679              ", found: " +
680              kTypeNames[req]);
681      }
682    }
683    Next();
684  }
685  return match;
686}
687
688int64_t Parser::ParseIntegerFromString(Type &type) {
689  int64_t result = 0;
690  // Parse one or more enum identifiers, separated by spaces.
691  const char *next = attribute_.c_str();
692  do {
693    const char *divider = strchr(next, ' ');
694    std::string word;
695    if (divider) {
696      word = std::string(next, divider);
697      next = divider + strspn(divider, " ");
698    } else {
699      word = next;
700      next += word.length();
701    }
702    if (type.enum_def) {  // The field has an enum type
703      auto enum_val = type.enum_def->vals.Lookup(word);
704      if (!enum_val)
705        Error("unknown enum value: " + word +
706              ", for enum: " + type.enum_def->name);
707      result |= enum_val->value;
708    } else {  // No enum type, probably integral field.
709      if (!IsInteger(type.base_type))
710        Error("not a valid value for this field: " + word);
711      // TODO: could check if its a valid number constant here.
712      const char *dot = strchr(word.c_str(), '.');
713      if (!dot) Error("enum values need to be qualified by an enum type");
714      std::string enum_def_str(word.c_str(), dot);
715      std::string enum_val_str(dot + 1, word.c_str() + word.length());
716      auto enum_def = enums_.Lookup(enum_def_str);
717      if (!enum_def) Error("unknown enum: " + enum_def_str);
718      auto enum_val = enum_def->vals.Lookup(enum_val_str);
719      if (!enum_val) Error("unknown enum value: " + enum_val_str);
720      result |= enum_val->value;
721    }
722  } while(*next);
723  return result;
724}
725
726
727void Parser::ParseHash(Value &e, FieldDef* field) {
728  assert(field);
729  Value *hash_name = field->attributes.Lookup("hash");
730  switch (e.type.base_type) {
731    case BASE_TYPE_INT:
732    case BASE_TYPE_UINT: {
733      auto hash = FindHashFunction32(hash_name->constant.c_str());
734      uint32_t hashed_value = hash(attribute_.c_str());
735      e.constant = NumToString(hashed_value);
736      break;
737    }
738    case BASE_TYPE_LONG:
739    case BASE_TYPE_ULONG: {
740      auto hash = FindHashFunction64(hash_name->constant.c_str());
741      uint64_t hashed_value = hash(attribute_.c_str());
742      e.constant = NumToString(hashed_value);
743      break;
744    }
745    default:
746      assert(0);
747  }
748  Next();
749}
750
751void Parser::ParseSingleValue(Value &e) {
752  // First check if this could be a string/identifier enum value:
753  if (e.type.base_type != BASE_TYPE_STRING &&
754      e.type.base_type != BASE_TYPE_NONE &&
755      (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
756      e.constant = NumToString(ParseIntegerFromString(e.type));
757      Next();
758  } else if (TryTypedValue(kTokenIntegerConstant,
759                    IsScalar(e.type.base_type),
760                    e,
761                    BASE_TYPE_INT) ||
762      TryTypedValue(kTokenFloatConstant,
763                    IsFloat(e.type.base_type),
764                    e,
765                    BASE_TYPE_FLOAT) ||
766      TryTypedValue(kTokenStringConstant,
767                    e.type.base_type == BASE_TYPE_STRING,
768                    e,
769                    BASE_TYPE_STRING)) {
770  } else {
771    Error("cannot parse value starting with: " + TokenToString(token_));
772  }
773}
774
775StructDef *Parser::LookupCreateStruct(const std::string &name) {
776  auto struct_def = structs_.Lookup(name);
777  if (!struct_def) {
778    // Rather than failing, we create a "pre declared" StructDef, due to
779    // circular references, and check for errors at the end of parsing.
780    struct_def = new StructDef();
781    structs_.Add(name, struct_def);
782    struct_def->name = name;
783    struct_def->predecl = true;
784    struct_def->defined_namespace = namespaces_.back();
785  }
786  return struct_def;
787}
788
789void Parser::ParseEnum(bool is_union) {
790  std::vector<std::string> dc = doc_comment_;
791  Next();
792  std::string name = attribute_;
793  Expect(kTokenIdentifier);
794  auto &enum_def = *new EnumDef();
795  enum_def.name = name;
796  if (!files_being_parsed_.empty()) enum_def.file = files_being_parsed_.top();
797  enum_def.doc_comment = dc;
798  enum_def.is_union = is_union;
799  enum_def.defined_namespace = namespaces_.back();
800  if (enums_.Add(name, &enum_def)) Error("enum already exists: " + name);
801  if (is_union) {
802    enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
803    enum_def.underlying_type.enum_def = &enum_def;
804  } else {
805    if (proto_mode_) {
806      enum_def.underlying_type.base_type = BASE_TYPE_SHORT;
807    } else {
808      // Give specialized error message, since this type spec used to
809      // be optional in the first FlatBuffers release.
810      if (!IsNext(':')) Error("must specify the underlying integer type for this"
811                              " enum (e.g. \': short\', which was the default).");
812      // Specify the integer type underlying this enum.
813      ParseType(enum_def.underlying_type);
814      if (!IsInteger(enum_def.underlying_type.base_type))
815        Error("underlying enum type must be integral");
816    }
817    // Make this type refer back to the enum it was derived from.
818    enum_def.underlying_type.enum_def = &enum_def;
819  }
820  ParseMetaData(enum_def);
821  Expect('{');
822  if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
823  do {
824    std::string name = attribute_;
825    std::vector<std::string> dc = doc_comment_;
826    Expect(kTokenIdentifier);
827    auto prevsize = enum_def.vals.vec.size();
828    auto value = enum_def.vals.vec.size()
829      ? enum_def.vals.vec.back()->value + 1
830      : 0;
831    auto &ev = *new EnumVal(name, value);
832    if (enum_def.vals.Add(name, &ev))
833      Error("enum value already exists: " + name);
834    ev.doc_comment = dc;
835    if (is_union) {
836      ev.struct_def = LookupCreateStruct(name);
837    }
838    if (IsNext('=')) {
839      ev.value = atoi(attribute_.c_str());
840      Expect(kTokenIntegerConstant);
841      if (prevsize && enum_def.vals.vec[prevsize - 1]->value >= ev.value)
842        Error("enum values must be specified in ascending order");
843    }
844  } while (IsNext(proto_mode_ ? ';' : ',') && token_ != '}');
845  Expect('}');
846  if (enum_def.attributes.Lookup("bit_flags")) {
847    for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
848         ++it) {
849      if (static_cast<size_t>((*it)->value) >=
850           SizeOf(enum_def.underlying_type.base_type) * 8)
851        Error("bit flag out of range of underlying integral type");
852      (*it)->value = 1LL << (*it)->value;
853    }
854  }
855}
856
857StructDef &Parser::StartStruct() {
858  std::string name = attribute_;
859  Expect(kTokenIdentifier);
860  auto &struct_def = *LookupCreateStruct(name);
861  if (!struct_def.predecl) Error("datatype already exists: " + name);
862  struct_def.predecl = false;
863  struct_def.name = name;
864  if (!files_being_parsed_.empty()) struct_def.file = files_being_parsed_.top();
865  // Move this struct to the back of the vector just in case it was predeclared,
866  // to preserve declaration order.
867  remove(structs_.vec.begin(), structs_.vec.end(), &struct_def);
868  structs_.vec.back() = &struct_def;
869  return struct_def;
870}
871
872void Parser::ParseDecl() {
873  std::vector<std::string> dc = doc_comment_;
874  bool fixed = IsNext(kTokenStruct);
875  if (!fixed) Expect(kTokenTable);
876  auto &struct_def = StartStruct();
877  struct_def.doc_comment = dc;
878  struct_def.fixed = fixed;
879  ParseMetaData(struct_def);
880  struct_def.sortbysize =
881    struct_def.attributes.Lookup("original_order") == nullptr && !fixed;
882  Expect('{');
883  while (token_ != '}') ParseField(struct_def);
884  auto force_align = struct_def.attributes.Lookup("force_align");
885  if (fixed && force_align) {
886    auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
887    if (force_align->type.base_type != BASE_TYPE_INT ||
888        align < struct_def.minalign ||
889        align > 256 ||
890        align & (align - 1))
891      Error("force_align must be a power of two integer ranging from the"
892            "struct\'s natural alignment to 256");
893    struct_def.minalign = align;
894  }
895  struct_def.PadLastField(struct_def.minalign);
896  // Check if this is a table that has manual id assignments
897  auto &fields = struct_def.fields.vec;
898  if (!struct_def.fixed && fields.size()) {
899    size_t num_id_fields = 0;
900    for (auto it = fields.begin(); it != fields.end(); ++it) {
901      if ((*it)->attributes.Lookup("id")) num_id_fields++;
902    }
903    // If any fields have ids..
904    if (num_id_fields) {
905      // Then all fields must have them.
906      if (num_id_fields != fields.size())
907        Error("either all fields or no fields must have an 'id' attribute");
908      // Simply sort by id, then the fields are the same as if no ids had
909      // been specified.
910      std::sort(fields.begin(), fields.end(),
911        [](const FieldDef *a, const FieldDef *b) -> bool {
912          auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
913          auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
914          return a_id < b_id;
915      });
916      // Verify we have a contiguous set, and reassign vtable offsets.
917      for (int i = 0; i < static_cast<int>(fields.size()); i++) {
918        if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
919          Error("field id\'s must be consecutive from 0, id " +
920                NumToString(i) + " missing or set twice");
921        fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
922      }
923    }
924  }
925  // Check that no identifiers clash with auto generated fields.
926  // This is not an ideal situation, but should occur very infrequently,
927  // and allows us to keep using very readable names for type & length fields
928  // without inducing compile errors.
929  auto CheckClash = [&fields, &struct_def](const char *suffix,
930                                           BaseType basetype) {
931    auto len = strlen(suffix);
932    for (auto it = fields.begin(); it != fields.end(); ++it) {
933      auto &name = (*it)->name;
934      if (name.length() > len &&
935          name.compare(name.length() - len, len, suffix) == 0 &&
936          (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
937        auto field = struct_def.fields.Lookup(
938                       name.substr(0, name.length() - len));
939        if (field && field->value.type.base_type == basetype)
940          Error("Field " + name +
941                " would clash with generated functions for field " +
942                field->name);
943      }
944    }
945  };
946  CheckClash("_type", BASE_TYPE_UNION);
947  CheckClash("Type", BASE_TYPE_UNION);
948  CheckClash("_length", BASE_TYPE_VECTOR);
949  CheckClash("Length", BASE_TYPE_VECTOR);
950  Expect('}');
951}
952
953bool Parser::SetRootType(const char *name) {
954  root_struct_def = structs_.Lookup(name);
955  return root_struct_def != nullptr;
956}
957
958void Parser::MarkGenerated() {
959  // Since the Parser object retains definitions across files, we must
960  // ensure we only output code for definitions once, in the file they are first
961  // declared. This function marks all existing definitions as having already
962  // been generated.
963  for (auto it = enums_.vec.begin();
964           it != enums_.vec.end(); ++it) {
965    (*it)->generated = true;
966  }
967  for (auto it = structs_.vec.begin();
968           it != structs_.vec.end(); ++it) {
969    (*it)->generated = true;
970  }
971}
972
973void Parser::ParseNamespace() {
974  Next();
975  auto ns = new Namespace();
976  namespaces_.push_back(ns);
977  for (;;) {
978    ns->components.push_back(attribute_);
979    Expect(kTokenIdentifier);
980    if (!IsNext('.')) break;
981  }
982  Expect(';');
983}
984
985// Best effort parsing of .proto declarations, with the aim to turn them
986// in the closest corresponding FlatBuffer equivalent.
987// We parse everything as identifiers instead of keywords, since we don't
988// want protobuf keywords to become invalid identifiers in FlatBuffers.
989void Parser::ParseProtoDecl() {
990  if (attribute_ == "package") {
991    // These are identical in syntax to FlatBuffer's namespace decl.
992    ParseNamespace();
993  } else if (attribute_ == "message") {
994    Next();
995    auto &struct_def = StartStruct();
996    Expect('{');
997    while (token_ != '}') {
998      // Parse the qualifier.
999      bool required = false;
1000      bool repeated = false;
1001      if (attribute_ == "optional") {
1002        // This is the default.
1003      } else if (attribute_ == "required") {
1004        required = true;
1005      } else if (attribute_ == "repeated") {
1006        repeated = true;
1007      } else {
1008        Error("expecting optional/required/repeated, got: " + attribute_);
1009      }
1010      Type type = ParseTypeFromProtoType();
1011      // Repeated elements get mapped to a vector.
1012      if (repeated) {
1013        type.element = type.base_type;
1014        type.base_type = BASE_TYPE_VECTOR;
1015      }
1016      std::string name = attribute_;
1017      Expect(kTokenIdentifier);
1018      // Parse the field id. Since we're just translating schemas, not
1019      // any kind of binary compatibility, we can safely ignore these, and
1020      // assign our own.
1021      Expect('=');
1022      Expect(kTokenIntegerConstant);
1023      auto &field = AddField(struct_def, name, type);
1024      field.required = required;
1025      // See if there's a default specified.
1026      if (IsNext('[')) {
1027        if (attribute_ != "default") Error("\'default\' expected");
1028        Next();
1029        Expect('=');
1030        field.value.constant = attribute_;
1031        Next();
1032        Expect(']');
1033      }
1034      Expect(';');
1035    }
1036    Next();
1037  } else if (attribute_ == "enum") {
1038    // These are almost the same, just with different terminator:
1039    ParseEnum(false);
1040  } else if (attribute_ == "import") {
1041    Next();
1042    included_files_[attribute_] = true;
1043    Expect(kTokenStringConstant);
1044    Expect(';');
1045  } else if (attribute_ == "option") {  // Skip these.
1046    Next();
1047    Expect(kTokenIdentifier);
1048    Expect('=');
1049    Next();  // Any single token.
1050    Expect(';');
1051  } else {
1052    Error("don\'t know how to parse .proto declaration starting with " +
1053          attribute_);
1054  }
1055}
1056
1057// Parse a protobuf type, and map it to the corresponding FlatBuffer one.
1058Type Parser::ParseTypeFromProtoType() {
1059  Expect(kTokenIdentifier);
1060  struct type_lookup { const char *proto_type; BaseType fb_type; };
1061  static type_lookup lookup[] = {
1062    { "float", BASE_TYPE_FLOAT },  { "double", BASE_TYPE_DOUBLE },
1063    { "int32", BASE_TYPE_INT },    { "int64", BASE_TYPE_LONG },
1064    { "uint32", BASE_TYPE_UINT },  { "uint64", BASE_TYPE_ULONG },
1065    { "sint32", BASE_TYPE_INT },   { "sint64", BASE_TYPE_LONG },
1066    { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
1067    { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
1068    { "bool", BASE_TYPE_BOOL },
1069    { "string", BASE_TYPE_STRING },
1070    { "bytes", BASE_TYPE_STRING },
1071    { nullptr, BASE_TYPE_NONE }
1072  };
1073  Type type;
1074  for (auto tl = lookup; tl->proto_type; tl++) {
1075    if (attribute_ == tl->proto_type) {
1076      type.base_type = tl->fb_type;
1077      Next();
1078      return type;
1079    }
1080  }
1081  ParseTypeIdent(type);
1082  Expect(kTokenIdentifier);
1083  return type;
1084}
1085
1086bool Parser::Parse(const char *source, const char **include_paths,
1087                   const char *source_filename) {
1088  if (source_filename &&
1089      included_files_.find(source_filename) == included_files_.end()) {
1090    included_files_[source_filename] = true;
1091    files_included_per_file_[source_filename] = std::set<std::string>();
1092    files_being_parsed_.push(source_filename);
1093  }
1094  if (!include_paths) {
1095    const char *current_directory[] = { "", nullptr };
1096    include_paths = current_directory;
1097  }
1098  source_ = cursor_ = source;
1099  line_ = 1;
1100  error_.clear();
1101  builder_.Clear();
1102  try {
1103    Next();
1104    // Includes must come first:
1105    while (IsNext(kTokenInclude)) {
1106      auto name = attribute_;
1107      Expect(kTokenStringConstant);
1108      // Look for the file in include_paths.
1109      std::string filepath;
1110      for (auto paths = include_paths; paths && *paths; paths++) {
1111        filepath = flatbuffers::ConCatPathFileName(*paths, name);
1112        if(FileExists(filepath.c_str())) break;
1113      }
1114      if (filepath.empty())
1115        Error("unable to locate include file: " + name);
1116      if (source_filename)
1117        files_included_per_file_[source_filename].insert(filepath);
1118      if (included_files_.find(filepath) == included_files_.end()) {
1119        // We found an include file that we have not parsed yet.
1120        // Load it and parse it.
1121        std::string contents;
1122        if (!LoadFile(filepath.c_str(), true, &contents))
1123          Error("unable to load include file: " + name);
1124        if (!Parse(contents.c_str(), include_paths, filepath.c_str())) {
1125          // Any errors, we're done.
1126          return false;
1127        }
1128        // We do not want to output code for any included files:
1129        MarkGenerated();
1130        // This is the easiest way to continue this file after an include:
1131        // instead of saving and restoring all the state, we simply start the
1132        // file anew. This will cause it to encounter the same include statement
1133        // again, but this time it will skip it, because it was entered into
1134        // included_files_.
1135        // This is recursive, but only go as deep as the number of include
1136        // statements.
1137        return Parse(source, include_paths, source_filename);
1138      }
1139      Expect(';');
1140    }
1141    // Now parse all other kinds of declarations:
1142    while (token_ != kTokenEof) {
1143      if (proto_mode_) {
1144        ParseProtoDecl();
1145      } else if (token_ == kTokenNameSpace) {
1146        ParseNamespace();
1147      } else if (token_ == '{') {
1148        if (!root_struct_def) Error("no root type set to parse json with");
1149        if (builder_.GetSize()) {
1150          Error("cannot have more than one json object in a file");
1151        }
1152        builder_.Finish(Offset<Table>(ParseTable(*root_struct_def)),
1153          file_identifier_.length() ? file_identifier_.c_str() : nullptr);
1154      } else if (token_ == kTokenEnum) {
1155        ParseEnum(false);
1156      } else if (token_ == kTokenUnion) {
1157        ParseEnum(true);
1158      } else if (token_ == kTokenRootType) {
1159        Next();
1160        auto root_type = attribute_;
1161        Expect(kTokenIdentifier);
1162        if (!SetRootType(root_type.c_str()))
1163          Error("unknown root type: " + root_type);
1164        if (root_struct_def->fixed)
1165          Error("root type must be a table");
1166        Expect(';');
1167      } else if (token_ == kTokenFileIdentifier) {
1168        Next();
1169        file_identifier_ = attribute_;
1170        Expect(kTokenStringConstant);
1171        if (file_identifier_.length() !=
1172            FlatBufferBuilder::kFileIdentifierLength)
1173          Error("file_identifier must be exactly " +
1174                NumToString(FlatBufferBuilder::kFileIdentifierLength) +
1175                " characters");
1176        Expect(';');
1177      } else if (token_ == kTokenFileExtension) {
1178        Next();
1179        file_extension_ = attribute_;
1180        Expect(kTokenStringConstant);
1181        Expect(';');
1182      } else if(token_ == kTokenInclude) {
1183        Error("includes must come before declarations");
1184      } else if(token_ == kTokenAttribute) {
1185        Next();
1186        auto name = attribute_;
1187        Expect(kTokenStringConstant);
1188        Expect(';');
1189        known_attributes_.insert(name);
1190      } else {
1191        ParseDecl();
1192      }
1193    }
1194    for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
1195      if ((*it)->predecl)
1196        Error("type referenced but not defined: " + (*it)->name);
1197    }
1198    for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
1199      auto &enum_def = **it;
1200      if (enum_def.is_union) {
1201        for (auto it = enum_def.vals.vec.begin();
1202             it != enum_def.vals.vec.end();
1203             ++it) {
1204          auto &val = **it;
1205          if (val.struct_def && val.struct_def->fixed)
1206            Error("only tables can be union elements: " + val.name);
1207        }
1208      }
1209    }
1210  } catch (const std::string &msg) {
1211    error_ = source_filename ? AbsolutePath(source_filename) : "";
1212    #ifdef _WIN32
1213      error_ += "(" + NumToString(line_) + ")";  // MSVC alike
1214    #else
1215      if (source_filename) error_ += ":";
1216      error_ += NumToString(line_) + ":0";  // gcc alike
1217    #endif
1218    error_ += ": error: " + msg;
1219    if (source_filename) files_being_parsed_.pop();
1220    return false;
1221  }
1222  if (source_filename) files_being_parsed_.pop();
1223  assert(!struct_stack_.size());
1224  return true;
1225}
1226
1227std::set<std::string> Parser::GetIncludedFilesRecursive(
1228    const std::string &file_name) const {
1229  std::set<std::string> included_files;
1230  std::list<std::string> to_process;
1231
1232  if (file_name.empty()) return included_files;
1233  to_process.push_back(file_name);
1234
1235  while (!to_process.empty()) {
1236    std::string current = to_process.front();
1237    to_process.pop_front();
1238    included_files.insert(current);
1239
1240    auto new_files = files_included_per_file_.at(current);
1241    for (auto it = new_files.begin(); it != new_files.end(); ++it) {
1242      if (included_files.find(*it) == included_files.end())
1243        to_process.push_back(*it);
1244    }
1245  }
1246
1247  return included_files;
1248}
1249
1250}  // namespace flatbuffers
1251