idl_parser.cpp revision 07d5965c812fa5e82dc4d3eb32b37540b7c91598
1/* 2 * Copyright 2014 Google Inc. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <algorithm> 18 19#include "flatbuffers/flatbuffers.h" 20#include "flatbuffers/idl.h" 21#include "flatbuffers/util.h" 22 23namespace flatbuffers { 24 25const char *const kTypeNames[] = { 26 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) IDLTYPE, 27 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) 28 #undef FLATBUFFERS_TD 29 nullptr 30}; 31 32const char kTypeSizes[] = { 33 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \ 34 sizeof(CTYPE), 35 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) 36 #undef FLATBUFFERS_TD 37}; 38 39static void Error(const std::string &msg) { 40 throw msg; 41} 42 43// Ensure that integer values we parse fit inside the declared integer type. 44static void CheckBitsFit(int64_t val, size_t bits) { 45 auto mask = (1ll << bits) - 1; // Bits we allow to be used. 46 if (bits < 64 && 47 (val & ~mask) != 0 && // Positive or unsigned. 48 (val | mask) != -1) // Negative. 49 Error("constant does not fit in a " + NumToString(bits) + "-bit field"); 50} 51 52// atot: templated version of atoi/atof: convert a string to an instance of T. 53template<typename T> inline T atot(const char *s) { 54 auto val = StringToInt(s); 55 CheckBitsFit(val, sizeof(T) * 8); 56 return (T)val; 57} 58template<> inline bool atot<bool>(const char *s) { 59 return 0 != atoi(s); 60} 61template<> inline float atot<float>(const char *s) { 62 return static_cast<float>(strtod(s, nullptr)); 63} 64template<> inline double atot<double>(const char *s) { 65 return strtod(s, nullptr); 66} 67 68template<> inline Offset<void> atot<Offset<void>>(const char *s) { 69 return Offset<void>(atoi(s)); 70} 71 72// Declare tokens we'll use. Single character tokens are represented by their 73// ascii character code (e.g. '{'), others above 256. 74#define FLATBUFFERS_GEN_TOKENS(TD) \ 75 TD(Eof, 256, "end of file") \ 76 TD(StringConstant, 257, "string constant") \ 77 TD(IntegerConstant, 258, "integer constant") \ 78 TD(FloatConstant, 259, "float constant") \ 79 TD(Identifier, 260, "identifier") \ 80 TD(Table, 261, "table") \ 81 TD(Struct, 262, "struct") \ 82 TD(Enum, 263, "enum") \ 83 TD(Union, 264, "union") \ 84 TD(NameSpace, 265, "namespace") \ 85 TD(RootType, 266, "root_type") \ 86 TD(FileIdentifier, 267, "file_identifier") \ 87 TD(FileExtension, 268, "file_extension") \ 88 TD(Include, 269, "include") 89#ifdef __GNUC__ 90__extension__ // Stop GCC complaining about trailing comma with -Wpendantic. 91#endif 92enum { 93 #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE, 94 FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN) 95 #undef FLATBUFFERS_TOKEN 96 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \ 97 kToken ## ENUM, 98 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) 99 #undef FLATBUFFERS_TD 100}; 101 102static std::string TokenToString(int t) { 103 static const char *tokens[] = { 104 #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING, 105 FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN) 106 #undef FLATBUFFERS_TOKEN 107 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) IDLTYPE, 108 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) 109 #undef FLATBUFFERS_TD 110 }; 111 if (t < 256) { // A single ascii char token. 112 std::string s; 113 s.append(1, static_cast<char>(t)); 114 return s; 115 } else { // Other tokens. 116 return tokens[t - 256]; 117 } 118} 119 120// Parses exactly nibbles worth of hex digits into a number, or error. 121int64_t Parser::ParseHexNum(int nibbles) { 122 for (int i = 0; i < nibbles; i++) 123 if (!isxdigit(cursor_[i])) 124 Error("escape code must be followed by " + NumToString(nibbles) + 125 " hex digits"); 126 auto val = StringToInt(cursor_, 16); 127 cursor_ += nibbles; 128 return val; 129} 130 131void Parser::Next() { 132 doc_comment_.clear(); 133 bool seen_newline = false; 134 for (;;) { 135 char c = *cursor_++; 136 token_ = c; 137 switch (c) { 138 case '\0': cursor_--; token_ = kTokenEof; return; 139 case ' ': case '\r': case '\t': break; 140 case '\n': line_++; seen_newline = true; break; 141 case '{': case '}': case '(': case ')': case '[': case ']': return; 142 case ',': case ':': case ';': case '=': return; 143 case '.': 144 if(!isdigit(*cursor_)) return; 145 Error("floating point constant can\'t start with \".\""); 146 break; 147 case '\"': 148 attribute_ = ""; 149 while (*cursor_ != '\"') { 150 if (*cursor_ < ' ' && *cursor_ >= 0) 151 Error("illegal character in string constant"); 152 if (*cursor_ == '\\') { 153 cursor_++; 154 switch (*cursor_) { 155 case 'n': attribute_ += '\n'; cursor_++; break; 156 case 't': attribute_ += '\t'; cursor_++; break; 157 case 'r': attribute_ += '\r'; cursor_++; break; 158 case 'b': attribute_ += '\b'; cursor_++; break; 159 case 'f': attribute_ += '\f'; cursor_++; break; 160 case '\"': attribute_ += '\"'; cursor_++; break; 161 case '\\': attribute_ += '\\'; cursor_++; break; 162 case '/': attribute_ += '/'; cursor_++; break; 163 case 'x': { // Not in the JSON standard 164 cursor_++; 165 attribute_ += static_cast<char>(ParseHexNum(2)); 166 break; 167 } 168 case 'u': { 169 cursor_++; 170 ToUTF8(static_cast<int>(ParseHexNum(4)), &attribute_); 171 break; 172 } 173 default: Error("unknown escape code in string constant"); break; 174 } 175 } else { // printable chars + UTF-8 bytes 176 attribute_ += *cursor_++; 177 } 178 } 179 cursor_++; 180 token_ = kTokenStringConstant; 181 return; 182 case '/': 183 if (*cursor_ == '/') { 184 const char *start = ++cursor_; 185 while (*cursor_ && *cursor_ != '\n') cursor_++; 186 if (*start == '/') { // documentation comment 187 if (cursor_ != source_ && !seen_newline) 188 Error("a documentation comment should be on a line on its own"); 189 doc_comment_.push_back(std::string(start + 1, cursor_)); 190 } 191 break; 192 } 193 // fall thru 194 default: 195 if (isalpha(static_cast<unsigned char>(c))) { 196 // Collect all chars of an identifier: 197 const char *start = cursor_ - 1; 198 while (isalnum(static_cast<unsigned char>(*cursor_)) || 199 *cursor_ == '_') 200 cursor_++; 201 attribute_.clear(); 202 attribute_.append(start, cursor_); 203 // First, see if it is a type keyword from the table of types: 204 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \ 205 if (attribute_ == IDLTYPE) { \ 206 token_ = kToken ## ENUM; \ 207 return; \ 208 } 209 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) 210 #undef FLATBUFFERS_TD 211 // If it's a boolean constant keyword, turn those into integers, 212 // which simplifies our logic downstream. 213 if (attribute_ == "true" || attribute_ == "false") { 214 attribute_ = NumToString(attribute_ == "true"); 215 token_ = kTokenIntegerConstant; 216 return; 217 } 218 // Check for declaration keywords: 219 if (attribute_ == "table") { token_ = kTokenTable; return; } 220 if (attribute_ == "struct") { token_ = kTokenStruct; return; } 221 if (attribute_ == "enum") { token_ = kTokenEnum; return; } 222 if (attribute_ == "union") { token_ = kTokenUnion; return; } 223 if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; } 224 if (attribute_ == "root_type") { token_ = kTokenRootType; return; } 225 if (attribute_ == "include") { token_ = kTokenInclude; return; } 226 if (attribute_ == "file_identifier") { 227 token_ = kTokenFileIdentifier; 228 return; 229 } 230 if (attribute_ == "file_extension") { 231 token_ = kTokenFileExtension; 232 return; 233 } 234 // If not, it is a user-defined identifier: 235 token_ = kTokenIdentifier; 236 return; 237 } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') { 238 const char *start = cursor_ - 1; 239 while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++; 240 if (*cursor_ == '.') { 241 cursor_++; 242 while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++; 243 // See if this float has a scientific notation suffix. Both JSON 244 // and C++ (through strtod() we use) have the same format: 245 if (*cursor_ == 'e' || *cursor_ == 'E') { 246 cursor_++; 247 if (*cursor_ == '+' || *cursor_ == '-') cursor_++; 248 while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++; 249 } 250 token_ = kTokenFloatConstant; 251 } else { 252 token_ = kTokenIntegerConstant; 253 } 254 attribute_.clear(); 255 attribute_.append(start, cursor_); 256 return; 257 } 258 std::string ch; 259 ch = c; 260 if (c < ' ' || c > '~') ch = "code: " + NumToString(c); 261 Error("illegal character: " + ch); 262 break; 263 } 264 } 265} 266 267// Check if a given token is next, if so, consume it as well. 268bool Parser::IsNext(int t) { 269 bool isnext = t == token_; 270 if (isnext) Next(); 271 return isnext; 272} 273 274// Expect a given token to be next, consume it, or error if not present. 275void Parser::Expect(int t) { 276 if (t != token_) { 277 Error("expecting: " + TokenToString(t) + " instead got: " + 278 TokenToString(token_)); 279 } 280 Next(); 281} 282 283void Parser::ParseTypeIdent(Type &type) { 284 auto enum_def = enums_.Lookup(attribute_); 285 if (enum_def) { 286 type = enum_def->underlying_type; 287 if (enum_def->is_union) type.base_type = BASE_TYPE_UNION; 288 } else { 289 type.base_type = BASE_TYPE_STRUCT; 290 type.struct_def = LookupCreateStruct(attribute_); 291 } 292} 293 294// Parse any IDL type. 295void Parser::ParseType(Type &type) { 296 if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) { 297 type.base_type = static_cast<BaseType>(token_ - kTokenNONE); 298 } else { 299 if (token_ == kTokenIdentifier) { 300 ParseTypeIdent(type); 301 } else if (token_ == '[') { 302 Next(); 303 Type subtype; 304 ParseType(subtype); 305 if (subtype.base_type == BASE_TYPE_VECTOR) { 306 // We could support this, but it will complicate things, and it's 307 // easier to work around with a struct around the inner vector. 308 Error("nested vector types not supported (wrap in table first)."); 309 } 310 if (subtype.base_type == BASE_TYPE_UNION) { 311 // We could support this if we stored a struct of 2 elements per 312 // union element. 313 Error("vector of union types not supported (wrap in table first)."); 314 } 315 type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def); 316 type.element = subtype.base_type; 317 Expect(']'); 318 return; 319 } else { 320 Error("illegal type syntax"); 321 } 322 } 323 Next(); 324} 325 326FieldDef &Parser::AddField(StructDef &struct_def, 327 const std::string &name, 328 const Type &type) { 329 auto &field = *new FieldDef(); 330 field.value.offset = 331 FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size())); 332 field.name = name; 333 field.value.type = type; 334 if (struct_def.fixed) { // statically compute the field offset 335 auto size = InlineSize(type); 336 auto alignment = InlineAlignment(type); 337 // structs_ need to have a predictable format, so we need to align to 338 // the largest scalar 339 struct_def.minalign = std::max(struct_def.minalign, alignment); 340 struct_def.PadLastField(alignment); 341 field.value.offset = static_cast<voffset_t>(struct_def.bytesize); 342 struct_def.bytesize += size; 343 } 344 if (struct_def.fields.Add(name, &field)) 345 Error("field already exists: " + name); 346 return field; 347} 348 349void Parser::ParseField(StructDef &struct_def) { 350 std::string name = attribute_; 351 std::vector<std::string> dc = doc_comment_; 352 Expect(kTokenIdentifier); 353 Expect(':'); 354 Type type; 355 ParseType(type); 356 357 if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type)) 358 Error("structs_ may contain only scalar or struct fields"); 359 360 FieldDef *typefield = nullptr; 361 if (type.base_type == BASE_TYPE_UNION) { 362 // For union fields, add a second auto-generated field to hold the type, 363 // with _type appended as the name. 364 typefield = &AddField(struct_def, name + "_type", 365 type.enum_def->underlying_type); 366 } 367 368 auto &field = AddField(struct_def, name, type); 369 370 if (token_ == '=') { 371 Next(); 372 if (!IsScalar(type.base_type)) 373 Error("default values currently only supported for scalars"); 374 ParseSingleValue(field.value); 375 } 376 377 if (type.enum_def && 378 IsScalar(type.base_type) && 379 !struct_def.fixed && 380 !type.enum_def->attributes.Lookup("bit_flags") && 381 !type.enum_def->ReverseLookup(static_cast<int>( 382 StringToInt(field.value.constant.c_str())))) 383 Error("enum " + type.enum_def->name + 384 " does not have a declaration for this field\'s default of " + 385 field.value.constant); 386 387 field.doc_comment = dc; 388 ParseMetaData(field); 389 field.deprecated = field.attributes.Lookup("deprecated") != nullptr; 390 if (field.deprecated && struct_def.fixed) 391 Error("can't deprecate fields in a struct"); 392 field.required = field.attributes.Lookup("required") != nullptr; 393 if (field.required && (struct_def.fixed || 394 IsScalar(field.value.type.base_type))) 395 Error("only non-scalar fields in tables may be 'required'"); 396 auto nested = field.attributes.Lookup("nested_flatbuffer"); 397 if (nested) { 398 if (nested->type.base_type != BASE_TYPE_STRING) 399 Error("nested_flatbuffer attribute must be a string (the root type)"); 400 if (field.value.type.base_type != BASE_TYPE_VECTOR || 401 field.value.type.element != BASE_TYPE_UCHAR) 402 Error("nested_flatbuffer attribute may only apply to a vector of ubyte"); 403 // This will cause an error if the root type of the nested flatbuffer 404 // wasn't defined elsewhere. 405 LookupCreateStruct(nested->constant); 406 } 407 408 if (typefield) { 409 // If this field is a union, and it has a manually assigned id, 410 // the automatically added type field should have an id as well (of N - 1). 411 auto attr = field.attributes.Lookup("id"); 412 if (attr) { 413 auto id = atoi(attr->constant.c_str()); 414 auto val = new Value(); 415 val->type = attr->type; 416 val->constant = NumToString(id - 1); 417 typefield->attributes.Add("id", val); 418 } 419 } 420 421 Expect(';'); 422} 423 424void Parser::ParseAnyValue(Value &val, FieldDef *field) { 425 switch (val.type.base_type) { 426 case BASE_TYPE_UNION: { 427 assert(field); 428 if (!field_stack_.size() || 429 field_stack_.back().second->value.type.base_type != BASE_TYPE_UTYPE) 430 Error("missing type field before this union value: " + field->name); 431 auto enum_idx = atot<unsigned char>( 432 field_stack_.back().first.constant.c_str()); 433 auto enum_val = val.type.enum_def->ReverseLookup(enum_idx); 434 if (!enum_val) Error("illegal type id for: " + field->name); 435 val.constant = NumToString(ParseTable(*enum_val->struct_def)); 436 break; 437 } 438 case BASE_TYPE_STRUCT: 439 val.constant = NumToString(ParseTable(*val.type.struct_def)); 440 break; 441 case BASE_TYPE_STRING: { 442 auto s = attribute_; 443 Expect(kTokenStringConstant); 444 val.constant = NumToString(builder_.CreateString(s).o); 445 break; 446 } 447 case BASE_TYPE_VECTOR: { 448 Expect('['); 449 val.constant = NumToString(ParseVector(val.type.VectorType())); 450 break; 451 } 452 default: 453 ParseSingleValue(val); 454 break; 455 } 456} 457 458void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) { 459 auto off = atot<uoffset_t>(val.constant.c_str()); 460 assert(struct_stack_.size() - off == struct_def.bytesize); 461 builder_.Align(struct_def.minalign); 462 builder_.PushBytes(&struct_stack_[off], struct_def.bytesize); 463 struct_stack_.resize(struct_stack_.size() - struct_def.bytesize); 464 builder_.AddStructOffset(val.offset, builder_.GetSize()); 465} 466 467uoffset_t Parser::ParseTable(const StructDef &struct_def) { 468 Expect('{'); 469 size_t fieldn = 0; 470 if (!IsNext('}')) for (;;) { 471 std::string name = attribute_; 472 if (!IsNext(kTokenStringConstant)) Expect(kTokenIdentifier); 473 auto field = struct_def.fields.Lookup(name); 474 if (!field) Error("unknown field: " + name); 475 if (struct_def.fixed && (fieldn >= struct_def.fields.vec.size() 476 || struct_def.fields.vec[fieldn] != field)) { 477 Error("struct field appearing out of order: " + name); 478 } 479 Expect(':'); 480 Value val = field->value; 481 ParseAnyValue(val, field); 482 field_stack_.push_back(std::make_pair(val, field)); 483 fieldn++; 484 if (IsNext('}')) break; 485 Expect(','); 486 } 487 for (auto it = field_stack_.rbegin(); 488 it != field_stack_.rbegin() + fieldn; ++it) { 489 if (it->second->used) 490 Error("field set more than once: " + it->second->name); 491 it->second->used = true; 492 } 493 for (auto it = field_stack_.rbegin(); 494 it != field_stack_.rbegin() + fieldn; ++it) { 495 it->second->used = false; 496 } 497 if (struct_def.fixed && fieldn != struct_def.fields.vec.size()) 498 Error("incomplete struct initialization: " + struct_def.name); 499 auto start = struct_def.fixed 500 ? builder_.StartStruct(struct_def.minalign) 501 : builder_.StartTable(); 502 503 for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1; 504 size; 505 size /= 2) { 506 // Go through elements in reverse, since we're building the data backwards. 507 for (auto it = field_stack_.rbegin(); 508 it != field_stack_.rbegin() + fieldn; ++it) { 509 auto &value = it->first; 510 auto field = it->second; 511 if (!struct_def.sortbysize || size == SizeOf(value.type.base_type)) { 512 switch (value.type.base_type) { 513 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \ 514 case BASE_TYPE_ ## ENUM: \ 515 builder_.Pad(field->padding); \ 516 if (struct_def.fixed) { \ 517 builder_.PushElement(atot<CTYPE>(value.constant.c_str())); \ 518 } else { \ 519 builder_.AddElement(value.offset, \ 520 atot<CTYPE>( value.constant.c_str()), \ 521 atot<CTYPE>(field->value.constant.c_str())); \ 522 } \ 523 break; 524 FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD); 525 #undef FLATBUFFERS_TD 526 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \ 527 case BASE_TYPE_ ## ENUM: \ 528 builder_.Pad(field->padding); \ 529 if (IsStruct(field->value.type)) { \ 530 SerializeStruct(*field->value.type.struct_def, value); \ 531 } else { \ 532 builder_.AddOffset(value.offset, \ 533 atot<CTYPE>(value.constant.c_str())); \ 534 } \ 535 break; 536 FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD); 537 #undef FLATBUFFERS_TD 538 } 539 } 540 } 541 } 542 for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back(); 543 544 if (struct_def.fixed) { 545 builder_.ClearOffsets(); 546 builder_.EndStruct(); 547 // Temporarily store this struct in a side buffer, since this data has to 548 // be stored in-line later in the parent object. 549 auto off = struct_stack_.size(); 550 struct_stack_.insert(struct_stack_.end(), 551 builder_.GetBufferPointer(), 552 builder_.GetBufferPointer() + struct_def.bytesize); 553 builder_.PopBytes(struct_def.bytesize); 554 return static_cast<uoffset_t>(off); 555 } else { 556 return builder_.EndTable( 557 start, 558 static_cast<voffset_t>(struct_def.fields.vec.size())); 559 } 560} 561 562uoffset_t Parser::ParseVector(const Type &type) { 563 int count = 0; 564 if (token_ != ']') for (;;) { 565 Value val; 566 val.type = type; 567 ParseAnyValue(val, NULL); 568 field_stack_.push_back(std::make_pair(val, nullptr)); 569 count++; 570 if (token_ == ']') break; 571 Expect(','); 572 } 573 Next(); 574 575 builder_.StartVector(count * InlineSize(type) / InlineAlignment(type), 576 InlineAlignment(type)); 577 for (int i = 0; i < count; i++) { 578 // start at the back, since we're building the data backwards. 579 auto &val = field_stack_.back().first; 580 switch (val.type.base_type) { 581 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE) \ 582 case BASE_TYPE_ ## ENUM: \ 583 if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \ 584 else builder_.PushElement(atot<CTYPE>(val.constant.c_str())); \ 585 break; 586 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) 587 #undef FLATBUFFERS_TD 588 } 589 field_stack_.pop_back(); 590 } 591 592 builder_.ClearOffsets(); 593 return builder_.EndVector(count); 594} 595 596void Parser::ParseMetaData(Definition &def) { 597 if (IsNext('(')) { 598 for (;;) { 599 auto name = attribute_; 600 Expect(kTokenIdentifier); 601 auto e = new Value(); 602 def.attributes.Add(name, e); 603 if (IsNext(':')) { 604 ParseSingleValue(*e); 605 } 606 if (IsNext(')')) break; 607 Expect(','); 608 } 609 } 610} 611 612bool Parser::TryTypedValue(int dtoken, 613 bool check, 614 Value &e, 615 BaseType req) { 616 bool match = dtoken == token_; 617 if (match) { 618 e.constant = attribute_; 619 if (!check) { 620 if (e.type.base_type == BASE_TYPE_NONE) { 621 e.type.base_type = req; 622 } else { 623 Error(std::string("type mismatch: expecting: ") + 624 kTypeNames[e.type.base_type] + 625 ", found: " + 626 kTypeNames[req]); 627 } 628 } 629 Next(); 630 } 631 return match; 632} 633 634int64_t Parser::ParseIntegerFromString(Type &type) { 635 int64_t result = 0; 636 // Parse one or more enum identifiers, separated by spaces. 637 const char *next = attribute_.c_str(); 638 do { 639 const char *divider = strchr(next, ' '); 640 std::string word; 641 if (divider) { 642 word = std::string(next, divider); 643 next = divider + strspn(divider, " "); 644 } else { 645 word = next; 646 next += word.length(); 647 } 648 if (type.enum_def) { // The field has an enum type 649 auto enum_val = type.enum_def->vals.Lookup(word); 650 if (!enum_val) 651 Error("unknown enum value: " + word + 652 ", for enum: " + type.enum_def->name); 653 result |= enum_val->value; 654 } else { // No enum type, probably integral field. 655 if (!IsInteger(type.base_type)) 656 Error("not a valid value for this field: " + word); 657 // TODO: could check if its a valid number constant here. 658 const char *dot = strchr(word.c_str(), '.'); 659 if (!dot) Error("enum values need to be qualified by an enum type"); 660 std::string enum_def_str(word.c_str(), dot); 661 std::string enum_val_str(dot + 1, word.c_str() + word.length()); 662 auto enum_def = enums_.Lookup(enum_def_str); 663 if (!enum_def) Error("unknown enum: " + enum_def_str); 664 auto enum_val = enum_def->vals.Lookup(enum_val_str); 665 if (!enum_val) Error("unknown enum value: " + enum_val_str); 666 result |= enum_val->value; 667 } 668 } while(*next); 669 return result; 670} 671 672void Parser::ParseSingleValue(Value &e) { 673 // First check if this could be a string/identifier enum value: 674 if (e.type.base_type != BASE_TYPE_STRING && 675 e.type.base_type != BASE_TYPE_NONE && 676 (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) { 677 e.constant = NumToString(ParseIntegerFromString(e.type)); 678 Next(); 679 } else if (TryTypedValue(kTokenIntegerConstant, 680 IsScalar(e.type.base_type), 681 e, 682 BASE_TYPE_INT) || 683 TryTypedValue(kTokenFloatConstant, 684 IsFloat(e.type.base_type), 685 e, 686 BASE_TYPE_FLOAT) || 687 TryTypedValue(kTokenStringConstant, 688 e.type.base_type == BASE_TYPE_STRING, 689 e, 690 BASE_TYPE_STRING)) { 691 } else { 692 Error("cannot parse value starting with: " + TokenToString(token_)); 693 } 694} 695 696StructDef *Parser::LookupCreateStruct(const std::string &name) { 697 auto struct_def = structs_.Lookup(name); 698 if (!struct_def) { 699 // Rather than failing, we create a "pre declared" StructDef, due to 700 // circular references, and check for errors at the end of parsing. 701 struct_def = new StructDef(); 702 structs_.Add(name, struct_def); 703 struct_def->name = name; 704 struct_def->predecl = true; 705 struct_def->defined_namespace = namespaces_.back(); 706 } 707 return struct_def; 708} 709 710void Parser::ParseEnum(bool is_union) { 711 std::vector<std::string> dc = doc_comment_; 712 Next(); 713 std::string name = attribute_; 714 Expect(kTokenIdentifier); 715 auto &enum_def = *new EnumDef(); 716 enum_def.name = name; 717 enum_def.doc_comment = dc; 718 enum_def.is_union = is_union; 719 enum_def.defined_namespace = namespaces_.back(); 720 if (enums_.Add(name, &enum_def)) Error("enum already exists: " + name); 721 if (is_union) { 722 enum_def.underlying_type.base_type = BASE_TYPE_UTYPE; 723 enum_def.underlying_type.enum_def = &enum_def; 724 } else { 725 if (proto_mode_) { 726 enum_def.underlying_type.base_type = BASE_TYPE_SHORT; 727 } else { 728 // Give specialized error message, since this type spec used to 729 // be optional in the first FlatBuffers release. 730 if (!IsNext(':')) Error("must specify the underlying integer type for this" 731 " enum (e.g. \': short\', which was the default)."); 732 // Specify the integer type underlying this enum. 733 ParseType(enum_def.underlying_type); 734 if (!IsInteger(enum_def.underlying_type.base_type)) 735 Error("underlying enum type must be integral"); 736 } 737 // Make this type refer back to the enum it was derived from. 738 enum_def.underlying_type.enum_def = &enum_def; 739 } 740 ParseMetaData(enum_def); 741 Expect('{'); 742 if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0)); 743 do { 744 std::string name = attribute_; 745 std::vector<std::string> dc = doc_comment_; 746 Expect(kTokenIdentifier); 747 auto prevsize = enum_def.vals.vec.size(); 748 auto value = enum_def.vals.vec.size() 749 ? enum_def.vals.vec.back()->value + 1 750 : 0; 751 auto &ev = *new EnumVal(name, value); 752 if (enum_def.vals.Add(name, &ev)) 753 Error("enum value already exists: " + name); 754 ev.doc_comment = dc; 755 if (is_union) { 756 ev.struct_def = LookupCreateStruct(name); 757 } 758 if (IsNext('=')) { 759 ev.value = atoi(attribute_.c_str()); 760 Expect(kTokenIntegerConstant); 761 if (prevsize && enum_def.vals.vec[prevsize - 1]->value >= ev.value) 762 Error("enum values must be specified in ascending order"); 763 } 764 } while (IsNext(proto_mode_ ? ';' : ',') && token_ != '}'); 765 Expect('}'); 766 if (enum_def.attributes.Lookup("bit_flags")) { 767 for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end(); 768 ++it) { 769 if (static_cast<size_t>((*it)->value) >= 770 SizeOf(enum_def.underlying_type.base_type) * 8) 771 Error("bit flag out of range of underlying integral type"); 772 (*it)->value = 1LL << (*it)->value; 773 } 774 } 775} 776 777StructDef &Parser::StartStruct() { 778 std::string name = attribute_; 779 Expect(kTokenIdentifier); 780 auto &struct_def = *LookupCreateStruct(name); 781 if (!struct_def.predecl) Error("datatype already exists: " + name); 782 struct_def.predecl = false; 783 struct_def.name = name; 784 // Move this struct to the back of the vector just in case it was predeclared, 785 // to preserve declaration order. 786 remove(structs_.vec.begin(), structs_.vec.end(), &struct_def); 787 structs_.vec.back() = &struct_def; 788 return struct_def; 789} 790 791void Parser::ParseDecl() { 792 std::vector<std::string> dc = doc_comment_; 793 bool fixed = IsNext(kTokenStruct); 794 if (!fixed) Expect(kTokenTable); 795 auto &struct_def = StartStruct(); 796 struct_def.doc_comment = dc; 797 struct_def.fixed = fixed; 798 ParseMetaData(struct_def); 799 struct_def.sortbysize = 800 struct_def.attributes.Lookup("original_order") == nullptr && !fixed; 801 Expect('{'); 802 while (token_ != '}') ParseField(struct_def); 803 auto force_align = struct_def.attributes.Lookup("force_align"); 804 if (fixed && force_align) { 805 auto align = static_cast<size_t>(atoi(force_align->constant.c_str())); 806 if (force_align->type.base_type != BASE_TYPE_INT || 807 align < struct_def.minalign || 808 align > 256 || 809 align & (align - 1)) 810 Error("force_align must be a power of two integer ranging from the" 811 "struct\'s natural alignment to 256"); 812 struct_def.minalign = align; 813 } 814 struct_def.PadLastField(struct_def.minalign); 815 // Check if this is a table that has manual id assignments 816 auto &fields = struct_def.fields.vec; 817 if (!struct_def.fixed && fields.size()) { 818 size_t num_id_fields = 0; 819 for (auto it = fields.begin(); it != fields.end(); ++it) { 820 if ((*it)->attributes.Lookup("id")) num_id_fields++; 821 } 822 // If any fields have ids.. 823 if (num_id_fields) { 824 // Then all fields must have them. 825 if (num_id_fields != fields.size()) 826 Error("either all fields or no fields must have an 'id' attribute"); 827 // Simply sort by id, then the fields are the same as if no ids had 828 // been specified. 829 std::sort(fields.begin(), fields.end(), 830 [](const FieldDef *a, const FieldDef *b) -> bool { 831 auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str()); 832 auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str()); 833 return a_id < b_id; 834 }); 835 // Verify we have a contiguous set, and reassign vtable offsets. 836 for (int i = 0; i < static_cast<int>(fields.size()); i++) { 837 if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str())) 838 Error("field id\'s must be consecutive from 0, id " + 839 NumToString(i) + " missing or set twice"); 840 fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i)); 841 } 842 } 843 } 844 // Check that no identifiers clash with auto generated fields. 845 // This is not an ideal situation, but should occur very infrequently, 846 // and allows us to keep using very readable names for type & length fields 847 // without inducing compile errors. 848 auto CheckClash = [&fields, &struct_def](const char *suffix, 849 BaseType basetype) { 850 auto len = strlen(suffix); 851 for (auto it = fields.begin(); it != fields.end(); ++it) { 852 auto &name = (*it)->name; 853 if (name.length() > len && 854 name.compare(name.length() - len, len, suffix) == 0 && 855 (*it)->value.type.base_type != BASE_TYPE_UTYPE) { 856 auto field = struct_def.fields.Lookup( 857 name.substr(0, name.length() - len)); 858 if (field && field->value.type.base_type == basetype) 859 Error("Field " + name + 860 " would clash with generated functions for field " + 861 field->name); 862 } 863 } 864 }; 865 CheckClash("_type", BASE_TYPE_UNION); 866 CheckClash("Type", BASE_TYPE_UNION); 867 CheckClash("_length", BASE_TYPE_VECTOR); 868 CheckClash("Length", BASE_TYPE_VECTOR); 869 Expect('}'); 870} 871 872bool Parser::SetRootType(const char *name) { 873 root_struct_def = structs_.Lookup(name); 874 return root_struct_def != nullptr; 875} 876 877void Parser::MarkGenerated() { 878 // Since the Parser object retains definitions across files, we must 879 // ensure we only output code for definitions once, in the file they are first 880 // declared. This function marks all existing definitions as having already 881 // been generated. 882 for (auto it = enums_.vec.begin(); 883 it != enums_.vec.end(); ++it) { 884 (*it)->generated = true; 885 } 886 for (auto it = structs_.vec.begin(); 887 it != structs_.vec.end(); ++it) { 888 (*it)->generated = true; 889 } 890} 891 892void Parser::ParseNamespace() { 893 Next(); 894 auto ns = new Namespace(); 895 namespaces_.push_back(ns); 896 for (;;) { 897 ns->components.push_back(attribute_); 898 Expect(kTokenIdentifier); 899 if (!IsNext('.')) break; 900 } 901 Expect(';'); 902} 903 904// Best effort parsing of .proto declarations, with the aim to turn them 905// in the closest corresponding FlatBuffer equivalent. 906// We parse everything as identifiers instead of keywords, since we don't 907// want protobuf keywords to become invalid identifiers in FlatBuffers. 908void Parser::ParseProtoDecl() { 909 if (attribute_ == "package") { 910 // These are identical in syntax to FlatBuffer's namespace decl. 911 ParseNamespace(); 912 } else if (attribute_ == "message") { 913 Next(); 914 auto &struct_def = StartStruct(); 915 Expect('{'); 916 while (token_ != '}') { 917 // Parse the qualifier. 918 bool required = false; 919 bool repeated = false; 920 if (attribute_ == "optional") { 921 // This is the default. 922 } else if (attribute_ == "required") { 923 required = true; 924 } else if (attribute_ == "repeated") { 925 repeated = true; 926 } else { 927 Error("expecting optional/required/repeated, got: " + attribute_); 928 } 929 Type type = ParseTypeFromProtoType(); 930 // Repeated elements get mapped to a vector. 931 if (repeated) { 932 type.element = type.base_type; 933 type.base_type = BASE_TYPE_VECTOR; 934 } 935 std::string name = attribute_; 936 Expect(kTokenIdentifier); 937 // Parse the field id. Since we're just translating schemas, not 938 // any kind of binary compatibility, we can safely ignore these, and 939 // assign our own. 940 Expect('='); 941 Expect(kTokenIntegerConstant); 942 auto &field = AddField(struct_def, name, type); 943 field.required = required; 944 // See if there's a default specified. 945 if (IsNext('[')) { 946 if (attribute_ != "default") Error("\'default\' expected"); 947 Next(); 948 Expect('='); 949 field.value.constant = attribute_; 950 Next(); 951 Expect(']'); 952 } 953 Expect(';'); 954 } 955 Next(); 956 } else if (attribute_ == "enum") { 957 // These are almost the same, just with different terminator: 958 ParseEnum(false); 959 } else if (attribute_ == "import") { 960 Next(); 961 included_files_[attribute_] = true; 962 Expect(kTokenStringConstant); 963 Expect(';'); 964 } else if (attribute_ == "option") { // Skip these. 965 Next(); 966 Expect(kTokenIdentifier); 967 Expect('='); 968 Next(); // Any single token. 969 Expect(';'); 970 } else { 971 Error("don\'t know how to parse .proto declaration starting with " + 972 attribute_); 973 } 974} 975 976// Parse a protobuf type, and map it to the corresponding FlatBuffer one. 977Type Parser::ParseTypeFromProtoType() { 978 Expect(kTokenIdentifier); 979 struct type_lookup { const char *proto_type; BaseType fb_type; }; 980 static type_lookup lookup[] = { 981 { "float", BASE_TYPE_FLOAT }, { "double", BASE_TYPE_DOUBLE }, 982 { "int32", BASE_TYPE_INT }, { "int64", BASE_TYPE_LONG }, 983 { "uint32", BASE_TYPE_UINT }, { "uint64", BASE_TYPE_ULONG }, 984 { "sint32", BASE_TYPE_INT }, { "sint64", BASE_TYPE_LONG }, 985 { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG }, 986 { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG }, 987 { "bool", BASE_TYPE_BOOL }, 988 { "string", BASE_TYPE_STRING }, 989 { "bytes", BASE_TYPE_STRING }, 990 { nullptr, BASE_TYPE_NONE } 991 }; 992 Type type; 993 for (auto tl = lookup; tl->proto_type; tl++) { 994 if (attribute_ == tl->proto_type) { 995 type.base_type = tl->fb_type; 996 Next(); 997 return type; 998 } 999 } 1000 ParseTypeIdent(type); 1001 Expect(kTokenIdentifier); 1002 return type; 1003} 1004 1005bool Parser::Parse(const char *source, const char **include_paths, 1006 const char *source_filename) { 1007 if (source_filename) included_files_[source_filename] = true; 1008 source_ = cursor_ = source; 1009 line_ = 1; 1010 error_.clear(); 1011 builder_.Clear(); 1012 try { 1013 Next(); 1014 // Includes must come first: 1015 while (IsNext(kTokenInclude)) { 1016 auto name = attribute_; 1017 Expect(kTokenStringConstant); 1018 if (included_files_.find(name) == included_files_.end()) { 1019 // We found an include file that we have not parsed yet. 1020 // Load it and parse it. 1021 std::string contents; 1022 if (!include_paths) { 1023 const char *current_directory[] = { "", nullptr }; 1024 include_paths = current_directory; 1025 } 1026 for (auto paths = include_paths; paths && *paths; paths++) { 1027 auto filepath = flatbuffers::ConCatPathFileName(*paths, name); 1028 if(LoadFile(filepath.c_str(), true, &contents)) break; 1029 } 1030 if (contents.empty()) 1031 Error("unable to load include file: " + name); 1032 included_files_[name] = true; 1033 if (!Parse(contents.c_str(), include_paths)) { 1034 // Any errors, we're done. 1035 return false; 1036 } 1037 // We do not want to output code for any included files: 1038 MarkGenerated(); 1039 // This is the easiest way to continue this file after an include: 1040 // instead of saving and restoring all the state, we simply start the 1041 // file anew. This will cause it to encounter the same include statement 1042 // again, but this time it will skip it, because it was entered into 1043 // included_files_. 1044 // This is recursive, but only go as deep as the number of include 1045 // statements. 1046 return Parse(source, include_paths, source_filename); 1047 } 1048 Expect(';'); 1049 } 1050 // Now parse all other kinds of declarations: 1051 while (token_ != kTokenEof) { 1052 if (proto_mode_) { 1053 ParseProtoDecl(); 1054 } else if (token_ == kTokenNameSpace) { 1055 ParseNamespace(); 1056 } else if (token_ == '{') { 1057 if (!root_struct_def) Error("no root type set to parse json with"); 1058 if (builder_.GetSize()) { 1059 Error("cannot have more than one json object in a file"); 1060 } 1061 builder_.Finish(Offset<Table>(ParseTable(*root_struct_def)), 1062 file_identifier_.length() ? file_identifier_.c_str() : nullptr); 1063 } else if (token_ == kTokenEnum) { 1064 ParseEnum(false); 1065 } else if (token_ == kTokenUnion) { 1066 ParseEnum(true); 1067 } else if (token_ == kTokenRootType) { 1068 Next(); 1069 auto root_type = attribute_; 1070 Expect(kTokenIdentifier); 1071 if (!SetRootType(root_type.c_str())) 1072 Error("unknown root type: " + root_type); 1073 if (root_struct_def->fixed) 1074 Error("root type must be a table"); 1075 Expect(';'); 1076 } else if (token_ == kTokenFileIdentifier) { 1077 Next(); 1078 file_identifier_ = attribute_; 1079 Expect(kTokenStringConstant); 1080 if (file_identifier_.length() != 1081 FlatBufferBuilder::kFileIdentifierLength) 1082 Error("file_identifier must be exactly " + 1083 NumToString(FlatBufferBuilder::kFileIdentifierLength) + 1084 " characters"); 1085 Expect(';'); 1086 } else if (token_ == kTokenFileExtension) { 1087 Next(); 1088 file_extension_ = attribute_; 1089 Expect(kTokenStringConstant); 1090 Expect(';'); 1091 } else if(token_ == kTokenInclude) { 1092 Error("includes must come before declarations"); 1093 } else { 1094 ParseDecl(); 1095 } 1096 } 1097 for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) { 1098 if ((*it)->predecl) 1099 Error("type referenced but not defined: " + (*it)->name); 1100 } 1101 for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) { 1102 auto &enum_def = **it; 1103 if (enum_def.is_union) { 1104 for (auto it = enum_def.vals.vec.begin(); 1105 it != enum_def.vals.vec.end(); 1106 ++it) { 1107 auto &val = **it; 1108 if (val.struct_def && val.struct_def->fixed) 1109 Error("only tables can be union elements: " + val.name); 1110 } 1111 } 1112 } 1113 } catch (const std::string &msg) { 1114 error_ = source_filename ? AbsolutePath(source_filename) : ""; 1115 #ifdef _WIN32 1116 error_ += "(" + NumToString(line_) + ")"; // MSVC alike 1117 #else 1118 if (source_filename) error_ += ":"; 1119 error_ += NumToString(line_) + ":0"; // gcc alike 1120 #endif 1121 error_ += ": error: " + msg; 1122 return false; 1123 } 1124 assert(!struct_stack_.size()); 1125 return true; 1126} 1127 1128} // namespace flatbuffers 1129