YAMLParser.cpp revision 93210e847a1496b24cef881723e57c489082dcfe
1//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a YAML parser. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Support/YAMLParser.h" 15 16#include "llvm/ADT/ilist.h" 17#include "llvm/ADT/ilist_node.h" 18#include "llvm/ADT/SmallVector.h" 19#include "llvm/ADT/StringExtras.h" 20#include "llvm/ADT/Twine.h" 21#include "llvm/Support/ErrorHandling.h" 22#include "llvm/Support/MemoryBuffer.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/Support/SourceMgr.h" 25 26using namespace llvm; 27using namespace yaml; 28 29enum UnicodeEncodingForm { 30 UEF_UTF32_LE, //< UTF-32 Little Endian 31 UEF_UTF32_BE, //< UTF-32 Big Endian 32 UEF_UTF16_LE, //< UTF-16 Little Endian 33 UEF_UTF16_BE, //< UTF-16 Big Endian 34 UEF_UTF8, //< UTF-8 or ascii. 35 UEF_Unknown //< Not a valid Unicode encoding. 36}; 37 38/// EncodingInfo - Holds the encoding type and length of the byte order mark if 39/// it exists. Length is in {0, 2, 3, 4}. 40typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; 41 42/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode 43/// encoding form of \a Input. 44/// 45/// @param Input A string of length 0 or more. 46/// @returns An EncodingInfo indicating the Unicode encoding form of the input 47/// and how long the byte order mark is if one exists. 48static EncodingInfo getUnicodeEncoding(StringRef Input) { 49 if (Input.size() == 0) 50 return std::make_pair(UEF_Unknown, 0); 51 52 switch (uint8_t(Input[0])) { 53 case 0x00: 54 if (Input.size() >= 4) { 55 if ( Input[1] == 0 56 && uint8_t(Input[2]) == 0xFE 57 && uint8_t(Input[3]) == 0xFF) 58 return std::make_pair(UEF_UTF32_BE, 4); 59 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) 60 return std::make_pair(UEF_UTF32_BE, 0); 61 } 62 63 if (Input.size() >= 2 && Input[1] != 0) 64 return std::make_pair(UEF_UTF16_BE, 0); 65 return std::make_pair(UEF_Unknown, 0); 66 case 0xFF: 67 if ( Input.size() >= 4 68 && uint8_t(Input[1]) == 0xFE 69 && Input[2] == 0 70 && Input[3] == 0) 71 return std::make_pair(UEF_UTF32_LE, 4); 72 73 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) 74 return std::make_pair(UEF_UTF16_LE, 2); 75 return std::make_pair(UEF_Unknown, 0); 76 case 0xFE: 77 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) 78 return std::make_pair(UEF_UTF16_BE, 2); 79 return std::make_pair(UEF_Unknown, 0); 80 case 0xEF: 81 if ( Input.size() >= 3 82 && uint8_t(Input[1]) == 0xBB 83 && uint8_t(Input[2]) == 0xBF) 84 return std::make_pair(UEF_UTF8, 3); 85 return std::make_pair(UEF_Unknown, 0); 86 } 87 88 // It could still be utf-32 or utf-16. 89 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) 90 return std::make_pair(UEF_UTF32_LE, 0); 91 92 if (Input.size() >= 2 && Input[1] == 0) 93 return std::make_pair(UEF_UTF16_LE, 0); 94 95 return std::make_pair(UEF_UTF8, 0); 96} 97 98namespace llvm { 99namespace yaml { 100/// Token - A single YAML token. 101struct Token : ilist_node<Token> { 102 enum TokenKind { 103 TK_Error, // Uninitialized token. 104 TK_StreamStart, 105 TK_StreamEnd, 106 TK_VersionDirective, 107 TK_TagDirective, 108 TK_DocumentStart, 109 TK_DocumentEnd, 110 TK_BlockEntry, 111 TK_BlockEnd, 112 TK_BlockSequenceStart, 113 TK_BlockMappingStart, 114 TK_FlowEntry, 115 TK_FlowSequenceStart, 116 TK_FlowSequenceEnd, 117 TK_FlowMappingStart, 118 TK_FlowMappingEnd, 119 TK_Key, 120 TK_Value, 121 TK_Scalar, 122 TK_Alias, 123 TK_Anchor, 124 TK_Tag 125 } Kind; 126 127 /// A string of length 0 or more whose begin() points to the logical location 128 /// of the token in the input. 129 StringRef Range; 130 131 Token() : Kind(TK_Error) {} 132}; 133} 134} 135 136template<> 137struct ilist_sentinel_traits<Token> { 138 Token *createSentinel() const { 139 return &Sentinel; 140 } 141 static void destroySentinel(Token*) {} 142 143 Token *provideInitialHead() const { return createSentinel(); } 144 Token *ensureHead(Token*) const { return createSentinel(); } 145 static void noteHead(Token*, Token*) {} 146 147private: 148 mutable Token Sentinel; 149}; 150 151template<> 152struct ilist_node_traits<Token> { 153 Token *createNode(const Token &V) { 154 return new (Alloc.Allocate<Token>()) Token(V); 155 } 156 static void deleteNode(Token *V) {} 157 158 void addNodeToList(Token *) {} 159 void removeNodeFromList(Token *) {} 160 void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, 161 ilist_iterator<Token> /*first*/, 162 ilist_iterator<Token> /*last*/) {} 163 164 BumpPtrAllocator Alloc; 165}; 166 167typedef ilist<Token> TokenQueueT; 168 169namespace { 170/// @brief This struct is used to track simple keys. 171/// 172/// Simple keys are handled by creating an entry in SimpleKeys for each Token 173/// which could legally be the start of a simple key. When peekNext is called, 174/// if the Token To be returned is referenced by a SimpleKey, we continue 175/// tokenizing until that potential simple key has either been found to not be 176/// a simple key (we moved on to the next line or went further than 1024 chars). 177/// Or when we run into a Value, and then insert a Key token (and possibly 178/// others) before the SimpleKey's Tok. 179struct SimpleKey { 180 TokenQueueT::iterator Tok; 181 unsigned Column; 182 unsigned Line; 183 unsigned FlowLevel; 184 bool IsRequired; 185 186 bool operator ==(const SimpleKey &Other) { 187 return Tok == Other.Tok; 188 } 189}; 190} 191 192/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit 193/// subsequence and the subsequence's length in code units (uint8_t). 194/// A length of 0 represents an error. 195typedef std::pair<uint32_t, unsigned> UTF8Decoded; 196 197static UTF8Decoded decodeUTF8(StringRef Range) { 198 StringRef::iterator Position= Range.begin(); 199 StringRef::iterator End = Range.end(); 200 // 1 byte: [0x00, 0x7f] 201 // Bit pattern: 0xxxxxxx 202 if ((*Position & 0x80) == 0) { 203 return std::make_pair(*Position, 1); 204 } 205 // 2 bytes: [0x80, 0x7ff] 206 // Bit pattern: 110xxxxx 10xxxxxx 207 if (Position + 1 != End && 208 ((*Position & 0xE0) == 0xC0) && 209 ((*(Position + 1) & 0xC0) == 0x80)) { 210 uint32_t codepoint = ((*Position & 0x1F) << 6) | 211 (*(Position + 1) & 0x3F); 212 if (codepoint >= 0x80) 213 return std::make_pair(codepoint, 2); 214 } 215 // 3 bytes: [0x8000, 0xffff] 216 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx 217 if (Position + 2 != End && 218 ((*Position & 0xF0) == 0xE0) && 219 ((*(Position + 1) & 0xC0) == 0x80) && 220 ((*(Position + 2) & 0xC0) == 0x80)) { 221 uint32_t codepoint = ((*Position & 0x0F) << 12) | 222 ((*(Position + 1) & 0x3F) << 6) | 223 (*(Position + 2) & 0x3F); 224 // Codepoints between 0xD800 and 0xDFFF are invalid, as 225 // they are high / low surrogate halves used by UTF-16. 226 if (codepoint >= 0x800 && 227 (codepoint < 0xD800 || codepoint > 0xDFFF)) 228 return std::make_pair(codepoint, 3); 229 } 230 // 4 bytes: [0x10000, 0x10FFFF] 231 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 232 if (Position + 3 != End && 233 ((*Position & 0xF8) == 0xF0) && 234 ((*(Position + 1) & 0xC0) == 0x80) && 235 ((*(Position + 2) & 0xC0) == 0x80) && 236 ((*(Position + 3) & 0xC0) == 0x80)) { 237 uint32_t codepoint = ((*Position & 0x07) << 18) | 238 ((*(Position + 1) & 0x3F) << 12) | 239 ((*(Position + 2) & 0x3F) << 6) | 240 (*(Position + 3) & 0x3F); 241 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) 242 return std::make_pair(codepoint, 4); 243 } 244 return std::make_pair(0, 0); 245} 246 247namespace llvm { 248namespace yaml { 249/// @brief Scans YAML tokens from a MemoryBuffer. 250class Scanner { 251public: 252 Scanner(const StringRef Input, SourceMgr &SM); 253 254 /// @brief Parse the next token and return it without popping it. 255 Token &peekNext(); 256 257 /// @brief Parse the next token and pop it from the queue. 258 Token getNext(); 259 260 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, 261 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { 262 SM.PrintMessage(Loc, Kind, Message, Ranges); 263 } 264 265 void setError(const Twine &Message, StringRef::iterator Position) { 266 if (Current >= End) 267 Current = End - 1; 268 269 // Don't print out more errors after the first one we encounter. The rest 270 // are just the result of the first, and have no meaning. 271 if (!Failed) 272 printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); 273 Failed = true; 274 } 275 276 void setError(const Twine &Message) { 277 setError(Message, Current); 278 } 279 280 /// @brief Returns true if an error occurred while parsing. 281 bool failed() { 282 return Failed; 283 } 284 285private: 286 StringRef currentInput() { 287 return StringRef(Current, End - Current); 288 } 289 290 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting 291 /// at \a Position. 292 /// 293 /// If the UTF-8 code units starting at Position do not form a well-formed 294 /// code unit subsequence, then the Unicode scalar value is 0, and the length 295 /// is 0. 296 UTF8Decoded decodeUTF8(StringRef::iterator Position) { 297 return ::decodeUTF8(StringRef(Position, End - Position)); 298 } 299 300 // The following functions are based on the gramar rules in the YAML spec. The 301 // style of the function names it meant to closely match how they are written 302 // in the spec. The number within the [] is the number of the grammar rule in 303 // the spec. 304 // 305 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. 306 // 307 // c- 308 // A production starting and ending with a special character. 309 // b- 310 // A production matching a single line break. 311 // nb- 312 // A production starting and ending with a non-break character. 313 // s- 314 // A production starting and ending with a white space character. 315 // ns- 316 // A production starting and ending with a non-space character. 317 // l- 318 // A production matching complete line(s). 319 320 /// @brief Skip a single nb-char[27] starting at Position. 321 /// 322 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] 323 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] 324 /// 325 /// @returns The code unit after the nb-char, or Position if it's not an 326 /// nb-char. 327 StringRef::iterator skip_nb_char(StringRef::iterator Position); 328 329 /// @brief Skip a single b-break[28] starting at Position. 330 /// 331 /// A b-break is 0xD 0xA | 0xD | 0xA 332 /// 333 /// @returns The code unit after the b-break, or Position if it's not a 334 /// b-break. 335 StringRef::iterator skip_b_break(StringRef::iterator Position); 336 337 /// @brief Skip a single s-white[33] starting at Position. 338 /// 339 /// A s-white is 0x20 | 0x9 340 /// 341 /// @returns The code unit after the s-white, or Position if it's not a 342 /// s-white. 343 StringRef::iterator skip_s_white(StringRef::iterator Position); 344 345 /// @brief Skip a single ns-char[34] starting at Position. 346 /// 347 /// A ns-char is nb-char - s-white 348 /// 349 /// @returns The code unit after the ns-char, or Position if it's not a 350 /// ns-char. 351 StringRef::iterator skip_ns_char(StringRef::iterator Position); 352 353 typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); 354 /// @brief Skip minimal well-formed code unit subsequences until Func 355 /// returns its input. 356 /// 357 /// @returns The code unit after the last minimal well-formed code unit 358 /// subsequence that Func accepted. 359 StringRef::iterator skip_while( SkipWhileFunc Func 360 , StringRef::iterator Position); 361 362 /// @brief Scan ns-uri-char[39]s starting at Cur. 363 /// 364 /// This updates Cur and Column while scanning. 365 /// 366 /// @returns A StringRef starting at Cur which covers the longest contiguous 367 /// sequence of ns-uri-char. 368 StringRef scan_ns_uri_char(); 369 370 /// @brief Scan ns-plain-one-line[133] starting at \a Cur. 371 StringRef scan_ns_plain_one_line(); 372 373 /// @brief Consume a minimal well-formed code unit subsequence starting at 374 /// \a Cur. Return false if it is not the same Unicode scalar value as 375 /// \a Expected. This updates \a Column. 376 bool consume(uint32_t Expected); 377 378 /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. 379 void skip(uint32_t Distance); 380 381 /// @brief Return true if the minimal well-formed code unit subsequence at 382 /// Pos is whitespace or a new line 383 bool isBlankOrBreak(StringRef::iterator Position); 384 385 /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey. 386 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok 387 , unsigned AtColumn 388 , bool IsRequired); 389 390 /// @brief Remove simple keys that can no longer be valid simple keys. 391 /// 392 /// Invalid simple keys are not on the current line or are further than 1024 393 /// columns back. 394 void removeStaleSimpleKeyCandidates(); 395 396 /// @brief Remove all simple keys on FlowLevel \a Level. 397 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); 398 399 /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd 400 /// tokens if needed. 401 bool unrollIndent(int ToColumn); 402 403 /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint 404 /// if needed. 405 bool rollIndent( int ToColumn 406 , Token::TokenKind Kind 407 , TokenQueueT::iterator InsertPoint); 408 409 /// @brief Skip whitespace and comments until the start of the next token. 410 void scanToNextToken(); 411 412 /// @brief Must be the first token generated. 413 bool scanStreamStart(); 414 415 /// @brief Generate tokens needed to close out the stream. 416 bool scanStreamEnd(); 417 418 /// @brief Scan a %BLAH directive. 419 bool scanDirective(); 420 421 /// @brief Scan a ... or ---. 422 bool scanDocumentIndicator(bool IsStart); 423 424 /// @brief Scan a [ or { and generate the proper flow collection start token. 425 bool scanFlowCollectionStart(bool IsSequence); 426 427 /// @brief Scan a ] or } and generate the proper flow collection end token. 428 bool scanFlowCollectionEnd(bool IsSequence); 429 430 /// @brief Scan the , that separates entries in a flow collection. 431 bool scanFlowEntry(); 432 433 /// @brief Scan the - that starts block sequence entries. 434 bool scanBlockEntry(); 435 436 /// @brief Scan an explicit ? indicating a key. 437 bool scanKey(); 438 439 /// @brief Scan an explicit : indicating a value. 440 bool scanValue(); 441 442 /// @brief Scan a quoted scalar. 443 bool scanFlowScalar(bool IsDoubleQuoted); 444 445 /// @brief Scan an unquoted scalar. 446 bool scanPlainScalar(); 447 448 /// @brief Scan an Alias or Anchor starting with * or &. 449 bool scanAliasOrAnchor(bool IsAlias); 450 451 /// @brief Scan a block scalar starting with | or >. 452 bool scanBlockScalar(bool IsLiteral); 453 454 /// @brief Scan a tag of the form !stuff. 455 bool scanTag(); 456 457 /// @brief Dispatch to the next scanning function based on \a *Cur. 458 bool fetchMoreTokens(); 459 460 /// @brief The SourceMgr used for diagnostics and buffer management. 461 SourceMgr &SM; 462 463 /// @brief The original input. 464 MemoryBuffer *InputBuffer; 465 466 /// @brief The current position of the scanner. 467 StringRef::iterator Current; 468 469 /// @brief The end of the input (one past the last character). 470 StringRef::iterator End; 471 472 /// @brief Current YAML indentation level in spaces. 473 int Indent; 474 475 /// @brief Current column number in Unicode code points. 476 unsigned Column; 477 478 /// @brief Current line number. 479 unsigned Line; 480 481 /// @brief How deep we are in flow style containers. 0 Means at block level. 482 unsigned FlowLevel; 483 484 /// @brief Are we at the start of the stream? 485 bool IsStartOfStream; 486 487 /// @brief Can the next token be the start of a simple key? 488 bool IsSimpleKeyAllowed; 489 490 /// @brief Is the next token required to start a simple key? 491 bool IsSimpleKeyRequired; 492 493 /// @brief True if an error has occurred. 494 bool Failed; 495 496 /// @brief Queue of tokens. This is required to queue up tokens while looking 497 /// for the end of a simple key. And for cases where a single character 498 /// can produce multiple tokens (e.g. BlockEnd). 499 TokenQueueT TokenQueue; 500 501 /// @brief Indentation levels. 502 SmallVector<int, 4> Indents; 503 504 /// @brief Potential simple keys. 505 SmallVector<SimpleKey, 4> SimpleKeys; 506}; 507 508} // end namespace yaml 509} // end namespace llvm 510 511/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. 512static void encodeUTF8( uint32_t UnicodeScalarValue 513 , SmallVectorImpl<char> &Result) { 514 if (UnicodeScalarValue <= 0x7F) { 515 Result.push_back(UnicodeScalarValue & 0x7F); 516 } else if (UnicodeScalarValue <= 0x7FF) { 517 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); 518 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); 519 Result.push_back(FirstByte); 520 Result.push_back(SecondByte); 521 } else if (UnicodeScalarValue <= 0xFFFF) { 522 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); 523 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 524 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); 525 Result.push_back(FirstByte); 526 Result.push_back(SecondByte); 527 Result.push_back(ThirdByte); 528 } else if (UnicodeScalarValue <= 0x10FFFF) { 529 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); 530 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); 531 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 532 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); 533 Result.push_back(FirstByte); 534 Result.push_back(SecondByte); 535 Result.push_back(ThirdByte); 536 Result.push_back(FourthByte); 537 } 538} 539 540bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { 541 SourceMgr SM; 542 Scanner scanner(Input, SM); 543 while (true) { 544 Token T = scanner.getNext(); 545 switch (T.Kind) { 546 case Token::TK_StreamStart: 547 OS << "Stream-Start: "; 548 break; 549 case Token::TK_StreamEnd: 550 OS << "Stream-End: "; 551 break; 552 case Token::TK_VersionDirective: 553 OS << "Version-Directive: "; 554 break; 555 case Token::TK_TagDirective: 556 OS << "Tag-Directive: "; 557 break; 558 case Token::TK_DocumentStart: 559 OS << "Document-Start: "; 560 break; 561 case Token::TK_DocumentEnd: 562 OS << "Document-End: "; 563 break; 564 case Token::TK_BlockEntry: 565 OS << "Block-Entry: "; 566 break; 567 case Token::TK_BlockEnd: 568 OS << "Block-End: "; 569 break; 570 case Token::TK_BlockSequenceStart: 571 OS << "Block-Sequence-Start: "; 572 break; 573 case Token::TK_BlockMappingStart: 574 OS << "Block-Mapping-Start: "; 575 break; 576 case Token::TK_FlowEntry: 577 OS << "Flow-Entry: "; 578 break; 579 case Token::TK_FlowSequenceStart: 580 OS << "Flow-Sequence-Start: "; 581 break; 582 case Token::TK_FlowSequenceEnd: 583 OS << "Flow-Sequence-End: "; 584 break; 585 case Token::TK_FlowMappingStart: 586 OS << "Flow-Mapping-Start: "; 587 break; 588 case Token::TK_FlowMappingEnd: 589 OS << "Flow-Mapping-End: "; 590 break; 591 case Token::TK_Key: 592 OS << "Key: "; 593 break; 594 case Token::TK_Value: 595 OS << "Value: "; 596 break; 597 case Token::TK_Scalar: 598 OS << "Scalar: "; 599 break; 600 case Token::TK_Alias: 601 OS << "Alias: "; 602 break; 603 case Token::TK_Anchor: 604 OS << "Anchor: "; 605 break; 606 case Token::TK_Tag: 607 OS << "Tag: "; 608 break; 609 case Token::TK_Error: 610 break; 611 } 612 OS << T.Range << "\n"; 613 if (T.Kind == Token::TK_StreamEnd) 614 break; 615 else if (T.Kind == Token::TK_Error) 616 return false; 617 } 618 return true; 619} 620 621bool yaml::scanTokens(StringRef Input) { 622 llvm::SourceMgr SM; 623 llvm::yaml::Scanner scanner(Input, SM); 624 for (;;) { 625 llvm::yaml::Token T = scanner.getNext(); 626 if (T.Kind == Token::TK_StreamEnd) 627 break; 628 else if (T.Kind == Token::TK_Error) 629 return false; 630 } 631 return true; 632} 633 634std::string yaml::escape(StringRef Input) { 635 std::string EscapedInput; 636 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { 637 if (*i == '\\') 638 EscapedInput += "\\\\"; 639 else if (*i == '"') 640 EscapedInput += "\\\""; 641 else if (*i == 0) 642 EscapedInput += "\\0"; 643 else if (*i == 0x07) 644 EscapedInput += "\\a"; 645 else if (*i == 0x08) 646 EscapedInput += "\\b"; 647 else if (*i == 0x09) 648 EscapedInput += "\\t"; 649 else if (*i == 0x0A) 650 EscapedInput += "\\n"; 651 else if (*i == 0x0B) 652 EscapedInput += "\\v"; 653 else if (*i == 0x0C) 654 EscapedInput += "\\f"; 655 else if (*i == 0x0D) 656 EscapedInput += "\\r"; 657 else if (*i == 0x1B) 658 EscapedInput += "\\e"; 659 else if (*i >= 0 && *i < 0x20) { // Control characters not handled above. 660 std::string HexStr = utohexstr(*i); 661 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 662 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. 663 UTF8Decoded UnicodeScalarValue 664 = decodeUTF8(StringRef(i, Input.end() - i)); 665 if (UnicodeScalarValue.second == 0) { 666 // Found invalid char. 667 SmallString<4> Val; 668 encodeUTF8(0xFFFD, Val); 669 EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); 670 // FIXME: Error reporting. 671 return EscapedInput; 672 } 673 if (UnicodeScalarValue.first == 0x85) 674 EscapedInput += "\\N"; 675 else if (UnicodeScalarValue.first == 0xA0) 676 EscapedInput += "\\_"; 677 else if (UnicodeScalarValue.first == 0x2028) 678 EscapedInput += "\\L"; 679 else if (UnicodeScalarValue.first == 0x2029) 680 EscapedInput += "\\P"; 681 else { 682 std::string HexStr = utohexstr(UnicodeScalarValue.first); 683 if (HexStr.size() <= 2) 684 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 685 else if (HexStr.size() <= 4) 686 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; 687 else if (HexStr.size() <= 8) 688 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; 689 } 690 i += UnicodeScalarValue.second - 1; 691 } else 692 EscapedInput.push_back(*i); 693 } 694 return EscapedInput; 695} 696 697Scanner::Scanner(StringRef Input, SourceMgr &sm) 698 : SM(sm) 699 , Indent(-1) 700 , Column(0) 701 , Line(0) 702 , FlowLevel(0) 703 , IsStartOfStream(true) 704 , IsSimpleKeyAllowed(true) 705 , IsSimpleKeyRequired(false) 706 , Failed(false) { 707 InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); 708 SM.AddNewSourceBuffer(InputBuffer, SMLoc()); 709 Current = InputBuffer->getBufferStart(); 710 End = InputBuffer->getBufferEnd(); 711} 712 713Token &Scanner::peekNext() { 714 // If the current token is a possible simple key, keep parsing until we 715 // can confirm. 716 bool NeedMore = false; 717 while (true) { 718 if (TokenQueue.empty() || NeedMore) { 719 if (!fetchMoreTokens()) { 720 TokenQueue.clear(); 721 TokenQueue.push_back(Token()); 722 return TokenQueue.front(); 723 } 724 } 725 assert(!TokenQueue.empty() && 726 "fetchMoreTokens lied about getting tokens!"); 727 728 removeStaleSimpleKeyCandidates(); 729 SimpleKey SK; 730 SK.Tok = TokenQueue.front(); 731 if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) 732 == SimpleKeys.end()) 733 break; 734 else 735 NeedMore = true; 736 } 737 return TokenQueue.front(); 738} 739 740Token Scanner::getNext() { 741 Token Ret = peekNext(); 742 // TokenQueue can be empty if there was an error getting the next token. 743 if (!TokenQueue.empty()) 744 TokenQueue.pop_front(); 745 746 // There cannot be any referenced Token's if the TokenQueue is empty. So do a 747 // quick deallocation of them all. 748 if (TokenQueue.empty()) { 749 TokenQueue.Alloc.Reset(); 750 } 751 752 return Ret; 753} 754 755StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { 756 // Check 7 bit c-printable - b-char. 757 if ( *Position == 0x09 758 || (*Position >= 0x20 && *Position <= 0x7E)) 759 return Position + 1; 760 761 // Check for valid UTF-8. 762 if (uint8_t(*Position) & 0x80) { 763 UTF8Decoded u8d = decodeUTF8(Position); 764 if ( u8d.second != 0 765 && u8d.first != 0xFEFF 766 && ( u8d.first == 0x85 767 || ( u8d.first >= 0xA0 768 && u8d.first <= 0xD7FF) 769 || ( u8d.first >= 0xE000 770 && u8d.first <= 0xFFFD) 771 || ( u8d.first >= 0x10000 772 && u8d.first <= 0x10FFFF))) 773 return Position + u8d.second; 774 } 775 return Position; 776} 777 778StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { 779 if (*Position == 0x0D) { 780 if (Position + 1 != End && *(Position + 1) == 0x0A) 781 return Position + 2; 782 return Position + 1; 783 } 784 785 if (*Position == 0x0A) 786 return Position + 1; 787 return Position; 788} 789 790 791StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { 792 if (Position == End) 793 return Position; 794 if (*Position == ' ' || *Position == '\t') 795 return Position + 1; 796 return Position; 797} 798 799StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { 800 if (Position == End) 801 return Position; 802 if (*Position == ' ' || *Position == '\t') 803 return Position; 804 return skip_nb_char(Position); 805} 806 807StringRef::iterator Scanner::skip_while( SkipWhileFunc Func 808 , StringRef::iterator Position) { 809 while (true) { 810 StringRef::iterator i = (this->*Func)(Position); 811 if (i == Position) 812 break; 813 Position = i; 814 } 815 return Position; 816} 817 818static bool is_ns_hex_digit(const char C) { 819 return (C >= '0' && C <= '9') 820 || (C >= 'a' && C <= 'z') 821 || (C >= 'A' && C <= 'Z'); 822} 823 824static bool is_ns_word_char(const char C) { 825 return C == '-' 826 || (C >= 'a' && C <= 'z') 827 || (C >= 'A' && C <= 'Z'); 828} 829 830StringRef Scanner::scan_ns_uri_char() { 831 StringRef::iterator Start = Current; 832 while (true) { 833 if (Current == End) 834 break; 835 if (( *Current == '%' 836 && Current + 2 < End 837 && is_ns_hex_digit(*(Current + 1)) 838 && is_ns_hex_digit(*(Current + 2))) 839 || is_ns_word_char(*Current) 840 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") 841 != StringRef::npos) { 842 ++Current; 843 ++Column; 844 } else 845 break; 846 } 847 return StringRef(Start, Current - Start); 848} 849 850StringRef Scanner::scan_ns_plain_one_line() { 851 StringRef::iterator start = Current; 852 // The first character must already be verified. 853 ++Current; 854 while (true) { 855 if (Current == End) { 856 break; 857 } else if (*Current == ':') { 858 // Check if the next character is a ns-char. 859 if (Current + 1 == End) 860 break; 861 StringRef::iterator i = skip_ns_char(Current + 1); 862 if (Current + 1 != i) { 863 Current = i; 864 Column += 2; // Consume both the ':' and ns-char. 865 } else 866 break; 867 } else if (*Current == '#') { 868 // Check if the previous character was a ns-char. 869 // The & 0x80 check is to check for the trailing byte of a utf-8 870 if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { 871 ++Current; 872 ++Column; 873 } else 874 break; 875 } else { 876 StringRef::iterator i = skip_nb_char(Current); 877 if (i == Current) 878 break; 879 Current = i; 880 ++Column; 881 } 882 } 883 return StringRef(start, Current - start); 884} 885 886bool Scanner::consume(uint32_t Expected) { 887 if (Expected >= 0x80) 888 report_fatal_error("Not dealing with this yet"); 889 if (Current == End) 890 return false; 891 if (uint8_t(*Current) >= 0x80) 892 report_fatal_error("Not dealing with this yet"); 893 if (uint8_t(*Current) == Expected) { 894 ++Current; 895 ++Column; 896 return true; 897 } 898 return false; 899} 900 901void Scanner::skip(uint32_t Distance) { 902 Current += Distance; 903 Column += Distance; 904} 905 906bool Scanner::isBlankOrBreak(StringRef::iterator Position) { 907 if (Position == End) 908 return false; 909 if ( *Position == ' ' || *Position == '\t' 910 || *Position == '\r' || *Position == '\n') 911 return true; 912 return false; 913} 914 915void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok 916 , unsigned AtColumn 917 , bool IsRequired) { 918 if (IsSimpleKeyAllowed) { 919 SimpleKey SK; 920 SK.Tok = Tok; 921 SK.Line = Line; 922 SK.Column = AtColumn; 923 SK.IsRequired = IsRequired; 924 SK.FlowLevel = FlowLevel; 925 SimpleKeys.push_back(SK); 926 } 927} 928 929void Scanner::removeStaleSimpleKeyCandidates() { 930 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); 931 i != SimpleKeys.end();) { 932 if (i->Line != Line || i->Column + 1024 < Column) { 933 if (i->IsRequired) 934 setError( "Could not find expected : for simple key" 935 , i->Tok->Range.begin()); 936 i = SimpleKeys.erase(i); 937 } else 938 ++i; 939 } 940} 941 942void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { 943 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) 944 SimpleKeys.pop_back(); 945} 946 947bool Scanner::unrollIndent(int ToColumn) { 948 Token T; 949 // Indentation is ignored in flow. 950 if (FlowLevel != 0) 951 return true; 952 953 while (Indent > ToColumn) { 954 T.Kind = Token::TK_BlockEnd; 955 T.Range = StringRef(Current, 1); 956 TokenQueue.push_back(T); 957 Indent = Indents.pop_back_val(); 958 } 959 960 return true; 961} 962 963bool Scanner::rollIndent( int ToColumn 964 , Token::TokenKind Kind 965 , TokenQueueT::iterator InsertPoint) { 966 if (FlowLevel) 967 return true; 968 if (Indent < ToColumn) { 969 Indents.push_back(Indent); 970 Indent = ToColumn; 971 972 Token T; 973 T.Kind = Kind; 974 T.Range = StringRef(Current, 0); 975 TokenQueue.insert(InsertPoint, T); 976 } 977 return true; 978} 979 980void Scanner::scanToNextToken() { 981 while (true) { 982 while (*Current == ' ' || *Current == '\t') { 983 skip(1); 984 } 985 986 // Skip comment. 987 if (*Current == '#') { 988 while (true) { 989 // This may skip more than one byte, thus Column is only incremented 990 // for code points. 991 StringRef::iterator i = skip_nb_char(Current); 992 if (i == Current) 993 break; 994 Current = i; 995 ++Column; 996 } 997 } 998 999 // Skip EOL. 1000 StringRef::iterator i = skip_b_break(Current); 1001 if (i == Current) 1002 break; 1003 Current = i; 1004 ++Line; 1005 Column = 0; 1006 // New lines may start a simple key. 1007 if (!FlowLevel) 1008 IsSimpleKeyAllowed = true; 1009 } 1010} 1011 1012bool Scanner::scanStreamStart() { 1013 IsStartOfStream = false; 1014 1015 EncodingInfo EI = getUnicodeEncoding(currentInput()); 1016 1017 Token T; 1018 T.Kind = Token::TK_StreamStart; 1019 T.Range = StringRef(Current, EI.second); 1020 TokenQueue.push_back(T); 1021 Current += EI.second; 1022 return true; 1023} 1024 1025bool Scanner::scanStreamEnd() { 1026 // Force an ending new line if one isn't present. 1027 if (Column != 0) { 1028 Column = 0; 1029 ++Line; 1030 } 1031 1032 unrollIndent(-1); 1033 SimpleKeys.clear(); 1034 IsSimpleKeyAllowed = false; 1035 1036 Token T; 1037 T.Kind = Token::TK_StreamEnd; 1038 T.Range = StringRef(Current, 0); 1039 TokenQueue.push_back(T); 1040 return true; 1041} 1042 1043bool Scanner::scanDirective() { 1044 // Reset the indentation level. 1045 unrollIndent(-1); 1046 SimpleKeys.clear(); 1047 IsSimpleKeyAllowed = false; 1048 1049 StringRef::iterator Start = Current; 1050 consume('%'); 1051 StringRef::iterator NameStart = Current; 1052 Current = skip_while(&Scanner::skip_ns_char, Current); 1053 StringRef Name(NameStart, Current - NameStart); 1054 Current = skip_while(&Scanner::skip_s_white, Current); 1055 1056 if (Name == "YAML") { 1057 Current = skip_while(&Scanner::skip_ns_char, Current); 1058 Token T; 1059 T.Kind = Token::TK_VersionDirective; 1060 T.Range = StringRef(Start, Current - Start); 1061 TokenQueue.push_back(T); 1062 return true; 1063 } 1064 return false; 1065} 1066 1067bool Scanner::scanDocumentIndicator(bool IsStart) { 1068 unrollIndent(-1); 1069 SimpleKeys.clear(); 1070 IsSimpleKeyAllowed = false; 1071 1072 Token T; 1073 T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; 1074 T.Range = StringRef(Current, 3); 1075 skip(3); 1076 TokenQueue.push_back(T); 1077 return true; 1078} 1079 1080bool Scanner::scanFlowCollectionStart(bool IsSequence) { 1081 Token T; 1082 T.Kind = IsSequence ? Token::TK_FlowSequenceStart 1083 : Token::TK_FlowMappingStart; 1084 T.Range = StringRef(Current, 1); 1085 skip(1); 1086 TokenQueue.push_back(T); 1087 1088 // [ and { may begin a simple key. 1089 saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); 1090 1091 // And may also be followed by a simple key. 1092 IsSimpleKeyAllowed = true; 1093 ++FlowLevel; 1094 return true; 1095} 1096 1097bool Scanner::scanFlowCollectionEnd(bool IsSequence) { 1098 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1099 IsSimpleKeyAllowed = false; 1100 Token T; 1101 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd 1102 : Token::TK_FlowMappingEnd; 1103 T.Range = StringRef(Current, 1); 1104 skip(1); 1105 TokenQueue.push_back(T); 1106 if (FlowLevel) 1107 --FlowLevel; 1108 return true; 1109} 1110 1111bool Scanner::scanFlowEntry() { 1112 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1113 IsSimpleKeyAllowed = true; 1114 Token T; 1115 T.Kind = Token::TK_FlowEntry; 1116 T.Range = StringRef(Current, 1); 1117 skip(1); 1118 TokenQueue.push_back(T); 1119 return true; 1120} 1121 1122bool Scanner::scanBlockEntry() { 1123 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); 1124 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1125 IsSimpleKeyAllowed = true; 1126 Token T; 1127 T.Kind = Token::TK_BlockEntry; 1128 T.Range = StringRef(Current, 1); 1129 skip(1); 1130 TokenQueue.push_back(T); 1131 return true; 1132} 1133 1134bool Scanner::scanKey() { 1135 if (!FlowLevel) 1136 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 1137 1138 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1139 IsSimpleKeyAllowed = !FlowLevel; 1140 1141 Token T; 1142 T.Kind = Token::TK_Key; 1143 T.Range = StringRef(Current, 1); 1144 skip(1); 1145 TokenQueue.push_back(T); 1146 return true; 1147} 1148 1149bool Scanner::scanValue() { 1150 // If the previous token could have been a simple key, insert the key token 1151 // into the token queue. 1152 if (!SimpleKeys.empty()) { 1153 SimpleKey SK = SimpleKeys.pop_back_val(); 1154 Token T; 1155 T.Kind = Token::TK_Key; 1156 T.Range = SK.Tok->Range; 1157 TokenQueueT::iterator i, e; 1158 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { 1159 if (i == SK.Tok) 1160 break; 1161 } 1162 assert(i != e && "SimpleKey not in token queue!"); 1163 i = TokenQueue.insert(i, T); 1164 1165 // We may also need to add a Block-Mapping-Start token. 1166 rollIndent(SK.Column, Token::TK_BlockMappingStart, i); 1167 1168 IsSimpleKeyAllowed = false; 1169 } else { 1170 if (!FlowLevel) 1171 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 1172 IsSimpleKeyAllowed = !FlowLevel; 1173 } 1174 1175 Token T; 1176 T.Kind = Token::TK_Value; 1177 T.Range = StringRef(Current, 1); 1178 skip(1); 1179 TokenQueue.push_back(T); 1180 return true; 1181} 1182 1183// Forbidding inlining improves performance by roughly 20%. 1184// FIXME: Remove once llvm optimizes this to the faster version without hints. 1185LLVM_ATTRIBUTE_NOINLINE static bool 1186wasEscaped(StringRef::iterator First, StringRef::iterator Position); 1187 1188// Returns whether a character at 'Position' was escaped with a leading '\'. 1189// 'First' specifies the position of the first character in the string. 1190static bool wasEscaped(StringRef::iterator First, 1191 StringRef::iterator Position) { 1192 assert(Position - 1 >= First); 1193 StringRef::iterator I = Position - 1; 1194 // We calculate the number of consecutive '\'s before the current position 1195 // by iterating backwards through our string. 1196 while (I >= First && *I == '\\') --I; 1197 // (Position - 1 - I) now contains the number of '\'s before the current 1198 // position. If it is odd, the character at 'Position' was escaped. 1199 return (Position - 1 - I) % 2 == 1; 1200} 1201 1202bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { 1203 StringRef::iterator Start = Current; 1204 unsigned ColStart = Column; 1205 if (IsDoubleQuoted) { 1206 do { 1207 ++Current; 1208 while (Current != End && *Current != '"') 1209 ++Current; 1210 // Repeat until the previous character was not a '\' or was an escaped 1211 // backslash. 1212 } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current)); 1213 } else { 1214 skip(1); 1215 while (true) { 1216 // Skip a ' followed by another '. 1217 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { 1218 skip(2); 1219 continue; 1220 } else if (*Current == '\'') 1221 break; 1222 StringRef::iterator i = skip_nb_char(Current); 1223 if (i == Current) { 1224 i = skip_b_break(Current); 1225 if (i == Current) 1226 break; 1227 Current = i; 1228 Column = 0; 1229 ++Line; 1230 } else { 1231 if (i == End) 1232 break; 1233 Current = i; 1234 ++Column; 1235 } 1236 } 1237 } 1238 skip(1); // Skip ending quote. 1239 Token T; 1240 T.Kind = Token::TK_Scalar; 1241 T.Range = StringRef(Start, Current - Start); 1242 TokenQueue.push_back(T); 1243 1244 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1245 1246 IsSimpleKeyAllowed = false; 1247 1248 return true; 1249} 1250 1251bool Scanner::scanPlainScalar() { 1252 StringRef::iterator Start = Current; 1253 unsigned ColStart = Column; 1254 unsigned LeadingBlanks = 0; 1255 assert(Indent >= -1 && "Indent must be >= -1 !"); 1256 unsigned indent = static_cast<unsigned>(Indent + 1); 1257 while (true) { 1258 if (*Current == '#') 1259 break; 1260 1261 while (!isBlankOrBreak(Current)) { 1262 if ( FlowLevel && *Current == ':' 1263 && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { 1264 setError("Found unexpected ':' while scanning a plain scalar", Current); 1265 return false; 1266 } 1267 1268 // Check for the end of the plain scalar. 1269 if ( (*Current == ':' && isBlankOrBreak(Current + 1)) 1270 || ( FlowLevel 1271 && (StringRef(Current, 1).find_first_of(",:?[]{}") 1272 != StringRef::npos))) 1273 break; 1274 1275 StringRef::iterator i = skip_nb_char(Current); 1276 if (i == Current) 1277 break; 1278 Current = i; 1279 ++Column; 1280 } 1281 1282 // Are we at the end? 1283 if (!isBlankOrBreak(Current)) 1284 break; 1285 1286 // Eat blanks. 1287 StringRef::iterator Tmp = Current; 1288 while (isBlankOrBreak(Tmp)) { 1289 StringRef::iterator i = skip_s_white(Tmp); 1290 if (i != Tmp) { 1291 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { 1292 setError("Found invalid tab character in indentation", Tmp); 1293 return false; 1294 } 1295 Tmp = i; 1296 ++Column; 1297 } else { 1298 i = skip_b_break(Tmp); 1299 if (!LeadingBlanks) 1300 LeadingBlanks = 1; 1301 Tmp = i; 1302 Column = 0; 1303 ++Line; 1304 } 1305 } 1306 1307 if (!FlowLevel && Column < indent) 1308 break; 1309 1310 Current = Tmp; 1311 } 1312 if (Start == Current) { 1313 setError("Got empty plain scalar", Start); 1314 return false; 1315 } 1316 Token T; 1317 T.Kind = Token::TK_Scalar; 1318 T.Range = StringRef(Start, Current - Start); 1319 TokenQueue.push_back(T); 1320 1321 // Plain scalars can be simple keys. 1322 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1323 1324 IsSimpleKeyAllowed = false; 1325 1326 return true; 1327} 1328 1329bool Scanner::scanAliasOrAnchor(bool IsAlias) { 1330 StringRef::iterator Start = Current; 1331 unsigned ColStart = Column; 1332 skip(1); 1333 while(true) { 1334 if ( *Current == '[' || *Current == ']' 1335 || *Current == '{' || *Current == '}' 1336 || *Current == ',' 1337 || *Current == ':') 1338 break; 1339 StringRef::iterator i = skip_ns_char(Current); 1340 if (i == Current) 1341 break; 1342 Current = i; 1343 ++Column; 1344 } 1345 1346 if (Start == Current) { 1347 setError("Got empty alias or anchor", Start); 1348 return false; 1349 } 1350 1351 Token T; 1352 T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; 1353 T.Range = StringRef(Start, Current - Start); 1354 TokenQueue.push_back(T); 1355 1356 // Alias and anchors can be simple keys. 1357 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1358 1359 IsSimpleKeyAllowed = false; 1360 1361 return true; 1362} 1363 1364bool Scanner::scanBlockScalar(bool IsLiteral) { 1365 StringRef::iterator Start = Current; 1366 skip(1); // Eat | or > 1367 while(true) { 1368 StringRef::iterator i = skip_nb_char(Current); 1369 if (i == Current) { 1370 if (Column == 0) 1371 break; 1372 i = skip_b_break(Current); 1373 if (i != Current) { 1374 // We got a line break. 1375 Column = 0; 1376 ++Line; 1377 Current = i; 1378 continue; 1379 } else { 1380 // There was an error, which should already have been printed out. 1381 return false; 1382 } 1383 } 1384 Current = i; 1385 ++Column; 1386 } 1387 1388 if (Start == Current) { 1389 setError("Got empty block scalar", Start); 1390 return false; 1391 } 1392 1393 Token T; 1394 T.Kind = Token::TK_Scalar; 1395 T.Range = StringRef(Start, Current - Start); 1396 TokenQueue.push_back(T); 1397 return true; 1398} 1399 1400bool Scanner::scanTag() { 1401 StringRef::iterator Start = Current; 1402 unsigned ColStart = Column; 1403 skip(1); // Eat !. 1404 if (Current == End || isBlankOrBreak(Current)); // An empty tag. 1405 else if (*Current == '<') { 1406 skip(1); 1407 scan_ns_uri_char(); 1408 if (!consume('>')) 1409 return false; 1410 } else { 1411 // FIXME: Actually parse the c-ns-shorthand-tag rule. 1412 Current = skip_while(&Scanner::skip_ns_char, Current); 1413 } 1414 1415 Token T; 1416 T.Kind = Token::TK_Tag; 1417 T.Range = StringRef(Start, Current - Start); 1418 TokenQueue.push_back(T); 1419 1420 // Tags can be simple keys. 1421 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1422 1423 IsSimpleKeyAllowed = false; 1424 1425 return true; 1426} 1427 1428bool Scanner::fetchMoreTokens() { 1429 if (IsStartOfStream) 1430 return scanStreamStart(); 1431 1432 scanToNextToken(); 1433 1434 if (Current == End) 1435 return scanStreamEnd(); 1436 1437 removeStaleSimpleKeyCandidates(); 1438 1439 unrollIndent(Column); 1440 1441 if (Column == 0 && *Current == '%') 1442 return scanDirective(); 1443 1444 if (Column == 0 && Current + 4 <= End 1445 && *Current == '-' 1446 && *(Current + 1) == '-' 1447 && *(Current + 2) == '-' 1448 && (Current + 3 == End || isBlankOrBreak(Current + 3))) 1449 return scanDocumentIndicator(true); 1450 1451 if (Column == 0 && Current + 4 <= End 1452 && *Current == '.' 1453 && *(Current + 1) == '.' 1454 && *(Current + 2) == '.' 1455 && (Current + 3 == End || isBlankOrBreak(Current + 3))) 1456 return scanDocumentIndicator(false); 1457 1458 if (*Current == '[') 1459 return scanFlowCollectionStart(true); 1460 1461 if (*Current == '{') 1462 return scanFlowCollectionStart(false); 1463 1464 if (*Current == ']') 1465 return scanFlowCollectionEnd(true); 1466 1467 if (*Current == '}') 1468 return scanFlowCollectionEnd(false); 1469 1470 if (*Current == ',') 1471 return scanFlowEntry(); 1472 1473 if (*Current == '-' && isBlankOrBreak(Current + 1)) 1474 return scanBlockEntry(); 1475 1476 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) 1477 return scanKey(); 1478 1479 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) 1480 return scanValue(); 1481 1482 if (*Current == '*') 1483 return scanAliasOrAnchor(true); 1484 1485 if (*Current == '&') 1486 return scanAliasOrAnchor(false); 1487 1488 if (*Current == '!') 1489 return scanTag(); 1490 1491 if (*Current == '|' && !FlowLevel) 1492 return scanBlockScalar(true); 1493 1494 if (*Current == '>' && !FlowLevel) 1495 return scanBlockScalar(false); 1496 1497 if (*Current == '\'') 1498 return scanFlowScalar(false); 1499 1500 if (*Current == '"') 1501 return scanFlowScalar(true); 1502 1503 // Get a plain scalar. 1504 StringRef FirstChar(Current, 1); 1505 if (!(isBlankOrBreak(Current) 1506 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) 1507 || (*Current == '-' && !isBlankOrBreak(Current + 1)) 1508 || (!FlowLevel && (*Current == '?' || *Current == ':') 1509 && isBlankOrBreak(Current + 1)) 1510 || (!FlowLevel && *Current == ':' 1511 && Current + 2 < End 1512 && *(Current + 1) == ':' 1513 && !isBlankOrBreak(Current + 2))) 1514 return scanPlainScalar(); 1515 1516 setError("Unrecognized character while tokenizing."); 1517 return false; 1518} 1519 1520Stream::Stream(StringRef Input, SourceMgr &SM) 1521 : scanner(new Scanner(Input, SM)) 1522 , CurrentDoc(0) {} 1523 1524bool Stream::failed() { return scanner->failed(); } 1525 1526void Stream::printError(Node *N, const Twine &Msg) { 1527 SmallVector<SMRange, 1> Ranges; 1528 Ranges.push_back(N->getSourceRange()); 1529 scanner->printError( N->getSourceRange().Start 1530 , SourceMgr::DK_Error 1531 , Msg 1532 , Ranges); 1533} 1534 1535void Stream::handleYAMLDirective(const Token &t) { 1536 // TODO: Ensure version is 1.x. 1537} 1538 1539document_iterator Stream::begin() { 1540 if (CurrentDoc) 1541 report_fatal_error("Can only iterate over the stream once"); 1542 1543 // Skip Stream-Start. 1544 scanner->getNext(); 1545 1546 CurrentDoc.reset(new Document(*this)); 1547 return document_iterator(CurrentDoc); 1548} 1549 1550document_iterator Stream::end() { 1551 return document_iterator(); 1552} 1553 1554void Stream::skip() { 1555 for (document_iterator i = begin(), e = end(); i != e; ++i) 1556 i->skip(); 1557} 1558 1559Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A) 1560 : Doc(D) 1561 , TypeID(Type) 1562 , Anchor(A) { 1563 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); 1564 SourceRange = SMRange(Start, Start); 1565} 1566 1567Node::~Node() {} 1568 1569Token &Node::peekNext() { 1570 return Doc->peekNext(); 1571} 1572 1573Token Node::getNext() { 1574 return Doc->getNext(); 1575} 1576 1577Node *Node::parseBlockNode() { 1578 return Doc->parseBlockNode(); 1579} 1580 1581BumpPtrAllocator &Node::getAllocator() { 1582 return Doc->NodeAllocator; 1583} 1584 1585void Node::setError(const Twine &Msg, Token &Tok) const { 1586 Doc->setError(Msg, Tok); 1587} 1588 1589bool Node::failed() const { 1590 return Doc->failed(); 1591} 1592 1593 1594 1595StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { 1596 // TODO: Handle newlines properly. We need to remove leading whitespace. 1597 if (Value[0] == '"') { // Double quoted. 1598 // Pull off the leading and trailing "s. 1599 StringRef UnquotedValue = Value.substr(1, Value.size() - 2); 1600 // Search for characters that would require unescaping the value. 1601 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); 1602 if (i != StringRef::npos) 1603 return unescapeDoubleQuoted(UnquotedValue, i, Storage); 1604 return UnquotedValue; 1605 } else if (Value[0] == '\'') { // Single quoted. 1606 // Pull off the leading and trailing 's. 1607 StringRef UnquotedValue = Value.substr(1, Value.size() - 2); 1608 StringRef::size_type i = UnquotedValue.find('\''); 1609 if (i != StringRef::npos) { 1610 // We're going to need Storage. 1611 Storage.clear(); 1612 Storage.reserve(UnquotedValue.size()); 1613 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { 1614 StringRef Valid(UnquotedValue.begin(), i); 1615 Storage.insert(Storage.end(), Valid.begin(), Valid.end()); 1616 Storage.push_back('\''); 1617 UnquotedValue = UnquotedValue.substr(i + 2); 1618 } 1619 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); 1620 return StringRef(Storage.begin(), Storage.size()); 1621 } 1622 return UnquotedValue; 1623 } 1624 // Plain or block. 1625 size_t trimtrail = Value.rfind(' '); 1626 return Value.drop_back( 1627 trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail); 1628} 1629 1630StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue 1631 , StringRef::size_type i 1632 , SmallVectorImpl<char> &Storage) 1633 const { 1634 // Use Storage to build proper value. 1635 Storage.clear(); 1636 Storage.reserve(UnquotedValue.size()); 1637 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { 1638 // Insert all previous chars into Storage. 1639 StringRef Valid(UnquotedValue.begin(), i); 1640 Storage.insert(Storage.end(), Valid.begin(), Valid.end()); 1641 // Chop off inserted chars. 1642 UnquotedValue = UnquotedValue.substr(i); 1643 1644 assert(!UnquotedValue.empty() && "Can't be empty!"); 1645 1646 // Parse escape or line break. 1647 switch (UnquotedValue[0]) { 1648 case '\r': 1649 case '\n': 1650 Storage.push_back('\n'); 1651 if ( UnquotedValue.size() > 1 1652 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) 1653 UnquotedValue = UnquotedValue.substr(1); 1654 UnquotedValue = UnquotedValue.substr(1); 1655 break; 1656 default: 1657 if (UnquotedValue.size() == 1) 1658 // TODO: Report error. 1659 break; 1660 UnquotedValue = UnquotedValue.substr(1); 1661 switch (UnquotedValue[0]) { 1662 default: { 1663 Token T; 1664 T.Range = StringRef(UnquotedValue.begin(), 1); 1665 setError("Unrecognized escape code!", T); 1666 return ""; 1667 } 1668 case '\r': 1669 case '\n': 1670 // Remove the new line. 1671 if ( UnquotedValue.size() > 1 1672 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) 1673 UnquotedValue = UnquotedValue.substr(1); 1674 // If this was just a single byte newline, it will get skipped 1675 // below. 1676 break; 1677 case '0': 1678 Storage.push_back(0x00); 1679 break; 1680 case 'a': 1681 Storage.push_back(0x07); 1682 break; 1683 case 'b': 1684 Storage.push_back(0x08); 1685 break; 1686 case 't': 1687 case 0x09: 1688 Storage.push_back(0x09); 1689 break; 1690 case 'n': 1691 Storage.push_back(0x0A); 1692 break; 1693 case 'v': 1694 Storage.push_back(0x0B); 1695 break; 1696 case 'f': 1697 Storage.push_back(0x0C); 1698 break; 1699 case 'r': 1700 Storage.push_back(0x0D); 1701 break; 1702 case 'e': 1703 Storage.push_back(0x1B); 1704 break; 1705 case ' ': 1706 Storage.push_back(0x20); 1707 break; 1708 case '"': 1709 Storage.push_back(0x22); 1710 break; 1711 case '/': 1712 Storage.push_back(0x2F); 1713 break; 1714 case '\\': 1715 Storage.push_back(0x5C); 1716 break; 1717 case 'N': 1718 encodeUTF8(0x85, Storage); 1719 break; 1720 case '_': 1721 encodeUTF8(0xA0, Storage); 1722 break; 1723 case 'L': 1724 encodeUTF8(0x2028, Storage); 1725 break; 1726 case 'P': 1727 encodeUTF8(0x2029, Storage); 1728 break; 1729 case 'x': { 1730 if (UnquotedValue.size() < 3) 1731 // TODO: Report error. 1732 break; 1733 unsigned int UnicodeScalarValue; 1734 UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue); 1735 encodeUTF8(UnicodeScalarValue, Storage); 1736 UnquotedValue = UnquotedValue.substr(2); 1737 break; 1738 } 1739 case 'u': { 1740 if (UnquotedValue.size() < 5) 1741 // TODO: Report error. 1742 break; 1743 unsigned int UnicodeScalarValue; 1744 UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue); 1745 encodeUTF8(UnicodeScalarValue, Storage); 1746 UnquotedValue = UnquotedValue.substr(4); 1747 break; 1748 } 1749 case 'U': { 1750 if (UnquotedValue.size() < 9) 1751 // TODO: Report error. 1752 break; 1753 unsigned int UnicodeScalarValue; 1754 UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue); 1755 encodeUTF8(UnicodeScalarValue, Storage); 1756 UnquotedValue = UnquotedValue.substr(8); 1757 break; 1758 } 1759 } 1760 UnquotedValue = UnquotedValue.substr(1); 1761 } 1762 } 1763 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); 1764 return StringRef(Storage.begin(), Storage.size()); 1765} 1766 1767Node *KeyValueNode::getKey() { 1768 if (Key) 1769 return Key; 1770 // Handle implicit null keys. 1771 { 1772 Token &t = peekNext(); 1773 if ( t.Kind == Token::TK_BlockEnd 1774 || t.Kind == Token::TK_Value 1775 || t.Kind == Token::TK_Error) { 1776 return Key = new (getAllocator()) NullNode(Doc); 1777 } 1778 if (t.Kind == Token::TK_Key) 1779 getNext(); // skip TK_Key. 1780 } 1781 1782 // Handle explicit null keys. 1783 Token &t = peekNext(); 1784 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { 1785 return Key = new (getAllocator()) NullNode(Doc); 1786 } 1787 1788 // We've got a normal key. 1789 return Key = parseBlockNode(); 1790} 1791 1792Node *KeyValueNode::getValue() { 1793 if (Value) 1794 return Value; 1795 getKey()->skip(); 1796 if (failed()) 1797 return Value = new (getAllocator()) NullNode(Doc); 1798 1799 // Handle implicit null values. 1800 { 1801 Token &t = peekNext(); 1802 if ( t.Kind == Token::TK_BlockEnd 1803 || t.Kind == Token::TK_FlowMappingEnd 1804 || t.Kind == Token::TK_Key 1805 || t.Kind == Token::TK_FlowEntry 1806 || t.Kind == Token::TK_Error) { 1807 return Value = new (getAllocator()) NullNode(Doc); 1808 } 1809 1810 if (t.Kind != Token::TK_Value) { 1811 setError("Unexpected token in Key Value.", t); 1812 return Value = new (getAllocator()) NullNode(Doc); 1813 } 1814 getNext(); // skip TK_Value. 1815 } 1816 1817 // Handle explicit null values. 1818 Token &t = peekNext(); 1819 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { 1820 return Value = new (getAllocator()) NullNode(Doc); 1821 } 1822 1823 // We got a normal value. 1824 return Value = parseBlockNode(); 1825} 1826 1827void MappingNode::increment() { 1828 if (failed()) { 1829 IsAtEnd = true; 1830 CurrentEntry = 0; 1831 return; 1832 } 1833 if (CurrentEntry) { 1834 CurrentEntry->skip(); 1835 if (Type == MT_Inline) { 1836 IsAtEnd = true; 1837 CurrentEntry = 0; 1838 return; 1839 } 1840 } 1841 Token T = peekNext(); 1842 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { 1843 // KeyValueNode eats the TK_Key. That way it can detect null keys. 1844 CurrentEntry = new (getAllocator()) KeyValueNode(Doc); 1845 } else if (Type == MT_Block) { 1846 switch (T.Kind) { 1847 case Token::TK_BlockEnd: 1848 getNext(); 1849 IsAtEnd = true; 1850 CurrentEntry = 0; 1851 break; 1852 default: 1853 setError("Unexpected token. Expected Key or Block End", T); 1854 case Token::TK_Error: 1855 IsAtEnd = true; 1856 CurrentEntry = 0; 1857 } 1858 } else { 1859 switch (T.Kind) { 1860 case Token::TK_FlowEntry: 1861 // Eat the flow entry and recurse. 1862 getNext(); 1863 return increment(); 1864 case Token::TK_FlowMappingEnd: 1865 getNext(); 1866 case Token::TK_Error: 1867 // Set this to end iterator. 1868 IsAtEnd = true; 1869 CurrentEntry = 0; 1870 break; 1871 default: 1872 setError( "Unexpected token. Expected Key, Flow Entry, or Flow " 1873 "Mapping End." 1874 , T); 1875 IsAtEnd = true; 1876 CurrentEntry = 0; 1877 } 1878 } 1879} 1880 1881void SequenceNode::increment() { 1882 if (failed()) { 1883 IsAtEnd = true; 1884 CurrentEntry = 0; 1885 return; 1886 } 1887 if (CurrentEntry) 1888 CurrentEntry->skip(); 1889 Token T = peekNext(); 1890 if (SeqType == ST_Block) { 1891 switch (T.Kind) { 1892 case Token::TK_BlockEntry: 1893 getNext(); 1894 CurrentEntry = parseBlockNode(); 1895 if (CurrentEntry == 0) { // An error occurred. 1896 IsAtEnd = true; 1897 CurrentEntry = 0; 1898 } 1899 break; 1900 case Token::TK_BlockEnd: 1901 getNext(); 1902 IsAtEnd = true; 1903 CurrentEntry = 0; 1904 break; 1905 default: 1906 setError( "Unexpected token. Expected Block Entry or Block End." 1907 , T); 1908 case Token::TK_Error: 1909 IsAtEnd = true; 1910 CurrentEntry = 0; 1911 } 1912 } else if (SeqType == ST_Indentless) { 1913 switch (T.Kind) { 1914 case Token::TK_BlockEntry: 1915 getNext(); 1916 CurrentEntry = parseBlockNode(); 1917 if (CurrentEntry == 0) { // An error occurred. 1918 IsAtEnd = true; 1919 CurrentEntry = 0; 1920 } 1921 break; 1922 default: 1923 case Token::TK_Error: 1924 IsAtEnd = true; 1925 CurrentEntry = 0; 1926 } 1927 } else if (SeqType == ST_Flow) { 1928 switch (T.Kind) { 1929 case Token::TK_FlowEntry: 1930 // Eat the flow entry and recurse. 1931 getNext(); 1932 WasPreviousTokenFlowEntry = true; 1933 return increment(); 1934 case Token::TK_FlowSequenceEnd: 1935 getNext(); 1936 case Token::TK_Error: 1937 // Set this to end iterator. 1938 IsAtEnd = true; 1939 CurrentEntry = 0; 1940 break; 1941 case Token::TK_StreamEnd: 1942 case Token::TK_DocumentEnd: 1943 case Token::TK_DocumentStart: 1944 setError("Could not find closing ]!", T); 1945 // Set this to end iterator. 1946 IsAtEnd = true; 1947 CurrentEntry = 0; 1948 break; 1949 default: 1950 if (!WasPreviousTokenFlowEntry) { 1951 setError("Expected , between entries!", T); 1952 IsAtEnd = true; 1953 CurrentEntry = 0; 1954 break; 1955 } 1956 // Otherwise it must be a flow entry. 1957 CurrentEntry = parseBlockNode(); 1958 if (!CurrentEntry) { 1959 IsAtEnd = true; 1960 } 1961 WasPreviousTokenFlowEntry = false; 1962 break; 1963 } 1964 } 1965} 1966 1967Document::Document(Stream &S) : stream(S), Root(0) { 1968 if (parseDirectives()) 1969 expectToken(Token::TK_DocumentStart); 1970 Token &T = peekNext(); 1971 if (T.Kind == Token::TK_DocumentStart) 1972 getNext(); 1973} 1974 1975bool Document::skip() { 1976 if (stream.scanner->failed()) 1977 return false; 1978 if (!Root) 1979 getRoot(); 1980 Root->skip(); 1981 Token &T = peekNext(); 1982 if (T.Kind == Token::TK_StreamEnd) 1983 return false; 1984 if (T.Kind == Token::TK_DocumentEnd) { 1985 getNext(); 1986 return skip(); 1987 } 1988 return true; 1989} 1990 1991Token &Document::peekNext() { 1992 return stream.scanner->peekNext(); 1993} 1994 1995Token Document::getNext() { 1996 return stream.scanner->getNext(); 1997} 1998 1999void Document::setError(const Twine &Message, Token &Location) const { 2000 stream.scanner->setError(Message, Location.Range.begin()); 2001} 2002 2003bool Document::failed() const { 2004 return stream.scanner->failed(); 2005} 2006 2007Node *Document::parseBlockNode() { 2008 Token T = peekNext(); 2009 // Handle properties. 2010 Token AnchorInfo; 2011parse_property: 2012 switch (T.Kind) { 2013 case Token::TK_Alias: 2014 getNext(); 2015 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); 2016 case Token::TK_Anchor: 2017 if (AnchorInfo.Kind == Token::TK_Anchor) { 2018 setError("Already encountered an anchor for this node!", T); 2019 return 0; 2020 } 2021 AnchorInfo = getNext(); // Consume TK_Anchor. 2022 T = peekNext(); 2023 goto parse_property; 2024 case Token::TK_Tag: 2025 getNext(); // Skip TK_Tag. 2026 T = peekNext(); 2027 goto parse_property; 2028 default: 2029 break; 2030 } 2031 2032 switch (T.Kind) { 2033 case Token::TK_BlockEntry: 2034 // We got an unindented BlockEntry sequence. This is not terminated with 2035 // a BlockEnd. 2036 // Don't eat the TK_BlockEntry, SequenceNode needs it. 2037 return new (NodeAllocator) SequenceNode( stream.CurrentDoc 2038 , AnchorInfo.Range.substr(1) 2039 , SequenceNode::ST_Indentless); 2040 case Token::TK_BlockSequenceStart: 2041 getNext(); 2042 return new (NodeAllocator) 2043 SequenceNode( stream.CurrentDoc 2044 , AnchorInfo.Range.substr(1) 2045 , SequenceNode::ST_Block); 2046 case Token::TK_BlockMappingStart: 2047 getNext(); 2048 return new (NodeAllocator) 2049 MappingNode( stream.CurrentDoc 2050 , AnchorInfo.Range.substr(1) 2051 , MappingNode::MT_Block); 2052 case Token::TK_FlowSequenceStart: 2053 getNext(); 2054 return new (NodeAllocator) 2055 SequenceNode( stream.CurrentDoc 2056 , AnchorInfo.Range.substr(1) 2057 , SequenceNode::ST_Flow); 2058 case Token::TK_FlowMappingStart: 2059 getNext(); 2060 return new (NodeAllocator) 2061 MappingNode( stream.CurrentDoc 2062 , AnchorInfo.Range.substr(1) 2063 , MappingNode::MT_Flow); 2064 case Token::TK_Scalar: 2065 getNext(); 2066 return new (NodeAllocator) 2067 ScalarNode( stream.CurrentDoc 2068 , AnchorInfo.Range.substr(1) 2069 , T.Range); 2070 case Token::TK_Key: 2071 // Don't eat the TK_Key, KeyValueNode expects it. 2072 return new (NodeAllocator) 2073 MappingNode( stream.CurrentDoc 2074 , AnchorInfo.Range.substr(1) 2075 , MappingNode::MT_Inline); 2076 case Token::TK_DocumentStart: 2077 case Token::TK_DocumentEnd: 2078 case Token::TK_StreamEnd: 2079 default: 2080 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not 2081 // !!null null. 2082 return new (NodeAllocator) NullNode(stream.CurrentDoc); 2083 case Token::TK_Error: 2084 return 0; 2085 } 2086 llvm_unreachable("Control flow shouldn't reach here."); 2087 return 0; 2088} 2089 2090bool Document::parseDirectives() { 2091 bool isDirective = false; 2092 while (true) { 2093 Token T = peekNext(); 2094 if (T.Kind == Token::TK_TagDirective) { 2095 handleTagDirective(getNext()); 2096 isDirective = true; 2097 } else if (T.Kind == Token::TK_VersionDirective) { 2098 stream.handleYAMLDirective(getNext()); 2099 isDirective = true; 2100 } else 2101 break; 2102 } 2103 return isDirective; 2104} 2105 2106bool Document::expectToken(int TK) { 2107 Token T = getNext(); 2108 if (T.Kind != TK) { 2109 setError("Unexpected token", T); 2110 return false; 2111 } 2112 return true; 2113} 2114 2115OwningPtr<Document> document_iterator::NullDoc; 2116