YAMLParser.cpp revision 93210e847a1496b24cef881723e57c489082dcfe
1//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements a YAML parser.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/YAMLParser.h"
15
16#include "llvm/ADT/ilist.h"
17#include "llvm/ADT/ilist_node.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/raw_ostream.h"
24#include "llvm/Support/SourceMgr.h"
25
26using namespace llvm;
27using namespace yaml;
28
29enum UnicodeEncodingForm {
30  UEF_UTF32_LE, //< UTF-32 Little Endian
31  UEF_UTF32_BE, //< UTF-32 Big Endian
32  UEF_UTF16_LE, //< UTF-16 Little Endian
33  UEF_UTF16_BE, //< UTF-16 Big Endian
34  UEF_UTF8,     //< UTF-8 or ascii.
35  UEF_Unknown   //< Not a valid Unicode encoding.
36};
37
38/// EncodingInfo - Holds the encoding type and length of the byte order mark if
39///                it exists. Length is in {0, 2, 3, 4}.
40typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
41
42/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
43///                      encoding form of \a Input.
44///
45/// @param Input A string of length 0 or more.
46/// @returns An EncodingInfo indicating the Unicode encoding form of the input
47///          and how long the byte order mark is if one exists.
48static EncodingInfo getUnicodeEncoding(StringRef Input) {
49  if (Input.size() == 0)
50    return std::make_pair(UEF_Unknown, 0);
51
52  switch (uint8_t(Input[0])) {
53  case 0x00:
54    if (Input.size() >= 4) {
55      if (  Input[1] == 0
56         && uint8_t(Input[2]) == 0xFE
57         && uint8_t(Input[3]) == 0xFF)
58        return std::make_pair(UEF_UTF32_BE, 4);
59      if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
60        return std::make_pair(UEF_UTF32_BE, 0);
61    }
62
63    if (Input.size() >= 2 && Input[1] != 0)
64      return std::make_pair(UEF_UTF16_BE, 0);
65    return std::make_pair(UEF_Unknown, 0);
66  case 0xFF:
67    if (  Input.size() >= 4
68       && uint8_t(Input[1]) == 0xFE
69       && Input[2] == 0
70       && Input[3] == 0)
71      return std::make_pair(UEF_UTF32_LE, 4);
72
73    if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
74      return std::make_pair(UEF_UTF16_LE, 2);
75    return std::make_pair(UEF_Unknown, 0);
76  case 0xFE:
77    if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
78      return std::make_pair(UEF_UTF16_BE, 2);
79    return std::make_pair(UEF_Unknown, 0);
80  case 0xEF:
81    if (  Input.size() >= 3
82       && uint8_t(Input[1]) == 0xBB
83       && uint8_t(Input[2]) == 0xBF)
84      return std::make_pair(UEF_UTF8, 3);
85    return std::make_pair(UEF_Unknown, 0);
86  }
87
88  // It could still be utf-32 or utf-16.
89  if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
90    return std::make_pair(UEF_UTF32_LE, 0);
91
92  if (Input.size() >= 2 && Input[1] == 0)
93    return std::make_pair(UEF_UTF16_LE, 0);
94
95  return std::make_pair(UEF_UTF8, 0);
96}
97
98namespace llvm {
99namespace yaml {
100/// Token - A single YAML token.
101struct Token : ilist_node<Token> {
102  enum TokenKind {
103    TK_Error, // Uninitialized token.
104    TK_StreamStart,
105    TK_StreamEnd,
106    TK_VersionDirective,
107    TK_TagDirective,
108    TK_DocumentStart,
109    TK_DocumentEnd,
110    TK_BlockEntry,
111    TK_BlockEnd,
112    TK_BlockSequenceStart,
113    TK_BlockMappingStart,
114    TK_FlowEntry,
115    TK_FlowSequenceStart,
116    TK_FlowSequenceEnd,
117    TK_FlowMappingStart,
118    TK_FlowMappingEnd,
119    TK_Key,
120    TK_Value,
121    TK_Scalar,
122    TK_Alias,
123    TK_Anchor,
124    TK_Tag
125  } Kind;
126
127  /// A string of length 0 or more whose begin() points to the logical location
128  /// of the token in the input.
129  StringRef Range;
130
131  Token() : Kind(TK_Error) {}
132};
133}
134}
135
136template<>
137struct ilist_sentinel_traits<Token> {
138  Token *createSentinel() const {
139    return &Sentinel;
140  }
141  static void destroySentinel(Token*) {}
142
143  Token *provideInitialHead() const { return createSentinel(); }
144  Token *ensureHead(Token*) const { return createSentinel(); }
145  static void noteHead(Token*, Token*) {}
146
147private:
148  mutable Token Sentinel;
149};
150
151template<>
152struct ilist_node_traits<Token> {
153  Token *createNode(const Token &V) {
154    return new (Alloc.Allocate<Token>()) Token(V);
155  }
156  static void deleteNode(Token *V) {}
157
158  void addNodeToList(Token *) {}
159  void removeNodeFromList(Token *) {}
160  void transferNodesFromList(ilist_node_traits &    /*SrcTraits*/,
161                             ilist_iterator<Token> /*first*/,
162                             ilist_iterator<Token> /*last*/) {}
163
164  BumpPtrAllocator Alloc;
165};
166
167typedef ilist<Token> TokenQueueT;
168
169namespace {
170/// @brief This struct is used to track simple keys.
171///
172/// Simple keys are handled by creating an entry in SimpleKeys for each Token
173/// which could legally be the start of a simple key. When peekNext is called,
174/// if the Token To be returned is referenced by a SimpleKey, we continue
175/// tokenizing until that potential simple key has either been found to not be
176/// a simple key (we moved on to the next line or went further than 1024 chars).
177/// Or when we run into a Value, and then insert a Key token (and possibly
178/// others) before the SimpleKey's Tok.
179struct SimpleKey {
180  TokenQueueT::iterator Tok;
181  unsigned Column;
182  unsigned Line;
183  unsigned FlowLevel;
184  bool IsRequired;
185
186  bool operator ==(const SimpleKey &Other) {
187    return Tok == Other.Tok;
188  }
189};
190}
191
192/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
193///        subsequence and the subsequence's length in code units (uint8_t).
194///        A length of 0 represents an error.
195typedef std::pair<uint32_t, unsigned> UTF8Decoded;
196
197static UTF8Decoded decodeUTF8(StringRef Range) {
198  StringRef::iterator Position= Range.begin();
199  StringRef::iterator End = Range.end();
200  // 1 byte: [0x00, 0x7f]
201  // Bit pattern: 0xxxxxxx
202  if ((*Position & 0x80) == 0) {
203     return std::make_pair(*Position, 1);
204  }
205  // 2 bytes: [0x80, 0x7ff]
206  // Bit pattern: 110xxxxx 10xxxxxx
207  if (Position + 1 != End &&
208      ((*Position & 0xE0) == 0xC0) &&
209      ((*(Position + 1) & 0xC0) == 0x80)) {
210    uint32_t codepoint = ((*Position & 0x1F) << 6) |
211                          (*(Position + 1) & 0x3F);
212    if (codepoint >= 0x80)
213      return std::make_pair(codepoint, 2);
214  }
215  // 3 bytes: [0x8000, 0xffff]
216  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
217  if (Position + 2 != End &&
218      ((*Position & 0xF0) == 0xE0) &&
219      ((*(Position + 1) & 0xC0) == 0x80) &&
220      ((*(Position + 2) & 0xC0) == 0x80)) {
221    uint32_t codepoint = ((*Position & 0x0F) << 12) |
222                         ((*(Position + 1) & 0x3F) << 6) |
223                          (*(Position + 2) & 0x3F);
224    // Codepoints between 0xD800 and 0xDFFF are invalid, as
225    // they are high / low surrogate halves used by UTF-16.
226    if (codepoint >= 0x800 &&
227        (codepoint < 0xD800 || codepoint > 0xDFFF))
228      return std::make_pair(codepoint, 3);
229  }
230  // 4 bytes: [0x10000, 0x10FFFF]
231  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
232  if (Position + 3 != End &&
233      ((*Position & 0xF8) == 0xF0) &&
234      ((*(Position + 1) & 0xC0) == 0x80) &&
235      ((*(Position + 2) & 0xC0) == 0x80) &&
236      ((*(Position + 3) & 0xC0) == 0x80)) {
237    uint32_t codepoint = ((*Position & 0x07) << 18) |
238                         ((*(Position + 1) & 0x3F) << 12) |
239                         ((*(Position + 2) & 0x3F) << 6) |
240                          (*(Position + 3) & 0x3F);
241    if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
242      return std::make_pair(codepoint, 4);
243  }
244  return std::make_pair(0, 0);
245}
246
247namespace llvm {
248namespace yaml {
249/// @brief Scans YAML tokens from a MemoryBuffer.
250class Scanner {
251public:
252  Scanner(const StringRef Input, SourceMgr &SM);
253
254  /// @brief Parse the next token and return it without popping it.
255  Token &peekNext();
256
257  /// @brief Parse the next token and pop it from the queue.
258  Token getNext();
259
260  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
261                  ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
262    SM.PrintMessage(Loc, Kind, Message, Ranges);
263  }
264
265  void setError(const Twine &Message, StringRef::iterator Position) {
266    if (Current >= End)
267      Current = End - 1;
268
269    // Don't print out more errors after the first one we encounter. The rest
270    // are just the result of the first, and have no meaning.
271    if (!Failed)
272      printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
273    Failed = true;
274  }
275
276  void setError(const Twine &Message) {
277    setError(Message, Current);
278  }
279
280  /// @brief Returns true if an error occurred while parsing.
281  bool failed() {
282    return Failed;
283  }
284
285private:
286  StringRef currentInput() {
287    return StringRef(Current, End - Current);
288  }
289
290  /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
291  ///        at \a Position.
292  ///
293  /// If the UTF-8 code units starting at Position do not form a well-formed
294  /// code unit subsequence, then the Unicode scalar value is 0, and the length
295  /// is 0.
296  UTF8Decoded decodeUTF8(StringRef::iterator Position) {
297    return ::decodeUTF8(StringRef(Position, End - Position));
298  }
299
300  // The following functions are based on the gramar rules in the YAML spec. The
301  // style of the function names it meant to closely match how they are written
302  // in the spec. The number within the [] is the number of the grammar rule in
303  // the spec.
304  //
305  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
306  //
307  // c-
308  //   A production starting and ending with a special character.
309  // b-
310  //   A production matching a single line break.
311  // nb-
312  //   A production starting and ending with a non-break character.
313  // s-
314  //   A production starting and ending with a white space character.
315  // ns-
316  //   A production starting and ending with a non-space character.
317  // l-
318  //   A production matching complete line(s).
319
320  /// @brief Skip a single nb-char[27] starting at Position.
321  ///
322  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
323  ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
324  ///
325  /// @returns The code unit after the nb-char, or Position if it's not an
326  ///          nb-char.
327  StringRef::iterator skip_nb_char(StringRef::iterator Position);
328
329  /// @brief Skip a single b-break[28] starting at Position.
330  ///
331  /// A b-break is 0xD 0xA | 0xD | 0xA
332  ///
333  /// @returns The code unit after the b-break, or Position if it's not a
334  ///          b-break.
335  StringRef::iterator skip_b_break(StringRef::iterator Position);
336
337  /// @brief Skip a single s-white[33] starting at Position.
338  ///
339  /// A s-white is 0x20 | 0x9
340  ///
341  /// @returns The code unit after the s-white, or Position if it's not a
342  ///          s-white.
343  StringRef::iterator skip_s_white(StringRef::iterator Position);
344
345  /// @brief Skip a single ns-char[34] starting at Position.
346  ///
347  /// A ns-char is nb-char - s-white
348  ///
349  /// @returns The code unit after the ns-char, or Position if it's not a
350  ///          ns-char.
351  StringRef::iterator skip_ns_char(StringRef::iterator Position);
352
353  typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
354  /// @brief Skip minimal well-formed code unit subsequences until Func
355  ///        returns its input.
356  ///
357  /// @returns The code unit after the last minimal well-formed code unit
358  ///          subsequence that Func accepted.
359  StringRef::iterator skip_while( SkipWhileFunc Func
360                                , StringRef::iterator Position);
361
362  /// @brief Scan ns-uri-char[39]s starting at Cur.
363  ///
364  /// This updates Cur and Column while scanning.
365  ///
366  /// @returns A StringRef starting at Cur which covers the longest contiguous
367  ///          sequence of ns-uri-char.
368  StringRef scan_ns_uri_char();
369
370  /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
371  StringRef scan_ns_plain_one_line();
372
373  /// @brief Consume a minimal well-formed code unit subsequence starting at
374  ///        \a Cur. Return false if it is not the same Unicode scalar value as
375  ///        \a Expected. This updates \a Column.
376  bool consume(uint32_t Expected);
377
378  /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
379  void skip(uint32_t Distance);
380
381  /// @brief Return true if the minimal well-formed code unit subsequence at
382  ///        Pos is whitespace or a new line
383  bool isBlankOrBreak(StringRef::iterator Position);
384
385  /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
386  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
387                             , unsigned AtColumn
388                             , bool IsRequired);
389
390  /// @brief Remove simple keys that can no longer be valid simple keys.
391  ///
392  /// Invalid simple keys are not on the current line or are further than 1024
393  /// columns back.
394  void removeStaleSimpleKeyCandidates();
395
396  /// @brief Remove all simple keys on FlowLevel \a Level.
397  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
398
399  /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
400  ///        tokens if needed.
401  bool unrollIndent(int ToColumn);
402
403  /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
404  ///        if needed.
405  bool rollIndent( int ToColumn
406                 , Token::TokenKind Kind
407                 , TokenQueueT::iterator InsertPoint);
408
409  /// @brief Skip whitespace and comments until the start of the next token.
410  void scanToNextToken();
411
412  /// @brief Must be the first token generated.
413  bool scanStreamStart();
414
415  /// @brief Generate tokens needed to close out the stream.
416  bool scanStreamEnd();
417
418  /// @brief Scan a %BLAH directive.
419  bool scanDirective();
420
421  /// @brief Scan a ... or ---.
422  bool scanDocumentIndicator(bool IsStart);
423
424  /// @brief Scan a [ or { and generate the proper flow collection start token.
425  bool scanFlowCollectionStart(bool IsSequence);
426
427  /// @brief Scan a ] or } and generate the proper flow collection end token.
428  bool scanFlowCollectionEnd(bool IsSequence);
429
430  /// @brief Scan the , that separates entries in a flow collection.
431  bool scanFlowEntry();
432
433  /// @brief Scan the - that starts block sequence entries.
434  bool scanBlockEntry();
435
436  /// @brief Scan an explicit ? indicating a key.
437  bool scanKey();
438
439  /// @brief Scan an explicit : indicating a value.
440  bool scanValue();
441
442  /// @brief Scan a quoted scalar.
443  bool scanFlowScalar(bool IsDoubleQuoted);
444
445  /// @brief Scan an unquoted scalar.
446  bool scanPlainScalar();
447
448  /// @brief Scan an Alias or Anchor starting with * or &.
449  bool scanAliasOrAnchor(bool IsAlias);
450
451  /// @brief Scan a block scalar starting with | or >.
452  bool scanBlockScalar(bool IsLiteral);
453
454  /// @brief Scan a tag of the form !stuff.
455  bool scanTag();
456
457  /// @brief Dispatch to the next scanning function based on \a *Cur.
458  bool fetchMoreTokens();
459
460  /// @brief The SourceMgr used for diagnostics and buffer management.
461  SourceMgr &SM;
462
463  /// @brief The original input.
464  MemoryBuffer *InputBuffer;
465
466  /// @brief The current position of the scanner.
467  StringRef::iterator Current;
468
469  /// @brief The end of the input (one past the last character).
470  StringRef::iterator End;
471
472  /// @brief Current YAML indentation level in spaces.
473  int Indent;
474
475  /// @brief Current column number in Unicode code points.
476  unsigned Column;
477
478  /// @brief Current line number.
479  unsigned Line;
480
481  /// @brief How deep we are in flow style containers. 0 Means at block level.
482  unsigned FlowLevel;
483
484  /// @brief Are we at the start of the stream?
485  bool IsStartOfStream;
486
487  /// @brief Can the next token be the start of a simple key?
488  bool IsSimpleKeyAllowed;
489
490  /// @brief Is the next token required to start a simple key?
491  bool IsSimpleKeyRequired;
492
493  /// @brief True if an error has occurred.
494  bool Failed;
495
496  /// @brief Queue of tokens. This is required to queue up tokens while looking
497  ///        for the end of a simple key. And for cases where a single character
498  ///        can produce multiple tokens (e.g. BlockEnd).
499  TokenQueueT TokenQueue;
500
501  /// @brief Indentation levels.
502  SmallVector<int, 4> Indents;
503
504  /// @brief Potential simple keys.
505  SmallVector<SimpleKey, 4> SimpleKeys;
506};
507
508} // end namespace yaml
509} // end namespace llvm
510
511/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
512static void encodeUTF8( uint32_t UnicodeScalarValue
513                      , SmallVectorImpl<char> &Result) {
514  if (UnicodeScalarValue <= 0x7F) {
515    Result.push_back(UnicodeScalarValue & 0x7F);
516  } else if (UnicodeScalarValue <= 0x7FF) {
517    uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
518    uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
519    Result.push_back(FirstByte);
520    Result.push_back(SecondByte);
521  } else if (UnicodeScalarValue <= 0xFFFF) {
522    uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
523    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
524    uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
525    Result.push_back(FirstByte);
526    Result.push_back(SecondByte);
527    Result.push_back(ThirdByte);
528  } else if (UnicodeScalarValue <= 0x10FFFF) {
529    uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
530    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
531    uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
532    uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
533    Result.push_back(FirstByte);
534    Result.push_back(SecondByte);
535    Result.push_back(ThirdByte);
536    Result.push_back(FourthByte);
537  }
538}
539
540bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
541  SourceMgr SM;
542  Scanner scanner(Input, SM);
543  while (true) {
544    Token T = scanner.getNext();
545    switch (T.Kind) {
546    case Token::TK_StreamStart:
547      OS << "Stream-Start: ";
548      break;
549    case Token::TK_StreamEnd:
550      OS << "Stream-End: ";
551      break;
552    case Token::TK_VersionDirective:
553      OS << "Version-Directive: ";
554      break;
555    case Token::TK_TagDirective:
556      OS << "Tag-Directive: ";
557      break;
558    case Token::TK_DocumentStart:
559      OS << "Document-Start: ";
560      break;
561    case Token::TK_DocumentEnd:
562      OS << "Document-End: ";
563      break;
564    case Token::TK_BlockEntry:
565      OS << "Block-Entry: ";
566      break;
567    case Token::TK_BlockEnd:
568      OS << "Block-End: ";
569      break;
570    case Token::TK_BlockSequenceStart:
571      OS << "Block-Sequence-Start: ";
572      break;
573    case Token::TK_BlockMappingStart:
574      OS << "Block-Mapping-Start: ";
575      break;
576    case Token::TK_FlowEntry:
577      OS << "Flow-Entry: ";
578      break;
579    case Token::TK_FlowSequenceStart:
580      OS << "Flow-Sequence-Start: ";
581      break;
582    case Token::TK_FlowSequenceEnd:
583      OS << "Flow-Sequence-End: ";
584      break;
585    case Token::TK_FlowMappingStart:
586      OS << "Flow-Mapping-Start: ";
587      break;
588    case Token::TK_FlowMappingEnd:
589      OS << "Flow-Mapping-End: ";
590      break;
591    case Token::TK_Key:
592      OS << "Key: ";
593      break;
594    case Token::TK_Value:
595      OS << "Value: ";
596      break;
597    case Token::TK_Scalar:
598      OS << "Scalar: ";
599      break;
600    case Token::TK_Alias:
601      OS << "Alias: ";
602      break;
603    case Token::TK_Anchor:
604      OS << "Anchor: ";
605      break;
606    case Token::TK_Tag:
607      OS << "Tag: ";
608      break;
609    case Token::TK_Error:
610      break;
611    }
612    OS << T.Range << "\n";
613    if (T.Kind == Token::TK_StreamEnd)
614      break;
615    else if (T.Kind == Token::TK_Error)
616      return false;
617  }
618  return true;
619}
620
621bool yaml::scanTokens(StringRef Input) {
622  llvm::SourceMgr SM;
623  llvm::yaml::Scanner scanner(Input, SM);
624  for (;;) {
625    llvm::yaml::Token T = scanner.getNext();
626    if (T.Kind == Token::TK_StreamEnd)
627      break;
628    else if (T.Kind == Token::TK_Error)
629      return false;
630  }
631  return true;
632}
633
634std::string yaml::escape(StringRef Input) {
635  std::string EscapedInput;
636  for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
637    if (*i == '\\')
638      EscapedInput += "\\\\";
639    else if (*i == '"')
640      EscapedInput += "\\\"";
641    else if (*i == 0)
642      EscapedInput += "\\0";
643    else if (*i == 0x07)
644      EscapedInput += "\\a";
645    else if (*i == 0x08)
646      EscapedInput += "\\b";
647    else if (*i == 0x09)
648      EscapedInput += "\\t";
649    else if (*i == 0x0A)
650      EscapedInput += "\\n";
651    else if (*i == 0x0B)
652      EscapedInput += "\\v";
653    else if (*i == 0x0C)
654      EscapedInput += "\\f";
655    else if (*i == 0x0D)
656      EscapedInput += "\\r";
657    else if (*i == 0x1B)
658      EscapedInput += "\\e";
659    else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
660      std::string HexStr = utohexstr(*i);
661      EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
662    } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
663      UTF8Decoded UnicodeScalarValue
664        = decodeUTF8(StringRef(i, Input.end() - i));
665      if (UnicodeScalarValue.second == 0) {
666        // Found invalid char.
667        SmallString<4> Val;
668        encodeUTF8(0xFFFD, Val);
669        EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
670        // FIXME: Error reporting.
671        return EscapedInput;
672      }
673      if (UnicodeScalarValue.first == 0x85)
674        EscapedInput += "\\N";
675      else if (UnicodeScalarValue.first == 0xA0)
676        EscapedInput += "\\_";
677      else if (UnicodeScalarValue.first == 0x2028)
678        EscapedInput += "\\L";
679      else if (UnicodeScalarValue.first == 0x2029)
680        EscapedInput += "\\P";
681      else {
682        std::string HexStr = utohexstr(UnicodeScalarValue.first);
683        if (HexStr.size() <= 2)
684          EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
685        else if (HexStr.size() <= 4)
686          EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
687        else if (HexStr.size() <= 8)
688          EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
689      }
690      i += UnicodeScalarValue.second - 1;
691    } else
692      EscapedInput.push_back(*i);
693  }
694  return EscapedInput;
695}
696
697Scanner::Scanner(StringRef Input, SourceMgr &sm)
698  : SM(sm)
699  , Indent(-1)
700  , Column(0)
701  , Line(0)
702  , FlowLevel(0)
703  , IsStartOfStream(true)
704  , IsSimpleKeyAllowed(true)
705  , IsSimpleKeyRequired(false)
706  , Failed(false) {
707  InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
708  SM.AddNewSourceBuffer(InputBuffer, SMLoc());
709  Current = InputBuffer->getBufferStart();
710  End = InputBuffer->getBufferEnd();
711}
712
713Token &Scanner::peekNext() {
714  // If the current token is a possible simple key, keep parsing until we
715  // can confirm.
716  bool NeedMore = false;
717  while (true) {
718    if (TokenQueue.empty() || NeedMore) {
719      if (!fetchMoreTokens()) {
720        TokenQueue.clear();
721        TokenQueue.push_back(Token());
722        return TokenQueue.front();
723      }
724    }
725    assert(!TokenQueue.empty() &&
726            "fetchMoreTokens lied about getting tokens!");
727
728    removeStaleSimpleKeyCandidates();
729    SimpleKey SK;
730    SK.Tok = TokenQueue.front();
731    if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
732        == SimpleKeys.end())
733      break;
734    else
735      NeedMore = true;
736  }
737  return TokenQueue.front();
738}
739
740Token Scanner::getNext() {
741  Token Ret = peekNext();
742  // TokenQueue can be empty if there was an error getting the next token.
743  if (!TokenQueue.empty())
744    TokenQueue.pop_front();
745
746  // There cannot be any referenced Token's if the TokenQueue is empty. So do a
747  // quick deallocation of them all.
748  if (TokenQueue.empty()) {
749    TokenQueue.Alloc.Reset();
750  }
751
752  return Ret;
753}
754
755StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
756  // Check 7 bit c-printable - b-char.
757  if (   *Position == 0x09
758      || (*Position >= 0x20 && *Position <= 0x7E))
759    return Position + 1;
760
761  // Check for valid UTF-8.
762  if (uint8_t(*Position) & 0x80) {
763    UTF8Decoded u8d = decodeUTF8(Position);
764    if (   u8d.second != 0
765        && u8d.first != 0xFEFF
766        && ( u8d.first == 0x85
767          || ( u8d.first >= 0xA0
768            && u8d.first <= 0xD7FF)
769          || ( u8d.first >= 0xE000
770            && u8d.first <= 0xFFFD)
771          || ( u8d.first >= 0x10000
772            && u8d.first <= 0x10FFFF)))
773      return Position + u8d.second;
774  }
775  return Position;
776}
777
778StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
779  if (*Position == 0x0D) {
780    if (Position + 1 != End && *(Position + 1) == 0x0A)
781      return Position + 2;
782    return Position + 1;
783  }
784
785  if (*Position == 0x0A)
786    return Position + 1;
787  return Position;
788}
789
790
791StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
792  if (Position == End)
793    return Position;
794  if (*Position == ' ' || *Position == '\t')
795    return Position + 1;
796  return Position;
797}
798
799StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
800  if (Position == End)
801    return Position;
802  if (*Position == ' ' || *Position == '\t')
803    return Position;
804  return skip_nb_char(Position);
805}
806
807StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
808                                       , StringRef::iterator Position) {
809  while (true) {
810    StringRef::iterator i = (this->*Func)(Position);
811    if (i == Position)
812      break;
813    Position = i;
814  }
815  return Position;
816}
817
818static bool is_ns_hex_digit(const char C) {
819  return    (C >= '0' && C <= '9')
820         || (C >= 'a' && C <= 'z')
821         || (C >= 'A' && C <= 'Z');
822}
823
824static bool is_ns_word_char(const char C) {
825  return    C == '-'
826         || (C >= 'a' && C <= 'z')
827         || (C >= 'A' && C <= 'Z');
828}
829
830StringRef Scanner::scan_ns_uri_char() {
831  StringRef::iterator Start = Current;
832  while (true) {
833    if (Current == End)
834      break;
835    if ((   *Current == '%'
836          && Current + 2 < End
837          && is_ns_hex_digit(*(Current + 1))
838          && is_ns_hex_digit(*(Current + 2)))
839        || is_ns_word_char(*Current)
840        || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
841          != StringRef::npos) {
842      ++Current;
843      ++Column;
844    } else
845      break;
846  }
847  return StringRef(Start, Current - Start);
848}
849
850StringRef Scanner::scan_ns_plain_one_line() {
851  StringRef::iterator start = Current;
852  // The first character must already be verified.
853  ++Current;
854  while (true) {
855    if (Current == End) {
856      break;
857    } else if (*Current == ':') {
858      // Check if the next character is a ns-char.
859      if (Current + 1 == End)
860        break;
861      StringRef::iterator i = skip_ns_char(Current + 1);
862      if (Current + 1 != i) {
863        Current = i;
864        Column += 2; // Consume both the ':' and ns-char.
865      } else
866        break;
867    } else if (*Current == '#') {
868      // Check if the previous character was a ns-char.
869      // The & 0x80 check is to check for the trailing byte of a utf-8
870      if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
871        ++Current;
872        ++Column;
873      } else
874        break;
875    } else {
876      StringRef::iterator i = skip_nb_char(Current);
877      if (i == Current)
878        break;
879      Current = i;
880      ++Column;
881    }
882  }
883  return StringRef(start, Current - start);
884}
885
886bool Scanner::consume(uint32_t Expected) {
887  if (Expected >= 0x80)
888    report_fatal_error("Not dealing with this yet");
889  if (Current == End)
890    return false;
891  if (uint8_t(*Current) >= 0x80)
892    report_fatal_error("Not dealing with this yet");
893  if (uint8_t(*Current) == Expected) {
894    ++Current;
895    ++Column;
896    return true;
897  }
898  return false;
899}
900
901void Scanner::skip(uint32_t Distance) {
902  Current += Distance;
903  Column += Distance;
904}
905
906bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
907  if (Position == End)
908    return false;
909  if (   *Position == ' ' || *Position == '\t'
910      || *Position == '\r' || *Position == '\n')
911    return true;
912  return false;
913}
914
915void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
916                                    , unsigned AtColumn
917                                    , bool IsRequired) {
918  if (IsSimpleKeyAllowed) {
919    SimpleKey SK;
920    SK.Tok = Tok;
921    SK.Line = Line;
922    SK.Column = AtColumn;
923    SK.IsRequired = IsRequired;
924    SK.FlowLevel = FlowLevel;
925    SimpleKeys.push_back(SK);
926  }
927}
928
929void Scanner::removeStaleSimpleKeyCandidates() {
930  for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
931                                            i != SimpleKeys.end();) {
932    if (i->Line != Line || i->Column + 1024 < Column) {
933      if (i->IsRequired)
934        setError( "Could not find expected : for simple key"
935                , i->Tok->Range.begin());
936      i = SimpleKeys.erase(i);
937    } else
938      ++i;
939  }
940}
941
942void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
943  if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
944    SimpleKeys.pop_back();
945}
946
947bool Scanner::unrollIndent(int ToColumn) {
948  Token T;
949  // Indentation is ignored in flow.
950  if (FlowLevel != 0)
951    return true;
952
953  while (Indent > ToColumn) {
954    T.Kind = Token::TK_BlockEnd;
955    T.Range = StringRef(Current, 1);
956    TokenQueue.push_back(T);
957    Indent = Indents.pop_back_val();
958  }
959
960  return true;
961}
962
963bool Scanner::rollIndent( int ToColumn
964                        , Token::TokenKind Kind
965                        , TokenQueueT::iterator InsertPoint) {
966  if (FlowLevel)
967    return true;
968  if (Indent < ToColumn) {
969    Indents.push_back(Indent);
970    Indent = ToColumn;
971
972    Token T;
973    T.Kind = Kind;
974    T.Range = StringRef(Current, 0);
975    TokenQueue.insert(InsertPoint, T);
976  }
977  return true;
978}
979
980void Scanner::scanToNextToken() {
981  while (true) {
982    while (*Current == ' ' || *Current == '\t') {
983      skip(1);
984    }
985
986    // Skip comment.
987    if (*Current == '#') {
988      while (true) {
989        // This may skip more than one byte, thus Column is only incremented
990        // for code points.
991        StringRef::iterator i = skip_nb_char(Current);
992        if (i == Current)
993          break;
994        Current = i;
995        ++Column;
996      }
997    }
998
999    // Skip EOL.
1000    StringRef::iterator i = skip_b_break(Current);
1001    if (i == Current)
1002      break;
1003    Current = i;
1004    ++Line;
1005    Column = 0;
1006    // New lines may start a simple key.
1007    if (!FlowLevel)
1008      IsSimpleKeyAllowed = true;
1009  }
1010}
1011
1012bool Scanner::scanStreamStart() {
1013  IsStartOfStream = false;
1014
1015  EncodingInfo EI = getUnicodeEncoding(currentInput());
1016
1017  Token T;
1018  T.Kind = Token::TK_StreamStart;
1019  T.Range = StringRef(Current, EI.second);
1020  TokenQueue.push_back(T);
1021  Current += EI.second;
1022  return true;
1023}
1024
1025bool Scanner::scanStreamEnd() {
1026  // Force an ending new line if one isn't present.
1027  if (Column != 0) {
1028    Column = 0;
1029    ++Line;
1030  }
1031
1032  unrollIndent(-1);
1033  SimpleKeys.clear();
1034  IsSimpleKeyAllowed = false;
1035
1036  Token T;
1037  T.Kind = Token::TK_StreamEnd;
1038  T.Range = StringRef(Current, 0);
1039  TokenQueue.push_back(T);
1040  return true;
1041}
1042
1043bool Scanner::scanDirective() {
1044  // Reset the indentation level.
1045  unrollIndent(-1);
1046  SimpleKeys.clear();
1047  IsSimpleKeyAllowed = false;
1048
1049  StringRef::iterator Start = Current;
1050  consume('%');
1051  StringRef::iterator NameStart = Current;
1052  Current = skip_while(&Scanner::skip_ns_char, Current);
1053  StringRef Name(NameStart, Current - NameStart);
1054  Current = skip_while(&Scanner::skip_s_white, Current);
1055
1056  if (Name == "YAML") {
1057    Current = skip_while(&Scanner::skip_ns_char, Current);
1058    Token T;
1059    T.Kind = Token::TK_VersionDirective;
1060    T.Range = StringRef(Start, Current - Start);
1061    TokenQueue.push_back(T);
1062    return true;
1063  }
1064  return false;
1065}
1066
1067bool Scanner::scanDocumentIndicator(bool IsStart) {
1068  unrollIndent(-1);
1069  SimpleKeys.clear();
1070  IsSimpleKeyAllowed = false;
1071
1072  Token T;
1073  T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1074  T.Range = StringRef(Current, 3);
1075  skip(3);
1076  TokenQueue.push_back(T);
1077  return true;
1078}
1079
1080bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1081  Token T;
1082  T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1083                      : Token::TK_FlowMappingStart;
1084  T.Range = StringRef(Current, 1);
1085  skip(1);
1086  TokenQueue.push_back(T);
1087
1088  // [ and { may begin a simple key.
1089  saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
1090
1091  // And may also be followed by a simple key.
1092  IsSimpleKeyAllowed = true;
1093  ++FlowLevel;
1094  return true;
1095}
1096
1097bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1098  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1099  IsSimpleKeyAllowed = false;
1100  Token T;
1101  T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1102                      : Token::TK_FlowMappingEnd;
1103  T.Range = StringRef(Current, 1);
1104  skip(1);
1105  TokenQueue.push_back(T);
1106  if (FlowLevel)
1107    --FlowLevel;
1108  return true;
1109}
1110
1111bool Scanner::scanFlowEntry() {
1112  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1113  IsSimpleKeyAllowed = true;
1114  Token T;
1115  T.Kind = Token::TK_FlowEntry;
1116  T.Range = StringRef(Current, 1);
1117  skip(1);
1118  TokenQueue.push_back(T);
1119  return true;
1120}
1121
1122bool Scanner::scanBlockEntry() {
1123  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1124  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1125  IsSimpleKeyAllowed = true;
1126  Token T;
1127  T.Kind = Token::TK_BlockEntry;
1128  T.Range = StringRef(Current, 1);
1129  skip(1);
1130  TokenQueue.push_back(T);
1131  return true;
1132}
1133
1134bool Scanner::scanKey() {
1135  if (!FlowLevel)
1136    rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1137
1138  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1139  IsSimpleKeyAllowed = !FlowLevel;
1140
1141  Token T;
1142  T.Kind = Token::TK_Key;
1143  T.Range = StringRef(Current, 1);
1144  skip(1);
1145  TokenQueue.push_back(T);
1146  return true;
1147}
1148
1149bool Scanner::scanValue() {
1150  // If the previous token could have been a simple key, insert the key token
1151  // into the token queue.
1152  if (!SimpleKeys.empty()) {
1153    SimpleKey SK = SimpleKeys.pop_back_val();
1154    Token T;
1155    T.Kind = Token::TK_Key;
1156    T.Range = SK.Tok->Range;
1157    TokenQueueT::iterator i, e;
1158    for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1159      if (i == SK.Tok)
1160        break;
1161    }
1162    assert(i != e && "SimpleKey not in token queue!");
1163    i = TokenQueue.insert(i, T);
1164
1165    // We may also need to add a Block-Mapping-Start token.
1166    rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1167
1168    IsSimpleKeyAllowed = false;
1169  } else {
1170    if (!FlowLevel)
1171      rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1172    IsSimpleKeyAllowed = !FlowLevel;
1173  }
1174
1175  Token T;
1176  T.Kind = Token::TK_Value;
1177  T.Range = StringRef(Current, 1);
1178  skip(1);
1179  TokenQueue.push_back(T);
1180  return true;
1181}
1182
1183// Forbidding inlining improves performance by roughly 20%.
1184// FIXME: Remove once llvm optimizes this to the faster version without hints.
1185LLVM_ATTRIBUTE_NOINLINE static bool
1186wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1187
1188// Returns whether a character at 'Position' was escaped with a leading '\'.
1189// 'First' specifies the position of the first character in the string.
1190static bool wasEscaped(StringRef::iterator First,
1191                       StringRef::iterator Position) {
1192  assert(Position - 1 >= First);
1193  StringRef::iterator I = Position - 1;
1194  // We calculate the number of consecutive '\'s before the current position
1195  // by iterating backwards through our string.
1196  while (I >= First && *I == '\\') --I;
1197  // (Position - 1 - I) now contains the number of '\'s before the current
1198  // position. If it is odd, the character at 'Position' was escaped.
1199  return (Position - 1 - I) % 2 == 1;
1200}
1201
1202bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1203  StringRef::iterator Start = Current;
1204  unsigned ColStart = Column;
1205  if (IsDoubleQuoted) {
1206    do {
1207      ++Current;
1208      while (Current != End && *Current != '"')
1209        ++Current;
1210      // Repeat until the previous character was not a '\' or was an escaped
1211      // backslash.
1212    } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
1213  } else {
1214    skip(1);
1215    while (true) {
1216      // Skip a ' followed by another '.
1217      if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1218        skip(2);
1219        continue;
1220      } else if (*Current == '\'')
1221        break;
1222      StringRef::iterator i = skip_nb_char(Current);
1223      if (i == Current) {
1224        i = skip_b_break(Current);
1225        if (i == Current)
1226          break;
1227        Current = i;
1228        Column = 0;
1229        ++Line;
1230      } else {
1231        if (i == End)
1232          break;
1233        Current = i;
1234        ++Column;
1235      }
1236    }
1237  }
1238  skip(1); // Skip ending quote.
1239  Token T;
1240  T.Kind = Token::TK_Scalar;
1241  T.Range = StringRef(Start, Current - Start);
1242  TokenQueue.push_back(T);
1243
1244  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1245
1246  IsSimpleKeyAllowed = false;
1247
1248  return true;
1249}
1250
1251bool Scanner::scanPlainScalar() {
1252  StringRef::iterator Start = Current;
1253  unsigned ColStart = Column;
1254  unsigned LeadingBlanks = 0;
1255  assert(Indent >= -1 && "Indent must be >= -1 !");
1256  unsigned indent = static_cast<unsigned>(Indent + 1);
1257  while (true) {
1258    if (*Current == '#')
1259      break;
1260
1261    while (!isBlankOrBreak(Current)) {
1262      if (  FlowLevel && *Current == ':'
1263          && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
1264        setError("Found unexpected ':' while scanning a plain scalar", Current);
1265        return false;
1266      }
1267
1268      // Check for the end of the plain scalar.
1269      if (  (*Current == ':' && isBlankOrBreak(Current + 1))
1270          || (  FlowLevel
1271          && (StringRef(Current, 1).find_first_of(",:?[]{}")
1272              != StringRef::npos)))
1273        break;
1274
1275      StringRef::iterator i = skip_nb_char(Current);
1276      if (i == Current)
1277        break;
1278      Current = i;
1279      ++Column;
1280    }
1281
1282    // Are we at the end?
1283    if (!isBlankOrBreak(Current))
1284      break;
1285
1286    // Eat blanks.
1287    StringRef::iterator Tmp = Current;
1288    while (isBlankOrBreak(Tmp)) {
1289      StringRef::iterator i = skip_s_white(Tmp);
1290      if (i != Tmp) {
1291        if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1292          setError("Found invalid tab character in indentation", Tmp);
1293          return false;
1294        }
1295        Tmp = i;
1296        ++Column;
1297      } else {
1298        i = skip_b_break(Tmp);
1299        if (!LeadingBlanks)
1300          LeadingBlanks = 1;
1301        Tmp = i;
1302        Column = 0;
1303        ++Line;
1304      }
1305    }
1306
1307    if (!FlowLevel && Column < indent)
1308      break;
1309
1310    Current = Tmp;
1311  }
1312  if (Start == Current) {
1313    setError("Got empty plain scalar", Start);
1314    return false;
1315  }
1316  Token T;
1317  T.Kind = Token::TK_Scalar;
1318  T.Range = StringRef(Start, Current - Start);
1319  TokenQueue.push_back(T);
1320
1321  // Plain scalars can be simple keys.
1322  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1323
1324  IsSimpleKeyAllowed = false;
1325
1326  return true;
1327}
1328
1329bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1330  StringRef::iterator Start = Current;
1331  unsigned ColStart = Column;
1332  skip(1);
1333  while(true) {
1334    if (   *Current == '[' || *Current == ']'
1335        || *Current == '{' || *Current == '}'
1336        || *Current == ','
1337        || *Current == ':')
1338      break;
1339    StringRef::iterator i = skip_ns_char(Current);
1340    if (i == Current)
1341      break;
1342    Current = i;
1343    ++Column;
1344  }
1345
1346  if (Start == Current) {
1347    setError("Got empty alias or anchor", Start);
1348    return false;
1349  }
1350
1351  Token T;
1352  T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
1353  T.Range = StringRef(Start, Current - Start);
1354  TokenQueue.push_back(T);
1355
1356  // Alias and anchors can be simple keys.
1357  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1358
1359  IsSimpleKeyAllowed = false;
1360
1361  return true;
1362}
1363
1364bool Scanner::scanBlockScalar(bool IsLiteral) {
1365  StringRef::iterator Start = Current;
1366  skip(1); // Eat | or >
1367  while(true) {
1368    StringRef::iterator i = skip_nb_char(Current);
1369    if (i == Current) {
1370      if (Column == 0)
1371        break;
1372      i = skip_b_break(Current);
1373      if (i != Current) {
1374        // We got a line break.
1375        Column = 0;
1376        ++Line;
1377        Current = i;
1378        continue;
1379      } else {
1380        // There was an error, which should already have been printed out.
1381        return false;
1382      }
1383    }
1384    Current = i;
1385    ++Column;
1386  }
1387
1388  if (Start == Current) {
1389    setError("Got empty block scalar", Start);
1390    return false;
1391  }
1392
1393  Token T;
1394  T.Kind = Token::TK_Scalar;
1395  T.Range = StringRef(Start, Current - Start);
1396  TokenQueue.push_back(T);
1397  return true;
1398}
1399
1400bool Scanner::scanTag() {
1401  StringRef::iterator Start = Current;
1402  unsigned ColStart = Column;
1403  skip(1); // Eat !.
1404  if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1405  else if (*Current == '<') {
1406    skip(1);
1407    scan_ns_uri_char();
1408    if (!consume('>'))
1409      return false;
1410  } else {
1411    // FIXME: Actually parse the c-ns-shorthand-tag rule.
1412    Current = skip_while(&Scanner::skip_ns_char, Current);
1413  }
1414
1415  Token T;
1416  T.Kind = Token::TK_Tag;
1417  T.Range = StringRef(Start, Current - Start);
1418  TokenQueue.push_back(T);
1419
1420  // Tags can be simple keys.
1421  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1422
1423  IsSimpleKeyAllowed = false;
1424
1425  return true;
1426}
1427
1428bool Scanner::fetchMoreTokens() {
1429  if (IsStartOfStream)
1430    return scanStreamStart();
1431
1432  scanToNextToken();
1433
1434  if (Current == End)
1435    return scanStreamEnd();
1436
1437  removeStaleSimpleKeyCandidates();
1438
1439  unrollIndent(Column);
1440
1441  if (Column == 0 && *Current == '%')
1442    return scanDirective();
1443
1444  if (Column == 0 && Current + 4 <= End
1445      && *Current == '-'
1446      && *(Current + 1) == '-'
1447      && *(Current + 2) == '-'
1448      && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1449    return scanDocumentIndicator(true);
1450
1451  if (Column == 0 && Current + 4 <= End
1452      && *Current == '.'
1453      && *(Current + 1) == '.'
1454      && *(Current + 2) == '.'
1455      && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1456    return scanDocumentIndicator(false);
1457
1458  if (*Current == '[')
1459    return scanFlowCollectionStart(true);
1460
1461  if (*Current == '{')
1462    return scanFlowCollectionStart(false);
1463
1464  if (*Current == ']')
1465    return scanFlowCollectionEnd(true);
1466
1467  if (*Current == '}')
1468    return scanFlowCollectionEnd(false);
1469
1470  if (*Current == ',')
1471    return scanFlowEntry();
1472
1473  if (*Current == '-' && isBlankOrBreak(Current + 1))
1474    return scanBlockEntry();
1475
1476  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1477    return scanKey();
1478
1479  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1480    return scanValue();
1481
1482  if (*Current == '*')
1483    return scanAliasOrAnchor(true);
1484
1485  if (*Current == '&')
1486    return scanAliasOrAnchor(false);
1487
1488  if (*Current == '!')
1489    return scanTag();
1490
1491  if (*Current == '|' && !FlowLevel)
1492    return scanBlockScalar(true);
1493
1494  if (*Current == '>' && !FlowLevel)
1495    return scanBlockScalar(false);
1496
1497  if (*Current == '\'')
1498    return scanFlowScalar(false);
1499
1500  if (*Current == '"')
1501    return scanFlowScalar(true);
1502
1503  // Get a plain scalar.
1504  StringRef FirstChar(Current, 1);
1505  if (!(isBlankOrBreak(Current)
1506        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1507      || (*Current == '-' && !isBlankOrBreak(Current + 1))
1508      || (!FlowLevel && (*Current == '?' || *Current == ':')
1509          && isBlankOrBreak(Current + 1))
1510      || (!FlowLevel && *Current == ':'
1511                      && Current + 2 < End
1512                      && *(Current + 1) == ':'
1513                      && !isBlankOrBreak(Current + 2)))
1514    return scanPlainScalar();
1515
1516  setError("Unrecognized character while tokenizing.");
1517  return false;
1518}
1519
1520Stream::Stream(StringRef Input, SourceMgr &SM)
1521  : scanner(new Scanner(Input, SM))
1522  , CurrentDoc(0) {}
1523
1524bool Stream::failed() { return scanner->failed(); }
1525
1526void Stream::printError(Node *N, const Twine &Msg) {
1527  SmallVector<SMRange, 1> Ranges;
1528  Ranges.push_back(N->getSourceRange());
1529  scanner->printError( N->getSourceRange().Start
1530                     , SourceMgr::DK_Error
1531                     , Msg
1532                     , Ranges);
1533}
1534
1535void Stream::handleYAMLDirective(const Token &t) {
1536  // TODO: Ensure version is 1.x.
1537}
1538
1539document_iterator Stream::begin() {
1540  if (CurrentDoc)
1541    report_fatal_error("Can only iterate over the stream once");
1542
1543  // Skip Stream-Start.
1544  scanner->getNext();
1545
1546  CurrentDoc.reset(new Document(*this));
1547  return document_iterator(CurrentDoc);
1548}
1549
1550document_iterator Stream::end() {
1551  return document_iterator();
1552}
1553
1554void Stream::skip() {
1555  for (document_iterator i = begin(), e = end(); i != e; ++i)
1556    i->skip();
1557}
1558
1559Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
1560  : Doc(D)
1561  , TypeID(Type)
1562  , Anchor(A) {
1563  SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1564  SourceRange = SMRange(Start, Start);
1565}
1566
1567Node::~Node() {}
1568
1569Token &Node::peekNext() {
1570  return Doc->peekNext();
1571}
1572
1573Token Node::getNext() {
1574  return Doc->getNext();
1575}
1576
1577Node *Node::parseBlockNode() {
1578  return Doc->parseBlockNode();
1579}
1580
1581BumpPtrAllocator &Node::getAllocator() {
1582  return Doc->NodeAllocator;
1583}
1584
1585void Node::setError(const Twine &Msg, Token &Tok) const {
1586  Doc->setError(Msg, Tok);
1587}
1588
1589bool Node::failed() const {
1590  return Doc->failed();
1591}
1592
1593
1594
1595StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
1596  // TODO: Handle newlines properly. We need to remove leading whitespace.
1597  if (Value[0] == '"') { // Double quoted.
1598    // Pull off the leading and trailing "s.
1599    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1600    // Search for characters that would require unescaping the value.
1601    StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
1602    if (i != StringRef::npos)
1603      return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1604    return UnquotedValue;
1605  } else if (Value[0] == '\'') { // Single quoted.
1606    // Pull off the leading and trailing 's.
1607    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1608    StringRef::size_type i = UnquotedValue.find('\'');
1609    if (i != StringRef::npos) {
1610      // We're going to need Storage.
1611      Storage.clear();
1612      Storage.reserve(UnquotedValue.size());
1613      for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
1614        StringRef Valid(UnquotedValue.begin(), i);
1615        Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1616        Storage.push_back('\'');
1617        UnquotedValue = UnquotedValue.substr(i + 2);
1618      }
1619      Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1620      return StringRef(Storage.begin(), Storage.size());
1621    }
1622    return UnquotedValue;
1623  }
1624  // Plain or block.
1625  size_t trimtrail = Value.rfind(' ');
1626  return Value.drop_back(
1627    trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
1628}
1629
1630StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
1631                                          , StringRef::size_type i
1632                                          , SmallVectorImpl<char> &Storage)
1633                                          const {
1634  // Use Storage to build proper value.
1635  Storage.clear();
1636  Storage.reserve(UnquotedValue.size());
1637  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
1638    // Insert all previous chars into Storage.
1639    StringRef Valid(UnquotedValue.begin(), i);
1640    Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1641    // Chop off inserted chars.
1642    UnquotedValue = UnquotedValue.substr(i);
1643
1644    assert(!UnquotedValue.empty() && "Can't be empty!");
1645
1646    // Parse escape or line break.
1647    switch (UnquotedValue[0]) {
1648    case '\r':
1649    case '\n':
1650      Storage.push_back('\n');
1651      if (   UnquotedValue.size() > 1
1652          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1653        UnquotedValue = UnquotedValue.substr(1);
1654      UnquotedValue = UnquotedValue.substr(1);
1655      break;
1656    default:
1657      if (UnquotedValue.size() == 1)
1658        // TODO: Report error.
1659        break;
1660      UnquotedValue = UnquotedValue.substr(1);
1661      switch (UnquotedValue[0]) {
1662      default: {
1663          Token T;
1664          T.Range = StringRef(UnquotedValue.begin(), 1);
1665          setError("Unrecognized escape code!", T);
1666          return "";
1667        }
1668      case '\r':
1669      case '\n':
1670        // Remove the new line.
1671        if (   UnquotedValue.size() > 1
1672            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1673          UnquotedValue = UnquotedValue.substr(1);
1674        // If this was just a single byte newline, it will get skipped
1675        // below.
1676        break;
1677      case '0':
1678        Storage.push_back(0x00);
1679        break;
1680      case 'a':
1681        Storage.push_back(0x07);
1682        break;
1683      case 'b':
1684        Storage.push_back(0x08);
1685        break;
1686      case 't':
1687      case 0x09:
1688        Storage.push_back(0x09);
1689        break;
1690      case 'n':
1691        Storage.push_back(0x0A);
1692        break;
1693      case 'v':
1694        Storage.push_back(0x0B);
1695        break;
1696      case 'f':
1697        Storage.push_back(0x0C);
1698        break;
1699      case 'r':
1700        Storage.push_back(0x0D);
1701        break;
1702      case 'e':
1703        Storage.push_back(0x1B);
1704        break;
1705      case ' ':
1706        Storage.push_back(0x20);
1707        break;
1708      case '"':
1709        Storage.push_back(0x22);
1710        break;
1711      case '/':
1712        Storage.push_back(0x2F);
1713        break;
1714      case '\\':
1715        Storage.push_back(0x5C);
1716        break;
1717      case 'N':
1718        encodeUTF8(0x85, Storage);
1719        break;
1720      case '_':
1721        encodeUTF8(0xA0, Storage);
1722        break;
1723      case 'L':
1724        encodeUTF8(0x2028, Storage);
1725        break;
1726      case 'P':
1727        encodeUTF8(0x2029, Storage);
1728        break;
1729      case 'x': {
1730          if (UnquotedValue.size() < 3)
1731            // TODO: Report error.
1732            break;
1733          unsigned int UnicodeScalarValue;
1734          UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
1735          encodeUTF8(UnicodeScalarValue, Storage);
1736          UnquotedValue = UnquotedValue.substr(2);
1737          break;
1738        }
1739      case 'u': {
1740          if (UnquotedValue.size() < 5)
1741            // TODO: Report error.
1742            break;
1743          unsigned int UnicodeScalarValue;
1744          UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
1745          encodeUTF8(UnicodeScalarValue, Storage);
1746          UnquotedValue = UnquotedValue.substr(4);
1747          break;
1748        }
1749      case 'U': {
1750          if (UnquotedValue.size() < 9)
1751            // TODO: Report error.
1752            break;
1753          unsigned int UnicodeScalarValue;
1754          UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
1755          encodeUTF8(UnicodeScalarValue, Storage);
1756          UnquotedValue = UnquotedValue.substr(8);
1757          break;
1758        }
1759      }
1760      UnquotedValue = UnquotedValue.substr(1);
1761    }
1762  }
1763  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1764  return StringRef(Storage.begin(), Storage.size());
1765}
1766
1767Node *KeyValueNode::getKey() {
1768  if (Key)
1769    return Key;
1770  // Handle implicit null keys.
1771  {
1772    Token &t = peekNext();
1773    if (   t.Kind == Token::TK_BlockEnd
1774        || t.Kind == Token::TK_Value
1775        || t.Kind == Token::TK_Error) {
1776      return Key = new (getAllocator()) NullNode(Doc);
1777    }
1778    if (t.Kind == Token::TK_Key)
1779      getNext(); // skip TK_Key.
1780  }
1781
1782  // Handle explicit null keys.
1783  Token &t = peekNext();
1784  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
1785    return Key = new (getAllocator()) NullNode(Doc);
1786  }
1787
1788  // We've got a normal key.
1789  return Key = parseBlockNode();
1790}
1791
1792Node *KeyValueNode::getValue() {
1793  if (Value)
1794    return Value;
1795  getKey()->skip();
1796  if (failed())
1797    return Value = new (getAllocator()) NullNode(Doc);
1798
1799  // Handle implicit null values.
1800  {
1801    Token &t = peekNext();
1802    if (   t.Kind == Token::TK_BlockEnd
1803        || t.Kind == Token::TK_FlowMappingEnd
1804        || t.Kind == Token::TK_Key
1805        || t.Kind == Token::TK_FlowEntry
1806        || t.Kind == Token::TK_Error) {
1807      return Value = new (getAllocator()) NullNode(Doc);
1808    }
1809
1810    if (t.Kind != Token::TK_Value) {
1811      setError("Unexpected token in Key Value.", t);
1812      return Value = new (getAllocator()) NullNode(Doc);
1813    }
1814    getNext(); // skip TK_Value.
1815  }
1816
1817  // Handle explicit null values.
1818  Token &t = peekNext();
1819  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
1820    return Value = new (getAllocator()) NullNode(Doc);
1821  }
1822
1823  // We got a normal value.
1824  return Value = parseBlockNode();
1825}
1826
1827void MappingNode::increment() {
1828  if (failed()) {
1829    IsAtEnd = true;
1830    CurrentEntry = 0;
1831    return;
1832  }
1833  if (CurrentEntry) {
1834    CurrentEntry->skip();
1835    if (Type == MT_Inline) {
1836      IsAtEnd = true;
1837      CurrentEntry = 0;
1838      return;
1839    }
1840  }
1841  Token T = peekNext();
1842  if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
1843    // KeyValueNode eats the TK_Key. That way it can detect null keys.
1844    CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
1845  } else if (Type == MT_Block) {
1846    switch (T.Kind) {
1847    case Token::TK_BlockEnd:
1848      getNext();
1849      IsAtEnd = true;
1850      CurrentEntry = 0;
1851      break;
1852    default:
1853      setError("Unexpected token. Expected Key or Block End", T);
1854    case Token::TK_Error:
1855      IsAtEnd = true;
1856      CurrentEntry = 0;
1857    }
1858  } else {
1859    switch (T.Kind) {
1860    case Token::TK_FlowEntry:
1861      // Eat the flow entry and recurse.
1862      getNext();
1863      return increment();
1864    case Token::TK_FlowMappingEnd:
1865      getNext();
1866    case Token::TK_Error:
1867      // Set this to end iterator.
1868      IsAtEnd = true;
1869      CurrentEntry = 0;
1870      break;
1871    default:
1872      setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
1873                "Mapping End."
1874              , T);
1875      IsAtEnd = true;
1876      CurrentEntry = 0;
1877    }
1878  }
1879}
1880
1881void SequenceNode::increment() {
1882  if (failed()) {
1883    IsAtEnd = true;
1884    CurrentEntry = 0;
1885    return;
1886  }
1887  if (CurrentEntry)
1888    CurrentEntry->skip();
1889  Token T = peekNext();
1890  if (SeqType == ST_Block) {
1891    switch (T.Kind) {
1892    case Token::TK_BlockEntry:
1893      getNext();
1894      CurrentEntry = parseBlockNode();
1895      if (CurrentEntry == 0) { // An error occurred.
1896        IsAtEnd = true;
1897        CurrentEntry = 0;
1898      }
1899      break;
1900    case Token::TK_BlockEnd:
1901      getNext();
1902      IsAtEnd = true;
1903      CurrentEntry = 0;
1904      break;
1905    default:
1906      setError( "Unexpected token. Expected Block Entry or Block End."
1907              , T);
1908    case Token::TK_Error:
1909      IsAtEnd = true;
1910      CurrentEntry = 0;
1911    }
1912  } else if (SeqType == ST_Indentless) {
1913    switch (T.Kind) {
1914    case Token::TK_BlockEntry:
1915      getNext();
1916      CurrentEntry = parseBlockNode();
1917      if (CurrentEntry == 0) { // An error occurred.
1918        IsAtEnd = true;
1919        CurrentEntry = 0;
1920      }
1921      break;
1922    default:
1923    case Token::TK_Error:
1924      IsAtEnd = true;
1925      CurrentEntry = 0;
1926    }
1927  } else if (SeqType == ST_Flow) {
1928    switch (T.Kind) {
1929    case Token::TK_FlowEntry:
1930      // Eat the flow entry and recurse.
1931      getNext();
1932      WasPreviousTokenFlowEntry = true;
1933      return increment();
1934    case Token::TK_FlowSequenceEnd:
1935      getNext();
1936    case Token::TK_Error:
1937      // Set this to end iterator.
1938      IsAtEnd = true;
1939      CurrentEntry = 0;
1940      break;
1941    case Token::TK_StreamEnd:
1942    case Token::TK_DocumentEnd:
1943    case Token::TK_DocumentStart:
1944      setError("Could not find closing ]!", T);
1945      // Set this to end iterator.
1946      IsAtEnd = true;
1947      CurrentEntry = 0;
1948      break;
1949    default:
1950      if (!WasPreviousTokenFlowEntry) {
1951        setError("Expected , between entries!", T);
1952        IsAtEnd = true;
1953        CurrentEntry = 0;
1954        break;
1955      }
1956      // Otherwise it must be a flow entry.
1957      CurrentEntry = parseBlockNode();
1958      if (!CurrentEntry) {
1959        IsAtEnd = true;
1960      }
1961      WasPreviousTokenFlowEntry = false;
1962      break;
1963    }
1964  }
1965}
1966
1967Document::Document(Stream &S) : stream(S), Root(0) {
1968  if (parseDirectives())
1969    expectToken(Token::TK_DocumentStart);
1970  Token &T = peekNext();
1971  if (T.Kind == Token::TK_DocumentStart)
1972    getNext();
1973}
1974
1975bool Document::skip()  {
1976  if (stream.scanner->failed())
1977    return false;
1978  if (!Root)
1979    getRoot();
1980  Root->skip();
1981  Token &T = peekNext();
1982  if (T.Kind == Token::TK_StreamEnd)
1983    return false;
1984  if (T.Kind == Token::TK_DocumentEnd) {
1985    getNext();
1986    return skip();
1987  }
1988  return true;
1989}
1990
1991Token &Document::peekNext() {
1992  return stream.scanner->peekNext();
1993}
1994
1995Token Document::getNext() {
1996  return stream.scanner->getNext();
1997}
1998
1999void Document::setError(const Twine &Message, Token &Location) const {
2000  stream.scanner->setError(Message, Location.Range.begin());
2001}
2002
2003bool Document::failed() const {
2004  return stream.scanner->failed();
2005}
2006
2007Node *Document::parseBlockNode() {
2008  Token T = peekNext();
2009  // Handle properties.
2010  Token AnchorInfo;
2011parse_property:
2012  switch (T.Kind) {
2013  case Token::TK_Alias:
2014    getNext();
2015    return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2016  case Token::TK_Anchor:
2017    if (AnchorInfo.Kind == Token::TK_Anchor) {
2018      setError("Already encountered an anchor for this node!", T);
2019      return 0;
2020    }
2021    AnchorInfo = getNext(); // Consume TK_Anchor.
2022    T = peekNext();
2023    goto parse_property;
2024  case Token::TK_Tag:
2025    getNext(); // Skip TK_Tag.
2026    T = peekNext();
2027    goto parse_property;
2028  default:
2029    break;
2030  }
2031
2032  switch (T.Kind) {
2033  case Token::TK_BlockEntry:
2034    // We got an unindented BlockEntry sequence. This is not terminated with
2035    // a BlockEnd.
2036    // Don't eat the TK_BlockEntry, SequenceNode needs it.
2037    return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2038                                           , AnchorInfo.Range.substr(1)
2039                                           , SequenceNode::ST_Indentless);
2040  case Token::TK_BlockSequenceStart:
2041    getNext();
2042    return new (NodeAllocator)
2043      SequenceNode( stream.CurrentDoc
2044                  , AnchorInfo.Range.substr(1)
2045                  , SequenceNode::ST_Block);
2046  case Token::TK_BlockMappingStart:
2047    getNext();
2048    return new (NodeAllocator)
2049      MappingNode( stream.CurrentDoc
2050                 , AnchorInfo.Range.substr(1)
2051                 , MappingNode::MT_Block);
2052  case Token::TK_FlowSequenceStart:
2053    getNext();
2054    return new (NodeAllocator)
2055      SequenceNode( stream.CurrentDoc
2056                  , AnchorInfo.Range.substr(1)
2057                  , SequenceNode::ST_Flow);
2058  case Token::TK_FlowMappingStart:
2059    getNext();
2060    return new (NodeAllocator)
2061      MappingNode( stream.CurrentDoc
2062                 , AnchorInfo.Range.substr(1)
2063                 , MappingNode::MT_Flow);
2064  case Token::TK_Scalar:
2065    getNext();
2066    return new (NodeAllocator)
2067      ScalarNode( stream.CurrentDoc
2068                , AnchorInfo.Range.substr(1)
2069                , T.Range);
2070  case Token::TK_Key:
2071    // Don't eat the TK_Key, KeyValueNode expects it.
2072    return new (NodeAllocator)
2073      MappingNode( stream.CurrentDoc
2074                 , AnchorInfo.Range.substr(1)
2075                 , MappingNode::MT_Inline);
2076  case Token::TK_DocumentStart:
2077  case Token::TK_DocumentEnd:
2078  case Token::TK_StreamEnd:
2079  default:
2080    // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2081    //       !!null null.
2082    return new (NodeAllocator) NullNode(stream.CurrentDoc);
2083  case Token::TK_Error:
2084    return 0;
2085  }
2086  llvm_unreachable("Control flow shouldn't reach here.");
2087  return 0;
2088}
2089
2090bool Document::parseDirectives() {
2091  bool isDirective = false;
2092  while (true) {
2093    Token T = peekNext();
2094    if (T.Kind == Token::TK_TagDirective) {
2095      handleTagDirective(getNext());
2096      isDirective = true;
2097    } else if (T.Kind == Token::TK_VersionDirective) {
2098      stream.handleYAMLDirective(getNext());
2099      isDirective = true;
2100    } else
2101      break;
2102  }
2103  return isDirective;
2104}
2105
2106bool Document::expectToken(int TK) {
2107  Token T = getNext();
2108  if (T.Kind != TK) {
2109    setError("Unexpected token", T);
2110    return false;
2111  }
2112  return true;
2113}
2114
2115OwningPtr<Document> document_iterator::NullDoc;
2116