1//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a YAML 1.2 parser. 11// 12// See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13// 14// This currently does not implement the following: 15// * Multi-line literal folding. 16// * Tag resolution. 17// * UTF-16. 18// * BOMs anywhere other than the first Unicode scalar value in the file. 19// 20// The most important class here is Stream. This represents a YAML stream with 21// 0, 1, or many documents. 22// 23// SourceMgr sm; 24// StringRef input = getInput(); 25// yaml::Stream stream(input, sm); 26// 27// for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28// di != de; ++di) { 29// yaml::Node *n = di->getRoot(); 30// if (n) { 31// // Do something with n... 32// } else 33// break; 34// } 35// 36//===----------------------------------------------------------------------===// 37 38#ifndef LLVM_SUPPORT_YAMLPARSER_H 39#define LLVM_SUPPORT_YAMLPARSER_H 40 41#include "llvm/ADT/StringRef.h" 42#include "llvm/Support/Allocator.h" 43#include "llvm/Support/SMLoc.h" 44#include <cassert> 45#include <cstddef> 46#include <iterator> 47#include <map> 48#include <memory> 49#include <string> 50#include <system_error> 51 52namespace llvm { 53 54class MemoryBufferRef; 55class SourceMgr; 56class raw_ostream; 57class Twine; 58 59namespace yaml { 60 61class Document; 62class document_iterator; 63class Node; 64class Scanner; 65struct Token; 66 67/// \brief Dump all the tokens in this stream to OS. 68/// \returns true if there was an error, false otherwise. 69bool dumpTokens(StringRef Input, raw_ostream &); 70 71/// \brief Scans all tokens in input without outputting anything. This is used 72/// for benchmarking the tokenizer. 73/// \returns true if there was an error, false otherwise. 74bool scanTokens(StringRef Input); 75 76/// \brief Escape \a Input for a double quoted scalar. 77std::string escape(StringRef Input); 78 79/// \brief This class represents a YAML stream potentially containing multiple 80/// documents. 81class Stream { 82public: 83 /// \brief This keeps a reference to the string referenced by \p Input. 84 Stream(StringRef Input, SourceMgr &, bool ShowColors = true, 85 std::error_code *EC = nullptr); 86 87 Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true, 88 std::error_code *EC = nullptr); 89 ~Stream(); 90 91 document_iterator begin(); 92 document_iterator end(); 93 void skip(); 94 bool failed(); 95 96 bool validate() { 97 skip(); 98 return !failed(); 99 } 100 101 void printError(Node *N, const Twine &Msg); 102 103private: 104 friend class Document; 105 106 std::unique_ptr<Scanner> scanner; 107 std::unique_ptr<Document> CurrentDoc; 108}; 109 110/// \brief Abstract base class for all Nodes. 111class Node { 112 virtual void anchor(); 113 114public: 115 enum NodeKind { 116 NK_Null, 117 NK_Scalar, 118 NK_BlockScalar, 119 NK_KeyValue, 120 NK_Mapping, 121 NK_Sequence, 122 NK_Alias 123 }; 124 125 Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, 126 StringRef Tag); 127 128 void *operator new(size_t Size, BumpPtrAllocator &Alloc, 129 size_t Alignment = 16) noexcept { 130 return Alloc.Allocate(Size, Alignment); 131 } 132 133 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, 134 size_t Size) noexcept { 135 Alloc.Deallocate(Ptr, Size); 136 } 137 138 void operator delete(void *) noexcept = delete; 139 140 /// \brief Get the value of the anchor attached to this node. If it does not 141 /// have one, getAnchor().size() will be 0. 142 StringRef getAnchor() const { return Anchor; } 143 144 /// \brief Get the tag as it was written in the document. This does not 145 /// perform tag resolution. 146 StringRef getRawTag() const { return Tag; } 147 148 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 149 /// and substitution. 150 std::string getVerbatimTag() const; 151 152 SMRange getSourceRange() const { return SourceRange; } 153 void setSourceRange(SMRange SR) { SourceRange = SR; } 154 155 // These functions forward to Document and Scanner. 156 Token &peekNext(); 157 Token getNext(); 158 Node *parseBlockNode(); 159 BumpPtrAllocator &getAllocator(); 160 void setError(const Twine &Message, Token &Location) const; 161 bool failed() const; 162 163 virtual void skip() {} 164 165 unsigned int getType() const { return TypeID; } 166 167protected: 168 std::unique_ptr<Document> &Doc; 169 SMRange SourceRange; 170 171 ~Node() = default; 172 173private: 174 unsigned int TypeID; 175 StringRef Anchor; 176 /// \brief The tag as typed in the document. 177 StringRef Tag; 178}; 179 180/// \brief A null value. 181/// 182/// Example: 183/// !!null null 184class NullNode final : public Node { 185 void anchor() override; 186 187public: 188 NullNode(std::unique_ptr<Document> &D) 189 : Node(NK_Null, D, StringRef(), StringRef()) {} 190 191 static bool classof(const Node *N) { return N->getType() == NK_Null; } 192}; 193 194/// \brief A scalar node is an opaque datum that can be presented as a 195/// series of zero or more Unicode scalar values. 196/// 197/// Example: 198/// Adena 199class ScalarNode final : public Node { 200 void anchor() override; 201 202public: 203 ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 204 StringRef Val) 205 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 206 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 207 SMLoc End = SMLoc::getFromPointer(Val.end()); 208 SourceRange = SMRange(Start, End); 209 } 210 211 // Return Value without any escaping or folding or other fun YAML stuff. This 212 // is the exact bytes that are contained in the file (after conversion to 213 // utf8). 214 StringRef getRawValue() const { return Value; } 215 216 /// \brief Gets the value of this node as a StringRef. 217 /// 218 /// \param Storage is used to store the content of the returned StringRef iff 219 /// it requires any modification from how it appeared in the source. 220 /// This happens with escaped characters and multi-line literals. 221 StringRef getValue(SmallVectorImpl<char> &Storage) const; 222 223 static bool classof(const Node *N) { 224 return N->getType() == NK_Scalar; 225 } 226 227private: 228 StringRef Value; 229 230 StringRef unescapeDoubleQuoted(StringRef UnquotedValue, 231 StringRef::size_type Start, 232 SmallVectorImpl<char> &Storage) const; 233}; 234 235/// \brief A block scalar node is an opaque datum that can be presented as a 236/// series of zero or more Unicode scalar values. 237/// 238/// Example: 239/// | 240/// Hello 241/// World 242class BlockScalarNode final : public Node { 243 void anchor() override; 244 245public: 246 BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 247 StringRef Value, StringRef RawVal) 248 : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) { 249 SMLoc Start = SMLoc::getFromPointer(RawVal.begin()); 250 SMLoc End = SMLoc::getFromPointer(RawVal.end()); 251 SourceRange = SMRange(Start, End); 252 } 253 254 /// \brief Gets the value of this node as a StringRef. 255 StringRef getValue() const { return Value; } 256 257 static bool classof(const Node *N) { 258 return N->getType() == NK_BlockScalar; 259 } 260 261private: 262 StringRef Value; 263}; 264 265/// \brief A key and value pair. While not technically a Node under the YAML 266/// representation graph, it is easier to treat them this way. 267/// 268/// TODO: Consider making this not a child of Node. 269/// 270/// Example: 271/// Section: .text 272class KeyValueNode final : public Node { 273 void anchor() override; 274 275public: 276 KeyValueNode(std::unique_ptr<Document> &D) 277 : Node(NK_KeyValue, D, StringRef(), StringRef()) {} 278 279 /// \brief Parse and return the key. 280 /// 281 /// This may be called multiple times. 282 /// 283 /// \returns The key, or nullptr if failed() == true. 284 Node *getKey(); 285 286 /// \brief Parse and return the value. 287 /// 288 /// This may be called multiple times. 289 /// 290 /// \returns The value, or nullptr if failed() == true. 291 Node *getValue(); 292 293 void skip() override { 294 getKey()->skip(); 295 if (Node *Val = getValue()) 296 Val->skip(); 297 } 298 299 static bool classof(const Node *N) { 300 return N->getType() == NK_KeyValue; 301 } 302 303private: 304 Node *Key = nullptr; 305 Node *Value = nullptr; 306}; 307 308/// \brief This is an iterator abstraction over YAML collections shared by both 309/// sequences and maps. 310/// 311/// BaseT must have a ValueT* member named CurrentEntry and a member function 312/// increment() which must set CurrentEntry to 0 to create an end iterator. 313template <class BaseT, class ValueT> 314class basic_collection_iterator 315 : public std::iterator<std::input_iterator_tag, ValueT> { 316public: 317 basic_collection_iterator() = default; 318 basic_collection_iterator(BaseT *B) : Base(B) {} 319 320 ValueT *operator->() const { 321 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 322 return Base->CurrentEntry; 323 } 324 325 ValueT &operator*() const { 326 assert(Base && Base->CurrentEntry && 327 "Attempted to dereference end iterator!"); 328 return *Base->CurrentEntry; 329 } 330 331 operator ValueT *() const { 332 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 333 return Base->CurrentEntry; 334 } 335 336 /// Note on EqualityComparable: 337 /// 338 /// The iterator is not re-entrant, 339 /// it is meant to be used for parsing YAML on-demand 340 /// Once iteration started - it can point only to one entry at a time 341 /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal 342 /// iff Base and Other.Base are equal. 343 bool operator==(const basic_collection_iterator &Other) const { 344 if (Base && (Base == Other.Base)) { 345 assert((Base->CurrentEntry == Other.Base->CurrentEntry) 346 && "Equal Bases expected to point to equal Entries"); 347 } 348 349 return Base == Other.Base; 350 } 351 352 bool operator!=(const basic_collection_iterator &Other) const { 353 return !(Base == Other.Base); 354 } 355 356 basic_collection_iterator &operator++() { 357 assert(Base && "Attempted to advance iterator past end!"); 358 Base->increment(); 359 // Create an end iterator. 360 if (!Base->CurrentEntry) 361 Base = nullptr; 362 return *this; 363 } 364 365private: 366 BaseT *Base = nullptr; 367}; 368 369// The following two templates are used for both MappingNode and Sequence Node. 370template <class CollectionType> 371typename CollectionType::iterator begin(CollectionType &C) { 372 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 373 C.IsAtBeginning = false; 374 typename CollectionType::iterator ret(&C); 375 ++ret; 376 return ret; 377} 378 379template <class CollectionType> void skip(CollectionType &C) { 380 // TODO: support skipping from the middle of a parsed collection ;/ 381 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 382 if (C.IsAtBeginning) 383 for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; 384 ++i) 385 i->skip(); 386} 387 388/// \brief Represents a YAML map created from either a block map for a flow map. 389/// 390/// This parses the YAML stream as increment() is called. 391/// 392/// Example: 393/// Name: _main 394/// Scope: Global 395class MappingNode final : public Node { 396 void anchor() override; 397 398public: 399 enum MappingType { 400 MT_Block, 401 MT_Flow, 402 MT_Inline ///< An inline mapping node is used for "[key: value]". 403 }; 404 405 MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 406 MappingType MT) 407 : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {} 408 409 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 410 411 using iterator = basic_collection_iterator<MappingNode, KeyValueNode>; 412 413 template <class T> friend typename T::iterator yaml::begin(T &); 414 template <class T> friend void yaml::skip(T &); 415 416 iterator begin() { return yaml::begin(*this); } 417 418 iterator end() { return iterator(); } 419 420 void skip() override { yaml::skip(*this); } 421 422 static bool classof(const Node *N) { 423 return N->getType() == NK_Mapping; 424 } 425 426private: 427 MappingType Type; 428 bool IsAtBeginning = true; 429 bool IsAtEnd = false; 430 KeyValueNode *CurrentEntry = nullptr; 431 432 void increment(); 433}; 434 435/// \brief Represents a YAML sequence created from either a block sequence for a 436/// flow sequence. 437/// 438/// This parses the YAML stream as increment() is called. 439/// 440/// Example: 441/// - Hello 442/// - World 443class SequenceNode final : public Node { 444 void anchor() override; 445 446public: 447 enum SequenceType { 448 ST_Block, 449 ST_Flow, 450 // Use for: 451 // 452 // key: 453 // - val1 454 // - val2 455 // 456 // As a BlockMappingEntry and BlockEnd are not created in this case. 457 ST_Indentless 458 }; 459 460 SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 461 SequenceType ST) 462 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {} 463 464 friend class basic_collection_iterator<SequenceNode, Node>; 465 466 using iterator = basic_collection_iterator<SequenceNode, Node>; 467 468 template <class T> friend typename T::iterator yaml::begin(T &); 469 template <class T> friend void yaml::skip(T &); 470 471 void increment(); 472 473 iterator begin() { return yaml::begin(*this); } 474 475 iterator end() { return iterator(); } 476 477 void skip() override { yaml::skip(*this); } 478 479 static bool classof(const Node *N) { 480 return N->getType() == NK_Sequence; 481 } 482 483private: 484 SequenceType SeqType; 485 bool IsAtBeginning = true; 486 bool IsAtEnd = false; 487 bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','. 488 Node *CurrentEntry = nullptr; 489}; 490 491/// \brief Represents an alias to a Node with an anchor. 492/// 493/// Example: 494/// *AnchorName 495class AliasNode final : public Node { 496 void anchor() override; 497 498public: 499 AliasNode(std::unique_ptr<Document> &D, StringRef Val) 500 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 501 502 StringRef getName() const { return Name; } 503 Node *getTarget(); 504 505 static bool classof(const Node *N) { return N->getType() == NK_Alias; } 506 507private: 508 StringRef Name; 509}; 510 511/// \brief A YAML Stream is a sequence of Documents. A document contains a root 512/// node. 513class Document { 514public: 515 Document(Stream &ParentStream); 516 517 /// \brief Root for parsing a node. Returns a single node. 518 Node *parseBlockNode(); 519 520 /// \brief Finish parsing the current document and return true if there are 521 /// more. Return false otherwise. 522 bool skip(); 523 524 /// \brief Parse and return the root level node. 525 Node *getRoot() { 526 if (Root) 527 return Root; 528 return Root = parseBlockNode(); 529 } 530 531 const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } 532 533private: 534 friend class Node; 535 friend class document_iterator; 536 537 /// \brief Stream to read tokens from. 538 Stream &stream; 539 540 /// \brief Used to allocate nodes to. All are destroyed without calling their 541 /// destructor when the document is destroyed. 542 BumpPtrAllocator NodeAllocator; 543 544 /// \brief The root node. Used to support skipping a partially parsed 545 /// document. 546 Node *Root; 547 548 /// \brief Maps tag prefixes to their expansion. 549 std::map<StringRef, StringRef> TagMap; 550 551 Token &peekNext(); 552 Token getNext(); 553 void setError(const Twine &Message, Token &Location) const; 554 bool failed() const; 555 556 /// \brief Parse %BLAH directives and return true if any were encountered. 557 bool parseDirectives(); 558 559 /// \brief Parse %YAML 560 void parseYAMLDirective(); 561 562 /// \brief Parse %TAG 563 void parseTAGDirective(); 564 565 /// \brief Consume the next token and error if it is not \a TK. 566 bool expectToken(int TK); 567}; 568 569/// \brief Iterator abstraction for Documents over a Stream. 570class document_iterator { 571public: 572 document_iterator() = default; 573 document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} 574 575 bool operator==(const document_iterator &Other) const { 576 if (isAtEnd() || Other.isAtEnd()) 577 return isAtEnd() && Other.isAtEnd(); 578 579 return Doc == Other.Doc; 580 } 581 bool operator!=(const document_iterator &Other) const { 582 return !(*this == Other); 583 } 584 585 document_iterator operator++() { 586 assert(Doc && "incrementing iterator past the end."); 587 if (!(*Doc)->skip()) { 588 Doc->reset(nullptr); 589 } else { 590 Stream &S = (*Doc)->stream; 591 Doc->reset(new Document(S)); 592 } 593 return *this; 594 } 595 596 Document &operator*() { return *Doc->get(); } 597 598 std::unique_ptr<Document> &operator->() { return *Doc; } 599 600private: 601 bool isAtEnd() const { return !Doc || !*Doc; } 602 603 std::unique_ptr<Document> *Doc = nullptr; 604}; 605 606} // end namespace yaml 607 608} // end namespace llvm 609 610#endif // LLVM_SUPPORT_YAMLPARSER_H 611