1//===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a YAML 1.2 parser. 11// 12// See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13// 14// This currently does not implement the following: 15// * Multi-line literal folding. 16// * Tag resolution. 17// * UTF-16. 18// * BOMs anywhere other than the first Unicode scalar value in the file. 19// 20// The most important class here is Stream. This represents a YAML stream with 21// 0, 1, or many documents. 22// 23// SourceMgr sm; 24// StringRef input = getInput(); 25// yaml::Stream stream(input, sm); 26// 27// for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28// di != de; ++di) { 29// yaml::Node *n = di->getRoot(); 30// if (n) { 31// // Do something with n... 32// } else 33// break; 34// } 35// 36//===----------------------------------------------------------------------===// 37 38#ifndef LLVM_SUPPORT_YAMLPARSER_H 39#define LLVM_SUPPORT_YAMLPARSER_H 40 41#include "llvm/ADT/StringRef.h" 42#include "llvm/Support/Allocator.h" 43#include "llvm/Support/SMLoc.h" 44#include <limits> 45#include <map> 46#include <utility> 47 48namespace llvm { 49class MemoryBufferRef; 50class SourceMgr; 51class Twine; 52class raw_ostream; 53 54namespace yaml { 55 56class document_iterator; 57class Document; 58class Node; 59class Scanner; 60struct Token; 61 62/// \brief Dump all the tokens in this stream to OS. 63/// \returns true if there was an error, false otherwise. 64bool dumpTokens(StringRef Input, raw_ostream &); 65 66/// \brief Scans all tokens in input without outputting anything. This is used 67/// for benchmarking the tokenizer. 68/// \returns true if there was an error, false otherwise. 69bool scanTokens(StringRef Input); 70 71/// \brief Escape \a Input for a double quoted scalar. 72std::string escape(StringRef Input); 73 74/// \brief This class represents a YAML stream potentially containing multiple 75/// documents. 76class Stream { 77public: 78 /// \brief This keeps a reference to the string referenced by \p Input. 79 Stream(StringRef Input, SourceMgr &, bool ShowColors = true); 80 81 Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true); 82 ~Stream(); 83 84 document_iterator begin(); 85 document_iterator end(); 86 void skip(); 87 bool failed(); 88 bool validate() { 89 skip(); 90 return !failed(); 91 } 92 93 void printError(Node *N, const Twine &Msg); 94 95private: 96 std::unique_ptr<Scanner> scanner; 97 std::unique_ptr<Document> CurrentDoc; 98 99 friend class Document; 100}; 101 102/// \brief Abstract base class for all Nodes. 103class Node { 104 virtual void anchor(); 105 106public: 107 enum NodeKind { 108 NK_Null, 109 NK_Scalar, 110 NK_BlockScalar, 111 NK_KeyValue, 112 NK_Mapping, 113 NK_Sequence, 114 NK_Alias 115 }; 116 117 Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, 118 StringRef Tag); 119 120 /// \brief Get the value of the anchor attached to this node. If it does not 121 /// have one, getAnchor().size() will be 0. 122 StringRef getAnchor() const { return Anchor; } 123 124 /// \brief Get the tag as it was written in the document. This does not 125 /// perform tag resolution. 126 StringRef getRawTag() const { return Tag; } 127 128 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 129 /// and substitution. 130 std::string getVerbatimTag() const; 131 132 SMRange getSourceRange() const { return SourceRange; } 133 void setSourceRange(SMRange SR) { SourceRange = SR; } 134 135 // These functions forward to Document and Scanner. 136 Token &peekNext(); 137 Token getNext(); 138 Node *parseBlockNode(); 139 BumpPtrAllocator &getAllocator(); 140 void setError(const Twine &Message, Token &Location) const; 141 bool failed() const; 142 143 virtual void skip() {} 144 145 unsigned int getType() const { return TypeID; } 146 147 void *operator new(size_t Size, BumpPtrAllocator &Alloc, 148 size_t Alignment = 16) LLVM_NOEXCEPT { 149 return Alloc.Allocate(Size, Alignment); 150 } 151 152 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, 153 size_t Size) LLVM_NOEXCEPT { 154 Alloc.Deallocate(Ptr, Size); 155 } 156 157protected: 158 std::unique_ptr<Document> &Doc; 159 SMRange SourceRange; 160 161 void operator delete(void *) LLVM_NOEXCEPT = delete; 162 163 ~Node() = default; 164 165private: 166 unsigned int TypeID; 167 StringRef Anchor; 168 /// \brief The tag as typed in the document. 169 StringRef Tag; 170}; 171 172/// \brief A null value. 173/// 174/// Example: 175/// !!null null 176class NullNode final : public Node { 177 void anchor() override; 178 179public: 180 NullNode(std::unique_ptr<Document> &D) 181 : Node(NK_Null, D, StringRef(), StringRef()) {} 182 183 static inline bool classof(const Node *N) { return N->getType() == NK_Null; } 184}; 185 186/// \brief A scalar node is an opaque datum that can be presented as a 187/// series of zero or more Unicode scalar values. 188/// 189/// Example: 190/// Adena 191class ScalarNode final : public Node { 192 void anchor() override; 193 194public: 195 ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 196 StringRef Val) 197 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 198 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 199 SMLoc End = SMLoc::getFromPointer(Val.end()); 200 SourceRange = SMRange(Start, End); 201 } 202 203 // Return Value without any escaping or folding or other fun YAML stuff. This 204 // is the exact bytes that are contained in the file (after conversion to 205 // utf8). 206 StringRef getRawValue() const { return Value; } 207 208 /// \brief Gets the value of this node as a StringRef. 209 /// 210 /// \param Storage is used to store the content of the returned StringRef iff 211 /// it requires any modification from how it appeared in the source. 212 /// This happens with escaped characters and multi-line literals. 213 StringRef getValue(SmallVectorImpl<char> &Storage) const; 214 215 static inline bool classof(const Node *N) { 216 return N->getType() == NK_Scalar; 217 } 218 219private: 220 StringRef Value; 221 222 StringRef unescapeDoubleQuoted(StringRef UnquotedValue, 223 StringRef::size_type Start, 224 SmallVectorImpl<char> &Storage) const; 225}; 226 227/// \brief A block scalar node is an opaque datum that can be presented as a 228/// series of zero or more Unicode scalar values. 229/// 230/// Example: 231/// | 232/// Hello 233/// World 234class BlockScalarNode final : public Node { 235 void anchor() override; 236 237public: 238 BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 239 StringRef Value, StringRef RawVal) 240 : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) { 241 SMLoc Start = SMLoc::getFromPointer(RawVal.begin()); 242 SMLoc End = SMLoc::getFromPointer(RawVal.end()); 243 SourceRange = SMRange(Start, End); 244 } 245 246 /// \brief Gets the value of this node as a StringRef. 247 StringRef getValue() const { return Value; } 248 249 static inline bool classof(const Node *N) { 250 return N->getType() == NK_BlockScalar; 251 } 252 253private: 254 StringRef Value; 255}; 256 257/// \brief A key and value pair. While not technically a Node under the YAML 258/// representation graph, it is easier to treat them this way. 259/// 260/// TODO: Consider making this not a child of Node. 261/// 262/// Example: 263/// Section: .text 264class KeyValueNode final : public Node { 265 void anchor() override; 266 267public: 268 KeyValueNode(std::unique_ptr<Document> &D) 269 : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr), 270 Value(nullptr) {} 271 272 /// \brief Parse and return the key. 273 /// 274 /// This may be called multiple times. 275 /// 276 /// \returns The key, or nullptr if failed() == true. 277 Node *getKey(); 278 279 /// \brief Parse and return the value. 280 /// 281 /// This may be called multiple times. 282 /// 283 /// \returns The value, or nullptr if failed() == true. 284 Node *getValue(); 285 286 void skip() override { 287 getKey()->skip(); 288 if (Node *Val = getValue()) 289 Val->skip(); 290 } 291 292 static inline bool classof(const Node *N) { 293 return N->getType() == NK_KeyValue; 294 } 295 296private: 297 Node *Key; 298 Node *Value; 299}; 300 301/// \brief This is an iterator abstraction over YAML collections shared by both 302/// sequences and maps. 303/// 304/// BaseT must have a ValueT* member named CurrentEntry and a member function 305/// increment() which must set CurrentEntry to 0 to create an end iterator. 306template <class BaseT, class ValueT> 307class basic_collection_iterator 308 : public std::iterator<std::forward_iterator_tag, ValueT> { 309public: 310 basic_collection_iterator() : Base(nullptr) {} 311 basic_collection_iterator(BaseT *B) : Base(B) {} 312 313 ValueT *operator->() const { 314 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 315 return Base->CurrentEntry; 316 } 317 318 ValueT &operator*() const { 319 assert(Base && Base->CurrentEntry && 320 "Attempted to dereference end iterator!"); 321 return *Base->CurrentEntry; 322 } 323 324 operator ValueT *() const { 325 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 326 return Base->CurrentEntry; 327 } 328 329 bool operator!=(const basic_collection_iterator &Other) const { 330 if (Base != Other.Base) 331 return true; 332 return (Base && Other.Base) && 333 Base->CurrentEntry != Other.Base->CurrentEntry; 334 } 335 336 basic_collection_iterator &operator++() { 337 assert(Base && "Attempted to advance iterator past end!"); 338 Base->increment(); 339 // Create an end iterator. 340 if (!Base->CurrentEntry) 341 Base = nullptr; 342 return *this; 343 } 344 345private: 346 BaseT *Base; 347}; 348 349// The following two templates are used for both MappingNode and Sequence Node. 350template <class CollectionType> 351typename CollectionType::iterator begin(CollectionType &C) { 352 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 353 C.IsAtBeginning = false; 354 typename CollectionType::iterator ret(&C); 355 ++ret; 356 return ret; 357} 358 359template <class CollectionType> void skip(CollectionType &C) { 360 // TODO: support skipping from the middle of a parsed collection ;/ 361 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 362 if (C.IsAtBeginning) 363 for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; 364 ++i) 365 i->skip(); 366} 367 368/// \brief Represents a YAML map created from either a block map for a flow map. 369/// 370/// This parses the YAML stream as increment() is called. 371/// 372/// Example: 373/// Name: _main 374/// Scope: Global 375class MappingNode final : public Node { 376 void anchor() override; 377 378public: 379 enum MappingType { 380 MT_Block, 381 MT_Flow, 382 MT_Inline ///< An inline mapping node is used for "[key: value]". 383 }; 384 385 MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 386 MappingType MT) 387 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), 388 IsAtEnd(false), CurrentEntry(nullptr) {} 389 390 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 391 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 392 template <class T> friend typename T::iterator yaml::begin(T &); 393 template <class T> friend void yaml::skip(T &); 394 395 iterator begin() { return yaml::begin(*this); } 396 397 iterator end() { return iterator(); } 398 399 void skip() override { yaml::skip(*this); } 400 401 static inline bool classof(const Node *N) { 402 return N->getType() == NK_Mapping; 403 } 404 405private: 406 MappingType Type; 407 bool IsAtBeginning; 408 bool IsAtEnd; 409 KeyValueNode *CurrentEntry; 410 411 void increment(); 412}; 413 414/// \brief Represents a YAML sequence created from either a block sequence for a 415/// flow sequence. 416/// 417/// This parses the YAML stream as increment() is called. 418/// 419/// Example: 420/// - Hello 421/// - World 422class SequenceNode final : public Node { 423 void anchor() override; 424 425public: 426 enum SequenceType { 427 ST_Block, 428 ST_Flow, 429 // Use for: 430 // 431 // key: 432 // - val1 433 // - val2 434 // 435 // As a BlockMappingEntry and BlockEnd are not created in this case. 436 ST_Indentless 437 }; 438 439 SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 440 SequenceType ST) 441 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), 442 IsAtEnd(false), 443 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. 444 CurrentEntry(nullptr) {} 445 446 friend class basic_collection_iterator<SequenceNode, Node>; 447 typedef basic_collection_iterator<SequenceNode, Node> iterator; 448 template <class T> friend typename T::iterator yaml::begin(T &); 449 template <class T> friend void yaml::skip(T &); 450 451 void increment(); 452 453 iterator begin() { return yaml::begin(*this); } 454 455 iterator end() { return iterator(); } 456 457 void skip() override { yaml::skip(*this); } 458 459 static inline bool classof(const Node *N) { 460 return N->getType() == NK_Sequence; 461 } 462 463private: 464 SequenceType SeqType; 465 bool IsAtBeginning; 466 bool IsAtEnd; 467 bool WasPreviousTokenFlowEntry; 468 Node *CurrentEntry; 469}; 470 471/// \brief Represents an alias to a Node with an anchor. 472/// 473/// Example: 474/// *AnchorName 475class AliasNode final : public Node { 476 void anchor() override; 477 478public: 479 AliasNode(std::unique_ptr<Document> &D, StringRef Val) 480 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 481 482 StringRef getName() const { return Name; } 483 Node *getTarget(); 484 485 static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } 486 487private: 488 StringRef Name; 489}; 490 491/// \brief A YAML Stream is a sequence of Documents. A document contains a root 492/// node. 493class Document { 494public: 495 /// \brief Root for parsing a node. Returns a single node. 496 Node *parseBlockNode(); 497 498 Document(Stream &ParentStream); 499 500 /// \brief Finish parsing the current document and return true if there are 501 /// more. Return false otherwise. 502 bool skip(); 503 504 /// \brief Parse and return the root level node. 505 Node *getRoot() { 506 if (Root) 507 return Root; 508 return Root = parseBlockNode(); 509 } 510 511 const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } 512 513private: 514 friend class Node; 515 friend class document_iterator; 516 517 /// \brief Stream to read tokens from. 518 Stream &stream; 519 520 /// \brief Used to allocate nodes to. All are destroyed without calling their 521 /// destructor when the document is destroyed. 522 BumpPtrAllocator NodeAllocator; 523 524 /// \brief The root node. Used to support skipping a partially parsed 525 /// document. 526 Node *Root; 527 528 /// \brief Maps tag prefixes to their expansion. 529 std::map<StringRef, StringRef> TagMap; 530 531 Token &peekNext(); 532 Token getNext(); 533 void setError(const Twine &Message, Token &Location) const; 534 bool failed() const; 535 536 /// \brief Parse %BLAH directives and return true if any were encountered. 537 bool parseDirectives(); 538 539 /// \brief Parse %YAML 540 void parseYAMLDirective(); 541 542 /// \brief Parse %TAG 543 void parseTAGDirective(); 544 545 /// \brief Consume the next token and error if it is not \a TK. 546 bool expectToken(int TK); 547}; 548 549/// \brief Iterator abstraction for Documents over a Stream. 550class document_iterator { 551public: 552 document_iterator() : Doc(nullptr) {} 553 document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} 554 555 bool operator==(const document_iterator &Other) { 556 if (isAtEnd() || Other.isAtEnd()) 557 return isAtEnd() && Other.isAtEnd(); 558 559 return Doc == Other.Doc; 560 } 561 bool operator!=(const document_iterator &Other) { return !(*this == Other); } 562 563 document_iterator operator++() { 564 assert(Doc && "incrementing iterator past the end."); 565 if (!(*Doc)->skip()) { 566 Doc->reset(nullptr); 567 } else { 568 Stream &S = (*Doc)->stream; 569 Doc->reset(new Document(S)); 570 } 571 return *this; 572 } 573 574 Document &operator*() { return *Doc->get(); } 575 576 std::unique_ptr<Document> &operator->() { return *Doc; } 577 578private: 579 bool isAtEnd() const { return !Doc || !*Doc; } 580 581 std::unique_ptr<Document> *Doc; 582}; 583 584} // End namespace yaml. 585 586} // End namespace llvm. 587 588#endif 589