1//===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a YAML 1.2 parser. 11// 12// See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13// 14// This currently does not implement the following: 15// * Multi-line literal folding. 16// * Tag resolution. 17// * UTF-16. 18// * BOMs anywhere other than the first Unicode scalar value in the file. 19// 20// The most important class here is Stream. This represents a YAML stream with 21// 0, 1, or many documents. 22// 23// SourceMgr sm; 24// StringRef input = getInput(); 25// yaml::Stream stream(input, sm); 26// 27// for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28// di != de; ++di) { 29// yaml::Node *n = di->getRoot(); 30// if (n) { 31// // Do something with n... 32// } else 33// break; 34// } 35// 36//===----------------------------------------------------------------------===// 37 38#ifndef LLVM_SUPPORT_YAMLPARSER_H 39#define LLVM_SUPPORT_YAMLPARSER_H 40 41#include "llvm/ADT/SmallString.h" 42#include "llvm/ADT/StringRef.h" 43#include "llvm/Support/Allocator.h" 44#include "llvm/Support/SMLoc.h" 45#include <limits> 46#include <map> 47#include <utility> 48 49namespace llvm { 50class MemoryBuffer; 51class SourceMgr; 52class raw_ostream; 53class Twine; 54 55namespace yaml { 56 57class document_iterator; 58class Document; 59class Node; 60class Scanner; 61struct Token; 62 63/// \brief Dump all the tokens in this stream to OS. 64/// \returns true if there was an error, false otherwise. 65bool dumpTokens(StringRef Input, raw_ostream &); 66 67/// \brief Scans all tokens in input without outputting anything. This is used 68/// for benchmarking the tokenizer. 69/// \returns true if there was an error, false otherwise. 70bool scanTokens(StringRef Input); 71 72/// \brief Escape \a Input for a double quoted scalar. 73std::string escape(StringRef Input); 74 75/// \brief This class represents a YAML stream potentially containing multiple 76/// documents. 77class Stream { 78public: 79 /// \brief This keeps a reference to the string referenced by \p Input. 80 Stream(StringRef Input, SourceMgr &); 81 82 /// \brief This takes ownership of \p InputBuffer. 83 Stream(MemoryBuffer *InputBuffer, SourceMgr &); 84 ~Stream(); 85 86 document_iterator begin(); 87 document_iterator end(); 88 void skip(); 89 bool failed(); 90 bool validate() { 91 skip(); 92 return !failed(); 93 } 94 95 void printError(Node *N, const Twine &Msg); 96 97private: 98 std::unique_ptr<Scanner> scanner; 99 std::unique_ptr<Document> CurrentDoc; 100 101 friend class Document; 102}; 103 104/// \brief Abstract base class for all Nodes. 105class Node { 106 virtual void anchor(); 107 108public: 109 enum NodeKind { 110 NK_Null, 111 NK_Scalar, 112 NK_KeyValue, 113 NK_Mapping, 114 NK_Sequence, 115 NK_Alias 116 }; 117 118 Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, 119 StringRef Tag); 120 121 /// \brief Get the value of the anchor attached to this node. If it does not 122 /// have one, getAnchor().size() will be 0. 123 StringRef getAnchor() const { return Anchor; } 124 125 /// \brief Get the tag as it was written in the document. This does not 126 /// perform tag resolution. 127 StringRef getRawTag() const { return Tag; } 128 129 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 130 /// and substitution. 131 std::string getVerbatimTag() const; 132 133 SMRange getSourceRange() const { return SourceRange; } 134 void setSourceRange(SMRange SR) { SourceRange = SR; } 135 136 // These functions forward to Document and Scanner. 137 Token &peekNext(); 138 Token getNext(); 139 Node *parseBlockNode(); 140 BumpPtrAllocator &getAllocator(); 141 void setError(const Twine &Message, Token &Location) const; 142 bool failed() const; 143 144 virtual void skip() {} 145 146 unsigned int getType() const { return TypeID; } 147 148 void *operator new(size_t Size, BumpPtrAllocator &Alloc, 149 size_t Alignment = 16) throw() { 150 return Alloc.Allocate(Size, Alignment); 151 } 152 153 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() { 154 Alloc.Deallocate(Ptr, Size); 155 } 156 157protected: 158 std::unique_ptr<Document> &Doc; 159 SMRange SourceRange; 160 161 void operator delete(void *) throw() {} 162 163 virtual ~Node() {} 164 165private: 166 unsigned int TypeID; 167 StringRef Anchor; 168 /// \brief The tag as typed in the document. 169 StringRef Tag; 170}; 171 172/// \brief A null value. 173/// 174/// Example: 175/// !!null null 176class NullNode : public Node { 177 void anchor() override; 178 179public: 180 NullNode(std::unique_ptr<Document> &D) 181 : Node(NK_Null, D, StringRef(), StringRef()) {} 182 183 static inline bool classof(const Node *N) { return N->getType() == NK_Null; } 184}; 185 186/// \brief A scalar node is an opaque datum that can be presented as a 187/// series of zero or more Unicode scalar values. 188/// 189/// Example: 190/// Adena 191class ScalarNode : public Node { 192 void anchor() override; 193 194public: 195 ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 196 StringRef Val) 197 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 198 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 199 SMLoc End = SMLoc::getFromPointer(Val.end()); 200 SourceRange = SMRange(Start, End); 201 } 202 203 // Return Value without any escaping or folding or other fun YAML stuff. This 204 // is the exact bytes that are contained in the file (after conversion to 205 // utf8). 206 StringRef getRawValue() const { return Value; } 207 208 /// \brief Gets the value of this node as a StringRef. 209 /// 210 /// \param Storage is used to store the content of the returned StringRef iff 211 /// it requires any modification from how it appeared in the source. 212 /// This happens with escaped characters and multi-line literals. 213 StringRef getValue(SmallVectorImpl<char> &Storage) const; 214 215 static inline bool classof(const Node *N) { 216 return N->getType() == NK_Scalar; 217 } 218 219private: 220 StringRef Value; 221 222 StringRef unescapeDoubleQuoted(StringRef UnquotedValue, 223 StringRef::size_type Start, 224 SmallVectorImpl<char> &Storage) const; 225}; 226 227/// \brief A key and value pair. While not technically a Node under the YAML 228/// representation graph, it is easier to treat them this way. 229/// 230/// TODO: Consider making this not a child of Node. 231/// 232/// Example: 233/// Section: .text 234class KeyValueNode : public Node { 235 void anchor() override; 236 237public: 238 KeyValueNode(std::unique_ptr<Document> &D) 239 : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr), 240 Value(nullptr) {} 241 242 /// \brief Parse and return the key. 243 /// 244 /// This may be called multiple times. 245 /// 246 /// \returns The key, or nullptr if failed() == true. 247 Node *getKey(); 248 249 /// \brief Parse and return the value. 250 /// 251 /// This may be called multiple times. 252 /// 253 /// \returns The value, or nullptr if failed() == true. 254 Node *getValue(); 255 256 void skip() override { 257 getKey()->skip(); 258 getValue()->skip(); 259 } 260 261 static inline bool classof(const Node *N) { 262 return N->getType() == NK_KeyValue; 263 } 264 265private: 266 Node *Key; 267 Node *Value; 268}; 269 270/// \brief This is an iterator abstraction over YAML collections shared by both 271/// sequences and maps. 272/// 273/// BaseT must have a ValueT* member named CurrentEntry and a member function 274/// increment() which must set CurrentEntry to 0 to create an end iterator. 275template <class BaseT, class ValueT> 276class basic_collection_iterator 277 : public std::iterator<std::forward_iterator_tag, ValueT> { 278public: 279 basic_collection_iterator() : Base(nullptr) {} 280 basic_collection_iterator(BaseT *B) : Base(B) {} 281 282 ValueT *operator->() const { 283 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 284 return Base->CurrentEntry; 285 } 286 287 ValueT &operator*() const { 288 assert(Base && Base->CurrentEntry && 289 "Attempted to dereference end iterator!"); 290 return *Base->CurrentEntry; 291 } 292 293 operator ValueT *() const { 294 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 295 return Base->CurrentEntry; 296 } 297 298 bool operator!=(const basic_collection_iterator &Other) const { 299 if (Base != Other.Base) 300 return true; 301 return (Base && Other.Base) && 302 Base->CurrentEntry != Other.Base->CurrentEntry; 303 } 304 305 basic_collection_iterator &operator++() { 306 assert(Base && "Attempted to advance iterator past end!"); 307 Base->increment(); 308 // Create an end iterator. 309 if (!Base->CurrentEntry) 310 Base = nullptr; 311 return *this; 312 } 313 314private: 315 BaseT *Base; 316}; 317 318// The following two templates are used for both MappingNode and Sequence Node. 319template <class CollectionType> 320typename CollectionType::iterator begin(CollectionType &C) { 321 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 322 C.IsAtBeginning = false; 323 typename CollectionType::iterator ret(&C); 324 ++ret; 325 return ret; 326} 327 328template <class CollectionType> void skip(CollectionType &C) { 329 // TODO: support skipping from the middle of a parsed collection ;/ 330 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 331 if (C.IsAtBeginning) 332 for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; 333 ++i) 334 i->skip(); 335} 336 337/// \brief Represents a YAML map created from either a block map for a flow map. 338/// 339/// This parses the YAML stream as increment() is called. 340/// 341/// Example: 342/// Name: _main 343/// Scope: Global 344class MappingNode : public Node { 345 void anchor() override; 346 347public: 348 enum MappingType { 349 MT_Block, 350 MT_Flow, 351 MT_Inline ///< An inline mapping node is used for "[key: value]". 352 }; 353 354 MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 355 MappingType MT) 356 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), 357 IsAtEnd(false), CurrentEntry(nullptr) {} 358 359 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 360 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 361 template <class T> friend typename T::iterator yaml::begin(T &); 362 template <class T> friend void yaml::skip(T &); 363 364 iterator begin() { return yaml::begin(*this); } 365 366 iterator end() { return iterator(); } 367 368 void skip() override { yaml::skip(*this); } 369 370 static inline bool classof(const Node *N) { 371 return N->getType() == NK_Mapping; 372 } 373 374private: 375 MappingType Type; 376 bool IsAtBeginning; 377 bool IsAtEnd; 378 KeyValueNode *CurrentEntry; 379 380 void increment(); 381}; 382 383/// \brief Represents a YAML sequence created from either a block sequence for a 384/// flow sequence. 385/// 386/// This parses the YAML stream as increment() is called. 387/// 388/// Example: 389/// - Hello 390/// - World 391class SequenceNode : public Node { 392 void anchor() override; 393 394public: 395 enum SequenceType { 396 ST_Block, 397 ST_Flow, 398 // Use for: 399 // 400 // key: 401 // - val1 402 // - val2 403 // 404 // As a BlockMappingEntry and BlockEnd are not created in this case. 405 ST_Indentless 406 }; 407 408 SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 409 SequenceType ST) 410 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), 411 IsAtEnd(false), 412 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. 413 CurrentEntry(nullptr) {} 414 415 friend class basic_collection_iterator<SequenceNode, Node>; 416 typedef basic_collection_iterator<SequenceNode, Node> iterator; 417 template <class T> friend typename T::iterator yaml::begin(T &); 418 template <class T> friend void yaml::skip(T &); 419 420 void increment(); 421 422 iterator begin() { return yaml::begin(*this); } 423 424 iterator end() { return iterator(); } 425 426 void skip() override { yaml::skip(*this); } 427 428 static inline bool classof(const Node *N) { 429 return N->getType() == NK_Sequence; 430 } 431 432private: 433 SequenceType SeqType; 434 bool IsAtBeginning; 435 bool IsAtEnd; 436 bool WasPreviousTokenFlowEntry; 437 Node *CurrentEntry; 438}; 439 440/// \brief Represents an alias to a Node with an anchor. 441/// 442/// Example: 443/// *AnchorName 444class AliasNode : public Node { 445 void anchor() override; 446 447public: 448 AliasNode(std::unique_ptr<Document> &D, StringRef Val) 449 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 450 451 StringRef getName() const { return Name; } 452 Node *getTarget(); 453 454 static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } 455 456private: 457 StringRef Name; 458}; 459 460/// \brief A YAML Stream is a sequence of Documents. A document contains a root 461/// node. 462class Document { 463public: 464 /// \brief Root for parsing a node. Returns a single node. 465 Node *parseBlockNode(); 466 467 Document(Stream &ParentStream); 468 469 /// \brief Finish parsing the current document and return true if there are 470 /// more. Return false otherwise. 471 bool skip(); 472 473 /// \brief Parse and return the root level node. 474 Node *getRoot() { 475 if (Root) 476 return Root; 477 return Root = parseBlockNode(); 478 } 479 480 const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } 481 482private: 483 friend class Node; 484 friend class document_iterator; 485 486 /// \brief Stream to read tokens from. 487 Stream &stream; 488 489 /// \brief Used to allocate nodes to. All are destroyed without calling their 490 /// destructor when the document is destroyed. 491 BumpPtrAllocator NodeAllocator; 492 493 /// \brief The root node. Used to support skipping a partially parsed 494 /// document. 495 Node *Root; 496 497 /// \brief Maps tag prefixes to their expansion. 498 std::map<StringRef, StringRef> TagMap; 499 500 Token &peekNext(); 501 Token getNext(); 502 void setError(const Twine &Message, Token &Location) const; 503 bool failed() const; 504 505 /// \brief Parse %BLAH directives and return true if any were encountered. 506 bool parseDirectives(); 507 508 /// \brief Parse %YAML 509 void parseYAMLDirective(); 510 511 /// \brief Parse %TAG 512 void parseTAGDirective(); 513 514 /// \brief Consume the next token and error if it is not \a TK. 515 bool expectToken(int TK); 516}; 517 518/// \brief Iterator abstraction for Documents over a Stream. 519class document_iterator { 520public: 521 document_iterator() : Doc(nullptr) {} 522 document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} 523 524 bool operator==(const document_iterator &Other) { 525 if (isAtEnd() || Other.isAtEnd()) 526 return isAtEnd() && Other.isAtEnd(); 527 528 return Doc == Other.Doc; 529 } 530 bool operator!=(const document_iterator &Other) { return !(*this == Other); } 531 532 document_iterator operator++() { 533 assert(Doc && "incrementing iterator past the end."); 534 if (!(*Doc)->skip()) { 535 Doc->reset(nullptr); 536 } else { 537 Stream &S = (*Doc)->stream; 538 Doc->reset(new Document(S)); 539 } 540 return *this; 541 } 542 543 Document &operator*() { return *Doc->get(); } 544 545 std::unique_ptr<Document> &operator->() { return *Doc; } 546 547private: 548 bool isAtEnd() const { return !Doc || !*Doc; } 549 550 std::unique_ptr<Document> *Doc; 551}; 552 553} // End namespace yaml. 554 555} // End namespace llvm. 556 557#endif 558