1//===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a YAML 1.2 parser. 11// 12// See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13// 14// This currently does not implement the following: 15// * Multi-line literal folding. 16// * Tag resolution. 17// * UTF-16. 18// * BOMs anywhere other than the first Unicode scalar value in the file. 19// 20// The most important class here is Stream. This represents a YAML stream with 21// 0, 1, or many documents. 22// 23// SourceMgr sm; 24// StringRef input = getInput(); 25// yaml::Stream stream(input, sm); 26// 27// for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28// di != de; ++di) { 29// yaml::Node *n = di->getRoot(); 30// if (n) { 31// // Do something with n... 32// } else 33// break; 34// } 35// 36//===----------------------------------------------------------------------===// 37 38#ifndef LLVM_SUPPORT_YAMLPARSER_H 39#define LLVM_SUPPORT_YAMLPARSER_H 40 41#include "llvm/ADT/StringRef.h" 42#include "llvm/Support/Allocator.h" 43#include "llvm/Support/SMLoc.h" 44#include <limits> 45#include <map> 46#include <utility> 47 48namespace llvm { 49class MemoryBufferRef; 50class SourceMgr; 51class Twine; 52class raw_ostream; 53 54namespace yaml { 55 56class document_iterator; 57class Document; 58class Node; 59class Scanner; 60struct Token; 61 62/// \brief Dump all the tokens in this stream to OS. 63/// \returns true if there was an error, false otherwise. 64bool dumpTokens(StringRef Input, raw_ostream &); 65 66/// \brief Scans all tokens in input without outputting anything. This is used 67/// for benchmarking the tokenizer. 68/// \returns true if there was an error, false otherwise. 69bool scanTokens(StringRef Input); 70 71/// \brief Escape \a Input for a double quoted scalar. 72std::string escape(StringRef Input); 73 74/// \brief This class represents a YAML stream potentially containing multiple 75/// documents. 76class Stream { 77public: 78 /// \brief This keeps a reference to the string referenced by \p Input. 79 Stream(StringRef Input, SourceMgr &); 80 81 Stream(MemoryBufferRef InputBuffer, SourceMgr &); 82 ~Stream(); 83 84 document_iterator begin(); 85 document_iterator end(); 86 void skip(); 87 bool failed(); 88 bool validate() { 89 skip(); 90 return !failed(); 91 } 92 93 void printError(Node *N, const Twine &Msg); 94 95private: 96 std::unique_ptr<Scanner> scanner; 97 std::unique_ptr<Document> CurrentDoc; 98 99 friend class Document; 100}; 101 102/// \brief Abstract base class for all Nodes. 103class Node { 104 virtual void anchor(); 105 106public: 107 enum NodeKind { 108 NK_Null, 109 NK_Scalar, 110 NK_KeyValue, 111 NK_Mapping, 112 NK_Sequence, 113 NK_Alias 114 }; 115 116 Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, 117 StringRef Tag); 118 119 /// \brief Get the value of the anchor attached to this node. If it does not 120 /// have one, getAnchor().size() will be 0. 121 StringRef getAnchor() const { return Anchor; } 122 123 /// \brief Get the tag as it was written in the document. This does not 124 /// perform tag resolution. 125 StringRef getRawTag() const { return Tag; } 126 127 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 128 /// and substitution. 129 std::string getVerbatimTag() const; 130 131 SMRange getSourceRange() const { return SourceRange; } 132 void setSourceRange(SMRange SR) { SourceRange = SR; } 133 134 // These functions forward to Document and Scanner. 135 Token &peekNext(); 136 Token getNext(); 137 Node *parseBlockNode(); 138 BumpPtrAllocator &getAllocator(); 139 void setError(const Twine &Message, Token &Location) const; 140 bool failed() const; 141 142 virtual void skip() {} 143 144 unsigned int getType() const { return TypeID; } 145 146 void *operator new(size_t Size, BumpPtrAllocator &Alloc, 147 size_t Alignment = 16) throw() { 148 return Alloc.Allocate(Size, Alignment); 149 } 150 151 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() { 152 Alloc.Deallocate(Ptr, Size); 153 } 154 155protected: 156 std::unique_ptr<Document> &Doc; 157 SMRange SourceRange; 158 159 void operator delete(void *) throw() {} 160 161 virtual ~Node() {} 162 163private: 164 unsigned int TypeID; 165 StringRef Anchor; 166 /// \brief The tag as typed in the document. 167 StringRef Tag; 168}; 169 170/// \brief A null value. 171/// 172/// Example: 173/// !!null null 174class NullNode : public Node { 175 void anchor() override; 176 177public: 178 NullNode(std::unique_ptr<Document> &D) 179 : Node(NK_Null, D, StringRef(), StringRef()) {} 180 181 static inline bool classof(const Node *N) { return N->getType() == NK_Null; } 182}; 183 184/// \brief A scalar node is an opaque datum that can be presented as a 185/// series of zero or more Unicode scalar values. 186/// 187/// Example: 188/// Adena 189class ScalarNode : public Node { 190 void anchor() override; 191 192public: 193 ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 194 StringRef Val) 195 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 196 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 197 SMLoc End = SMLoc::getFromPointer(Val.end()); 198 SourceRange = SMRange(Start, End); 199 } 200 201 // Return Value without any escaping or folding or other fun YAML stuff. This 202 // is the exact bytes that are contained in the file (after conversion to 203 // utf8). 204 StringRef getRawValue() const { return Value; } 205 206 /// \brief Gets the value of this node as a StringRef. 207 /// 208 /// \param Storage is used to store the content of the returned StringRef iff 209 /// it requires any modification from how it appeared in the source. 210 /// This happens with escaped characters and multi-line literals. 211 StringRef getValue(SmallVectorImpl<char> &Storage) const; 212 213 static inline bool classof(const Node *N) { 214 return N->getType() == NK_Scalar; 215 } 216 217private: 218 StringRef Value; 219 220 StringRef unescapeDoubleQuoted(StringRef UnquotedValue, 221 StringRef::size_type Start, 222 SmallVectorImpl<char> &Storage) const; 223}; 224 225/// \brief A key and value pair. While not technically a Node under the YAML 226/// representation graph, it is easier to treat them this way. 227/// 228/// TODO: Consider making this not a child of Node. 229/// 230/// Example: 231/// Section: .text 232class KeyValueNode : public Node { 233 void anchor() override; 234 235public: 236 KeyValueNode(std::unique_ptr<Document> &D) 237 : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr), 238 Value(nullptr) {} 239 240 /// \brief Parse and return the key. 241 /// 242 /// This may be called multiple times. 243 /// 244 /// \returns The key, or nullptr if failed() == true. 245 Node *getKey(); 246 247 /// \brief Parse and return the value. 248 /// 249 /// This may be called multiple times. 250 /// 251 /// \returns The value, or nullptr if failed() == true. 252 Node *getValue(); 253 254 void skip() override { 255 getKey()->skip(); 256 getValue()->skip(); 257 } 258 259 static inline bool classof(const Node *N) { 260 return N->getType() == NK_KeyValue; 261 } 262 263private: 264 Node *Key; 265 Node *Value; 266}; 267 268/// \brief This is an iterator abstraction over YAML collections shared by both 269/// sequences and maps. 270/// 271/// BaseT must have a ValueT* member named CurrentEntry and a member function 272/// increment() which must set CurrentEntry to 0 to create an end iterator. 273template <class BaseT, class ValueT> 274class basic_collection_iterator 275 : public std::iterator<std::forward_iterator_tag, ValueT> { 276public: 277 basic_collection_iterator() : Base(nullptr) {} 278 basic_collection_iterator(BaseT *B) : Base(B) {} 279 280 ValueT *operator->() const { 281 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 282 return Base->CurrentEntry; 283 } 284 285 ValueT &operator*() const { 286 assert(Base && Base->CurrentEntry && 287 "Attempted to dereference end iterator!"); 288 return *Base->CurrentEntry; 289 } 290 291 operator ValueT *() const { 292 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 293 return Base->CurrentEntry; 294 } 295 296 bool operator!=(const basic_collection_iterator &Other) const { 297 if (Base != Other.Base) 298 return true; 299 return (Base && Other.Base) && 300 Base->CurrentEntry != Other.Base->CurrentEntry; 301 } 302 303 basic_collection_iterator &operator++() { 304 assert(Base && "Attempted to advance iterator past end!"); 305 Base->increment(); 306 // Create an end iterator. 307 if (!Base->CurrentEntry) 308 Base = nullptr; 309 return *this; 310 } 311 312private: 313 BaseT *Base; 314}; 315 316// The following two templates are used for both MappingNode and Sequence Node. 317template <class CollectionType> 318typename CollectionType::iterator begin(CollectionType &C) { 319 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 320 C.IsAtBeginning = false; 321 typename CollectionType::iterator ret(&C); 322 ++ret; 323 return ret; 324} 325 326template <class CollectionType> void skip(CollectionType &C) { 327 // TODO: support skipping from the middle of a parsed collection ;/ 328 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 329 if (C.IsAtBeginning) 330 for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; 331 ++i) 332 i->skip(); 333} 334 335/// \brief Represents a YAML map created from either a block map for a flow map. 336/// 337/// This parses the YAML stream as increment() is called. 338/// 339/// Example: 340/// Name: _main 341/// Scope: Global 342class MappingNode : public Node { 343 void anchor() override; 344 345public: 346 enum MappingType { 347 MT_Block, 348 MT_Flow, 349 MT_Inline ///< An inline mapping node is used for "[key: value]". 350 }; 351 352 MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 353 MappingType MT) 354 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), 355 IsAtEnd(false), CurrentEntry(nullptr) {} 356 357 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 358 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 359 template <class T> friend typename T::iterator yaml::begin(T &); 360 template <class T> friend void yaml::skip(T &); 361 362 iterator begin() { return yaml::begin(*this); } 363 364 iterator end() { return iterator(); } 365 366 void skip() override { yaml::skip(*this); } 367 368 static inline bool classof(const Node *N) { 369 return N->getType() == NK_Mapping; 370 } 371 372private: 373 MappingType Type; 374 bool IsAtBeginning; 375 bool IsAtEnd; 376 KeyValueNode *CurrentEntry; 377 378 void increment(); 379}; 380 381/// \brief Represents a YAML sequence created from either a block sequence for a 382/// flow sequence. 383/// 384/// This parses the YAML stream as increment() is called. 385/// 386/// Example: 387/// - Hello 388/// - World 389class SequenceNode : public Node { 390 void anchor() override; 391 392public: 393 enum SequenceType { 394 ST_Block, 395 ST_Flow, 396 // Use for: 397 // 398 // key: 399 // - val1 400 // - val2 401 // 402 // As a BlockMappingEntry and BlockEnd are not created in this case. 403 ST_Indentless 404 }; 405 406 SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 407 SequenceType ST) 408 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), 409 IsAtEnd(false), 410 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. 411 CurrentEntry(nullptr) {} 412 413 friend class basic_collection_iterator<SequenceNode, Node>; 414 typedef basic_collection_iterator<SequenceNode, Node> iterator; 415 template <class T> friend typename T::iterator yaml::begin(T &); 416 template <class T> friend void yaml::skip(T &); 417 418 void increment(); 419 420 iterator begin() { return yaml::begin(*this); } 421 422 iterator end() { return iterator(); } 423 424 void skip() override { yaml::skip(*this); } 425 426 static inline bool classof(const Node *N) { 427 return N->getType() == NK_Sequence; 428 } 429 430private: 431 SequenceType SeqType; 432 bool IsAtBeginning; 433 bool IsAtEnd; 434 bool WasPreviousTokenFlowEntry; 435 Node *CurrentEntry; 436}; 437 438/// \brief Represents an alias to a Node with an anchor. 439/// 440/// Example: 441/// *AnchorName 442class AliasNode : public Node { 443 void anchor() override; 444 445public: 446 AliasNode(std::unique_ptr<Document> &D, StringRef Val) 447 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 448 449 StringRef getName() const { return Name; } 450 Node *getTarget(); 451 452 static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } 453 454private: 455 StringRef Name; 456}; 457 458/// \brief A YAML Stream is a sequence of Documents. A document contains a root 459/// node. 460class Document { 461public: 462 /// \brief Root for parsing a node. Returns a single node. 463 Node *parseBlockNode(); 464 465 Document(Stream &ParentStream); 466 467 /// \brief Finish parsing the current document and return true if there are 468 /// more. Return false otherwise. 469 bool skip(); 470 471 /// \brief Parse and return the root level node. 472 Node *getRoot() { 473 if (Root) 474 return Root; 475 return Root = parseBlockNode(); 476 } 477 478 const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } 479 480private: 481 friend class Node; 482 friend class document_iterator; 483 484 /// \brief Stream to read tokens from. 485 Stream &stream; 486 487 /// \brief Used to allocate nodes to. All are destroyed without calling their 488 /// destructor when the document is destroyed. 489 BumpPtrAllocator NodeAllocator; 490 491 /// \brief The root node. Used to support skipping a partially parsed 492 /// document. 493 Node *Root; 494 495 /// \brief Maps tag prefixes to their expansion. 496 std::map<StringRef, StringRef> TagMap; 497 498 Token &peekNext(); 499 Token getNext(); 500 void setError(const Twine &Message, Token &Location) const; 501 bool failed() const; 502 503 /// \brief Parse %BLAH directives and return true if any were encountered. 504 bool parseDirectives(); 505 506 /// \brief Parse %YAML 507 void parseYAMLDirective(); 508 509 /// \brief Parse %TAG 510 void parseTAGDirective(); 511 512 /// \brief Consume the next token and error if it is not \a TK. 513 bool expectToken(int TK); 514}; 515 516/// \brief Iterator abstraction for Documents over a Stream. 517class document_iterator { 518public: 519 document_iterator() : Doc(nullptr) {} 520 document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} 521 522 bool operator==(const document_iterator &Other) { 523 if (isAtEnd() || Other.isAtEnd()) 524 return isAtEnd() && Other.isAtEnd(); 525 526 return Doc == Other.Doc; 527 } 528 bool operator!=(const document_iterator &Other) { return !(*this == Other); } 529 530 document_iterator operator++() { 531 assert(Doc && "incrementing iterator past the end."); 532 if (!(*Doc)->skip()) { 533 Doc->reset(nullptr); 534 } else { 535 Stream &S = (*Doc)->stream; 536 Doc->reset(new Document(S)); 537 } 538 return *this; 539 } 540 541 Document &operator*() { return *Doc->get(); } 542 543 std::unique_ptr<Document> &operator->() { return *Doc; } 544 545private: 546 bool isAtEnd() const { return !Doc || !*Doc; } 547 548 std::unique_ptr<Document> *Doc; 549}; 550 551} // End namespace yaml. 552 553} // End namespace llvm. 554 555#endif 556