1//===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a YAML 1.2 parser. 11// 12// See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13// 14// This currently does not implement the following: 15// * Multi-line literal folding. 16// * Tag resolution. 17// * UTF-16. 18// * BOMs anywhere other than the first Unicode scalar value in the file. 19// 20// The most important class here is Stream. This represents a YAML stream with 21// 0, 1, or many documents. 22// 23// SourceMgr sm; 24// StringRef input = getInput(); 25// yaml::Stream stream(input, sm); 26// 27// for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28// di != de; ++di) { 29// yaml::Node *n = di->getRoot(); 30// if (n) { 31// // Do something with n... 32// } else 33// break; 34// } 35// 36//===----------------------------------------------------------------------===// 37 38#ifndef LLVM_SUPPORT_YAML_PARSER_H 39#define LLVM_SUPPORT_YAML_PARSER_H 40 41#include "llvm/ADT/OwningPtr.h" 42#include "llvm/ADT/SmallString.h" 43#include "llvm/ADT/StringRef.h" 44#include "llvm/Support/Allocator.h" 45#include "llvm/Support/SMLoc.h" 46 47#include <limits> 48#include <utility> 49 50namespace llvm { 51class MemoryBuffer; 52class SourceMgr; 53class raw_ostream; 54class Twine; 55 56namespace yaml { 57 58class document_iterator; 59class Document; 60class Node; 61class Scanner; 62struct Token; 63 64/// @brief Dump all the tokens in this stream to OS. 65/// @returns true if there was an error, false otherwise. 66bool dumpTokens(StringRef Input, raw_ostream &); 67 68/// @brief Scans all tokens in input without outputting anything. This is used 69/// for benchmarking the tokenizer. 70/// @returns true if there was an error, false otherwise. 71bool scanTokens(StringRef Input); 72 73/// @brief Escape \a Input for a double quoted scalar. 74std::string escape(StringRef Input); 75 76/// @brief This class represents a YAML stream potentially containing multiple 77/// documents. 78class Stream { 79public: 80 Stream(StringRef Input, SourceMgr &); 81 ~Stream(); 82 83 document_iterator begin(); 84 document_iterator end(); 85 void skip(); 86 bool failed(); 87 bool validate() { 88 skip(); 89 return !failed(); 90 } 91 92 void printError(Node *N, const Twine &Msg); 93 94private: 95 OwningPtr<Scanner> scanner; 96 OwningPtr<Document> CurrentDoc; 97 98 friend class Document; 99 100 /// @brief Validate a %YAML x.x directive. 101 void handleYAMLDirective(const Token &); 102}; 103 104/// @brief Abstract base class for all Nodes. 105class Node { 106public: 107 enum NodeKind { 108 NK_Null, 109 NK_Scalar, 110 NK_KeyValue, 111 NK_Mapping, 112 NK_Sequence, 113 NK_Alias 114 }; 115 116 Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor); 117 118 /// @brief Get the value of the anchor attached to this node. If it does not 119 /// have one, getAnchor().size() will be 0. 120 StringRef getAnchor() const { return Anchor; } 121 122 SMRange getSourceRange() const { return SourceRange; } 123 void setSourceRange(SMRange SR) { SourceRange = SR; } 124 125 // These functions forward to Document and Scanner. 126 Token &peekNext(); 127 Token getNext(); 128 Node *parseBlockNode(); 129 BumpPtrAllocator &getAllocator(); 130 void setError(const Twine &Message, Token &Location) const; 131 bool failed() const; 132 133 virtual void skip() {} 134 135 unsigned int getType() const { return TypeID; } 136 static inline bool classof(const Node *) { return true; } 137 138 void *operator new ( size_t Size 139 , BumpPtrAllocator &Alloc 140 , size_t Alignment = 16) throw() { 141 return Alloc.Allocate(Size, Alignment); 142 } 143 144 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { 145 Alloc.Deallocate(Ptr); 146 } 147 148protected: 149 OwningPtr<Document> &Doc; 150 SMRange SourceRange; 151 152 void operator delete(void *) throw() {} 153 154 virtual ~Node() {} 155 156private: 157 unsigned int TypeID; 158 StringRef Anchor; 159}; 160 161/// @brief A null value. 162/// 163/// Example: 164/// !!null null 165class NullNode : public Node { 166public: 167 NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {} 168 169 static inline bool classof(const NullNode *) { return true; } 170 static inline bool classof(const Node *N) { 171 return N->getType() == NK_Null; 172 } 173}; 174 175/// @brief A scalar node is an opaque datum that can be presented as a 176/// series of zero or more Unicode scalar values. 177/// 178/// Example: 179/// Adena 180class ScalarNode : public Node { 181public: 182 ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val) 183 : Node(NK_Scalar, D, Anchor) 184 , Value(Val) { 185 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 186 SMLoc End = SMLoc::getFromPointer(Val.end() - 1); 187 SourceRange = SMRange(Start, End); 188 } 189 190 // Return Value without any escaping or folding or other fun YAML stuff. This 191 // is the exact bytes that are contained in the file (after conversion to 192 // utf8). 193 StringRef getRawValue() const { return Value; } 194 195 /// @brief Gets the value of this node as a StringRef. 196 /// 197 /// @param Storage is used to store the content of the returned StringRef iff 198 /// it requires any modification from how it appeared in the source. 199 /// This happens with escaped characters and multi-line literals. 200 StringRef getValue(SmallVectorImpl<char> &Storage) const; 201 202 static inline bool classof(const ScalarNode *) { return true; } 203 static inline bool classof(const Node *N) { 204 return N->getType() == NK_Scalar; 205 } 206 207private: 208 StringRef Value; 209 210 StringRef unescapeDoubleQuoted( StringRef UnquotedValue 211 , StringRef::size_type Start 212 , SmallVectorImpl<char> &Storage) const; 213}; 214 215/// @brief A key and value pair. While not technically a Node under the YAML 216/// representation graph, it is easier to treat them this way. 217/// 218/// TODO: Consider making this not a child of Node. 219/// 220/// Example: 221/// Section: .text 222class KeyValueNode : public Node { 223public: 224 KeyValueNode(OwningPtr<Document> &D) 225 : Node(NK_KeyValue, D, StringRef()) 226 , Key(0) 227 , Value(0) 228 {} 229 230 /// @brief Parse and return the key. 231 /// 232 /// This may be called multiple times. 233 /// 234 /// @returns The key, or nullptr if failed() == true. 235 Node *getKey(); 236 237 /// @brief Parse and return the value. 238 /// 239 /// This may be called multiple times. 240 /// 241 /// @returns The value, or nullptr if failed() == true. 242 Node *getValue(); 243 244 virtual void skip() { 245 getKey()->skip(); 246 getValue()->skip(); 247 } 248 249 static inline bool classof(const KeyValueNode *) { return true; } 250 static inline bool classof(const Node *N) { 251 return N->getType() == NK_KeyValue; 252 } 253 254private: 255 Node *Key; 256 Node *Value; 257}; 258 259/// @brief This is an iterator abstraction over YAML collections shared by both 260/// sequences and maps. 261/// 262/// BaseT must have a ValueT* member named CurrentEntry and a member function 263/// increment() which must set CurrentEntry to 0 to create an end iterator. 264template <class BaseT, class ValueT> 265class basic_collection_iterator 266 : public std::iterator<std::forward_iterator_tag, ValueT> { 267public: 268 basic_collection_iterator() : Base(0) {} 269 basic_collection_iterator(BaseT *B) : Base(B) {} 270 271 ValueT *operator ->() const { 272 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 273 return Base->CurrentEntry; 274 } 275 276 ValueT &operator *() const { 277 assert(Base && Base->CurrentEntry && 278 "Attempted to dereference end iterator!"); 279 return *Base->CurrentEntry; 280 } 281 282 operator ValueT*() const { 283 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 284 return Base->CurrentEntry; 285 } 286 287 bool operator !=(const basic_collection_iterator &Other) const { 288 if(Base != Other.Base) 289 return true; 290 return (Base && Other.Base) && Base->CurrentEntry 291 != Other.Base->CurrentEntry; 292 } 293 294 basic_collection_iterator &operator++() { 295 assert(Base && "Attempted to advance iterator past end!"); 296 Base->increment(); 297 // Create an end iterator. 298 if (Base->CurrentEntry == 0) 299 Base = 0; 300 return *this; 301 } 302 303private: 304 BaseT *Base; 305}; 306 307// The following two templates are used for both MappingNode and Sequence Node. 308template <class CollectionType> 309typename CollectionType::iterator begin(CollectionType &C) { 310 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 311 C.IsAtBeginning = false; 312 typename CollectionType::iterator ret(&C); 313 ++ret; 314 return ret; 315} 316 317template <class CollectionType> 318void skip(CollectionType &C) { 319 // TODO: support skipping from the middle of a parsed collection ;/ 320 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 321 if (C.IsAtBeginning) 322 for (typename CollectionType::iterator i = begin(C), e = C.end(); 323 i != e; ++i) 324 i->skip(); 325} 326 327/// @brief Represents a YAML map created from either a block map for a flow map. 328/// 329/// This parses the YAML stream as increment() is called. 330/// 331/// Example: 332/// Name: _main 333/// Scope: Global 334class MappingNode : public Node { 335public: 336 enum MappingType { 337 MT_Block, 338 MT_Flow, 339 MT_Inline ///< An inline mapping node is used for "[key: value]". 340 }; 341 342 MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT) 343 : Node(NK_Mapping, D, Anchor) 344 , Type(MT) 345 , IsAtBeginning(true) 346 , IsAtEnd(false) 347 , CurrentEntry(0) 348 {} 349 350 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 351 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 352 template <class T> friend typename T::iterator yaml::begin(T &); 353 template <class T> friend void yaml::skip(T &); 354 355 iterator begin() { 356 return yaml::begin(*this); 357 } 358 359 iterator end() { return iterator(); } 360 361 virtual void skip() { 362 yaml::skip(*this); 363 } 364 365 static inline bool classof(const MappingNode *) { return true; } 366 static inline bool classof(const Node *N) { 367 return N->getType() == NK_Mapping; 368 } 369 370private: 371 MappingType Type; 372 bool IsAtBeginning; 373 bool IsAtEnd; 374 KeyValueNode *CurrentEntry; 375 376 void increment(); 377}; 378 379/// @brief Represents a YAML sequence created from either a block sequence for a 380/// flow sequence. 381/// 382/// This parses the YAML stream as increment() is called. 383/// 384/// Example: 385/// - Hello 386/// - World 387class SequenceNode : public Node { 388public: 389 enum SequenceType { 390 ST_Block, 391 ST_Flow, 392 // Use for: 393 // 394 // key: 395 // - val1 396 // - val2 397 // 398 // As a BlockMappingEntry and BlockEnd are not created in this case. 399 ST_Indentless 400 }; 401 402 SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST) 403 : Node(NK_Sequence, D, Anchor) 404 , SeqType(ST) 405 , IsAtBeginning(true) 406 , IsAtEnd(false) 407 , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. 408 , CurrentEntry(0) 409 {} 410 411 friend class basic_collection_iterator<SequenceNode, Node>; 412 typedef basic_collection_iterator<SequenceNode, Node> iterator; 413 template <class T> friend typename T::iterator yaml::begin(T &); 414 template <class T> friend void yaml::skip(T &); 415 416 void increment(); 417 418 iterator begin() { 419 return yaml::begin(*this); 420 } 421 422 iterator end() { return iterator(); } 423 424 virtual void skip() { 425 yaml::skip(*this); 426 } 427 428 static inline bool classof(const SequenceNode *) { return true; } 429 static inline bool classof(const Node *N) { 430 return N->getType() == NK_Sequence; 431 } 432 433private: 434 SequenceType SeqType; 435 bool IsAtBeginning; 436 bool IsAtEnd; 437 bool WasPreviousTokenFlowEntry; 438 Node *CurrentEntry; 439}; 440 441/// @brief Represents an alias to a Node with an anchor. 442/// 443/// Example: 444/// *AnchorName 445class AliasNode : public Node { 446public: 447 AliasNode(OwningPtr<Document> &D, StringRef Val) 448 : Node(NK_Alias, D, StringRef()), Name(Val) {} 449 450 StringRef getName() const { return Name; } 451 Node *getTarget(); 452 453 static inline bool classof(const ScalarNode *) { return true; } 454 static inline bool classof(const Node *N) { 455 return N->getType() == NK_Alias; 456 } 457 458private: 459 StringRef Name; 460}; 461 462/// @brief A YAML Stream is a sequence of Documents. A document contains a root 463/// node. 464class Document { 465public: 466 /// @brief Root for parsing a node. Returns a single node. 467 Node *parseBlockNode(); 468 469 Document(Stream &ParentStream); 470 471 /// @brief Finish parsing the current document and return true if there are 472 /// more. Return false otherwise. 473 bool skip(); 474 475 /// @brief Parse and return the root level node. 476 Node *getRoot() { 477 if (Root) 478 return Root; 479 return Root = parseBlockNode(); 480 } 481 482private: 483 friend class Node; 484 friend class document_iterator; 485 486 /// @brief Stream to read tokens from. 487 Stream &stream; 488 489 /// @brief Used to allocate nodes to. All are destroyed without calling their 490 /// destructor when the document is destroyed. 491 BumpPtrAllocator NodeAllocator; 492 493 /// @brief The root node. Used to support skipping a partially parsed 494 /// document. 495 Node *Root; 496 497 Token &peekNext(); 498 Token getNext(); 499 void setError(const Twine &Message, Token &Location) const; 500 bool failed() const; 501 502 void handleTagDirective(const Token &Tag) { 503 // TODO: Track tags. 504 } 505 506 /// @brief Parse %BLAH directives and return true if any were encountered. 507 bool parseDirectives(); 508 509 /// @brief Consume the next token and error if it is not \a TK. 510 bool expectToken(int TK); 511}; 512 513/// @brief Iterator abstraction for Documents over a Stream. 514class document_iterator { 515public: 516 document_iterator() : Doc(0) {} 517 document_iterator(OwningPtr<Document> &D) : Doc(&D) {} 518 519 bool operator ==(const document_iterator &Other) { 520 if (isAtEnd() || Other.isAtEnd()) 521 return isAtEnd() && Other.isAtEnd(); 522 523 return *Doc == *Other.Doc; 524 } 525 bool operator !=(const document_iterator &Other) { 526 return !(*this == Other); 527 } 528 529 document_iterator operator ++() { 530 assert(Doc != 0 && "incrementing iterator past the end."); 531 if (!(*Doc)->skip()) { 532 Doc->reset(0); 533 } else { 534 Stream &S = (*Doc)->stream; 535 Doc->reset(new Document(S)); 536 } 537 return *this; 538 } 539 540 Document &operator *() { 541 return *Doc->get(); 542 } 543 544 OwningPtr<Document> &operator ->() { 545 return *Doc; 546 } 547 548private: 549 bool isAtEnd() const { 550 return Doc == 0 || *Doc == 0; 551 } 552 553 OwningPtr<Document> *Doc; 554}; 555 556} 557} 558 559#endif 560