UnwrappedLineParser.cpp revision d19dc2ddf3f8630cfdbefec490c0000c14bee6bd
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "format-parser" 20 21#include "UnwrappedLineParser.h" 22#include "clang/Basic/Diagnostic.h" 23#include "llvm/Support/Debug.h" 24 25// Uncomment to get debug output from tests: 26// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0) 27 28namespace clang { 29namespace format { 30 31class ScopedMacroState : public FormatTokenSource { 32public: 33 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 34 FormatToken &ResetToken) 35 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 36 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 37 TokenSource = this; 38 Line.Level = 0; 39 Line.InPPDirective = true; 40 } 41 42 ~ScopedMacroState() { 43 TokenSource = PreviousTokenSource; 44 ResetToken = Token; 45 Line.InPPDirective = false; 46 Line.Level = PreviousLineLevel; 47 } 48 49 virtual FormatToken getNextToken() { 50 // The \c UnwrappedLineParser guards against this by never calling 51 // \c getNextToken() after it has encountered the first eof token. 52 assert(!eof()); 53 Token = PreviousTokenSource->getNextToken(); 54 if (eof()) 55 return createEOF(); 56 return Token; 57 } 58 59private: 60 bool eof() { 61 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 62 } 63 64 FormatToken createEOF() { 65 FormatToken FormatTok; 66 FormatTok.Tok.startToken(); 67 FormatTok.Tok.setKind(tok::eof); 68 return FormatTok; 69 } 70 71 UnwrappedLine &Line; 72 FormatTokenSource *&TokenSource; 73 FormatToken &ResetToken; 74 unsigned PreviousLineLevel; 75 FormatTokenSource *PreviousTokenSource; 76 77 FormatToken Token; 78}; 79 80class ScopedLineState { 81public: 82 ScopedLineState(UnwrappedLineParser &Parser, 83 bool SwitchToPreprocessorLines = false) 84 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 85 if (SwitchToPreprocessorLines) 86 Parser.CurrentLines = &Parser.PreprocessorDirectives; 87 PreBlockLine = Parser.Line.take(); 88 Parser.Line.reset(new UnwrappedLine()); 89 Parser.Line->Level = PreBlockLine->Level; 90 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 91 } 92 93 ~ScopedLineState() { 94 if (!Parser.Line->Tokens.empty()) { 95 Parser.addUnwrappedLine(); 96 } 97 assert(Parser.Line->Tokens.empty()); 98 Parser.Line.reset(PreBlockLine); 99 Parser.MustBreakBeforeNextToken = true; 100 if (SwitchToPreprocessorLines) 101 Parser.CurrentLines = &Parser.Lines; 102 } 103 104private: 105 UnwrappedLineParser &Parser; 106 const bool SwitchToPreprocessorLines; 107 108 UnwrappedLine *PreBlockLine; 109}; 110 111UnwrappedLineParser::UnwrappedLineParser( 112 clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 113 FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) 114 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 115 CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens), 116 Callback(Callback) {} 117 118bool UnwrappedLineParser::parse() { 119 DEBUG(llvm::dbgs() << "----\n"); 120 readToken(); 121 bool Error = parseFile(); 122 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), 123 E = Lines.end(); 124 I != E; ++I) { 125 Callback.consumeUnwrappedLine(*I); 126 } 127 return Error; 128} 129 130bool UnwrappedLineParser::parseFile() { 131 bool Error = parseLevel(/*HasOpeningBrace=*/false); 132 // Make sure to format the remaining tokens. 133 addUnwrappedLine(); 134 return Error; 135} 136 137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 138 bool Error = false; 139 do { 140 switch (FormatTok.Tok.getKind()) { 141 case tok::comment: 142 nextToken(); 143 addUnwrappedLine(); 144 break; 145 case tok::l_brace: 146 Error |= parseBlock(); 147 addUnwrappedLine(); 148 break; 149 case tok::r_brace: 150 if (HasOpeningBrace) { 151 return false; 152 } else { 153 Diag.Report(FormatTok.Tok.getLocation(), 154 Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, 155 "unexpected '}'")); 156 Error = true; 157 nextToken(); 158 addUnwrappedLine(); 159 } 160 break; 161 default: 162 parseStructuralElement(); 163 break; 164 } 165 } while (!eof()); 166 return Error; 167} 168 169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 170 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 171 nextToken(); 172 173 if (!FormatTok.Tok.is(tok::r_brace)) { 174 addUnwrappedLine(); 175 176 Line->Level += AddLevels; 177 parseLevel(/*HasOpeningBrace=*/true); 178 Line->Level -= AddLevels; 179 180 if (!FormatTok.Tok.is(tok::r_brace)) 181 return true; 182 183 } 184 nextToken(); // Munch the closing brace. 185 return false; 186} 187 188void UnwrappedLineParser::parsePPDirective() { 189 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 190 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 191 nextToken(); 192 193 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 194 addUnwrappedLine(); 195 return; 196 } 197 198 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 199 case tok::pp_define: 200 parsePPDefine(); 201 break; 202 default: 203 parsePPUnknown(); 204 break; 205 } 206} 207 208void UnwrappedLineParser::parsePPDefine() { 209 nextToken(); 210 211 if (FormatTok.Tok.getKind() != tok::identifier) { 212 parsePPUnknown(); 213 return; 214 } 215 nextToken(); 216 if (FormatTok.Tok.getKind() == tok::l_paren) { 217 parseParens(); 218 } 219 addUnwrappedLine(); 220 Line->Level = 1; 221 222 // Errors during a preprocessor directive can only affect the layout of the 223 // preprocessor directive, and thus we ignore them. An alternative approach 224 // would be to use the same approach we use on the file level (no 225 // re-indentation if there was a structural error) within the macro 226 // definition. 227 parseFile(); 228} 229 230void UnwrappedLineParser::parsePPUnknown() { 231 do { 232 nextToken(); 233 } while (!eof()); 234 addUnwrappedLine(); 235} 236 237void UnwrappedLineParser::parseComments() { 238 // Consume leading line comments, e.g. for branches without compounds. 239 while (FormatTok.Tok.is(tok::comment)) { 240 nextToken(); 241 addUnwrappedLine(); 242 } 243} 244 245void UnwrappedLineParser::parseStructuralElement() { 246 assert(!FormatTok.Tok.is(tok::l_brace)); 247 parseComments(); 248 249 int TokenNumber = 0; 250 switch (FormatTok.Tok.getKind()) { 251 case tok::at: 252 nextToken(); 253 switch (FormatTok.Tok.getObjCKeywordID()) { 254 case tok::objc_public: 255 case tok::objc_protected: 256 case tok::objc_package: 257 case tok::objc_private: 258 return parseAccessSpecifier(); 259 case tok::objc_interface: 260 case tok::objc_implementation: 261 return parseObjCInterfaceOrImplementation(); 262 case tok::objc_protocol: 263 return parseObjCProtocol(); 264 case tok::objc_end: 265 return; // Handled by the caller. 266 case tok::objc_optional: 267 case tok::objc_required: 268 nextToken(); 269 addUnwrappedLine(); 270 return; 271 default: 272 break; 273 } 274 break; 275 case tok::kw_namespace: 276 parseNamespace(); 277 return; 278 case tok::kw_inline: 279 nextToken(); 280 TokenNumber++; 281 if (FormatTok.Tok.is(tok::kw_namespace)) { 282 parseNamespace(); 283 return; 284 } 285 break; 286 case tok::kw_public: 287 case tok::kw_protected: 288 case tok::kw_private: 289 parseAccessSpecifier(); 290 return; 291 case tok::kw_if: 292 parseIfThenElse(); 293 return; 294 case tok::kw_for: 295 case tok::kw_while: 296 parseForOrWhileLoop(); 297 return; 298 case tok::kw_do: 299 parseDoWhile(); 300 return; 301 case tok::kw_switch: 302 parseSwitch(); 303 return; 304 case tok::kw_default: 305 nextToken(); 306 parseLabel(); 307 return; 308 case tok::kw_case: 309 parseCaseLabel(); 310 return; 311 case tok::kw_return: 312 parseReturn(); 313 return; 314 case tok::kw_extern: 315 nextToken(); 316 if (FormatTok.Tok.is(tok::string_literal)) { 317 nextToken(); 318 if (FormatTok.Tok.is(tok::l_brace)) { 319 parseBlock(0); 320 addUnwrappedLine(); 321 return; 322 } 323 } 324 // In all other cases, parse the declaration. 325 break; 326 default: 327 break; 328 } 329 do { 330 ++TokenNumber; 331 switch (FormatTok.Tok.getKind()) { 332 case tok::kw_enum: 333 parseEnum(); 334 return; 335 case tok::kw_struct: 336 case tok::kw_union: 337 case tok::kw_class: 338 parseRecord(); 339 // A record declaration or definition is always the start of a structural 340 // element. 341 break; 342 case tok::semi: 343 nextToken(); 344 addUnwrappedLine(); 345 return; 346 case tok::r_brace: 347 addUnwrappedLine(); 348 return; 349 case tok::l_paren: 350 parseParens(); 351 break; 352 case tok::l_brace: 353 // A block outside of parentheses must be the last part of a 354 // structural element. 355 // FIXME: Figure out cases where this is not true, and add projections for 356 // them (the one we know is missing are lambdas). 357 parseBlock(); 358 addUnwrappedLine(); 359 return; 360 case tok::identifier: 361 nextToken(); 362 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 363 parseLabel(); 364 return; 365 } 366 break; 367 case tok::equal: 368 nextToken(); 369 if (FormatTok.Tok.is(tok::l_brace)) { 370 parseBracedList(); 371 } 372 break; 373 default: 374 nextToken(); 375 break; 376 } 377 } while (!eof()); 378} 379 380void UnwrappedLineParser::parseBracedList() { 381 nextToken(); 382 383 do { 384 switch (FormatTok.Tok.getKind()) { 385 case tok::l_brace: 386 parseBracedList(); 387 break; 388 case tok::r_brace: 389 nextToken(); 390 return; 391 default: 392 nextToken(); 393 break; 394 } 395 } while (!eof()); 396} 397 398void UnwrappedLineParser::parseReturn() { 399 nextToken(); 400 401 do { 402 switch (FormatTok.Tok.getKind()) { 403 case tok::l_brace: 404 parseBracedList(); 405 break; 406 case tok::l_paren: 407 parseParens(); 408 break; 409 case tok::r_brace: 410 // Assume missing ';'. 411 addUnwrappedLine(); 412 return; 413 case tok::semi: 414 nextToken(); 415 addUnwrappedLine(); 416 return; 417 default: 418 nextToken(); 419 break; 420 } 421 } while (!eof()); 422} 423 424void UnwrappedLineParser::parseParens() { 425 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 426 nextToken(); 427 do { 428 switch (FormatTok.Tok.getKind()) { 429 case tok::l_paren: 430 parseParens(); 431 break; 432 case tok::r_paren: 433 nextToken(); 434 return; 435 case tok::l_brace: 436 { 437 nextToken(); 438 ScopedLineState LineState(*this); 439 Line->Level += 1; 440 parseLevel(/*HasOpeningBrace=*/true); 441 Line->Level -= 1; 442 } 443 break; 444 default: 445 nextToken(); 446 break; 447 } 448 } while (!eof()); 449} 450 451void UnwrappedLineParser::parseIfThenElse() { 452 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 453 nextToken(); 454 if (FormatTok.Tok.is(tok::l_paren)) 455 parseParens(); 456 bool NeedsUnwrappedLine = false; 457 if (FormatTok.Tok.is(tok::l_brace)) { 458 parseBlock(); 459 NeedsUnwrappedLine = true; 460 } else { 461 addUnwrappedLine(); 462 ++Line->Level; 463 parseStructuralElement(); 464 --Line->Level; 465 } 466 if (FormatTok.Tok.is(tok::kw_else)) { 467 nextToken(); 468 if (FormatTok.Tok.is(tok::l_brace)) { 469 parseBlock(); 470 addUnwrappedLine(); 471 } else if (FormatTok.Tok.is(tok::kw_if)) { 472 parseIfThenElse(); 473 } else { 474 addUnwrappedLine(); 475 ++Line->Level; 476 parseStructuralElement(); 477 --Line->Level; 478 } 479 } else if (NeedsUnwrappedLine) { 480 addUnwrappedLine(); 481 } 482} 483 484void UnwrappedLineParser::parseNamespace() { 485 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 486 nextToken(); 487 if (FormatTok.Tok.is(tok::identifier)) 488 nextToken(); 489 if (FormatTok.Tok.is(tok::l_brace)) { 490 parseBlock(0); 491 addUnwrappedLine(); 492 } 493 // FIXME: Add error handling. 494} 495 496void UnwrappedLineParser::parseForOrWhileLoop() { 497 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 498 "'for' or 'while' expected"); 499 nextToken(); 500 if (FormatTok.Tok.is(tok::l_paren)) 501 parseParens(); 502 if (FormatTok.Tok.is(tok::l_brace)) { 503 parseBlock(); 504 addUnwrappedLine(); 505 } else { 506 addUnwrappedLine(); 507 ++Line->Level; 508 parseStructuralElement(); 509 --Line->Level; 510 } 511} 512 513void UnwrappedLineParser::parseDoWhile() { 514 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 515 nextToken(); 516 if (FormatTok.Tok.is(tok::l_brace)) { 517 parseBlock(); 518 } else { 519 addUnwrappedLine(); 520 ++Line->Level; 521 parseStructuralElement(); 522 --Line->Level; 523 } 524 525 // FIXME: Add error handling. 526 if (!FormatTok.Tok.is(tok::kw_while)) { 527 addUnwrappedLine(); 528 return; 529 } 530 531 nextToken(); 532 parseStructuralElement(); 533} 534 535void UnwrappedLineParser::parseLabel() { 536 // FIXME: remove all asserts. 537 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 538 nextToken(); 539 unsigned OldLineLevel = Line->Level; 540 if (Line->Level > 0) 541 --Line->Level; 542 if (FormatTok.Tok.is(tok::l_brace)) { 543 parseBlock(); 544 if (FormatTok.Tok.is(tok::kw_break)) 545 parseStructuralElement(); // "break;" after "}" goes on the same line. 546 } 547 addUnwrappedLine(); 548 Line->Level = OldLineLevel; 549} 550 551void UnwrappedLineParser::parseCaseLabel() { 552 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 553 // FIXME: fix handling of complex expressions here. 554 do { 555 nextToken(); 556 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 557 parseLabel(); 558} 559 560void UnwrappedLineParser::parseSwitch() { 561 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 562 nextToken(); 563 if (FormatTok.Tok.is(tok::l_paren)) 564 parseParens(); 565 if (FormatTok.Tok.is(tok::l_brace)) { 566 parseBlock(Style.IndentCaseLabels ? 2 : 1); 567 addUnwrappedLine(); 568 } else { 569 addUnwrappedLine(); 570 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 571 parseStructuralElement(); 572 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 573 } 574} 575 576void UnwrappedLineParser::parseAccessSpecifier() { 577 nextToken(); 578 // Otherwise, we don't know what it is, and we'd better keep the next token. 579 if (FormatTok.Tok.is(tok::colon)) 580 nextToken(); 581 addUnwrappedLine(); 582} 583 584void UnwrappedLineParser::parseEnum() { 585 bool HasContents = false; 586 do { 587 switch (FormatTok.Tok.getKind()) { 588 case tok::l_brace: 589 nextToken(); 590 addUnwrappedLine(); 591 ++Line->Level; 592 parseComments(); 593 break; 594 case tok::l_paren: 595 parseParens(); 596 break; 597 case tok::comma: 598 nextToken(); 599 addUnwrappedLine(); 600 parseComments(); 601 break; 602 case tok::r_brace: 603 if (HasContents) 604 addUnwrappedLine(); 605 --Line->Level; 606 nextToken(); 607 break; 608 case tok::semi: 609 nextToken(); 610 addUnwrappedLine(); 611 return; 612 default: 613 HasContents = true; 614 nextToken(); 615 break; 616 } 617 } while (!eof()); 618} 619 620void UnwrappedLineParser::parseRecord() { 621 nextToken(); 622 if (FormatTok.Tok.is(tok::identifier) || 623 FormatTok.Tok.is(tok::kw___attribute) || 624 FormatTok.Tok.is(tok::kw___declspec)) { 625 nextToken(); 626 // We can have macros or attributes in between 'class' and the class name. 627 if (FormatTok.Tok.is(tok::l_paren)) { 628 parseParens(); 629 } 630 // The actual identifier can be a nested name specifier. 631 while (FormatTok.Tok.is(tok::identifier) || 632 FormatTok.Tok.is(tok::coloncolon)) 633 nextToken(); 634 635 // Note that parsing away template declarations here leads to incorrectly 636 // accepting function declarations as record declarations. 637 // In general, we cannot solve this problem. Consider: 638 // class A<int> B() {} 639 // which can be a function definition or a class definition when B() is a 640 // macro. If we find enough real-world cases where this is a problem, we 641 // can parse for the 'template' keyword in the beginning of the statement, 642 // and thus rule out the record production in case there is no template 643 // (this would still leave us with an ambiguity between template function 644 // and class declarations). 645 if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) { 646 while (FormatTok.Tok.isNot(tok::l_brace)) { 647 if (FormatTok.Tok.is(tok::semi)) 648 return; 649 nextToken(); 650 } 651 } 652 } 653 if (FormatTok.Tok.is(tok::l_brace)) 654 parseBlock(); 655 // We fall through to parsing a structural element afterwards, so 656 // class A {} n, m; 657 // will end up in one unwrapped line. 658} 659 660void UnwrappedLineParser::parseObjCProtocolList() { 661 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 662 do 663 nextToken(); 664 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 665 nextToken(); // Skip '>'. 666} 667 668void UnwrappedLineParser::parseObjCUntilAtEnd() { 669 do { 670 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 671 nextToken(); 672 addUnwrappedLine(); 673 break; 674 } 675 parseStructuralElement(); 676 } while (!eof()); 677} 678 679void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 680 nextToken(); 681 nextToken(); // interface name 682 683 // @interface can be followed by either a base class, or a category. 684 if (FormatTok.Tok.is(tok::colon)) { 685 nextToken(); 686 nextToken(); // base class name 687 } else if (FormatTok.Tok.is(tok::l_paren)) 688 // Skip category, if present. 689 parseParens(); 690 691 if (FormatTok.Tok.is(tok::less)) 692 parseObjCProtocolList(); 693 694 // If instance variables are present, keep the '{' on the first line too. 695 if (FormatTok.Tok.is(tok::l_brace)) 696 parseBlock(); 697 698 // With instance variables, this puts '}' on its own line. Without instance 699 // variables, this ends the @interface line. 700 addUnwrappedLine(); 701 702 parseObjCUntilAtEnd(); 703} 704 705void UnwrappedLineParser::parseObjCProtocol() { 706 nextToken(); 707 nextToken(); // protocol name 708 709 if (FormatTok.Tok.is(tok::less)) 710 parseObjCProtocolList(); 711 712 // Check for protocol declaration. 713 if (FormatTok.Tok.is(tok::semi)) { 714 nextToken(); 715 return addUnwrappedLine(); 716 } 717 718 addUnwrappedLine(); 719 parseObjCUntilAtEnd(); 720} 721 722void UnwrappedLineParser::addUnwrappedLine() { 723 if (Line->Tokens.empty()) 724 return; 725 // Consume trailing comments. 726 while (!eof() && FormatTok.NewlinesBefore == 0 && 727 FormatTok.Tok.is(tok::comment)) { 728 nextToken(); 729 } 730 DEBUG({ 731 llvm::dbgs() << "Line: "; 732 for (std::list<FormatToken>::iterator I = Line->Tokens.begin(), 733 E = Line->Tokens.end(); 734 I != E; ++I) { 735 llvm::dbgs() << I->Tok.getName() << " "; 736 737 } 738 llvm::dbgs() << "\n"; 739 }); 740 CurrentLines->push_back(*Line); 741 Line->Tokens.clear(); 742 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 743 for (std::vector<UnwrappedLine>::iterator I = PreprocessorDirectives 744 .begin(), E = PreprocessorDirectives.end(); 745 I != E; ++I) { 746 CurrentLines->push_back(*I); 747 } 748 PreprocessorDirectives.clear(); 749 } 750 751} 752 753bool UnwrappedLineParser::eof() const { 754 return FormatTok.Tok.is(tok::eof); 755} 756 757void UnwrappedLineParser::nextToken() { 758 if (eof()) 759 return; 760 Line->Tokens.push_back(FormatTok); 761 if (MustBreakBeforeNextToken) { 762 Line->Tokens.back().MustBreakBefore = true; 763 MustBreakBeforeNextToken = false; 764 } 765 readToken(); 766} 767 768void UnwrappedLineParser::readToken() { 769 FormatTok = Tokens->getNextToken(); 770 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 771 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 772 FormatTok.IsFirst)) { 773 // If there is an unfinished unwrapped line, we flush the preprocessor 774 // directives only after that unwrapped line was finished later. 775 bool SwitchToPreprocessorLines = !Line->Tokens.empty() && 776 CurrentLines == &Lines; 777 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 778 parsePPDirective(); 779 } 780} 781 782} // end namespace format 783} // end namespace clang 784