// UnwrappedLineParser.cpp revision 3a3408cceb438af7cdebb5b3d938abb916162bb4
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "format-parser" 20 21#include "UnwrappedLineParser.h" 22#include "clang/Basic/Diagnostic.h" 23#include "llvm/Support/Debug.h" 24 25// Uncomment to get debug output from tests: 26// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0) 27 28namespace clang { 29namespace format { 30 31class ScopedMacroState : public FormatTokenSource { 32public: 33 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 34 FormatToken &ResetToken) 35 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 36 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 37 TokenSource = this; 38 Line.Level = 0; 39 Line.InPPDirective = true; 40 } 41 42 ~ScopedMacroState() { 43 TokenSource = PreviousTokenSource; 44 ResetToken = Token; 45 Line.InPPDirective = false; 46 Line.Level = PreviousLineLevel; 47 } 48 49 virtual FormatToken getNextToken() { 50 // The \c UnwrappedLineParser guards against this by never calling 51 // \c getNextToken() after it has encountered the first eof token. 
52 assert(!eof()); 53 Token = PreviousTokenSource->getNextToken(); 54 if (eof()) 55 return createEOF(); 56 return Token; 57 } 58 59private: 60 bool eof() { 61 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 62 } 63 64 FormatToken createEOF() { 65 FormatToken FormatTok; 66 FormatTok.Tok.startToken(); 67 FormatTok.Tok.setKind(tok::eof); 68 return FormatTok; 69 } 70 71 UnwrappedLine &Line; 72 FormatTokenSource *&TokenSource; 73 FormatToken &ResetToken; 74 unsigned PreviousLineLevel; 75 FormatTokenSource *PreviousTokenSource; 76 77 FormatToken Token; 78}; 79 80class ScopedLineState { 81public: 82 ScopedLineState(UnwrappedLineParser &Parser, 83 bool SwitchToPreprocessorLines = false) 84 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 85 if (SwitchToPreprocessorLines) 86 Parser.CurrentLines = &Parser.PreprocessorDirectives; 87 PreBlockLine = Parser.Line.take(); 88 Parser.Line.reset(new UnwrappedLine()); 89 Parser.Line->Level = PreBlockLine->Level; 90 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 91 } 92 93 ~ScopedLineState() { 94 if (!Parser.Line->Tokens.empty()) { 95 Parser.addUnwrappedLine(); 96 } 97 assert(Parser.Line->Tokens.empty()); 98 Parser.Line.reset(PreBlockLine); 99 Parser.MustBreakBeforeNextToken = true; 100 if (SwitchToPreprocessorLines) 101 Parser.CurrentLines = &Parser.Lines; 102 } 103 104private: 105 UnwrappedLineParser &Parser; 106 const bool SwitchToPreprocessorLines; 107 108 UnwrappedLine *PreBlockLine; 109}; 110 111UnwrappedLineParser::UnwrappedLineParser( 112 clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 113 FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) 114 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 115 CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens), 116 Callback(Callback) {} 117 118bool UnwrappedLineParser::parse() { 119 DEBUG(llvm::dbgs() << "----\n"); 120 readToken(); 121 bool Error = parseFile(); 122 for 
(std::vector<UnwrappedLine>::iterator I = Lines.begin(), 123 E = Lines.end(); 124 I != E; ++I) { 125 Callback.consumeUnwrappedLine(*I); 126 } 127 return Error; 128} 129 130bool UnwrappedLineParser::parseFile() { 131 bool Error = parseLevel(/*HasOpeningBrace=*/false); 132 // Make sure to format the remaining tokens. 133 addUnwrappedLine(); 134 return Error; 135} 136 137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 138 bool Error = false; 139 do { 140 switch (FormatTok.Tok.getKind()) { 141 case tok::comment: 142 nextToken(); 143 addUnwrappedLine(); 144 break; 145 case tok::l_brace: 146 Error |= parseBlock(); 147 addUnwrappedLine(); 148 break; 149 case tok::r_brace: 150 if (HasOpeningBrace) { 151 return false; 152 } else { 153 Diag.Report(FormatTok.Tok.getLocation(), 154 Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, 155 "unexpected '}'")); 156 Error = true; 157 nextToken(); 158 addUnwrappedLine(); 159 } 160 break; 161 default: 162 parseStructuralElement(); 163 break; 164 } 165 } while (!eof()); 166 return Error; 167} 168 169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 170 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 171 nextToken(); 172 173 if (!FormatTok.Tok.is(tok::r_brace)) { 174 addUnwrappedLine(); 175 176 Line->Level += AddLevels; 177 parseLevel(/*HasOpeningBrace=*/true); 178 Line->Level -= AddLevels; 179 180 if (!FormatTok.Tok.is(tok::r_brace)) 181 return true; 182 183 } 184 nextToken(); // Munch the closing brace. 
185 return false; 186} 187 188void UnwrappedLineParser::parsePPDirective() { 189 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 190 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 191 nextToken(); 192 193 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 194 addUnwrappedLine(); 195 return; 196 } 197 198 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 199 case tok::pp_define: 200 parsePPDefine(); 201 break; 202 default: 203 parsePPUnknown(); 204 break; 205 } 206} 207 208void UnwrappedLineParser::parsePPDefine() { 209 nextToken(); 210 211 if (FormatTok.Tok.getKind() != tok::identifier) { 212 parsePPUnknown(); 213 return; 214 } 215 nextToken(); 216 if (FormatTok.Tok.getKind() == tok::l_paren) { 217 parseParens(); 218 } 219 addUnwrappedLine(); 220 Line->Level = 1; 221 222 // Errors during a preprocessor directive can only affect the layout of the 223 // preprocessor directive, and thus we ignore them. An alternative approach 224 // would be to use the same approach we use on the file level (no 225 // re-indentation if there was a structural error) within the macro 226 // definition. 227 parseFile(); 228} 229 230void UnwrappedLineParser::parsePPUnknown() { 231 do { 232 nextToken(); 233 } while (!eof()); 234 addUnwrappedLine(); 235} 236 237void UnwrappedLineParser::parseComments() { 238 // Consume leading line comments, e.g. for branches without compounds. 
239 while (FormatTok.Tok.is(tok::comment)) { 240 nextToken(); 241 addUnwrappedLine(); 242 } 243} 244 245void UnwrappedLineParser::parseStructuralElement() { 246 assert(!FormatTok.Tok.is(tok::l_brace)); 247 parseComments(); 248 249 int TokenNumber = 0; 250 switch (FormatTok.Tok.getKind()) { 251 case tok::at: 252 nextToken(); 253 switch (FormatTok.Tok.getObjCKeywordID()) { 254 case tok::objc_public: 255 case tok::objc_protected: 256 case tok::objc_package: 257 case tok::objc_private: 258 return parseAccessSpecifier(); 259 case tok::objc_interface: 260 case tok::objc_implementation: 261 return parseObjCInterfaceOrImplementation(); 262 case tok::objc_protocol: 263 return parseObjCProtocol(); 264 case tok::objc_end: 265 return; // Handled by the caller. 266 case tok::objc_optional: 267 case tok::objc_required: 268 nextToken(); 269 addUnwrappedLine(); 270 return; 271 default: 272 break; 273 } 274 break; 275 case tok::kw_namespace: 276 parseNamespace(); 277 return; 278 case tok::kw_inline: 279 nextToken(); 280 TokenNumber++; 281 if (FormatTok.Tok.is(tok::kw_namespace)) { 282 parseNamespace(); 283 return; 284 } 285 break; 286 case tok::kw_public: 287 case tok::kw_protected: 288 case tok::kw_private: 289 parseAccessSpecifier(); 290 return; 291 case tok::kw_if: 292 parseIfThenElse(); 293 return; 294 case tok::kw_for: 295 case tok::kw_while: 296 parseForOrWhileLoop(); 297 return; 298 case tok::kw_do: 299 parseDoWhile(); 300 return; 301 case tok::kw_switch: 302 parseSwitch(); 303 return; 304 case tok::kw_default: 305 nextToken(); 306 parseLabel(); 307 return; 308 case tok::kw_case: 309 parseCaseLabel(); 310 return; 311 case tok::kw_return: 312 parseReturn(); 313 return; 314 default: 315 break; 316 } 317 do { 318 ++TokenNumber; 319 switch (FormatTok.Tok.getKind()) { 320 case tok::kw_enum: 321 parseEnum(); 322 return; 323 case tok::kw_struct: 324 case tok::kw_union: 325 case tok::kw_class: 326 parseRecord(); 327 // A record declaration or definition is always the start of a 
structural 328 // element. 329 break; 330 case tok::semi: 331 nextToken(); 332 addUnwrappedLine(); 333 return; 334 case tok::r_brace: 335 addUnwrappedLine(); 336 return; 337 case tok::l_paren: 338 parseParens(); 339 break; 340 case tok::l_brace: 341 // A block outside of parentheses must be the last part of a 342 // structural element. 343 // FIXME: Figure out cases where this is not true, and add projections for 344 // them (the one we know is missing are lambdas). 345 parseBlock(); 346 addUnwrappedLine(); 347 return; 348 case tok::identifier: 349 nextToken(); 350 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 351 parseLabel(); 352 return; 353 } 354 break; 355 case tok::equal: 356 nextToken(); 357 if (FormatTok.Tok.is(tok::l_brace)) { 358 parseBracedList(); 359 } 360 break; 361 default: 362 nextToken(); 363 break; 364 } 365 } while (!eof()); 366} 367 368void UnwrappedLineParser::parseBracedList() { 369 nextToken(); 370 371 do { 372 switch (FormatTok.Tok.getKind()) { 373 case tok::l_brace: 374 parseBracedList(); 375 break; 376 case tok::r_brace: 377 nextToken(); 378 return; 379 default: 380 nextToken(); 381 break; 382 } 383 } while (!eof()); 384} 385 386void UnwrappedLineParser::parseReturn() { 387 nextToken(); 388 389 do { 390 switch (FormatTok.Tok.getKind()) { 391 case tok::l_brace: 392 parseBracedList(); 393 break; 394 case tok::l_paren: 395 parseParens(); 396 break; 397 case tok::r_brace: 398 // Assume missing ';'. 
399 addUnwrappedLine(); 400 return; 401 case tok::semi: 402 nextToken(); 403 addUnwrappedLine(); 404 return; 405 default: 406 nextToken(); 407 break; 408 } 409 } while (!eof()); 410} 411 412void UnwrappedLineParser::parseParens() { 413 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 414 nextToken(); 415 do { 416 switch (FormatTok.Tok.getKind()) { 417 case tok::l_paren: 418 parseParens(); 419 break; 420 case tok::r_paren: 421 nextToken(); 422 return; 423 case tok::l_brace: 424 { 425 nextToken(); 426 ScopedLineState LineState(*this); 427 Line->Level += 1; 428 parseLevel(/*HasOpeningBrace=*/true); 429 Line->Level -= 1; 430 } 431 break; 432 default: 433 nextToken(); 434 break; 435 } 436 } while (!eof()); 437} 438 439void UnwrappedLineParser::parseIfThenElse() { 440 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 441 nextToken(); 442 if (FormatTok.Tok.is(tok::l_paren)) 443 parseParens(); 444 bool NeedsUnwrappedLine = false; 445 if (FormatTok.Tok.is(tok::l_brace)) { 446 parseBlock(); 447 NeedsUnwrappedLine = true; 448 } else { 449 addUnwrappedLine(); 450 ++Line->Level; 451 parseStructuralElement(); 452 --Line->Level; 453 } 454 if (FormatTok.Tok.is(tok::kw_else)) { 455 nextToken(); 456 if (FormatTok.Tok.is(tok::l_brace)) { 457 parseBlock(); 458 addUnwrappedLine(); 459 } else if (FormatTok.Tok.is(tok::kw_if)) { 460 parseIfThenElse(); 461 } else { 462 addUnwrappedLine(); 463 ++Line->Level; 464 parseStructuralElement(); 465 --Line->Level; 466 } 467 } else if (NeedsUnwrappedLine) { 468 addUnwrappedLine(); 469 } 470} 471 472void UnwrappedLineParser::parseNamespace() { 473 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 474 nextToken(); 475 if (FormatTok.Tok.is(tok::identifier)) 476 nextToken(); 477 if (FormatTok.Tok.is(tok::l_brace)) { 478 parseBlock(0); 479 addUnwrappedLine(); 480 } 481 // FIXME: Add error handling. 
482} 483 484void UnwrappedLineParser::parseForOrWhileLoop() { 485 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 486 "'for' or 'while' expected"); 487 nextToken(); 488 if (FormatTok.Tok.is(tok::l_paren)) 489 parseParens(); 490 if (FormatTok.Tok.is(tok::l_brace)) { 491 parseBlock(); 492 addUnwrappedLine(); 493 } else { 494 addUnwrappedLine(); 495 ++Line->Level; 496 parseStructuralElement(); 497 --Line->Level; 498 } 499} 500 501void UnwrappedLineParser::parseDoWhile() { 502 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 503 nextToken(); 504 if (FormatTok.Tok.is(tok::l_brace)) { 505 parseBlock(); 506 } else { 507 addUnwrappedLine(); 508 ++Line->Level; 509 parseStructuralElement(); 510 --Line->Level; 511 } 512 513 // FIXME: Add error handling. 514 if (!FormatTok.Tok.is(tok::kw_while)) { 515 addUnwrappedLine(); 516 return; 517 } 518 519 nextToken(); 520 parseStructuralElement(); 521} 522 523void UnwrappedLineParser::parseLabel() { 524 // FIXME: remove all asserts. 525 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 526 nextToken(); 527 unsigned OldLineLevel = Line->Level; 528 if (Line->Level > 0) 529 --Line->Level; 530 if (FormatTok.Tok.is(tok::l_brace)) { 531 parseBlock(); 532 if (FormatTok.Tok.is(tok::kw_break)) 533 parseStructuralElement(); // "break;" after "}" goes on the same line. 534 } 535 addUnwrappedLine(); 536 Line->Level = OldLineLevel; 537} 538 539void UnwrappedLineParser::parseCaseLabel() { 540 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 541 // FIXME: fix handling of complex expressions here. 542 do { 543 nextToken(); 544 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 545 parseLabel(); 546} 547 548void UnwrappedLineParser::parseSwitch() { 549 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 550 nextToken(); 551 if (FormatTok.Tok.is(tok::l_paren)) 552 parseParens(); 553 if (FormatTok.Tok.is(tok::l_brace)) { 554 parseBlock(Style.IndentCaseLabels ? 
2 : 1); 555 addUnwrappedLine(); 556 } else { 557 addUnwrappedLine(); 558 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 559 parseStructuralElement(); 560 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 561 } 562} 563 564void UnwrappedLineParser::parseAccessSpecifier() { 565 nextToken(); 566 // Otherwise, we don't know what it is, and we'd better keep the next token. 567 if (FormatTok.Tok.is(tok::colon)) 568 nextToken(); 569 addUnwrappedLine(); 570} 571 572void UnwrappedLineParser::parseEnum() { 573 bool HasContents = false; 574 do { 575 switch (FormatTok.Tok.getKind()) { 576 case tok::l_brace: 577 nextToken(); 578 addUnwrappedLine(); 579 ++Line->Level; 580 parseComments(); 581 break; 582 case tok::l_paren: 583 parseParens(); 584 break; 585 case tok::comma: 586 nextToken(); 587 addUnwrappedLine(); 588 parseComments(); 589 break; 590 case tok::r_brace: 591 if (HasContents) 592 addUnwrappedLine(); 593 --Line->Level; 594 nextToken(); 595 break; 596 case tok::semi: 597 nextToken(); 598 addUnwrappedLine(); 599 return; 600 default: 601 HasContents = true; 602 nextToken(); 603 break; 604 } 605 } while (!eof()); 606} 607 608void UnwrappedLineParser::parseRecord() { 609 nextToken(); 610 if (FormatTok.Tok.is(tok::identifier) || 611 FormatTok.Tok.is(tok::kw___attribute) || 612 FormatTok.Tok.is(tok::kw___declspec)) { 613 nextToken(); 614 // We can have macros or attributes in between 'class' and the class name. 615 if (FormatTok.Tok.is(tok::l_paren)) { 616 parseParens(); 617 } 618 // The actual identifier can be a nested name specifier. 619 while (FormatTok.Tok.is(tok::identifier) || 620 FormatTok.Tok.is(tok::coloncolon)) 621 nextToken(); 622 623 // Note that parsing away template declarations here leads to incorrectly 624 // accepting function declarations as record declarations. 625 // In general, we cannot solve this problem. Consider: 626 // class A<int> B() {} 627 // which can be a function definition or a class definition when B() is a 628 // macro. 
If we find enough real-world cases where this is a problem, we 629 // can parse for the 'template' keyword in the beginning of the statement, 630 // and thus rule out the record production in case there is no template 631 // (this would still leave us with an ambiguity between template function 632 // and class declarations). 633 if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) { 634 while (FormatTok.Tok.isNot(tok::l_brace)) { 635 if (FormatTok.Tok.is(tok::semi)) 636 return; 637 nextToken(); 638 } 639 } 640 } 641 if (FormatTok.Tok.is(tok::l_brace)) 642 parseBlock(); 643 // We fall through to parsing a structural element afterwards, so 644 // class A {} n, m; 645 // will end up in one unwrapped line. 646} 647 648void UnwrappedLineParser::parseObjCProtocolList() { 649 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 650 do 651 nextToken(); 652 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 653 nextToken(); // Skip '>'. 654} 655 656void UnwrappedLineParser::parseObjCUntilAtEnd() { 657 do { 658 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 659 nextToken(); 660 addUnwrappedLine(); 661 break; 662 } 663 parseStructuralElement(); 664 } while (!eof()); 665} 666 667void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 668 nextToken(); 669 nextToken(); // interface name 670 671 // @interface can be followed by either a base class, or a category. 672 if (FormatTok.Tok.is(tok::colon)) { 673 nextToken(); 674 nextToken(); // base class name 675 } else if (FormatTok.Tok.is(tok::l_paren)) 676 // Skip category, if present. 677 parseParens(); 678 679 if (FormatTok.Tok.is(tok::less)) 680 parseObjCProtocolList(); 681 682 // If instance variables are present, keep the '{' on the first line too. 683 if (FormatTok.Tok.is(tok::l_brace)) 684 parseBlock(); 685 686 // With instance variables, this puts '}' on its own line. Without instance 687 // variables, this ends the @interface line. 
688 addUnwrappedLine(); 689 690 parseObjCUntilAtEnd(); 691} 692 693void UnwrappedLineParser::parseObjCProtocol() { 694 nextToken(); 695 nextToken(); // protocol name 696 697 if (FormatTok.Tok.is(tok::less)) 698 parseObjCProtocolList(); 699 700 // Check for protocol declaration. 701 if (FormatTok.Tok.is(tok::semi)) { 702 nextToken(); 703 return addUnwrappedLine(); 704 } 705 706 addUnwrappedLine(); 707 parseObjCUntilAtEnd(); 708} 709 710void UnwrappedLineParser::addUnwrappedLine() { 711 if (Line->Tokens.empty()) 712 return; 713 // Consume trailing comments. 714 while (!eof() && FormatTok.NewlinesBefore == 0 && 715 FormatTok.Tok.is(tok::comment)) { 716 nextToken(); 717 } 718 DEBUG({ 719 llvm::dbgs() << "Line: "; 720 for (std::list<FormatToken>::iterator I = Line->Tokens.begin(), 721 E = Line->Tokens.end(); 722 I != E; ++I) { 723 llvm::dbgs() << I->Tok.getName() << " "; 724 725 } 726 llvm::dbgs() << "\n"; 727 }); 728 CurrentLines->push_back(*Line); 729 Line->Tokens.clear(); 730 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 731 for (std::vector<UnwrappedLine>::iterator I = PreprocessorDirectives 732 .begin(), E = PreprocessorDirectives.end(); 733 I != E; ++I) { 734 CurrentLines->push_back(*I); 735 } 736 PreprocessorDirectives.clear(); 737 } 738 739} 740 741bool UnwrappedLineParser::eof() const { 742 return FormatTok.Tok.is(tok::eof); 743} 744 745void UnwrappedLineParser::nextToken() { 746 if (eof()) 747 return; 748 Line->Tokens.push_back(FormatTok); 749 if (MustBreakBeforeNextToken) { 750 Line->Tokens.back().MustBreakBefore = true; 751 MustBreakBeforeNextToken = false; 752 } 753 readToken(); 754} 755 756void UnwrappedLineParser::readToken() { 757 FormatTok = Tokens->getNextToken(); 758 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 759 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 760 FormatTok.IsFirst)) { 761 // If there is an unfinished unwrapped line, we flush the preprocessor 762 // directives only after that 
unwrapped line was finished later. 763 bool SwitchToPreprocessorLines = !Line->Tokens.empty() && 764 CurrentLines == &Lines; 765 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 766 parsePPDirective(); 767 } 768} 769 770} // end namespace format 771} // end namespace clang 772