UnwrappedLineParser.cpp revision 02eacc2cf85fe11577bf6ff20f6c203e87147c57
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "format-parser" 17 18#include "UnwrappedLineParser.h" 19#include "llvm/Support/Debug.h" 20 21namespace clang { 22namespace format { 23 24class FormatTokenSource { 25public: 26 virtual ~FormatTokenSource() {} 27 virtual FormatToken *getNextToken() = 0; 28 29 virtual unsigned getPosition() = 0; 30 virtual FormatToken *setPosition(unsigned Position) = 0; 31}; 32 33namespace { 34 35class ScopedDeclarationState { 36public: 37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 38 bool MustBeDeclaration) 39 : Line(Line), Stack(Stack) { 40 Line.MustBeDeclaration = MustBeDeclaration; 41 Stack.push_back(MustBeDeclaration); 42 } 43 ~ScopedDeclarationState() { 44 Stack.pop_back(); 45 if (!Stack.empty()) 46 Line.MustBeDeclaration = Stack.back(); 47 else 48 Line.MustBeDeclaration = true; 49 } 50 51private: 52 UnwrappedLine &Line; 53 std::vector<bool> &Stack; 54}; 55 56class ScopedMacroState : public FormatTokenSource { 57public: 58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 59 FormatToken *&ResetToken, bool &StructuralError) 60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 62 StructuralError(StructuralError), 63 PreviousStructuralError(StructuralError), Token(NULL) { 64 TokenSource = this; 65 Line.Level = 0; 66 Line.InPPDirective = true; 67 } 68 69 ~ScopedMacroState() { 70 TokenSource = PreviousTokenSource; 71 ResetToken = Token; 72 Line.InPPDirective = false; 73 Line.Level = PreviousLineLevel; 74 StructuralError = PreviousStructuralError; 75 } 76 77 virtual FormatToken *getNextToken() { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); } 88 89 virtual FormatToken *setPosition(unsigned Position) { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 bool &StructuralError; 114 bool PreviousStructuralError; 115 116 FormatToken *Token; 117}; 118 119} // end anonymous namespace 120 121class ScopedLineState { 122public: 123 ScopedLineState(UnwrappedLineParser &Parser, 124 bool SwitchToPreprocessorLines = false) 125 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 126 if (SwitchToPreprocessorLines) 127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 128 PreBlockLine = Parser.Line.take(); 129 Parser.Line.reset(new UnwrappedLine()); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 assert(Parser.Line->Tokens.empty()); 139 Parser.Line.reset(PreBlockLine); 140 Parser.MustBreakBeforeNextToken = true; 141 if (SwitchToPreprocessorLines) 142 Parser.CurrentLines = &Parser.Lines; 143 } 144 145private: 146 UnwrappedLineParser &Parser; 147 const bool SwitchToPreprocessorLines; 148 149 UnwrappedLine *PreBlockLine; 150}; 151 152namespace { 153 154class IndexedTokenSource : public FormatTokenSource { 155public: 156 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 157 : Tokens(Tokens), Position(-1) {} 158 159 virtual FormatToken *getNextToken() { 160 ++Position; 161 return Tokens[Position]; 162 } 163 164 virtual unsigned getPosition() { 165 assert(Position >= 0); 166 return Position; 167 } 168 169 virtual FormatToken *setPosition(unsigned P) { 170 Position = P; 171 return Tokens[Position]; 172 } 173 174private: 175 ArrayRef<FormatToken *> Tokens; 176 int Position; 177}; 178 179} // end anonymous namespace 180 181UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 182 ArrayRef<FormatToken *> Tokens, 183 UnwrappedLineConsumer &Callback) 184 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 185 CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), 186 Callback(Callback), AllTokens(Tokens) { 187 LBraces.resize(Tokens.size(), BS_Unknown); 188} 189 190bool UnwrappedLineParser::parse() { 191 DEBUG(llvm::dbgs() << "----\n"); 192 IndexedTokenSource TokenSource(AllTokens); 193 Tokens = &TokenSource; 194 readToken(); 195 parseFile(); 196 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); 197 I != E; ++I) { 198 Callback.consumeUnwrappedLine(*I); 199 } 200 201 // Create line with eof token. 202 pushToken(FormatTok); 203 Callback.consumeUnwrappedLine(*Line); 204 return StructuralError; 205} 206 207void UnwrappedLineParser::parseFile() { 208 ScopedDeclarationState DeclarationState( 209 *Line, DeclarationScopeStack, 210 /*MustBeDeclaration=*/ !Line->InPPDirective); 211 parseLevel(/*HasOpeningBrace=*/false); 212 // Make sure to format the remaining tokens. 213 flushComments(true); 214 addUnwrappedLine(); 215} 216 217void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 218 do { 219 switch (FormatTok->Tok.getKind()) { 220 case tok::comment: 221 nextToken(); 222 addUnwrappedLine(); 223 break; 224 case tok::l_brace: 225 // FIXME: Add parameter whether this can happen - if this happens, we must 226 // be in a non-declaration context. 227 parseBlock(/*MustBeDeclaration=*/false); 228 addUnwrappedLine(); 229 break; 230 case tok::r_brace: 231 if (HasOpeningBrace) 232 return; 233 StructuralError = true; 234 nextToken(); 235 addUnwrappedLine(); 236 break; 237 default: 238 parseStructuralElement(); 239 break; 240 } 241 } while (!eof()); 242} 243 244void UnwrappedLineParser::calculateBraceTypes() { 245 // We'll parse forward through the tokens until we hit 246 // a closing brace or eof - note that getNextToken() will 247 // parse macros, so this will magically work inside macro 248 // definitions, too. 249 unsigned StoredPosition = Tokens->getPosition(); 250 unsigned Position = StoredPosition; 251 FormatToken *Tok = FormatTok; 252 // Keep a stack of positions of lbrace tokens. We will 253 // update information about whether an lbrace starts a 254 // braced init list or a different block during the loop. 255 SmallVector<unsigned, 8> LBraceStack; 256 assert(Tok->Tok.is(tok::l_brace)); 257 do { 258 // Get next none-comment token. 259 FormatToken *NextTok; 260 do { 261 NextTok = Tokens->getNextToken(); 262 } while (NextTok->is(tok::comment)); 263 264 switch (Tok->Tok.getKind()) { 265 case tok::l_brace: 266 LBraceStack.push_back(Position); 267 break; 268 case tok::r_brace: 269 if (!LBraceStack.empty()) { 270 if (LBraces[LBraceStack.back()] == BS_Unknown) { 271 // If there is a comma, semicolon or right paren after the closing 272 // brace, we assume this is a braced initializer list. 273 274 // FIXME: Note that this currently works only because we do not 275 // use the brace information while inside a braced init list. 276 // Thus, if the parent is a braced init list, we consider all 277 // brace blocks inside it braced init list. That works good enough 278 // for now, but we will need to fix it to correctly handle lambdas. 279 if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren, 280 tok::l_brace, tok::colon)) 281 LBraces[LBraceStack.back()] = BS_BracedInit; 282 else 283 LBraces[LBraceStack.back()] = BS_Block; 284 } 285 LBraceStack.pop_back(); 286 } 287 break; 288 case tok::semi: 289 case tok::kw_if: 290 case tok::kw_while: 291 case tok::kw_for: 292 case tok::kw_switch: 293 case tok::kw_try: 294 if (!LBraceStack.empty()) 295 LBraces[LBraceStack.back()] = BS_Block; 296 break; 297 default: 298 break; 299 } 300 Tok = NextTok; 301 ++Position; 302 } while (Tok->Tok.isNot(tok::eof)); 303 // Assume other blocks for all unclosed opening braces. 304 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 305 if (LBraces[LBraceStack[i]] == BS_Unknown) 306 LBraces[LBraceStack[i]] = BS_Block; 307 } 308 FormatTok = Tokens->setPosition(StoredPosition); 309} 310 311void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 312 unsigned AddLevels) { 313 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 314 nextToken(); 315 316 addUnwrappedLine(); 317 318 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 319 MustBeDeclaration); 320 Line->Level += AddLevels; 321 parseLevel(/*HasOpeningBrace=*/true); 322 323 if (!FormatTok->Tok.is(tok::r_brace)) { 324 Line->Level -= AddLevels; 325 StructuralError = true; 326 return; 327 } 328 329 nextToken(); // Munch the closing brace. 330 Line->Level -= AddLevels; 331} 332 333void UnwrappedLineParser::parsePPDirective() { 334 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 335 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 336 nextToken(); 337 338 if (FormatTok->Tok.getIdentifierInfo() == NULL) { 339 parsePPUnknown(); 340 return; 341 } 342 343 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 344 case tok::pp_define: 345 parsePPDefine(); 346 return; 347 case tok::pp_if: 348 parsePPIf(); 349 break; 350 case tok::pp_ifdef: 351 case tok::pp_ifndef: 352 parsePPIfdef(); 353 break; 354 case tok::pp_else: 355 parsePPElse(); 356 break; 357 case tok::pp_elif: 358 parsePPElIf(); 359 break; 360 case tok::pp_endif: 361 parsePPEndIf(); 362 break; 363 default: 364 parsePPUnknown(); 365 break; 366 } 367} 368 369void UnwrappedLineParser::pushPPConditional() { 370 if (!PPStack.empty() && PPStack.back() == PP_Unreachable) 371 PPStack.push_back(PP_Unreachable); 372 else 373 PPStack.push_back(PP_Conditional); 374} 375 376void UnwrappedLineParser::parsePPIf() { 377 nextToken(); 378 if ((FormatTok->Tok.isLiteral() && 379 StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) == 380 "0") || 381 FormatTok->Tok.is(tok::kw_false)) { 382 PPStack.push_back(PP_Unreachable); 383 } else { 384 pushPPConditional(); 385 } 386 parsePPUnknown(); 387} 388 389void UnwrappedLineParser::parsePPIfdef() { 390 pushPPConditional(); 391 parsePPUnknown(); 392} 393 394void UnwrappedLineParser::parsePPElse() { 395 if (!PPStack.empty()) 396 PPStack.pop_back(); 397 pushPPConditional(); 398 parsePPUnknown(); 399} 400 401void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 402 403void UnwrappedLineParser::parsePPEndIf() { 404 if (!PPStack.empty()) 405 PPStack.pop_back(); 406 parsePPUnknown(); 407} 408 409void UnwrappedLineParser::parsePPDefine() { 410 nextToken(); 411 412 if (FormatTok->Tok.getKind() != tok::identifier) { 413 parsePPUnknown(); 414 return; 415 } 416 nextToken(); 417 if (FormatTok->Tok.getKind() == tok::l_paren && 418 FormatTok->WhitespaceRange.getBegin() == 419 FormatTok->WhitespaceRange.getEnd()) { 420 parseParens(); 421 } 422 addUnwrappedLine(); 423 Line->Level = 1; 424 425 // Errors during a preprocessor directive can only affect the layout of the 426 // preprocessor directive, and thus we ignore them. An alternative approach 427 // would be to use the same approach we use on the file level (no 428 // re-indentation if there was a structural error) within the macro 429 // definition. 430 parseFile(); 431} 432 433void UnwrappedLineParser::parsePPUnknown() { 434 do { 435 nextToken(); 436 } while (!eof()); 437 addUnwrappedLine(); 438} 439 440// Here we blacklist certain tokens that are not usually the first token in an 441// unwrapped line. This is used in attempt to distinguish macro calls without 442// trailing semicolons from other constructs split to several lines. 443bool tokenCanStartNewLine(clang::Token Tok) { 444 // Semicolon can be a null-statement, l_square can be a start of a macro or 445 // a C++11 attribute, but this doesn't seem to be common. 446 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 447 Tok.isNot(tok::l_square) && 448 // Tokens that can only be used as binary operators and a part of 449 // overloaded operator names. 450 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 451 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 452 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 453 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 454 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 455 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 456 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 457 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 458 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 459 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 460 Tok.isNot(tok::lesslessequal) && 461 // Colon is used in labels, base class lists, initializer lists, 462 // range-based for loops, ternary operator, but should never be the 463 // first token in an unwrapped line. 464 Tok.isNot(tok::colon); 465} 466 467void UnwrappedLineParser::parseStructuralElement() { 468 assert(!FormatTok->Tok.is(tok::l_brace)); 469 switch (FormatTok->Tok.getKind()) { 470 case tok::at: 471 nextToken(); 472 if (FormatTok->Tok.is(tok::l_brace)) { 473 parseBracedList(); 474 break; 475 } 476 switch (FormatTok->Tok.getObjCKeywordID()) { 477 case tok::objc_public: 478 case tok::objc_protected: 479 case tok::objc_package: 480 case tok::objc_private: 481 return parseAccessSpecifier(); 482 case tok::objc_interface: 483 case tok::objc_implementation: 484 return parseObjCInterfaceOrImplementation(); 485 case tok::objc_protocol: 486 return parseObjCProtocol(); 487 case tok::objc_end: 488 return; // Handled by the caller. 489 case tok::objc_optional: 490 case tok::objc_required: 491 nextToken(); 492 addUnwrappedLine(); 493 return; 494 default: 495 break; 496 } 497 break; 498 case tok::kw_namespace: 499 parseNamespace(); 500 return; 501 case tok::kw_inline: 502 nextToken(); 503 if (FormatTok->Tok.is(tok::kw_namespace)) { 504 parseNamespace(); 505 return; 506 } 507 break; 508 case tok::kw_public: 509 case tok::kw_protected: 510 case tok::kw_private: 511 parseAccessSpecifier(); 512 return; 513 case tok::kw_if: 514 parseIfThenElse(); 515 return; 516 case tok::kw_for: 517 case tok::kw_while: 518 parseForOrWhileLoop(); 519 return; 520 case tok::kw_do: 521 parseDoWhile(); 522 return; 523 case tok::kw_switch: 524 parseSwitch(); 525 return; 526 case tok::kw_default: 527 nextToken(); 528 parseLabel(); 529 return; 530 case tok::kw_case: 531 parseCaseLabel(); 532 return; 533 case tok::kw_return: 534 parseReturn(); 535 return; 536 case tok::kw_extern: 537 nextToken(); 538 if (FormatTok->Tok.is(tok::string_literal)) { 539 nextToken(); 540 if (FormatTok->Tok.is(tok::l_brace)) { 541 parseBlock(/*MustBeDeclaration=*/true, 0); 542 addUnwrappedLine(); 543 return; 544 } 545 } 546 // In all other cases, parse the declaration. 547 break; 548 default: 549 break; 550 } 551 do { 552 switch (FormatTok->Tok.getKind()) { 553 case tok::at: 554 nextToken(); 555 if (FormatTok->Tok.is(tok::l_brace)) 556 parseBracedList(); 557 break; 558 case tok::kw_enum: 559 parseEnum(); 560 break; 561 case tok::kw_struct: 562 case tok::kw_union: 563 case tok::kw_class: 564 parseRecord(); 565 // A record declaration or definition is always the start of a structural 566 // element. 567 break; 568 case tok::semi: 569 nextToken(); 570 addUnwrappedLine(); 571 return; 572 case tok::r_brace: 573 addUnwrappedLine(); 574 return; 575 case tok::l_paren: 576 parseParens(); 577 break; 578 case tok::l_brace: 579 if (!tryToParseBracedList()) { 580 // A block outside of parentheses must be the last part of a 581 // structural element. 582 // FIXME: Figure out cases where this is not true, and add projections 583 // for them (the one we know is missing are lambdas). 584 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 585 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 586 addUnwrappedLine(); 587 parseBlock(/*MustBeDeclaration=*/false); 588 addUnwrappedLine(); 589 return; 590 } 591 // Otherwise this was a braced init list, and the structural 592 // element continues. 593 break; 594 case tok::identifier: { 595 StringRef Text = FormatTok->TokenText; 596 nextToken(); 597 if (Line->Tokens.size() == 1) { 598 if (FormatTok->Tok.is(tok::colon)) { 599 parseLabel(); 600 return; 601 } 602 // Recognize function-like macro usages without trailing semicolon. 603 if (FormatTok->Tok.is(tok::l_paren)) { 604 parseParens(); 605 if (FormatTok->HasUnescapedNewline && 606 tokenCanStartNewLine(FormatTok->Tok)) { 607 addUnwrappedLine(); 608 return; 609 } 610 } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 && 611 Text == Text.upper()) { 612 // Recognize free-standing macros like Q_OBJECT. 613 addUnwrappedLine(); 614 return; 615 } 616 } 617 break; 618 } 619 case tok::equal: 620 nextToken(); 621 if (FormatTok->Tok.is(tok::l_brace)) { 622 parseBracedList(); 623 } 624 break; 625 default: 626 nextToken(); 627 break; 628 } 629 } while (!eof()); 630} 631 632bool UnwrappedLineParser::tryToParseBracedList() { 633 if (LBraces[Tokens->getPosition()] == BS_Unknown) 634 calculateBraceTypes(); 635 assert(LBraces[Tokens->getPosition()] != BS_Unknown); 636 if (LBraces[Tokens->getPosition()] == BS_Block) 637 return false; 638 parseBracedList(); 639 return true; 640} 641 642void UnwrappedLineParser::parseBracedList() { 643 nextToken(); 644 645 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 646 // replace this by using parseAssigmentExpression() inside. 647 do { 648 // FIXME: When we start to support lambdas, we'll want to parse them away 649 // here, otherwise our bail-out scenarios below break. The better solution 650 // might be to just implement a more or less complete expression parser. 651 switch (FormatTok->Tok.getKind()) { 652 case tok::l_brace: 653 parseBracedList(); 654 break; 655 case tok::r_brace: 656 nextToken(); 657 return; 658 case tok::semi: 659 // Probably a missing closing brace. Bail out. 660 return; 661 case tok::comma: 662 nextToken(); 663 break; 664 default: 665 nextToken(); 666 break; 667 } 668 } while (!eof()); 669} 670 671void UnwrappedLineParser::parseReturn() { 672 nextToken(); 673 674 do { 675 switch (FormatTok->Tok.getKind()) { 676 case tok::l_brace: 677 parseBracedList(); 678 if (FormatTok->Tok.isNot(tok::semi)) { 679 // Assume missing ';'. 680 addUnwrappedLine(); 681 return; 682 } 683 break; 684 case tok::l_paren: 685 parseParens(); 686 break; 687 case tok::r_brace: 688 // Assume missing ';'. 689 addUnwrappedLine(); 690 return; 691 case tok::semi: 692 nextToken(); 693 addUnwrappedLine(); 694 return; 695 default: 696 nextToken(); 697 break; 698 } 699 } while (!eof()); 700} 701 702void UnwrappedLineParser::parseParens() { 703 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 704 nextToken(); 705 do { 706 switch (FormatTok->Tok.getKind()) { 707 case tok::l_paren: 708 parseParens(); 709 break; 710 case tok::r_paren: 711 nextToken(); 712 return; 713 case tok::r_brace: 714 // A "}" inside parenthesis is an error if there wasn't a matching "{". 715 return; 716 case tok::l_brace: { 717 if (!tryToParseBracedList()) { 718 nextToken(); 719 { 720 ScopedLineState LineState(*this); 721 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 722 /*MustBeDeclaration=*/false); 723 Line->Level += 1; 724 parseLevel(/*HasOpeningBrace=*/true); 725 Line->Level -= 1; 726 } 727 nextToken(); 728 } 729 break; 730 } 731 case tok::at: 732 nextToken(); 733 if (FormatTok->Tok.is(tok::l_brace)) 734 parseBracedList(); 735 break; 736 default: 737 nextToken(); 738 break; 739 } 740 } while (!eof()); 741} 742 743void UnwrappedLineParser::parseIfThenElse() { 744 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 745 nextToken(); 746 if (FormatTok->Tok.is(tok::l_paren)) 747 parseParens(); 748 bool NeedsUnwrappedLine = false; 749 if (FormatTok->Tok.is(tok::l_brace)) { 750 parseBlock(/*MustBeDeclaration=*/false); 751 NeedsUnwrappedLine = true; 752 } else { 753 addUnwrappedLine(); 754 ++Line->Level; 755 parseStructuralElement(); 756 --Line->Level; 757 } 758 if (FormatTok->Tok.is(tok::kw_else)) { 759 nextToken(); 760 if (FormatTok->Tok.is(tok::l_brace)) { 761 parseBlock(/*MustBeDeclaration=*/false); 762 addUnwrappedLine(); 763 } else if (FormatTok->Tok.is(tok::kw_if)) { 764 parseIfThenElse(); 765 } else { 766 addUnwrappedLine(); 767 ++Line->Level; 768 parseStructuralElement(); 769 --Line->Level; 770 } 771 } else if (NeedsUnwrappedLine) { 772 addUnwrappedLine(); 773 } 774} 775 776void UnwrappedLineParser::parseNamespace() { 777 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 778 nextToken(); 779 if (FormatTok->Tok.is(tok::identifier)) 780 nextToken(); 781 if (FormatTok->Tok.is(tok::l_brace)) { 782 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) 783 addUnwrappedLine(); 784 785 parseBlock(/*MustBeDeclaration=*/true, 0); 786 // Munch the semicolon after a namespace. This is more common than one would 787 // think. Puttin the semicolon into its own line is very ugly. 788 if (FormatTok->Tok.is(tok::semi)) 789 nextToken(); 790 addUnwrappedLine(); 791 } 792 // FIXME: Add error handling. 793} 794 795void UnwrappedLineParser::parseForOrWhileLoop() { 796 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && 797 "'for' or 'while' expected"); 798 nextToken(); 799 if (FormatTok->Tok.is(tok::l_paren)) 800 parseParens(); 801 if (FormatTok->Tok.is(tok::l_brace)) { 802 parseBlock(/*MustBeDeclaration=*/false); 803 addUnwrappedLine(); 804 } else { 805 addUnwrappedLine(); 806 ++Line->Level; 807 parseStructuralElement(); 808 --Line->Level; 809 } 810} 811 812void UnwrappedLineParser::parseDoWhile() { 813 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 814 nextToken(); 815 if (FormatTok->Tok.is(tok::l_brace)) { 816 parseBlock(/*MustBeDeclaration=*/false); 817 } else { 818 addUnwrappedLine(); 819 ++Line->Level; 820 parseStructuralElement(); 821 --Line->Level; 822 } 823 824 // FIXME: Add error handling. 825 if (!FormatTok->Tok.is(tok::kw_while)) { 826 addUnwrappedLine(); 827 return; 828 } 829 830 nextToken(); 831 parseStructuralElement(); 832} 833 834void UnwrappedLineParser::parseLabel() { 835 if (FormatTok->Tok.isNot(tok::colon)) 836 return; 837 nextToken(); 838 unsigned OldLineLevel = Line->Level; 839 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 840 --Line->Level; 841 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 842 parseBlock(/*MustBeDeclaration=*/false); 843 if (FormatTok->Tok.is(tok::kw_break)) 844 parseStructuralElement(); // "break;" after "}" goes on the same line. 845 } 846 addUnwrappedLine(); 847 Line->Level = OldLineLevel; 848} 849 850void UnwrappedLineParser::parseCaseLabel() { 851 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 852 // FIXME: fix handling of complex expressions here. 853 do { 854 nextToken(); 855 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 856 parseLabel(); 857} 858 859void UnwrappedLineParser::parseSwitch() { 860 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 861 nextToken(); 862 if (FormatTok->Tok.is(tok::l_paren)) 863 parseParens(); 864 if (FormatTok->Tok.is(tok::l_brace)) { 865 parseBlock(/*MustBeDeclaration=*/false, Style.IndentCaseLabels ? 2 : 1); 866 addUnwrappedLine(); 867 } else { 868 addUnwrappedLine(); 869 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 870 parseStructuralElement(); 871 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 872 } 873} 874 875void UnwrappedLineParser::parseAccessSpecifier() { 876 nextToken(); 877 // Otherwise, we don't know what it is, and we'd better keep the next token. 878 if (FormatTok->Tok.is(tok::colon)) 879 nextToken(); 880 addUnwrappedLine(); 881} 882 883void UnwrappedLineParser::parseEnum() { 884 nextToken(); 885 if (FormatTok->Tok.is(tok::identifier) || 886 FormatTok->Tok.is(tok::kw___attribute) || 887 FormatTok->Tok.is(tok::kw___declspec)) { 888 nextToken(); 889 // We can have macros or attributes in between 'enum' and the enum name. 890 if (FormatTok->Tok.is(tok::l_paren)) { 891 parseParens(); 892 } 893 if (FormatTok->Tok.is(tok::identifier)) 894 nextToken(); 895 } 896 if (FormatTok->Tok.is(tok::l_brace)) { 897 nextToken(); 898 addUnwrappedLine(); 899 ++Line->Level; 900 do { 901 switch (FormatTok->Tok.getKind()) { 902 case tok::l_paren: 903 parseParens(); 904 break; 905 case tok::r_brace: 906 addUnwrappedLine(); 907 nextToken(); 908 --Line->Level; 909 return; 910 case tok::comma: 911 nextToken(); 912 addUnwrappedLine(); 913 break; 914 default: 915 nextToken(); 916 break; 917 } 918 } while (!eof()); 919 } 920 // We fall through to parsing a structural element afterwards, so that in 921 // enum A {} n, m; 922 // "} n, m;" will end up in one unwrapped line. 923} 924 925void UnwrappedLineParser::parseRecord() { 926 nextToken(); 927 if (FormatTok->Tok.is(tok::identifier) || 928 FormatTok->Tok.is(tok::kw___attribute) || 929 FormatTok->Tok.is(tok::kw___declspec)) { 930 nextToken(); 931 // We can have macros or attributes in between 'class' and the class name. 932 if (FormatTok->Tok.is(tok::l_paren)) { 933 parseParens(); 934 } 935 // The actual identifier can be a nested name specifier, and in macros 936 // it is often token-pasted. 937 while (FormatTok->Tok.is(tok::identifier) || 938 FormatTok->Tok.is(tok::coloncolon) || 939 FormatTok->Tok.is(tok::hashhash)) 940 nextToken(); 941 942 // Note that parsing away template declarations here leads to incorrectly 943 // accepting function declarations as record declarations. 944 // In general, we cannot solve this problem. Consider: 945 // class A<int> B() {} 946 // which can be a function definition or a class definition when B() is a 947 // macro. If we find enough real-world cases where this is a problem, we 948 // can parse for the 'template' keyword in the beginning of the statement, 949 // and thus rule out the record production in case there is no template 950 // (this would still leave us with an ambiguity between template function 951 // and class declarations). 952 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 953 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 954 if (FormatTok->Tok.is(tok::semi)) 955 return; 956 nextToken(); 957 } 958 } 959 } 960 if (FormatTok->Tok.is(tok::l_brace)) { 961 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) 962 addUnwrappedLine(); 963 964 parseBlock(/*MustBeDeclaration=*/true); 965 } 966 // We fall through to parsing a structural element afterwards, so 967 // class A {} n, m; 968 // will end up in one unwrapped line. 969} 970 971void UnwrappedLineParser::parseObjCProtocolList() { 972 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 973 do 974 nextToken(); 975 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 976 nextToken(); // Skip '>'. 977} 978 979void UnwrappedLineParser::parseObjCUntilAtEnd() { 980 do { 981 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 982 nextToken(); 983 addUnwrappedLine(); 984 break; 985 } 986 parseStructuralElement(); 987 } while (!eof()); 988} 989 990void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 991 nextToken(); 992 nextToken(); // interface name 993 994 // @interface can be followed by either a base class, or a category. 995 if (FormatTok->Tok.is(tok::colon)) { 996 nextToken(); 997 nextToken(); // base class name 998 } else if (FormatTok->Tok.is(tok::l_paren)) 999 // Skip category, if present. 1000 parseParens(); 1001 1002 if (FormatTok->Tok.is(tok::less)) 1003 parseObjCProtocolList(); 1004 1005 // If instance variables are present, keep the '{' on the first line too. 1006 if (FormatTok->Tok.is(tok::l_brace)) 1007 parseBlock(/*MustBeDeclaration=*/true); 1008 1009 // With instance variables, this puts '}' on its own line. Without instance 1010 // variables, this ends the @interface line. 1011 addUnwrappedLine(); 1012 1013 parseObjCUntilAtEnd(); 1014} 1015 1016void UnwrappedLineParser::parseObjCProtocol() { 1017 nextToken(); 1018 nextToken(); // protocol name 1019 1020 if (FormatTok->Tok.is(tok::less)) 1021 parseObjCProtocolList(); 1022 1023 // Check for protocol declaration. 1024 if (FormatTok->Tok.is(tok::semi)) { 1025 nextToken(); 1026 return addUnwrappedLine(); 1027 } 1028 1029 addUnwrappedLine(); 1030 parseObjCUntilAtEnd(); 1031} 1032 1033void UnwrappedLineParser::addUnwrappedLine() { 1034 if (Line->Tokens.empty()) 1035 return; 1036 DEBUG({ 1037 llvm::dbgs() << "Line(" << Line->Level << ")" 1038 << (Line->InPPDirective ? " MACRO" : "") << ": "; 1039 for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(), 1040 E = Line->Tokens.end(); 1041 I != E; ++I) { 1042 llvm::dbgs() << (*I)->Tok.getName() << " "; 1043 } 1044 llvm::dbgs() << "\n"; 1045 }); 1046 CurrentLines->push_back(*Line); 1047 Line->Tokens.clear(); 1048 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1049 for (std::vector<UnwrappedLine>::iterator 1050 I = PreprocessorDirectives.begin(), 1051 E = PreprocessorDirectives.end(); 1052 I != E; ++I) { 1053 CurrentLines->push_back(*I); 1054 } 1055 PreprocessorDirectives.clear(); 1056 } 1057} 1058 1059bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1060 1061void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1062 bool JustComments = Line->Tokens.empty(); 1063 for (SmallVectorImpl<FormatToken *>::const_iterator 1064 I = CommentsBeforeNextToken.begin(), 1065 E = CommentsBeforeNextToken.end(); 1066 I != E; ++I) { 1067 if ((*I)->NewlinesBefore && JustComments) { 1068 addUnwrappedLine(); 1069 } 1070 pushToken(*I); 1071 } 1072 if (NewlineBeforeNext && JustComments) { 1073 addUnwrappedLine(); 1074 } 1075 CommentsBeforeNextToken.clear(); 1076} 1077 1078void UnwrappedLineParser::nextToken() { 1079 if (eof()) 1080 return; 1081 flushComments(FormatTok->NewlinesBefore > 0); 1082 pushToken(FormatTok); 1083 readToken(); 1084} 1085 1086void UnwrappedLineParser::readToken() { 1087 bool CommentsInCurrentLine = true; 1088 do { 1089 FormatTok = Tokens->getNextToken(); 1090 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1091 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1092 // If there is an unfinished unwrapped line, we flush the preprocessor 1093 // directives only after that unwrapped line was finished later. 1094 bool SwitchToPreprocessorLines = 1095 !Line->Tokens.empty() && CurrentLines == &Lines; 1096 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1097 // Comments stored before the preprocessor directive need to be output 1098 // before the preprocessor directive, at the same level as the 1099 // preprocessor directive, as we consider them to apply to the directive. 1100 flushComments(FormatTok->NewlinesBefore > 0); 1101 parsePPDirective(); 1102 } 1103 1104 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1105 !Line->InPPDirective) { 1106 continue; 1107 } 1108 1109 if (!FormatTok->Tok.is(tok::comment)) 1110 return; 1111 if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { 1112 CommentsInCurrentLine = false; 1113 } 1114 if (CommentsInCurrentLine) { 1115 pushToken(FormatTok); 1116 } else { 1117 CommentsBeforeNextToken.push_back(FormatTok); 1118 } 1119 } while (!eof()); 1120} 1121 1122void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1123 Line->Tokens.push_back(Tok); 1124 if (MustBreakBeforeNextToken) { 1125 Line->Tokens.back()->MustBreakBefore = true; 1126 MustBreakBeforeNextToken = false; 1127 } 1128} 1129 1130} // end namespace format 1131} // end namespace clang 1132