// UnwrappedLineParser.cpp revision ad3094b7d85a4f8eeaf3f60364d5e8796537f061
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "format-parser" 17 18#include "UnwrappedLineParser.h" 19#include "llvm/Support/Debug.h" 20 21namespace clang { 22namespace format { 23 24class ScopedDeclarationState { 25public: 26 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 27 bool MustBeDeclaration) 28 : Line(Line), Stack(Stack) { 29 Line.MustBeDeclaration = MustBeDeclaration; 30 Stack.push_back(MustBeDeclaration); 31 } 32 ~ScopedDeclarationState() { 33 Stack.pop_back(); 34 if (!Stack.empty()) 35 Line.MustBeDeclaration = Stack.back(); 36 else 37 Line.MustBeDeclaration = true; 38 } 39private: 40 UnwrappedLine &Line; 41 std::vector<bool> &Stack; 42}; 43 44class ScopedMacroState : public FormatTokenSource { 45public: 46 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 47 FormatToken &ResetToken, bool &StructuralError) 48 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 49 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 50 StructuralError(StructuralError), 51 PreviousStructuralError(StructuralError) { 52 TokenSource = this; 53 Line.Level = 0; 54 Line.InPPDirective = true; 55 } 56 57 ~ScopedMacroState() { 58 TokenSource = PreviousTokenSource; 59 ResetToken = Token; 60 Line.InPPDirective = false; 61 Line.Level = PreviousLineLevel; 62 StructuralError = PreviousStructuralError; 63 } 64 65 virtual FormatToken getNextToken() { 66 // The \c UnwrappedLineParser 
guards against this by never calling 67 // \c getNextToken() after it has encountered the first eof token. 68 assert(!eof()); 69 Token = PreviousTokenSource->getNextToken(); 70 if (eof()) 71 return createEOF(); 72 return Token; 73 } 74 75 virtual unsigned getPosition() { 76 return PreviousTokenSource->getPosition(); 77 } 78 79 virtual FormatToken setPosition(unsigned Position) { 80 Token = PreviousTokenSource->setPosition(Position); 81 return Token; 82 } 83 84private: 85 bool eof() { return Token.HasUnescapedNewline; } 86 87 FormatToken createEOF() { 88 FormatToken FormatTok; 89 FormatTok.Tok.startToken(); 90 FormatTok.Tok.setKind(tok::eof); 91 return FormatTok; 92 } 93 94 UnwrappedLine &Line; 95 FormatTokenSource *&TokenSource; 96 FormatToken &ResetToken; 97 unsigned PreviousLineLevel; 98 FormatTokenSource *PreviousTokenSource; 99 bool &StructuralError; 100 bool PreviousStructuralError; 101 102 FormatToken Token; 103}; 104 105class ScopedLineState { 106public: 107 ScopedLineState(UnwrappedLineParser &Parser, 108 bool SwitchToPreprocessorLines = false) 109 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 110 if (SwitchToPreprocessorLines) 111 Parser.CurrentLines = &Parser.PreprocessorDirectives; 112 PreBlockLine = Parser.Line.take(); 113 Parser.Line.reset(new UnwrappedLine()); 114 Parser.Line->Level = PreBlockLine->Level; 115 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 116 } 117 118 ~ScopedLineState() { 119 if (!Parser.Line->Tokens.empty()) { 120 Parser.addUnwrappedLine(); 121 } 122 assert(Parser.Line->Tokens.empty()); 123 Parser.Line.reset(PreBlockLine); 124 Parser.MustBreakBeforeNextToken = true; 125 if (SwitchToPreprocessorLines) 126 Parser.CurrentLines = &Parser.Lines; 127 } 128 129private: 130 UnwrappedLineParser &Parser; 131 const bool SwitchToPreprocessorLines; 132 133 UnwrappedLine *PreBlockLine; 134}; 135 136class IndexedTokenSource : public FormatTokenSource { 137public: 138 IndexedTokenSource(ArrayRef<FormatToken> 
Tokens) 139 : Tokens(Tokens), Position(-1) {} 140 141 virtual FormatToken getNextToken() { 142 ++Position; 143 return Tokens[Position]; 144 } 145 146 virtual unsigned getPosition() { 147 assert(Position >= 0); 148 return Position; 149 } 150 151 virtual FormatToken setPosition(unsigned P) { 152 Position = P; 153 return Tokens[Position]; 154 } 155 156private: 157 ArrayRef<FormatToken> Tokens; 158 int Position; 159}; 160 161UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 162 FormatTokenSource &Tokens, 163 UnwrappedLineConsumer &Callback) 164 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 165 CurrentLines(&Lines), StructuralError(false), Style(Style), 166 Tokens(NULL), Callback(Callback) { 167 FormatToken Tok; 168 do { 169 Tok = Tokens.getNextToken(); 170 AllTokens.push_back(Tok); 171 } while (Tok.Tok.isNot(tok::eof)); 172 LBraces.resize(AllTokens.size(), BS_Unknown); 173} 174 175bool UnwrappedLineParser::parse() { 176 DEBUG(llvm::dbgs() << "----\n"); 177 IndexedTokenSource TokenSource(AllTokens); 178 Tokens = &TokenSource; 179 readToken(); 180 parseFile(); 181 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); 182 I != E; ++I) { 183 Callback.consumeUnwrappedLine(*I); 184 } 185 186 // Create line with eof token. 187 pushToken(FormatTok); 188 Callback.consumeUnwrappedLine(*Line); 189 return StructuralError; 190} 191 192void UnwrappedLineParser::parseFile() { 193 ScopedDeclarationState DeclarationState( 194 *Line, DeclarationScopeStack, 195 /*MustBeDeclaration=*/ !Line->InPPDirective); 196 parseLevel(/*HasOpeningBrace=*/ false); 197 // Make sure to format the remaining tokens. 
198 flushComments(true); 199 addUnwrappedLine(); 200} 201 202void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 203 do { 204 switch (FormatTok.Tok.getKind()) { 205 case tok::comment: 206 nextToken(); 207 addUnwrappedLine(); 208 break; 209 case tok::l_brace: 210 // FIXME: Add parameter whether this can happen - if this happens, we must 211 // be in a non-declaration context. 212 parseBlock(/*MustBeDeclaration=*/ false); 213 addUnwrappedLine(); 214 break; 215 case tok::r_brace: 216 if (HasOpeningBrace) 217 return; 218 StructuralError = true; 219 nextToken(); 220 addUnwrappedLine(); 221 break; 222 default: 223 parseStructuralElement(); 224 break; 225 } 226 } while (!eof()); 227} 228 229void UnwrappedLineParser::calculateBraceTypes() { 230 // We'll parse forward through the tokens until we hit 231 // a closing brace or eof - note that getNextToken() will 232 // parse macros, so this will magically work inside macro 233 // definitions, too. 234 unsigned StoredPosition = Tokens->getPosition(); 235 unsigned Position = StoredPosition; 236 FormatToken Tok = FormatTok; 237 // Keep a stack of positions of lbrace tokens. We will 238 // update information about whether an lbrace starts a 239 // braced init list or a different block during the loop. 240 SmallVector<unsigned, 8> LBraceStack; 241 assert(Tok.Tok.is(tok::l_brace)); 242 do { 243 FormatToken NextTok = Tokens->getNextToken(); 244 switch (Tok.Tok.getKind()) { 245 case tok::l_brace: 246 LBraceStack.push_back(Position); 247 break; 248 case tok::r_brace: 249 if (!LBraceStack.empty()) { 250 if (LBraces[LBraceStack.back()] == BS_Unknown) { 251 // If there is a comma, semicolon or right paren after the closing 252 // brace, we assume this is a braced initializer list. 253 254 // FIXME: Note that this currently works only because we do not 255 // use the brace information while inside a braced init list. 256 // Thus, if the parent is a braced init list, we consider all 257 // brace blocks inside it braced init list. 
That works good enough 258 // for now, but we will need to fix it to correctly handle lambdas. 259 if (NextTok.Tok.is(tok::comma) || NextTok.Tok.is(tok::semi) || 260 NextTok.Tok.is(tok::r_paren)) 261 LBraces[LBraceStack.back()] = BS_BracedInit; 262 else 263 LBraces[LBraceStack.back()] = BS_Block; 264 } 265 LBraceStack.pop_back(); 266 } 267 break; 268 case tok::semi: 269 case tok::kw_if: 270 case tok::kw_while: 271 case tok::kw_for: 272 case tok::kw_switch: 273 case tok::kw_try: 274 if (!LBraceStack.empty()) 275 LBraces[LBraceStack.back()] = BS_Block; 276 break; 277 default: 278 break; 279 } 280 Tok = NextTok; 281 ++Position; 282 } while (Tok.Tok.isNot(tok::eof)); 283 // Assume other blocks for all unclosed opening braces. 284 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 285 if (LBraces[LBraceStack[i]] == BS_Unknown) 286 LBraces[LBraceStack[i]] = BS_Block; 287 } 288 FormatTok = Tokens->setPosition(StoredPosition); 289} 290 291void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 292 unsigned AddLevels) { 293 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 294 nextToken(); 295 296 addUnwrappedLine(); 297 298 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 299 MustBeDeclaration); 300 Line->Level += AddLevels; 301 parseLevel(/*HasOpeningBrace=*/ true); 302 303 if (!FormatTok.Tok.is(tok::r_brace)) { 304 Line->Level -= AddLevels; 305 StructuralError = true; 306 return; 307 } 308 309 nextToken(); // Munch the closing brace. 
310 Line->Level -= AddLevels; 311} 312 313void UnwrappedLineParser::parsePPDirective() { 314 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 315 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 316 nextToken(); 317 318 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 319 parsePPUnknown(); 320 return; 321 } 322 323 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 324 case tok::pp_define: 325 parsePPDefine(); 326 break; 327 default: 328 parsePPUnknown(); 329 break; 330 } 331} 332 333void UnwrappedLineParser::parsePPDefine() { 334 nextToken(); 335 336 if (FormatTok.Tok.getKind() != tok::identifier) { 337 parsePPUnknown(); 338 return; 339 } 340 nextToken(); 341 if (FormatTok.Tok.getKind() == tok::l_paren && 342 FormatTok.WhitespaceRange.getBegin() == 343 FormatTok.WhitespaceRange.getEnd()) { 344 parseParens(); 345 } 346 addUnwrappedLine(); 347 Line->Level = 1; 348 349 // Errors during a preprocessor directive can only affect the layout of the 350 // preprocessor directive, and thus we ignore them. An alternative approach 351 // would be to use the same approach we use on the file level (no 352 // re-indentation if there was a structural error) within the macro 353 // definition. 354 parseFile(); 355} 356 357void UnwrappedLineParser::parsePPUnknown() { 358 do { 359 nextToken(); 360 } while (!eof()); 361 addUnwrappedLine(); 362} 363 364// Here we blacklist certain tokens that are not usually the first token in an 365// unwrapped line. This is used in attempt to distinguish macro calls without 366// trailing semicolons from other constructs split to several lines. 367bool tokenCanStartNewLine(clang::Token Tok) { 368 // Semicolon can be a null-statement, l_square can be a start of a macro or 369 // a C++11 attribute, but this doesn't seem to be common. 
370 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 371 Tok.isNot(tok::l_square) && 372 // Tokens that can only be used as binary operators and a part of 373 // overloaded operator names. 374 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 375 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 376 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 377 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 378 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 379 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 380 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 381 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 382 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 383 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 384 Tok.isNot(tok::lesslessequal) && 385 // Colon is used in labels, base class lists, initializer lists, 386 // range-based for loops, ternary operator, but should never be the 387 // first token in an unwrapped line. 388 Tok.isNot(tok::colon); 389} 390 391void UnwrappedLineParser::parseStructuralElement() { 392 assert(!FormatTok.Tok.is(tok::l_brace)); 393 switch (FormatTok.Tok.getKind()) { 394 case tok::at: 395 nextToken(); 396 if (FormatTok.Tok.is(tok::l_brace)) { 397 parseBracedList(); 398 break; 399 } 400 switch (FormatTok.Tok.getObjCKeywordID()) { 401 case tok::objc_public: 402 case tok::objc_protected: 403 case tok::objc_package: 404 case tok::objc_private: 405 return parseAccessSpecifier(); 406 case tok::objc_interface: 407 case tok::objc_implementation: 408 return parseObjCInterfaceOrImplementation(); 409 case tok::objc_protocol: 410 return parseObjCProtocol(); 411 case tok::objc_end: 412 return; // Handled by the caller. 
413 case tok::objc_optional: 414 case tok::objc_required: 415 nextToken(); 416 addUnwrappedLine(); 417 return; 418 default: 419 break; 420 } 421 break; 422 case tok::kw_namespace: 423 parseNamespace(); 424 return; 425 case tok::kw_inline: 426 nextToken(); 427 if (FormatTok.Tok.is(tok::kw_namespace)) { 428 parseNamespace(); 429 return; 430 } 431 break; 432 case tok::kw_public: 433 case tok::kw_protected: 434 case tok::kw_private: 435 parseAccessSpecifier(); 436 return; 437 case tok::kw_if: 438 parseIfThenElse(); 439 return; 440 case tok::kw_for: 441 case tok::kw_while: 442 parseForOrWhileLoop(); 443 return; 444 case tok::kw_do: 445 parseDoWhile(); 446 return; 447 case tok::kw_switch: 448 parseSwitch(); 449 return; 450 case tok::kw_default: 451 nextToken(); 452 parseLabel(); 453 return; 454 case tok::kw_case: 455 parseCaseLabel(); 456 return; 457 case tok::kw_return: 458 parseReturn(); 459 return; 460 case tok::kw_extern: 461 nextToken(); 462 if (FormatTok.Tok.is(tok::string_literal)) { 463 nextToken(); 464 if (FormatTok.Tok.is(tok::l_brace)) { 465 parseBlock(/*MustBeDeclaration=*/ true, 0); 466 addUnwrappedLine(); 467 return; 468 } 469 } 470 // In all other cases, parse the declaration. 471 break; 472 default: 473 break; 474 } 475 do { 476 switch (FormatTok.Tok.getKind()) { 477 case tok::at: 478 nextToken(); 479 if (FormatTok.Tok.is(tok::l_brace)) 480 parseBracedList(); 481 break; 482 case tok::kw_enum: 483 parseEnum(); 484 break; 485 case tok::kw_struct: 486 case tok::kw_union: 487 case tok::kw_class: 488 parseRecord(); 489 // A record declaration or definition is always the start of a structural 490 // element. 491 break; 492 case tok::semi: 493 nextToken(); 494 addUnwrappedLine(); 495 return; 496 case tok::r_brace: 497 addUnwrappedLine(); 498 return; 499 case tok::l_paren: 500 parseParens(); 501 break; 502 case tok::l_brace: 503 if (!tryToParseBracedList()) { 504 // A block outside of parentheses must be the last part of a 505 // structural element. 
506 // FIXME: Figure out cases where this is not true, and add projections 507 // for them (the one we know is missing are lambdas). 508 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 509 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 510 addUnwrappedLine(); 511 parseBlock(/*MustBeDeclaration=*/ false); 512 addUnwrappedLine(); 513 return; 514 } 515 // Otherwise this was a braced init list, and the structural 516 // element continues. 517 break; 518 case tok::identifier: 519 nextToken(); 520 if (Line->Tokens.size() == 1) { 521 if (FormatTok.Tok.is(tok::colon)) { 522 parseLabel(); 523 return; 524 } 525 // Recognize function-like macro usages without trailing semicolon. 526 if (FormatTok.Tok.is(tok::l_paren)) { 527 parseParens(); 528 if (FormatTok.HasUnescapedNewline && 529 tokenCanStartNewLine(FormatTok.Tok)) { 530 addUnwrappedLine(); 531 return; 532 } 533 } 534 } 535 break; 536 case tok::equal: 537 nextToken(); 538 if (FormatTok.Tok.is(tok::l_brace)) { 539 parseBracedList(); 540 } 541 break; 542 default: 543 nextToken(); 544 break; 545 } 546 } while (!eof()); 547} 548 549bool UnwrappedLineParser::tryToParseBracedList() { 550 if (LBraces[Tokens->getPosition()] == BS_Unknown) 551 calculateBraceTypes(); 552 assert(LBraces[Tokens->getPosition()] != BS_Unknown); 553 if (LBraces[Tokens->getPosition()] == BS_Block) 554 return false; 555 parseBracedList(); 556 return true; 557} 558 559void UnwrappedLineParser::parseBracedList() { 560 nextToken(); 561 562 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 563 // replace this by using parseAssigmentExpression() inside. 564 bool StartOfExpression = true; 565 do { 566 // FIXME: When we start to support lambdas, we'll want to parse them away 567 // here, otherwise our bail-out scenarios below break. The better solution 568 // might be to just implement a more or less complete expression parser. 
569 switch (FormatTok.Tok.getKind()) { 570 case tok::l_brace: 571 if (!StartOfExpression) { 572 // Probably a missing closing brace. Bail out. 573 addUnwrappedLine(); 574 return; 575 } 576 parseBracedList(); 577 StartOfExpression = false; 578 break; 579 case tok::r_brace: 580 nextToken(); 581 return; 582 case tok::semi: 583 // Probably a missing closing brace. Bail out. 584 return; 585 case tok::comma: 586 nextToken(); 587 StartOfExpression = true; 588 break; 589 default: 590 nextToken(); 591 StartOfExpression = false; 592 break; 593 } 594 } while (!eof()); 595} 596 597void UnwrappedLineParser::parseReturn() { 598 nextToken(); 599 600 do { 601 switch (FormatTok.Tok.getKind()) { 602 case tok::l_brace: 603 parseBracedList(); 604 if (FormatTok.Tok.isNot(tok::semi)) { 605 // Assume missing ';'. 606 addUnwrappedLine(); 607 return; 608 } 609 break; 610 case tok::l_paren: 611 parseParens(); 612 break; 613 case tok::r_brace: 614 // Assume missing ';'. 615 addUnwrappedLine(); 616 return; 617 case tok::semi: 618 nextToken(); 619 addUnwrappedLine(); 620 return; 621 default: 622 nextToken(); 623 break; 624 } 625 } while (!eof()); 626} 627 628void UnwrappedLineParser::parseParens() { 629 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 630 nextToken(); 631 do { 632 switch (FormatTok.Tok.getKind()) { 633 case tok::l_paren: 634 parseParens(); 635 break; 636 case tok::r_paren: 637 nextToken(); 638 return; 639 case tok::l_brace: { 640 if (!tryToParseBracedList()) { 641 nextToken(); 642 ScopedLineState LineState(*this); 643 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 644 /*MustBeDeclaration=*/ false); 645 Line->Level += 1; 646 parseLevel(/*HasOpeningBrace=*/ true); 647 Line->Level -= 1; 648 } 649 break; 650 } 651 case tok::at: 652 nextToken(); 653 if (FormatTok.Tok.is(tok::l_brace)) 654 parseBracedList(); 655 break; 656 default: 657 nextToken(); 658 break; 659 } 660 } while (!eof()); 661} 662 663void UnwrappedLineParser::parseIfThenElse() { 664 
assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 665 nextToken(); 666 if (FormatTok.Tok.is(tok::l_paren)) 667 parseParens(); 668 bool NeedsUnwrappedLine = false; 669 if (FormatTok.Tok.is(tok::l_brace)) { 670 parseBlock(/*MustBeDeclaration=*/ false); 671 NeedsUnwrappedLine = true; 672 } else { 673 addUnwrappedLine(); 674 ++Line->Level; 675 parseStructuralElement(); 676 --Line->Level; 677 } 678 if (FormatTok.Tok.is(tok::kw_else)) { 679 nextToken(); 680 if (FormatTok.Tok.is(tok::l_brace)) { 681 parseBlock(/*MustBeDeclaration=*/ false); 682 addUnwrappedLine(); 683 } else if (FormatTok.Tok.is(tok::kw_if)) { 684 parseIfThenElse(); 685 } else { 686 addUnwrappedLine(); 687 ++Line->Level; 688 parseStructuralElement(); 689 --Line->Level; 690 } 691 } else if (NeedsUnwrappedLine) { 692 addUnwrappedLine(); 693 } 694} 695 696void UnwrappedLineParser::parseNamespace() { 697 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 698 nextToken(); 699 if (FormatTok.Tok.is(tok::identifier)) 700 nextToken(); 701 if (FormatTok.Tok.is(tok::l_brace)) { 702 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) 703 addUnwrappedLine(); 704 705 parseBlock(/*MustBeDeclaration=*/ true, 0); 706 // Munch the semicolon after a namespace. This is more common than one would 707 // think. Puttin the semicolon into its own line is very ugly. 708 if (FormatTok.Tok.is(tok::semi)) 709 nextToken(); 710 addUnwrappedLine(); 711 } 712 // FIXME: Add error handling. 
713} 714 715void UnwrappedLineParser::parseForOrWhileLoop() { 716 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 717 "'for' or 'while' expected"); 718 nextToken(); 719 if (FormatTok.Tok.is(tok::l_paren)) 720 parseParens(); 721 if (FormatTok.Tok.is(tok::l_brace)) { 722 parseBlock(/*MustBeDeclaration=*/ false); 723 addUnwrappedLine(); 724 } else { 725 addUnwrappedLine(); 726 ++Line->Level; 727 parseStructuralElement(); 728 --Line->Level; 729 } 730} 731 732void UnwrappedLineParser::parseDoWhile() { 733 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 734 nextToken(); 735 if (FormatTok.Tok.is(tok::l_brace)) { 736 parseBlock(/*MustBeDeclaration=*/ false); 737 } else { 738 addUnwrappedLine(); 739 ++Line->Level; 740 parseStructuralElement(); 741 --Line->Level; 742 } 743 744 // FIXME: Add error handling. 745 if (!FormatTok.Tok.is(tok::kw_while)) { 746 addUnwrappedLine(); 747 return; 748 } 749 750 nextToken(); 751 parseStructuralElement(); 752} 753 754void UnwrappedLineParser::parseLabel() { 755 if (FormatTok.Tok.isNot(tok::colon)) 756 return; 757 nextToken(); 758 unsigned OldLineLevel = Line->Level; 759 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 760 --Line->Level; 761 if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) { 762 parseBlock(/*MustBeDeclaration=*/ false); 763 if (FormatTok.Tok.is(tok::kw_break)) 764 parseStructuralElement(); // "break;" after "}" goes on the same line. 765 } 766 addUnwrappedLine(); 767 Line->Level = OldLineLevel; 768} 769 770void UnwrappedLineParser::parseCaseLabel() { 771 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 772 // FIXME: fix handling of complex expressions here. 
773 do { 774 nextToken(); 775 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 776 parseLabel(); 777} 778 779void UnwrappedLineParser::parseSwitch() { 780 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 781 nextToken(); 782 if (FormatTok.Tok.is(tok::l_paren)) 783 parseParens(); 784 if (FormatTok.Tok.is(tok::l_brace)) { 785 parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1); 786 addUnwrappedLine(); 787 } else { 788 addUnwrappedLine(); 789 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 790 parseStructuralElement(); 791 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 792 } 793} 794 795void UnwrappedLineParser::parseAccessSpecifier() { 796 nextToken(); 797 // Otherwise, we don't know what it is, and we'd better keep the next token. 798 if (FormatTok.Tok.is(tok::colon)) 799 nextToken(); 800 addUnwrappedLine(); 801} 802 803void UnwrappedLineParser::parseEnum() { 804 nextToken(); 805 if (FormatTok.Tok.is(tok::identifier) || 806 FormatTok.Tok.is(tok::kw___attribute) || 807 FormatTok.Tok.is(tok::kw___declspec)) { 808 nextToken(); 809 // We can have macros or attributes in between 'enum' and the enum name. 810 if (FormatTok.Tok.is(tok::l_paren)) { 811 parseParens(); 812 } 813 if (FormatTok.Tok.is(tok::identifier)) 814 nextToken(); 815 } 816 if (FormatTok.Tok.is(tok::l_brace)) { 817 nextToken(); 818 addUnwrappedLine(); 819 ++Line->Level; 820 do { 821 switch (FormatTok.Tok.getKind()) { 822 case tok::l_paren: 823 parseParens(); 824 break; 825 case tok::r_brace: 826 addUnwrappedLine(); 827 nextToken(); 828 --Line->Level; 829 return; 830 case tok::comma: 831 nextToken(); 832 addUnwrappedLine(); 833 break; 834 default: 835 nextToken(); 836 break; 837 } 838 } while (!eof()); 839 } 840 // We fall through to parsing a structural element afterwards, so that in 841 // enum A {} n, m; 842 // "} n, m;" will end up in one unwrapped line. 
843} 844 845void UnwrappedLineParser::parseRecord() { 846 nextToken(); 847 if (FormatTok.Tok.is(tok::identifier) || 848 FormatTok.Tok.is(tok::kw___attribute) || 849 FormatTok.Tok.is(tok::kw___declspec)) { 850 nextToken(); 851 // We can have macros or attributes in between 'class' and the class name. 852 if (FormatTok.Tok.is(tok::l_paren)) { 853 parseParens(); 854 } 855 // The actual identifier can be a nested name specifier, and in macros 856 // it is often token-pasted. 857 while (FormatTok.Tok.is(tok::identifier) || 858 FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash)) 859 nextToken(); 860 861 // Note that parsing away template declarations here leads to incorrectly 862 // accepting function declarations as record declarations. 863 // In general, we cannot solve this problem. Consider: 864 // class A<int> B() {} 865 // which can be a function definition or a class definition when B() is a 866 // macro. If we find enough real-world cases where this is a problem, we 867 // can parse for the 'template' keyword in the beginning of the statement, 868 // and thus rule out the record production in case there is no template 869 // (this would still leave us with an ambiguity between template function 870 // and class declarations). 871 if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) { 872 while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) { 873 if (FormatTok.Tok.is(tok::semi)) 874 return; 875 nextToken(); 876 } 877 } 878 } 879 if (FormatTok.Tok.is(tok::l_brace)) { 880 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) 881 addUnwrappedLine(); 882 883 parseBlock(/*MustBeDeclaration=*/ true); 884 } 885 // We fall through to parsing a structural element afterwards, so 886 // class A {} n, m; 887 // will end up in one unwrapped line. 
888} 889 890void UnwrappedLineParser::parseObjCProtocolList() { 891 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 892 do 893 nextToken(); 894 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 895 nextToken(); // Skip '>'. 896} 897 898void UnwrappedLineParser::parseObjCUntilAtEnd() { 899 do { 900 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 901 nextToken(); 902 addUnwrappedLine(); 903 break; 904 } 905 parseStructuralElement(); 906 } while (!eof()); 907} 908 909void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 910 nextToken(); 911 nextToken(); // interface name 912 913 // @interface can be followed by either a base class, or a category. 914 if (FormatTok.Tok.is(tok::colon)) { 915 nextToken(); 916 nextToken(); // base class name 917 } else if (FormatTok.Tok.is(tok::l_paren)) 918 // Skip category, if present. 919 parseParens(); 920 921 if (FormatTok.Tok.is(tok::less)) 922 parseObjCProtocolList(); 923 924 // If instance variables are present, keep the '{' on the first line too. 925 if (FormatTok.Tok.is(tok::l_brace)) 926 parseBlock(/*MustBeDeclaration=*/ true); 927 928 // With instance variables, this puts '}' on its own line. Without instance 929 // variables, this ends the @interface line. 930 addUnwrappedLine(); 931 932 parseObjCUntilAtEnd(); 933} 934 935void UnwrappedLineParser::parseObjCProtocol() { 936 nextToken(); 937 nextToken(); // protocol name 938 939 if (FormatTok.Tok.is(tok::less)) 940 parseObjCProtocolList(); 941 942 // Check for protocol declaration. 943 if (FormatTok.Tok.is(tok::semi)) { 944 nextToken(); 945 return addUnwrappedLine(); 946 } 947 948 addUnwrappedLine(); 949 parseObjCUntilAtEnd(); 950} 951 952void UnwrappedLineParser::addUnwrappedLine() { 953 if (Line->Tokens.empty()) 954 return; 955 DEBUG({ 956 llvm::dbgs() << "Line(" << Line->Level << ")" 957 << (Line->InPPDirective ? 
" MACRO" : "") << ": "; 958 for (std::list<FormatToken>::iterator I = Line->Tokens.begin(), 959 E = Line->Tokens.end(); 960 I != E; ++I) { 961 llvm::dbgs() << I->Tok.getName() << " "; 962 963 } 964 llvm::dbgs() << "\n"; 965 }); 966 CurrentLines->push_back(*Line); 967 Line->Tokens.clear(); 968 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 969 for (std::vector<UnwrappedLine>::iterator 970 I = PreprocessorDirectives.begin(), 971 E = PreprocessorDirectives.end(); 972 I != E; ++I) { 973 CurrentLines->push_back(*I); 974 } 975 PreprocessorDirectives.clear(); 976 } 977} 978 979bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); } 980 981void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 982 bool JustComments = Line->Tokens.empty(); 983 for (SmallVectorImpl<FormatToken>::const_iterator 984 I = CommentsBeforeNextToken.begin(), 985 E = CommentsBeforeNextToken.end(); 986 I != E; ++I) { 987 if (I->NewlinesBefore && JustComments) { 988 addUnwrappedLine(); 989 } 990 pushToken(*I); 991 } 992 if (NewlineBeforeNext && JustComments) { 993 addUnwrappedLine(); 994 } 995 CommentsBeforeNextToken.clear(); 996} 997 998void UnwrappedLineParser::nextToken() { 999 if (eof()) 1000 return; 1001 flushComments(FormatTok.NewlinesBefore > 0); 1002 pushToken(FormatTok); 1003 readToken(); 1004} 1005 1006void UnwrappedLineParser::readToken() { 1007 bool CommentsInCurrentLine = true; 1008 do { 1009 FormatTok = Tokens->getNextToken(); 1010 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 1011 (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) { 1012 // If there is an unfinished unwrapped line, we flush the preprocessor 1013 // directives only after that unwrapped line was finished later. 
1014 bool SwitchToPreprocessorLines = 1015 !Line->Tokens.empty() && CurrentLines == &Lines; 1016 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1017 // Comments stored before the preprocessor directive need to be output 1018 // before the preprocessor directive, at the same level as the 1019 // preprocessor directive, as we consider them to apply to the directive. 1020 flushComments(FormatTok.NewlinesBefore > 0); 1021 parsePPDirective(); 1022 } 1023 if (!FormatTok.Tok.is(tok::comment)) 1024 return; 1025 if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) { 1026 CommentsInCurrentLine = false; 1027 } 1028 if (CommentsInCurrentLine) { 1029 pushToken(FormatTok); 1030 } else { 1031 CommentsBeforeNextToken.push_back(FormatTok); 1032 } 1033 } while (!eof()); 1034} 1035 1036void UnwrappedLineParser::pushToken(const FormatToken &Tok) { 1037 Line->Tokens.push_back(Tok); 1038 if (MustBreakBeforeNextToken) { 1039 Line->Tokens.back().MustBreakBefore = true; 1040 MustBreakBeforeNextToken = false; 1041 } 1042} 1043 1044} // end namespace format 1045} // end namespace clang 1046