1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14//===----------------------------------------------------------------------===// 15 16#include "UnwrappedLineParser.h" 17#include "llvm/ADT/STLExtras.h" 18#include "llvm/Support/Debug.h" 19#include "llvm/Support/raw_ostream.h" 20 21#define DEBUG_TYPE "format-parser" 22 23namespace clang { 24namespace format { 25 26class FormatTokenSource { 27public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33}; 34 35namespace { 36 37class ScopedDeclarationState { 38public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56}; 57 58class ScopedMacroState : public FormatTokenSource { 59public: 60 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 61 FormatToken *&ResetToken) 62 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 63 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 64 Token(nullptr) { 65 TokenSource = this; 66 Line.Level = 0; 67 Line.InPPDirective = true; 68 } 69 70 ~ScopedMacroState() override { 71 TokenSource = PreviousTokenSource; 72 ResetToken = Token; 73 Line.InPPDirective = false; 74 Line.Level = PreviousLineLevel; 75 } 76 77 FormatToken *getNextToken() override { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 88 89 FormatToken *setPosition(unsigned Position) override { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 114 FormatToken *Token; 115}; 116 117} // end anonymous namespace 118 119class ScopedLineState { 120public: 121 ScopedLineState(UnwrappedLineParser &Parser, 122 bool SwitchToPreprocessorLines = false) 123 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 124 if (SwitchToPreprocessorLines) 125 Parser.CurrentLines = &Parser.PreprocessorDirectives; 126 else if (!Parser.Line->Tokens.empty()) 127 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 128 PreBlockLine = std::move(Parser.Line); 129 Parser.Line = llvm::make_unique<UnwrappedLine>(); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 assert(Parser.Line->Tokens.empty()); 139 Parser.Line = std::move(PreBlockLine); 140 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 141 Parser.MustBreakBeforeNextToken = true; 142 Parser.CurrentLines = OriginalLines; 143 } 144 145private: 146 UnwrappedLineParser &Parser; 147 148 std::unique_ptr<UnwrappedLine> PreBlockLine; 149 SmallVectorImpl<UnwrappedLine> *OriginalLines; 150}; 151 152class CompoundStatementIndenter { 153public: 154 CompoundStatementIndenter(UnwrappedLineParser *Parser, 155 const FormatStyle &Style, unsigned &LineLevel) 156 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 157 if (Style.BraceWrapping.AfterControlStatement) 158 Parser->addUnwrappedLine(); 159 if (Style.BraceWrapping.IndentBraces) 160 ++LineLevel; 161 } 162 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 163 164private: 165 unsigned &LineLevel; 166 unsigned OldLineLevel; 167}; 168 169namespace { 170 171class IndexedTokenSource : public FormatTokenSource { 172public: 173 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 174 : Tokens(Tokens), Position(-1) {} 175 176 FormatToken *getNextToken() override { 177 ++Position; 178 return Tokens[Position]; 179 } 180 181 unsigned getPosition() override { 182 assert(Position >= 0); 183 return Position; 184 } 185 186 FormatToken *setPosition(unsigned P) override { 187 Position = P; 188 return Tokens[Position]; 189 } 190 191 void reset() { Position = -1; } 192 193private: 194 ArrayRef<FormatToken *> Tokens; 195 int Position; 196}; 197 198} // end anonymous namespace 199 200UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 201 const AdditionalKeywords &Keywords, 202 ArrayRef<FormatToken *> Tokens, 203 UnwrappedLineConsumer &Callback) 204 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 205 CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), 206 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 207 208void UnwrappedLineParser::reset() { 209 PPBranchLevel = -1; 210 Line.reset(new UnwrappedLine); 211 CommentsBeforeNextToken.clear(); 212 FormatTok = nullptr; 213 MustBreakBeforeNextToken = false; 214 PreprocessorDirectives.clear(); 215 CurrentLines = &Lines; 216 DeclarationScopeStack.clear(); 217 PPStack.clear(); 218} 219 220void UnwrappedLineParser::parse() { 221 IndexedTokenSource TokenSource(AllTokens); 222 do { 223 DEBUG(llvm::dbgs() << "----\n"); 224 reset(); 225 Tokens = &TokenSource; 226 TokenSource.reset(); 227 228 readToken(); 229 parseFile(); 230 // Create line with eof token. 231 pushToken(FormatTok); 232 addUnwrappedLine(); 233 234 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 235 E = Lines.end(); 236 I != E; ++I) { 237 Callback.consumeUnwrappedLine(*I); 238 } 239 Callback.finishRun(); 240 Lines.clear(); 241 while (!PPLevelBranchIndex.empty() && 242 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 243 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 244 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 245 } 246 if (!PPLevelBranchIndex.empty()) { 247 ++PPLevelBranchIndex.back(); 248 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 249 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 250 } 251 } while (!PPLevelBranchIndex.empty()); 252} 253 254void UnwrappedLineParser::parseFile() { 255 // The top-level context in a file always has declarations, except for pre- 256 // processor directives and JavaScript files. 257 bool MustBeDeclaration = 258 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 259 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 260 MustBeDeclaration); 261 parseLevel(/*HasOpeningBrace=*/false); 262 // Make sure to format the remaining tokens. 263 flushComments(true); 264 addUnwrappedLine(); 265} 266 267void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 268 bool SwitchLabelEncountered = false; 269 do { 270 tok::TokenKind kind = FormatTok->Tok.getKind(); 271 if (FormatTok->Type == TT_MacroBlockBegin) { 272 kind = tok::l_brace; 273 } else if (FormatTok->Type == TT_MacroBlockEnd) { 274 kind = tok::r_brace; 275 } 276 277 switch (kind) { 278 case tok::comment: 279 nextToken(); 280 addUnwrappedLine(); 281 break; 282 case tok::l_brace: 283 // FIXME: Add parameter whether this can happen - if this happens, we must 284 // be in a non-declaration context. 285 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 286 continue; 287 parseBlock(/*MustBeDeclaration=*/false); 288 addUnwrappedLine(); 289 break; 290 case tok::r_brace: 291 if (HasOpeningBrace) 292 return; 293 nextToken(); 294 addUnwrappedLine(); 295 break; 296 case tok::kw_default: 297 case tok::kw_case: 298 if (!SwitchLabelEncountered && 299 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 300 ++Line->Level; 301 SwitchLabelEncountered = true; 302 parseStructuralElement(); 303 break; 304 default: 305 parseStructuralElement(); 306 break; 307 } 308 } while (!eof()); 309} 310 311void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 312 // We'll parse forward through the tokens until we hit 313 // a closing brace or eof - note that getNextToken() will 314 // parse macros, so this will magically work inside macro 315 // definitions, too. 316 unsigned StoredPosition = Tokens->getPosition(); 317 FormatToken *Tok = FormatTok; 318 // Keep a stack of positions of lbrace tokens. We will 319 // update information about whether an lbrace starts a 320 // braced init list or a different block during the loop. 321 SmallVector<FormatToken *, 8> LBraceStack; 322 assert(Tok->Tok.is(tok::l_brace)); 323 do { 324 // Get next non-comment token. 325 FormatToken *NextTok; 326 unsigned ReadTokens = 0; 327 do { 328 NextTok = Tokens->getNextToken(); 329 ++ReadTokens; 330 } while (NextTok->is(tok::comment)); 331 332 switch (Tok->Tok.getKind()) { 333 case tok::l_brace: 334 Tok->BlockKind = BK_Unknown; 335 LBraceStack.push_back(Tok); 336 break; 337 case tok::r_brace: 338 if (!LBraceStack.empty()) { 339 if (LBraceStack.back()->BlockKind == BK_Unknown) { 340 bool ProbablyBracedList = false; 341 if (Style.Language == FormatStyle::LK_Proto) { 342 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 343 } else { 344 // Using OriginalColumn to distinguish between ObjC methods and 345 // binary operators is a bit hacky. 346 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 347 NextTok->OriginalColumn == 0; 348 349 // If there is a comma, semicolon or right paren after the closing 350 // brace, we assume this is a braced initializer list. Note that 351 // regardless how we mark inner braces here, we will overwrite the 352 // BlockKind later if we parse a braced list (where all blocks 353 // inside are by default braced lists), or when we explicitly detect 354 // blocks (for example while parsing lambdas). 355 // 356 // We exclude + and - as they can be ObjC visibility modifiers. 357 ProbablyBracedList = 358 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 359 tok::r_paren, tok::r_square, tok::l_brace, 360 tok::l_paren, tok::ellipsis) || 361 (NextTok->is(tok::semi) && 362 (!ExpectClassBody || LBraceStack.size() != 1)) || 363 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 364 } 365 if (ProbablyBracedList) { 366 Tok->BlockKind = BK_BracedInit; 367 LBraceStack.back()->BlockKind = BK_BracedInit; 368 } else { 369 Tok->BlockKind = BK_Block; 370 LBraceStack.back()->BlockKind = BK_Block; 371 } 372 } 373 LBraceStack.pop_back(); 374 } 375 break; 376 case tok::at: 377 case tok::semi: 378 case tok::kw_if: 379 case tok::kw_while: 380 case tok::kw_for: 381 case tok::kw_switch: 382 case tok::kw_try: 383 case tok::kw___try: 384 if (!LBraceStack.empty()) 385 LBraceStack.back()->BlockKind = BK_Block; 386 break; 387 default: 388 break; 389 } 390 Tok = NextTok; 391 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 392 // Assume other blocks for all unclosed opening braces. 393 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 394 if (LBraceStack[i]->BlockKind == BK_Unknown) 395 LBraceStack[i]->BlockKind = BK_Block; 396 } 397 398 FormatTok = Tokens->setPosition(StoredPosition); 399} 400 401void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 402 bool MunchSemi) { 403 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 404 "'{' or macro block token expected"); 405 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 406 FormatTok->BlockKind = BK_Block; 407 408 unsigned InitialLevel = Line->Level; 409 nextToken(); 410 411 if (MacroBlock && FormatTok->is(tok::l_paren)) 412 parseParens(); 413 414 addUnwrappedLine(); 415 416 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 417 MustBeDeclaration); 418 if (AddLevel) 419 ++Line->Level; 420 parseLevel(/*HasOpeningBrace=*/true); 421 422 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 423 : !FormatTok->is(tok::r_brace)) { 424 Line->Level = InitialLevel; 425 FormatTok->BlockKind = BK_Block; 426 return; 427 } 428 429 nextToken(); // Munch the closing brace. 430 431 if (MacroBlock && FormatTok->is(tok::l_paren)) 432 parseParens(); 433 434 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 435 nextToken(); 436 Line->Level = InitialLevel; 437} 438 439static bool isGoogScope(const UnwrappedLine &Line) { 440 // FIXME: Closure-library specific stuff should not be hard-coded but be 441 // configurable. 442 if (Line.Tokens.size() < 4) 443 return false; 444 auto I = Line.Tokens.begin(); 445 if (I->Tok->TokenText != "goog") 446 return false; 447 ++I; 448 if (I->Tok->isNot(tok::period)) 449 return false; 450 ++I; 451 if (I->Tok->TokenText != "scope") 452 return false; 453 ++I; 454 return I->Tok->is(tok::l_paren); 455} 456 457static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 458 const FormatToken &InitialToken) { 459 if (InitialToken.is(tok::kw_namespace)) 460 return Style.BraceWrapping.AfterNamespace; 461 if (InitialToken.is(tok::kw_class)) 462 return Style.BraceWrapping.AfterClass; 463 if (InitialToken.is(tok::kw_union)) 464 return Style.BraceWrapping.AfterUnion; 465 if (InitialToken.is(tok::kw_struct)) 466 return Style.BraceWrapping.AfterStruct; 467 return false; 468} 469 470void UnwrappedLineParser::parseChildBlock() { 471 FormatTok->BlockKind = BK_Block; 472 nextToken(); 473 { 474 bool GoogScope = 475 Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line); 476 ScopedLineState LineState(*this); 477 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 478 /*MustBeDeclaration=*/false); 479 Line->Level += GoogScope ? 0 : 1; 480 parseLevel(/*HasOpeningBrace=*/true); 481 flushComments(isOnNewLine(*FormatTok)); 482 Line->Level -= GoogScope ? 0 : 1; 483 } 484 nextToken(); 485} 486 487void UnwrappedLineParser::parsePPDirective() { 488 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 489 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 490 nextToken(); 491 492 if (!FormatTok->Tok.getIdentifierInfo()) { 493 parsePPUnknown(); 494 return; 495 } 496 497 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 498 case tok::pp_define: 499 parsePPDefine(); 500 return; 501 case tok::pp_if: 502 parsePPIf(/*IfDef=*/false); 503 break; 504 case tok::pp_ifdef: 505 case tok::pp_ifndef: 506 parsePPIf(/*IfDef=*/true); 507 break; 508 case tok::pp_else: 509 parsePPElse(); 510 break; 511 case tok::pp_elif: 512 parsePPElIf(); 513 break; 514 case tok::pp_endif: 515 parsePPEndIf(); 516 break; 517 default: 518 parsePPUnknown(); 519 break; 520 } 521} 522 523void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 524 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 525 PPStack.push_back(PP_Unreachable); 526 else 527 PPStack.push_back(PP_Conditional); 528} 529 530void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 531 ++PPBranchLevel; 532 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 533 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 534 PPLevelBranchIndex.push_back(0); 535 PPLevelBranchCount.push_back(0); 536 } 537 PPChainBranchIndex.push(0); 538 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 539 conditionalCompilationCondition(Unreachable || Skip); 540} 541 542void UnwrappedLineParser::conditionalCompilationAlternative() { 543 if (!PPStack.empty()) 544 PPStack.pop_back(); 545 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 546 if (!PPChainBranchIndex.empty()) 547 ++PPChainBranchIndex.top(); 548 conditionalCompilationCondition( 549 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 550 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 551} 552 553void UnwrappedLineParser::conditionalCompilationEnd() { 554 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 555 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 556 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 557 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 558 } 559 } 560 // Guard against #endif's without #if. 561 if (PPBranchLevel > 0) 562 --PPBranchLevel; 563 if (!PPChainBranchIndex.empty()) 564 PPChainBranchIndex.pop(); 565 if (!PPStack.empty()) 566 PPStack.pop_back(); 567} 568 569void UnwrappedLineParser::parsePPIf(bool IfDef) { 570 nextToken(); 571 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && 572 FormatTok->Tok.getLiteralData() != nullptr && 573 StringRef(FormatTok->Tok.getLiteralData(), 574 FormatTok->Tok.getLength()) == "0") || 575 FormatTok->Tok.is(tok::kw_false); 576 conditionalCompilationStart(!IfDef && IsLiteralFalse); 577 parsePPUnknown(); 578} 579 580void UnwrappedLineParser::parsePPElse() { 581 conditionalCompilationAlternative(); 582 parsePPUnknown(); 583} 584 585void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 586 587void UnwrappedLineParser::parsePPEndIf() { 588 conditionalCompilationEnd(); 589 parsePPUnknown(); 590} 591 592void UnwrappedLineParser::parsePPDefine() { 593 nextToken(); 594 595 if (FormatTok->Tok.getKind() != tok::identifier) { 596 parsePPUnknown(); 597 return; 598 } 599 nextToken(); 600 if (FormatTok->Tok.getKind() == tok::l_paren && 601 FormatTok->WhitespaceRange.getBegin() == 602 FormatTok->WhitespaceRange.getEnd()) { 603 parseParens(); 604 } 605 addUnwrappedLine(); 606 Line->Level = 1; 607 608 // Errors during a preprocessor directive can only affect the layout of the 609 // preprocessor directive, and thus we ignore them. An alternative approach 610 // would be to use the same approach we use on the file level (no 611 // re-indentation if there was a structural error) within the macro 612 // definition. 613 parseFile(); 614} 615 616void UnwrappedLineParser::parsePPUnknown() { 617 do { 618 nextToken(); 619 } while (!eof()); 620 addUnwrappedLine(); 621} 622 623// Here we blacklist certain tokens that are not usually the first token in an 624// unwrapped line. This is used in attempt to distinguish macro calls without 625// trailing semicolons from other constructs split to several lines. 626static bool tokenCanStartNewLine(const clang::Token &Tok) { 627 // Semicolon can be a null-statement, l_square can be a start of a macro or 628 // a C++11 attribute, but this doesn't seem to be common. 629 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 630 Tok.isNot(tok::l_square) && 631 // Tokens that can only be used as binary operators and a part of 632 // overloaded operator names. 633 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 634 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 635 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 636 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 637 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 638 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 639 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 640 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 641 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 642 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 643 Tok.isNot(tok::lesslessequal) && 644 // Colon is used in labels, base class lists, initializer lists, 645 // range-based for loops, ternary operator, but should never be the 646 // first token in an unwrapped line. 647 Tok.isNot(tok::colon) && 648 // 'noexcept' is a trailing annotation. 649 Tok.isNot(tok::kw_noexcept); 650} 651 652void UnwrappedLineParser::parseStructuralElement() { 653 assert(!FormatTok->Tok.is(tok::l_brace)); 654 switch (FormatTok->Tok.getKind()) { 655 case tok::at: 656 nextToken(); 657 if (FormatTok->Tok.is(tok::l_brace)) { 658 parseBracedList(); 659 break; 660 } 661 switch (FormatTok->Tok.getObjCKeywordID()) { 662 case tok::objc_public: 663 case tok::objc_protected: 664 case tok::objc_package: 665 case tok::objc_private: 666 return parseAccessSpecifier(); 667 case tok::objc_interface: 668 case tok::objc_implementation: 669 return parseObjCInterfaceOrImplementation(); 670 case tok::objc_protocol: 671 return parseObjCProtocol(); 672 case tok::objc_end: 673 return; // Handled by the caller. 674 case tok::objc_optional: 675 case tok::objc_required: 676 nextToken(); 677 addUnwrappedLine(); 678 return; 679 case tok::objc_autoreleasepool: 680 nextToken(); 681 if (FormatTok->Tok.is(tok::l_brace)) { 682 if (Style.BraceWrapping.AfterObjCDeclaration) 683 addUnwrappedLine(); 684 parseBlock(/*MustBeDeclaration=*/false); 685 } 686 addUnwrappedLine(); 687 return; 688 case tok::objc_try: 689 // This branch isn't strictly necessary (the kw_try case below would 690 // do this too after the tok::at is parsed above). But be explicit. 691 parseTryCatch(); 692 return; 693 default: 694 break; 695 } 696 break; 697 case tok::kw_asm: 698 nextToken(); 699 if (FormatTok->is(tok::l_brace)) { 700 FormatTok->Type = TT_InlineASMBrace; 701 nextToken(); 702 while (FormatTok && FormatTok->isNot(tok::eof)) { 703 if (FormatTok->is(tok::r_brace)) { 704 FormatTok->Type = TT_InlineASMBrace; 705 nextToken(); 706 addUnwrappedLine(); 707 break; 708 } 709 FormatTok->Finalized = true; 710 nextToken(); 711 } 712 } 713 break; 714 case tok::kw_namespace: 715 parseNamespace(); 716 return; 717 case tok::kw_inline: 718 nextToken(); 719 if (FormatTok->Tok.is(tok::kw_namespace)) { 720 parseNamespace(); 721 return; 722 } 723 break; 724 case tok::kw_public: 725 case tok::kw_protected: 726 case tok::kw_private: 727 if (Style.Language == FormatStyle::LK_Java || 728 Style.Language == FormatStyle::LK_JavaScript) 729 nextToken(); 730 else 731 parseAccessSpecifier(); 732 return; 733 case tok::kw_if: 734 parseIfThenElse(); 735 return; 736 case tok::kw_for: 737 case tok::kw_while: 738 parseForOrWhileLoop(); 739 return; 740 case tok::kw_do: 741 parseDoWhile(); 742 return; 743 case tok::kw_switch: 744 parseSwitch(); 745 return; 746 case tok::kw_default: 747 nextToken(); 748 parseLabel(); 749 return; 750 case tok::kw_case: 751 parseCaseLabel(); 752 return; 753 case tok::kw_try: 754 case tok::kw___try: 755 parseTryCatch(); 756 return; 757 case tok::kw_extern: 758 nextToken(); 759 if (FormatTok->Tok.is(tok::string_literal)) { 760 nextToken(); 761 if (FormatTok->Tok.is(tok::l_brace)) { 762 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 763 addUnwrappedLine(); 764 return; 765 } 766 } 767 break; 768 case tok::kw_export: 769 if (Style.Language == FormatStyle::LK_JavaScript) { 770 parseJavaScriptEs6ImportExport(); 771 return; 772 } 773 break; 774 case tok::identifier: 775 if (FormatTok->is(TT_ForEachMacro)) { 776 parseForOrWhileLoop(); 777 return; 778 } 779 if (FormatTok->is(TT_MacroBlockBegin)) { 780 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 781 /*MunchSemi=*/false); 782 return; 783 } 784 if (Style.Language == FormatStyle::LK_JavaScript && 785 FormatTok->is(Keywords.kw_import)) { 786 parseJavaScriptEs6ImportExport(); 787 return; 788 } 789 if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 790 Keywords.kw_slots, Keywords.kw_qslots)) { 791 nextToken(); 792 if (FormatTok->is(tok::colon)) { 793 nextToken(); 794 addUnwrappedLine(); 795 } 796 return; 797 } 798 // In all other cases, parse the declaration. 799 break; 800 default: 801 break; 802 } 803 do { 804 switch (FormatTok->Tok.getKind()) { 805 case tok::at: 806 nextToken(); 807 if (FormatTok->Tok.is(tok::l_brace)) 808 parseBracedList(); 809 break; 810 case tok::kw_enum: 811 // parseEnum falls through and does not yet add an unwrapped line as an 812 // enum definition can start a structural element. 813 parseEnum(); 814 // This only applies for C++. 815 if (Style.Language != FormatStyle::LK_Cpp) { 816 addUnwrappedLine(); 817 return; 818 } 819 break; 820 case tok::kw_typedef: 821 nextToken(); 822 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 823 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 824 parseEnum(); 825 break; 826 case tok::kw_struct: 827 case tok::kw_union: 828 case tok::kw_class: 829 // parseRecord falls through and does not yet add an unwrapped line as a 830 // record declaration or definition can start a structural element. 831 parseRecord(); 832 // This does not apply for Java and JavaScript. 833 if (Style.Language == FormatStyle::LK_Java || 834 Style.Language == FormatStyle::LK_JavaScript) { 835 addUnwrappedLine(); 836 return; 837 } 838 break; 839 case tok::period: 840 nextToken(); 841 // In Java, classes have an implicit static member "class". 842 if (Style.Language == FormatStyle::LK_Java && FormatTok && 843 FormatTok->is(tok::kw_class)) 844 nextToken(); 845 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 846 FormatTok->Tok.getIdentifierInfo()) 847 // JavaScript only has pseudo keywords, all keywords are allowed to 848 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 849 nextToken(); 850 break; 851 case tok::semi: 852 nextToken(); 853 addUnwrappedLine(); 854 return; 855 case tok::r_brace: 856 addUnwrappedLine(); 857 return; 858 case tok::l_paren: 859 parseParens(); 860 break; 861 case tok::kw_operator: 862 nextToken(); 863 if (FormatTok->isBinaryOperator()) 864 nextToken(); 865 break; 866 case tok::caret: 867 nextToken(); 868 if (FormatTok->Tok.isAnyIdentifier() || 869 FormatTok->isSimpleTypeSpecifier()) 870 nextToken(); 871 if (FormatTok->is(tok::l_paren)) 872 parseParens(); 873 if (FormatTok->is(tok::l_brace)) 874 parseChildBlock(); 875 break; 876 case tok::l_brace: 877 if (!tryToParseBracedList()) { 878 // A block outside of parentheses must be the last part of a 879 // structural element. 880 // FIXME: Figure out cases where this is not true, and add projections 881 // for them (the one we know is missing are lambdas). 882 if (Style.BraceWrapping.AfterFunction) 883 addUnwrappedLine(); 884 FormatTok->Type = TT_FunctionLBrace; 885 parseBlock(/*MustBeDeclaration=*/false); 886 addUnwrappedLine(); 887 return; 888 } 889 // Otherwise this was a braced init list, and the structural 890 // element continues. 891 break; 892 case tok::kw_try: 893 // We arrive here when parsing function-try blocks. 894 parseTryCatch(); 895 return; 896 case tok::identifier: { 897 if (FormatTok->is(TT_MacroBlockEnd)) { 898 addUnwrappedLine(); 899 return; 900 } 901 902 // Parse function literal unless 'function' is the first token in a line 903 // in which case this should be treated as a free-standing function. 904 if (Style.Language == FormatStyle::LK_JavaScript && 905 FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) { 906 tryToParseJSFunction(); 907 break; 908 } 909 if ((Style.Language == FormatStyle::LK_JavaScript || 910 Style.Language == FormatStyle::LK_Java) && 911 FormatTok->is(Keywords.kw_interface)) { 912 parseRecord(); 913 addUnwrappedLine(); 914 return; 915 } 916 917 StringRef Text = FormatTok->TokenText; 918 nextToken(); 919 if (Line->Tokens.size() == 1 && 920 // JS doesn't have macros, and within classes colons indicate fields, 921 // not labels. 922 Style.Language != FormatStyle::LK_JavaScript) { 923 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 924 parseLabel(); 925 return; 926 } 927 // Recognize function-like macro usages without trailing semicolon as 928 // well as free-standing macros like Q_OBJECT. 929 bool FunctionLike = FormatTok->is(tok::l_paren); 930 if (FunctionLike) 931 parseParens(); 932 933 bool FollowedByNewline = 934 CommentsBeforeNextToken.empty() 935 ? FormatTok->NewlinesBefore > 0 936 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 937 938 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 939 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 940 addUnwrappedLine(); 941 return; 942 } 943 } 944 break; 945 } 946 case tok::equal: 947 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 948 // TT_JsFatArrow. The always start an expression or a child block if 949 // followed by a curly. 950 if (FormatTok->is(TT_JsFatArrow)) { 951 nextToken(); 952 if (FormatTok->is(tok::l_brace)) 953 parseChildBlock(); 954 break; 955 } 956 957 nextToken(); 958 if (FormatTok->Tok.is(tok::l_brace)) { 959 parseBracedList(); 960 } 961 break; 962 case tok::l_square: 963 parseSquare(); 964 break; 965 case tok::kw_new: 966 parseNew(); 967 break; 968 default: 969 nextToken(); 970 break; 971 } 972 } while (!eof()); 973} 974 975bool UnwrappedLineParser::tryToParseLambda() { 976 if (Style.Language != FormatStyle::LK_Cpp) { 977 nextToken(); 978 return false; 979 } 980 // FIXME: This is a dirty way to access the previous token. Find a better 981 // solution. 982 if (!Line->Tokens.empty() && 983 (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, 984 tok::kw_new, tok::kw_delete) || 985 Line->Tokens.back().Tok->closesScope() || 986 Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { 987 nextToken(); 988 return false; 989 } 990 assert(FormatTok->is(tok::l_square)); 991 FormatToken &LSquare = *FormatTok; 992 if (!tryToParseLambdaIntroducer()) 993 return false; 994 995 while (FormatTok->isNot(tok::l_brace)) { 996 if (FormatTok->isSimpleTypeSpecifier()) { 997 nextToken(); 998 continue; 999 } 1000 switch (FormatTok->Tok.getKind()) { 1001 case tok::l_brace: 1002 break; 1003 case tok::l_paren: 1004 parseParens(); 1005 break; 1006 case tok::amp: 1007 case tok::star: 1008 case tok::kw_const: 1009 case tok::comma: 1010 case tok::less: 1011 case tok::greater: 1012 case tok::identifier: 1013 case tok::numeric_constant: 1014 case tok::coloncolon: 1015 case tok::kw_mutable: 1016 nextToken(); 1017 break; 1018 case tok::arrow: 1019 FormatTok->Type = TT_LambdaArrow; 1020 nextToken(); 1021 break; 1022 default: 1023 return true; 1024 } 1025 } 1026 LSquare.Type = TT_LambdaLSquare; 1027 parseChildBlock(); 1028 return true; 1029} 1030 1031bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1032 nextToken(); 1033 if (FormatTok->is(tok::equal)) { 1034 nextToken(); 1035 if (FormatTok->is(tok::r_square)) { 1036 nextToken(); 1037 return true; 1038 } 1039 if (FormatTok->isNot(tok::comma)) 1040 return false; 1041 nextToken(); 1042 } else if (FormatTok->is(tok::amp)) { 1043 nextToken(); 1044 if (FormatTok->is(tok::r_square)) { 1045 nextToken(); 1046 return true; 1047 } 1048 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 1049 return false; 1050 } 1051 if (FormatTok->is(tok::comma)) 1052 nextToken(); 1053 } else if (FormatTok->is(tok::r_square)) { 1054 nextToken(); 1055 return true; 1056 } 1057 do { 1058 if (FormatTok->is(tok::amp)) 1059 nextToken(); 1060 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 1061 return false; 1062 nextToken(); 1063 if (FormatTok->is(tok::ellipsis)) 1064 nextToken(); 1065 if (FormatTok->is(tok::comma)) { 1066 nextToken(); 1067 } else if (FormatTok->is(tok::r_square)) { 1068 nextToken(); 1069 return true; 1070 } else { 1071 return false; 1072 } 1073 } while (!eof()); 1074 return false; 1075} 1076 1077void UnwrappedLineParser::tryToParseJSFunction() { 1078 nextToken(); 1079 1080 // Consume function name. 1081 if (FormatTok->is(tok::identifier)) 1082 nextToken(); 1083 1084 if (FormatTok->isNot(tok::l_paren)) 1085 return; 1086 1087 // Parse formal parameter list. 1088 parseParens(); 1089 1090 if (FormatTok->is(tok::colon)) { 1091 // Parse a type definition. 1092 nextToken(); 1093 1094 // Eat the type declaration. For braced inline object types, balance braces, 1095 // otherwise just parse until finding an l_brace for the function body. 1096 if (FormatTok->is(tok::l_brace)) 1097 tryToParseBracedList(); 1098 else 1099 while (FormatTok->isNot(tok::l_brace) && !eof()) 1100 nextToken(); 1101 } 1102 1103 parseChildBlock(); 1104} 1105 1106bool UnwrappedLineParser::tryToParseBracedList() { 1107 if (FormatTok->BlockKind == BK_Unknown) 1108 calculateBraceTypes(); 1109 assert(FormatTok->BlockKind != BK_Unknown); 1110 if (FormatTok->BlockKind == BK_Block) 1111 return false; 1112 parseBracedList(); 1113 return true; 1114} 1115 1116bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { 1117 bool HasError = false; 1118 nextToken(); 1119 1120 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1121 // replace this by using parseAssigmentExpression() inside. 1122 do { 1123 if (Style.Language == FormatStyle::LK_JavaScript) { 1124 if (FormatTok->is(Keywords.kw_function)) { 1125 tryToParseJSFunction(); 1126 continue; 1127 } 1128 if (FormatTok->is(TT_JsFatArrow)) { 1129 nextToken(); 1130 // Fat arrows can be followed by simple expressions or by child blocks 1131 // in curly braces. 1132 if (FormatTok->is(tok::l_brace)) { 1133 parseChildBlock(); 1134 continue; 1135 } 1136 } 1137 } 1138 switch (FormatTok->Tok.getKind()) { 1139 case tok::caret: 1140 nextToken(); 1141 if (FormatTok->is(tok::l_brace)) { 1142 parseChildBlock(); 1143 } 1144 break; 1145 case tok::l_square: 1146 tryToParseLambda(); 1147 break; 1148 case tok::l_brace: 1149 // Assume there are no blocks inside a braced init list apart 1150 // from the ones we explicitly parse out (like lambdas). 1151 FormatTok->BlockKind = BK_BracedInit; 1152 parseBracedList(); 1153 break; 1154 case tok::l_paren: 1155 parseParens(); 1156 // JavaScript can just have free standing methods and getters/setters in 1157 // object literals. Detect them by a "{" following ")". 1158 if (Style.Language == FormatStyle::LK_JavaScript) { 1159 if (FormatTok->is(tok::l_brace)) 1160 parseChildBlock(); 1161 break; 1162 } 1163 break; 1164 case tok::r_brace: 1165 nextToken(); 1166 return !HasError; 1167 case tok::semi: 1168 HasError = true; 1169 if (!ContinueOnSemicolons) 1170 return !HasError; 1171 nextToken(); 1172 break; 1173 case tok::comma: 1174 nextToken(); 1175 break; 1176 default: 1177 nextToken(); 1178 break; 1179 } 1180 } while (!eof()); 1181 return false; 1182} 1183 1184void UnwrappedLineParser::parseParens() { 1185 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1186 nextToken(); 1187 do { 1188 switch (FormatTok->Tok.getKind()) { 1189 case tok::l_paren: 1190 parseParens(); 1191 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1192 parseChildBlock(); 1193 break; 1194 case tok::r_paren: 1195 nextToken(); 1196 return; 1197 case tok::r_brace: 1198 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1199 return; 1200 case tok::l_square: 1201 tryToParseLambda(); 1202 break; 1203 case tok::l_brace: 1204 if (!tryToParseBracedList()) 1205 parseChildBlock(); 1206 break; 1207 case tok::at: 1208 nextToken(); 1209 if (FormatTok->Tok.is(tok::l_brace)) 1210 parseBracedList(); 1211 break; 1212 case tok::identifier: 1213 if (Style.Language == FormatStyle::LK_JavaScript && 1214 FormatTok->is(Keywords.kw_function)) 1215 tryToParseJSFunction(); 1216 else 1217 nextToken(); 1218 break; 1219 default: 1220 nextToken(); 1221 break; 1222 } 1223 } while (!eof()); 1224} 1225 1226void UnwrappedLineParser::parseSquare() { 1227 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1228 if (tryToParseLambda()) 1229 return; 1230 do { 1231 switch (FormatTok->Tok.getKind()) { 1232 case tok::l_paren: 1233 parseParens(); 1234 break; 1235 case tok::r_square: 1236 nextToken(); 1237 return; 1238 case tok::r_brace: 1239 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1240 return; 1241 case tok::l_square: 1242 parseSquare(); 1243 break; 1244 case tok::l_brace: { 1245 if (!tryToParseBracedList()) 1246 parseChildBlock(); 1247 break; 1248 } 1249 case tok::at: 1250 nextToken(); 1251 if (FormatTok->Tok.is(tok::l_brace)) 1252 parseBracedList(); 1253 break; 1254 default: 1255 nextToken(); 1256 break; 1257 } 1258 } while (!eof()); 1259} 1260 1261void UnwrappedLineParser::parseIfThenElse() { 1262 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1263 nextToken(); 1264 if (FormatTok->Tok.is(tok::l_paren)) 1265 parseParens(); 1266 bool NeedsUnwrappedLine = false; 1267 if (FormatTok->Tok.is(tok::l_brace)) { 1268 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1269 parseBlock(/*MustBeDeclaration=*/false); 1270 if (Style.BraceWrapping.BeforeElse) 1271 addUnwrappedLine(); 1272 else 1273 NeedsUnwrappedLine = true; 1274 } else { 1275 addUnwrappedLine(); 1276 ++Line->Level; 1277 parseStructuralElement(); 1278 --Line->Level; 1279 } 1280 if (FormatTok->Tok.is(tok::kw_else)) { 1281 nextToken(); 1282 if (FormatTok->Tok.is(tok::l_brace)) { 1283 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1284 parseBlock(/*MustBeDeclaration=*/false); 1285 addUnwrappedLine(); 1286 } else if (FormatTok->Tok.is(tok::kw_if)) { 1287 parseIfThenElse(); 1288 } else { 1289 addUnwrappedLine(); 1290 ++Line->Level; 1291 parseStructuralElement(); 1292 --Line->Level; 1293 } 1294 } else if (NeedsUnwrappedLine) { 1295 addUnwrappedLine(); 1296 } 1297} 1298 1299void UnwrappedLineParser::parseTryCatch() { 1300 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1301 nextToken(); 1302 bool NeedsUnwrappedLine = false; 1303 if (FormatTok->is(tok::colon)) { 1304 // We are in a function try block, what comes is an initializer list. 1305 nextToken(); 1306 while (FormatTok->is(tok::identifier)) { 1307 nextToken(); 1308 if (FormatTok->is(tok::l_paren)) 1309 parseParens(); 1310 if (FormatTok->is(tok::comma)) 1311 nextToken(); 1312 } 1313 } 1314 // Parse try with resource. 1315 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1316 parseParens(); 1317 } 1318 if (FormatTok->is(tok::l_brace)) { 1319 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1320 parseBlock(/*MustBeDeclaration=*/false); 1321 if (Style.BraceWrapping.BeforeCatch) { 1322 addUnwrappedLine(); 1323 } else { 1324 NeedsUnwrappedLine = true; 1325 } 1326 } else if (!FormatTok->is(tok::kw_catch)) { 1327 // The C++ standard requires a compound-statement after a try. 1328 // If there's none, we try to assume there's a structuralElement 1329 // and try to continue. 1330 addUnwrappedLine(); 1331 ++Line->Level; 1332 parseStructuralElement(); 1333 --Line->Level; 1334 } 1335 while (1) { 1336 if (FormatTok->is(tok::at)) 1337 nextToken(); 1338 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1339 tok::kw___finally) || 1340 ((Style.Language == FormatStyle::LK_Java || 1341 Style.Language == FormatStyle::LK_JavaScript) && 1342 FormatTok->is(Keywords.kw_finally)) || 1343 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1344 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1345 break; 1346 nextToken(); 1347 while (FormatTok->isNot(tok::l_brace)) { 1348 if (FormatTok->is(tok::l_paren)) { 1349 parseParens(); 1350 continue; 1351 } 1352 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1353 return; 1354 nextToken(); 1355 } 1356 NeedsUnwrappedLine = false; 1357 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1358 parseBlock(/*MustBeDeclaration=*/false); 1359 if (Style.BraceWrapping.BeforeCatch) 1360 addUnwrappedLine(); 1361 else 1362 NeedsUnwrappedLine = true; 1363 } 1364 if (NeedsUnwrappedLine) 1365 addUnwrappedLine(); 1366} 1367 1368void UnwrappedLineParser::parseNamespace() { 1369 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1370 1371 const FormatToken &InitialToken = *FormatTok; 1372 nextToken(); 1373 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1374 nextToken(); 1375 if (FormatTok->Tok.is(tok::l_brace)) { 1376 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1377 addUnwrappedLine(); 1378 1379 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1380 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1381 DeclarationScopeStack.size() > 1); 1382 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1383 // Munch the semicolon after a namespace. This is more common than one would 1384 // think. Puttin the semicolon into its own line is very ugly. 1385 if (FormatTok->Tok.is(tok::semi)) 1386 nextToken(); 1387 addUnwrappedLine(); 1388 } 1389 // FIXME: Add error handling. 1390} 1391 1392void UnwrappedLineParser::parseNew() { 1393 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1394 nextToken(); 1395 if (Style.Language != FormatStyle::LK_Java) 1396 return; 1397 1398 // In Java, we can parse everything up to the parens, which aren't optional. 1399 do { 1400 // There should not be a ;, { or } before the new's open paren. 1401 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1402 return; 1403 1404 // Consume the parens. 1405 if (FormatTok->is(tok::l_paren)) { 1406 parseParens(); 1407 1408 // If there is a class body of an anonymous class, consume that as child. 1409 if (FormatTok->is(tok::l_brace)) 1410 parseChildBlock(); 1411 return; 1412 } 1413 nextToken(); 1414 } while (!eof()); 1415} 1416 1417void UnwrappedLineParser::parseForOrWhileLoop() { 1418 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1419 "'for', 'while' or foreach macro expected"); 1420 nextToken(); 1421 if (FormatTok->Tok.is(tok::l_paren)) 1422 parseParens(); 1423 if (FormatTok->Tok.is(tok::l_brace)) { 1424 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1425 parseBlock(/*MustBeDeclaration=*/false); 1426 addUnwrappedLine(); 1427 } else { 1428 addUnwrappedLine(); 1429 ++Line->Level; 1430 parseStructuralElement(); 1431 --Line->Level; 1432 } 1433} 1434 1435void UnwrappedLineParser::parseDoWhile() { 1436 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1437 nextToken(); 1438 if (FormatTok->Tok.is(tok::l_brace)) { 1439 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1440 parseBlock(/*MustBeDeclaration=*/false); 1441 if (Style.BraceWrapping.IndentBraces) 1442 addUnwrappedLine(); 1443 } else { 1444 addUnwrappedLine(); 1445 ++Line->Level; 1446 parseStructuralElement(); 1447 --Line->Level; 1448 } 1449 1450 // FIXME: Add error handling. 1451 if (!FormatTok->Tok.is(tok::kw_while)) { 1452 addUnwrappedLine(); 1453 return; 1454 } 1455 1456 nextToken(); 1457 parseStructuralElement(); 1458} 1459 1460void UnwrappedLineParser::parseLabel() { 1461 nextToken(); 1462 unsigned OldLineLevel = Line->Level; 1463 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1464 --Line->Level; 1465 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1466 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1467 parseBlock(/*MustBeDeclaration=*/false); 1468 if (FormatTok->Tok.is(tok::kw_break)) { 1469 if (Style.BraceWrapping.AfterControlStatement) 1470 addUnwrappedLine(); 1471 parseStructuralElement(); 1472 } 1473 addUnwrappedLine(); 1474 } else { 1475 if (FormatTok->is(tok::semi)) 1476 nextToken(); 1477 addUnwrappedLine(); 1478 } 1479 Line->Level = OldLineLevel; 1480} 1481 1482void UnwrappedLineParser::parseCaseLabel() { 1483 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1484 // FIXME: fix handling of complex expressions here. 1485 do { 1486 nextToken(); 1487 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1488 parseLabel(); 1489} 1490 1491void UnwrappedLineParser::parseSwitch() { 1492 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1493 nextToken(); 1494 if (FormatTok->Tok.is(tok::l_paren)) 1495 parseParens(); 1496 if (FormatTok->Tok.is(tok::l_brace)) { 1497 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1498 parseBlock(/*MustBeDeclaration=*/false); 1499 addUnwrappedLine(); 1500 } else { 1501 addUnwrappedLine(); 1502 ++Line->Level; 1503 parseStructuralElement(); 1504 --Line->Level; 1505 } 1506} 1507 1508void UnwrappedLineParser::parseAccessSpecifier() { 1509 nextToken(); 1510 // Understand Qt's slots. 1511 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1512 nextToken(); 1513 // Otherwise, we don't know what it is, and we'd better keep the next token. 1514 if (FormatTok->Tok.is(tok::colon)) 1515 nextToken(); 1516 addUnwrappedLine(); 1517} 1518 1519void UnwrappedLineParser::parseEnum() { 1520 // Won't be 'enum' for NS_ENUMs. 1521 if (FormatTok->Tok.is(tok::kw_enum)) 1522 nextToken(); 1523 1524 // Eat up enum class ... 1525 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1526 nextToken(); 1527 1528 while (FormatTok->Tok.getIdentifierInfo() || 1529 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1530 tok::greater, tok::comma, tok::question)) { 1531 nextToken(); 1532 // We can have macros or attributes in between 'enum' and the enum name. 1533 if (FormatTok->is(tok::l_paren)) 1534 parseParens(); 1535 if (FormatTok->is(tok::identifier)) { 1536 nextToken(); 1537 // If there are two identifiers in a row, this is likely an elaborate 1538 // return type. In Java, this can be "implements", etc. 1539 if (Style.Language == FormatStyle::LK_Cpp && 1540 FormatTok->is(tok::identifier)) 1541 return; 1542 } 1543 } 1544 1545 // Just a declaration or something is wrong. 1546 if (FormatTok->isNot(tok::l_brace)) 1547 return; 1548 FormatTok->BlockKind = BK_Block; 1549 1550 if (Style.Language == FormatStyle::LK_Java) { 1551 // Java enums are different. 1552 parseJavaEnumBody(); 1553 return; 1554 } else if (Style.Language == FormatStyle::LK_Proto) { 1555 parseBlock(/*MustBeDeclaration=*/true); 1556 return; 1557 } 1558 1559 // Parse enum body. 1560 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1561 if (HasError) { 1562 if (FormatTok->is(tok::semi)) 1563 nextToken(); 1564 addUnwrappedLine(); 1565 } 1566 1567 // There is no addUnwrappedLine() here so that we fall through to parsing a 1568 // structural element afterwards. Thus, in "enum A {} n, m;", 1569 // "} n, m;" will end up in one unwrapped line. 1570} 1571 1572void UnwrappedLineParser::parseJavaEnumBody() { 1573 // Determine whether the enum is simple, i.e. does not have a semicolon or 1574 // constants with class bodies. Simple enums can be formatted like braced 1575 // lists, contracted to a single line, etc. 1576 unsigned StoredPosition = Tokens->getPosition(); 1577 bool IsSimple = true; 1578 FormatToken *Tok = Tokens->getNextToken(); 1579 while (Tok) { 1580 if (Tok->is(tok::r_brace)) 1581 break; 1582 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1583 IsSimple = false; 1584 break; 1585 } 1586 // FIXME: This will also mark enums with braces in the arguments to enum 1587 // constants as "not simple". This is probably fine in practice, though. 1588 Tok = Tokens->getNextToken(); 1589 } 1590 FormatTok = Tokens->setPosition(StoredPosition); 1591 1592 if (IsSimple) { 1593 parseBracedList(); 1594 addUnwrappedLine(); 1595 return; 1596 } 1597 1598 // Parse the body of a more complex enum. 1599 // First add a line for everything up to the "{". 1600 nextToken(); 1601 addUnwrappedLine(); 1602 ++Line->Level; 1603 1604 // Parse the enum constants. 1605 while (FormatTok) { 1606 if (FormatTok->is(tok::l_brace)) { 1607 // Parse the constant's class body. 1608 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1609 /*MunchSemi=*/false); 1610 } else if (FormatTok->is(tok::l_paren)) { 1611 parseParens(); 1612 } else if (FormatTok->is(tok::comma)) { 1613 nextToken(); 1614 addUnwrappedLine(); 1615 } else if (FormatTok->is(tok::semi)) { 1616 nextToken(); 1617 addUnwrappedLine(); 1618 break; 1619 } else if (FormatTok->is(tok::r_brace)) { 1620 addUnwrappedLine(); 1621 break; 1622 } else { 1623 nextToken(); 1624 } 1625 } 1626 1627 // Parse the class body after the enum's ";" if any. 1628 parseLevel(/*HasOpeningBrace=*/true); 1629 nextToken(); 1630 --Line->Level; 1631 addUnwrappedLine(); 1632} 1633 1634void UnwrappedLineParser::parseRecord() { 1635 const FormatToken &InitialToken = *FormatTok; 1636 nextToken(); 1637 1638 // The actual identifier can be a nested name specifier, and in macros 1639 // it is often token-pasted. 1640 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 1641 tok::kw___attribute, tok::kw___declspec, 1642 tok::kw_alignas) || 1643 ((Style.Language == FormatStyle::LK_Java || 1644 Style.Language == FormatStyle::LK_JavaScript) && 1645 FormatTok->isOneOf(tok::period, tok::comma))) { 1646 bool IsNonMacroIdentifier = 1647 FormatTok->is(tok::identifier) && 1648 FormatTok->TokenText != FormatTok->TokenText.upper(); 1649 nextToken(); 1650 // We can have macros or attributes in between 'class' and the class name. 1651 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 1652 parseParens(); 1653 } 1654 1655 // Note that parsing away template declarations here leads to incorrectly 1656 // accepting function declarations as record declarations. 1657 // In general, we cannot solve this problem. Consider: 1658 // class A<int> B() {} 1659 // which can be a function definition or a class definition when B() is a 1660 // macro. If we find enough real-world cases where this is a problem, we 1661 // can parse for the 'template' keyword in the beginning of the statement, 1662 // and thus rule out the record production in case there is no template 1663 // (this would still leave us with an ambiguity between template function 1664 // and class declarations). 1665 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1666 while (!eof()) { 1667 if (FormatTok->is(tok::l_brace)) { 1668 calculateBraceTypes(/*ExpectClassBody=*/true); 1669 if (!tryToParseBracedList()) 1670 break; 1671 } 1672 if (FormatTok->Tok.is(tok::semi)) 1673 return; 1674 nextToken(); 1675 } 1676 } 1677 if (FormatTok->Tok.is(tok::l_brace)) { 1678 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1679 addUnwrappedLine(); 1680 1681 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1682 /*MunchSemi=*/false); 1683 } 1684 // There is no addUnwrappedLine() here so that we fall through to parsing a 1685 // structural element afterwards. Thus, in "class A {} n, m;", 1686 // "} n, m;" will end up in one unwrapped line. 1687} 1688 1689void UnwrappedLineParser::parseObjCProtocolList() { 1690 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1691 do 1692 nextToken(); 1693 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1694 nextToken(); // Skip '>'. 1695} 1696 1697void UnwrappedLineParser::parseObjCUntilAtEnd() { 1698 do { 1699 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1700 nextToken(); 1701 addUnwrappedLine(); 1702 break; 1703 } 1704 if (FormatTok->is(tok::l_brace)) { 1705 parseBlock(/*MustBeDeclaration=*/false); 1706 // In ObjC interfaces, nothing should be following the "}". 1707 addUnwrappedLine(); 1708 } else if (FormatTok->is(tok::r_brace)) { 1709 // Ignore stray "}". parseStructuralElement doesn't consume them. 1710 nextToken(); 1711 addUnwrappedLine(); 1712 } else { 1713 parseStructuralElement(); 1714 } 1715 } while (!eof()); 1716} 1717 1718void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1719 nextToken(); 1720 nextToken(); // interface name 1721 1722 // @interface can be followed by either a base class, or a category. 1723 if (FormatTok->Tok.is(tok::colon)) { 1724 nextToken(); 1725 nextToken(); // base class name 1726 } else if (FormatTok->Tok.is(tok::l_paren)) 1727 // Skip category, if present. 1728 parseParens(); 1729 1730 if (FormatTok->Tok.is(tok::less)) 1731 parseObjCProtocolList(); 1732 1733 if (FormatTok->Tok.is(tok::l_brace)) { 1734 if (Style.BraceWrapping.AfterObjCDeclaration) 1735 addUnwrappedLine(); 1736 parseBlock(/*MustBeDeclaration=*/true); 1737 } 1738 1739 // With instance variables, this puts '}' on its own line. Without instance 1740 // variables, this ends the @interface line. 1741 addUnwrappedLine(); 1742 1743 parseObjCUntilAtEnd(); 1744} 1745 1746void UnwrappedLineParser::parseObjCProtocol() { 1747 nextToken(); 1748 nextToken(); // protocol name 1749 1750 if (FormatTok->Tok.is(tok::less)) 1751 parseObjCProtocolList(); 1752 1753 // Check for protocol declaration. 1754 if (FormatTok->Tok.is(tok::semi)) { 1755 nextToken(); 1756 return addUnwrappedLine(); 1757 } 1758 1759 addUnwrappedLine(); 1760 parseObjCUntilAtEnd(); 1761} 1762 1763void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 1764 assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export)); 1765 nextToken(); 1766 1767 // Consume the "default" in "export default class/function". 1768 if (FormatTok->is(tok::kw_default)) 1769 nextToken(); 1770 1771 // Consume "function" and "default function", so that these get parsed as 1772 // free-standing JS functions, i.e. do not require a trailing semicolon. 1773 if (FormatTok->is(Keywords.kw_function)) { 1774 nextToken(); 1775 return; 1776 } 1777 1778 if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum, 1779 Keywords.kw_let, Keywords.kw_var)) 1780 return; // Fall through to parsing the corresponding structure. 1781 1782 if (FormatTok->is(tok::l_brace)) { 1783 FormatTok->BlockKind = BK_Block; 1784 parseBracedList(); 1785 } 1786 1787 while (!eof() && FormatTok->isNot(tok::semi) && 1788 FormatTok->isNot(tok::l_brace)) { 1789 nextToken(); 1790 } 1791} 1792 1793LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 1794 StringRef Prefix = "") { 1795 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 1796 << (Line.InPPDirective ? " MACRO" : "") << ": "; 1797 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1798 E = Line.Tokens.end(); 1799 I != E; ++I) { 1800 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; 1801 } 1802 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1803 E = Line.Tokens.end(); 1804 I != E; ++I) { 1805 const UnwrappedLineNode &Node = *I; 1806 for (SmallVectorImpl<UnwrappedLine>::const_iterator 1807 I = Node.Children.begin(), 1808 E = Node.Children.end(); 1809 I != E; ++I) { 1810 printDebugInfo(*I, "\nChild: "); 1811 } 1812 } 1813 llvm::dbgs() << "\n"; 1814} 1815 1816void UnwrappedLineParser::addUnwrappedLine() { 1817 if (Line->Tokens.empty()) 1818 return; 1819 DEBUG({ 1820 if (CurrentLines == &Lines) 1821 printDebugInfo(*Line); 1822 }); 1823 CurrentLines->push_back(std::move(*Line)); 1824 Line->Tokens.clear(); 1825 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1826 CurrentLines->append( 1827 std::make_move_iterator(PreprocessorDirectives.begin()), 1828 std::make_move_iterator(PreprocessorDirectives.end())); 1829 PreprocessorDirectives.clear(); 1830 } 1831} 1832 1833bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1834 1835bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 1836 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 1837 FormatTok.NewlinesBefore > 0; 1838} 1839 1840void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1841 bool JustComments = Line->Tokens.empty(); 1842 for (SmallVectorImpl<FormatToken *>::const_iterator 1843 I = CommentsBeforeNextToken.begin(), 1844 E = CommentsBeforeNextToken.end(); 1845 I != E; ++I) { 1846 if (isOnNewLine(**I) && JustComments) 1847 addUnwrappedLine(); 1848 pushToken(*I); 1849 } 1850 if (NewlineBeforeNext && JustComments) 1851 addUnwrappedLine(); 1852 CommentsBeforeNextToken.clear(); 1853} 1854 1855void UnwrappedLineParser::nextToken() { 1856 if (eof()) 1857 return; 1858 flushComments(isOnNewLine(*FormatTok)); 1859 pushToken(FormatTok); 1860 readToken(); 1861} 1862 1863void UnwrappedLineParser::readToken() { 1864 bool CommentsInCurrentLine = true; 1865 do { 1866 FormatTok = Tokens->getNextToken(); 1867 assert(FormatTok); 1868 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1869 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1870 // If there is an unfinished unwrapped line, we flush the preprocessor 1871 // directives only after that unwrapped line was finished later. 1872 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 1873 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1874 // Comments stored before the preprocessor directive need to be output 1875 // before the preprocessor directive, at the same level as the 1876 // preprocessor directive, as we consider them to apply to the directive. 1877 flushComments(isOnNewLine(*FormatTok)); 1878 parsePPDirective(); 1879 } 1880 while (FormatTok->Type == TT_ConflictStart || 1881 FormatTok->Type == TT_ConflictEnd || 1882 FormatTok->Type == TT_ConflictAlternative) { 1883 if (FormatTok->Type == TT_ConflictStart) { 1884 conditionalCompilationStart(/*Unreachable=*/false); 1885 } else if (FormatTok->Type == TT_ConflictAlternative) { 1886 conditionalCompilationAlternative(); 1887 } else if (FormatTok->Type == TT_ConflictEnd) { 1888 conditionalCompilationEnd(); 1889 } 1890 FormatTok = Tokens->getNextToken(); 1891 FormatTok->MustBreakBefore = true; 1892 } 1893 1894 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1895 !Line->InPPDirective) { 1896 continue; 1897 } 1898 1899 if (!FormatTok->Tok.is(tok::comment)) 1900 return; 1901 if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { 1902 CommentsInCurrentLine = false; 1903 } 1904 if (CommentsInCurrentLine) { 1905 pushToken(FormatTok); 1906 } else { 1907 CommentsBeforeNextToken.push_back(FormatTok); 1908 } 1909 } while (!eof()); 1910} 1911 1912void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1913 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 1914 if (MustBreakBeforeNextToken) { 1915 Line->Tokens.back().Tok->MustBreakBefore = true; 1916 MustBreakBeforeNextToken = false; 1917 } 1918} 1919 1920} // end namespace format 1921} // end namespace clang 1922