Format.cpp revision 5f2173ee723fd17b758f2a35a5bb39ca74eca523
1//===--- Format.cpp - Format C++ code -------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements functions declared in Format.h. This will be 12/// split into separate files as we go. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "format-formatter" 20 21#include "UnwrappedLineParser.h" 22#include "clang/Basic/Diagnostic.h" 23#include "clang/Basic/OperatorPrecedence.h" 24#include "clang/Basic/SourceManager.h" 25#include "clang/Format/Format.h" 26#include "clang/Frontend/TextDiagnosticPrinter.h" 27#include "clang/Lex/Lexer.h" 28#include "llvm/Support/Debug.h" 29#include <string> 30 31// Uncomment to get debug output from tests: 32// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0) 33 34namespace clang { 35namespace format { 36 37enum TokenType { 38 TT_BinaryOperator, 39 TT_BlockComment, 40 TT_CastRParen, 41 TT_ConditionalExpr, 42 TT_CtorInitializerColon, 43 TT_ImplicitStringLiteral, 44 TT_LineComment, 45 TT_ObjCBlockLParen, 46 TT_ObjCDecl, 47 TT_ObjCMethodSpecifier, 48 TT_ObjCMethodExpr, 49 TT_ObjCProperty, 50 TT_OverloadedOperator, 51 TT_PointerOrReference, 52 TT_PureVirtualSpecifier, 53 TT_TemplateCloser, 54 TT_TemplateOpener, 55 TT_TrailingUnaryOperator, 56 TT_UnaryOperator, 57 TT_Unknown 58}; 59 60enum LineType { 61 LT_Invalid, 62 LT_Other, 63 LT_BuilderTypeCall, 64 LT_PreprocessorDirective, 65 LT_VirtualFunctionDecl, 66 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 67 LT_ObjCMethodDecl, 68 LT_ObjCProperty // An @property line. 69}; 70 71class AnnotatedToken { 72public: 73 explicit AnnotatedToken(const FormatToken &FormatTok) 74 : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false), 75 CanBreakBefore(false), MustBreakBefore(false), 76 ClosesTemplateDeclaration(false), MatchingParen(NULL), 77 ParameterCount(1), Parent(NULL) { 78 } 79 80 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 81 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 82 83 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 84 return FormatTok.Tok.isObjCAtKeyword(Kind); 85 } 86 87 FormatToken FormatTok; 88 89 TokenType Type; 90 91 bool SpaceRequiredBefore; 92 bool CanBreakBefore; 93 bool MustBreakBefore; 94 95 bool ClosesTemplateDeclaration; 96 97 AnnotatedToken *MatchingParen; 98 99 /// \brief Number of parameters, if this is "(", "[" or "<". 100 /// 101 /// This is initialized to 1 as we don't need to distinguish functions with 102 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 103 /// the number of commas. 104 unsigned ParameterCount; 105 106 /// \brief The total length of the line up to and including this token. 107 unsigned TotalLength; 108 109 std::vector<AnnotatedToken> Children; 110 AnnotatedToken *Parent; 111 112 const AnnotatedToken *getPreviousNoneComment() const { 113 AnnotatedToken *Tok = Parent; 114 while (Tok != NULL && Tok->is(tok::comment)) 115 Tok = Tok->Parent; 116 return Tok; 117 } 118}; 119 120class AnnotatedLine { 121public: 122 AnnotatedLine(const UnwrappedLine &Line) 123 : First(Line.Tokens.front()), Level(Line.Level), 124 InPPDirective(Line.InPPDirective), 125 MustBeDeclaration(Line.MustBeDeclaration) { 126 assert(!Line.Tokens.empty()); 127 AnnotatedToken *Current = &First; 128 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 129 E = Line.Tokens.end(); 130 I != E; ++I) { 131 Current->Children.push_back(AnnotatedToken(*I)); 132 Current->Children[0].Parent = Current; 133 Current = &Current->Children[0]; 134 } 135 Last = Current; 136 } 137 AnnotatedLine(const AnnotatedLine &Other) 138 : First(Other.First), Type(Other.Type), Level(Other.Level), 139 InPPDirective(Other.InPPDirective), 140 MustBeDeclaration(Other.MustBeDeclaration) { 141 Last = &First; 142 while (!Last->Children.empty()) { 143 Last->Children[0].Parent = Last; 144 Last = &Last->Children[0]; 145 } 146 } 147 148 AnnotatedToken First; 149 AnnotatedToken *Last; 150 151 LineType Type; 152 unsigned Level; 153 bool InPPDirective; 154 bool MustBeDeclaration; 155}; 156 157static prec::Level getPrecedence(const AnnotatedToken &Tok) { 158 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 159} 160 161FormatStyle getLLVMStyle() { 162 FormatStyle LLVMStyle; 163 LLVMStyle.ColumnLimit = 80; 164 LLVMStyle.MaxEmptyLinesToKeep = 1; 165 LLVMStyle.PointerAndReferenceBindToType = false; 166 LLVMStyle.AccessModifierOffset = -2; 167 LLVMStyle.SplitTemplateClosingGreater = true; 168 LLVMStyle.IndentCaseLabels = false; 169 LLVMStyle.SpacesBeforeTrailingComments = 1; 170 LLVMStyle.BinPackParameters = true; 171 LLVMStyle.AllowAllParametersOnNextLine = true; 172 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 173 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 174 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 175 return LLVMStyle; 176} 177 178FormatStyle getGoogleStyle() { 179 FormatStyle GoogleStyle; 180 GoogleStyle.ColumnLimit = 80; 181 GoogleStyle.MaxEmptyLinesToKeep = 1; 182 GoogleStyle.PointerAndReferenceBindToType = true; 183 GoogleStyle.AccessModifierOffset = -1; 184 GoogleStyle.SplitTemplateClosingGreater = false; 185 GoogleStyle.IndentCaseLabels = true; 186 GoogleStyle.SpacesBeforeTrailingComments = 2; 187 GoogleStyle.BinPackParameters = false; 188 GoogleStyle.AllowAllParametersOnNextLine = true; 189 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 190 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 191 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 192 return GoogleStyle; 193} 194 195FormatStyle getChromiumStyle() { 196 FormatStyle ChromiumStyle = getGoogleStyle(); 197 ChromiumStyle.AllowAllParametersOnNextLine = false; 198 return ChromiumStyle; 199} 200 201struct OptimizationParameters { 202 unsigned PenaltyIndentLevel; 203 unsigned PenaltyLevelDecrease; 204 unsigned PenaltyExcessCharacter; 205}; 206 207/// \brief Manages the whitespaces around tokens and their replacements. 208/// 209/// This includes special handling for certain constructs, e.g. the alignment of 210/// trailing line comments. 211class WhitespaceManager { 212public: 213 WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 214 215 /// \brief Replaces the whitespace in front of \p Tok. Only call once for 216 /// each \c AnnotatedToken. 217 void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 218 unsigned Spaces, unsigned WhitespaceStartColumn, 219 const FormatStyle &Style) { 220 // 2+ newlines mean an empty line separating logic scopes. 221 if (NewLines >= 2) 222 alignComments(); 223 224 // Align line comments if they are trailing or if they continue other 225 // trailing comments. 226 if (Tok.Type == TT_LineComment && 227 (Tok.Parent != NULL || !Comments.empty())) { 228 if (Style.ColumnLimit >= 229 Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) { 230 Comments.push_back(StoredComment()); 231 Comments.back().Tok = Tok.FormatTok; 232 Comments.back().Spaces = Spaces; 233 Comments.back().NewLines = NewLines; 234 Comments.back().MinColumn = WhitespaceStartColumn + Spaces; 235 Comments.back().MaxColumn = Style.ColumnLimit - 236 Spaces - Tok.FormatTok.TokenLength; 237 return; 238 } 239 } 240 241 // If this line does not have a trailing comment, align the stored comments. 242 if (Tok.Children.empty() && Tok.Type != TT_LineComment) 243 alignComments(); 244 storeReplacement(Tok.FormatTok, 245 std::string(NewLines, '\n') + std::string(Spaces, ' ')); 246 } 247 248 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned 249 /// backslashes to escape newlines inside a preprocessor directive. 250 /// 251 /// This function and \c replaceWhitespace have the same behavior if 252 /// \c Newlines == 0. 253 void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 254 unsigned Spaces, unsigned WhitespaceStartColumn, 255 const FormatStyle &Style) { 256 std::string NewLineText; 257 if (NewLines > 0) { 258 unsigned Offset = std::min<int>(Style.ColumnLimit - 1, 259 WhitespaceStartColumn); 260 for (unsigned i = 0; i < NewLines; ++i) { 261 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 262 NewLineText += "\\\n"; 263 Offset = 0; 264 } 265 } 266 storeReplacement(Tok.FormatTok, NewLineText + std::string(Spaces, ' ')); 267 } 268 269 /// \brief Returns all the \c Replacements created during formatting. 270 const tooling::Replacements &generateReplacements() { 271 alignComments(); 272 return Replaces; 273 } 274 275private: 276 /// \brief Structure to store a comment for later layout and alignment. 277 struct StoredComment { 278 FormatToken Tok; 279 unsigned MinColumn; 280 unsigned MaxColumn; 281 unsigned NewLines; 282 unsigned Spaces; 283 }; 284 SmallVector<StoredComment, 16> Comments; 285 typedef SmallVector<StoredComment, 16>::iterator comment_iterator; 286 287 /// \brief Try to align all stashed comments. 288 void alignComments() { 289 unsigned MinColumn = 0; 290 unsigned MaxColumn = UINT_MAX; 291 comment_iterator Start = Comments.begin(); 292 for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E; 293 ++I) { 294 if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { 295 alignComments(Start, I, MinColumn); 296 MinColumn = I->MinColumn; 297 MaxColumn = I->MaxColumn; 298 Start = I; 299 } else { 300 MinColumn = std::max(MinColumn, I->MinColumn); 301 MaxColumn = std::min(MaxColumn, I->MaxColumn); 302 } 303 } 304 alignComments(Start, Comments.end(), MinColumn); 305 Comments.clear(); 306 } 307 308 /// \brief Put all the comments between \p I and \p E into \p Column. 309 void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { 310 while (I != E) { 311 unsigned Spaces = I->Spaces + Column - I->MinColumn; 312 storeReplacement(I->Tok, std::string(I->NewLines, '\n') + 313 std::string(Spaces, ' ')); 314 ++I; 315 } 316 } 317 318 /// \brief Stores \p Text as the replacement for the whitespace in front of 319 /// \p Tok. 320 void storeReplacement(const FormatToken &Tok, const std::string Text) { 321 Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, 322 Tok.WhiteSpaceLength, Text)); 323 } 324 325 SourceManager &SourceMgr; 326 tooling::Replacements Replaces; 327}; 328 329/// \brief Returns if a token is an Objective-C selector name. 330/// 331/// For example, "bar" is a selector name in [foo bar:(4 + 5)]. 332static bool isObjCSelectorName(const AnnotatedToken &Tok) { 333 return Tok.is(tok::identifier) && !Tok.Children.empty() && 334 Tok.Children[0].is(tok::colon) && 335 Tok.Children[0].Type == TT_ObjCMethodExpr; 336} 337 338class UnwrappedLineFormatter { 339public: 340 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 341 const AnnotatedLine &Line, unsigned FirstIndent, 342 const AnnotatedToken &RootToken, 343 WhitespaceManager &Whitespaces, bool StructuralError) 344 : Style(Style), SourceMgr(SourceMgr), Line(Line), 345 FirstIndent(FirstIndent), RootToken(RootToken), 346 Whitespaces(Whitespaces) { 347 Parameters.PenaltyIndentLevel = 20; 348 Parameters.PenaltyLevelDecrease = 30; 349 Parameters.PenaltyExcessCharacter = 1000000; 350 } 351 352 /// \brief Formats an \c UnwrappedLine. 353 /// 354 /// \returns The column after the last token in the last line of the 355 /// \c UnwrappedLine. 356 unsigned format() { 357 // Initialize state dependent on indent. 358 LineState State; 359 State.Column = FirstIndent; 360 State.NextToken = &RootToken; 361 State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent)); 362 State.ForLoopVariablePos = 0; 363 State.LineContainsContinuedForLoopSection = false; 364 State.StartOfLineLevel = 1; 365 366 DEBUG({ 367 DebugTokenState(*State.NextToken); 368 }); 369 370 // The first token has already been indented and thus consumed. 371 moveStateToNextToken(State); 372 373 // Start iterating at 1 as we have correctly formatted of Token #0 above. 374 while (State.NextToken != NULL) { 375 if (State.NextToken->Type == TT_ImplicitStringLiteral) { 376 // Calculating the column is important for aligning trailing comments. 377 // FIXME: This does not seem to happen in conjunction with escaped 378 // newlines. If it does, fix! 379 State.Column += State.NextToken->FormatTok.WhiteSpaceLength + 380 State.NextToken->FormatTok.TokenLength; 381 State.NextToken = State.NextToken->Children.empty() ? NULL : 382 &State.NextToken->Children[0]; 383 } else if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) { 384 addTokenToState(false, false, State); 385 } else { 386 unsigned NoBreak = calcPenalty(State, false, UINT_MAX); 387 unsigned Break = calcPenalty(State, true, NoBreak); 388 DEBUG({ 389 if (Break < NoBreak) 390 llvm::errs() << "\n"; 391 else 392 llvm::errs() << " "; 393 llvm::errs() << "<"; 394 DebugPenalty(Break, Break < NoBreak); 395 llvm::errs() << "/"; 396 DebugPenalty(NoBreak, !(Break < NoBreak)); 397 llvm::errs() << "> "; 398 DebugTokenState(*State.NextToken); 399 }); 400 addTokenToState(Break < NoBreak, false, State); 401 if (State.NextToken != NULL && 402 State.NextToken->Parent->Type == TT_CtorInitializerColon) { 403 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine && 404 Line.Last->TotalLength > getColumnLimit() - State.Column - 1) 405 State.Stack.back().BreakAfterComma = true; 406 } 407 } 408 } 409 DEBUG(llvm::errs() << "\n"); 410 return State.Column; 411 } 412 413private: 414 void DebugTokenState(const AnnotatedToken &AnnotatedTok) { 415 const Token &Tok = AnnotatedTok.FormatTok.Tok; 416 llvm::errs() 417 << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 418 Tok.getLength()); 419 llvm::errs(); 420 } 421 422 void DebugPenalty(unsigned Penalty, bool Winner) { 423 llvm::errs().changeColor(Winner ? raw_ostream::GREEN : raw_ostream::RED); 424 if (Penalty == UINT_MAX) 425 llvm::errs() << "MAX"; 426 else 427 llvm::errs() << Penalty; 428 llvm::errs().resetColor(); 429 } 430 431 struct ParenState { 432 ParenState(unsigned Indent, unsigned LastSpace) 433 : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0), 434 FirstLessLess(0), BreakBeforeClosingBrace(false), 435 BreakAfterComma(false), HasMultiParameterLine(false) {} 436 437 /// \brief The position to which a specific parenthesis level needs to be 438 /// indented. 439 unsigned Indent; 440 441 /// \brief The position of the last space on each level. 442 /// 443 /// Used e.g. to break like: 444 /// functionCall(Parameter, otherCall( 445 /// OtherParameter)); 446 unsigned LastSpace; 447 448 /// \brief This is the column of the first token after an assignment. 449 unsigned AssignmentColumn; 450 451 /// \brief The position the first "<<" operator encountered on each level. 452 /// 453 /// Used to align "<<" operators. 0 if no such operator has been encountered 454 /// on a level. 455 unsigned FirstLessLess; 456 457 /// \brief Whether a newline needs to be inserted before the block's closing 458 /// brace. 459 /// 460 /// We only want to insert a newline before the closing brace if there also 461 /// was a newline after the beginning left brace. 462 bool BreakBeforeClosingBrace; 463 464 bool BreakAfterComma; 465 bool HasMultiParameterLine; 466 467 bool operator<(const ParenState &Other) const { 468 if (Indent != Other.Indent) 469 return Indent < Other.Indent; 470 if (LastSpace != Other.LastSpace) 471 return LastSpace < Other.LastSpace; 472 if (AssignmentColumn != Other.AssignmentColumn) 473 return AssignmentColumn < Other.AssignmentColumn; 474 if (FirstLessLess != Other.FirstLessLess) 475 return FirstLessLess < Other.FirstLessLess; 476 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 477 return BreakBeforeClosingBrace; 478 if (BreakAfterComma != Other.BreakAfterComma) 479 return BreakAfterComma; 480 if (HasMultiParameterLine != Other.HasMultiParameterLine) 481 return HasMultiParameterLine; 482 return false; 483 } 484 }; 485 486 /// \brief The current state when indenting a unwrapped line. 487 /// 488 /// As the indenting tries different combinations this is copied by value. 489 struct LineState { 490 /// \brief The number of used columns in the current line. 491 unsigned Column; 492 493 /// \brief The token that needs to be next formatted. 494 const AnnotatedToken *NextToken; 495 496 /// \brief The parenthesis level of the first token on the current line. 497 unsigned StartOfLineLevel; 498 499 /// \brief The column of the first variable in a for-loop declaration. 500 /// 501 /// Used to align the second variable if necessary. 502 unsigned ForLoopVariablePos; 503 504 /// \brief \c true if this line contains a continued for-loop section. 505 bool LineContainsContinuedForLoopSection; 506 507 /// \brief A stack keeping track of properties applying to parenthesis 508 /// levels. 509 std::vector<ParenState> Stack; 510 511 /// \brief Comparison operator to be able to used \c LineState in \c map. 512 bool operator<(const LineState &Other) const { 513 if (Other.NextToken != NextToken) 514 return Other.NextToken > NextToken; 515 if (Other.Column != Column) 516 return Other.Column > Column; 517 if (Other.StartOfLineLevel != StartOfLineLevel) 518 return Other.StartOfLineLevel > StartOfLineLevel; 519 if (Other.ForLoopVariablePos != ForLoopVariablePos) 520 return Other.ForLoopVariablePos < ForLoopVariablePos; 521 if (Other.LineContainsContinuedForLoopSection != 522 LineContainsContinuedForLoopSection) 523 return LineContainsContinuedForLoopSection; 524 return Other.Stack < Stack; 525 } 526 }; 527 528 /// \brief Appends the next token to \p State and updates information 529 /// necessary for indentation. 530 /// 531 /// Puts the token on the current line if \p Newline is \c true and adds a 532 /// line break and necessary indentation otherwise. 533 /// 534 /// If \p DryRun is \c false, also creates and stores the required 535 /// \c Replacement. 536 void addTokenToState(bool Newline, bool DryRun, LineState &State) { 537 const AnnotatedToken &Current = *State.NextToken; 538 const AnnotatedToken &Previous = *State.NextToken->Parent; 539 assert(State.Stack.size()); 540 unsigned ParenLevel = State.Stack.size() - 1; 541 542 if (Newline) { 543 unsigned WhitespaceStartColumn = State.Column; 544 if (Current.is(tok::r_brace)) { 545 State.Column = Line.Level * 2; 546 } else if (Current.is(tok::string_literal) && 547 Previous.is(tok::string_literal)) { 548 State.Column = State.Column - Previous.FormatTok.TokenLength; 549 } else if (Current.is(tok::lessless) && 550 State.Stack[ParenLevel].FirstLessLess != 0) { 551 State.Column = State.Stack[ParenLevel].FirstLessLess; 552 } else if (ParenLevel != 0 && 553 (Previous.is(tok::equal) || Previous.is(tok::coloncolon) || 554 Previous.is(tok::question) || 555 Previous.Type == TT_ConditionalExpr || 556 Current.is(tok::period) || Current.is(tok::arrow))) { 557 // Indent and extra 4 spaces after if we know the current expression is 558 // continued. Don't do that on the top level, as we already indent 4 559 // there. 560 State.Column = State.Stack[ParenLevel].Indent + 4; 561 } else if (RootToken.is(tok::kw_for) && Previous.is(tok::comma)) { 562 State.Column = State.ForLoopVariablePos; 563 } else if (State.NextToken->Parent->ClosesTemplateDeclaration) { 564 State.Column = State.Stack[ParenLevel].Indent - 4; 565 } else if (Previous.Type == TT_BinaryOperator && 566 State.Stack.back().AssignmentColumn != 0) { 567 State.Column = State.Stack.back().AssignmentColumn; 568 } else { 569 State.Column = State.Stack[ParenLevel].Indent; 570 } 571 572 // A line starting with a closing brace is assumed to be correct for the 573 // same level as before the opening brace. 574 State.StartOfLineLevel = ParenLevel + (Current.is(tok::r_brace) ? 0 : 1); 575 576 if (RootToken.is(tok::kw_for)) 577 State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi); 578 579 if (!DryRun) { 580 if (!Line.InPPDirective) 581 Whitespaces.replaceWhitespace(Current, 1, State.Column, 582 WhitespaceStartColumn, Style); 583 else 584 Whitespaces.replacePPWhitespace(Current, 1, State.Column, 585 WhitespaceStartColumn, Style); 586 } 587 588 State.Stack[ParenLevel].LastSpace = State.Column; 589 if (Current.is(tok::colon) && State.NextToken->Type != TT_ConditionalExpr) 590 State.Stack[ParenLevel].Indent += 2; 591 } else { 592 if (Current.is(tok::equal) && RootToken.is(tok::kw_for)) 593 State.ForLoopVariablePos = State.Column - 594 Previous.FormatTok.TokenLength; 595 596 unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0; 597 if (State.NextToken->Type == TT_LineComment) 598 Spaces = Style.SpacesBeforeTrailingComments; 599 600 if (!DryRun) 601 Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style); 602 603 // FIXME: Do we need to do this for assignments nested in other 604 // expressions? 605 if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 && 606 (getPrecedence(Previous) == prec::Assignment || 607 Previous.is(tok::kw_return))) 608 State.Stack.back().AssignmentColumn = State.Column + Spaces; 609 if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || 610 State.NextToken->Parent->Type == TT_TemplateOpener) 611 State.Stack[ParenLevel].Indent = State.Column + Spaces; 612 if (Current.getPreviousNoneComment() != NULL && 613 Current.getPreviousNoneComment()->is(tok::comma) && 614 Current.isNot(tok::comment)) 615 State.Stack[ParenLevel].HasMultiParameterLine = true; 616 617 State.Column += Spaces; 618 if (Current.is(tok::l_paren) && Previous.is(tok::kw_if)) 619 // Treat the condition inside an if as if it was a second function 620 // parameter, i.e. let nested calls have an indent of 4. 621 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 622 else if (Previous.is(tok::comma) && ParenLevel != 0) 623 // Top-level spaces are exempt as that mostly leads to better results. 624 State.Stack.back().LastSpace = State.Column; 625 else if (Previous.ParameterCount > 1 && 626 (Previous.is(tok::l_paren) || Previous.is(tok::l_square) || 627 Previous.Type == TT_TemplateOpener)) 628 // If this function has multiple parameters, indent nested calls from 629 // the start of the first parameter. 630 State.Stack.back().LastSpace = State.Column; 631 } 632 633 // If we break after an {, we should also break before the corresponding }. 634 if (Newline && Previous.is(tok::l_brace)) 635 State.Stack.back().BreakBeforeClosingBrace = true; 636 637 if (!Style.BinPackParameters && Newline) { 638 // If we are breaking after '(', '{', '<', this is not bin packing unless 639 // AllowAllParametersOnNextLine is false. 640 if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) && 641 Previous.Type != TT_TemplateOpener) || 642 !Style.AllowAllParametersOnNextLine) 643 State.Stack.back().BreakAfterComma = true; 644 645 // Any break on this level means that the parent level has been broken 646 // and we need to avoid bin packing there. 647 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 648 State.Stack[i].BreakAfterComma = true; 649 } 650 } 651 652 moveStateToNextToken(State); 653 } 654 655 /// \brief Mark the next token as consumed in \p State and modify its stacks 656 /// accordingly. 657 void moveStateToNextToken(LineState &State) { 658 const AnnotatedToken &Current = *State.NextToken; 659 assert(State.Stack.size()); 660 661 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 662 State.Stack.back().FirstLessLess = State.Column; 663 664 // If we encounter an opening (, [, { or <, we add a level to our stacks to 665 // prepare for the following tokens. 666 if (Current.is(tok::l_paren) || Current.is(tok::l_square) || 667 Current.is(tok::l_brace) || 668 State.NextToken->Type == TT_TemplateOpener) { 669 unsigned NewIndent; 670 if (Current.is(tok::l_brace)) { 671 // FIXME: This does not work with nested static initializers. 672 // Implement a better handling for static initializers and similar 673 // constructs. 674 NewIndent = Line.Level * 2 + 2; 675 } else { 676 NewIndent = 4 + State.Stack.back().LastSpace; 677 } 678 State.Stack.push_back( 679 ParenState(NewIndent, State.Stack.back().LastSpace)); 680 } 681 682 // If we encounter a closing ), ], } or >, we can remove a level from our 683 // stacks. 684 if (Current.is(tok::r_paren) || Current.is(tok::r_square) || 685 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 686 State.NextToken->Type == TT_TemplateCloser) { 687 State.Stack.pop_back(); 688 } 689 690 if (State.NextToken->Children.empty()) 691 State.NextToken = NULL; 692 else 693 State.NextToken = &State.NextToken->Children[0]; 694 695 State.Column += Current.FormatTok.TokenLength; 696 } 697 698 /// \brief Calculate the penalty for splitting after the token at \p Index. 699 unsigned splitPenalty(const AnnotatedToken &Tok) { 700 const AnnotatedToken &Left = Tok; 701 const AnnotatedToken &Right = Tok.Children[0]; 702 703 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace)) 704 return 50; 705 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 706 return 150; 707 if (Left.is(tok::coloncolon)) 708 return 500; 709 710 // In for-loops, prefer breaking at ',' and ';'. 711 if (RootToken.is(tok::kw_for) && 712 (Left.isNot(tok::comma) && Left.isNot(tok::semi))) 713 return 20; 714 715 if (Left.is(tok::semi) || Left.is(tok::comma)) 716 return 0; 717 718 // In Objective-C method expressions, prefer breaking before "param:" over 719 // breaking after it. 720 if (isObjCSelectorName(Right)) 721 return 0; 722 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 723 return 20; 724 725 if (Left.is(tok::l_paren)) 726 return 20; 727 // FIXME: The penalty for a trailing "<" or "[" being higher than the 728 // penalty for a trainling "(" is a temporary workaround until we can 729 // properly avoid breaking in array subscripts or template parameters. 730 if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener) 731 return 50; 732 733 if (Left.is(tok::question) || Left.Type == TT_ConditionalExpr) 734 return prec::Assignment; 735 prec::Level Level = getPrecedence(Left); 736 737 if (Level != prec::Unknown) 738 return Level; 739 740 if (Right.is(tok::arrow) || Right.is(tok::period)) { 741 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall) 742 return 15; // Should be smaller than breaking at a nested comma. 743 return 150; 744 } 745 746 return 3; 747 } 748 749 unsigned getColumnLimit() { 750 return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0); 751 } 752 753 /// \brief Calculate the number of lines needed to format the remaining part 754 /// of the unwrapped line. 755 /// 756 /// Assumes the formatting so far has led to 757 /// the \c LineSta \p State. If \p NewLine is set, a new line will be 758 /// added after the previous token. 759 /// 760 /// \param StopAt is used for optimization. If we can determine that we'll 761 /// definitely need at least \p StopAt additional lines, we already know of a 762 /// better solution. 763 unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) { 764 // We are at the end of the unwrapped line, so we don't need any more lines. 765 if (State.NextToken == NULL) 766 return 0; 767 768 if (!NewLine && State.NextToken->MustBreakBefore) 769 return UINT_MAX; 770 if (NewLine && !State.NextToken->CanBreakBefore && 771 !(State.NextToken->is(tok::r_brace) && 772 State.Stack.back().BreakBeforeClosingBrace)) 773 return UINT_MAX; 774 if (!NewLine && State.NextToken->is(tok::r_brace) && 775 State.Stack.back().BreakBeforeClosingBrace) 776 return UINT_MAX; 777 if (!NewLine && State.NextToken->Parent->is(tok::semi) && 778 State.LineContainsContinuedForLoopSection) 779 return UINT_MAX; 780 if (!NewLine && State.NextToken->Parent->is(tok::comma) && 781 State.NextToken->isNot(tok::comment) && 782 State.Stack.back().BreakAfterComma) 783 return UINT_MAX; 784 // Trying to insert a parameter on a new line if there are already more than 785 // one parameter on the current line is bin packing. 786 if (NewLine && State.NextToken->Parent->is(tok::comma) && 787 State.Stack.back().HasMultiParameterLine && !Style.BinPackParameters) 788 return UINT_MAX; 789 if (!NewLine && (State.NextToken->Type == TT_CtorInitializerColon || 790 (State.NextToken->Parent->ClosesTemplateDeclaration && 791 State.Stack.size() == 1))) 792 return UINT_MAX; 793 794 unsigned CurrentPenalty = 0; 795 if (NewLine) { 796 CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() + 797 splitPenalty(*State.NextToken->Parent); 798 } else { 799 if (State.Stack.size() < State.StartOfLineLevel && 800 State.NextToken->is(tok::identifier)) 801 CurrentPenalty += Parameters.PenaltyLevelDecrease * 802 (State.StartOfLineLevel - State.Stack.size()); 803 } 804 805 addTokenToState(NewLine, true, State); 806 807 // Exceeding column limit is bad, assign penalty. 808 if (State.Column > getColumnLimit()) { 809 unsigned ExcessCharacters = State.Column - getColumnLimit(); 810 CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters; 811 } 812 813 if (StopAt <= CurrentPenalty) 814 return UINT_MAX; 815 StopAt -= CurrentPenalty; 816 StateMap::iterator I = Memory.find(State); 817 if (I != Memory.end()) { 818 // If this state has already been examined, we can safely return the 819 // previous result if we 820 // - have not hit the optimatization (and thus returned UINT_MAX) OR 821 // - are now computing for a smaller or equal StopAt. 822 unsigned SavedResult = I->second.first; 823 unsigned SavedStopAt = I->second.second; 824 if (SavedResult != UINT_MAX) 825 return SavedResult + CurrentPenalty; 826 else if (StopAt <= SavedStopAt) 827 return UINT_MAX; 828 } 829 830 unsigned NoBreak = calcPenalty(State, false, StopAt); 831 unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak)); 832 unsigned Result = std::min(NoBreak, WithBreak); 833 834 // We have to store 'Result' without adding 'CurrentPenalty' as the latter 835 // can depend on 'NewLine'. 836 Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt); 837 838 return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty; 839 } 840 841 FormatStyle Style; 842 SourceManager &SourceMgr; 843 const AnnotatedLine &Line; 844 const unsigned FirstIndent; 845 const AnnotatedToken &RootToken; 846 WhitespaceManager &Whitespaces; 847 848 // A map from an indent state to a pair (Result, Used-StopAt). 849 typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap; 850 StateMap Memory; 851 852 OptimizationParameters Parameters; 853}; 854 855/// \brief Determines extra information about the tokens comprising an 856/// \c UnwrappedLine. 857class TokenAnnotator { 858public: 859 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 860 AnnotatedLine &Line) 861 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {} 862 863 /// \brief A parser that gathers additional information about tokens. 864 /// 865 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and 866 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 867 /// into template parameter lists. 868 class AnnotatingParser { 869 public: 870 AnnotatingParser(AnnotatedToken &RootToken) 871 : CurrentToken(&RootToken), KeywordVirtualFound(false), 872 ColonIsObjCMethodExpr(false) {} 873 874 /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr. 875 struct ObjCSelectorRAII { 876 AnnotatingParser &P; 877 bool ColonWasObjCMethodExpr; 878 879 ObjCSelectorRAII(AnnotatingParser &P) 880 : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {} 881 882 ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; } 883 884 void markStart(AnnotatedToken &Left) { 885 P.ColonIsObjCMethodExpr = true; 886 Left.Type = TT_ObjCMethodExpr; 887 } 888 889 void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; } 890 }; 891 892 893 bool parseAngle() { 894 if (CurrentToken == NULL) 895 return false; 896 AnnotatedToken *Left = CurrentToken->Parent; 897 while (CurrentToken != NULL) { 898 if (CurrentToken->is(tok::greater)) { 899 Left->MatchingParen = CurrentToken; 900 CurrentToken->MatchingParen = Left; 901 CurrentToken->Type = TT_TemplateCloser; 902 next(); 903 return true; 904 } 905 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) || 906 CurrentToken->is(tok::r_brace)) 907 return false; 908 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) || 909 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon)) 910 return false; 911 if (CurrentToken->is(tok::comma)) 912 ++Left->ParameterCount; 913 if (!consumeToken()) 914 return false; 915 } 916 return false; 917 } 918 919 bool parseParens(bool LookForDecls = false) { 920 if (CurrentToken == NULL) 921 return false; 922 bool StartsObjCMethodExpr = false; 923 AnnotatedToken *Left = CurrentToken->Parent; 924 if (CurrentToken->is(tok::caret)) { 925 // ^( starts a block. 926 Left->Type = TT_ObjCBlockLParen; 927 } else if (AnnotatedToken *MaybeSel = Left->Parent) { 928 // @selector( starts a selector. 929 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && 930 MaybeSel->Parent->is(tok::at)) { 931 StartsObjCMethodExpr = true; 932 } 933 } 934 935 ObjCSelectorRAII objCSelector(*this); 936 if (StartsObjCMethodExpr) 937 objCSelector.markStart(*Left); 938 939 while (CurrentToken != NULL) { 940 // LookForDecls is set when "if (" has been seen. Check for 941 // 'identifier' '*' 'identifier' followed by not '=' -- this 942 // '*' has to be a binary operator but determineStarAmpUsage() will 943 // categorize it as an unary operator, so set the right type here. 944 if (LookForDecls && !CurrentToken->Children.empty()) { 945 AnnotatedToken &Prev = *CurrentToken->Parent; 946 AnnotatedToken &Next = CurrentToken->Children[0]; 947 if (Prev.Parent->is(tok::identifier) && 948 (Prev.is(tok::star) || Prev.is(tok::amp)) && 949 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { 950 Prev.Type = TT_BinaryOperator; 951 LookForDecls = false; 952 } 953 } 954 955 if (CurrentToken->is(tok::r_paren)) { 956 Left->MatchingParen = CurrentToken; 957 CurrentToken->MatchingParen = Left; 958 959 if (StartsObjCMethodExpr) 960 objCSelector.markEnd(*CurrentToken); 961 962 next(); 963 return true; 964 } 965 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace)) 966 return false; 967 if (CurrentToken->is(tok::comma)) 968 ++Left->ParameterCount; 969 if (!consumeToken()) 970 return false; 971 } 972 return false; 973 } 974 975 bool parseSquare() { 976 if (!CurrentToken) 977 return false; 978 979 // A '[' could be an index subscript (after an indentifier or after 980 // ')' or ']'), or it could be the start of an Objective-C method 981 // expression. 982 AnnotatedToken *Left = CurrentToken->Parent; 983 bool StartsObjCMethodExpr = 984 !Left->Parent || Left->Parent->is(tok::colon) || 985 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) || 986 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) || 987 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), 988 true, true) > prec::Unknown; 989 990 ObjCSelectorRAII objCSelector(*this); 991 if (StartsObjCMethodExpr) 992 objCSelector.markStart(*Left); 993 994 while (CurrentToken != NULL) { 995 if (CurrentToken->is(tok::r_square)) { 996 if (!CurrentToken->Children.empty() && 997 CurrentToken->Children[0].is(tok::l_paren)) { 998 // An ObjC method call can't be followed by an open parenthesis. 999 // FIXME: Do we incorrectly label ":" with this? 1000 StartsObjCMethodExpr = false; 1001 Left->Type = TT_Unknown; 1002 } 1003 if (StartsObjCMethodExpr) 1004 objCSelector.markEnd(*CurrentToken); 1005 Left->MatchingParen = CurrentToken; 1006 CurrentToken->MatchingParen = Left; 1007 next(); 1008 return true; 1009 } 1010 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace)) 1011 return false; 1012 if (CurrentToken->is(tok::comma)) 1013 ++Left->ParameterCount; 1014 if (!consumeToken()) 1015 return false; 1016 } 1017 return false; 1018 } 1019 1020 bool parseBrace() { 1021 // Lines are fine to end with '{'. 1022 if (CurrentToken == NULL) 1023 return true; 1024 AnnotatedToken *Left = CurrentToken->Parent; 1025 while (CurrentToken != NULL) { 1026 if (CurrentToken->is(tok::r_brace)) { 1027 Left->MatchingParen = CurrentToken; 1028 CurrentToken->MatchingParen = Left; 1029 next(); 1030 return true; 1031 } 1032 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square)) 1033 return false; 1034 if (!consumeToken()) 1035 return false; 1036 } 1037 return true; 1038 } 1039 1040 bool parseConditional() { 1041 while (CurrentToken != NULL) { 1042 if (CurrentToken->is(tok::colon)) { 1043 CurrentToken->Type = TT_ConditionalExpr; 1044 next(); 1045 return true; 1046 } 1047 if (!consumeToken()) 1048 return false; 1049 } 1050 return false; 1051 } 1052 1053 bool parseTemplateDeclaration() { 1054 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 1055 CurrentToken->Type = TT_TemplateOpener; 1056 next(); 1057 if (!parseAngle()) 1058 return false; 1059 CurrentToken->Parent->ClosesTemplateDeclaration = true; 1060 return true; 1061 } 1062 return false; 1063 } 1064 1065 bool consumeToken() { 1066 AnnotatedToken *Tok = CurrentToken; 1067 next(); 1068 switch (Tok->FormatTok.Tok.getKind()) { 1069 case tok::plus: 1070 case tok::minus: 1071 // At the start of the line, +/- specific ObjectiveC method 1072 // declarations. 1073 if (Tok->Parent == NULL) 1074 Tok->Type = TT_ObjCMethodSpecifier; 1075 break; 1076 case tok::colon: 1077 // Colons from ?: are handled in parseConditional(). 1078 if (Tok->Parent->is(tok::r_paren)) 1079 Tok->Type = TT_CtorInitializerColon; 1080 if (ColonIsObjCMethodExpr) 1081 Tok->Type = TT_ObjCMethodExpr; 1082 break; 1083 case tok::kw_if: 1084 case tok::kw_while: 1085 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 1086 next(); 1087 if (!parseParens(/*LookForDecls=*/true)) 1088 return false; 1089 } 1090 break; 1091 case tok::l_paren: 1092 if (!parseParens()) 1093 return false; 1094 break; 1095 case tok::l_square: 1096 if (!parseSquare()) 1097 return false; 1098 break; 1099 case tok::l_brace: 1100 if (!parseBrace()) 1101 return false; 1102 break; 1103 case tok::less: 1104 if (parseAngle()) 1105 Tok->Type = TT_TemplateOpener; 1106 else { 1107 Tok->Type = TT_BinaryOperator; 1108 CurrentToken = Tok; 1109 next(); 1110 } 1111 break; 1112 case tok::r_paren: 1113 case tok::r_square: 1114 return false; 1115 case tok::r_brace: 1116 // Lines can start with '}'. 1117 if (Tok->Parent != NULL) 1118 return false; 1119 break; 1120 case tok::greater: 1121 Tok->Type = TT_BinaryOperator; 1122 break; 1123 case tok::kw_operator: 1124 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 1125 CurrentToken->Type = TT_OverloadedOperator; 1126 next(); 1127 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) { 1128 CurrentToken->Type = TT_OverloadedOperator; 1129 next(); 1130 } 1131 } else { 1132 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) { 1133 CurrentToken->Type = TT_OverloadedOperator; 1134 next(); 1135 } 1136 } 1137 break; 1138 case tok::question: 1139 parseConditional(); 1140 break; 1141 case tok::kw_template: 1142 parseTemplateDeclaration(); 1143 break; 1144 default: 1145 break; 1146 } 1147 return true; 1148 } 1149 1150 void parseIncludeDirective() { 1151 next(); 1152 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 1153 next(); 1154 while (CurrentToken != NULL) { 1155 if (CurrentToken->isNot(tok::comment) || 1156 !CurrentToken->Children.empty()) 1157 CurrentToken->Type = TT_ImplicitStringLiteral; 1158 next(); 1159 } 1160 } else { 1161 while (CurrentToken != NULL) { 1162 next(); 1163 } 1164 } 1165 } 1166 1167 void parseWarningOrError() { 1168 next(); 1169 // We still want to format the whitespace left of the first token of the 1170 // warning or error. 1171 next(); 1172 while (CurrentToken != NULL) { 1173 CurrentToken->Type = TT_ImplicitStringLiteral; 1174 next(); 1175 } 1176 } 1177 1178 void parsePreprocessorDirective() { 1179 next(); 1180 if (CurrentToken == NULL) 1181 return; 1182 // Hashes in the middle of a line can lead to any strange token 1183 // sequence. 1184 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 1185 return; 1186 switch ( 1187 CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 1188 case tok::pp_include: 1189 case tok::pp_import: 1190 parseIncludeDirective(); 1191 break; 1192 case tok::pp_error: 1193 case tok::pp_warning: 1194 parseWarningOrError(); 1195 break; 1196 default: 1197 break; 1198 } 1199 } 1200 1201 LineType parseLine() { 1202 int PeriodsAndArrows = 0; 1203 if (CurrentToken->is(tok::hash)) { 1204 parsePreprocessorDirective(); 1205 return LT_PreprocessorDirective; 1206 } 1207 while (CurrentToken != NULL) { 1208 1209 if (CurrentToken->is(tok::kw_virtual)) 1210 KeywordVirtualFound = true; 1211 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow)) 1212 ++PeriodsAndArrows; 1213 if (!consumeToken()) 1214 return LT_Invalid; 1215 } 1216 if (KeywordVirtualFound) 1217 return LT_VirtualFunctionDecl; 1218 1219 // Assume a builder-type call if there are 2 or more "." and "->". 1220 if (PeriodsAndArrows >= 2) 1221 return LT_BuilderTypeCall; 1222 1223 return LT_Other; 1224 } 1225 1226 void next() { 1227 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 1228 CurrentToken = &CurrentToken->Children[0]; 1229 else 1230 CurrentToken = NULL; 1231 } 1232 1233 private: 1234 AnnotatedToken *CurrentToken; 1235 bool KeywordVirtualFound; 1236 bool ColonIsObjCMethodExpr; 1237 }; 1238 1239 void calculateExtraInformation(AnnotatedToken &Current) { 1240 Current.SpaceRequiredBefore = spaceRequiredBefore(Current); 1241 1242 if (Current.FormatTok.MustBreakBefore) { 1243 Current.MustBreakBefore = true; 1244 } else { 1245 if (Current.Type == TT_LineComment) { 1246 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0; 1247 } else if ((Current.Parent->is(tok::comment) && 1248 Current.FormatTok.NewlinesBefore > 0) || 1249 (Current.is(tok::string_literal) && 1250 Current.Parent->is(tok::string_literal))) { 1251 Current.MustBreakBefore = true; 1252 } else { 1253 Current.MustBreakBefore = false; 1254 } 1255 } 1256 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current); 1257 if (Current.MustBreakBefore) 1258 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit; 1259 else 1260 Current.TotalLength = Current.Parent->TotalLength + 1261 Current.FormatTok.TokenLength + 1262 (Current.SpaceRequiredBefore ? 1 : 0); 1263 if (!Current.Children.empty()) 1264 calculateExtraInformation(Current.Children[0]); 1265 } 1266 1267 void annotate() { 1268 AnnotatingParser Parser(Line.First); 1269 Line.Type = Parser.parseLine(); 1270 if (Line.Type == LT_Invalid) 1271 return; 1272 1273 determineTokenTypes(Line.First, /*IsExpression=*/ false); 1274 1275 if (Line.First.Type == TT_ObjCMethodSpecifier) 1276 Line.Type = LT_ObjCMethodDecl; 1277 else if (Line.First.Type == TT_ObjCDecl) 1278 Line.Type = LT_ObjCDecl; 1279 else if (Line.First.Type == TT_ObjCProperty) 1280 Line.Type = LT_ObjCProperty; 1281 1282 Line.First.SpaceRequiredBefore = true; 1283 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; 1284 Line.First.CanBreakBefore = Line.First.MustBreakBefore; 1285 1286 Line.First.TotalLength = Line.First.FormatTok.TokenLength; 1287 if (!Line.First.Children.empty()) 1288 calculateExtraInformation(Line.First.Children[0]); 1289 } 1290 1291private: 1292 void determineTokenTypes(AnnotatedToken &Current, bool IsExpression) { 1293 if (getPrecedence(Current) == prec::Assignment) { 1294 IsExpression = true; 1295 AnnotatedToken *Previous = Current.Parent; 1296 while (Previous != NULL) { 1297 if (Previous->Type == TT_BinaryOperator && 1298 (Previous->is(tok::star) || Previous->is(tok::amp))) { 1299 Previous->Type = TT_PointerOrReference; 1300 } 1301 Previous = Previous->Parent; 1302 } 1303 } 1304 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) || 1305 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 1306 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for)))) 1307 IsExpression = true; 1308 1309 if (Current.Type == TT_Unknown) { 1310 if (Current.is(tok::star) || Current.is(tok::amp)) { 1311 Current.Type = determineStarAmpUsage(Current, IsExpression); 1312 } else if (Current.is(tok::minus) || Current.is(tok::plus) || 1313 Current.is(tok::caret)) { 1314 Current.Type = determinePlusMinusCaretUsage(Current); 1315 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) { 1316 Current.Type = determineIncrementUsage(Current); 1317 } else if (Current.is(tok::exclaim)) { 1318 Current.Type = TT_UnaryOperator; 1319 } else if (isBinaryOperator(Current)) { 1320 Current.Type = TT_BinaryOperator; 1321 } else if (Current.is(tok::comment)) { 1322 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 1323 Lex.getLangOpts())); 1324 if (StringRef(Data).startswith("//")) 1325 Current.Type = TT_LineComment; 1326 else 1327 Current.Type = TT_BlockComment; 1328 } else if (Current.is(tok::r_paren) && 1329 (Current.Parent->Type == TT_PointerOrReference || 1330 Current.Parent->Type == TT_TemplateCloser) && 1331 (Current.Children.empty() || 1332 (Current.Children[0].isNot(tok::equal) && 1333 Current.Children[0].isNot(tok::semi) && 1334 Current.Children[0].isNot(tok::l_brace)))) { 1335 // FIXME: We need to get smarter and understand more cases of casts. 1336 Current.Type = TT_CastRParen; 1337 } else if (Current.is(tok::at) && Current.Children.size()) { 1338 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 1339 case tok::objc_interface: 1340 case tok::objc_implementation: 1341 case tok::objc_protocol: 1342 Current.Type = TT_ObjCDecl; 1343 break; 1344 case tok::objc_property: 1345 Current.Type = TT_ObjCProperty; 1346 break; 1347 default: 1348 break; 1349 } 1350 } 1351 } 1352 1353 if (!Current.Children.empty()) 1354 determineTokenTypes(Current.Children[0], IsExpression); 1355 } 1356 1357 bool isBinaryOperator(const AnnotatedToken &Tok) { 1358 // Comma is a binary operator, but does not behave as such wrt. formatting. 1359 return getPrecedence(Tok) > prec::Comma; 1360 } 1361 1362 /// \brief Returns the previous token ignoring comments. 1363 const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { 1364 const AnnotatedToken *PrevToken = Tok.Parent; 1365 while (PrevToken != NULL && PrevToken->is(tok::comment)) 1366 PrevToken = PrevToken->Parent; 1367 return PrevToken; 1368 } 1369 1370 /// \brief Returns the next token ignoring comments. 1371 const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { 1372 if (Tok.Children.empty()) 1373 return NULL; 1374 const AnnotatedToken *NextToken = &Tok.Children[0]; 1375 while (NextToken->is(tok::comment)) { 1376 if (NextToken->Children.empty()) 1377 return NULL; 1378 NextToken = &NextToken->Children[0]; 1379 } 1380 return NextToken; 1381 } 1382 1383 /// \brief Return the type of the given token assuming it is * or &. 1384 TokenType determineStarAmpUsage(const AnnotatedToken &Tok, 1385 bool IsExpression) { 1386 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 1387 if (PrevToken == NULL) 1388 return TT_UnaryOperator; 1389 1390 const AnnotatedToken *NextToken = getNextToken(Tok); 1391 if (NextToken == NULL) 1392 return TT_Unknown; 1393 1394 if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr) 1395 return TT_PointerOrReference; 1396 1397 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) || 1398 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) || 1399 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) || 1400 PrevToken->Type == TT_BinaryOperator || 1401 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 1402 return TT_UnaryOperator; 1403 1404 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) || 1405 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() || 1406 NextToken->is(tok::plus) || NextToken->is(tok::minus) || 1407 NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) || 1408 NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) || 1409 NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) || 1410 NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof)) 1411 return TT_BinaryOperator; 1412 1413 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) || 1414 NextToken->is(tok::greater)) 1415 return TT_PointerOrReference; 1416 1417 // It is very unlikely that we are going to find a pointer or reference type 1418 // definition on the RHS of an assignment. 1419 if (IsExpression) 1420 return TT_BinaryOperator; 1421 1422 return TT_PointerOrReference; 1423 } 1424 1425 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 1426 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 1427 if (PrevToken == NULL) 1428 return TT_UnaryOperator; 1429 1430 // Use heuristics to recognize unary operators. 1431 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) || 1432 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) || 1433 PrevToken->is(tok::question) || PrevToken->is(tok::colon) || 1434 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) || 1435 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace)) 1436 return TT_UnaryOperator; 1437 1438 // There can't be to consecutive binary operators. 1439 if (PrevToken->Type == TT_BinaryOperator) 1440 return TT_UnaryOperator; 1441 1442 // Fall back to marking the token as binary operator. 1443 return TT_BinaryOperator; 1444 } 1445 1446 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 1447 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 1448 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 1449 if (PrevToken == NULL) 1450 return TT_UnaryOperator; 1451 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) || 1452 PrevToken->is(tok::identifier)) 1453 return TT_TrailingUnaryOperator; 1454 1455 return TT_UnaryOperator; 1456 } 1457 1458 bool spaceRequiredBetween(const AnnotatedToken &Left, 1459 const AnnotatedToken &Right) { 1460 if (Right.is(tok::hashhash)) 1461 return Left.is(tok::hash); 1462 if (Left.is(tok::hashhash) || Left.is(tok::hash)) 1463 return Right.is(tok::hash); 1464 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) 1465 return false; 1466 if (Right.is(tok::less) && 1467 (Left.is(tok::kw_template) || 1468 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1469 return true; 1470 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1471 return false; 1472 if (Left.is(tok::exclaim) || Left.is(tok::tilde)) 1473 return false; 1474 if (Left.is(tok::at) && 1475 (Right.is(tok::identifier) || Right.is(tok::string_literal) || 1476 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) || 1477 Right.is(tok::l_paren) || Right.is(tok::l_brace) || 1478 Right.is(tok::kw_true) || Right.is(tok::kw_false))) 1479 return false; 1480 if (Left.is(tok::coloncolon)) 1481 return false; 1482 if (Right.is(tok::coloncolon)) 1483 return Left.isNot(tok::identifier) && Left.isNot(tok::greater); 1484 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) 1485 return false; 1486 if (Right.is(tok::amp) || Right.is(tok::star)) 1487 return Left.FormatTok.Tok.isLiteral() || 1488 (Left.isNot(tok::star) && Left.isNot(tok::amp) && 1489 !Style.PointerAndReferenceBindToType); 1490 if (Left.is(tok::amp) || Left.is(tok::star)) 1491 return Right.FormatTok.Tok.isLiteral() || 1492 Style.PointerAndReferenceBindToType; 1493 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1494 return false; 1495 if (Left.is(tok::l_square) || Right.is(tok::r_square)) 1496 return false; 1497 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 1498 return false; 1499 if (Left.is(tok::period) || Right.is(tok::period)) 1500 return false; 1501 if (Left.is(tok::colon)) 1502 return Left.Type != TT_ObjCMethodExpr; 1503 if (Right.is(tok::colon)) 1504 return Right.Type != TT_ObjCMethodExpr; 1505 if (Left.is(tok::l_paren)) 1506 return false; 1507 if (Right.is(tok::l_paren)) { 1508 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) || 1509 Left.is(tok::kw_for) || Left.is(tok::kw_while) || 1510 Left.is(tok::kw_switch) || Left.is(tok::kw_return) || 1511 Left.is(tok::kw_catch) || Left.is(tok::kw_new) || 1512 Left.is(tok::kw_delete); 1513 } 1514 if (Left.is(tok::at) && 1515 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1516 return false; 1517 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1518 return false; 1519 return true; 1520 } 1521 1522 bool spaceRequiredBefore(const AnnotatedToken &Tok) { 1523 if (Line.Type == LT_ObjCMethodDecl) { 1524 if (Tok.is(tok::identifier) && !Tok.Children.empty() && 1525 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) 1526 return true; 1527 if (Tok.is(tok::colon)) 1528 return false; 1529 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1530 return true; 1531 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1532 // Don't space between ')' and <id> 1533 return false; 1534 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren)) 1535 // Don't space between ':' and '(' 1536 return false; 1537 } 1538 if (Line.Type == LT_ObjCProperty && 1539 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1540 return false; 1541 1542 if (Tok.Parent->is(tok::comma)) 1543 return true; 1544 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1545 return true; 1546 if (Tok.Type == TT_OverloadedOperator) 1547 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) || 1548 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool); 1549 if (Tok.Parent->Type == TT_OverloadedOperator) 1550 return false; 1551 if (Tok.is(tok::colon)) 1552 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() && 1553 Tok.Type != TT_ObjCMethodExpr; 1554 if (Tok.Parent->Type == TT_UnaryOperator || 1555 Tok.Parent->Type == TT_CastRParen) 1556 return false; 1557 if (Tok.Type == TT_UnaryOperator) 1558 return Tok.Parent->isNot(tok::l_paren) && 1559 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) && 1560 (Tok.Parent->isNot(tok::colon) || 1561 Tok.Parent->Type != TT_ObjCMethodExpr); 1562 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1563 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type == 1564 TT_TemplateCloser && Style.SplitTemplateClosingGreater; 1565 } 1566 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1567 return true; 1568 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1569 return false; 1570 if (Tok.is(tok::less) && Line.First.is(tok::hash)) 1571 return true; 1572 if (Tok.Type == TT_TrailingUnaryOperator) 1573 return false; 1574 return spaceRequiredBetween(*Tok.Parent, Tok); 1575 } 1576 1577 bool canBreakBefore(const AnnotatedToken &Right) { 1578 const AnnotatedToken &Left = *Right.Parent; 1579 if (Line.Type == LT_ObjCMethodDecl) { 1580 if (Right.is(tok::identifier) && !Right.Children.empty() && 1581 Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) 1582 return true; 1583 if (Right.is(tok::identifier) && Left.is(tok::l_paren) && 1584 Left.Parent->is(tok::colon)) 1585 // Don't break this identifier as ':' or identifier 1586 // before it will break. 1587 return false; 1588 if (Right.is(tok::colon) && Left.is(tok::identifier) && 1589 Left.CanBreakBefore) 1590 // Don't break at ':' if identifier before it can beak. 1591 return false; 1592 } 1593 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1594 return false; 1595 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1596 return true; 1597 if (isObjCSelectorName(Right)) 1598 return true; 1599 if (Left.ClosesTemplateDeclaration) 1600 return true; 1601 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1602 Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr) 1603 return false; 1604 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1605 return false; 1606 1607 if (Right.Type == TT_LineComment) 1608 // We rely on MustBreakBefore being set correctly here as we should not 1609 // change the "binding" behavior of a comment. 1610 return false; 1611 1612 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1613 // unless it is follow by ';', '{' or '='. 1614 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1615 Left.Parent->is(tok::r_paren)) 1616 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) && 1617 Right.isNot(tok::equal); 1618 1619 // We only break before r_brace if there was a corresponding break before 1620 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1621 if (Right.is(tok::r_brace)) 1622 return false; 1623 1624 if (Right.is(tok::r_paren) || Right.is(tok::greater)) 1625 return false; 1626 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1627 Left.is(tok::comma) || Right.is(tok::lessless) || 1628 Right.is(tok::arrow) || Right.is(tok::period) || 1629 Right.is(tok::colon) || Left.is(tok::coloncolon) || 1630 Left.is(tok::semi) || Left.is(tok::l_brace) || 1631 Left.is(tok::question) || Left.Type == TT_ConditionalExpr || 1632 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1633 Right.is(tok::identifier)) || 1634 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1635 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1636 } 1637 1638 FormatStyle Style; 1639 SourceManager &SourceMgr; 1640 Lexer &Lex; 1641 AnnotatedLine &Line; 1642}; 1643 1644class LexerBasedFormatTokenSource : public FormatTokenSource { 1645public: 1646 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1647 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1648 IdentTable(Lex.getLangOpts()) { 1649 Lex.SetKeepWhitespaceMode(true); 1650 } 1651 1652 virtual FormatToken getNextToken() { 1653 if (GreaterStashed) { 1654 FormatTok.NewlinesBefore = 0; 1655 FormatTok.WhiteSpaceStart = 1656 FormatTok.Tok.getLocation().getLocWithOffset(1); 1657 FormatTok.WhiteSpaceLength = 0; 1658 GreaterStashed = false; 1659 return FormatTok; 1660 } 1661 1662 FormatTok = FormatToken(); 1663 Lex.LexFromRawLexer(FormatTok.Tok); 1664 StringRef Text = rawTokenText(FormatTok.Tok); 1665 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1666 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1667 FormatTok.IsFirst = true; 1668 1669 // Consume and record whitespace until we find a significant token. 1670 while (FormatTok.Tok.is(tok::unknown)) { 1671 FormatTok.NewlinesBefore += Text.count('\n'); 1672 FormatTok.HasUnescapedNewline = Text.count("\\\n") != 1673 FormatTok.NewlinesBefore; 1674 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1675 1676 if (FormatTok.Tok.is(tok::eof)) 1677 return FormatTok; 1678 Lex.LexFromRawLexer(FormatTok.Tok); 1679 Text = rawTokenText(FormatTok.Tok); 1680 } 1681 1682 // Now FormatTok is the next non-whitespace token. 1683 FormatTok.TokenLength = Text.size(); 1684 1685 // In case the token starts with escaped newlines, we want to 1686 // take them into account as whitespace - this pattern is quite frequent 1687 // in macro definitions. 1688 // FIXME: What do we want to do with other escaped spaces, and escaped 1689 // spaces or newlines in the middle of tokens? 1690 // FIXME: Add a more explicit test. 1691 unsigned i = 0; 1692 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1693 // FIXME: ++FormatTok.NewlinesBefore is missing... 1694 FormatTok.WhiteSpaceLength += 2; 1695 FormatTok.TokenLength -= 2; 1696 i += 2; 1697 } 1698 1699 if (FormatTok.Tok.is(tok::raw_identifier)) { 1700 IdentifierInfo &Info = IdentTable.get(Text); 1701 FormatTok.Tok.setIdentifierInfo(&Info); 1702 FormatTok.Tok.setKind(Info.getTokenID()); 1703 } 1704 1705 if (FormatTok.Tok.is(tok::greatergreater)) { 1706 FormatTok.Tok.setKind(tok::greater); 1707 GreaterStashed = true; 1708 } 1709 1710 return FormatTok; 1711 } 1712 1713private: 1714 FormatToken FormatTok; 1715 bool GreaterStashed; 1716 Lexer &Lex; 1717 SourceManager &SourceMgr; 1718 IdentifierTable IdentTable; 1719 1720 /// Returns the text of \c FormatTok. 1721 StringRef rawTokenText(Token &Tok) { 1722 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1723 Tok.getLength()); 1724 } 1725}; 1726 1727class Formatter : public UnwrappedLineConsumer { 1728public: 1729 Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex, 1730 SourceManager &SourceMgr, 1731 const std::vector<CharSourceRange> &Ranges) 1732 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1733 Whitespaces(SourceMgr), Ranges(Ranges) {} 1734 1735 virtual ~Formatter() {} 1736 1737 tooling::Replacements format() { 1738 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1739 UnwrappedLineParser Parser(Diag, Style, Tokens, *this); 1740 StructuralError = Parser.parse(); 1741 unsigned PreviousEndOfLineColumn = 0; 1742 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1743 TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]); 1744 Annotator.annotate(); 1745 } 1746 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1747 E = AnnotatedLines.end(); 1748 I != E; ++I) { 1749 const AnnotatedLine &TheLine = *I; 1750 if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) { 1751 unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level, 1752 TheLine.InPPDirective, 1753 PreviousEndOfLineColumn); 1754 tryFitMultipleLinesInOne(Indent, I, E); 1755 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1756 TheLine.First, Whitespaces, 1757 StructuralError); 1758 PreviousEndOfLineColumn = Formatter.format(); 1759 } else { 1760 // If we did not reformat this unwrapped line, the column at the end of 1761 // the last token is unchanged - thus, we can calculate the end of the 1762 // last token, and return the result. 1763 PreviousEndOfLineColumn = 1764 SourceMgr.getSpellingColumnNumber( 1765 TheLine.Last->FormatTok.Tok.getLocation()) + 1766 Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(), 1767 SourceMgr, Lex.getLangOpts()) - 1768 1; 1769 } 1770 } 1771 return Whitespaces.generateReplacements(); 1772 } 1773 1774private: 1775 /// \brief Tries to merge lines into one. 1776 /// 1777 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1778 /// if possible; note that \c I will be incremented when lines are merged. 1779 /// 1780 /// Returns whether the resulting \c Line can fit in a single line. 1781 void tryFitMultipleLinesInOne(unsigned Indent, 1782 std::vector<AnnotatedLine>::iterator &I, 1783 std::vector<AnnotatedLine>::iterator E) { 1784 unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent; 1785 1786 // We can never merge stuff if there are trailing line comments. 1787 if (I->Last->Type == TT_LineComment) 1788 return; 1789 1790 // Check whether the UnwrappedLine can be put onto a single line. If 1791 // so, this is bound to be the optimal solution (by definition) and we 1792 // don't need to analyze the entire solution space. 1793 if (I->Last->TotalLength > Limit) 1794 return; 1795 Limit -= I->Last->TotalLength; 1796 1797 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1798 return; 1799 1800 if (I->Last->is(tok::l_brace)) { 1801 tryMergeSimpleBlock(I, E, Limit); 1802 } else if (I->First.is(tok::kw_if)) { 1803 tryMergeSimpleIf(I, E, Limit); 1804 } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline || 1805 I->First.FormatTok.IsFirst)) { 1806 tryMergeSimplePPDirective(I, E, Limit); 1807 } 1808 return; 1809 } 1810 1811 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1812 std::vector<AnnotatedLine>::iterator E, 1813 unsigned Limit) { 1814 AnnotatedLine &Line = *I; 1815 if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline) 1816 return; 1817 if (I + 2 != E && (I + 2)->InPPDirective && 1818 !(I + 2)->First.FormatTok.HasUnescapedNewline) 1819 return; 1820 if (1 + (I + 1)->Last->TotalLength > Limit) 1821 return; 1822 join(Line, *(++I)); 1823 } 1824 1825 void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I, 1826 std::vector<AnnotatedLine>::iterator E, 1827 unsigned Limit) { 1828 if (!Style.AllowShortIfStatementsOnASingleLine) 1829 return; 1830 if ((I + 1)->InPPDirective != I->InPPDirective || 1831 ((I + 1)->InPPDirective && 1832 (I + 1)->First.FormatTok.HasUnescapedNewline)) 1833 return; 1834 AnnotatedLine &Line = *I; 1835 if (Line.Last->isNot(tok::r_paren)) 1836 return; 1837 if (1 + (I + 1)->Last->TotalLength > Limit) 1838 return; 1839 if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment) 1840 return; 1841 // Only inline simple if's (no nested if or else). 1842 if (I + 2 != E && (I + 2)->First.is(tok::kw_else)) 1843 return; 1844 join(Line, *(++I)); 1845 } 1846 1847 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1848 std::vector<AnnotatedLine>::iterator E, 1849 unsigned Limit){ 1850 // First, check that the current line allows merging. This is the case if 1851 // we're not in a control flow statement and the last token is an opening 1852 // brace. 1853 AnnotatedLine &Line = *I; 1854 bool AllowedTokens = 1855 Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) && 1856 Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) && 1857 Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) && 1858 Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) && 1859 // This gets rid of all ObjC @ keywords and methods. 1860 Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) && 1861 Line.First.isNot(tok::plus); 1862 if (!AllowedTokens) 1863 return; 1864 1865 AnnotatedToken *Tok = &(I + 1)->First; 1866 if (Tok->Children.empty() && Tok->is(tok::r_brace) && 1867 !Tok->MustBreakBefore && Tok->TotalLength <= Limit) { 1868 Tok->SpaceRequiredBefore = false; 1869 join(Line, *(I + 1)); 1870 I += 1; 1871 } else { 1872 // Check that we still have three lines and they fit into the limit. 1873 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1874 !nextTwoLinesFitInto(I, Limit)) 1875 return; 1876 1877 // Second, check that the next line does not contain any braces - if it 1878 // does, readability declines when putting it into a single line. 1879 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1880 return; 1881 do { 1882 if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace)) 1883 return; 1884 Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); 1885 } while (Tok != NULL); 1886 1887 // Last, check that the third line contains a single closing brace. 1888 Tok = &(I + 2)->First; 1889 if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || 1890 Tok->MustBreakBefore) 1891 return; 1892 1893 join(Line, *(I + 1)); 1894 join(Line, *(I + 2)); 1895 I += 2; 1896 } 1897 } 1898 1899 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1900 unsigned Limit) { 1901 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1902 Limit; 1903 } 1904 1905 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1906 A.Last->Children.push_back(B.First); 1907 while (!A.Last->Children.empty()) { 1908 A.Last->Children[0].Parent = A.Last; 1909 A.Last = &A.Last->Children[0]; 1910 } 1911 } 1912 1913 bool touchesRanges(const AnnotatedLine &TheLine) { 1914 const FormatToken *First = &TheLine.First.FormatTok; 1915 const FormatToken *Last = &TheLine.Last->FormatTok; 1916 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1917 First->Tok.getLocation(), 1918 Last->Tok.getLocation()); 1919 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1920 if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(), 1921 Ranges[i].getBegin()) && 1922 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1923 LineRange.getBegin())) 1924 return true; 1925 } 1926 return false; 1927 } 1928 1929 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1930 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1931 } 1932 1933 /// \brief Add a new line and the required indent before the first Token 1934 /// of the \c UnwrappedLine if there was no structural parsing error. 1935 /// Returns the indent level of the \c UnwrappedLine. 1936 unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level, 1937 bool InPPDirective, 1938 unsigned PreviousEndOfLineColumn) { 1939 const FormatToken &Tok = RootToken.FormatTok; 1940 if (!Tok.WhiteSpaceStart.isValid() || StructuralError) 1941 return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1; 1942 1943 unsigned Newlines = std::min(Tok.NewlinesBefore, 1944 Style.MaxEmptyLinesToKeep + 1); 1945 if (Newlines == 0 && !Tok.IsFirst) 1946 Newlines = 1; 1947 unsigned Indent = Level * 2; 1948 1949 bool IsAccessModifier = false; 1950 if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) || 1951 RootToken.is(tok::kw_private)) 1952 IsAccessModifier = true; 1953 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 1954 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 1955 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 1956 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 1957 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 1958 IsAccessModifier = true; 1959 1960 if (IsAccessModifier && 1961 static_cast<int>(Indent) + Style.AccessModifierOffset >= 0) 1962 Indent += Style.AccessModifierOffset; 1963 if (!InPPDirective || Tok.HasUnescapedNewline) { 1964 Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style); 1965 } else { 1966 Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, 1967 PreviousEndOfLineColumn, Style); 1968 } 1969 return Indent; 1970 } 1971 1972 DiagnosticsEngine &Diag; 1973 FormatStyle Style; 1974 Lexer &Lex; 1975 SourceManager &SourceMgr; 1976 WhitespaceManager Whitespaces; 1977 std::vector<CharSourceRange> Ranges; 1978 std::vector<AnnotatedLine> AnnotatedLines; 1979 bool StructuralError; 1980}; 1981 1982tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1983 SourceManager &SourceMgr, 1984 std::vector<CharSourceRange> Ranges, 1985 DiagnosticConsumer *DiagClient) { 1986 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 1987 OwningPtr<DiagnosticConsumer> DiagPrinter; 1988 if (DiagClient == 0) { 1989 DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts)); 1990 DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 1991 DiagClient = DiagPrinter.get(); 1992 } 1993 DiagnosticsEngine Diagnostics( 1994 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 1995 DiagClient, false); 1996 Diagnostics.setSourceManager(&SourceMgr); 1997 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 1998 return formatter.format(); 1999} 2000 2001LangOptions getFormattingLangOpts() { 2002 LangOptions LangOpts; 2003 LangOpts.CPlusPlus = 1; 2004 LangOpts.CPlusPlus11 = 1; 2005 LangOpts.Bool = 1; 2006 LangOpts.ObjC1 = 1; 2007 LangOpts.ObjC2 = 1; 2008 return LangOpts; 2009} 2010 2011} // namespace format 2012} // namespace clang 2013