Format.cpp revision 8134e1e575f862799c361b9c146eae7672010fa0
1//===--- Format.cpp - Format C++ code -------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements functions declared in Format.h. This will be 12/// split into separate files as we go. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#include "clang/Format/Format.h" 20#include "UnwrappedLineParser.h" 21#include "clang/Basic/Diagnostic.h" 22#include "clang/Basic/OperatorPrecedence.h" 23#include "clang/Basic/SourceManager.h" 24#include "clang/Frontend/TextDiagnosticPrinter.h" 25#include "clang/Lex/Lexer.h" 26#include <string> 27 28namespace clang { 29namespace format { 30 31enum TokenType { 32 TT_BinaryOperator, 33 TT_BlockComment, 34 TT_CastRParen, 35 TT_ConditionalExpr, 36 TT_CtorInitializerColon, 37 TT_IncludePath, 38 TT_LineComment, 39 TT_ObjCBlockLParen, 40 TT_ObjCDecl, 41 TT_ObjCMethodSpecifier, 42 TT_ObjCMethodExpr, 43 TT_ObjCSelectorStart, 44 TT_ObjCProperty, 45 TT_OverloadedOperator, 46 TT_PointerOrReference, 47 TT_PureVirtualSpecifier, 48 TT_TemplateCloser, 49 TT_TemplateOpener, 50 TT_TrailingUnaryOperator, 51 TT_UnaryOperator, 52 TT_Unknown 53}; 54 55enum LineType { 56 LT_Invalid, 57 LT_Other, 58 LT_PreprocessorDirective, 59 LT_VirtualFunctionDecl, 60 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 61 LT_ObjCMethodDecl, 62 LT_ObjCProperty // An @property line. 63}; 64 65class AnnotatedToken { 66public: 67 AnnotatedToken(const FormatToken &FormatTok) 68 : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false), 69 CanBreakBefore(false), MustBreakBefore(false), 70 ClosesTemplateDeclaration(false), Parent(NULL) {} 71 72 bool is(tok::TokenKind Kind) const { 73 return FormatTok.Tok.is(Kind); 74 } 75 bool isNot(tok::TokenKind Kind) const { 76 return FormatTok.Tok.isNot(Kind); 77 } 78 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 79 return FormatTok.Tok.isObjCAtKeyword(Kind); 80 } 81 82 FormatToken FormatTok; 83 84 TokenType Type; 85 86 bool SpaceRequiredBefore; 87 bool CanBreakBefore; 88 bool MustBreakBefore; 89 90 bool ClosesTemplateDeclaration; 91 92 std::vector<AnnotatedToken> Children; 93 AnnotatedToken *Parent; 94}; 95 96static prec::Level getPrecedence(const AnnotatedToken &Tok) { 97 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 98} 99 100FormatStyle getLLVMStyle() { 101 FormatStyle LLVMStyle; 102 LLVMStyle.ColumnLimit = 80; 103 LLVMStyle.MaxEmptyLinesToKeep = 1; 104 LLVMStyle.PointerAndReferenceBindToType = false; 105 LLVMStyle.AccessModifierOffset = -2; 106 LLVMStyle.SplitTemplateClosingGreater = true; 107 LLVMStyle.IndentCaseLabels = false; 108 LLVMStyle.SpacesBeforeTrailingComments = 1; 109 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 110 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 111 LLVMStyle.ObjCSpaceBeforeReturnType = true; 112 return LLVMStyle; 113} 114 115FormatStyle getGoogleStyle() { 116 FormatStyle GoogleStyle; 117 GoogleStyle.ColumnLimit = 80; 118 GoogleStyle.MaxEmptyLinesToKeep = 1; 119 GoogleStyle.PointerAndReferenceBindToType = true; 120 GoogleStyle.AccessModifierOffset = -1; 121 GoogleStyle.SplitTemplateClosingGreater = false; 122 GoogleStyle.IndentCaseLabels = true; 123 GoogleStyle.SpacesBeforeTrailingComments = 2; 124 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 125 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 126 GoogleStyle.ObjCSpaceBeforeReturnType = false; 127 return GoogleStyle; 128} 129 130struct OptimizationParameters { 131 unsigned PenaltyIndentLevel; 132 unsigned PenaltyLevelDecrease; 133 unsigned PenaltyExcessCharacter; 134}; 135 136/// \brief Replaces the whitespace in front of \p Tok. Only call once for 137/// each \c FormatToken. 138static void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 139 unsigned Spaces, const FormatStyle &Style, 140 SourceManager &SourceMgr, 141 tooling::Replacements &Replaces) { 142 Replaces.insert(tooling::Replacement( 143 SourceMgr, Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, 144 std::string(NewLines, '\n') + std::string(Spaces, ' '))); 145} 146 147/// \brief Like \c replaceWhitespace, but additionally adds right-aligned 148/// backslashes to escape newlines inside a preprocessor directive. 149/// 150/// This function and \c replaceWhitespace have the same behavior if 151/// \c Newlines == 0. 152static void replacePPWhitespace( 153 const AnnotatedToken &Tok, unsigned NewLines, unsigned Spaces, 154 unsigned WhitespaceStartColumn, const FormatStyle &Style, 155 SourceManager &SourceMgr, tooling::Replacements &Replaces) { 156 std::string NewLineText; 157 if (NewLines > 0) { 158 unsigned Offset = std::min<int>(Style.ColumnLimit - 1, 159 WhitespaceStartColumn); 160 for (unsigned i = 0; i < NewLines; ++i) { 161 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 162 NewLineText += "\\\n"; 163 Offset = 0; 164 } 165 } 166 Replaces.insert(tooling::Replacement(SourceMgr, Tok.FormatTok.WhiteSpaceStart, 167 Tok.FormatTok.WhiteSpaceLength, 168 NewLineText + std::string(Spaces, ' '))); 169} 170 171/// \brief Checks whether the (remaining) \c UnwrappedLine starting with 172/// \p RootToken fits into \p Limit columns. 173static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) { 174 unsigned Columns = RootToken.FormatTok.TokenLength; 175 bool FitsOnALine = true; 176 const AnnotatedToken *Tok = &RootToken; 177 while (!Tok->Children.empty()) { 178 Tok = &Tok->Children[0]; 179 Columns += (Tok->SpaceRequiredBefore ? 1 : 0) + Tok->FormatTok.TokenLength; 180 // A special case for the colon of a constructor initializer as this only 181 // needs to be put on a new line if the line needs to be split. 182 if (Columns > Limit || 183 (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) { 184 FitsOnALine = false; 185 break; 186 } 187 } 188 return FitsOnALine; 189} 190 191/// \brief Returns if a token is an Objective-C selector name. 192/// 193/// For example, "bar" is a selector name in [foo bar:(4 + 5)]. 194static bool isObjCSelectorName(const AnnotatedToken &Tok) { 195 return Tok.is(tok::identifier) && !Tok.Children.empty() && 196 Tok.Children[0].is(tok::colon) && 197 Tok.Children[0].Type == TT_ObjCMethodExpr; 198} 199 200class UnwrappedLineFormatter { 201public: 202 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 203 const UnwrappedLine &Line, unsigned FirstIndent, 204 bool FitsOnALine, const AnnotatedToken &RootToken, 205 tooling::Replacements &Replaces, bool StructuralError) 206 : Style(Style), SourceMgr(SourceMgr), Line(Line), 207 FirstIndent(FirstIndent), FitsOnALine(FitsOnALine), 208 RootToken(RootToken), Replaces(Replaces) { 209 Parameters.PenaltyIndentLevel = 15; 210 Parameters.PenaltyLevelDecrease = 30; 211 Parameters.PenaltyExcessCharacter = 1000000; 212 } 213 214 /// \brief Formats an \c UnwrappedLine. 215 /// 216 /// \returns The column after the last token in the last line of the 217 /// \c UnwrappedLine. 218 unsigned format() { 219 // Initialize state dependent on indent. 220 LineState State; 221 State.Column = FirstIndent; 222 State.NextToken = &RootToken; 223 State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent)); 224 State.ForLoopVariablePos = 0; 225 State.LineContainsContinuedForLoopSection = false; 226 State.StartOfLineLevel = 1; 227 228 // The first token has already been indented and thus consumed. 229 moveStateToNextToken(State); 230 231 // Start iterating at 1 as we have correctly formatted of Token #0 above. 232 while (State.NextToken != NULL) { 233 if (FitsOnALine) { 234 addTokenToState(false, false, State); 235 } else { 236 unsigned NoBreak = calcPenalty(State, false, UINT_MAX); 237 unsigned Break = calcPenalty(State, true, NoBreak); 238 addTokenToState(Break < NoBreak, false, State); 239 if (State.NextToken != NULL && 240 State.NextToken->Parent->Type == TT_CtorInitializerColon) { 241 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine && 242 !fitsIntoLimit(*State.NextToken, 243 getColumnLimit() - State.Column - 1)) 244 State.Stack.back().BreakAfterComma = true; 245 } 246 } 247 } 248 return State.Column; 249 } 250 251private: 252 struct ParenState { 253 ParenState(unsigned Indent, unsigned LastSpace) 254 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 255 BreakBeforeClosingBrace(false), BreakAfterComma(false) {} 256 257 /// \brief The position to which a specific parenthesis level needs to be 258 /// indented. 259 unsigned Indent; 260 261 /// \brief The position of the last space on each level. 262 /// 263 /// Used e.g. to break like: 264 /// functionCall(Parameter, otherCall( 265 /// OtherParameter)); 266 unsigned LastSpace; 267 268 /// \brief The position the first "<<" operator encountered on each level. 269 /// 270 /// Used to align "<<" operators. 0 if no such operator has been encountered 271 /// on a level. 272 unsigned FirstLessLess; 273 274 /// \brief Whether a newline needs to be inserted before the block's closing 275 /// brace. 276 /// 277 /// We only want to insert a newline before the closing brace if there also 278 /// was a newline after the beginning left brace. 279 bool BreakBeforeClosingBrace; 280 281 bool BreakAfterComma; 282 283 bool operator<(const ParenState &Other) const { 284 if (Indent != Other.Indent) 285 return Indent < Other.Indent; 286 if (LastSpace != Other.LastSpace) 287 return LastSpace < Other.LastSpace; 288 if (FirstLessLess != Other.FirstLessLess) 289 return FirstLessLess < Other.FirstLessLess; 290 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 291 return BreakBeforeClosingBrace; 292 if (BreakAfterComma != Other.BreakAfterComma) 293 return BreakAfterComma; 294 return false; 295 } 296 }; 297 298 /// \brief The current state when indenting a unwrapped line. 299 /// 300 /// As the indenting tries different combinations this is copied by value. 301 struct LineState { 302 /// \brief The number of used columns in the current line. 303 unsigned Column; 304 305 /// \brief The token that needs to be next formatted. 306 const AnnotatedToken *NextToken; 307 308 /// \brief The parenthesis level of the first token on the current line. 309 unsigned StartOfLineLevel; 310 311 /// \brief The column of the first variable in a for-loop declaration. 312 /// 313 /// Used to align the second variable if necessary. 314 unsigned ForLoopVariablePos; 315 316 /// \brief \c true if this line contains a continued for-loop section. 317 bool LineContainsContinuedForLoopSection; 318 319 /// \brief A stack keeping track of properties applying to parenthesis 320 /// levels. 321 std::vector<ParenState> Stack; 322 323 /// \brief Comparison operator to be able to used \c LineState in \c map. 324 bool operator<(const LineState &Other) const { 325 if (Other.NextToken != NextToken) 326 return Other.NextToken > NextToken; 327 if (Other.Column != Column) 328 return Other.Column > Column; 329 if (Other.StartOfLineLevel != StartOfLineLevel) 330 return Other.StartOfLineLevel > StartOfLineLevel; 331 if (Other.ForLoopVariablePos != ForLoopVariablePos) 332 return Other.ForLoopVariablePos < ForLoopVariablePos; 333 if (Other.LineContainsContinuedForLoopSection != 334 LineContainsContinuedForLoopSection) 335 return LineContainsContinuedForLoopSection; 336 return Other.Stack < Stack; 337 } 338 }; 339 340 /// \brief Appends the next token to \p State and updates information 341 /// necessary for indentation. 342 /// 343 /// Puts the token on the current line if \p Newline is \c true and adds a 344 /// line break and necessary indentation otherwise. 345 /// 346 /// If \p DryRun is \c false, also creates and stores the required 347 /// \c Replacement. 348 void addTokenToState(bool Newline, bool DryRun, LineState &State) { 349 const AnnotatedToken &Current = *State.NextToken; 350 const AnnotatedToken &Previous = *State.NextToken->Parent; 351 assert(State.Stack.size()); 352 unsigned ParenLevel = State.Stack.size() - 1; 353 354 if (Newline) { 355 unsigned WhitespaceStartColumn = State.Column; 356 if (Current.is(tok::r_brace)) { 357 State.Column = Line.Level * 2; 358 } else if (Current.is(tok::string_literal) && 359 Previous.is(tok::string_literal)) { 360 State.Column = State.Column - Previous.FormatTok.TokenLength; 361 } else if (Current.is(tok::lessless) && 362 State.Stack[ParenLevel].FirstLessLess != 0) { 363 State.Column = State.Stack[ParenLevel].FirstLessLess; 364 } else if (ParenLevel != 0 && 365 (Previous.is(tok::equal) || Current.is(tok::arrow) || 366 Current.is(tok::period) || Previous.is(tok::question) || 367 Previous.Type == TT_ConditionalExpr)) { 368 // Indent and extra 4 spaces after if we know the current expression is 369 // continued. Don't do that on the top level, as we already indent 4 370 // there. 371 State.Column = State.Stack[ParenLevel].Indent + 4; 372 } else if (RootToken.is(tok::kw_for) && Previous.is(tok::comma)) { 373 State.Column = State.ForLoopVariablePos; 374 } else if (State.NextToken->Parent->ClosesTemplateDeclaration) { 375 State.Column = State.Stack[ParenLevel].Indent - 4; 376 } else { 377 State.Column = State.Stack[ParenLevel].Indent; 378 } 379 380 // A line starting with a closing brace is assumed to be correct for the 381 // same level as before the opening brace. 382 State.StartOfLineLevel = ParenLevel + (Current.is(tok::r_brace) ? 0 : 1); 383 384 if (RootToken.is(tok::kw_for)) 385 State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi); 386 387 if (!DryRun) { 388 if (!Line.InPPDirective) 389 replaceWhitespace(Current.FormatTok, 1, State.Column, Style, 390 SourceMgr, Replaces); 391 else 392 replacePPWhitespace(Current.FormatTok, 1, State.Column, 393 WhitespaceStartColumn, Style, SourceMgr, 394 Replaces); 395 } 396 397 State.Stack[ParenLevel].LastSpace = State.Column; 398 if (Current.is(tok::colon) && State.NextToken->Type != TT_ConditionalExpr) 399 State.Stack[ParenLevel].Indent += 2; 400 } else { 401 if (Current.is(tok::equal) && RootToken.is(tok::kw_for)) 402 State.ForLoopVariablePos = State.Column - 403 Previous.FormatTok.TokenLength; 404 405 unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0; 406 if (State.NextToken->Type == TT_LineComment) 407 Spaces = Style.SpacesBeforeTrailingComments; 408 409 if (!DryRun) 410 replaceWhitespace(Current, 0, Spaces, Style, SourceMgr, Replaces); 411 412 // FIXME: Do we need to do this for assignments nested in other 413 // expressions? 414 if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 && 415 (getPrecedence(Previous) == prec::Assignment || 416 Previous.is(tok::kw_return))) 417 State.Stack[ParenLevel].Indent = State.Column + Spaces; 418 if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || 419 State.NextToken->Parent->Type == TT_TemplateOpener) 420 State.Stack[ParenLevel].Indent = State.Column + Spaces; 421 422 // Top-level spaces that are not part of assignments are exempt as that 423 // mostly leads to better results. 424 State.Column += Spaces; 425 if (Spaces > 0 && 426 (ParenLevel != 0 || getPrecedence(Previous) == prec::Assignment)) 427 State.Stack[ParenLevel].LastSpace = State.Column; 428 } 429 moveStateToNextToken(State); 430 if (Newline && Previous.is(tok::l_brace)) { 431 State.Stack.back().BreakBeforeClosingBrace = true; 432 } 433 } 434 435 /// \brief Mark the next token as consumed in \p State and modify its stacks 436 /// accordingly. 437 void moveStateToNextToken(LineState &State) { 438 const AnnotatedToken &Current = *State.NextToken; 439 assert(State.Stack.size()); 440 441 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 442 State.Stack.back().FirstLessLess = State.Column; 443 444 // If we encounter an opening (, [, { or <, we add a level to our stacks to 445 // prepare for the following tokens. 446 if (Current.is(tok::l_paren) || Current.is(tok::l_square) || 447 Current.is(tok::l_brace) || 448 State.NextToken->Type == TT_TemplateOpener) { 449 unsigned NewIndent; 450 if (Current.is(tok::l_brace)) { 451 // FIXME: This does not work with nested static initializers. 452 // Implement a better handling for static initializers and similar 453 // constructs. 454 NewIndent = Line.Level * 2 + 2; 455 } else { 456 NewIndent = 4 + State.Stack.back().LastSpace; 457 } 458 State.Stack.push_back( 459 ParenState(NewIndent, State.Stack.back().LastSpace)); 460 } 461 462 // If we encounter a closing ), ], } or >, we can remove a level from our 463 // stacks. 464 if (Current.is(tok::r_paren) || Current.is(tok::r_square) || 465 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 466 State.NextToken->Type == TT_TemplateCloser) { 467 State.Stack.pop_back(); 468 } 469 470 if (State.NextToken->Children.empty()) 471 State.NextToken = NULL; 472 else 473 State.NextToken = &State.NextToken->Children[0]; 474 475 State.Column += Current.FormatTok.TokenLength; 476 } 477 478 /// \brief Calculate the penalty for splitting after the token at \p Index. 479 unsigned splitPenalty(const AnnotatedToken &Tok) { 480 const AnnotatedToken &Left = Tok; 481 const AnnotatedToken &Right = Tok.Children[0]; 482 483 // In for-loops, prefer breaking at ',' and ';'. 484 if (RootToken.is(tok::kw_for) && 485 (Left.isNot(tok::comma) && Left.isNot(tok::semi))) 486 return 20; 487 488 if (Left.is(tok::semi) || Left.is(tok::comma) || 489 Left.ClosesTemplateDeclaration) 490 return 0; 491 492 // In Objective-C method expressions, prefer breaking before "param:" over 493 // breaking after it. 494 if (isObjCSelectorName(Right)) 495 return 0; 496 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 497 return 20; 498 499 if (Left.is(tok::l_paren)) 500 return 20; 501 502 if (Left.is(tok::question) || Left.Type == TT_ConditionalExpr) 503 return prec::Assignment; 504 prec::Level Level = getPrecedence(Left); 505 506 // Breaking after an assignment leads to a bad result as the two sides of 507 // the assignment are visually very close together. 508 if (Level == prec::Assignment) 509 return 50; 510 511 if (Level != prec::Unknown) 512 return Level; 513 514 if (Right.is(tok::arrow) || Right.is(tok::period)) 515 return 150; 516 517 return 3; 518 } 519 520 unsigned getColumnLimit() { 521 return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0); 522 } 523 524 /// \brief Calculate the number of lines needed to format the remaining part 525 /// of the unwrapped line. 526 /// 527 /// Assumes the formatting so far has led to 528 /// the \c LineSta \p State. If \p NewLine is set, a new line will be 529 /// added after the previous token. 530 /// 531 /// \param StopAt is used for optimization. If we can determine that we'll 532 /// definitely need at least \p StopAt additional lines, we already know of a 533 /// better solution. 534 unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) { 535 // We are at the end of the unwrapped line, so we don't need any more lines. 536 if (State.NextToken == NULL) 537 return 0; 538 539 if (!NewLine && State.NextToken->MustBreakBefore) 540 return UINT_MAX; 541 if (NewLine && !State.NextToken->CanBreakBefore) 542 return UINT_MAX; 543 if (!NewLine && State.NextToken->is(tok::r_brace) && 544 State.Stack.back().BreakBeforeClosingBrace) 545 return UINT_MAX; 546 if (!NewLine && State.NextToken->Parent->is(tok::semi) && 547 State.LineContainsContinuedForLoopSection) 548 return UINT_MAX; 549 if (!NewLine && State.NextToken->Parent->is(tok::comma) && 550 State.NextToken->Type != TT_LineComment && 551 State.Stack.back().BreakAfterComma) 552 return UINT_MAX; 553 554 unsigned CurrentPenalty = 0; 555 if (NewLine) { 556 CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() + 557 splitPenalty(*State.NextToken->Parent); 558 } else { 559 if (State.Stack.size() < State.StartOfLineLevel) 560 CurrentPenalty += Parameters.PenaltyLevelDecrease * 561 (State.StartOfLineLevel - State.Stack.size()); 562 } 563 564 addTokenToState(NewLine, true, State); 565 566 // Exceeding column limit is bad, assign penalty. 567 if (State.Column > getColumnLimit()) { 568 unsigned ExcessCharacters = State.Column - getColumnLimit(); 569 CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters; 570 } 571 572 if (StopAt <= CurrentPenalty) 573 return UINT_MAX; 574 StopAt -= CurrentPenalty; 575 StateMap::iterator I = Memory.find(State); 576 if (I != Memory.end()) { 577 // If this state has already been examined, we can safely return the 578 // previous result if we 579 // - have not hit the optimatization (and thus returned UINT_MAX) OR 580 // - are now computing for a smaller or equal StopAt. 581 unsigned SavedResult = I->second.first; 582 unsigned SavedStopAt = I->second.second; 583 if (SavedResult != UINT_MAX) 584 return SavedResult + CurrentPenalty; 585 else if (StopAt <= SavedStopAt) 586 return UINT_MAX; 587 } 588 589 unsigned NoBreak = calcPenalty(State, false, StopAt); 590 unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak)); 591 unsigned Result = std::min(NoBreak, WithBreak); 592 593 // We have to store 'Result' without adding 'CurrentPenalty' as the latter 594 // can depend on 'NewLine'. 595 Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt); 596 597 return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty; 598 } 599 600 FormatStyle Style; 601 SourceManager &SourceMgr; 602 const UnwrappedLine &Line; 603 const unsigned FirstIndent; 604 const bool FitsOnALine; 605 const AnnotatedToken &RootToken; 606 tooling::Replacements &Replaces; 607 608 // A map from an indent state to a pair (Result, Used-StopAt). 609 typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap; 610 StateMap Memory; 611 612 OptimizationParameters Parameters; 613}; 614 615/// \brief Determines extra information about the tokens comprising an 616/// \c UnwrappedLine. 617class TokenAnnotator { 618public: 619 TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style, 620 SourceManager &SourceMgr, Lexer &Lex) 621 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), 622 RootToken(Line.RootToken) {} 623 624 /// \brief A parser that gathers additional information about tokens. 625 /// 626 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and 627 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 628 /// into template parameter lists. 629 class AnnotatingParser { 630 public: 631 AnnotatingParser(AnnotatedToken &RootToken) 632 : CurrentToken(&RootToken), KeywordVirtualFound(false), 633 ColonIsObjCMethodExpr(false) {} 634 635 bool parseAngle() { 636 while (CurrentToken != NULL) { 637 if (CurrentToken->is(tok::greater)) { 638 CurrentToken->Type = TT_TemplateCloser; 639 next(); 640 return true; 641 } 642 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) || 643 CurrentToken->is(tok::r_brace)) 644 return false; 645 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) || 646 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon)) 647 return false; 648 if (!consumeToken()) 649 return false; 650 } 651 return false; 652 } 653 654 bool parseParens() { 655 if (CurrentToken != NULL && CurrentToken->is(tok::caret)) 656 CurrentToken->Parent->Type = TT_ObjCBlockLParen; 657 while (CurrentToken != NULL) { 658 if (CurrentToken->is(tok::r_paren)) { 659 next(); 660 return true; 661 } 662 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace)) 663 return false; 664 if (!consumeToken()) 665 return false; 666 } 667 return false; 668 } 669 670 bool parseSquare() { 671 if (!CurrentToken) 672 return false; 673 674 // A '[' could be an index subscript (after an indentifier or after 675 // ')' or ']'), or it could be the start of an Objective-C method 676 // expression. 677 AnnotatedToken *LSquare = CurrentToken->Parent; 678 bool StartsObjCMethodExpr = 679 !LSquare->Parent || LSquare->Parent->is(tok::colon) || 680 LSquare->Parent->is(tok::l_square) || 681 LSquare->Parent->is(tok::l_paren) || 682 LSquare->Parent->is(tok::kw_return) || 683 LSquare->Parent->is(tok::kw_throw) || 684 getBinOpPrecedence(LSquare->Parent->FormatTok.Tok.getKind(), 685 true, true) > prec::Unknown; 686 687 bool ColonWasObjCMethodExpr = ColonIsObjCMethodExpr; 688 if (StartsObjCMethodExpr) { 689 ColonIsObjCMethodExpr = true; 690 LSquare->Type = TT_ObjCMethodExpr; 691 } 692 693 while (CurrentToken != NULL) { 694 if (CurrentToken->is(tok::r_square)) { 695 if (StartsObjCMethodExpr) { 696 ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; 697 CurrentToken->Type = TT_ObjCMethodExpr; 698 } 699 next(); 700 return true; 701 } 702 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace)) 703 return false; 704 if (!consumeToken()) 705 return false; 706 } 707 return false; 708 } 709 710 bool parseBrace() { 711 while (CurrentToken != NULL) { 712 if (CurrentToken->is(tok::r_brace)) { 713 next(); 714 return true; 715 } 716 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square)) 717 return false; 718 if (!consumeToken()) 719 return false; 720 } 721 // Lines can currently end with '{'. 722 return true; 723 } 724 725 bool parseConditional() { 726 while (CurrentToken != NULL) { 727 if (CurrentToken->is(tok::colon)) { 728 CurrentToken->Type = TT_ConditionalExpr; 729 next(); 730 return true; 731 } 732 if (!consumeToken()) 733 return false; 734 } 735 return false; 736 } 737 738 bool parseTemplateDeclaration() { 739 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 740 CurrentToken->Type = TT_TemplateOpener; 741 next(); 742 if (!parseAngle()) 743 return false; 744 CurrentToken->Parent->ClosesTemplateDeclaration = true; 745 parseLine(); 746 return true; 747 } 748 return false; 749 } 750 751 bool consumeToken() { 752 AnnotatedToken *Tok = CurrentToken; 753 next(); 754 switch (Tok->FormatTok.Tok.getKind()) { 755 case tok::plus: 756 case tok::minus: 757 // At the start of the line, +/- specific ObjectiveC method 758 // declarations. 759 if (Tok->Parent == NULL) 760 Tok->Type = TT_ObjCMethodSpecifier; 761 break; 762 case tok::colon: 763 // Colons from ?: are handled in parseConditional(). 764 if (ColonIsObjCMethodExpr) 765 Tok->Type = TT_ObjCMethodExpr; 766 break; 767 case tok::l_paren: { 768 bool ParensWereObjCReturnType = 769 Tok->Parent && Tok->Parent->Type == TT_ObjCMethodSpecifier; 770 if (!parseParens()) 771 return false; 772 if (CurrentToken != NULL && CurrentToken->is(tok::colon)) { 773 CurrentToken->Type = TT_CtorInitializerColon; 774 next(); 775 } else if (CurrentToken != NULL && ParensWereObjCReturnType) { 776 CurrentToken->Type = TT_ObjCSelectorStart; 777 next(); 778 } 779 } break; 780 case tok::l_square: 781 if (!parseSquare()) 782 return false; 783 break; 784 case tok::l_brace: 785 if (!parseBrace()) 786 return false; 787 break; 788 case tok::less: 789 if (parseAngle()) 790 Tok->Type = TT_TemplateOpener; 791 else { 792 Tok->Type = TT_BinaryOperator; 793 CurrentToken = Tok; 794 next(); 795 } 796 break; 797 case tok::r_paren: 798 case tok::r_square: 799 return false; 800 case tok::r_brace: 801 // Lines can start with '}'. 802 if (Tok->Parent != NULL) 803 return false; 804 break; 805 case tok::greater: 806 Tok->Type = TT_BinaryOperator; 807 break; 808 case tok::kw_operator: 809 if (CurrentToken->is(tok::l_paren)) { 810 CurrentToken->Type = TT_OverloadedOperator; 811 next(); 812 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) { 813 CurrentToken->Type = TT_OverloadedOperator; 814 next(); 815 } 816 } else { 817 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) { 818 CurrentToken->Type = TT_OverloadedOperator; 819 next(); 820 } 821 } 822 break; 823 case tok::question: 824 parseConditional(); 825 break; 826 case tok::kw_template: 827 parseTemplateDeclaration(); 828 break; 829 default: 830 break; 831 } 832 return true; 833 } 834 835 void parseIncludeDirective() { 836 while (CurrentToken != NULL) { 837 CurrentToken->Type = TT_IncludePath; 838 next(); 839 } 840 } 841 842 void parsePreprocessorDirective() { 843 next(); 844 if (CurrentToken == NULL) 845 return; 846 // Hashes in the middle of a line can lead to any strange token 847 // sequence. 848 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 849 return; 850 switch ( 851 CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 852 case tok::pp_include: 853 case tok::pp_import: 854 parseIncludeDirective(); 855 break; 856 default: 857 break; 858 } 859 } 860 861 LineType parseLine() { 862 if (CurrentToken->is(tok::hash)) { 863 parsePreprocessorDirective(); 864 return LT_PreprocessorDirective; 865 } 866 while (CurrentToken != NULL) { 867 if (CurrentToken->is(tok::kw_virtual)) 868 KeywordVirtualFound = true; 869 if (!consumeToken()) 870 return LT_Invalid; 871 } 872 if (KeywordVirtualFound) 873 return LT_VirtualFunctionDecl; 874 return LT_Other; 875 } 876 877 void next() { 878 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 879 CurrentToken = &CurrentToken->Children[0]; 880 else 881 CurrentToken = NULL; 882 } 883 884 private: 885 AnnotatedToken *CurrentToken; 886 bool KeywordVirtualFound; 887 bool ColonIsObjCMethodExpr; 888 }; 889 890 void createAnnotatedTokens(AnnotatedToken &Current) { 891 if (!Current.FormatTok.Children.empty()) { 892 Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0])); 893 Current.Children.back().Parent = &Current; 894 createAnnotatedTokens(Current.Children.back()); 895 } 896 } 897 898 void calculateExtraInformation(AnnotatedToken &Current) { 899 Current.SpaceRequiredBefore = spaceRequiredBefore(Current); 900 901 if (Current.FormatTok.MustBreakBefore) { 902 Current.MustBreakBefore = true; 903 } else { 904 if (Current.Type == TT_CtorInitializerColon || Current.Parent->Type == 905 TT_LineComment || (Current.is(tok::string_literal) && 906 Current.Parent->is(tok::string_literal))) { 907 Current.MustBreakBefore = true; 908 } else { 909 Current.MustBreakBefore = false; 910 } 911 } 912 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current); 913 if (!Current.Children.empty()) 914 calculateExtraInformation(Current.Children[0]); 915 } 916 917 bool annotate() { 918 createAnnotatedTokens(RootToken); 919 920 AnnotatingParser Parser(RootToken); 921 CurrentLineType = Parser.parseLine(); 922 if (CurrentLineType == LT_Invalid) 923 return false; 924 925 determineTokenTypes(RootToken, /*IsRHS=*/false); 926 927 if (RootToken.Type == TT_ObjCMethodSpecifier) 928 CurrentLineType = LT_ObjCMethodDecl; 929 else if (RootToken.Type == TT_ObjCDecl) 930 CurrentLineType = LT_ObjCDecl; 931 else if (RootToken.Type == TT_ObjCProperty) 932 CurrentLineType = LT_ObjCProperty; 933 934 if (!RootToken.Children.empty()) 935 calculateExtraInformation(RootToken.Children[0]); 936 return true; 937 } 938 939 LineType getLineType() { 940 return CurrentLineType; 941 } 942 943 const AnnotatedToken &getRootToken() { 944 return RootToken; 945 } 946 947private: 948 void determineTokenTypes(AnnotatedToken &Current, bool IsRHS) { 949 if (getPrecedence(Current) == prec::Assignment || 950 Current.is(tok::kw_return) || Current.is(tok::kw_throw)) 951 IsRHS = true; 952 953 if (Current.Type == TT_Unknown) { 954 if (Current.is(tok::star) || Current.is(tok::amp)) { 955 Current.Type = determineStarAmpUsage(Current, IsRHS); 956 } else if (Current.is(tok::minus) || Current.is(tok::plus) || 957 Current.is(tok::caret)) { 958 Current.Type = determinePlusMinusCaretUsage(Current); 959 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) { 960 Current.Type = determineIncrementUsage(Current); 961 } else if (Current.is(tok::exclaim)) { 962 Current.Type = TT_UnaryOperator; 963 } else if (isBinaryOperator(Current)) { 964 Current.Type = TT_BinaryOperator; 965 } else if (Current.is(tok::comment)) { 966 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 967 Lex.getLangOpts())); 968 if (StringRef(Data).startswith("//")) 969 Current.Type = TT_LineComment; 970 else 971 Current.Type = TT_BlockComment; 972 } else if (Current.is(tok::r_paren) && 973 (Current.Parent->Type == TT_PointerOrReference || 974 Current.Parent->Type == TT_TemplateCloser) && 975 (Current.Children.empty() || 976 (Current.Children[0].isNot(tok::equal) && 977 Current.Children[0].isNot(tok::semi) && 978 Current.Children[0].isNot(tok::l_brace)))) { 979 // FIXME: We need to get smarter and understand more cases of casts. 980 Current.Type = TT_CastRParen; 981 } else if (Current.is(tok::at) && Current.Children.size()) { 982 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 983 case tok::objc_interface: 984 case tok::objc_implementation: 985 case tok::objc_protocol: 986 Current.Type = TT_ObjCDecl; 987 break; 988 case tok::objc_property: 989 Current.Type = TT_ObjCProperty; 990 break; 991 default: 992 break; 993 } 994 } 995 } 996 997 if (!Current.Children.empty()) 998 determineTokenTypes(Current.Children[0], IsRHS); 999 } 1000 1001 bool isBinaryOperator(const AnnotatedToken &Tok) { 1002 // Comma is a binary operator, but does not behave as such wrt. formatting. 1003 return getPrecedence(Tok) > prec::Comma; 1004 } 1005 1006 TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsRHS) { 1007 if (Tok.Parent == NULL) 1008 return TT_UnaryOperator; 1009 if (Tok.Children.size() == 0) 1010 return TT_Unknown; 1011 const FormatToken &PrevToken = Tok.Parent->FormatTok; 1012 const FormatToken &NextToken = Tok.Children[0].FormatTok; 1013 1014 if (PrevToken.Tok.is(tok::l_paren) || PrevToken.Tok.is(tok::l_square) || 1015 PrevToken.Tok.is(tok::comma) || PrevToken.Tok.is(tok::kw_return) || 1016 PrevToken.Tok.is(tok::colon) || Tok.Parent->Type == TT_BinaryOperator || 1017 Tok.Parent->Type == TT_CastRParen) 1018 return TT_UnaryOperator; 1019 1020 if (PrevToken.Tok.isLiteral() || PrevToken.Tok.is(tok::r_paren) || 1021 PrevToken.Tok.is(tok::r_square) || NextToken.Tok.isLiteral() || 1022 NextToken.Tok.is(tok::plus) || NextToken.Tok.is(tok::minus) || 1023 NextToken.Tok.is(tok::plusplus) || NextToken.Tok.is(tok::minusminus) || 1024 NextToken.Tok.is(tok::tilde) || NextToken.Tok.is(tok::exclaim) || 1025 NextToken.Tok.is(tok::l_paren) || NextToken.Tok.is(tok::l_square) || 1026 NextToken.Tok.is(tok::kw_alignof) || NextToken.Tok.is(tok::kw_sizeof)) 1027 return TT_BinaryOperator; 1028 1029 if (NextToken.Tok.is(tok::comma) || NextToken.Tok.is(tok::r_paren) || 1030 NextToken.Tok.is(tok::greater)) 1031 return TT_PointerOrReference; 1032 1033 // It is very unlikely that we are going to find a pointer or reference type 1034 // definition on the RHS of an assignment. 1035 if (IsRHS) 1036 return TT_BinaryOperator; 1037 1038 return TT_PointerOrReference; 1039 } 1040 1041 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 1042 // Use heuristics to recognize unary operators. 1043 if (Tok.Parent->is(tok::equal) || Tok.Parent->is(tok::l_paren) || 1044 Tok.Parent->is(tok::comma) || Tok.Parent->is(tok::l_square) || 1045 Tok.Parent->is(tok::question) || Tok.Parent->is(tok::colon) || 1046 Tok.Parent->is(tok::kw_return) || Tok.Parent->is(tok::kw_case) || 1047 Tok.Parent->is(tok::at) || Tok.Parent->is(tok::l_brace)) 1048 return TT_UnaryOperator; 1049 1050 // There can't be to consecutive binary operators. 1051 if (Tok.Parent->Type == TT_BinaryOperator) 1052 return TT_UnaryOperator; 1053 1054 // Fall back to marking the token as binary operator. 1055 return TT_BinaryOperator; 1056 } 1057 1058 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 1059 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 1060 if (Tok.Parent != NULL && Tok.Parent->is(tok::identifier)) 1061 return TT_TrailingUnaryOperator; 1062 1063 return TT_UnaryOperator; 1064 } 1065 1066 bool spaceRequiredBetween(const AnnotatedToken &Left, 1067 const AnnotatedToken &Right) { 1068 if (Right.is(tok::hashhash)) 1069 return Left.is(tok::hash); 1070 if (Left.is(tok::hashhash) || Left.is(tok::hash)) 1071 return Right.is(tok::hash); 1072 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) 1073 return false; 1074 if (Right.is(tok::less) && 1075 (Left.is(tok::kw_template) || 1076 (CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1077 return true; 1078 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1079 return false; 1080 if (Left.is(tok::exclaim) || Left.is(tok::tilde)) 1081 return false; 1082 if (Left.is(tok::at) && 1083 (Right.is(tok::identifier) || Right.is(tok::string_literal) || 1084 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) || 1085 Right.is(tok::l_paren) || Right.is(tok::l_brace) || 1086 Right.is(tok::kw_true) || Right.is(tok::kw_false))) 1087 return false; 1088 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) 1089 return false; 1090 if (Right.is(tok::amp) || Right.is(tok::star)) 1091 return Left.FormatTok.Tok.isLiteral() || 1092 (Left.isNot(tok::star) && Left.isNot(tok::amp) && 1093 !Style.PointerAndReferenceBindToType); 1094 if (Left.is(tok::amp) || Left.is(tok::star)) 1095 return Right.FormatTok.Tok.isLiteral() || 1096 Style.PointerAndReferenceBindToType; 1097 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1098 return false; 1099 if (Left.is(tok::l_square) || Right.is(tok::r_square)) 1100 return false; 1101 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 1102 return false; 1103 if (Left.is(tok::coloncolon) || 1104 (Right.is(tok::coloncolon) && 1105 (Left.is(tok::identifier) || Left.is(tok::greater)))) 1106 return false; 1107 if (Left.is(tok::period) || Right.is(tok::period)) 1108 return false; 1109 if (Left.is(tok::colon)) 1110 return Left.Type != TT_ObjCMethodExpr; 1111 if (Right.is(tok::colon)) 1112 return Right.Type != TT_ObjCMethodExpr; 1113 if (Left.is(tok::l_paren)) 1114 return false; 1115 if (Right.is(tok::l_paren)) { 1116 return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) || 1117 Left.is(tok::kw_for) || Left.is(tok::kw_while) || 1118 Left.is(tok::kw_switch) || Left.is(tok::kw_return) || 1119 Left.is(tok::kw_catch) || Left.is(tok::kw_new) || 1120 Left.is(tok::kw_delete); 1121 } 1122 if (Left.is(tok::at) && 1123 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1124 return false; 1125 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1126 return false; 1127 return true; 1128 } 1129 1130 bool spaceRequiredBefore(const AnnotatedToken &Tok) { 1131 if (CurrentLineType == LT_ObjCMethodDecl) { 1132 if (Tok.is(tok::identifier) && !Tok.Children.empty() && 1133 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) 1134 return true; 1135 if (Tok.is(tok::colon)) 1136 return false; 1137 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1138 return Style.ObjCSpaceBeforeReturnType || Tok.isNot(tok::l_paren); 1139 if (Tok.Type == TT_ObjCSelectorStart) 1140 return !Style.ObjCSpaceBeforeReturnType; 1141 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1142 // Don't space between ')' and <id> 1143 return false; 1144 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren)) 1145 // Don't space between ':' and '(' 1146 return false; 1147 } 1148 if (CurrentLineType == LT_ObjCProperty && 1149 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1150 return false; 1151 1152 if (Tok.Type == TT_IncludePath) 1153 return Tok.is(tok::less) || Tok.is(tok::string_literal); 1154 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1155 return true; 1156 if (Tok.Type == TT_OverloadedOperator) 1157 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) || 1158 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool); 1159 if (Tok.Parent->Type == TT_OverloadedOperator) 1160 return false; 1161 if (Tok.is(tok::colon)) 1162 return RootToken.isNot(tok::kw_case) && !Tok.Children.empty() && 1163 Tok.Type != TT_ObjCMethodExpr; 1164 if (Tok.Parent->Type == TT_UnaryOperator || 1165 Tok.Parent->Type == TT_CastRParen) 1166 return false; 1167 if (Tok.Type == TT_UnaryOperator) 1168 return Tok.Parent->isNot(tok::l_paren) && 1169 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) && 1170 (Tok.Parent->isNot(tok::colon) || 1171 Tok.Parent->Type != TT_ObjCMethodExpr); 1172 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1173 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type == 1174 TT_TemplateCloser && Style.SplitTemplateClosingGreater; 1175 } 1176 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1177 return true; 1178 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1179 return false; 1180 if (Tok.is(tok::less) && RootToken.is(tok::hash)) 1181 return true; 1182 if (Tok.Type == TT_TrailingUnaryOperator) 1183 return false; 1184 return spaceRequiredBetween(*Tok.Parent, Tok); 1185 } 1186 1187 bool canBreakBefore(const AnnotatedToken &Right) { 1188 const AnnotatedToken &Left = *Right.Parent; 1189 if (CurrentLineType == LT_ObjCMethodDecl) { 1190 if (Right.is(tok::identifier) && !Right.Children.empty() && 1191 Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) 1192 return true; 1193 if (Right.is(tok::identifier) && Left.is(tok::l_paren) && 1194 Left.Parent->is(tok::colon)) 1195 // Don't break this identifier as ':' or identifier 1196 // before it will break. 1197 return false; 1198 if (Right.is(tok::colon) && Left.is(tok::identifier) && 1199 Left.CanBreakBefore) 1200 // Don't break at ':' if identifier before it can beak. 1201 return false; 1202 } 1203 if (Right.Type == TT_IncludePath) 1204 return false; 1205 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1206 return false; 1207 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1208 return true; 1209 if (isObjCSelectorName(Right)) 1210 return true; 1211 if (Left.ClosesTemplateDeclaration) 1212 return true; 1213 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1214 Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr) 1215 return false; 1216 if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl) 1217 return false; 1218 1219 if (Right.is(tok::comment)) 1220 return !Right.Children.empty(); 1221 if (Right.is(tok::r_paren) || Right.is(tok::l_brace) || 1222 Right.is(tok::greater)) 1223 return false; 1224 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1225 Left.is(tok::comma) || Right.is(tok::lessless) || 1226 Right.is(tok::arrow) || Right.is(tok::period) || 1227 Right.is(tok::colon) || Left.is(tok::semi) || 1228 Left.is(tok::l_brace) || Left.is(tok::question) || 1229 Right.is(tok::r_brace) || Left.Type == TT_ConditionalExpr || 1230 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1231 Right.is(tok::identifier)) || 1232 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 1233 } 1234 1235 FormatStyle Style; 1236 SourceManager &SourceMgr; 1237 Lexer &Lex; 1238 LineType CurrentLineType; 1239 AnnotatedToken RootToken; 1240}; 1241 1242class LexerBasedFormatTokenSource : public FormatTokenSource { 1243public: 1244 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1245 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1246 IdentTable(Lex.getLangOpts()) { 1247 Lex.SetKeepWhitespaceMode(true); 1248 } 1249 1250 virtual FormatToken getNextToken() { 1251 if (GreaterStashed) { 1252 FormatTok.NewlinesBefore = 0; 1253 FormatTok.WhiteSpaceStart = 1254 FormatTok.Tok.getLocation().getLocWithOffset(1); 1255 FormatTok.WhiteSpaceLength = 0; 1256 GreaterStashed = false; 1257 return FormatTok; 1258 } 1259 1260 FormatTok = FormatToken(); 1261 Lex.LexFromRawLexer(FormatTok.Tok); 1262 StringRef Text = rawTokenText(FormatTok.Tok); 1263 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1264 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1265 FormatTok.IsFirst = true; 1266 1267 // Consume and record whitespace until we find a significant token. 1268 while (FormatTok.Tok.is(tok::unknown)) { 1269 FormatTok.NewlinesBefore += Text.count('\n'); 1270 FormatTok.HasUnescapedNewline = Text.count("\\\n") != 1271 FormatTok.NewlinesBefore; 1272 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1273 1274 if (FormatTok.Tok.is(tok::eof)) 1275 return FormatTok; 1276 Lex.LexFromRawLexer(FormatTok.Tok); 1277 Text = rawTokenText(FormatTok.Tok); 1278 } 1279 1280 // Now FormatTok is the next non-whitespace token. 1281 FormatTok.TokenLength = Text.size(); 1282 1283 // In case the token starts with escaped newlines, we want to 1284 // take them into account as whitespace - this pattern is quite frequent 1285 // in macro definitions. 1286 // FIXME: What do we want to do with other escaped spaces, and escaped 1287 // spaces or newlines in the middle of tokens? 1288 // FIXME: Add a more explicit test. 1289 unsigned i = 0; 1290 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1291 FormatTok.WhiteSpaceLength += 2; 1292 FormatTok.TokenLength -= 2; 1293 i += 2; 1294 } 1295 1296 if (FormatTok.Tok.is(tok::raw_identifier)) { 1297 IdentifierInfo &Info = IdentTable.get(Text); 1298 FormatTok.Tok.setIdentifierInfo(&Info); 1299 FormatTok.Tok.setKind(Info.getTokenID()); 1300 } 1301 1302 if (FormatTok.Tok.is(tok::greatergreater)) { 1303 FormatTok.Tok.setKind(tok::greater); 1304 GreaterStashed = true; 1305 } 1306 1307 return FormatTok; 1308 } 1309 1310private: 1311 FormatToken FormatTok; 1312 bool GreaterStashed; 1313 Lexer &Lex; 1314 SourceManager &SourceMgr; 1315 IdentifierTable IdentTable; 1316 1317 /// Returns the text of \c FormatTok. 1318 StringRef rawTokenText(Token &Tok) { 1319 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1320 Tok.getLength()); 1321 } 1322}; 1323 1324class Formatter : public UnwrappedLineConsumer { 1325public: 1326 Formatter(clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 1327 Lexer &Lex, SourceManager &SourceMgr, 1328 const std::vector<CharSourceRange> &Ranges) 1329 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1330 Ranges(Ranges) {} 1331 1332 virtual ~Formatter() {} 1333 1334 tooling::Replacements format() { 1335 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1336 UnwrappedLineParser Parser(Diag, Style, Tokens, *this); 1337 StructuralError = Parser.parse(); 1338 unsigned PreviousEndOfLineColumn = 0; 1339 for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(), 1340 E = UnwrappedLines.end(); 1341 I != E; ++I) { 1342 const UnwrappedLine &TheLine = *I; 1343 if (touchesRanges(TheLine)) { 1344 OwningPtr<TokenAnnotator> AnnotatedLine( 1345 new TokenAnnotator(TheLine, Style, SourceMgr, Lex)); 1346 if (!AnnotatedLine->annotate()) 1347 break; 1348 unsigned Indent = formatFirstToken(AnnotatedLine->getRootToken(), 1349 TheLine.Level, TheLine.InPPDirective, 1350 PreviousEndOfLineColumn); 1351 1352 UnwrappedLine Line(TheLine); 1353 bool FitsOnALine = tryFitMultipleLinesInOne(Indent, Line, AnnotatedLine, 1354 I, E); 1355 UnwrappedLineFormatter Formatter( 1356 Style, SourceMgr, Line, Indent, FitsOnALine, 1357 AnnotatedLine->getRootToken(), Replaces, StructuralError); 1358 PreviousEndOfLineColumn = Formatter.format(); 1359 } else { 1360 // If we did not reformat this unwrapped line, the column at the end of 1361 // the last token is unchanged - thus, we can calculate the end of the 1362 // last token, and return the result. 1363 const FormatToken *Last = getLastInLine(TheLine); 1364 PreviousEndOfLineColumn = 1365 SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) + 1366 Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr, 1367 Lex.getLangOpts()) - 1368 1; 1369 } 1370 } 1371 return Replaces; 1372 } 1373 1374private: 1375 /// \brief Tries to merge lines into one. 1376 /// 1377 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1378 /// if possible; note that \c I will be incremented when lines are merged. 1379 /// 1380 /// Returns whether the resulting \c Line can fit in a single line. 1381 bool tryFitMultipleLinesInOne(unsigned Indent, UnwrappedLine &Line, 1382 OwningPtr<TokenAnnotator> &AnnotatedLine, 1383 std::vector<UnwrappedLine>::iterator &I, 1384 std::vector<UnwrappedLine>::iterator E) { 1385 unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent; 1386 1387 // Check whether the UnwrappedLine can be put onto a single line. If 1388 // so, this is bound to be the optimal solution (by definition) and we 1389 // don't need to analyze the entire solution space. 1390 bool FitsOnALine = fitsIntoLimit(AnnotatedLine->getRootToken(), Limit); 1391 if (!FitsOnALine || I + 1 == E || I + 2 == E) 1392 return FitsOnALine; 1393 1394 // Try to merge the next two lines if possible. 1395 UnwrappedLine Combined(Line); 1396 1397 // First, check that the current line allows merging. This is the case if 1398 // we're not in a control flow statement and the last token is an opening 1399 // brace. 1400 FormatToken *Last = &Combined.RootToken; 1401 bool AllowedTokens = 1402 Last->Tok.isNot(tok::kw_if) && Last->Tok.isNot(tok::kw_while) && 1403 Last->Tok.isNot(tok::kw_do) && Last->Tok.isNot(tok::r_brace) && 1404 Last->Tok.isNot(tok::kw_else) && Last->Tok.isNot(tok::kw_try) && 1405 Last->Tok.isNot(tok::kw_catch) && Last->Tok.isNot(tok::kw_for) && 1406 // This gets rid of all ObjC @ keywords and methods. 1407 Last->Tok.isNot(tok::at) && Last->Tok.isNot(tok::minus) && 1408 Last->Tok.isNot(tok::plus); 1409 while (!Last->Children.empty()) 1410 Last = &Last->Children.back(); 1411 if (!Last->Tok.is(tok::l_brace)) 1412 return FitsOnALine; 1413 1414 // Second, check that the next line does not contain any braces - if it 1415 // does, readability declines when putting it into a single line. 1416 const FormatToken *Next = &(I + 1)->RootToken; 1417 while (Next) { 1418 AllowedTokens = AllowedTokens && !Next->Tok.is(tok::l_brace) && 1419 !Next->Tok.is(tok::r_brace); 1420 Last->Children.push_back(*Next); 1421 Last = &Last->Children[0]; 1422 Last->Children.clear(); 1423 Next = Next->Children.empty() ? NULL : &Next->Children.back(); 1424 } 1425 1426 // Last, check that the third line contains a single closing brace. 1427 Next = &(I + 2)->RootToken; 1428 AllowedTokens = AllowedTokens && Next->Tok.is(tok::r_brace); 1429 if (!Next->Children.empty() || !AllowedTokens) 1430 return FitsOnALine; 1431 Last->Children.push_back(*Next); 1432 1433 OwningPtr<TokenAnnotator> CombinedAnnotator( 1434 new TokenAnnotator(Combined, Style, SourceMgr, Lex)); 1435 if (CombinedAnnotator->annotate() && 1436 fitsIntoLimit(CombinedAnnotator->getRootToken(), Limit)) { 1437 // If the merged line fits, we use that instead and skip the next two 1438 // lines. 1439 AnnotatedLine.reset(CombinedAnnotator.take()); 1440 Line = Combined; 1441 I += 2; 1442 } 1443 return FitsOnALine; 1444 } 1445 1446 const FormatToken *getLastInLine(const UnwrappedLine &TheLine) { 1447 const FormatToken *Last = &TheLine.RootToken; 1448 while (!Last->Children.empty()) 1449 Last = &Last->Children.back(); 1450 return Last; 1451 } 1452 1453 bool touchesRanges(const UnwrappedLine &TheLine) { 1454 const FormatToken *First = &TheLine.RootToken; 1455 const FormatToken *Last = getLastInLine(TheLine); 1456 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1457 First->Tok.getLocation(), 1458 Last->Tok.getLocation()); 1459 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1460 if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(), 1461 Ranges[i].getBegin()) && 1462 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1463 LineRange.getBegin())) 1464 return true; 1465 } 1466 return false; 1467 } 1468 1469 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1470 UnwrappedLines.push_back(TheLine); 1471 } 1472 1473 /// \brief Add a new line and the required indent before the first Token 1474 /// of the \c UnwrappedLine if there was no structural parsing error. 1475 /// Returns the indent level of the \c UnwrappedLine. 1476 unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level, 1477 bool InPPDirective, 1478 unsigned PreviousEndOfLineColumn) { 1479 const FormatToken &Tok = RootToken.FormatTok; 1480 if (!Tok.WhiteSpaceStart.isValid() || StructuralError) 1481 return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1; 1482 1483 unsigned Newlines = std::min(Tok.NewlinesBefore, 1484 Style.MaxEmptyLinesToKeep + 1); 1485 if (Newlines == 0 && !Tok.IsFirst) 1486 Newlines = 1; 1487 unsigned Indent = Level * 2; 1488 1489 bool IsAccessModifier = false; 1490 if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) || 1491 RootToken.is(tok::kw_private)) 1492 IsAccessModifier = true; 1493 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 1494 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 1495 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 1496 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 1497 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 1498 IsAccessModifier = true; 1499 1500 if (IsAccessModifier && 1501 static_cast<int>(Indent) + Style.AccessModifierOffset >= 0) 1502 Indent += Style.AccessModifierOffset; 1503 if (!InPPDirective || Tok.HasUnescapedNewline) { 1504 replaceWhitespace(Tok, Newlines, Indent, Style, SourceMgr, Replaces); 1505 } else { 1506 replacePPWhitespace(Tok, Newlines, Indent, PreviousEndOfLineColumn, Style, 1507 SourceMgr, Replaces); 1508 } 1509 return Indent; 1510 } 1511 1512 clang::DiagnosticsEngine &Diag; 1513 FormatStyle Style; 1514 Lexer &Lex; 1515 SourceManager &SourceMgr; 1516 tooling::Replacements Replaces; 1517 std::vector<CharSourceRange> Ranges; 1518 std::vector<UnwrappedLine> UnwrappedLines; 1519 bool StructuralError; 1520}; 1521 1522tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1523 SourceManager &SourceMgr, 1524 std::vector<CharSourceRange> Ranges) { 1525 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 1526 TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), &*DiagOpts); 1527 DiagnosticPrinter.BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 1528 DiagnosticsEngine Diagnostics( 1529 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 1530 &DiagnosticPrinter, false); 1531 Diagnostics.setSourceManager(&SourceMgr); 1532 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 1533 return formatter.format(); 1534} 1535 1536LangOptions getFormattingLangOpts() { 1537 LangOptions LangOpts; 1538 LangOpts.CPlusPlus = 1; 1539 LangOpts.CPlusPlus11 = 1; 1540 LangOpts.Bool = 1; 1541 LangOpts.ObjC1 = 1; 1542 LangOpts.ObjC2 = 1; 1543 return LangOpts; 1544} 1545 1546} // namespace format 1547} // namespace clang 1548