Format.cpp revision 6f21a988990ff5872822dcb049bd8fc65ce3d236
1//===--- Format.cpp - Format C++ code -------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements functions declared in Format.h. This will be 12/// split into separate files as we go. 13/// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "format-formatter" 17 18#include "TokenAnnotator.h" 19#include "UnwrappedLineParser.h" 20#include "clang/Basic/Diagnostic.h" 21#include "clang/Basic/OperatorPrecedence.h" 22#include "clang/Basic/SourceManager.h" 23#include "clang/Format/Format.h" 24#include "clang/Frontend/TextDiagnosticPrinter.h" 25#include "clang/Lex/Lexer.h" 26#include "llvm/Support/Allocator.h" 27#include "llvm/Support/Debug.h" 28#include <queue> 29#include <string> 30 31namespace clang { 32namespace format { 33 34FormatStyle getLLVMStyle() { 35 FormatStyle LLVMStyle; 36 LLVMStyle.ColumnLimit = 80; 37 LLVMStyle.MaxEmptyLinesToKeep = 1; 38 LLVMStyle.PointerBindsToType = false; 39 LLVMStyle.DerivePointerBinding = false; 40 LLVMStyle.AccessModifierOffset = -2; 41 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 42 LLVMStyle.IndentCaseLabels = false; 43 LLVMStyle.SpacesBeforeTrailingComments = 1; 44 LLVMStyle.BinPackParameters = true; 45 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 46 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 47 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 48 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 49 LLVMStyle.PenaltyExcessCharacter = 1000000; 50 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5; 51 return LLVMStyle; 52} 53 54FormatStyle getGoogleStyle() { 55 FormatStyle GoogleStyle; 56 GoogleStyle.ColumnLimit = 80; 57 GoogleStyle.MaxEmptyLinesToKeep = 1; 58 
GoogleStyle.PointerBindsToType = true; 59 GoogleStyle.DerivePointerBinding = true; 60 GoogleStyle.AccessModifierOffset = -1; 61 GoogleStyle.Standard = FormatStyle::LS_Auto; 62 GoogleStyle.IndentCaseLabels = true; 63 GoogleStyle.SpacesBeforeTrailingComments = 2; 64 GoogleStyle.BinPackParameters = true; 65 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 66 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 67 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 68 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 69 GoogleStyle.PenaltyExcessCharacter = 1000000; 70 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100; 71 return GoogleStyle; 72} 73 74FormatStyle getChromiumStyle() { 75 FormatStyle ChromiumStyle = getGoogleStyle(); 76 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 77 ChromiumStyle.BinPackParameters = false; 78 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 79 ChromiumStyle.DerivePointerBinding = false; 80 return ChromiumStyle; 81} 82 83static bool isTrailingComment(const AnnotatedToken &Tok) { 84 return Tok.is(tok::comment) && 85 (Tok.Children.empty() || Tok.Children[0].MustBreakBefore); 86} 87 88// Returns the length of everything up to the first possible line break after 89// the ), ], } or > matching \c Tok. 90static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { 91 if (Tok.MatchingParen == NULL) 92 return 0; 93 AnnotatedToken *End = Tok.MatchingParen; 94 while (!End->Children.empty() && !End->Children[0].CanBreakBefore) { 95 End = &End->Children[0]; 96 } 97 return End->TotalLength - Tok.TotalLength + 1; 98} 99 100/// \brief Manages the whitespaces around tokens and their replacements. 101/// 102/// This includes special handling for certain constructs, e.g. the alignment of 103/// trailing line comments. 104class WhitespaceManager { 105public: 106 WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 107 108 /// \brief Replaces the whitespace in front of \p Tok. 
Only call once for 109 /// each \c AnnotatedToken. 110 void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 111 unsigned Spaces, unsigned WhitespaceStartColumn, 112 const FormatStyle &Style) { 113 // 2+ newlines mean an empty line separating logic scopes. 114 if (NewLines >= 2) 115 alignComments(); 116 117 // Align line comments if they are trailing or if they continue other 118 // trailing comments. 119 if (isTrailingComment(Tok)) { 120 // Remove the comment's trailing whitespace. 121 if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength) 122 Replaces.insert(tooling::Replacement( 123 SourceMgr, Tok.FormatTok.Tok.getLocation().getLocWithOffset( 124 Tok.FormatTok.TokenLength), 125 Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, "")); 126 127 // Align comment with other comments. 128 if (Tok.Parent != NULL || !Comments.empty()) { 129 if (Style.ColumnLimit >= 130 Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) { 131 Comments.push_back(StoredComment()); 132 Comments.back().Tok = Tok.FormatTok; 133 Comments.back().Spaces = Spaces; 134 Comments.back().NewLines = NewLines; 135 if (NewLines == 0) 136 Comments.back().MinColumn = WhitespaceStartColumn + Spaces; 137 else 138 Comments.back().MinColumn = Spaces; 139 Comments.back().MaxColumn = 140 Style.ColumnLimit - Tok.FormatTok.TokenLength; 141 return; 142 } 143 } 144 } 145 146 // If this line does not have a trailing comment, align the stored comments. 147 if (Tok.Children.empty() && !isTrailingComment(Tok)) 148 alignComments(); 149 storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); 150 } 151 152 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned 153 /// backslashes to escape newlines inside a preprocessor directive. 154 /// 155 /// This function and \c replaceWhitespace have the same behavior if 156 /// \c Newlines == 0. 
157 void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 158 unsigned Spaces, unsigned WhitespaceStartColumn, 159 const FormatStyle &Style) { 160 storeReplacement( 161 Tok.FormatTok, 162 getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style)); 163 } 164 165 /// \brief Inserts a line break into the middle of a token. 166 /// 167 /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line 168 /// break and \p Postfix before the rest of the token starts in the next line. 169 /// 170 /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are 171 /// used to generate the correct line break. 172 void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix, 173 StringRef Postfix, bool InPPDirective, unsigned Spaces, 174 unsigned WhitespaceStartColumn, const FormatStyle &Style) { 175 std::string NewLineText; 176 if (!InPPDirective) 177 NewLineText = getNewLineText(1, Spaces); 178 else 179 NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style); 180 std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); 181 SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart 182 .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset); 183 Replaces.insert( 184 tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText)); 185 } 186 187 /// \brief Returns all the \c Replacements created during formatting. 
188 const tooling::Replacements &generateReplacements() { 189 alignComments(); 190 return Replaces; 191 } 192 193private: 194 std::string getNewLineText(unsigned NewLines, unsigned Spaces) { 195 return std::string(NewLines, '\n') + std::string(Spaces, ' '); 196 } 197 198 std::string 199 getNewLineText(unsigned NewLines, unsigned Spaces, 200 unsigned WhitespaceStartColumn, const FormatStyle &Style) { 201 std::string NewLineText; 202 if (NewLines > 0) { 203 unsigned Offset = 204 std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn); 205 for (unsigned i = 0; i < NewLines; ++i) { 206 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 207 NewLineText += "\\\n"; 208 Offset = 0; 209 } 210 } 211 return NewLineText + std::string(Spaces, ' '); 212 } 213 214 /// \brief Structure to store a comment for later layout and alignment. 215 struct StoredComment { 216 FormatToken Tok; 217 unsigned MinColumn; 218 unsigned MaxColumn; 219 unsigned NewLines; 220 unsigned Spaces; 221 }; 222 SmallVector<StoredComment, 16> Comments; 223 typedef SmallVector<StoredComment, 16>::iterator comment_iterator; 224 225 /// \brief Try to align all stashed comments. 226 void alignComments() { 227 unsigned MinColumn = 0; 228 unsigned MaxColumn = UINT_MAX; 229 comment_iterator Start = Comments.begin(); 230 for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E; 231 ++I) { 232 if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { 233 alignComments(Start, I, MinColumn); 234 MinColumn = I->MinColumn; 235 MaxColumn = I->MaxColumn; 236 Start = I; 237 } else { 238 MinColumn = std::max(MinColumn, I->MinColumn); 239 MaxColumn = std::min(MaxColumn, I->MaxColumn); 240 } 241 } 242 alignComments(Start, Comments.end(), MinColumn); 243 Comments.clear(); 244 } 245 246 /// \brief Put all the comments between \p I and \p E into \p Column. 
247 void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { 248 while (I != E) { 249 unsigned Spaces = I->Spaces + Column - I->MinColumn; 250 storeReplacement(I->Tok, std::string(I->NewLines, '\n') + 251 std::string(Spaces, ' ')); 252 ++I; 253 } 254 } 255 256 /// \brief Stores \p Text as the replacement for the whitespace in front of 257 /// \p Tok. 258 void storeReplacement(const FormatToken &Tok, const std::string Text) { 259 // Don't create a replacement, if it does not change anything. 260 if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart), 261 Tok.WhiteSpaceLength) == Text) 262 return; 263 264 Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, 265 Tok.WhiteSpaceLength, Text)); 266 } 267 268 SourceManager &SourceMgr; 269 tooling::Replacements Replaces; 270}; 271 272class UnwrappedLineFormatter { 273public: 274 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 275 const AnnotatedLine &Line, unsigned FirstIndent, 276 const AnnotatedToken &RootToken, 277 WhitespaceManager &Whitespaces, bool StructuralError) 278 : Style(Style), SourceMgr(SourceMgr), Line(Line), 279 FirstIndent(FirstIndent), RootToken(RootToken), 280 Whitespaces(Whitespaces), Count(0) {} 281 282 /// \brief Formats an \c UnwrappedLine. 283 /// 284 /// \returns The column after the last token in the last line of the 285 /// \c UnwrappedLine. 286 unsigned format(const AnnotatedLine *NextLine) { 287 // Initialize state dependent on indent. 
288 LineState State; 289 State.Column = FirstIndent; 290 State.NextToken = &RootToken; 291 State.Stack.push_back( 292 ParenState(FirstIndent + 4, FirstIndent, !Style.BinPackParameters, 293 /*HasMultiParameterLine=*/ false)); 294 State.VariablePos = 0; 295 State.LineContainsContinuedForLoopSection = false; 296 State.ParenLevel = 0; 297 State.StartOfStringLiteral = 0; 298 State.StartOfLineLevel = State.ParenLevel; 299 300 DEBUG({ 301 DebugTokenState(*State.NextToken); 302 }); 303 304 // The first token has already been indented and thus consumed. 305 moveStateToNextToken(State, /*DryRun=*/ false); 306 307 // If everything fits on a single line, just put it there. 308 unsigned ColumnLimit = Style.ColumnLimit; 309 if (NextLine && NextLine->InPPDirective && 310 !NextLine->First.FormatTok.HasUnescapedNewline) 311 ColumnLimit = getColumnLimit(); 312 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 313 while (State.NextToken != NULL) { 314 addTokenToState(false, false, State); 315 } 316 return State.Column; 317 } 318 319 // If the ObjC method declaration does not fit on a line, we should format 320 // it with one arg per line. 321 if (Line.Type == LT_ObjCMethodDecl) 322 State.Stack.back().BreakBeforeParameter = true; 323 324 // Find best solution in solution space. 
325 return analyzeSolutionSpace(State); 326 } 327 328private: 329 void DebugTokenState(const AnnotatedToken &AnnotatedTok) { 330 const Token &Tok = AnnotatedTok.FormatTok.Tok; 331 llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 332 Tok.getLength()); 333 llvm::errs(); 334 } 335 336 struct ParenState { 337 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 338 bool HasMultiParameterLine) 339 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 340 BreakBeforeClosingBrace(false), QuestionColumn(0), 341 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 342 HasMultiParameterLine(HasMultiParameterLine), ColonPos(0), 343 StartOfFunctionCall(0) {} 344 345 /// \brief The position to which a specific parenthesis level needs to be 346 /// indented. 347 unsigned Indent; 348 349 /// \brief The position of the last space on each level. 350 /// 351 /// Used e.g. to break like: 352 /// functionCall(Parameter, otherCall( 353 /// OtherParameter)); 354 unsigned LastSpace; 355 356 /// \brief The position the first "<<" operator encountered on each level. 357 /// 358 /// Used to align "<<" operators. 0 if no such operator has been encountered 359 /// on a level. 360 unsigned FirstLessLess; 361 362 /// \brief Whether a newline needs to be inserted before the block's closing 363 /// brace. 364 /// 365 /// We only want to insert a newline before the closing brace if there also 366 /// was a newline after the beginning left brace. 367 bool BreakBeforeClosingBrace; 368 369 /// \brief The column of a \c ? in a conditional expression; 370 unsigned QuestionColumn; 371 372 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 373 /// lines, in this context. 374 bool AvoidBinPacking; 375 376 /// \brief Break after the next comma (or all the commas in this context if 377 /// \c AvoidBinPacking is \c true). 
378 bool BreakBeforeParameter; 379 380 /// \brief This context already has a line with more than one parameter. 381 bool HasMultiParameterLine; 382 383 /// \brief The position of the colon in an ObjC method declaration/call. 384 unsigned ColonPos; 385 386 /// \brief The start of the most recent function in a builder-type call. 387 unsigned StartOfFunctionCall; 388 389 bool operator<(const ParenState &Other) const { 390 if (Indent != Other.Indent) 391 return Indent < Other.Indent; 392 if (LastSpace != Other.LastSpace) 393 return LastSpace < Other.LastSpace; 394 if (FirstLessLess != Other.FirstLessLess) 395 return FirstLessLess < Other.FirstLessLess; 396 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 397 return BreakBeforeClosingBrace; 398 if (QuestionColumn != Other.QuestionColumn) 399 return QuestionColumn < Other.QuestionColumn; 400 if (AvoidBinPacking != Other.AvoidBinPacking) 401 return AvoidBinPacking; 402 if (BreakBeforeParameter != Other.BreakBeforeParameter) 403 return BreakBeforeParameter; 404 if (HasMultiParameterLine != Other.HasMultiParameterLine) 405 return HasMultiParameterLine; 406 if (ColonPos != Other.ColonPos) 407 return ColonPos < Other.ColonPos; 408 if (StartOfFunctionCall != Other.StartOfFunctionCall) 409 return StartOfFunctionCall < Other.StartOfFunctionCall; 410 return false; 411 } 412 }; 413 414 /// \brief The current state when indenting a unwrapped line. 415 /// 416 /// As the indenting tries different combinations this is copied by value. 417 struct LineState { 418 /// \brief The number of used columns in the current line. 419 unsigned Column; 420 421 /// \brief The token that needs to be next formatted. 422 const AnnotatedToken *NextToken; 423 424 /// \brief The column of the first variable name in a variable declaration. 425 /// 426 /// Used to align further variables if necessary. 427 unsigned VariablePos; 428 429 /// \brief \c true if this line contains a continued for-loop section. 
430 bool LineContainsContinuedForLoopSection; 431 432 /// \brief The level of nesting inside (), [], <> and {}. 433 unsigned ParenLevel; 434 435 /// \brief The \c ParenLevel at the start of this line. 436 unsigned StartOfLineLevel; 437 438 /// \brief The start column of the string literal, if we're in a string 439 /// literal sequence, 0 otherwise. 440 unsigned StartOfStringLiteral; 441 442 /// \brief A stack keeping track of properties applying to parenthesis 443 /// levels. 444 std::vector<ParenState> Stack; 445 446 /// \brief Comparison operator to be able to used \c LineState in \c map. 447 bool operator<(const LineState &Other) const { 448 if (NextToken != Other.NextToken) 449 return NextToken < Other.NextToken; 450 if (Column != Other.Column) 451 return Column < Other.Column; 452 if (VariablePos != Other.VariablePos) 453 return VariablePos < Other.VariablePos; 454 if (LineContainsContinuedForLoopSection != 455 Other.LineContainsContinuedForLoopSection) 456 return LineContainsContinuedForLoopSection; 457 if (ParenLevel != Other.ParenLevel) 458 return ParenLevel < Other.ParenLevel; 459 if (StartOfLineLevel != Other.StartOfLineLevel) 460 return StartOfLineLevel < Other.StartOfLineLevel; 461 if (StartOfStringLiteral != Other.StartOfStringLiteral) 462 return StartOfStringLiteral < Other.StartOfStringLiteral; 463 return Stack < Other.Stack; 464 } 465 }; 466 467 /// \brief Appends the next token to \p State and updates information 468 /// necessary for indentation. 469 /// 470 /// Puts the token on the current line if \p Newline is \c true and adds a 471 /// line break and necessary indentation otherwise. 472 /// 473 /// If \p DryRun is \c false, also creates and stores the required 474 /// \c Replacement. 
475 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 476 const AnnotatedToken &Current = *State.NextToken; 477 const AnnotatedToken &Previous = *State.NextToken->Parent; 478 assert(State.Stack.size()); 479 480 if (Current.Type == TT_ImplicitStringLiteral) { 481 State.Column += State.NextToken->FormatTok.WhiteSpaceLength + 482 State.NextToken->FormatTok.TokenLength; 483 if (State.NextToken->Children.empty()) 484 State.NextToken = NULL; 485 else 486 State.NextToken = &State.NextToken->Children[0]; 487 return 0; 488 } 489 490 if (Newline) { 491 unsigned WhitespaceStartColumn = State.Column; 492 if (Current.is(tok::r_brace)) { 493 State.Column = Line.Level * 2; 494 } else if (Current.is(tok::string_literal) && 495 State.StartOfStringLiteral != 0) { 496 State.Column = State.StartOfStringLiteral; 497 State.Stack.back().BreakBeforeParameter = true; 498 } else if (Current.is(tok::lessless) && 499 State.Stack.back().FirstLessLess != 0) { 500 State.Column = State.Stack.back().FirstLessLess; 501 } else if (State.ParenLevel != 0 && 502 (Previous.is(tok::equal) || Previous.is(tok::coloncolon) || 503 Current.is(tok::period) || Current.is(tok::arrow) || 504 Current.is(tok::question))) { 505 // Indent and extra 4 spaces after if we know the current expression is 506 // continued. Don't do that on the top level, as we already indent 4 507 // there. 
508 State.Column = std::max(State.Stack.back().LastSpace, 509 State.Stack.back().Indent) + 4; 510 } else if (Current.Type == TT_ConditionalExpr) { 511 State.Column = State.Stack.back().QuestionColumn; 512 } else if (Previous.is(tok::comma) && State.VariablePos != 0 && 513 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) || 514 State.ParenLevel == 0)) { 515 State.Column = State.VariablePos; 516 } else if (Previous.ClosesTemplateDeclaration || 517 (Current.Type == TT_StartOfName && State.ParenLevel == 0)) { 518 State.Column = State.Stack.back().Indent - 4; 519 } else if (Current.Type == TT_ObjCSelectorName) { 520 if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) { 521 State.Column = 522 State.Stack.back().ColonPos - Current.FormatTok.TokenLength; 523 } else { 524 State.Column = State.Stack.back().Indent; 525 State.Stack.back().ColonPos = 526 State.Column + Current.FormatTok.TokenLength; 527 } 528 } else if (Previous.Type == TT_ObjCMethodExpr || 529 Current.Type == TT_StartOfName) { 530 State.Column = State.Stack.back().Indent + 4; 531 } else { 532 State.Column = State.Stack.back().Indent; 533 } 534 535 if (Current.is(tok::question)) 536 State.Stack.back().BreakBeforeParameter = true; 537 if ((Previous.is(tok::comma) || Previous.is(tok::semi)) && 538 !State.Stack.back().AvoidBinPacking) 539 State.Stack.back().BreakBeforeParameter = false; 540 541 if (!DryRun) { 542 unsigned NewLines = 1; 543 if (Current.Type == TT_LineComment) 544 NewLines = 545 std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore, 546 Style.MaxEmptyLinesToKeep + 1)); 547 if (!Line.InPPDirective) 548 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 549 WhitespaceStartColumn, Style); 550 else 551 Whitespaces.replacePPWhitespace(Current, NewLines, State.Column, 552 WhitespaceStartColumn, Style); 553 } 554 555 State.Stack.back().LastSpace = State.Column; 556 State.StartOfLineLevel = State.ParenLevel; 557 if (Current.is(tok::colon) && Current.Type != 
TT_ConditionalExpr) 558 State.Stack.back().Indent += 2; 559 560 // Any break on this level means that the parent level has been broken 561 // and we need to avoid bin packing there. 562 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 563 State.Stack[i].BreakBeforeParameter = true; 564 } 565 if (Current.is(tok::period) || Current.is(tok::arrow)) 566 State.Stack.back().BreakBeforeParameter = true; 567 568 // If we break after {, we should also break before the corresponding }. 569 if (Previous.is(tok::l_brace)) 570 State.Stack.back().BreakBeforeClosingBrace = true; 571 572 if (State.Stack.back().AvoidBinPacking) { 573 // If we are breaking after '(', '{', '<', this is not bin packing 574 // unless AllowAllParametersOfDeclarationOnNextLine is false. 575 if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) || 576 (!Style.AllowAllParametersOfDeclarationOnNextLine && 577 Line.MustBeDeclaration)) 578 State.Stack.back().BreakBeforeParameter = true; 579 } 580 } else { 581 // FIXME: Put VariablePos into ParenState and remove second part of if(). 
582 if (Current.is(tok::equal) && 583 (RootToken.is(tok::kw_for) || State.ParenLevel == 0)) 584 State.VariablePos = State.Column - Previous.FormatTok.TokenLength; 585 586 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 587 588 if (!DryRun) 589 Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style); 590 591 if (Current.Type == TT_ObjCSelectorName && 592 State.Stack.back().ColonPos == 0) { 593 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 594 State.Column + Spaces + Current.FormatTok.TokenLength) 595 State.Stack.back().ColonPos = 596 State.Stack.back().Indent + Current.LongestObjCSelectorName; 597 else 598 State.Stack.back().ColonPos = 599 State.Column + Spaces + Current.FormatTok.TokenLength; 600 } 601 602 if (Current.Type != TT_LineComment && 603 (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || 604 State.NextToken->Parent->Type == TT_TemplateOpener)) 605 State.Stack.back().Indent = State.Column + Spaces; 606 if (Previous.is(tok::comma) && !isTrailingComment(Current)) 607 State.Stack.back().HasMultiParameterLine = true; 608 609 State.Column += Spaces; 610 if (Current.is(tok::l_paren) && Previous.is(tok::kw_if)) 611 // Treat the condition inside an if as if it was a second function 612 // parameter, i.e. let nested calls have an indent of 4. 613 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 614 else if (Previous.is(tok::comma) && State.ParenLevel != 0) 615 // Top-level spaces are exempt as that mostly leads to better results. 
616 State.Stack.back().LastSpace = State.Column; 617 else if ((Previous.Type == TT_BinaryOperator || 618 Previous.Type == TT_ConditionalExpr || 619 Previous.Type == TT_CtorInitializerColon) && 620 getPrecedence(Previous) != prec::Assignment) 621 State.Stack.back().LastSpace = State.Column; 622 else if (Previous.Type == TT_InheritanceColon) 623 State.Stack.back().Indent = State.Column; 624 else if (Previous.ParameterCount > 1 && 625 (Previous.is(tok::l_paren) || Previous.is(tok::l_square) || 626 Previous.is(tok::l_brace) || 627 Previous.Type == TT_TemplateOpener)) 628 // If this function has multiple parameters, indent nested calls from 629 // the start of the first parameter. 630 State.Stack.back().LastSpace = State.Column; 631 } 632 633 return moveStateToNextToken(State, DryRun); 634 } 635 636 /// \brief Mark the next token as consumed in \p State and modify its stacks 637 /// accordingly. 638 unsigned moveStateToNextToken(LineState &State, bool DryRun) { 639 const AnnotatedToken &Current = *State.NextToken; 640 assert(State.Stack.size()); 641 642 if (Current.Type == TT_InheritanceColon) 643 State.Stack.back().AvoidBinPacking = true; 644 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 645 State.Stack.back().FirstLessLess = State.Column; 646 if (Current.is(tok::question)) 647 State.Stack.back().QuestionColumn = State.Column; 648 if ((Current.is(tok::period) || Current.is(tok::arrow)) && 649 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 650 State.Stack.back().StartOfFunctionCall = 651 Current.LastInChainOfCalls ? 0 : State.Column; 652 if (Current.Type == TT_CtorInitializerColon) { 653 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 654 State.Stack.back().AvoidBinPacking = true; 655 State.Stack.back().BreakBeforeParameter = false; 656 } 657 658 // Insert scopes created by fake parenthesis. 
659 for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) { 660 ParenState NewParenState = State.Stack.back(); 661 NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent); 662 NewParenState.BreakBeforeParameter = false; 663 State.Stack.push_back(NewParenState); 664 } 665 666 // If we encounter an opening (, [, { or <, we add a level to our stacks to 667 // prepare for the following tokens. 668 if (Current.is(tok::l_paren) || Current.is(tok::l_square) || 669 Current.is(tok::l_brace) || 670 State.NextToken->Type == TT_TemplateOpener) { 671 unsigned NewIndent; 672 bool AvoidBinPacking; 673 if (Current.is(tok::l_brace)) { 674 NewIndent = 2 + State.Stack.back().LastSpace; 675 AvoidBinPacking = false; 676 } else { 677 NewIndent = 4 + std::max(State.Stack.back().LastSpace, 678 State.Stack.back().StartOfFunctionCall); 679 AvoidBinPacking = 680 !Style.BinPackParameters || State.Stack.back().AvoidBinPacking; 681 } 682 State.Stack.push_back( 683 ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking, 684 State.Stack.back().HasMultiParameterLine)); 685 ++State.ParenLevel; 686 } 687 688 // If this '[' opens an ObjC call, determine whether all parameters fit into 689 // one line and put one per line if they don't. 690 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 691 Current.MatchingParen != NULL) { 692 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 693 State.Stack.back().BreakBeforeParameter = true; 694 } 695 696 // If we encounter a closing ), ], } or >, we can remove a level from our 697 // stacks. 698 if (Current.is(tok::r_paren) || Current.is(tok::r_square) || 699 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 700 State.NextToken->Type == TT_TemplateCloser) { 701 State.Stack.pop_back(); 702 --State.ParenLevel; 703 } 704 705 // Remove scopes created by fake parenthesis. 
706 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 707 State.Stack.pop_back(); 708 } 709 710 if (Current.is(tok::string_literal)) { 711 State.StartOfStringLiteral = State.Column; 712 } else if (Current.isNot(tok::comment)) { 713 State.StartOfStringLiteral = 0; 714 } 715 716 State.Column += Current.FormatTok.TokenLength; 717 718 if (State.NextToken->Children.empty()) 719 State.NextToken = NULL; 720 else 721 State.NextToken = &State.NextToken->Children[0]; 722 723 return breakProtrudingToken(Current, State, DryRun); 724 } 725 726 /// \brief If the current token sticks out over the end of the line, break 727 /// it if possible. 728 unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, 729 bool DryRun) { 730 if (Current.isNot(tok::string_literal)) 731 return 0; 732 // Only break up default narrow strings. 733 if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0) 734 return 0; 735 736 unsigned Penalty = 0; 737 unsigned TailOffset = 0; 738 unsigned TailLength = Current.FormatTok.TokenLength; 739 unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; 740 unsigned OffsetFromStart = 0; 741 while (StartColumn + TailLength > getColumnLimit()) { 742 StringRef Text = StringRef( 743 Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength); 744 if (StartColumn + OffsetFromStart + 1 > getColumnLimit()) 745 break; 746 StringRef::size_type SplitPoint = getSplitPoint( 747 Text, getColumnLimit() - StartColumn - OffsetFromStart - 1); 748 if (SplitPoint == StringRef::npos) 749 break; 750 assert(SplitPoint != 0); 751 // +2, because 'Text' starts after the opening quotes, and does not 752 // include the closing quote we need to insert. 
753 unsigned WhitespaceStartColumn = 754 StartColumn + OffsetFromStart + SplitPoint + 2; 755 State.Stack.back().LastSpace = StartColumn; 756 if (!DryRun) { 757 Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"", 758 Line.InPPDirective, StartColumn, 759 WhitespaceStartColumn, Style); 760 } 761 TailOffset += SplitPoint + 1; 762 TailLength -= SplitPoint + 1; 763 OffsetFromStart = 1; 764 Penalty += Style.PenaltyExcessCharacter; 765 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 766 State.Stack[i].BreakBeforeParameter = true; 767 } 768 State.Column = StartColumn + TailLength; 769 return Penalty; 770 } 771 772 StringRef::size_type 773 getSplitPoint(StringRef Text, StringRef::size_type Offset) { 774 StringRef::size_type SpaceOffset = Text.rfind(' ', Offset); 775 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) 776 return SpaceOffset; 777 StringRef::size_type SlashOffset = Text.rfind('/', Offset); 778 if (SlashOffset != StringRef::npos && SlashOffset != 0) 779 return SlashOffset; 780 StringRef::size_type Split = getStartOfCharacter(Text, Offset); 781 if (Split != StringRef::npos && Split > 1) 782 // Do not split at 0. 
783 return Split - 1; 784 return StringRef::npos; 785 } 786 787 StringRef::size_type 788 getStartOfCharacter(StringRef Text, StringRef::size_type Offset) { 789 StringRef::size_type NextEscape = Text.find('\\'); 790 while (NextEscape != StringRef::npos && NextEscape < Offset) { 791 StringRef::size_type SequenceLength = 792 getEscapeSequenceLength(Text.substr(NextEscape)); 793 if (Offset < NextEscape + SequenceLength) 794 return NextEscape; 795 NextEscape = Text.find('\\', NextEscape + SequenceLength); 796 } 797 return Offset; 798 } 799 800 unsigned getEscapeSequenceLength(StringRef Text) { 801 assert(Text[0] == '\\'); 802 if (Text.size() < 2) 803 return 1; 804 805 switch (Text[1]) { 806 case 'u': 807 return 6; 808 case 'U': 809 return 10; 810 case 'x': 811 return getHexLength(Text); 812 default: 813 if (Text[1] >= '0' && Text[1] <= '7') 814 return getOctalLength(Text); 815 return 2; 816 } 817 } 818 819 unsigned getHexLength(StringRef Text) { 820 unsigned I = 2; // Point after '\x'. 821 while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || 822 (Text[I] >= 'a' && Text[I] <= 'f') || 823 (Text[I] >= 'A' && Text[I] <= 'F'))) { 824 ++I; 825 } 826 return I; 827 } 828 829 unsigned getOctalLength(StringRef Text) { 830 unsigned I = 1; 831 while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { 832 ++I; 833 } 834 return I; 835 } 836 837 unsigned getColumnLimit() { 838 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 839 } 840 841 /// \brief An edge in the solution space from \c Previous->State to \c State, 842 /// inserting a newline dependent on the \c NewLine. 843 struct StateNode { 844 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 845 : State(State), NewLine(NewLine), Previous(Previous) {} 846 LineState State; 847 bool NewLine; 848 StateNode *Previous; 849 }; 850 851 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 
852 /// 853 /// In case of equal penalties, we want to prefer states that were inserted 854 /// first. During state generation we make sure that we insert states first 855 /// that break the line as late as possible. 856 typedef std::pair<unsigned, unsigned> OrderedPenalty; 857 858 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 859 /// \c State has the given \c OrderedPenalty. 860 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 861 862 /// \brief The BFS queue type. 863 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 864 std::greater<QueueItem> > QueueType; 865 866 /// \brief Analyze the entire solution space starting from \p InitialState. 867 /// 868 /// This implements a variant of Dijkstra's algorithm on the graph that spans 869 /// the solution space (\c LineStates are the nodes). The algorithm tries to 870 /// find the shortest path (the one with lowest penalty) from \p InitialState 871 /// to a state where all tokens are placed. 872 unsigned analyzeSolutionSpace(LineState &InitialState) { 873 std::set<LineState> Seen; 874 875 // Insert start element into queue. 876 StateNode *Node = 877 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 878 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 879 ++Count; 880 881 // While not empty, take first element and follow edges. 882 while (!Queue.empty()) { 883 unsigned Penalty = Queue.top().first.first; 884 StateNode *Node = Queue.top().second; 885 if (Node->State.NextToken == NULL) { 886 DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n"); 887 break; 888 } 889 Queue.pop(); 890 891 if (!Seen.insert(Node->State).second) 892 // State already examined with lower penalty. 893 continue; 894 895 addNextStateToQueue(Penalty, Node, /*NewLine=*/ false); 896 addNextStateToQueue(Penalty, Node, /*NewLine=*/ true); 897 } 898 899 if (Queue.empty()) 900 // We were unable to find a solution, do nothing. 901 // FIXME: Add diagnostic? 
902 return 0; 903 904 // Reconstruct the solution. 905 reconstructPath(InitialState, Queue.top().second); 906 DEBUG(llvm::errs() << "---\n"); 907 908 // Return the column after the last token of the solution. 909 return Queue.top().second->State.Column; 910 } 911 912 void reconstructPath(LineState &State, StateNode *Current) { 913 // FIXME: This recursive implementation limits the possible number 914 // of tokens per line if compiled into a binary with small stack space. 915 // To become more independent of stack frame limitations we would need 916 // to also change the TokenAnnotator. 917 if (Current->Previous == NULL) 918 return; 919 reconstructPath(State, Current->Previous); 920 DEBUG({ 921 if (Current->NewLine) { 922 llvm::errs() 923 << "Penalty for splitting before " 924 << Current->Previous->State.NextToken->FormatTok.Tok.getName() 925 << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n"; 926 } 927 }); 928 addTokenToState(Current->NewLine, false, State); 929 } 930 931 /// \brief Add the following state to the analysis queue \c Queue. 932 /// 933 /// Assume the current state is \p PreviousNode and has been reached with a 934 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 
935 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 936 bool NewLine) { 937 if (NewLine && !canBreak(PreviousNode->State)) 938 return; 939 if (!NewLine && mustBreak(PreviousNode->State)) 940 return; 941 if (NewLine) 942 Penalty += PreviousNode->State.NextToken->SplitPenalty; 943 944 StateNode *Node = new (Allocator.Allocate()) 945 StateNode(PreviousNode->State, NewLine, PreviousNode); 946 Penalty += addTokenToState(NewLine, true, Node->State); 947 if (Node->State.Column > getColumnLimit()) { 948 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 949 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 950 } 951 952 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 953 ++Count; 954 } 955 956 /// \brief Returns \c true, if a line break after \p State is allowed. 957 bool canBreak(const LineState &State) { 958 if (!State.NextToken->CanBreakBefore && 959 !(State.NextToken->is(tok::r_brace) && 960 State.Stack.back().BreakBeforeClosingBrace)) 961 return false; 962 // This prevents breaks like: 963 // ... 964 // SomeParameter, OtherParameter).DoSomething( 965 // ... 966 // As they hide "DoSomething" and generally bad for readability. 967 if (State.NextToken->Parent->is(tok::l_paren) && 968 State.ParenLevel <= State.StartOfLineLevel) 969 return false; 970 // Trying to insert a parameter on a new line if there are already more than 971 // one parameter on the current line is bin packing. 972 if (State.Stack.back().HasMultiParameterLine && 973 State.Stack.back().AvoidBinPacking) 974 return false; 975 return true; 976 } 977 978 /// \brief Returns \c true, if a line break after \p State is mandatory. 
bool mustBreak(const LineState &State) {
  // The token itself demands a break in front of it.
  if (State.NextToken->MustBreakBefore)
    return true;
  // A closing brace whose enclosing scope asked for it to be wrapped.
  if (State.NextToken->is(tok::r_brace) &&
      State.Stack.back().BreakBeforeClosingBrace)
    return true;
  // After a ';' on a line that already contains a continued for-loop section.
  if (State.NextToken->Parent->is(tok::semi) &&
      State.LineContainsContinuedForLoopSection)
    return true;
  // The enclosing scope requested a break before each parameter: break after
  // ',' and ';' and before '?' / conditional-expression tokens, except when
  // the next token is a trailing comment or a closing paren/brace.
  if ((State.NextToken->Parent->is(tok::comma) ||
       State.NextToken->Parent->is(tok::semi) ||
       State.NextToken->is(tok::question) ||
       State.NextToken->Type == TT_ConditionalExpr) &&
      State.Stack.back().BreakBeforeParameter &&
      !isTrailingComment(*State.NextToken) &&
      State.NextToken->isNot(tok::r_paren) &&
      State.NextToken->isNot(tok::r_brace))
    return true;
  // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
  // out whether it is the first parameter. Clean this up.
  if (State.NextToken->Type == TT_ObjCSelectorName &&
      State.NextToken->LongestObjCSelectorName == 0 &&
      State.Stack.back().BreakBeforeParameter)
    return true;
  // Always break before a constructor initializer colon, and after a token
  // that closes a template declaration when not nested in parentheses.
  if ((State.NextToken->Type == TT_CtorInitializerColon ||
       (State.NextToken->Parent->ClosesTemplateDeclaration &&
        State.ParenLevel == 0)))
    return true;
  return false;
}

// Formatting options; held by value.
FormatStyle Style;
SourceManager &SourceMgr;
// The line being formatted; its InPPDirective flag is consulted by the
// methods in this class.
const AnnotatedLine &Line;
const unsigned FirstIndent;
const AnnotatedToken &RootToken;
// Receives the whitespace changes produced while formatting.
WhitespaceManager &Whitespaces;

// Backing storage for every StateNode created during the search.
llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
// Pending search states, cheapest first.
QueueType Queue;
// Increasing count of \c StateNode items we have created. This is used
// to create a deterministic order independent of the container.
1021 unsigned Count; 1022}; 1023 1024class LexerBasedFormatTokenSource : public FormatTokenSource { 1025public: 1026 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1027 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1028 IdentTable(Lex.getLangOpts()) { 1029 Lex.SetKeepWhitespaceMode(true); 1030 } 1031 1032 virtual FormatToken getNextToken() { 1033 if (GreaterStashed) { 1034 FormatTok.NewlinesBefore = 0; 1035 FormatTok.WhiteSpaceStart = 1036 FormatTok.Tok.getLocation().getLocWithOffset(1); 1037 FormatTok.WhiteSpaceLength = 0; 1038 GreaterStashed = false; 1039 return FormatTok; 1040 } 1041 1042 FormatTok = FormatToken(); 1043 Lex.LexFromRawLexer(FormatTok.Tok); 1044 StringRef Text = rawTokenText(FormatTok.Tok); 1045 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1046 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1047 FormatTok.IsFirst = true; 1048 1049 // Consume and record whitespace until we find a significant token. 1050 while (FormatTok.Tok.is(tok::unknown)) { 1051 unsigned Newlines = Text.count('\n'); 1052 if (Newlines > 0) 1053 FormatTok.LastNewlineOffset = 1054 FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1; 1055 unsigned EscapedNewlines = Text.count("\\\n"); 1056 FormatTok.NewlinesBefore += Newlines; 1057 FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines; 1058 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1059 1060 if (FormatTok.Tok.is(tok::eof)) 1061 return FormatTok; 1062 Lex.LexFromRawLexer(FormatTok.Tok); 1063 Text = rawTokenText(FormatTok.Tok); 1064 } 1065 1066 // Now FormatTok is the next non-whitespace token. 1067 FormatTok.TokenLength = Text.size(); 1068 1069 // In case the token starts with escaped newlines, we want to 1070 // take them into account as whitespace - this pattern is quite frequent 1071 // in macro definitions. 1072 // FIXME: What do we want to do with other escaped spaces, and escaped 1073 // spaces or newlines in the middle of tokens? 
1074 // FIXME: Add a more explicit test. 1075 unsigned i = 0; 1076 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1077 // FIXME: ++FormatTok.NewlinesBefore is missing... 1078 FormatTok.WhiteSpaceLength += 2; 1079 FormatTok.TokenLength -= 2; 1080 i += 2; 1081 } 1082 1083 if (FormatTok.Tok.is(tok::raw_identifier)) { 1084 IdentifierInfo &Info = IdentTable.get(Text); 1085 FormatTok.Tok.setIdentifierInfo(&Info); 1086 FormatTok.Tok.setKind(Info.getTokenID()); 1087 } 1088 1089 if (FormatTok.Tok.is(tok::greatergreater)) { 1090 FormatTok.Tok.setKind(tok::greater); 1091 FormatTok.TokenLength = 1; 1092 GreaterStashed = true; 1093 } 1094 1095 // If we reformat comments, we remove trailing whitespace. Update the length 1096 // accordingly. 1097 if (FormatTok.Tok.is(tok::comment)) 1098 FormatTok.TokenLength = Text.rtrim().size(); 1099 1100 return FormatTok; 1101 } 1102 1103 IdentifierTable &getIdentTable() { return IdentTable; } 1104 1105private: 1106 FormatToken FormatTok; 1107 bool GreaterStashed; 1108 Lexer &Lex; 1109 SourceManager &SourceMgr; 1110 IdentifierTable IdentTable; 1111 1112 /// Returns the text of \c FormatTok. 
StringRef rawTokenText(Token &Tok) {
  // View directly into the source buffer; no copy is made.
  return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
                   Tok.getLength());
}
};

/// \brief Drives the formatting of a whole input: collects the unwrapped
/// lines reported by the parser, annotates them and produces the whitespace
/// replacements for every line that touches one of the requested ranges.
class Formatter : public UnwrappedLineConsumer {
public:
  Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
            SourceManager &SourceMgr,
            const std::vector<CharSourceRange> &Ranges)
      : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
        Whitespaces(SourceMgr), Ranges(Ranges) {}

  virtual ~Formatter() {}

  /// \brief Adapts \c Style to conventions found in the code to be formatted.
  ///
  /// Two properties are derived, each only if the style asks for it:
  /// pointer binding (\c DerivePointerBinding) from how existing pointer and
  /// reference tokens are surrounded by whitespace, and the language standard
  /// (\c LS_Auto) from whether two template closers appear without
  /// whitespace in between.
  void deriveLocalStyle() {
    unsigned CountBoundToVariable = 0;
    unsigned CountBoundToType = 0;
    bool HasCpp03IncompatibleFormat = false;
    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      if (AnnotatedLines[i].First.Children.empty())
        continue;
      // Walk the chain of first children, starting at the second token of the
      // line. The loop stops at the token without children, which is
      // therefore not inspected itself.
      AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
      while (!Tok->Children.empty()) {
        if (Tok->Type == TT_PointerOrReference) {
          // Whitespace on exactly one side tells which side the '*' or '&'
          // is attached to; whitespace on both or neither side is ignored.
          bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
          bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
          if (SpacesBefore && !SpacesAfter)
            ++CountBoundToVariable;
          else if (!SpacesBefore && SpacesAfter)
            ++CountBoundToType;
        }

        // Two adjacent template closers without whitespace between them.
        if (Tok->Type == TT_TemplateCloser &&
            Tok->Parent->Type == TT_TemplateCloser &&
            Tok->FormatTok.WhiteSpaceLength == 0)
          HasCpp03IncompatibleFormat = true;
        Tok = &Tok->Children[0];
      }
    }
    if (Style.DerivePointerBinding) {
      // Majority vote; on a tie the configured value is kept.
      if (CountBoundToType > CountBoundToVariable)
        Style.PointerBindsToType = true;
      else if (CountBoundToType < CountBoundToVariable)
        Style.PointerBindsToType = false;
    }
    if (Style.Standard == FormatStyle::LS_Auto) {
      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
                                                  : FormatStyle::LS_Cpp03;
    }
  }

  /// \brief Parses and annotates the input and formats every line that
  /// touches one of the requested ranges (or was moved by formatting the line
  /// before it).
  ///
  /// \returns the replacements generated by the whitespace manager.
  tooling::Replacements format() {
    // Parsing reports the unwrapped lines back through
    // consumeUnwrappedLine(), which fills AnnotatedLines.
    LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
    UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
    StructuralError = Parser.parse();
    unsigned PreviousEndOfLineColumn = 0;
    TokenAnnotator Annotator(Style, SourceMgr, Lex,
                             Tokens.getIdentTable().get("in"));
    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      Annotator.annotate(AnnotatedLines[i]);
    }
    // The style is adapted after annotation (token types are needed) but
    // before the formatting information is calculated.
    deriveLocalStyle();
    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      Annotator.calculateFormattingInformation(AnnotatedLines[i]);
    }
    // Indent per nesting level; -1 marks a level whose indent is unknown.
    std::vector<int> IndentForLevel;
    bool PreviousLineWasTouched = false;
    for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
                                              E = AnnotatedLines.end();
         I != E; ++I) {
      const AnnotatedLine &TheLine = *I;
      int Offset = getIndentOffset(TheLine.First);
      // Grow the table to cover this level, then forget all deeper levels.
      while (IndentForLevel.size() <= TheLine.Level)
        IndentForLevel.push_back(-1);
      IndentForLevel.resize(TheLine.Level + 1);
      // A line that starts on the same source line as a reformatted
      // predecessor has to be reformatted as well.
      bool WasMoved =
          PreviousLineWasTouched && TheLine.First.FormatTok.NewlinesBefore == 0;
      if (TheLine.First.is(tok::eof)) {
        // Keep at most one newline at the end of the file, and only touch it
        // if the preceding line was reformatted.
        if (PreviousLineWasTouched) {
          unsigned NewLines =
              std::min(TheLine.First.FormatTok.NewlinesBefore, 1u);
          Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
                                        /*WhitespaceStartColumn*/ 0, Style);
        }
      } else if (TheLine.Type != LT_Invalid &&
                 (WasMoved || touchesLine(TheLine))) {
        unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
        unsigned Indent = LevelIndent;
        // Apply the (possibly negative) offset unless it would push the
        // indent below zero.
        if (static_cast<int>(Indent) + Offset >= 0)
          Indent += Offset;
        if (!TheLine.First.FormatTok.WhiteSpaceStart.isValid() ||
            StructuralError) {
          // Leave the start of the line where it is and take over its
          // existing column as the indent.
          Indent = LevelIndent = SourceMgr.getSpellingColumnNumber(
              TheLine.First.FormatTok.Tok.getLocation()) - 1;
        } else {
          formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
                           PreviousEndOfLineColumn);
        }
        // May join following lines into this one and advance I past them.
        tryFitMultipleLinesInOne(Indent, I, E);
        UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
                                         TheLine.First, Whitespaces,
                                         StructuralError);
        PreviousEndOfLineColumn =
            Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
        IndentForLevel[TheLine.Level] = LevelIndent;
        PreviousLineWasTouched = true;
      } else {
        // The line is not reformatted. If it starts a source line, remember
        // its existing indent (with the offset undone) for its level.
        if (TheLine.First.FormatTok.NewlinesBefore > 0 ||
            TheLine.First.FormatTok.IsFirst) {
          unsigned Indent = SourceMgr.getSpellingColumnNumber(
              TheLine.First.FormatTok.Tok.getLocation()) - 1;
          unsigned LevelIndent = Indent;
          if (static_cast<int>(LevelIndent) - Offset >= 0)
            LevelIndent -= Offset;
          IndentForLevel[TheLine.Level] = LevelIndent;

          // Remove trailing whitespace of the previous line if it was touched.
          if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
            formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
                             PreviousEndOfLineColumn);
        }
        // If we did not reformat this unwrapped line, the column at the end of
        // the last token is unchanged - thus, we can calculate the end of the
        // last token.
        PreviousEndOfLineColumn =
            SourceMgr.getSpellingColumnNumber(
                TheLine.Last->FormatTok.Tok.getLocation()) +
            Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
                                   SourceMgr, Lex.getLangOpts()) - 1;
        PreviousLineWasTouched = false;
      }
    }
    return Whitespaces.generateReplacements();
  }

private:
  /// \brief Get the indent of \p Level from \p IndentForLevel.
  ///
  /// \p IndentForLevel must contain the indent for the level \c l
  /// at \p IndentForLevel[l], or a value < 0 if the indent for
  /// that level is unknown.
1256 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1257 if (IndentForLevel[Level] != -1) 1258 return IndentForLevel[Level]; 1259 if (Level == 0) 1260 return 0; 1261 return getIndent(IndentForLevel, Level - 1) + 2; 1262 } 1263 1264 /// \brief Get the offset of the line relatively to the level. 1265 /// 1266 /// For example, 'public:' labels in classes are offset by 1 or 2 1267 /// characters to the left from their level. 1268 int getIndentOffset(const AnnotatedToken &RootToken) { 1269 bool IsAccessModifier = false; 1270 if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) || 1271 RootToken.is(tok::kw_private)) 1272 IsAccessModifier = true; 1273 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 1274 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 1275 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 1276 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 1277 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 1278 IsAccessModifier = true; 1279 1280 if (IsAccessModifier) 1281 return Style.AccessModifierOffset; 1282 return 0; 1283 } 1284 1285 /// \brief Tries to merge lines into one. 1286 /// 1287 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1288 /// if possible; note that \c I will be incremented when lines are merged. 1289 /// 1290 /// Returns whether the resulting \c Line can fit in a single line. 1291 void tryFitMultipleLinesInOne(unsigned Indent, 1292 std::vector<AnnotatedLine>::iterator &I, 1293 std::vector<AnnotatedLine>::iterator E) { 1294 // We can never merge stuff if there are trailing line comments. 1295 if (I->Last->Type == TT_LineComment) 1296 return; 1297 1298 unsigned Limit = Style.ColumnLimit - Indent; 1299 // If we already exceed the column limit, we set 'Limit' to 0. The different 1300 // tryMerge..() functions can then decide whether to still do merging. 1301 Limit = I->Last->TotalLength > Limit ? 
0 : Limit - I->Last->TotalLength; 1302 1303 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1304 return; 1305 1306 if (I->Last->is(tok::l_brace)) { 1307 tryMergeSimpleBlock(I, E, Limit); 1308 } else if (I->First.is(tok::kw_if)) { 1309 tryMergeSimpleIf(I, E, Limit); 1310 } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline || 1311 I->First.FormatTok.IsFirst)) { 1312 tryMergeSimplePPDirective(I, E, Limit); 1313 } 1314 return; 1315 } 1316 1317 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1318 std::vector<AnnotatedLine>::iterator E, 1319 unsigned Limit) { 1320 if (Limit == 0) 1321 return; 1322 AnnotatedLine &Line = *I; 1323 if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline) 1324 return; 1325 if (I + 2 != E && (I + 2)->InPPDirective && 1326 !(I + 2)->First.FormatTok.HasUnescapedNewline) 1327 return; 1328 if (1 + (I + 1)->Last->TotalLength > Limit) 1329 return; 1330 join(Line, *(++I)); 1331 } 1332 1333 void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I, 1334 std::vector<AnnotatedLine>::iterator E, 1335 unsigned Limit) { 1336 if (Limit == 0) 1337 return; 1338 if (!Style.AllowShortIfStatementsOnASingleLine) 1339 return; 1340 if ((I + 1)->InPPDirective != I->InPPDirective || 1341 ((I + 1)->InPPDirective && 1342 (I + 1)->First.FormatTok.HasUnescapedNewline)) 1343 return; 1344 AnnotatedLine &Line = *I; 1345 if (Line.Last->isNot(tok::r_paren)) 1346 return; 1347 if (1 + (I + 1)->Last->TotalLength > Limit) 1348 return; 1349 if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment) 1350 return; 1351 // Only inline simple if's (no nested if or else). 1352 if (I + 2 != E && (I + 2)->First.is(tok::kw_else)) 1353 return; 1354 join(Line, *(++I)); 1355 } 1356 1357 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1358 std::vector<AnnotatedLine>::iterator E, 1359 unsigned Limit) { 1360 // First, check that the current line allows merging. 
This is the case if 1361 // we're not in a control flow statement and the last token is an opening 1362 // brace. 1363 AnnotatedLine &Line = *I; 1364 bool AllowedTokens = 1365 Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) && 1366 Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) && 1367 Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) && 1368 Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) && 1369 // This gets rid of all ObjC @ keywords and methods. 1370 Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) && 1371 Line.First.isNot(tok::plus); 1372 if (!AllowedTokens) 1373 return; 1374 1375 AnnotatedToken *Tok = &(I + 1)->First; 1376 if (Tok->Children.empty() && Tok->is(tok::r_brace) && 1377 !Tok->MustBreakBefore) { 1378 // We merge empty blocks even if the line exceeds the column limit. 1379 Tok->SpacesRequiredBefore = 0; 1380 Tok->CanBreakBefore = true; 1381 join(Line, *(I + 1)); 1382 I += 1; 1383 } else if (Limit != 0) { 1384 // Check that we still have three lines and they fit into the limit. 1385 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1386 !nextTwoLinesFitInto(I, Limit)) 1387 return; 1388 1389 // Second, check that the next line does not contain any braces - if it 1390 // does, readability declines when putting it into a single line. 1391 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1392 return; 1393 do { 1394 if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace)) 1395 return; 1396 Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); 1397 } while (Tok != NULL); 1398 1399 // Last, check that the third line contains a single closing brace. 
1400 Tok = &(I + 2)->First; 1401 if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || 1402 Tok->MustBreakBefore) 1403 return; 1404 1405 join(Line, *(I + 1)); 1406 join(Line, *(I + 2)); 1407 I += 2; 1408 } 1409 } 1410 1411 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1412 unsigned Limit) { 1413 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1414 Limit; 1415 } 1416 1417 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1418 unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore; 1419 A.Last->Children.push_back(B.First); 1420 while (!A.Last->Children.empty()) { 1421 A.Last->Children[0].Parent = A.Last; 1422 A.Last->Children[0].TotalLength += LengthA; 1423 A.Last = &A.Last->Children[0]; 1424 } 1425 } 1426 1427 bool touchesRanges(const CharSourceRange &Range) { 1428 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1429 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1430 Ranges[i].getBegin()) && 1431 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1432 Range.getBegin())) 1433 return true; 1434 } 1435 return false; 1436 } 1437 1438 bool touchesLine(const AnnotatedLine &TheLine) { 1439 const FormatToken *First = &TheLine.First.FormatTok; 1440 const FormatToken *Last = &TheLine.Last->FormatTok; 1441 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1442 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset), 1443 Last->Tok.getLocation()); 1444 return touchesRanges(LineRange); 1445 } 1446 1447 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1448 const FormatToken *First = &TheLine.First.FormatTok; 1449 CharSourceRange LineRange = CharSourceRange::getCharRange( 1450 First->WhiteSpaceStart, 1451 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset)); 1452 return touchesRanges(LineRange); 1453 } 1454 1455 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1456 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1457 } 1458 
1459 /// \brief Add a new line and the required indent before the first Token 1460 /// of the \c UnwrappedLine if there was no structural parsing error. 1461 /// Returns the indent level of the \c UnwrappedLine. 1462 void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent, 1463 bool InPPDirective, unsigned PreviousEndOfLineColumn) { 1464 const FormatToken &Tok = RootToken.FormatTok; 1465 1466 unsigned Newlines = 1467 std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1468 if (Newlines == 0 && !Tok.IsFirst) 1469 Newlines = 1; 1470 1471 if (!InPPDirective || Tok.HasUnescapedNewline) { 1472 Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style); 1473 } else { 1474 Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, 1475 PreviousEndOfLineColumn, Style); 1476 } 1477 } 1478 1479 DiagnosticsEngine &Diag; 1480 FormatStyle Style; 1481 Lexer &Lex; 1482 SourceManager &SourceMgr; 1483 WhitespaceManager Whitespaces; 1484 std::vector<CharSourceRange> Ranges; 1485 std::vector<AnnotatedLine> AnnotatedLines; 1486 bool StructuralError; 1487}; 1488 1489tooling::Replacements 1490reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1491 std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) { 1492 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 1493 OwningPtr<DiagnosticConsumer> DiagPrinter; 1494 if (DiagClient == 0) { 1495 DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts)); 1496 DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 1497 DiagClient = DiagPrinter.get(); 1498 } 1499 DiagnosticsEngine Diagnostics( 1500 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 1501 DiagClient, false); 1502 Diagnostics.setSourceManager(&SourceMgr); 1503 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 1504 return formatter.format(); 1505} 1506 1507LangOptions getFormattingLangOpts() { 1508 LangOptions LangOpts; 1509 
LangOpts.CPlusPlus = 1; 1510 LangOpts.CPlusPlus11 = 1; 1511 LangOpts.Bool = 1; 1512 LangOpts.ObjC1 = 1; 1513 LangOpts.ObjC2 = 1; 1514 return LangOpts; 1515} 1516 1517} // namespace format 1518} // namespace clang 1519