Format.cpp revision 088dab50b76d235cd5b0e701048efccfcea3677e
1//===--- Format.cpp - Format C++ code -------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements functions declared in Format.h. This will be 12/// split into separate files as we go. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#include "clang/Format/Format.h" 20#include "UnwrappedLineParser.h" 21#include "clang/Basic/Diagnostic.h" 22#include "clang/Basic/OperatorPrecedence.h" 23#include "clang/Basic/SourceManager.h" 24#include "clang/Frontend/TextDiagnosticPrinter.h" 25#include "clang/Lex/Lexer.h" 26#include <string> 27 28namespace clang { 29namespace format { 30 31enum TokenType { 32 TT_BinaryOperator, 33 TT_BlockComment, 34 TT_CastRParen, 35 TT_ConditionalExpr, 36 TT_CtorInitializerColon, 37 TT_DirectorySeparator, 38 TT_LineComment, 39 TT_ObjCBlockLParen, 40 TT_ObjCDecl, 41 TT_ObjCMethodSpecifier, 42 TT_ObjCSelectorStart, 43 TT_ObjCProperty, 44 TT_OverloadedOperator, 45 TT_PointerOrReference, 46 TT_PureVirtualSpecifier, 47 TT_TemplateCloser, 48 TT_TemplateOpener, 49 TT_TrailingUnaryOperator, 50 TT_UnaryOperator, 51 TT_Unknown 52}; 53 54enum LineType { 55 LT_Invalid, 56 LT_Other, 57 LT_PreprocessorDirective, 58 LT_VirtualFunctionDecl, 59 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 60 LT_ObjCMethodDecl, 61 LT_ObjCProperty // An @property line. 62}; 63 64class AnnotatedToken { 65public: 66 AnnotatedToken(const FormatToken &FormatTok) 67 : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false), 68 CanBreakBefore(false), MustBreakBefore(false), 69 ClosesTemplateDeclaration(false), Parent(NULL) {} 70 71 bool is(tok::TokenKind Kind) const { 72 return FormatTok.Tok.is(Kind); 73 } 74 bool isNot(tok::TokenKind Kind) const { 75 return FormatTok.Tok.isNot(Kind); 76 } 77 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 78 return FormatTok.Tok.isObjCAtKeyword(Kind); 79 } 80 81 FormatToken FormatTok; 82 83 TokenType Type; 84 85 bool SpaceRequiredBefore; 86 bool CanBreakBefore; 87 bool MustBreakBefore; 88 89 bool ClosesTemplateDeclaration; 90 91 std::vector<AnnotatedToken> Children; 92 AnnotatedToken *Parent; 93}; 94 95static prec::Level getPrecedence(const AnnotatedToken &Tok) { 96 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 97} 98 99FormatStyle getLLVMStyle() { 100 FormatStyle LLVMStyle; 101 LLVMStyle.ColumnLimit = 80; 102 LLVMStyle.MaxEmptyLinesToKeep = 1; 103 LLVMStyle.PointerAndReferenceBindToType = false; 104 LLVMStyle.AccessModifierOffset = -2; 105 LLVMStyle.SplitTemplateClosingGreater = true; 106 LLVMStyle.IndentCaseLabels = false; 107 LLVMStyle.SpacesBeforeTrailingComments = 1; 108 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 109 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 110 LLVMStyle.ObjCSpaceBeforeReturnType = true; 111 return LLVMStyle; 112} 113 114FormatStyle getGoogleStyle() { 115 FormatStyle GoogleStyle; 116 GoogleStyle.ColumnLimit = 80; 117 GoogleStyle.MaxEmptyLinesToKeep = 1; 118 GoogleStyle.PointerAndReferenceBindToType = true; 119 GoogleStyle.AccessModifierOffset = -1; 120 GoogleStyle.SplitTemplateClosingGreater = false; 121 GoogleStyle.IndentCaseLabels = true; 122 GoogleStyle.SpacesBeforeTrailingComments = 2; 123 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 124 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 125 GoogleStyle.ObjCSpaceBeforeReturnType = false; 126 return GoogleStyle; 127} 128 129struct OptimizationParameters { 130 unsigned PenaltyIndentLevel; 131 unsigned PenaltyLevelDecrease; 132 unsigned PenaltyExcessCharacter; 133}; 134 135/// \brief Replaces the whitespace in front of \p Tok. Only call once for 136/// each \c FormatToken. 137static void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 138 unsigned Spaces, const FormatStyle &Style, 139 SourceManager &SourceMgr, 140 tooling::Replacements &Replaces) { 141 Replaces.insert(tooling::Replacement( 142 SourceMgr, Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, 143 std::string(NewLines, '\n') + std::string(Spaces, ' '))); 144} 145 146/// \brief Like \c replaceWhitespace, but additionally adds right-aligned 147/// backslashes to escape newlines inside a preprocessor directive. 148/// 149/// This function and \c replaceWhitespace have the same behavior if 150/// \c Newlines == 0. 151static void replacePPWhitespace( 152 const AnnotatedToken &Tok, unsigned NewLines, unsigned Spaces, 153 unsigned WhitespaceStartColumn, const FormatStyle &Style, 154 SourceManager &SourceMgr, tooling::Replacements &Replaces) { 155 std::string NewLineText; 156 if (NewLines > 0) { 157 unsigned Offset = std::min<int>(Style.ColumnLimit - 1, 158 WhitespaceStartColumn); 159 for (unsigned i = 0; i < NewLines; ++i) { 160 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 161 NewLineText += "\\\n"; 162 Offset = 0; 163 } 164 } 165 Replaces.insert(tooling::Replacement(SourceMgr, Tok.FormatTok.WhiteSpaceStart, 166 Tok.FormatTok.WhiteSpaceLength, 167 NewLineText + std::string(Spaces, ' '))); 168} 169 170/// \brief Checks whether the (remaining) \c UnwrappedLine starting with 171/// \p RootToken fits into \p Limit columns. 172bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) { 173 unsigned Columns = RootToken.FormatTok.TokenLength; 174 bool FitsOnALine = true; 175 const AnnotatedToken *Tok = &RootToken; 176 while (!Tok->Children.empty()) { 177 Tok = &Tok->Children[0]; 178 Columns += (Tok->SpaceRequiredBefore ? 1 : 0) + Tok->FormatTok.TokenLength; 179 // A special case for the colon of a constructor initializer as this only 180 // needs to be put on a new line if the line needs to be split. 181 if (Columns > Limit || 182 (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) { 183 FitsOnALine = false; 184 break; 185 } 186 } 187 return FitsOnALine; 188} 189 190class UnwrappedLineFormatter { 191public: 192 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 193 const UnwrappedLine &Line, unsigned FirstIndent, 194 bool FitsOnALine, LineType CurrentLineType, 195 const AnnotatedToken &RootToken, 196 tooling::Replacements &Replaces, bool StructuralError) 197 : Style(Style), SourceMgr(SourceMgr), Line(Line), 198 FirstIndent(FirstIndent), FitsOnALine(FitsOnALine), 199 CurrentLineType(CurrentLineType), RootToken(RootToken), 200 Replaces(Replaces) { 201 Parameters.PenaltyIndentLevel = 15; 202 Parameters.PenaltyLevelDecrease = 30; 203 Parameters.PenaltyExcessCharacter = 1000000; 204 } 205 206 /// \brief Formats an \c UnwrappedLine. 207 /// 208 /// \returns The column after the last token in the last line of the 209 /// \c UnwrappedLine. 210 unsigned format() { 211 // Initialize state dependent on indent. 212 LineState State; 213 State.Column = FirstIndent; 214 State.NextToken = &RootToken; 215 State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent)); 216 State.ForLoopVariablePos = 0; 217 State.LineContainsContinuedForLoopSection = false; 218 State.StartOfLineLevel = 1; 219 220 // The first token has already been indented and thus consumed. 221 moveStateToNextToken(State); 222 223 // Start iterating at 1 as we have correctly formatted of Token #0 above. 224 while (State.NextToken != NULL) { 225 if (FitsOnALine) { 226 addTokenToState(false, false, State); 227 } else { 228 unsigned NoBreak = calcPenalty(State, false, UINT_MAX); 229 unsigned Break = calcPenalty(State, true, NoBreak); 230 addTokenToState(Break < NoBreak, false, State); 231 if (State.NextToken != NULL && 232 State.NextToken->Parent->Type == TT_CtorInitializerColon) { 233 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine && 234 !fitsIntoLimit(*State.NextToken, 235 getColumnLimit() - State.Column - 1)) 236 State.Stack.back().BreakAfterComma = true; 237 } 238 } 239 } 240 return State.Column; 241 } 242 243private: 244 struct ParenState { 245 ParenState(unsigned Indent, unsigned LastSpace) 246 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 247 BreakBeforeClosingBrace(false), BreakAfterComma(false) {} 248 249 /// \brief The position to which a specific parenthesis level needs to be 250 /// indented. 251 unsigned Indent; 252 253 /// \brief The position of the last space on each level. 254 /// 255 /// Used e.g. to break like: 256 /// functionCall(Parameter, otherCall( 257 /// OtherParameter)); 258 unsigned LastSpace; 259 260 /// \brief The position the first "<<" operator encountered on each level. 261 /// 262 /// Used to align "<<" operators. 0 if no such operator has been encountered 263 /// on a level. 264 unsigned FirstLessLess; 265 266 /// \brief Whether a newline needs to be inserted before the block's closing 267 /// brace. 268 /// 269 /// We only want to insert a newline before the closing brace if there also 270 /// was a newline after the beginning left brace. 271 bool BreakBeforeClosingBrace; 272 273 bool BreakAfterComma; 274 275 bool operator<(const ParenState &Other) const { 276 if (Indent != Other.Indent) 277 return Indent < Other.Indent; 278 if (LastSpace != Other.LastSpace) 279 return LastSpace < Other.LastSpace; 280 if (FirstLessLess != Other.FirstLessLess) 281 return FirstLessLess < Other.FirstLessLess; 282 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 283 return BreakBeforeClosingBrace; 284 return BreakAfterComma; 285 } 286 }; 287 288 /// \brief The current state when indenting a unwrapped line. 289 /// 290 /// As the indenting tries different combinations this is copied by value. 291 struct LineState { 292 /// \brief The number of used columns in the current line. 293 unsigned Column; 294 295 /// \brief The token that needs to be next formatted. 296 const AnnotatedToken *NextToken; 297 298 /// \brief The parenthesis level of the first token on the current line. 299 unsigned StartOfLineLevel; 300 301 /// \brief The column of the first variable in a for-loop declaration. 302 /// 303 /// Used to align the second variable if necessary. 304 unsigned ForLoopVariablePos; 305 306 /// \brief \c true if this line contains a continued for-loop section. 307 bool LineContainsContinuedForLoopSection; 308 309 /// \brief A stack keeping track of properties applying to parenthesis 310 /// levels. 311 std::vector<ParenState> Stack; 312 313 /// \brief Comparison operator to be able to used \c LineState in \c map. 314 bool operator<(const LineState &Other) const { 315 if (Other.NextToken != NextToken) 316 return Other.NextToken > NextToken; 317 if (Other.Column != Column) 318 return Other.Column > Column; 319 if (Other.StartOfLineLevel != StartOfLineLevel) 320 return Other.StartOfLineLevel > StartOfLineLevel; 321 if (Other.ForLoopVariablePos != ForLoopVariablePos) 322 return Other.ForLoopVariablePos < ForLoopVariablePos; 323 if (Other.LineContainsContinuedForLoopSection != 324 LineContainsContinuedForLoopSection) 325 return LineContainsContinuedForLoopSection; 326 return Other.Stack < Stack; 327 } 328 }; 329 330 /// \brief Appends the next token to \p State and updates information 331 /// necessary for indentation. 332 /// 333 /// Puts the token on the current line if \p Newline is \c true and adds a 334 /// line break and necessary indentation otherwise. 335 /// 336 /// If \p DryRun is \c false, also creates and stores the required 337 /// \c Replacement. 338 void addTokenToState(bool Newline, bool DryRun, LineState &State) { 339 const AnnotatedToken &Current = *State.NextToken; 340 const AnnotatedToken &Previous = *State.NextToken->Parent; 341 assert(State.Stack.size()); 342 unsigned ParenLevel = State.Stack.size() - 1; 343 344 if (Newline) { 345 unsigned WhitespaceStartColumn = State.Column; 346 if (Current.is(tok::r_brace)) { 347 State.Column = Line.Level * 2; 348 } else if (Current.is(tok::string_literal) && 349 Previous.is(tok::string_literal)) { 350 State.Column = State.Column - Previous.FormatTok.TokenLength; 351 } else if (Current.is(tok::lessless) && 352 State.Stack[ParenLevel].FirstLessLess != 0) { 353 State.Column = State.Stack[ParenLevel].FirstLessLess; 354 } else if (ParenLevel != 0 && 355 (Previous.is(tok::equal) || Current.is(tok::arrow) || 356 Current.is(tok::period) || Previous.is(tok::question) || 357 Previous.Type == TT_ConditionalExpr)) { 358 // Indent and extra 4 spaces after if we know the current expression is 359 // continued. Don't do that on the top level, as we already indent 4 360 // there. 361 State.Column = State.Stack[ParenLevel].Indent + 4; 362 } else if (RootToken.is(tok::kw_for) && Previous.is(tok::comma)) { 363 State.Column = State.ForLoopVariablePos; 364 } else if (State.NextToken->Parent->ClosesTemplateDeclaration) { 365 State.Column = State.Stack[ParenLevel].Indent - 4; 366 } else { 367 State.Column = State.Stack[ParenLevel].Indent; 368 } 369 370 // A line starting with a closing brace is assumed to be correct for the 371 // same level as before the opening brace. 372 State.StartOfLineLevel = ParenLevel + (Current.is(tok::r_brace) ? 0 : 1); 373 374 if (RootToken.is(tok::kw_for)) 375 State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi); 376 377 if (!DryRun) { 378 if (!Line.InPPDirective) 379 replaceWhitespace(Current.FormatTok, 1, State.Column, Style, 380 SourceMgr, Replaces); 381 else 382 replacePPWhitespace(Current.FormatTok, 1, State.Column, 383 WhitespaceStartColumn, Style, SourceMgr, 384 Replaces); 385 } 386 387 State.Stack[ParenLevel].LastSpace = State.Column; 388 if (Current.is(tok::colon) && CurrentLineType != LT_ObjCMethodDecl && 389 State.NextToken->Type != TT_ConditionalExpr) 390 State.Stack[ParenLevel].Indent += 2; 391 } else { 392 if (Current.is(tok::equal) && RootToken.is(tok::kw_for)) 393 State.ForLoopVariablePos = State.Column - 394 Previous.FormatTok.TokenLength; 395 396 unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0; 397 if (State.NextToken->Type == TT_LineComment) 398 Spaces = Style.SpacesBeforeTrailingComments; 399 400 if (!DryRun) 401 replaceWhitespace(Current, 0, Spaces, Style, SourceMgr, Replaces); 402 403 // FIXME: Do we need to do this for assignments nested in other 404 // expressions? 405 if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 && 406 (getPrecedence(Previous) == prec::Assignment || 407 Previous.is(tok::kw_return))) 408 State.Stack[ParenLevel].Indent = State.Column + Spaces; 409 if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || 410 State.NextToken->Parent->Type == TT_TemplateOpener) 411 State.Stack[ParenLevel].Indent = State.Column + Spaces; 412 413 // Top-level spaces that are not part of assignments are exempt as that 414 // mostly leads to better results. 415 State.Column += Spaces; 416 if (Spaces > 0 && 417 (ParenLevel != 0 || getPrecedence(Previous) == prec::Assignment)) 418 State.Stack[ParenLevel].LastSpace = State.Column; 419 } 420 moveStateToNextToken(State); 421 if (Newline && Previous.is(tok::l_brace)) { 422 State.Stack.back().BreakBeforeClosingBrace = true; 423 } 424 } 425 426 /// \brief Mark the next token as consumed in \p State and modify its stacks 427 /// accordingly. 428 void moveStateToNextToken(LineState &State) { 429 const AnnotatedToken &Current = *State.NextToken; 430 assert(State.Stack.size()); 431 432 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 433 State.Stack.back().FirstLessLess = State.Column; 434 435 // If we encounter an opening (, [, { or <, we add a level to our stacks to 436 // prepare for the following tokens. 437 if (Current.is(tok::l_paren) || Current.is(tok::l_square) || 438 Current.is(tok::l_brace) || 439 State.NextToken->Type == TT_TemplateOpener) { 440 unsigned NewIndent; 441 if (Current.is(tok::l_brace)) { 442 // FIXME: This does not work with nested static initializers. 443 // Implement a better handling for static initializers and similar 444 // constructs. 445 NewIndent = Line.Level * 2 + 2; 446 } else { 447 NewIndent = 4 + State.Stack.back().LastSpace; 448 } 449 State.Stack.push_back( 450 ParenState(NewIndent, State.Stack.back().LastSpace)); 451 } 452 453 // If we encounter a closing ), ], } or >, we can remove a level from our 454 // stacks. 455 if (Current.is(tok::r_paren) || Current.is(tok::r_square) || 456 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 457 State.NextToken->Type == TT_TemplateCloser) { 458 State.Stack.pop_back(); 459 } 460 461 if (State.NextToken->Children.empty()) 462 State.NextToken = NULL; 463 else 464 State.NextToken = &State.NextToken->Children[0]; 465 466 State.Column += Current.FormatTok.TokenLength; 467 } 468 469 /// \brief Calculate the penalty for splitting after the token at \p Index. 470 unsigned splitPenalty(const AnnotatedToken &Tok) { 471 const AnnotatedToken &Left = Tok; 472 const AnnotatedToken &Right = Tok.Children[0]; 473 474 // In for-loops, prefer breaking at ',' and ';'. 475 if (RootToken.is(tok::kw_for) && 476 (Left.isNot(tok::comma) && Left.isNot(tok::semi))) 477 return 20; 478 479 if (Left.is(tok::semi) || Left.is(tok::comma) || 480 Left.ClosesTemplateDeclaration) 481 return 0; 482 if (Left.is(tok::l_paren)) 483 return 20; 484 485 if (Left.is(tok::question) || Left.Type == TT_ConditionalExpr) 486 return prec::Assignment; 487 prec::Level Level = getPrecedence(Left); 488 489 // Breaking after an assignment leads to a bad result as the two sides of 490 // the assignment are visually very close together. 491 if (Level == prec::Assignment) 492 return 50; 493 494 if (Level != prec::Unknown) 495 return Level; 496 497 if (Right.is(tok::arrow) || Right.is(tok::period)) 498 return 150; 499 500 return 3; 501 } 502 503 unsigned getColumnLimit() { 504 return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0); 505 } 506 507 /// \brief Calculate the number of lines needed to format the remaining part 508 /// of the unwrapped line. 509 /// 510 /// Assumes the formatting so far has led to 511 /// the \c LineSta \p State. If \p NewLine is set, a new line will be 512 /// added after the previous token. 513 /// 514 /// \param StopAt is used for optimization. If we can determine that we'll 515 /// definitely need at least \p StopAt additional lines, we already know of a 516 /// better solution. 517 unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) { 518 // We are at the end of the unwrapped line, so we don't need any more lines. 519 if (State.NextToken == NULL) 520 return 0; 521 522 if (!NewLine && State.NextToken->MustBreakBefore) 523 return UINT_MAX; 524 if (NewLine && !State.NextToken->CanBreakBefore) 525 return UINT_MAX; 526 if (!NewLine && State.NextToken->is(tok::r_brace) && 527 State.Stack.back().BreakBeforeClosingBrace) 528 return UINT_MAX; 529 if (!NewLine && State.NextToken->Parent->is(tok::semi) && 530 State.LineContainsContinuedForLoopSection) 531 return UINT_MAX; 532 if (!NewLine && State.NextToken->Parent->is(tok::comma) && 533 State.NextToken->Type != TT_LineComment && 534 State.Stack.back().BreakAfterComma) 535 return UINT_MAX; 536 537 unsigned CurrentPenalty = 0; 538 if (NewLine) { 539 CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() + 540 splitPenalty(*State.NextToken->Parent); 541 } else { 542 if (State.Stack.size() < State.StartOfLineLevel) 543 CurrentPenalty += Parameters.PenaltyLevelDecrease * 544 (State.StartOfLineLevel - State.Stack.size()); 545 } 546 547 addTokenToState(NewLine, true, State); 548 549 // Exceeding column limit is bad, assign penalty. 550 if (State.Column > getColumnLimit()) { 551 unsigned ExcessCharacters = State.Column - getColumnLimit(); 552 CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters; 553 } 554 555 if (StopAt <= CurrentPenalty) 556 return UINT_MAX; 557 StopAt -= CurrentPenalty; 558 StateMap::iterator I = Memory.find(State); 559 if (I != Memory.end()) { 560 // If this state has already been examined, we can safely return the 561 // previous result if we 562 // - have not hit the optimatization (and thus returned UINT_MAX) OR 563 // - are now computing for a smaller or equal StopAt. 564 unsigned SavedResult = I->second.first; 565 unsigned SavedStopAt = I->second.second; 566 if (SavedResult != UINT_MAX) 567 return SavedResult + CurrentPenalty; 568 else if (StopAt <= SavedStopAt) 569 return UINT_MAX; 570 } 571 572 unsigned NoBreak = calcPenalty(State, false, StopAt); 573 unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak)); 574 unsigned Result = std::min(NoBreak, WithBreak); 575 576 // We have to store 'Result' without adding 'CurrentPenalty' as the latter 577 // can depend on 'NewLine'. 578 Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt); 579 580 return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty; 581 } 582 583 FormatStyle Style; 584 SourceManager &SourceMgr; 585 const UnwrappedLine &Line; 586 const unsigned FirstIndent; 587 const bool FitsOnALine; 588 const LineType CurrentLineType; 589 const AnnotatedToken &RootToken; 590 tooling::Replacements &Replaces; 591 592 // A map from an indent state to a pair (Result, Used-StopAt). 593 typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap; 594 StateMap Memory; 595 596 OptimizationParameters Parameters; 597}; 598 599/// \brief Determines extra information about the tokens comprising an 600/// \c UnwrappedLine. 601class TokenAnnotator { 602public: 603 TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style, 604 SourceManager &SourceMgr, Lexer &Lex) 605 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), 606 RootToken(Line.RootToken) {} 607 608 /// \brief A parser that gathers additional information about tokens. 609 /// 610 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and 611 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 612 /// into template parameter lists. 613 class AnnotatingParser { 614 public: 615 AnnotatingParser(AnnotatedToken &RootToken) 616 : CurrentToken(&RootToken), KeywordVirtualFound(false) {} 617 618 bool parseAngle() { 619 while (CurrentToken != NULL) { 620 if (CurrentToken->is(tok::greater)) { 621 CurrentToken->Type = TT_TemplateCloser; 622 next(); 623 return true; 624 } 625 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) || 626 CurrentToken->is(tok::r_brace)) 627 return false; 628 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) || 629 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon)) 630 return false; 631 if (!consumeToken()) 632 return false; 633 } 634 return false; 635 } 636 637 bool parseParens() { 638 if (CurrentToken != NULL && CurrentToken->is(tok::caret)) 639 CurrentToken->Parent->Type = TT_ObjCBlockLParen; 640 while (CurrentToken != NULL) { 641 if (CurrentToken->is(tok::r_paren)) { 642 next(); 643 return true; 644 } 645 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace)) 646 return false; 647 if (!consumeToken()) 648 return false; 649 } 650 return false; 651 } 652 653 bool parseSquare() { 654 while (CurrentToken != NULL) { 655 if (CurrentToken->is(tok::r_square)) { 656 next(); 657 return true; 658 } 659 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace)) 660 return false; 661 if (!consumeToken()) 662 return false; 663 } 664 return false; 665 } 666 667 bool parseBrace() { 668 while (CurrentToken != NULL) { 669 if (CurrentToken->is(tok::r_brace)) { 670 next(); 671 return true; 672 } 673 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square)) 674 return false; 675 if (!consumeToken()) 676 return false; 677 } 678 // Lines can currently end with '{'. 679 return true; 680 } 681 682 bool parseConditional() { 683 while (CurrentToken != NULL) { 684 if (CurrentToken->is(tok::colon)) { 685 CurrentToken->Type = TT_ConditionalExpr; 686 next(); 687 return true; 688 } 689 if (!consumeToken()) 690 return false; 691 } 692 return false; 693 } 694 695 bool parseTemplateDeclaration() { 696 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 697 CurrentToken->Type = TT_TemplateOpener; 698 next(); 699 if (!parseAngle()) 700 return false; 701 CurrentToken->Parent->ClosesTemplateDeclaration = true; 702 parseLine(); 703 return true; 704 } 705 return false; 706 } 707 708 bool consumeToken() { 709 AnnotatedToken *Tok = CurrentToken; 710 next(); 711 switch (Tok->FormatTok.Tok.getKind()) { 712 case tok::plus: 713 case tok::minus: 714 // At the start of the line, +/- specific ObjectiveC method 715 // declarations. 716 if (Tok->Parent == NULL) 717 Tok->Type = TT_ObjCMethodSpecifier; 718 break; 719 case tok::l_paren: { 720 bool ParensWereObjCReturnType = 721 Tok->Parent && Tok->Parent->Type == TT_ObjCMethodSpecifier; 722 if (!parseParens()) 723 return false; 724 if (CurrentToken != NULL && CurrentToken->is(tok::colon)) { 725 CurrentToken->Type = TT_CtorInitializerColon; 726 next(); 727 } else if (CurrentToken != NULL && ParensWereObjCReturnType) { 728 CurrentToken->Type = TT_ObjCSelectorStart; 729 next(); 730 } 731 } break; 732 case tok::l_square: 733 if (!parseSquare()) 734 return false; 735 break; 736 case tok::l_brace: 737 if (!parseBrace()) 738 return false; 739 break; 740 case tok::less: 741 if (parseAngle()) 742 Tok->Type = TT_TemplateOpener; 743 else { 744 Tok->Type = TT_BinaryOperator; 745 CurrentToken = Tok; 746 next(); 747 } 748 break; 749 case tok::r_paren: 750 case tok::r_square: 751 return false; 752 case tok::r_brace: 753 // Lines can start with '}'. 754 if (Tok->Parent != NULL) 755 return false; 756 break; 757 case tok::greater: 758 Tok->Type = TT_BinaryOperator; 759 break; 760 case tok::kw_operator: 761 if (CurrentToken->is(tok::l_paren)) { 762 CurrentToken->Type = TT_OverloadedOperator; 763 next(); 764 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) { 765 CurrentToken->Type = TT_OverloadedOperator; 766 next(); 767 } 768 } else { 769 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) { 770 CurrentToken->Type = TT_OverloadedOperator; 771 next(); 772 } 773 } 774 break; 775 case tok::question: 776 parseConditional(); 777 break; 778 case tok::kw_template: 779 parseTemplateDeclaration(); 780 break; 781 default: 782 break; 783 } 784 return true; 785 } 786 787 void parseIncludeDirective() { 788 while (CurrentToken != NULL) { 789 if (CurrentToken->is(tok::slash)) 790 CurrentToken->Type = TT_DirectorySeparator; 791 else if (CurrentToken->is(tok::less)) 792 CurrentToken->Type = TT_TemplateOpener; 793 else if (CurrentToken->is(tok::greater)) 794 CurrentToken->Type = TT_TemplateCloser; 795 next(); 796 } 797 } 798 799 void parsePreprocessorDirective() { 800 next(); 801 if (CurrentToken == NULL) 802 return; 803 // Hashes in the middle of a line can lead to any strange token 804 // sequence. 805 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 806 return; 807 switch ( 808 CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 809 case tok::pp_include: 810 case tok::pp_import: 811 parseIncludeDirective(); 812 break; 813 default: 814 break; 815 } 816 } 817 818 LineType parseLine() { 819 if (CurrentToken->is(tok::hash)) { 820 parsePreprocessorDirective(); 821 return LT_PreprocessorDirective; 822 } 823 while (CurrentToken != NULL) { 824 if (CurrentToken->is(tok::kw_virtual)) 825 KeywordVirtualFound = true; 826 if (!consumeToken()) 827 return LT_Invalid; 828 } 829 if (KeywordVirtualFound) 830 return LT_VirtualFunctionDecl; 831 return LT_Other; 832 } 833 834 void next() { 835 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 836 CurrentToken = &CurrentToken->Children[0]; 837 else 838 CurrentToken = NULL; 839 } 840 841 private: 842 AnnotatedToken *CurrentToken; 843 bool KeywordVirtualFound; 844 }; 845 846 void createAnnotatedTokens(AnnotatedToken &Current) { 847 if (!Current.FormatTok.Children.empty()) { 848 Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0])); 849 Current.Children.back().Parent = &Current; 850 createAnnotatedTokens(Current.Children.back()); 851 } 852 } 853 854 void calculateExtraInformation(AnnotatedToken &Current) { 855 Current.SpaceRequiredBefore = spaceRequiredBefore(Current); 856 857 if (Current.FormatTok.MustBreakBefore) { 858 Current.MustBreakBefore = true; 859 } else { 860 if (Current.Type == TT_CtorInitializerColon || Current.Parent->Type == 861 TT_LineComment || (Current.is(tok::string_literal) && 862 Current.Parent->is(tok::string_literal))) { 863 Current.MustBreakBefore = true; 864 } else { 865 Current.MustBreakBefore = false; 866 } 867 } 868 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current); 869 if (!Current.Children.empty()) 870 calculateExtraInformation(Current.Children[0]); 871 } 872 873 bool annotate() { 874 createAnnotatedTokens(RootToken); 875 876 AnnotatingParser Parser(RootToken); 877 CurrentLineType = Parser.parseLine(); 878 if (CurrentLineType == LT_Invalid) 879 return false; 880 881 determineTokenTypes(RootToken, /*IsRHS=*/false); 882 883 if (RootToken.Type == TT_ObjCMethodSpecifier) 884 CurrentLineType = LT_ObjCMethodDecl; 885 else if (RootToken.Type == TT_ObjCDecl) 886 CurrentLineType = LT_ObjCDecl; 887 else if (RootToken.Type == TT_ObjCProperty) 888 CurrentLineType = LT_ObjCProperty; 889 890 if (!RootToken.Children.empty()) 891 calculateExtraInformation(RootToken.Children[0]); 892 return true; 893 } 894 895 LineType getLineType() { 896 return CurrentLineType; 897 } 898 899 const AnnotatedToken &getRootToken() { 900 return RootToken; 901 } 902 903private: 904 void determineTokenTypes(AnnotatedToken &Current, bool IsRHS) { 905 if (getPrecedence(Current) == prec::Assignment || 906 Current.is(tok::kw_return) || Current.is(tok::kw_throw)) 907 IsRHS = true; 908 909 if (Current.Type == TT_Unknown) { 910 if (Current.is(tok::star) || Current.is(tok::amp)) { 911 Current.Type = determineStarAmpUsage(Current, IsRHS); 912 } else if (Current.is(tok::minus) || Current.is(tok::plus) || 913 Current.is(tok::caret)) { 914 Current.Type = determinePlusMinusCaretUsage(Current); 915 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) { 916 Current.Type = determineIncrementUsage(Current); 917 } else if (Current.is(tok::exclaim)) { 918 Current.Type = TT_UnaryOperator; 919 } else if (isBinaryOperator(Current)) { 920 Current.Type = TT_BinaryOperator; 921 } else if (Current.is(tok::comment)) { 922 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 923 Lex.getLangOpts())); 924 if (StringRef(Data).startswith("//")) 925 Current.Type = TT_LineComment; 926 else 927 Current.Type = TT_BlockComment; 928 } else if (Current.is(tok::r_paren) && 929 (Current.Parent->Type == TT_PointerOrReference || 930 Current.Parent->Type == TT_TemplateCloser)) { 931 // FIXME: We need to get smarter and understand more cases of casts. 932 Current.Type = TT_CastRParen; 933 } else if (Current.is(tok::at) && Current.Children.size()) { 934 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 935 case tok::objc_interface: 936 case tok::objc_implementation: 937 case tok::objc_protocol: 938 Current.Type = TT_ObjCDecl; 939 break; 940 case tok::objc_property: 941 Current.Type = TT_ObjCProperty; 942 break; 943 default: 944 break; 945 } 946 } 947 } 948 949 if (!Current.Children.empty()) 950 determineTokenTypes(Current.Children[0], IsRHS); 951 } 952 953 bool isBinaryOperator(const AnnotatedToken &Tok) { 954 // Comma is a binary operator, but does not behave as such wrt. formatting. 955 return getPrecedence(Tok) > prec::Comma; 956 } 957 958 TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsRHS) { 959 if (Tok.Parent == NULL) 960 return TT_UnaryOperator; 961 if (Tok.Children.size() == 0) 962 return TT_Unknown; 963 const FormatToken &PrevToken = Tok.Parent->FormatTok; 964 const FormatToken &NextToken = Tok.Children[0].FormatTok; 965 966 if (PrevToken.Tok.is(tok::l_paren) || PrevToken.Tok.is(tok::l_square) || 967 PrevToken.Tok.is(tok::comma) || PrevToken.Tok.is(tok::kw_return) || 968 PrevToken.Tok.is(tok::colon) || Tok.Parent->Type == TT_BinaryOperator || 969 Tok.Parent->Type == TT_CastRParen) 970 return TT_UnaryOperator; 971 972 if (PrevToken.Tok.isLiteral() || NextToken.Tok.isLiteral() || 973 NextToken.Tok.is(tok::plus) || NextToken.Tok.is(tok::minus) || 974 NextToken.Tok.is(tok::plusplus) || NextToken.Tok.is(tok::minusminus) || 975 NextToken.Tok.is(tok::tilde) || NextToken.Tok.is(tok::exclaim) || 976 NextToken.Tok.is(tok::kw_alignof) || NextToken.Tok.is(tok::kw_sizeof)) 977 return TT_BinaryOperator; 978 979 if (NextToken.Tok.is(tok::comma) || NextToken.Tok.is(tok::r_paren) || 980 NextToken.Tok.is(tok::greater)) 981 return TT_PointerOrReference; 982 983 // It is very unlikely that we are going to find a pointer or reference type 984 // definition on the RHS of an assignment. 985 if (IsRHS) 986 return TT_BinaryOperator; 987 988 return TT_PointerOrReference; 989 } 990 991 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 992 // Use heuristics to recognize unary operators. 993 if (Tok.Parent->is(tok::equal) || Tok.Parent->is(tok::l_paren) || 994 Tok.Parent->is(tok::comma) || Tok.Parent->is(tok::l_square) || 995 Tok.Parent->is(tok::question) || Tok.Parent->is(tok::colon) || 996 Tok.Parent->is(tok::kw_return) || Tok.Parent->is(tok::kw_case) || 997 Tok.Parent->is(tok::at)) 998 return TT_UnaryOperator; 999 1000 // There can't be to consecutive binary operators. 1001 if (Tok.Parent->Type == TT_BinaryOperator) 1002 return TT_UnaryOperator; 1003 1004 // Fall back to marking the token as binary operator. 1005 return TT_BinaryOperator; 1006 } 1007 1008 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 1009 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 1010 if (Tok.Parent != NULL && Tok.Parent->is(tok::identifier)) 1011 return TT_TrailingUnaryOperator; 1012 1013 return TT_UnaryOperator; 1014 } 1015 1016 bool spaceRequiredBetween(const AnnotatedToken &Left, 1017 const AnnotatedToken &Right) { 1018 if (Right.is(tok::hashhash)) 1019 return Left.is(tok::hash); 1020 if (Left.is(tok::hashhash) || Left.is(tok::hash)) 1021 return Right.is(tok::hash); 1022 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) 1023 return false; 1024 if (Right.is(tok::less) && 1025 (Left.is(tok::kw_template) || 1026 (CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1027 return true; 1028 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1029 return false; 1030 if (Left.is(tok::exclaim) || Left.is(tok::tilde)) 1031 return false; 1032 if (Left.is(tok::at) && 1033 (Right.is(tok::identifier) || Right.is(tok::string_literal) || 1034 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) || 1035 Right.is(tok::l_paren) || Right.is(tok::l_brace) || 1036 Right.is(tok::kw_true) || Right.is(tok::kw_false))) 1037 return false; 1038 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) 1039 return false; 1040 if (Right.is(tok::amp) || Right.is(tok::star)) 1041 return Left.FormatTok.Tok.isLiteral() || 1042 (Left.isNot(tok::star) && Left.isNot(tok::amp) && 1043 !Style.PointerAndReferenceBindToType); 1044 if (Left.is(tok::amp) || Left.is(tok::star)) 1045 return Right.FormatTok.Tok.isLiteral() || 1046 Style.PointerAndReferenceBindToType; 1047 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1048 return false; 1049 if (Left.is(tok::l_square) || Right.is(tok::l_square) || 1050 Right.is(tok::r_square)) 1051 return false; 1052 if (Left.is(tok::coloncolon) || 1053 (Right.is(tok::coloncolon) && 1054 (Left.is(tok::identifier) || Left.is(tok::greater)))) 1055 return false; 1056 if (Left.is(tok::period) || Right.is(tok::period)) 1057 return false; 1058 if (Left.is(tok::colon) || Right.is(tok::colon)) 1059 return true; 1060 if (Left.is(tok::l_paren)) 1061 return false; 1062 if (Right.is(tok::l_paren)) { 1063 return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) || 1064 Left.is(tok::kw_for) || Left.is(tok::kw_while) || 1065 Left.is(tok::kw_switch) || Left.is(tok::kw_return) || 1066 Left.is(tok::kw_catch) || Left.is(tok::kw_new) || 1067 Left.is(tok::kw_delete); 1068 } 1069 if (Left.is(tok::at) && 1070 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1071 return false; 1072 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1073 return false; 1074 return true; 1075 } 1076 1077 bool spaceRequiredBefore(const AnnotatedToken &Tok) { 1078 if (CurrentLineType == LT_ObjCMethodDecl) { 1079 if (Tok.is(tok::identifier) && !Tok.Children.empty() && 1080 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) 1081 return true; 1082 if (Tok.is(tok::colon)) 1083 return false; 1084 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1085 return Style.ObjCSpaceBeforeReturnType || Tok.isNot(tok::l_paren); 1086 if (Tok.Type == TT_ObjCSelectorStart) 1087 return !Style.ObjCSpaceBeforeReturnType; 1088 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1089 // Don't space between ')' and <id> 1090 return false; 1091 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren)) 1092 // Don't space between ':' and '(' 1093 return false; 1094 } 1095 if (CurrentLineType == LT_ObjCProperty && 1096 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1097 return false; 1098 1099 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1100 return true; 1101 if (Tok.Type == TT_OverloadedOperator) 1102 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) || 1103 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool); 1104 if (Tok.Parent->Type == TT_OverloadedOperator) 1105 return false; 1106 if (Tok.is(tok::colon)) 1107 return RootToken.isNot(tok::kw_case) && (!Tok.Children.empty()); 1108 if (Tok.Parent->Type == TT_UnaryOperator || 1109 Tok.Parent->Type == TT_CastRParen) 1110 return false; 1111 if (Tok.Type == TT_UnaryOperator) 1112 return Tok.Parent->isNot(tok::l_paren) && 1113 Tok.Parent->isNot(tok::l_square) && 1114 Tok.Parent->isNot(tok::at); 1115 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1116 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type == 1117 TT_TemplateCloser && Style.SplitTemplateClosingGreater; 1118 } 1119 if (Tok.Type == TT_DirectorySeparator || 1120 Tok.Parent->Type == TT_DirectorySeparator) 1121 return false; 1122 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1123 return true; 1124 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1125 return false; 1126 if (Tok.is(tok::less) && RootToken.is(tok::hash)) 1127 return true; 1128 if (Tok.Type == TT_TrailingUnaryOperator) 1129 return false; 1130 return spaceRequiredBetween(*Tok.Parent, Tok); 1131 } 1132 1133 bool canBreakBefore(const AnnotatedToken &Right) { 1134 const AnnotatedToken &Left = *Right.Parent; 1135 if (CurrentLineType == LT_ObjCMethodDecl) { 1136 if (Right.is(tok::identifier) && !Right.Children.empty() && 1137 Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) 1138 return true; 1139 if (CurrentLineType == LT_ObjCMethodDecl && Right.is(tok::identifier) && 1140 Left.is(tok::l_paren) && Left.Parent->is(tok::colon)) 1141 // Don't break this identifier as ':' or identifier 1142 // before it will break. 1143 return false; 1144 if (Right.is(tok::colon) && Left.is(tok::identifier) && 1145 Left.CanBreakBefore) 1146 // Don't break at ':' if identifier before it can beak. 1147 return false; 1148 } 1149 if (Left.ClosesTemplateDeclaration) 1150 return true; 1151 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1152 Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr) 1153 return false; 1154 if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl) 1155 return false; 1156 1157 if (Right.is(tok::comment)) 1158 return !Right.Children.empty(); 1159 if (Right.is(tok::r_paren) || Right.is(tok::l_brace) || 1160 Right.is(tok::greater)) 1161 return false; 1162 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1163 Left.is(tok::comma) || Right.is(tok::lessless) || 1164 Right.is(tok::arrow) || Right.is(tok::period) || 1165 Right.is(tok::colon) || Left.is(tok::semi) || 1166 Left.is(tok::l_brace) || Left.is(tok::question) || 1167 Right.is(tok::r_brace) || Left.Type == TT_ConditionalExpr || 1168 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1169 Right.is(tok::identifier)) || 1170 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 1171 } 1172 1173 FormatStyle Style; 1174 SourceManager &SourceMgr; 1175 Lexer &Lex; 1176 LineType CurrentLineType; 1177 AnnotatedToken RootToken; 1178}; 1179 1180class LexerBasedFormatTokenSource : public FormatTokenSource { 1181public: 1182 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1183 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1184 IdentTable(Lex.getLangOpts()) { 1185 Lex.SetKeepWhitespaceMode(true); 1186 } 1187 1188 virtual FormatToken getNextToken() { 1189 if (GreaterStashed) { 1190 FormatTok.NewlinesBefore = 0; 1191 FormatTok.WhiteSpaceStart = 1192 FormatTok.Tok.getLocation().getLocWithOffset(1); 1193 FormatTok.WhiteSpaceLength = 0; 1194 GreaterStashed = false; 1195 return FormatTok; 1196 } 1197 1198 FormatTok = FormatToken(); 1199 Lex.LexFromRawLexer(FormatTok.Tok); 1200 StringRef Text = rawTokenText(FormatTok.Tok); 1201 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1202 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1203 FormatTok.IsFirst = true; 1204 1205 // Consume and record whitespace until we find a significant token. 1206 while (FormatTok.Tok.is(tok::unknown)) { 1207 FormatTok.NewlinesBefore += Text.count('\n'); 1208 FormatTok.HasUnescapedNewline = Text.count("\\\n") != 1209 FormatTok.NewlinesBefore; 1210 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1211 1212 if (FormatTok.Tok.is(tok::eof)) 1213 return FormatTok; 1214 Lex.LexFromRawLexer(FormatTok.Tok); 1215 Text = rawTokenText(FormatTok.Tok); 1216 } 1217 1218 // Now FormatTok is the next non-whitespace token. 1219 FormatTok.TokenLength = Text.size(); 1220 1221 // In case the token starts with escaped newlines, we want to 1222 // take them into account as whitespace - this pattern is quite frequent 1223 // in macro definitions. 1224 // FIXME: What do we want to do with other escaped spaces, and escaped 1225 // spaces or newlines in the middle of tokens? 1226 // FIXME: Add a more explicit test. 1227 unsigned i = 0; 1228 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1229 FormatTok.WhiteSpaceLength += 2; 1230 FormatTok.TokenLength -= 2; 1231 i += 2; 1232 } 1233 1234 if (FormatTok.Tok.is(tok::raw_identifier)) { 1235 IdentifierInfo &Info = IdentTable.get(Text); 1236 FormatTok.Tok.setIdentifierInfo(&Info); 1237 FormatTok.Tok.setKind(Info.getTokenID()); 1238 } 1239 1240 if (FormatTok.Tok.is(tok::greatergreater)) { 1241 FormatTok.Tok.setKind(tok::greater); 1242 GreaterStashed = true; 1243 } 1244 1245 return FormatTok; 1246 } 1247 1248private: 1249 FormatToken FormatTok; 1250 bool GreaterStashed; 1251 Lexer &Lex; 1252 SourceManager &SourceMgr; 1253 IdentifierTable IdentTable; 1254 1255 /// Returns the text of \c FormatTok. 1256 StringRef rawTokenText(Token &Tok) { 1257 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1258 Tok.getLength()); 1259 } 1260}; 1261 1262class Formatter : public UnwrappedLineConsumer { 1263public: 1264 Formatter(clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 1265 Lexer &Lex, SourceManager &SourceMgr, 1266 const std::vector<CharSourceRange> &Ranges) 1267 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1268 Ranges(Ranges) {} 1269 1270 virtual ~Formatter() {} 1271 1272 tooling::Replacements format() { 1273 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1274 UnwrappedLineParser Parser(Diag, Style, Tokens, *this); 1275 StructuralError = Parser.parse(); 1276 unsigned PreviousEndOfLineColumn = 0; 1277 for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(), 1278 E = UnwrappedLines.end(); 1279 I != E; ++I) { 1280 const UnwrappedLine &TheLine = *I; 1281 if (touchesRanges(TheLine)) { 1282 TokenAnnotator Annotator(TheLine, Style, SourceMgr, Lex); 1283 if (!Annotator.annotate()) 1284 break; 1285 unsigned Indent = formatFirstToken(Annotator.getRootToken(), 1286 TheLine.Level, TheLine.InPPDirective, 1287 PreviousEndOfLineColumn); 1288 unsigned Limit = Style.ColumnLimit - (TheLine.InPPDirective ? 1 : 0) - 1289 Indent; 1290 // Check whether the UnwrappedLine can be put onto a single line. If 1291 // so, this is bound to be the optimal solution (by definition) and we 1292 // don't need to analyze the entire solution space. 1293 bool FitsOnALine = fitsIntoLimit(Annotator.getRootToken(), Limit); 1294 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1295 FitsOnALine, Annotator.getLineType(), 1296 Annotator.getRootToken(), Replaces, 1297 StructuralError); 1298 PreviousEndOfLineColumn = Formatter.format(); 1299 } else { 1300 // If we did not reformat this unwrapped line, the column at the end of 1301 // the last token is unchanged - thus, we can calculate the end of the 1302 // last token, and return the result. 1303 const FormatToken *Last = getLastLine(TheLine); 1304 PreviousEndOfLineColumn = 1305 SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) + 1306 Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr, 1307 Lex.getLangOpts()) - 1308 1; 1309 } 1310 } 1311 return Replaces; 1312 } 1313 1314private: 1315 const FormatToken *getLastLine(const UnwrappedLine &TheLine) { 1316 const FormatToken *Last = &TheLine.RootToken; 1317 while (!Last->Children.empty()) 1318 Last = &Last->Children.back(); 1319 return Last; 1320 } 1321 1322 bool touchesRanges(const UnwrappedLine &TheLine) { 1323 const FormatToken *First = &TheLine.RootToken; 1324 const FormatToken *Last = getLastLine(TheLine); 1325 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1326 First->Tok.getLocation(), 1327 Last->Tok.getLocation()); 1328 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1329 if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(), 1330 Ranges[i].getBegin()) && 1331 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1332 LineRange.getBegin())) 1333 return true; 1334 } 1335 return false; 1336 } 1337 1338 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1339 UnwrappedLines.push_back(TheLine); 1340 } 1341 1342 /// \brief Add a new line and the required indent before the first Token 1343 /// of the \c UnwrappedLine if there was no structural parsing error. 1344 /// Returns the indent level of the \c UnwrappedLine. 1345 unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level, 1346 bool InPPDirective, 1347 unsigned PreviousEndOfLineColumn) { 1348 const FormatToken &Tok = RootToken.FormatTok; 1349 if (!Tok.WhiteSpaceStart.isValid() || StructuralError) 1350 return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1; 1351 1352 unsigned Newlines = std::min(Tok.NewlinesBefore, 1353 Style.MaxEmptyLinesToKeep + 1); 1354 if (Newlines == 0 && !Tok.IsFirst) 1355 Newlines = 1; 1356 unsigned Indent = Level * 2; 1357 1358 bool IsAccessModifier = false; 1359 if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) || 1360 RootToken.is(tok::kw_private)) 1361 IsAccessModifier = true; 1362 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 1363 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 1364 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 1365 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 1366 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 1367 IsAccessModifier = true; 1368 1369 if (IsAccessModifier && 1370 static_cast<int>(Indent) + Style.AccessModifierOffset >= 0) 1371 Indent += Style.AccessModifierOffset; 1372 if (!InPPDirective || Tok.HasUnescapedNewline) { 1373 replaceWhitespace(Tok, Newlines, Indent, Style, SourceMgr, Replaces); 1374 } else { 1375 replacePPWhitespace(Tok, Newlines, Indent, PreviousEndOfLineColumn, Style, 1376 SourceMgr, Replaces); 1377 } 1378 return Indent; 1379 } 1380 1381 clang::DiagnosticsEngine &Diag; 1382 FormatStyle Style; 1383 Lexer &Lex; 1384 SourceManager &SourceMgr; 1385 tooling::Replacements Replaces; 1386 std::vector<CharSourceRange> Ranges; 1387 std::vector<UnwrappedLine> UnwrappedLines; 1388 bool StructuralError; 1389}; 1390 1391tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1392 SourceManager &SourceMgr, 1393 std::vector<CharSourceRange> Ranges) { 1394 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 1395 TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), &*DiagOpts); 1396 DiagnosticPrinter.BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 1397 DiagnosticsEngine Diagnostics( 1398 llvm::IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 1399 &DiagnosticPrinter, false); 1400 Diagnostics.setSourceManager(&SourceMgr); 1401 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 1402 return formatter.format(); 1403} 1404 1405LangOptions getFormattingLangOpts() { 1406 LangOptions LangOpts; 1407 LangOpts.CPlusPlus = 1; 1408 LangOpts.CPlusPlus11 = 1; 1409 LangOpts.Bool = 1; 1410 LangOpts.ObjC1 = 1; 1411 LangOpts.ObjC2 = 1; 1412 return LangOpts; 1413} 1414 1415} // namespace format 1416} // namespace clang 1417