TokenAnnotator.cpp revision 248497199bc56e86d1c089beb9529f3b3d77abb1
1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a token annotator, i.e. creates 12/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13/// 14//===----------------------------------------------------------------------===// 15 16#include "TokenAnnotator.h" 17#include "clang/Basic/SourceManager.h" 18#include "clang/Lex/Lexer.h" 19 20namespace clang { 21namespace format { 22 23static bool isUnaryOperator(const AnnotatedToken &Tok) { 24 switch (Tok.FormatTok.Tok.getKind()) { 25 case tok::plus: 26 case tok::plusplus: 27 case tok::minus: 28 case tok::minusminus: 29 case tok::exclaim: 30 case tok::tilde: 31 case tok::kw_sizeof: 32 case tok::kw_alignof: 33 return true; 34 default: 35 return false; 36 } 37} 38 39static bool isBinaryOperator(const AnnotatedToken &Tok) { 40 // Comma is a binary operator, but does not behave as such wrt. formatting. 41 return getPrecedence(Tok) > prec::Comma; 42} 43 44// Returns the previous token ignoring comments. 45static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) { 46 AnnotatedToken *PrevToken = Tok.Parent; 47 while (PrevToken != NULL && PrevToken->is(tok::comment)) 48 PrevToken = PrevToken->Parent; 49 return PrevToken; 50} 51static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { 52 return getPreviousToken(const_cast<AnnotatedToken &>(Tok)); 53} 54 55static bool isTrailingComment(AnnotatedToken *Tok) { 56 return Tok != NULL && Tok->is(tok::comment) && 57 (Tok->Children.empty() || 58 Tok->Children[0].FormatTok.NewlinesBefore > 0); 59} 60 61// Returns the next token ignoring comments. 62static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { 63 if (Tok.Children.empty()) 64 return NULL; 65 const AnnotatedToken *NextToken = &Tok.Children[0]; 66 while (NextToken->is(tok::comment)) { 67 if (NextToken->Children.empty()) 68 return NULL; 69 NextToken = &NextToken->Children[0]; 70 } 71 return NextToken; 72} 73 74/// \brief A parser that gathers additional information about tokens. 75/// 76/// The \c TokenAnnotator tries to matches parenthesis and square brakets and 77/// store a parenthesis levels. It also tries to resolve matching "<" and ">" 78/// into template parameter lists. 79class AnnotatingParser { 80public: 81 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, 82 IdentifierInfo &Ident_in) 83 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), 84 KeywordVirtualFound(false), Ident_in(Ident_in) { 85 Contexts.push_back(Context(1, /*IsExpression=*/ false)); 86 } 87 88private: 89 bool parseAngle() { 90 if (CurrentToken == NULL) 91 return false; 92 ScopedContextCreator ContextCreator(*this, 10); 93 AnnotatedToken *Left = CurrentToken->Parent; 94 Contexts.back().IsExpression = false; 95 while (CurrentToken != NULL) { 96 if (CurrentToken->is(tok::greater)) { 97 Left->MatchingParen = CurrentToken; 98 CurrentToken->MatchingParen = Left; 99 CurrentToken->Type = TT_TemplateCloser; 100 next(); 101 return true; 102 } 103 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) || 104 CurrentToken->is(tok::r_brace)) 105 return false; 106 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) || 107 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon)) 108 return false; 109 updateParameterCount(Left, CurrentToken); 110 if (!consumeToken()) 111 return false; 112 } 113 return false; 114 } 115 116 bool parseParens(bool LookForDecls = false) { 117 if (CurrentToken == NULL) 118 return false; 119 ScopedContextCreator ContextCreator(*this, 1); 120 121 // FIXME: This is a bit of a hack. Do better. 122 Contexts.back().ColonIsForRangeExpr = 123 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 124 125 bool StartsObjCMethodExpr = false; 126 AnnotatedToken *Left = CurrentToken->Parent; 127 if (CurrentToken->is(tok::caret)) { 128 // ^( starts a block. 129 Left->Type = TT_ObjCBlockLParen; 130 } else if (AnnotatedToken *MaybeSel = Left->Parent) { 131 // @selector( starts a selector. 132 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && 133 MaybeSel->Parent->is(tok::at)) { 134 StartsObjCMethodExpr = true; 135 } 136 } 137 138 if (StartsObjCMethodExpr) { 139 Contexts.back().ColonIsObjCMethodExpr = true; 140 Left->Type = TT_ObjCMethodExpr; 141 } 142 143 while (CurrentToken != NULL) { 144 // LookForDecls is set when "if (" has been seen. Check for 145 // 'identifier' '*' 'identifier' followed by not '=' -- this 146 // '*' has to be a binary operator but determineStarAmpUsage() will 147 // categorize it as an unary operator, so set the right type here. 148 if (LookForDecls && !CurrentToken->Children.empty()) { 149 AnnotatedToken &Prev = *CurrentToken->Parent; 150 AnnotatedToken &Next = CurrentToken->Children[0]; 151 if (Prev.Parent->is(tok::identifier) && 152 (Prev.is(tok::star) || Prev.is(tok::amp)) && 153 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { 154 Prev.Type = TT_BinaryOperator; 155 LookForDecls = false; 156 } 157 } 158 159 if (CurrentToken->is(tok::r_paren)) { 160 Left->MatchingParen = CurrentToken; 161 CurrentToken->MatchingParen = Left; 162 163 if (StartsObjCMethodExpr) { 164 CurrentToken->Type = TT_ObjCMethodExpr; 165 if (Contexts.back().FirstObjCSelectorName != NULL) { 166 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 167 Contexts.back().LongestObjCSelectorName; 168 } 169 } 170 171 next(); 172 return true; 173 } 174 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace)) 175 return false; 176 updateParameterCount(Left, CurrentToken); 177 if (!consumeToken()) 178 return false; 179 } 180 return false; 181 } 182 183 bool parseSquare() { 184 if (!CurrentToken) 185 return false; 186 ScopedContextCreator ContextCreator(*this, 10); 187 188 // A '[' could be an index subscript (after an indentifier or after 189 // ')' or ']'), it could be the start of an Objective-C method 190 // expression, or it could the the start of an Objective-C array literal. 191 AnnotatedToken *Left = CurrentToken->Parent; 192 AnnotatedToken *Parent = getPreviousToken(*Left); 193 Contexts.back().IsExpression = true; 194 bool StartsObjCMethodExpr = 195 !Parent || Parent->is(tok::colon) || Parent->is(tok::l_square) || 196 Parent->is(tok::l_paren) || Parent->is(tok::kw_return) || 197 Parent->is(tok::kw_throw) || isUnaryOperator(*Parent) || 198 Parent->Type == TT_ObjCForIn || Parent->Type == TT_CastRParen || 199 getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > 200 prec::Unknown; 201 bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at); 202 203 if (StartsObjCMethodExpr) { 204 Contexts.back().ColonIsObjCMethodExpr = true; 205 Left->Type = TT_ObjCMethodExpr; 206 } else if (StartsObjCArrayLiteral) { 207 Left->Type = TT_ObjCArrayLiteral; 208 } 209 210 while (CurrentToken != NULL) { 211 if (CurrentToken->is(tok::r_square)) { 212 if (!CurrentToken->Children.empty() && 213 CurrentToken->Children[0].is(tok::l_paren)) { 214 // An ObjC method call is rarely followed by an open parenthesis. 215 // FIXME: Do we incorrectly label ":" with this? 216 StartsObjCMethodExpr = false; 217 Left->Type = TT_Unknown; 218 } 219 if (StartsObjCMethodExpr) { 220 CurrentToken->Type = TT_ObjCMethodExpr; 221 // determineStarAmpUsage() thinks that '*' '[' is allocating an 222 // array of pointers, but if '[' starts a selector then '*' is a 223 // binary operator. 224 if (Parent != NULL && 225 (Parent->is(tok::star) || Parent->is(tok::amp)) && 226 Parent->Type == TT_PointerOrReference) 227 Parent->Type = TT_BinaryOperator; 228 } else if (StartsObjCArrayLiteral) { 229 CurrentToken->Type = TT_ObjCArrayLiteral; 230 } 231 Left->MatchingParen = CurrentToken; 232 CurrentToken->MatchingParen = Left; 233 if (Contexts.back().FirstObjCSelectorName != NULL) 234 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 235 Contexts.back().LongestObjCSelectorName; 236 next(); 237 return true; 238 } 239 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace)) 240 return false; 241 updateParameterCount(Left, CurrentToken); 242 if (!consumeToken()) 243 return false; 244 } 245 return false; 246 } 247 248 bool parseBrace() { 249 // Lines are fine to end with '{'. 250 if (CurrentToken == NULL) 251 return true; 252 ScopedContextCreator ContextCreator(*this, 1); 253 AnnotatedToken *Left = CurrentToken->Parent; 254 while (CurrentToken != NULL) { 255 if (CurrentToken->is(tok::r_brace)) { 256 Left->MatchingParen = CurrentToken; 257 CurrentToken->MatchingParen = Left; 258 next(); 259 return true; 260 } 261 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square)) 262 return false; 263 updateParameterCount(Left, CurrentToken); 264 if (!consumeToken()) 265 return false; 266 } 267 return true; 268 } 269 270 void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) { 271 if (Current->is(tok::comma)) 272 ++Left->ParameterCount; 273 else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) 274 Left->ParameterCount = 1; 275 } 276 277 bool parseConditional() { 278 while (CurrentToken != NULL) { 279 if (CurrentToken->is(tok::colon)) { 280 CurrentToken->Type = TT_ConditionalExpr; 281 next(); 282 return true; 283 } 284 if (!consumeToken()) 285 return false; 286 } 287 return false; 288 } 289 290 bool parseTemplateDeclaration() { 291 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 292 CurrentToken->Type = TT_TemplateOpener; 293 next(); 294 if (!parseAngle()) 295 return false; 296 if (CurrentToken != NULL) 297 CurrentToken->Parent->ClosesTemplateDeclaration = true; 298 return true; 299 } 300 return false; 301 } 302 303 bool consumeToken() { 304 AnnotatedToken *Tok = CurrentToken; 305 next(); 306 switch (Tok->FormatTok.Tok.getKind()) { 307 case tok::plus: 308 case tok::minus: 309 // At the start of the line, +/- specific ObjectiveC method 310 // declarations. 311 if (Tok->Parent == NULL) 312 Tok->Type = TT_ObjCMethodSpecifier; 313 break; 314 case tok::colon: 315 // Colons from ?: are handled in parseConditional(). 316 if (Tok->Parent->is(tok::r_paren)) { 317 Tok->Type = TT_CtorInitializerColon; 318 } else if (Contexts.back().ColonIsObjCMethodExpr || 319 Line.First.Type == TT_ObjCMethodSpecifier) { 320 Tok->Type = TT_ObjCMethodExpr; 321 Tok->Parent->Type = TT_ObjCSelectorName; 322 if (Tok->Parent->FormatTok.TokenLength > 323 Contexts.back().LongestObjCSelectorName) 324 Contexts.back().LongestObjCSelectorName = 325 Tok->Parent->FormatTok.TokenLength; 326 if (Contexts.back().FirstObjCSelectorName == NULL) 327 Contexts.back().FirstObjCSelectorName = Tok->Parent; 328 } else if (Contexts.back().ColonIsForRangeExpr) { 329 Tok->Type = TT_RangeBasedForLoopColon; 330 } else if (Contexts.size() == 1) { 331 Tok->Type = TT_InheritanceColon; 332 } 333 break; 334 case tok::kw_if: 335 case tok::kw_while: 336 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 337 next(); 338 if (!parseParens(/*LookForDecls=*/ true)) 339 return false; 340 } 341 break; 342 case tok::kw_for: 343 Contexts.back().ColonIsForRangeExpr = true; 344 next(); 345 if (!parseParens()) 346 return false; 347 break; 348 case tok::l_paren: 349 if (!parseParens()) 350 return false; 351 if (Line.MustBeDeclaration) 352 Line.MightBeFunctionDecl = true; 353 break; 354 case tok::l_square: 355 if (!parseSquare()) 356 return false; 357 break; 358 case tok::l_brace: 359 if (!parseBrace()) 360 return false; 361 break; 362 case tok::less: 363 if (parseAngle()) 364 Tok->Type = TT_TemplateOpener; 365 else { 366 Tok->Type = TT_BinaryOperator; 367 CurrentToken = Tok; 368 next(); 369 } 370 break; 371 case tok::r_paren: 372 case tok::r_square: 373 return false; 374 case tok::r_brace: 375 // Lines can start with '}'. 376 if (Tok->Parent != NULL) 377 return false; 378 break; 379 case tok::greater: 380 Tok->Type = TT_BinaryOperator; 381 break; 382 case tok::kw_operator: 383 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { 384 if (CurrentToken->is(tok::star) || CurrentToken->is(tok::amp)) 385 CurrentToken->Type = TT_PointerOrReference; 386 consumeToken(); 387 } 388 if (CurrentToken) 389 CurrentToken->Type = TT_OverloadedOperatorLParen; 390 break; 391 case tok::question: 392 parseConditional(); 393 break; 394 case tok::kw_template: 395 parseTemplateDeclaration(); 396 break; 397 case tok::identifier: 398 if (Line.First.is(tok::kw_for) && 399 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) 400 Tok->Type = TT_ObjCForIn; 401 break; 402 default: 403 break; 404 } 405 return true; 406 } 407 408 void parseIncludeDirective() { 409 next(); 410 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 411 next(); 412 while (CurrentToken != NULL) { 413 if (CurrentToken->isNot(tok::comment) || 414 !CurrentToken->Children.empty()) 415 CurrentToken->Type = TT_ImplicitStringLiteral; 416 next(); 417 } 418 } else { 419 while (CurrentToken != NULL) { 420 if (CurrentToken->is(tok::string_literal)) 421 // Mark these string literals as "implicit" literals, too, so that 422 // they are not split or line-wrapped. 423 CurrentToken->Type = TT_ImplicitStringLiteral; 424 next(); 425 } 426 } 427 } 428 429 void parseWarningOrError() { 430 next(); 431 // We still want to format the whitespace left of the first token of the 432 // warning or error. 433 next(); 434 while (CurrentToken != NULL) { 435 CurrentToken->Type = TT_ImplicitStringLiteral; 436 next(); 437 } 438 } 439 440 void parsePreprocessorDirective() { 441 next(); 442 if (CurrentToken == NULL) 443 return; 444 // Hashes in the middle of a line can lead to any strange token 445 // sequence. 446 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 447 return; 448 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 449 case tok::pp_include: 450 case tok::pp_import: 451 parseIncludeDirective(); 452 break; 453 case tok::pp_error: 454 case tok::pp_warning: 455 parseWarningOrError(); 456 break; 457 default: 458 break; 459 } 460 while (CurrentToken != NULL) 461 next(); 462 } 463 464public: 465 LineType parseLine() { 466 int PeriodsAndArrows = 0; 467 AnnotatedToken *LastPeriodOrArrow = NULL; 468 bool CanBeBuilderTypeStmt = true; 469 if (CurrentToken->is(tok::hash)) { 470 parsePreprocessorDirective(); 471 return LT_PreprocessorDirective; 472 } 473 while (CurrentToken != NULL) { 474 if (CurrentToken->is(tok::kw_virtual)) 475 KeywordVirtualFound = true; 476 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow)) { 477 ++PeriodsAndArrows; 478 LastPeriodOrArrow = CurrentToken; 479 } 480 AnnotatedToken *TheToken = CurrentToken; 481 if (!consumeToken()) 482 return LT_Invalid; 483 if (getPrecedence(*TheToken) > prec::Assignment && 484 TheToken->Type == TT_BinaryOperator) 485 CanBeBuilderTypeStmt = false; 486 } 487 if (KeywordVirtualFound) 488 return LT_VirtualFunctionDecl; 489 490 // Assume a builder-type call if there are 2 or more "." and "->". 491 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) { 492 LastPeriodOrArrow->LastInChainOfCalls = true; 493 return LT_BuilderTypeCall; 494 } 495 496 if (Line.First.Type == TT_ObjCMethodSpecifier) { 497 if (Contexts.back().FirstObjCSelectorName != NULL) 498 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 499 Contexts.back().LongestObjCSelectorName; 500 return LT_ObjCMethodDecl; 501 } 502 503 return LT_Other; 504 } 505 506private: 507 void next() { 508 if (CurrentToken != NULL) { 509 determineTokenType(*CurrentToken); 510 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 511 } 512 513 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 514 CurrentToken = &CurrentToken->Children[0]; 515 else 516 CurrentToken = NULL; 517 518 // Reset token type in case we have already looked at it and then recovered 519 // from an error (e.g. failure to find the matching >). 520 if (CurrentToken != NULL) 521 CurrentToken->Type = TT_Unknown; 522 } 523 524 /// \brief A struct to hold information valid in a specific context, e.g. 525 /// a pair of parenthesis. 526 struct Context { 527 Context(unsigned BindingStrength, bool IsExpression) 528 : BindingStrength(BindingStrength), LongestObjCSelectorName(0), 529 ColonIsForRangeExpr(false), ColonIsObjCMethodExpr(false), 530 FirstObjCSelectorName(NULL), IsExpression(IsExpression) {} 531 532 unsigned BindingStrength; 533 unsigned LongestObjCSelectorName; 534 bool ColonIsForRangeExpr; 535 bool ColonIsObjCMethodExpr; 536 AnnotatedToken *FirstObjCSelectorName; 537 bool IsExpression; 538 }; 539 540 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 541 /// of each instance. 542 struct ScopedContextCreator { 543 AnnotatingParser &P; 544 545 ScopedContextCreator(AnnotatingParser &P, unsigned Increase) : P(P) { 546 P.Contexts.push_back(Context(P.Contexts.back().BindingStrength + Increase, 547 P.Contexts.back().IsExpression)); 548 } 549 550 ~ScopedContextCreator() { P.Contexts.pop_back(); } 551 }; 552 553 void determineTokenType(AnnotatedToken &Current) { 554 if (getPrecedence(Current) == prec::Assignment) { 555 Contexts.back().IsExpression = true; 556 for (AnnotatedToken *Previous = Current.Parent; 557 Previous && Previous->isNot(tok::comma); 558 Previous = Previous->Parent) { 559 if (Previous->is(tok::r_square)) 560 Previous = Previous->MatchingParen; 561 if (Previous->Type == TT_BinaryOperator && 562 (Previous->is(tok::star) || Previous->is(tok::amp))) { 563 Previous->Type = TT_PointerOrReference; 564 } 565 } 566 } else if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) || 567 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 568 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { 569 Contexts.back().IsExpression = true; 570 } else if (Current.is(tok::r_paren) || Current.is(tok::greater) || 571 Current.is(tok::comma)) { 572 for (AnnotatedToken *Previous = Current.Parent; 573 Previous && (Previous->is(tok::star) || Previous->is(tok::amp)); 574 Previous = Previous->Parent) 575 Previous->Type = TT_PointerOrReference; 576 } else if (Current.Parent && 577 Current.Parent->Type == TT_CtorInitializerColon) { 578 Contexts.back().IsExpression = true; 579 } 580 581 if (Current.Type == TT_Unknown) { 582 if (Current.Parent && Current.is(tok::identifier) && 583 ((Current.Parent->is(tok::identifier) && 584 Current.Parent->FormatTok.Tok.getIdentifierInfo() 585 ->getPPKeywordID() == tok::pp_not_keyword) || 586 Current.Parent->Type == TT_PointerOrReference || 587 Current.Parent->Type == TT_TemplateCloser)) { 588 Current.Type = TT_StartOfName; 589 } else if (Current.is(tok::star) || Current.is(tok::amp)) { 590 Current.Type = 591 determineStarAmpUsage(Current, Contexts.back().IsExpression); 592 } else if (Current.is(tok::minus) || Current.is(tok::plus) || 593 Current.is(tok::caret)) { 594 Current.Type = determinePlusMinusCaretUsage(Current); 595 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) { 596 Current.Type = determineIncrementUsage(Current); 597 } else if (Current.is(tok::exclaim)) { 598 Current.Type = TT_UnaryOperator; 599 } else if (isBinaryOperator(Current)) { 600 Current.Type = TT_BinaryOperator; 601 } else if (Current.is(tok::comment)) { 602 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 603 Lex.getLangOpts())); 604 if (StringRef(Data).startswith("//")) 605 Current.Type = TT_LineComment; 606 else 607 Current.Type = TT_BlockComment; 608 } else if (Current.is(tok::r_paren)) { 609 bool ParensNotExpr = !Current.Parent || 610 Current.Parent->Type == TT_PointerOrReference || 611 Current.Parent->Type == TT_TemplateCloser; 612 bool ParensCouldEndDecl = 613 !Current.Children.empty() && (Current.Children[0].is(tok::equal) || 614 Current.Children[0].is(tok::semi) || 615 Current.Children[0].is(tok::l_brace)); 616 if (ParensNotExpr && !ParensCouldEndDecl && 617 Contexts.back().IsExpression) 618 // FIXME: We need to get smarter and understand more cases of casts. 619 Current.Type = TT_CastRParen; 620 } else if (Current.is(tok::at) && Current.Children.size()) { 621 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 622 case tok::objc_interface: 623 case tok::objc_implementation: 624 case tok::objc_protocol: 625 Current.Type = TT_ObjCDecl; 626 break; 627 case tok::objc_property: 628 Current.Type = TT_ObjCProperty; 629 break; 630 default: 631 break; 632 } 633 } 634 } 635 } 636 637 /// \brief Return the type of the given token assuming it is * or &. 638 TokenType 639 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { 640 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 641 if (PrevToken == NULL) 642 return TT_UnaryOperator; 643 644 const AnnotatedToken *NextToken = getNextToken(Tok); 645 if (NextToken == NULL) 646 return TT_Unknown; 647 648 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) || 649 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) || 650 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) || 651 PrevToken->is(tok::equal) || PrevToken->Type == TT_BinaryOperator || 652 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 653 return TT_UnaryOperator; 654 655 if (NextToken->is(tok::l_square)) 656 return TT_PointerOrReference; 657 658 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) || 659 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() || 660 isUnaryOperator(*NextToken) || NextToken->is(tok::l_paren) || 661 NextToken->is(tok::l_square)) 662 return TT_BinaryOperator; 663 664 // It is very unlikely that we are going to find a pointer or reference type 665 // definition on the RHS of an assignment. 666 if (IsExpression) 667 return TT_BinaryOperator; 668 669 return TT_PointerOrReference; 670 } 671 672 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 673 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 674 if (PrevToken == NULL) 675 return TT_UnaryOperator; 676 677 // Use heuristics to recognize unary operators. 678 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) || 679 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) || 680 PrevToken->is(tok::question) || PrevToken->is(tok::colon) || 681 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) || 682 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace)) 683 return TT_UnaryOperator; 684 685 // There can't be two consecutive binary operators. 686 if (PrevToken->Type == TT_BinaryOperator) 687 return TT_UnaryOperator; 688 689 // Fall back to marking the token as binary operator. 690 return TT_BinaryOperator; 691 } 692 693 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 694 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 695 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 696 if (PrevToken == NULL) 697 return TT_UnaryOperator; 698 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) || 699 PrevToken->is(tok::identifier)) 700 return TT_TrailingUnaryOperator; 701 702 return TT_UnaryOperator; 703 } 704 705 SmallVector<Context, 8> Contexts; 706 707 SourceManager &SourceMgr; 708 Lexer &Lex; 709 AnnotatedLine &Line; 710 AnnotatedToken *CurrentToken; 711 bool KeywordVirtualFound; 712 IdentifierInfo &Ident_in; 713}; 714 715/// \brief Parses binary expressions by inserting fake parenthesis based on 716/// operator precedence. 717class ExpressionParser { 718public: 719 ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} 720 721 /// \brief Parse expressions with the given operatore precedence. 722 void parse(int Precedence = 0) { 723 if (Precedence > prec::PointerToMember || Current == NULL) 724 return; 725 726 // Skip over "return" until we can properly parse it. 727 if (Current->is(tok::kw_return)) 728 next(); 729 730 // Eagerly consume trailing comments. 731 while (isTrailingComment(Current)) { 732 next(); 733 } 734 735 AnnotatedToken *Start = Current; 736 bool OperatorFound = false; 737 738 while (Current) { 739 // Consume operators with higher precedence. 740 parse(prec::Level(Precedence + 1)); 741 742 int CurrentPrecedence = 0; 743 if (Current) { 744 if (Current->Type == TT_ConditionalExpr) 745 CurrentPrecedence = 1 + (int) prec::Conditional; 746 else if (Current->is(tok::semi)) 747 CurrentPrecedence = 1; 748 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 749 CurrentPrecedence = 1 + (int) getPrecedence(*Current); 750 } 751 752 // At the end of the line or when an operator with higher precedence is 753 // found, insert fake parenthesis and return. 754 if (Current == NULL || closesScope(*Current) || 755 (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { 756 if (OperatorFound) { 757 ++Start->FakeLParens; 758 if (Current) 759 ++Current->Parent->FakeRParens; 760 } 761 return; 762 } 763 764 // Consume scopes: (), [], <> and {} 765 if (opensScope(*Current)) { 766 AnnotatedToken *Left = Current; 767 while (Current && !closesScope(*Current)) { 768 next(); 769 parse(); 770 } 771 // Remove fake parens that just duplicate the real parens. 772 if (Current && Left->Children[0].FakeLParens > 0 && 773 Current->Parent->FakeRParens > 0) { 774 --Left->Children[0].FakeLParens; 775 --Current->Parent->FakeRParens; 776 } 777 next(); 778 } else { 779 // Operator found. 780 if (CurrentPrecedence == Precedence) 781 OperatorFound = true; 782 783 next(); 784 } 785 } 786 } 787 788private: 789 void next() { 790 if (Current != NULL) 791 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 792 } 793 794 bool closesScope(const AnnotatedToken &Tok) { 795 return Current->is(tok::r_paren) || Current->Type == TT_TemplateCloser || 796 Current->is(tok::r_brace) || Current->is(tok::r_square); 797 } 798 799 bool opensScope(const AnnotatedToken &Tok) { 800 return Current->is(tok::l_paren) || Current->Type == TT_TemplateOpener || 801 Current->is(tok::l_brace) || Current->is(tok::l_square); 802 } 803 804 AnnotatedToken *Current; 805}; 806 807void TokenAnnotator::annotate(AnnotatedLine &Line) { 808 AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); 809 Line.Type = Parser.parseLine(); 810 if (Line.Type == LT_Invalid) 811 return; 812 813 ExpressionParser ExprParser(Line); 814 ExprParser.parse(); 815 816 if (Line.First.Type == TT_ObjCMethodSpecifier) 817 Line.Type = LT_ObjCMethodDecl; 818 else if (Line.First.Type == TT_ObjCDecl) 819 Line.Type = LT_ObjCDecl; 820 else if (Line.First.Type == TT_ObjCProperty) 821 Line.Type = LT_ObjCProperty; 822 823 Line.First.SpacesRequiredBefore = 1; 824 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; 825 Line.First.CanBreakBefore = Line.First.MustBreakBefore; 826 827 Line.First.TotalLength = Line.First.FormatTok.TokenLength; 828} 829 830void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 831 if (Line.First.Children.empty()) 832 return; 833 AnnotatedToken *Current = &Line.First.Children[0]; 834 while (Current != NULL) { 835 if (Current->Type == TT_LineComment) 836 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 837 else 838 Current->SpacesRequiredBefore = 839 spaceRequiredBefore(Line, *Current) ? 1 : 0; 840 841 if (Current->FormatTok.MustBreakBefore) { 842 Current->MustBreakBefore = true; 843 } else if (Current->Type == TT_LineComment) { 844 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; 845 } else if (isTrailingComment(Current->Parent) || 846 (Current->is(tok::string_literal) && 847 Current->Parent->is(tok::string_literal))) { 848 Current->MustBreakBefore = true; 849 } else if (Current->is(tok::lessless) && !Current->Children.empty() && 850 Current->Parent->is(tok::string_literal) && 851 Current->Children[0].is(tok::string_literal)) { 852 Current->MustBreakBefore = true; 853 } else { 854 Current->MustBreakBefore = false; 855 } 856 Current->CanBreakBefore = 857 Current->MustBreakBefore || canBreakBefore(Line, *Current); 858 if (Current->MustBreakBefore) 859 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit; 860 else 861 Current->TotalLength = 862 Current->Parent->TotalLength + Current->FormatTok.TokenLength + 863 Current->SpacesRequiredBefore; 864 // FIXME: Only calculate this if CanBreakBefore is true once static 865 // initializers etc. are sorted out. 866 // FIXME: Move magic numbers to a better place. 867 Current->SplitPenalty = 868 20 * Current->BindingStrength + splitPenalty(Line, *Current); 869 870 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 871 } 872} 873 874unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 875 const AnnotatedToken &Tok) { 876 const AnnotatedToken &Left = *Tok.Parent; 877 const AnnotatedToken &Right = Tok; 878 879 if (Right.Type == TT_StartOfName) { 880 if (Line.First.is(tok::kw_for)) 881 return 3; 882 else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1) 883 // FIXME: Clean up hack of using BindingStrength to find top-level names. 884 return Style.PenaltyReturnTypeOnItsOwnLine; 885 else 886 return 100; 887 } 888 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 889 return 150; 890 if (Left.is(tok::coloncolon)) 891 return 500; 892 893 if (Left.Type == TT_RangeBasedForLoopColon || 894 Left.Type == TT_InheritanceColon) 895 return 2; 896 897 if (Right.is(tok::arrow) || Right.is(tok::period)) { 898 if (Line.Type == LT_BuilderTypeCall) 899 return 5; 900 if ((Left.is(tok::r_paren) || Left.is(tok::r_square)) && 901 Left.MatchingParen && Left.MatchingParen->ParameterCount > 0) 902 return 20; // Should be smaller than breaking at a nested comma. 903 return 150; 904 } 905 906 // In for-loops, prefer breaking at ',' and ';'. 907 if (Line.First.is(tok::kw_for) && Left.is(tok::equal)) 908 return 4; 909 910 if (Left.is(tok::semi)) 911 return 0; 912 if (Left.is(tok::comma)) 913 return 1; 914 915 // In Objective-C method expressions, prefer breaking before "param:" over 916 // breaking after it. 917 if (Right.Type == TT_ObjCSelectorName) 918 return 0; 919 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 920 return 20; 921 922 if (Left.is(tok::l_paren) || Left.is(tok::l_square) || 923 Left.is(tok::l_brace) || Left.Type == TT_TemplateOpener) 924 return 20; 925 926 if (Right.is(tok::lessless)) { 927 if (Left.is(tok::string_literal)) { 928 char LastChar = 929 StringRef(Left.FormatTok.Tok.getLiteralData(), 930 Left.FormatTok.TokenLength).drop_back(1).rtrim().back(); 931 if (LastChar == ':' || LastChar == '=') 932 return 100; 933 } 934 return prec::Shift; 935 } 936 if (Left.Type == TT_ConditionalExpr) 937 return prec::Conditional; 938 prec::Level Level = getPrecedence(Left); 939 940 if (Level != prec::Unknown) 941 return Level; 942 943 return 3; 944} 945 946bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 947 const AnnotatedToken &Left, 948 const AnnotatedToken &Right) { 949 if (Right.is(tok::hashhash)) 950 return Left.is(tok::hash); 951 if (Left.is(tok::hashhash) || Left.is(tok::hash)) 952 return Right.is(tok::hash); 953 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) 954 return false; 955 if (Right.is(tok::less) && 956 (Left.is(tok::kw_template) || 957 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 958 return true; 959 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 960 return false; 961 if (Left.is(tok::exclaim) || Left.is(tok::tilde)) 962 return false; 963 if (Left.is(tok::at) && 964 (Right.is(tok::identifier) || Right.is(tok::string_literal) || 965 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) || 966 Right.is(tok::l_paren) || Right.is(tok::l_brace) || 967 Right.is(tok::kw_true) || Right.is(tok::kw_false))) 968 return false; 969 if (Left.is(tok::coloncolon)) 970 return false; 971 if (Right.is(tok::coloncolon)) 972 return Left.isNot(tok::identifier) && Left.isNot(tok::greater) && 973 Left.isNot(tok::l_paren); 974 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) 975 return false; 976 if (Right.is(tok::amp) || Right.is(tok::star)) 977 return Left.FormatTok.Tok.isLiteral() || 978 (Left.isNot(tok::star) && Left.isNot(tok::amp) && 979 Left.isNot(tok::l_paren) && !Style.PointerBindsToType); 980 if (Left.is(tok::amp) || Left.is(tok::star)) 981 return Right.FormatTok.Tok.isLiteral() || 982 (Right.isNot(tok::star) && Right.isNot(tok::amp) && 983 Style.PointerBindsToType); 984 if (Right.is(tok::star) && Left.is(tok::l_paren)) 985 return false; 986 if (Left.is(tok::l_square)) 987 return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square); 988 if (Right.is(tok::r_square)) 989 return Right.Type == TT_ObjCArrayLiteral; 990 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 991 return false; 992 if (Left.is(tok::period) || Right.is(tok::period)) 993 return false; 994 if (Left.is(tok::colon)) 995 return Left.Type != TT_ObjCMethodExpr; 996 if (Right.is(tok::colon)) 997 return Right.Type != TT_ObjCMethodExpr; 998 if (Left.is(tok::l_paren)) 999 return false; 1000 if (Right.is(tok::l_paren)) { 1001 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) || 1002 Left.is(tok::kw_for) || Left.is(tok::kw_while) || 1003 Left.is(tok::kw_switch) || Left.is(tok::kw_return) || 1004 Left.is(tok::kw_catch) || Left.is(tok::kw_new) || 1005 Left.is(tok::kw_delete); 1006 } 1007 if (Left.is(tok::at) && 1008 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1009 return false; 1010 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1011 return false; 1012 return true; 1013} 1014 1015bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1016 const AnnotatedToken &Tok) { 1017 if (Tok.FormatTok.Tok.getIdentifierInfo() && 1018 Tok.Parent->FormatTok.Tok.getIdentifierInfo()) 1019 return true; // Never ever merge two identifiers. 1020 if (Line.Type == LT_ObjCMethodDecl) { 1021 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1022 return true; 1023 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1024 // Don't space between ')' and <id> 1025 return false; 1026 } 1027 if (Line.Type == LT_ObjCProperty && 1028 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1029 return false; 1030 1031 if (Tok.Parent->is(tok::comma)) 1032 return true; 1033 if (Tok.is(tok::comma)) 1034 return false; 1035 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1036 return true; 1037 if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator)) 1038 return false; 1039 if (Tok.Type == TT_OverloadedOperatorLParen) 1040 return false; 1041 if (Tok.is(tok::colon)) 1042 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() && 1043 Tok.Type != TT_ObjCMethodExpr; 1044 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) 1045 return false; 1046 if (Tok.Type == TT_UnaryOperator) 1047 return Tok.Parent->isNot(tok::l_paren) && 1048 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) && 1049 (Tok.Parent->isNot(tok::colon) || 1050 Tok.Parent->Type != TT_ObjCMethodExpr); 1051 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1052 return Tok.Type == TT_TemplateCloser && 1053 Tok.Parent->Type == TT_TemplateCloser && 1054 Style.Standard != FormatStyle::LS_Cpp11; 1055 } 1056 if (Tok.is(tok::arrowstar) || Tok.Parent->is(tok::arrowstar)) 1057 return false; 1058 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1059 return true; 1060 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1061 return false; 1062 if (Tok.is(tok::less) && Line.First.is(tok::hash)) 1063 return true; 1064 if (Tok.Type == TT_TrailingUnaryOperator) 1065 return false; 1066 return spaceRequiredBetween(Line, *Tok.Parent, Tok); 1067} 1068 1069bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 1070 const AnnotatedToken &Right) { 1071 const AnnotatedToken &Left = *Right.Parent; 1072 if (Right.Type == TT_StartOfName) 1073 return true; 1074 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1075 return false; 1076 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1077 return true; 1078 if (Right.Type == TT_ObjCSelectorName) 1079 return true; 1080 if (Left.ClosesTemplateDeclaration) 1081 return true; 1082 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1083 return true; 1084 if (Right.Type == TT_RangeBasedForLoopColon || 1085 Right.Type == TT_InheritanceColon) 1086 return false; 1087 if (Left.Type == TT_RangeBasedForLoopColon || 1088 Left.Type == TT_InheritanceColon) 1089 return true; 1090 if (Right.Type == TT_RangeBasedForLoopColon) 1091 return false; 1092 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1093 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || 1094 Left.is(tok::question) || Left.is(tok::kw_operator)) 1095 return false; 1096 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1097 return false; 1098 if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent && 1099 Left.Parent->is(tok::kw___attribute)) 1100 return false; 1101 1102 if (Right.Type == TT_LineComment) 1103 // We rely on MustBreakBefore being set correctly here as we should not 1104 // change the "binding" behavior of a comment. 1105 return false; 1106 1107 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1108 // unless it is follow by ';', '{' or '='. 1109 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1110 Left.Parent->is(tok::r_paren)) 1111 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) && 1112 Right.isNot(tok::equal); 1113 1114 // We only break before r_brace if there was a corresponding break before 1115 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1116 if (Right.is(tok::r_brace)) 1117 return false; 1118 1119 if (Right.is(tok::r_paren) || Right.is(tok::greater)) 1120 return false; 1121 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1122 return true; 1123 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1124 Left.is(tok::comma) || Right.is(tok::lessless) || 1125 Right.is(tok::arrow) || Right.is(tok::period) || 1126 Right.is(tok::colon) || Left.is(tok::coloncolon) || 1127 Left.is(tok::semi) || Left.is(tok::l_brace) || 1128 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1129 (Right.is(tok::identifier) || Right.is(tok::kw___attribute))) || 1130 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1131 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1132} 1133 1134} // namespace format 1135} // namespace clang 1136