TokenAnnotator.cpp revision d3cf17b5f1fed43dbd0cd35c43d15139803c9c84
1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a token annotator, i.e. creates 12/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13/// 14//===----------------------------------------------------------------------===// 15 16#include "TokenAnnotator.h" 17#include "clang/Basic/SourceManager.h" 18#include "clang/Lex/Lexer.h" 19 20namespace clang { 21namespace format { 22 23static bool isUnaryOperator(const AnnotatedToken &Tok) { 24 switch (Tok.FormatTok.Tok.getKind()) { 25 case tok::plus: 26 case tok::plusplus: 27 case tok::minus: 28 case tok::minusminus: 29 case tok::exclaim: 30 case tok::tilde: 31 case tok::kw_sizeof: 32 case tok::kw_alignof: 33 return true; 34 default: 35 return false; 36 } 37} 38 39static bool isBinaryOperator(const AnnotatedToken &Tok) { 40 // Comma is a binary operator, but does not behave as such wrt. formatting. 41 return getPrecedence(Tok) > prec::Comma; 42} 43 44// Returns the previous token ignoring comments. 45static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) { 46 AnnotatedToken *PrevToken = Tok.Parent; 47 while (PrevToken != NULL && PrevToken->is(tok::comment)) 48 PrevToken = PrevToken->Parent; 49 return PrevToken; 50} 51static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { 52 return getPreviousToken(const_cast<AnnotatedToken &>(Tok)); 53} 54 55static bool isTrailingComment(AnnotatedToken *Tok) { 56 return Tok != NULL && Tok->is(tok::comment) && 57 (Tok->Children.empty() || 58 Tok->Children[0].FormatTok.NewlinesBefore > 0); 59} 60 61// Returns the next token ignoring comments. 
// Follows the first-child chain starting at Tok; yields NULL if the chain ends
// before a non-comment token is found.
static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
  if (Tok.Children.empty())
    return NULL;
  const AnnotatedToken *NextToken = &Tok.Children[0];
  while (NextToken->is(tok::comment)) {
    if (NextToken->Children.empty())
      return NULL;
    NextToken = &NextToken->Children[0];
  }
  return NextToken;
}

// True for ')', '}', ']' and for any token typed TT_TemplateCloser.
static bool closesScope(const AnnotatedToken &Tok) {
  return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
         Tok.Type == TT_TemplateCloser;
}

// True for '(', '{', '[' and for any token typed TT_TemplateOpener.
static bool opensScope(const AnnotatedToken &Tok) {
  return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
         Tok.Type == TT_TemplateOpener;
}

/// \brief A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// to store parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
class AnnotatingParser {
public:
  /// \brief Starts at the first token of \p Line with one top-level context
  /// (binding strength 1, not an expression).
  AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
                   IdentifierInfo &Ident_in)
      : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
        KeywordVirtualFound(false), Ident_in(Ident_in) {
    Contexts.push_back(Context(1, /*IsExpression=*/ false));
  }

private:
  /// \brief Parses what follows a '<' as a template argument list.
  ///
  /// Returns true once a '>' is found; that token is typed TT_TemplateCloser
  /// and linked with the token preceding the list via MatchingParen. Returns
  /// false if the line ends first, if one of ) ] } || && ? : is reached, or if
  /// a nested consumeToken() fails.
  bool parseAngle() {
    if (CurrentToken == NULL)
      return false;
    // New context for the duration of this call, binding strength +10.
    ScopedContextCreator ContextCreator(*this, 10);
    // The callers in this class advance past the '<' first, so Left is the '<'.
    AnnotatedToken *Left = CurrentToken->Parent;
    Contexts.back().IsExpression = false;
    while (CurrentToken != NULL) {
      if (CurrentToken->is(tok::greater)) {
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        CurrentToken->Type = TT_TemplateCloser;
        next();
        return true;
      }
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
                                tok::pipepipe, tok::ampamp, tok::question,
                                tok::colon))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (!consumeToken())
        return false;
    }
    return false;
  }

  /// \brief Parses the contents of a parenthesized group up to and including
  /// the matching ')'.
  ///
  /// \param LookForDecls When true, an 'identifier (*|&|&&) identifier'
  /// sequence not followed by '=' gets its operator typed TT_BinaryOperator
  /// (done at most once per call).
  ///
  /// Returns true if the ')' was found; false if the line ends first, a ']'
  /// or '}' is reached, or a nested consumeToken() fails.
  bool parseParens(bool LookForDecls = false) {
    if (CurrentToken == NULL)
      return false;
    // New context for the duration of this call, binding strength +1.
    ScopedContextCreator ContextCreator(*this, 1);

    // FIXME: This is a bit of a hack. Do better.
    // Only the parens directly below the outermost context inherit its
    // ColonIsForRangeExpr flag (consumeToken() sets it when it sees 'for').
    Contexts.back().ColonIsForRangeExpr =
        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;

    bool StartsObjCMethodExpr = false;
    AnnotatedToken *Left = CurrentToken->Parent;
    if (CurrentToken->is(tok::caret)) {
      // The token right after Left is '^': type Left as a block paren.
      Left->Type = TT_ObjCBlockLParen;
    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
      // @selector( starts a selector.
      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
          MaybeSel->Parent->is(tok::at)) {
        StartsObjCMethodExpr = true;
      }
    }

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->Type = TT_ObjCMethodExpr;
    }

    while (CurrentToken != NULL) {
      // LookForDecls is set when "if (" or "while (" has been seen. Check for
      // 'identifier' '*' 'identifier' followed by not '=' -- this
      // '*' has to be a binary operator but determineStarAmpUsage() will
      // categorize it as an unary operator, so set the right type here.
      if (LookForDecls && !CurrentToken->Children.empty()) {
        AnnotatedToken &Prev = *CurrentToken->Parent;
        AnnotatedToken &Next = CurrentToken->Children[0];
        if (Prev.Parent->is(tok::identifier) &&
            Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
          Prev.Type = TT_BinaryOperator;
          LookForDecls = false;
        }
      }

      if (CurrentToken->is(tok::r_paren)) {
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;

        if (StartsObjCMethodExpr) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          // Publish the longest selector-name length seen in this context on
          // the first selector name.
          if (Contexts.back().FirstObjCSelectorName != NULL) {
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
                Contexts.back().LongestObjCSelectorName;
          }
        }

        next();
        return true;
      }
      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (!consumeToken())
        return false;
    }
    return false;
  }

  /// \brief Parses the contents of a square-bracket group up to and including
  /// the matching ']', typing both brackets as TT_ObjCMethodExpr or
  /// TT_ObjCArrayLiteral where the heuristics below apply.
  ///
  /// Returns true if the ']' was found; false if the line ends first, a ')'
  /// or '}' is reached, or a nested consumeToken() fails.
  bool parseSquare() {
    if (!CurrentToken)
      return false;

    // A '[' could be an index subscript (after an identifier or after
    // ')' or ']'), it could be the start of an Objective-C method
    // expression, or it could be the start of an Objective-C array literal.
    AnnotatedToken *Left = CurrentToken->Parent;
    // Nearest non-comment token before Left; may be NULL.
    AnnotatedToken *Parent = getPreviousToken(*Left);
    bool StartsObjCMethodExpr =
        Contexts.back().CanBeExpression &&
        (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
                                    tok::kw_return, tok::kw_throw) ||
         isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn ||
         Parent->Type == TT_CastRParen ||
         getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
             prec::Unknown);
    // The check above reads the enclosing context; the new one (binding
    // strength +10) is only pushed afterwards.
    ScopedContextCreator ContextCreator(*this, 10);
    Contexts.back().IsExpression = true;
    bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->Type = TT_ObjCMethodExpr;
    } else if (StartsObjCArrayLiteral) {
      Left->Type = TT_ObjCArrayLiteral;
    }

    while (CurrentToken != NULL) {
      if (CurrentToken->is(tok::r_square)) {
        if (!CurrentToken->Children.empty() &&
            CurrentToken->Children[0].is(tok::l_paren)) {
          // An ObjC method call is rarely followed by an open parenthesis.
          // FIXME: Do we incorrectly label ":" with this?
          StartsObjCMethodExpr = false;
          Left->Type = TT_Unknown;
        }
        if (StartsObjCMethodExpr) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          // determineStarAmpUsage() thinks that '*' '[' is allocating an
          // array of pointers, but if '[' starts a selector then '*' is a
          // binary operator.
          if (Parent != NULL && Parent->Type == TT_PointerOrReference)
            Parent->Type = TT_BinaryOperator;
        } else if (StartsObjCArrayLiteral) {
          CurrentToken->Type = TT_ObjCArrayLiteral;
        }
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        if (Contexts.back().FirstObjCSelectorName != NULL)
          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
              Contexts.back().LongestObjCSelectorName;
        next();
        return true;
      }
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (!consumeToken())
        return false;
    }
    return false;
  }

  /// \brief Parses the contents of a brace group up to and including the
  /// matching '}'.
  ///
  /// Unlike the other group parsers, running out of tokens is not an error
  /// here: true is returned both when the '}' is found and when the line ends
  /// without one. Returns false on ')' or ']' or when a nested
  /// consumeToken() fails.
  bool parseBrace() {
    // Lines are fine to end with '{'.
    if (CurrentToken == NULL)
      return true;
    ScopedContextCreator ContextCreator(*this, 1);
    AnnotatedToken *Left = CurrentToken->Parent;
    while (CurrentToken != NULL) {
      if (CurrentToken->is(tok::r_brace)) {
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        next();
        return true;
      }
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (!consumeToken())
        return false;
    }
    return true;
  }

  /// \brief Maintains \c Left->ParameterCount while scanning a group: every
  /// comma adds one, and the first token that is neither a comma nor a
  /// comment raises the count from 0 to 1.
  void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
    if (Current->is(tok::comma))
      ++Left->ParameterCount;
    else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
      Left->ParameterCount = 1;
  }

  /// \brief Consumes tokens up to and including the next ':' seen at this
  /// level and types that colon TT_ConditionalExpr.
  ///
  /// Returns false if no such ':' is found or a nested consumeToken() fails.
  bool parseConditional() {
    while (CurrentToken != NULL) {
      if (CurrentToken->is(tok::colon)) {
        CurrentToken->Type = TT_ConditionalExpr;
        next();
        return true;
      }
      if (!consumeToken())
        return false;
    }
    return false;
  }

  /// \brief If the current token is '<', parses the angle-bracket list that
  /// follows it; returns false if there is no '<' or the list fails to parse.
  bool parseTemplateDeclaration() {
    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
      CurrentToken->Type = TT_TemplateOpener;
      next();
      if (!parseAngle())
        return false;
if (CurrentToken != NULL) 304 CurrentToken->Parent->ClosesTemplateDeclaration = true; 305 return true; 306 } 307 return false; 308 } 309 310 bool consumeToken() { 311 AnnotatedToken *Tok = CurrentToken; 312 next(); 313 switch (Tok->FormatTok.Tok.getKind()) { 314 case tok::plus: 315 case tok::minus: 316 // At the start of the line, +/- specific ObjectiveC method 317 // declarations. 318 if (Tok->Parent == NULL) 319 Tok->Type = TT_ObjCMethodSpecifier; 320 break; 321 case tok::colon: 322 // Colons from ?: are handled in parseConditional(). 323 if (Tok->Parent->is(tok::r_paren)) { 324 Tok->Type = TT_CtorInitializerColon; 325 } else if (Contexts.back().ColonIsObjCMethodExpr || 326 Line.First.Type == TT_ObjCMethodSpecifier) { 327 Tok->Type = TT_ObjCMethodExpr; 328 Tok->Parent->Type = TT_ObjCSelectorName; 329 if (Tok->Parent->FormatTok.TokenLength > 330 Contexts.back().LongestObjCSelectorName) 331 Contexts.back().LongestObjCSelectorName = 332 Tok->Parent->FormatTok.TokenLength; 333 if (Contexts.back().FirstObjCSelectorName == NULL) 334 Contexts.back().FirstObjCSelectorName = Tok->Parent; 335 } else if (Contexts.back().ColonIsForRangeExpr) { 336 Tok->Type = TT_RangeBasedForLoopColon; 337 } else if (Contexts.size() == 1) { 338 Tok->Type = TT_InheritanceColon; 339 } 340 break; 341 case tok::kw_if: 342 case tok::kw_while: 343 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 344 next(); 345 if (!parseParens(/*LookForDecls=*/ true)) 346 return false; 347 } 348 break; 349 case tok::kw_for: 350 Contexts.back().ColonIsForRangeExpr = true; 351 next(); 352 if (!parseParens()) 353 return false; 354 break; 355 case tok::l_paren: 356 if (!parseParens()) 357 return false; 358 if (Line.MustBeDeclaration) 359 Line.MightBeFunctionDecl = true; 360 break; 361 case tok::l_square: 362 if (!parseSquare()) 363 return false; 364 break; 365 case tok::l_brace: 366 if (!parseBrace()) 367 return false; 368 break; 369 case tok::less: 370 if (parseAngle()) 371 Tok->Type = 
TT_TemplateOpener; 372 else { 373 Tok->Type = TT_BinaryOperator; 374 CurrentToken = Tok; 375 next(); 376 } 377 break; 378 case tok::r_paren: 379 case tok::r_square: 380 return false; 381 case tok::r_brace: 382 // Lines can start with '}'. 383 if (Tok->Parent != NULL) 384 return false; 385 break; 386 case tok::greater: 387 Tok->Type = TT_BinaryOperator; 388 break; 389 case tok::kw_operator: 390 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { 391 if (CurrentToken->isOneOf(tok::star, tok::amp)) 392 CurrentToken->Type = TT_PointerOrReference; 393 consumeToken(); 394 } 395 if (CurrentToken) 396 CurrentToken->Type = TT_OverloadedOperatorLParen; 397 break; 398 case tok::question: 399 parseConditional(); 400 break; 401 case tok::kw_template: 402 parseTemplateDeclaration(); 403 break; 404 case tok::identifier: 405 if (Line.First.is(tok::kw_for) && 406 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) 407 Tok->Type = TT_ObjCForIn; 408 break; 409 default: 410 break; 411 } 412 return true; 413 } 414 415 void parseIncludeDirective() { 416 next(); 417 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 418 next(); 419 while (CurrentToken != NULL) { 420 if (CurrentToken->isNot(tok::comment) || 421 !CurrentToken->Children.empty()) 422 CurrentToken->Type = TT_ImplicitStringLiteral; 423 next(); 424 } 425 } else { 426 while (CurrentToken != NULL) { 427 if (CurrentToken->is(tok::string_literal)) 428 // Mark these string literals as "implicit" literals, too, so that 429 // they are not split or line-wrapped. 430 CurrentToken->Type = TT_ImplicitStringLiteral; 431 next(); 432 } 433 } 434 } 435 436 void parseWarningOrError() { 437 next(); 438 // We still want to format the whitespace left of the first token of the 439 // warning or error. 
next();
    while (CurrentToken != NULL) {
      CurrentToken->Type = TT_ImplicitStringLiteral;
      next();
    }
  }

  /// \brief Handles a line that starts with '#': dispatches include/import
  /// and error/warning directives to their parsers and then consumes all
  /// remaining tokens.
  void parsePreprocessorDirective() {
    next();
    if (CurrentToken == NULL)
      return;
    // Hashes in the middle of a line can lead to any strange token
    // sequence.
    if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
      return;
    switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
    case tok::pp_include:
    case tok::pp_import:
      parseIncludeDirective();
      break;
    case tok::pp_error:
    case tok::pp_warning:
      parseWarningOrError();
      break;
    default:
      break;
    }
    // Run next() over whatever is left so every token still goes through
    // determineTokenType().
    while (CurrentToken != NULL)
      next();
  }

public:
  /// \brief Annotates all tokens of the line and classifies it.
  ///
  /// Returns LT_PreprocessorDirective for lines starting with '#',
  /// LT_Invalid as soon as consumeToken() fails, and otherwise, in this
  /// order of priority: LT_VirtualFunctionDecl, LT_BuilderTypeCall,
  /// LT_ObjCMethodDecl or LT_Other.
  LineType parseLine() {
    int PeriodsAndArrows = 0;
    AnnotatedToken *LastPeriodOrArrow = NULL;
    bool CanBeBuilderTypeStmt = true;
    if (CurrentToken->is(tok::hash)) {
      parsePreprocessorDirective();
      return LT_PreprocessorDirective;
    }
    while (CurrentToken != NULL) {
      if (CurrentToken->is(tok::kw_virtual))
        KeywordVirtualFound = true;
      if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
        ++PeriodsAndArrows;
        LastPeriodOrArrow = CurrentToken;
      }
      AnnotatedToken *TheToken = CurrentToken;
      if (!consumeToken())
        return LT_Invalid;
      // A binary operator binding tighter than assignment rules out the
      // builder-type classification below.
      if (getPrecedence(*TheToken) > prec::Assignment &&
          TheToken->Type == TT_BinaryOperator)
        CanBeBuilderTypeStmt = false;
    }
    if (KeywordVirtualFound)
      return LT_VirtualFunctionDecl;

    // Assume a builder-type call if there are 2 or more "." and "->".
    if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
      LastPeriodOrArrow->LastInChainOfCalls = true;
      return LT_BuilderTypeCall;
    }

    if (Line.First.Type == TT_ObjCMethodSpecifier) {
      if (Contexts.back().FirstObjCSelectorName != NULL)
        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
            Contexts.back().LongestObjCSelectorName;
      return LT_ObjCMethodDecl;
    }

    return LT_Other;
  }

private:
  /// \brief Finalizes the current token (type and binding strength) and
  /// advances \c CurrentToken to its first child, or to NULL at the end of
  /// the line.
  void next() {
    if (CurrentToken != NULL) {
      determineTokenType(*CurrentToken);
      CurrentToken->BindingStrength = Contexts.back().BindingStrength;
    }

    if (CurrentToken != NULL && !CurrentToken->Children.empty())
      CurrentToken = &CurrentToken->Children[0];
    else
      CurrentToken = NULL;

    // Reset token type in case we have already looked at it and then recovered
    // from an error (e.g. failure to find the matching >).
    if (CurrentToken != NULL)
      CurrentToken->Type = TT_Unknown;
  }

  /// \brief A struct to hold information valid in a specific context, e.g.
  /// a pair of parenthesis.
  struct Context {
    /// All members other than the two parameters start out as false, 0 or
    /// NULL, except \c CanBeExpression, which starts out true.
    Context(unsigned BindingStrength, bool IsExpression)
        : BindingStrength(BindingStrength), LongestObjCSelectorName(0),
          ColonIsForRangeExpr(false), ColonIsObjCMethodExpr(false),
          FirstObjCSelectorName(NULL), IsExpression(IsExpression),
          CanBeExpression(true) {}

    // Copied onto every token finalized while this context is on top.
    unsigned BindingStrength;
    // Largest TokenLength among selector names seen in this context.
    unsigned LongestObjCSelectorName;
    // How consumeToken() types a ':' seen in this context.
    bool ColonIsForRangeExpr;
    bool ColonIsObjCMethodExpr;
    // First token typed TT_ObjCSelectorName in this context, if any.
    AnnotatedToken *FirstObjCSelectorName;
    // Passed to determineStarAmpUsage() when typing '*', '&' and '&&'.
    bool IsExpression;
    // Cleared by 'new'; gates the ObjC method-expression guess for '['.
    bool CanBeExpression;
  };

  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
  /// of each instance.
551 struct ScopedContextCreator { 552 AnnotatingParser &P; 553 554 ScopedContextCreator(AnnotatingParser &P, unsigned Increase) : P(P) { 555 P.Contexts.push_back(Context(P.Contexts.back().BindingStrength + Increase, 556 P.Contexts.back().IsExpression)); 557 } 558 559 ~ScopedContextCreator() { P.Contexts.pop_back(); } 560 }; 561 562 void determineTokenType(AnnotatedToken &Current) { 563 if (getPrecedence(Current) == prec::Assignment) { 564 Contexts.back().IsExpression = true; 565 for (AnnotatedToken *Previous = Current.Parent; 566 Previous && Previous->isNot(tok::comma); 567 Previous = Previous->Parent) { 568 if (Previous->is(tok::r_square)) 569 Previous = Previous->MatchingParen; 570 if (Previous->Type == TT_BinaryOperator && 571 Previous->isOneOf(tok::star, tok::amp)) { 572 Previous->Type = TT_PointerOrReference; 573 } 574 } 575 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || 576 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 577 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { 578 Contexts.back().IsExpression = true; 579 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 580 for (AnnotatedToken *Previous = Current.Parent; 581 Previous && Previous->isOneOf(tok::star, tok::amp); 582 Previous = Previous->Parent) 583 Previous->Type = TT_PointerOrReference; 584 } else if (Current.Parent && 585 Current.Parent->Type == TT_CtorInitializerColon) { 586 Contexts.back().IsExpression = true; 587 } else if (Current.is(tok::kw_new)) { 588 Contexts.back().CanBeExpression = false; 589 } 590 591 if (Current.Type == TT_Unknown) { 592 if (Current.Parent && Current.is(tok::identifier) && 593 ((Current.Parent->is(tok::identifier) && 594 Current.Parent->FormatTok.Tok.getIdentifierInfo() 595 ->getPPKeywordID() == tok::pp_not_keyword) || 596 Current.Parent->Type == TT_PointerOrReference || 597 Current.Parent->Type == TT_TemplateCloser)) { 598 Current.Type = TT_StartOfName; 599 } else if (Current.isOneOf(tok::star, tok::amp, 
tok::ampamp)) { 600 Current.Type = 601 determineStarAmpUsage(Current, Contexts.back().IsExpression); 602 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 603 Current.Type = determinePlusMinusCaretUsage(Current); 604 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 605 Current.Type = determineIncrementUsage(Current); 606 } else if (Current.is(tok::exclaim)) { 607 Current.Type = TT_UnaryOperator; 608 } else if (isBinaryOperator(Current)) { 609 Current.Type = TT_BinaryOperator; 610 } else if (Current.is(tok::comment)) { 611 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 612 Lex.getLangOpts())); 613 if (StringRef(Data).startswith("//")) 614 Current.Type = TT_LineComment; 615 else 616 Current.Type = TT_BlockComment; 617 } else if (Current.is(tok::r_paren)) { 618 bool ParensNotExpr = !Current.Parent || 619 Current.Parent->Type == TT_PointerOrReference || 620 Current.Parent->Type == TT_TemplateCloser; 621 bool ParensCouldEndDecl = 622 !Current.Children.empty() && 623 Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); 624 bool IsSizeOfOrAlignOf = 625 Current.MatchingParen && Current.MatchingParen->Parent && 626 Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof, 627 tok::kw_alignof); 628 if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 629 Contexts.back().IsExpression) 630 // FIXME: We need to get smarter and understand more cases of casts. 631 Current.Type = TT_CastRParen; 632 } else if (Current.is(tok::at) && Current.Children.size()) { 633 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 634 case tok::objc_interface: 635 case tok::objc_implementation: 636 case tok::objc_protocol: 637 Current.Type = TT_ObjCDecl; 638 break; 639 case tok::objc_property: 640 Current.Type = TT_ObjCProperty; 641 break; 642 default: 643 break; 644 } 645 } 646 } 647 } 648 649 /// \brief Return the type of the given token assuming it is * or &. 
///
/// The checks below run in order and the first match wins. Comments are
/// skipped when looking at the neighbouring tokens. TT_Unknown is returned
/// when a previous token exists but no non-comment token follows.
TokenType
  determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
    if (PrevToken == NULL)
      return TT_UnaryOperator;

    const AnnotatedToken *NextToken = getNextToken(Tok);
    if (NextToken == NULL)
      return TT_Unknown;

    if (PrevToken->is(tok::l_paren) && !IsExpression)
      return TT_PointerOrReference;

    if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
                           tok::comma, tok::semi, tok::kw_return, tok::colon,
                           tok::equal) ||
        PrevToken->Type == TT_BinaryOperator ||
        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
      return TT_UnaryOperator;

    if (NextToken->is(tok::l_square))
      return TT_PointerOrReference;

    if (PrevToken->FormatTok.Tok.isLiteral() ||
        PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
        NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken) ||
        NextToken->isOneOf(tok::l_paren, tok::l_square))
      return TT_BinaryOperator;

    // It is very unlikely that we are going to find a pointer or reference type
    // definition on the RHS of an assignment.
    if (IsExpression)
      return TT_BinaryOperator;

    return TT_PointerOrReference;
  }

  /// \brief Decide whether a '+', '-' or '^' token is a unary or a binary
  /// operator, based on the previous non-comment token.
  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
    if (PrevToken == NULL)
      return TT_UnaryOperator;

    // Use heuristics to recognize unary operators.
    if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
                           tok::question, tok::colon, tok::kw_return,
                           tok::kw_case, tok::at, tok::l_brace))
      return TT_UnaryOperator;

    // There can't be two consecutive binary operators.
    if (PrevToken->Type == TT_BinaryOperator)
      return TT_UnaryOperator;

    // Fall back to marking the token as binary operator.
    return TT_BinaryOperator;
  }

  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
  ///
  /// Returns TT_TrailingUnaryOperator when the previous non-comment token is
  /// ')', ']' or an identifier, and TT_UnaryOperator otherwise.
  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
    if (PrevToken == NULL)
      return TT_UnaryOperator;
    if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
      return TT_TrailingUnaryOperator;

    return TT_UnaryOperator;
  }

  // Stack of nested contexts; the constructor pushes the outermost one and
  // ScopedContextCreator pushes/pops the rest.
  SmallVector<Context, 8> Contexts;

  SourceManager &SourceMgr;
  Lexer &Lex;
  AnnotatedLine &Line;
  // Token to be consumed next; NULL once the end of the line is reached.
  AnnotatedToken *CurrentToken;
  // Set by parseLine() when a 'virtual' keyword is seen.
  bool KeywordVirtualFound;
  // Compared by address in consumeToken() to recognize TT_ObjCForIn.
  IdentifierInfo &Ident_in;
};

/// \brief Parses binary expressions by inserting fake parenthesis based on
/// operator precedence.
class ExpressionParser {
public:
  ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}

  /// \brief Parse expressions with the given operator precedence.
  ///
  /// Recurses with \p Precedence + 1 before looking at each token and does
  /// nothing once \p Precedence exceeds prec::PointerToMember.
  void parse(int Precedence = 0) {
    if (Precedence > prec::PointerToMember || Current == NULL)
      return;

    // Skip over "return" until we can properly parse it.
    if (Current->is(tok::kw_return))
      next();

    // Eagerly consume trailing comments.
    while (isTrailingComment(Current)) {
      next();
    }

    AnnotatedToken *Start = Current;
    bool OperatorFound = false;

    while (Current) {
      // Consume operators with higher precedence.
      parse(prec::Level(Precedence + 1));

      // 0 means "not an operator at this position"; real operators are
      // shifted up by one so that they never map to 0.
      int CurrentPrecedence = 0;
      if (Current) {
        if (Current->Type == TT_ConditionalExpr)
          CurrentPrecedence = 1 + (int) prec::Conditional;
        else if (Current->is(tok::semi))
          CurrentPrecedence = 1;
        else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
          CurrentPrecedence = 1 + (int) getPrecedence(*Current);
      }

      // At the end of the line, at a scope closer, or when an operator with
      // lower precedence than this level is found, insert fake parenthesis
      // and return.
if (Current == NULL || closesScope(*Current) ||
          (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
        // Only wrap if an operator of exactly this level was seen. At the
        // end of the line no closing fake paren is recorded.
        if (OperatorFound) {
          ++Start->FakeLParens;
          if (Current)
            ++Current->Parent->FakeRParens;
        }
        return;
      }

      // Consume scopes: (), [], <> and {}
      if (opensScope(*Current)) {
        AnnotatedToken *Left = Current;
        while (Current && !closesScope(*Current)) {
          next();
          parse();
        }
        // Remove fake parens that just duplicate the real parens.
        if (Current && Left->Children[0].FakeLParens > 0 &&
            Current->Parent->FakeRParens > 0) {
          --Left->Children[0].FakeLParens;
          --Current->Parent->FakeRParens;
        }
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence)
          OperatorFound = true;

        next();
      }
    }
  }

private:
  // Advances to the first child of the current token, or to NULL at the end.
  void next() {
    if (Current != NULL)
      Current = Current->Children.empty() ? NULL : &Current->Children[0];
  }

  AnnotatedToken *Current;
};

/// \brief Runs the annotating parser and, unless the line is invalid, the
/// expression parser over \p Line, then sets the line type and initializes
/// the formatting fields of the first token.
void TokenAnnotator::annotate(AnnotatedLine &Line) {
  AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
  Line.Type = Parser.parseLine();
  if (Line.Type == LT_Invalid)
    return;

  ExpressionParser ExprParser(Line);
  ExprParser.parse();

  // The type of the first token overrides the line type from parseLine().
  if (Line.First.Type == TT_ObjCMethodSpecifier)
    Line.Type = LT_ObjCMethodDecl;
  else if (Line.First.Type == TT_ObjCDecl)
    Line.Type = LT_ObjCDecl;
  else if (Line.First.Type == TT_ObjCProperty)
    Line.Type = LT_ObjCProperty;

  Line.First.SpacesRequiredBefore = 1;
  Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
  Line.First.CanBreakBefore = Line.First.MustBreakBefore;

  Line.First.TotalLength = Line.First.FormatTok.TokenLength;
}

/// \brief For every token after the first, computes SpacesRequiredBefore,
/// MustBreakBefore, CanBreakBefore, TotalLength and SplitPenalty.
///
/// TotalLength is read from the parent token, so the first token's value
/// must already be set (annotate() does that).
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
  if (Line.First.Children.empty())
    return;
  AnnotatedToken *Current = &Line.First.Children[0];
  while (Current != NULL) {
    if (Current->Type == TT_LineComment)
      Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
    else
      Current->SpacesRequiredBefore =
          spaceRequiredBefore(Line, *Current) ? 1 : 0;

    if (Current->FormatTok.MustBreakBefore) {
      Current->MustBreakBefore = true;
    } else if (Current->Type == TT_LineComment) {
      Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
    } else if (isTrailingComment(Current->Parent) ||
               (Current->is(tok::string_literal) &&
                Current->Parent->is(tok::string_literal))) {
      // After a trailing comment, and between adjacent string literals.
      Current->MustBreakBefore = true;
    } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
               Current->Parent->is(tok::string_literal) &&
               Current->Children[0].is(tok::string_literal)) {
      // A "<<" sitting between two string literals.
      Current->MustBreakBefore = true;
    } else {
      Current->MustBreakBefore = false;
    }
    Current->CanBreakBefore =
        Current->MustBreakBefore || canBreakBefore(Line, *Current);
    // A forced break adds a whole ColumnLimit to the running length.
    if (Current->MustBreakBefore)
      Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
    else
      Current->TotalLength =
          Current->Parent->TotalLength + Current->FormatTok.TokenLength +
          Current->SpacesRequiredBefore;
    // FIXME: Only calculate this if CanBreakBefore is true once static
    // initializers etc. are sorted out.
    // FIXME: Move magic numbers to a better place.
    Current->SplitPenalty =
        20 * Current->BindingStrength + splitPenalty(Line, *Current);

    Current = Current->Children.empty() ? NULL : &Current->Children[0];
  }
}

/// \brief Returns the penalty for breaking the line between \p Tok and its
/// parent; the checks run in order and the first match wins.
///
/// \p Tok must have a parent.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const AnnotatedToken &Tok) {
  const AnnotatedToken &Left = *Tok.Parent;
  const AnnotatedToken &Right = Tok;

  if (Right.Type == TT_StartOfName) {
    if (Line.First.is(tok::kw_for))
      return 3;
    else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
      // FIXME: Clean up hack of using BindingStrength to find top-level names.
return Style.PenaltyReturnTypeOnItsOwnLine;
    else
      return 100;
  }
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 150;
  if (Left.is(tok::coloncolon))
    return 500;

  if (Left.Type == TT_RangeBasedForLoopColon ||
      Left.Type == TT_InheritanceColon)
    return 2;

  if (Right.isOneOf(tok::arrow, tok::period)) {
    if (Line.Type == LT_BuilderTypeCall)
      return prec::PointerToMember;
    if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
        Left.MatchingParen->ParameterCount > 0)
      return 20; // Should be smaller than breaking at a nested comma.
    return 150;
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
    return 4;

  if (Left.is(tok::semi))
    return 0;
  if (Left.is(tok::comma))
    return 1;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.Type == TT_ObjCSelectorName)
    return 0;
  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
    return 20;

  if (opensScope(Left))
    return 20;

  if (Right.is(tok::lessless)) {
    if (Left.is(tok::string_literal)) {
      // Last character of the literal's text, ignoring its final character
      // (presumably the closing quote) and trailing whitespace before it.
      char LastChar =
          StringRef(Left.FormatTok.Tok.getLiteralData(),
                    Left.FormatTok.TokenLength).drop_back(1).rtrim().back();
      if (LastChar == ':' || LastChar == '=')
        return 100;
    }
    return prec::Shift;
  }
  if (Left.Type == TT_ConditionalExpr)
    return prec::Conditional;
  prec::Level Level = getPrecedence(Left);

  if (Level != prec::Unknown)
    return Level;

  return 3;
}

/// \brief Token-pair rules deciding whether a space goes between \p Left and
/// \p Right; the checks run in order and the first match wins, with true as
/// the default. Called as the fallback of spaceRequiredBefore().
bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
                                          const AnnotatedToken &Left,
                                          const AnnotatedToken &Right) {
  if (Right.is(tok::hashhash))
    return Left.is(tok::hash);
  if (Left.isOneOf(tok::hashhash, tok::hash))
    return Right.is(tok::hash);
  if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
    return false;
  if (Right.is(tok::less) &&
      (Left.is(tok::kw_template) ||
       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
    return true;
  if (Left.is(tok::arrow) || Right.is(tok::arrow))
    return false;
  if (Left.isOneOf(tok::exclaim, tok::tilde))
    return false;
  if (Left.is(tok::at) &&
      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
                    tok::numeric_constant, tok::l_paren, tok::l_brace,
                    tok::kw_true, tok::kw_false))
    return false;
  if (Left.is(tok::coloncolon))
    return false;
  if (Right.is(tok::coloncolon))
    return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
    return false;
  // Placement of the space around '*' / '&' depends on
  // Style.PointerBindsToType.
  if (Right.Type == TT_PointerOrReference)
    return Left.FormatTok.Tok.isLiteral() ||
           ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
            !Style.PointerBindsToType);
  if (Left.Type == TT_PointerOrReference)
    return Right.FormatTok.Tok.isLiteral() ||
           ((Right.Type != TT_PointerOrReference) && Style.PointerBindsToType);
  if (Right.is(tok::star) && Left.is(tok::l_paren))
    return false;
  if (Left.is(tok::l_square))
    return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
  if (Right.is(tok::r_square))
    return Right.Type == TT_ObjCArrayLiteral;
  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
    return false;
  if (Left.is(tok::period) || Right.is(tok::period))
    return false;
  if (Left.is(tok::colon))
    return Left.Type != TT_ObjCMethodExpr;
  if (Right.is(tok::colon))
    return Right.Type != TT_ObjCMethodExpr;
  if (Left.is(tok::l_paren))
    return false;
  if (Right.is(tok::l_paren)) {
    return Line.Type == LT_ObjCDecl ||
           Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
                        tok::kw_return, tok::kw_catch, tok::kw_new,
                        tok::kw_delete);
  }
  if (Left.is(tok::at) &&
      Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
    return false;
  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
    return false;
  return true;
}

/// \brief Decides whether a space is required between \p Tok and its parent.
///
/// Rules that depend on the line type or on the annotated token types are
/// checked here, in order; anything not decided falls through to
/// spaceRequiredBetween(). \p Tok must have a parent.
bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
                                         const AnnotatedToken &Tok) {
  if (Tok.FormatTok.Tok.getIdentifierInfo() &&
      Tok.Parent->FormatTok.Tok.getIdentifierInfo())
    return true; // Never ever merge two identifiers.
  if (Line.Type == LT_ObjCMethodDecl) {
    if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
      return true;
    if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
      // Don't space between ')' and <id>
      return false;
  }
  if (Line.Type == LT_ObjCProperty &&
      (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
    return false;

  if (Tok.Parent->is(tok::comma))
    return true;
  if (Tok.is(tok::comma))
    return false;
  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
    return true;
  if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
    return false;
  if (Tok.Type == TT_OverloadedOperatorLParen)
    return false;
  if (Tok.is(tok::colon))
    return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
           !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr;
  // '(' followed by a pointer/reference token that is not directly followed
  // by ')'.
  if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
      Tok.Children[0].Type == TT_PointerOrReference &&
      !Tok.Children[0].Children.empty() &&
      Tok.Children[0].Children[0].isNot(tok::r_paren))
    return true;
  if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
    return false;
  if (Tok.Type == TT_UnaryOperator)
    return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
           (Tok.Parent->isNot(tok::colon) ||
            Tok.Parent->Type != TT_ObjCMethodExpr);
  // "> >": a space only between two template closers when the style is not
  // C++11.
  if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
    return Tok.Type == TT_TemplateCloser &&
           Tok.Parent->Type == TT_TemplateCloser &&
           Style.Standard != FormatStyle::LS_Cpp11;
  }
  if (Tok.is(tok::arrowstar) || Tok.Parent->is(tok::arrowstar))
    return false;
  if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
    return true;
  if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
    return false;
  if (Tok.is(tok::less) && Line.First.is(tok::hash))
    return true;
  if (Tok.Type == TT_TrailingUnaryOperator)
    return false;
  return spaceRequiredBetween(Line, *Tok.Parent, Tok);
}

bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
                                    const AnnotatedToken &Right) {
  const AnnotatedToken &Left = *Right.Parent;
  if (Right.Type == TT_StartOfName)
    return true;
  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
    return false;
  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
    return true;
  if (Right.Type == TT_ObjCSelectorName)
    return true;
  if (Left.ClosesTemplateDeclaration)
    return true;
  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
    return true;
  if (Right.Type == TT_RangeBasedForLoopColon ||
      Right.Type == TT_InheritanceColon)
    return false;
  if (Left.Type == TT_RangeBasedForLoopColon ||
      Left.Type == TT_InheritanceColon)
    return true;
  // NOTE(review): never true here; this type already returned false above.
  if (Right.Type == TT_RangeBasedForLoopColon)
    return false;
  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
      Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
      Left.isOneOf(tok::question, tok::kw_operator))
    return false;
  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
    return false;
  if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
      Left.Parent->is(tok::kw___attribute))
    return false;

  if (Right.Type == TT_LineComment)
    // We rely on MustBreakBefore being set correctly here as we should
not 1105 // change the "binding" behavior of a comment. 1106 return false; 1107 1108 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1109 // unless it is follow by ';', '{' or '='. 1110 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1111 Left.Parent->is(tok::r_paren)) 1112 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); 1113 1114 if (Right.is(tok::kw___attribute)) 1115 return true; 1116 1117 // We only break before r_brace if there was a corresponding break before 1118 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1119 if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater)) 1120 return false; 1121 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1122 return true; 1123 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1124 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) || 1125 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || 1126 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1127 Right.isOneOf(tok::identifier, tok::kw___attribute)) || 1128 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1129 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1130} 1131 1132} // namespace format 1133} // namespace clang 1134