TokenAnnotator.cpp revision d3cf17b5f1fed43dbd0cd35c43d15139803c9c84
//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
23static bool isUnaryOperator(const AnnotatedToken &Tok) {
24  switch (Tok.FormatTok.Tok.getKind()) {
25  case tok::plus:
26  case tok::plusplus:
27  case tok::minus:
28  case tok::minusminus:
29  case tok::exclaim:
30  case tok::tilde:
31  case tok::kw_sizeof:
32  case tok::kw_alignof:
33    return true;
34  default:
35    return false;
36  }
37}
38
39static bool isBinaryOperator(const AnnotatedToken &Tok) {
40  // Comma is a binary operator, but does not behave as such wrt. formatting.
41  return getPrecedence(Tok) > prec::Comma;
42}
43
44// Returns the previous token ignoring comments.
45static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) {
46  AnnotatedToken *PrevToken = Tok.Parent;
47  while (PrevToken != NULL && PrevToken->is(tok::comment))
48    PrevToken = PrevToken->Parent;
49  return PrevToken;
50}
51static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
52  return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
53}
54
55static bool isTrailingComment(AnnotatedToken *Tok) {
56  return Tok != NULL && Tok->is(tok::comment) &&
57         (Tok->Children.empty() ||
58          Tok->Children[0].FormatTok.NewlinesBefore > 0);
59}
60
61// Returns the next token ignoring comments.
62static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
63  if (Tok.Children.empty())
64    return NULL;
65  const AnnotatedToken *NextToken = &Tok.Children[0];
66  while (NextToken->is(tok::comment)) {
67    if (NextToken->Children.empty())
68      return NULL;
69    NextToken = &NextToken->Children[0];
70  }
71  return NextToken;
72}
73
74static bool closesScope(const AnnotatedToken &Tok) {
75  return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
76         Tok.Type == TT_TemplateCloser;
77}
78
79static bool opensScope(const AnnotatedToken &Tok) {
80  return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
81         Tok.Type == TT_TemplateOpener;
82}
83
84/// \brief A parser that gathers additional information about tokens.
85///
86/// The \c TokenAnnotator tries to match parenthesis and square brakets and
87/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
88/// into template parameter lists.
89class AnnotatingParser {
90public:
91  AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
92                   IdentifierInfo &Ident_in)
93      : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
94        KeywordVirtualFound(false), Ident_in(Ident_in) {
95    Contexts.push_back(Context(1, /*IsExpression=*/ false));
96  }
97
98private:
99  bool parseAngle() {
100    if (CurrentToken == NULL)
101      return false;
102    ScopedContextCreator ContextCreator(*this, 10);
103    AnnotatedToken *Left = CurrentToken->Parent;
104    Contexts.back().IsExpression = false;
105    while (CurrentToken != NULL) {
106      if (CurrentToken->is(tok::greater)) {
107        Left->MatchingParen = CurrentToken;
108        CurrentToken->MatchingParen = Left;
109        CurrentToken->Type = TT_TemplateCloser;
110        next();
111        return true;
112      }
113      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
114                                tok::pipepipe, tok::ampamp, tok::question,
115                                tok::colon))
116        return false;
117      updateParameterCount(Left, CurrentToken);
118      if (!consumeToken())
119        return false;
120    }
121    return false;
122  }
123
124  bool parseParens(bool LookForDecls = false) {
125    if (CurrentToken == NULL)
126      return false;
127    ScopedContextCreator ContextCreator(*this, 1);
128
129    // FIXME: This is a bit of a hack. Do better.
130    Contexts.back().ColonIsForRangeExpr =
131        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
132
133    bool StartsObjCMethodExpr = false;
134    AnnotatedToken *Left = CurrentToken->Parent;
135    if (CurrentToken->is(tok::caret)) {
136      // ^( starts a block.
137      Left->Type = TT_ObjCBlockLParen;
138    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
139      // @selector( starts a selector.
140      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
141          MaybeSel->Parent->is(tok::at)) {
142        StartsObjCMethodExpr = true;
143      }
144    }
145
146    if (StartsObjCMethodExpr) {
147      Contexts.back().ColonIsObjCMethodExpr = true;
148      Left->Type = TT_ObjCMethodExpr;
149    }
150
151    while (CurrentToken != NULL) {
152      // LookForDecls is set when "if (" has been seen. Check for
153      // 'identifier' '*' 'identifier' followed by not '=' -- this
154      // '*' has to be a binary operator but determineStarAmpUsage() will
155      // categorize it as an unary operator, so set the right type here.
156      if (LookForDecls && !CurrentToken->Children.empty()) {
157        AnnotatedToken &Prev = *CurrentToken->Parent;
158        AnnotatedToken &Next = CurrentToken->Children[0];
159        if (Prev.Parent->is(tok::identifier) &&
160            Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
161            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
162          Prev.Type = TT_BinaryOperator;
163          LookForDecls = false;
164        }
165      }
166
167      if (CurrentToken->is(tok::r_paren)) {
168        Left->MatchingParen = CurrentToken;
169        CurrentToken->MatchingParen = Left;
170
171        if (StartsObjCMethodExpr) {
172          CurrentToken->Type = TT_ObjCMethodExpr;
173          if (Contexts.back().FirstObjCSelectorName != NULL) {
174            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
175                Contexts.back().LongestObjCSelectorName;
176          }
177        }
178
179        next();
180        return true;
181      }
182      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
183        return false;
184      updateParameterCount(Left, CurrentToken);
185      if (!consumeToken())
186        return false;
187    }
188    return false;
189  }
190
191  bool parseSquare() {
192    if (!CurrentToken)
193      return false;
194
195    // A '[' could be an index subscript (after an indentifier or after
196    // ')' or ']'), it could be the start of an Objective-C method
197    // expression, or it could the the start of an Objective-C array literal.
198    AnnotatedToken *Left = CurrentToken->Parent;
199    AnnotatedToken *Parent = getPreviousToken(*Left);
200    bool StartsObjCMethodExpr =
201        Contexts.back().CanBeExpression &&
202        (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
203                                    tok::kw_return, tok::kw_throw) ||
204         isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn ||
205         Parent->Type == TT_CastRParen ||
206         getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
207         prec::Unknown);
208    ScopedContextCreator ContextCreator(*this, 10);
209    Contexts.back().IsExpression = true;
210    bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
211
212    if (StartsObjCMethodExpr) {
213      Contexts.back().ColonIsObjCMethodExpr = true;
214      Left->Type = TT_ObjCMethodExpr;
215    } else if (StartsObjCArrayLiteral) {
216      Left->Type = TT_ObjCArrayLiteral;
217    }
218
219    while (CurrentToken != NULL) {
220      if (CurrentToken->is(tok::r_square)) {
221        if (!CurrentToken->Children.empty() &&
222            CurrentToken->Children[0].is(tok::l_paren)) {
223          // An ObjC method call is rarely followed by an open parenthesis.
224          // FIXME: Do we incorrectly label ":" with this?
225          StartsObjCMethodExpr = false;
226          Left->Type = TT_Unknown;
227        }
228        if (StartsObjCMethodExpr) {
229          CurrentToken->Type = TT_ObjCMethodExpr;
230          // determineStarAmpUsage() thinks that '*' '[' is allocating an
231          // array of pointers, but if '[' starts a selector then '*' is a
232          // binary operator.
233          if (Parent != NULL && Parent->Type == TT_PointerOrReference)
234            Parent->Type = TT_BinaryOperator;
235        } else if (StartsObjCArrayLiteral) {
236          CurrentToken->Type = TT_ObjCArrayLiteral;
237        }
238        Left->MatchingParen = CurrentToken;
239        CurrentToken->MatchingParen = Left;
240        if (Contexts.back().FirstObjCSelectorName != NULL)
241          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
242              Contexts.back().LongestObjCSelectorName;
243        next();
244        return true;
245      }
246      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
247        return false;
248      updateParameterCount(Left, CurrentToken);
249      if (!consumeToken())
250        return false;
251    }
252    return false;
253  }
254
255  bool parseBrace() {
256    // Lines are fine to end with '{'.
257    if (CurrentToken == NULL)
258      return true;
259    ScopedContextCreator ContextCreator(*this, 1);
260    AnnotatedToken *Left = CurrentToken->Parent;
261    while (CurrentToken != NULL) {
262      if (CurrentToken->is(tok::r_brace)) {
263        Left->MatchingParen = CurrentToken;
264        CurrentToken->MatchingParen = Left;
265        next();
266        return true;
267      }
268      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
269        return false;
270      updateParameterCount(Left, CurrentToken);
271      if (!consumeToken())
272        return false;
273    }
274    return true;
275  }
276
277  void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
278    if (Current->is(tok::comma))
279      ++Left->ParameterCount;
280    else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
281      Left->ParameterCount = 1;
282  }
283
284  bool parseConditional() {
285    while (CurrentToken != NULL) {
286      if (CurrentToken->is(tok::colon)) {
287        CurrentToken->Type = TT_ConditionalExpr;
288        next();
289        return true;
290      }
291      if (!consumeToken())
292        return false;
293    }
294    return false;
295  }
296
297  bool parseTemplateDeclaration() {
298    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
299      CurrentToken->Type = TT_TemplateOpener;
300      next();
301      if (!parseAngle())
302        return false;
303      if (CurrentToken != NULL)
304        CurrentToken->Parent->ClosesTemplateDeclaration = true;
305      return true;
306    }
307    return false;
308  }
309
310  bool consumeToken() {
311    AnnotatedToken *Tok = CurrentToken;
312    next();
313    switch (Tok->FormatTok.Tok.getKind()) {
314    case tok::plus:
315    case tok::minus:
316      // At the start of the line, +/- specific ObjectiveC method
317      // declarations.
318      if (Tok->Parent == NULL)
319        Tok->Type = TT_ObjCMethodSpecifier;
320      break;
321    case tok::colon:
322      // Colons from ?: are handled in parseConditional().
323      if (Tok->Parent->is(tok::r_paren)) {
324        Tok->Type = TT_CtorInitializerColon;
325      } else if (Contexts.back().ColonIsObjCMethodExpr ||
326                 Line.First.Type == TT_ObjCMethodSpecifier) {
327        Tok->Type = TT_ObjCMethodExpr;
328        Tok->Parent->Type = TT_ObjCSelectorName;
329        if (Tok->Parent->FormatTok.TokenLength >
330            Contexts.back().LongestObjCSelectorName)
331          Contexts.back().LongestObjCSelectorName =
332              Tok->Parent->FormatTok.TokenLength;
333        if (Contexts.back().FirstObjCSelectorName == NULL)
334          Contexts.back().FirstObjCSelectorName = Tok->Parent;
335      } else if (Contexts.back().ColonIsForRangeExpr) {
336        Tok->Type = TT_RangeBasedForLoopColon;
337      } else if (Contexts.size() == 1) {
338        Tok->Type = TT_InheritanceColon;
339      }
340      break;
341    case tok::kw_if:
342    case tok::kw_while:
343      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
344        next();
345        if (!parseParens(/*LookForDecls=*/ true))
346          return false;
347      }
348      break;
349    case tok::kw_for:
350      Contexts.back().ColonIsForRangeExpr = true;
351      next();
352      if (!parseParens())
353        return false;
354      break;
355    case tok::l_paren:
356      if (!parseParens())
357        return false;
358      if (Line.MustBeDeclaration)
359        Line.MightBeFunctionDecl = true;
360      break;
361    case tok::l_square:
362      if (!parseSquare())
363        return false;
364      break;
365    case tok::l_brace:
366      if (!parseBrace())
367        return false;
368      break;
369    case tok::less:
370      if (parseAngle())
371        Tok->Type = TT_TemplateOpener;
372      else {
373        Tok->Type = TT_BinaryOperator;
374        CurrentToken = Tok;
375        next();
376      }
377      break;
378    case tok::r_paren:
379    case tok::r_square:
380      return false;
381    case tok::r_brace:
382      // Lines can start with '}'.
383      if (Tok->Parent != NULL)
384        return false;
385      break;
386    case tok::greater:
387      Tok->Type = TT_BinaryOperator;
388      break;
389    case tok::kw_operator:
390      while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
391        if (CurrentToken->isOneOf(tok::star, tok::amp))
392          CurrentToken->Type = TT_PointerOrReference;
393        consumeToken();
394      }
395      if (CurrentToken)
396        CurrentToken->Type = TT_OverloadedOperatorLParen;
397      break;
398    case tok::question:
399      parseConditional();
400      break;
401    case tok::kw_template:
402      parseTemplateDeclaration();
403      break;
404    case tok::identifier:
405      if (Line.First.is(tok::kw_for) &&
406          Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
407        Tok->Type = TT_ObjCForIn;
408      break;
409    default:
410      break;
411    }
412    return true;
413  }
414
415  void parseIncludeDirective() {
416    next();
417    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
418      next();
419      while (CurrentToken != NULL) {
420        if (CurrentToken->isNot(tok::comment) ||
421            !CurrentToken->Children.empty())
422          CurrentToken->Type = TT_ImplicitStringLiteral;
423        next();
424      }
425    } else {
426      while (CurrentToken != NULL) {
427        if (CurrentToken->is(tok::string_literal))
428          // Mark these string literals as "implicit" literals, too, so that
429          // they are not split or line-wrapped.
430          CurrentToken->Type = TT_ImplicitStringLiteral;
431        next();
432      }
433    }
434  }
435
436  void parseWarningOrError() {
437    next();
438    // We still want to format the whitespace left of the first token of the
439    // warning or error.
440    next();
441    while (CurrentToken != NULL) {
442      CurrentToken->Type = TT_ImplicitStringLiteral;
443      next();
444    }
445  }
446
447  void parsePreprocessorDirective() {
448    next();
449    if (CurrentToken == NULL)
450      return;
451    // Hashes in the middle of a line can lead to any strange token
452    // sequence.
453    if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
454      return;
455    switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
456    case tok::pp_include:
457    case tok::pp_import:
458      parseIncludeDirective();
459      break;
460    case tok::pp_error:
461    case tok::pp_warning:
462      parseWarningOrError();
463      break;
464    default:
465      break;
466    }
467    while (CurrentToken != NULL)
468      next();
469  }
470
471public:
472  LineType parseLine() {
473    int PeriodsAndArrows = 0;
474    AnnotatedToken *LastPeriodOrArrow = NULL;
475    bool CanBeBuilderTypeStmt = true;
476    if (CurrentToken->is(tok::hash)) {
477      parsePreprocessorDirective();
478      return LT_PreprocessorDirective;
479    }
480    while (CurrentToken != NULL) {
481      if (CurrentToken->is(tok::kw_virtual))
482        KeywordVirtualFound = true;
483      if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
484        ++PeriodsAndArrows;
485        LastPeriodOrArrow = CurrentToken;
486      }
487      AnnotatedToken *TheToken = CurrentToken;
488      if (!consumeToken())
489        return LT_Invalid;
490      if (getPrecedence(*TheToken) > prec::Assignment &&
491          TheToken->Type == TT_BinaryOperator)
492        CanBeBuilderTypeStmt = false;
493    }
494    if (KeywordVirtualFound)
495      return LT_VirtualFunctionDecl;
496
497    // Assume a builder-type call if there are 2 or more "." and "->".
498    if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
499      LastPeriodOrArrow->LastInChainOfCalls = true;
500      return LT_BuilderTypeCall;
501    }
502
503    if (Line.First.Type == TT_ObjCMethodSpecifier) {
504      if (Contexts.back().FirstObjCSelectorName != NULL)
505        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
506            Contexts.back().LongestObjCSelectorName;
507      return LT_ObjCMethodDecl;
508    }
509
510    return LT_Other;
511  }
512
513private:
514  void next() {
515    if (CurrentToken != NULL) {
516      determineTokenType(*CurrentToken);
517      CurrentToken->BindingStrength = Contexts.back().BindingStrength;
518    }
519
520    if (CurrentToken != NULL && !CurrentToken->Children.empty())
521      CurrentToken = &CurrentToken->Children[0];
522    else
523      CurrentToken = NULL;
524
525    // Reset token type in case we have already looked at it and then recovered
526    // from an error (e.g. failure to find the matching >).
527    if (CurrentToken != NULL)
528      CurrentToken->Type = TT_Unknown;
529  }
530
531  /// \brief A struct to hold information valid in a specific context, e.g.
532  /// a pair of parenthesis.
533  struct Context {
534    Context(unsigned BindingStrength, bool IsExpression)
535        : BindingStrength(BindingStrength), LongestObjCSelectorName(0),
536          ColonIsForRangeExpr(false), ColonIsObjCMethodExpr(false),
537          FirstObjCSelectorName(NULL), IsExpression(IsExpression),
538          CanBeExpression(true) {}
539
540    unsigned BindingStrength;
541    unsigned LongestObjCSelectorName;
542    bool ColonIsForRangeExpr;
543    bool ColonIsObjCMethodExpr;
544    AnnotatedToken *FirstObjCSelectorName;
545    bool IsExpression;
546    bool CanBeExpression;
547  };
548
549  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
550  /// of each instance.
551  struct ScopedContextCreator {
552    AnnotatingParser &P;
553
554    ScopedContextCreator(AnnotatingParser &P, unsigned Increase) : P(P) {
555      P.Contexts.push_back(Context(P.Contexts.back().BindingStrength + Increase,
556                                   P.Contexts.back().IsExpression));
557    }
558
559    ~ScopedContextCreator() { P.Contexts.pop_back(); }
560  };
561
562  void determineTokenType(AnnotatedToken &Current) {
563    if (getPrecedence(Current) == prec::Assignment) {
564      Contexts.back().IsExpression = true;
565      for (AnnotatedToken *Previous = Current.Parent;
566           Previous && Previous->isNot(tok::comma);
567           Previous = Previous->Parent) {
568        if (Previous->is(tok::r_square))
569          Previous = Previous->MatchingParen;
570        if (Previous->Type == TT_BinaryOperator &&
571            Previous->isOneOf(tok::star, tok::amp)) {
572          Previous->Type = TT_PointerOrReference;
573        }
574      }
575    } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
576               (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
577                (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
578      Contexts.back().IsExpression = true;
579    } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
580      for (AnnotatedToken *Previous = Current.Parent;
581           Previous && Previous->isOneOf(tok::star, tok::amp);
582           Previous = Previous->Parent)
583        Previous->Type = TT_PointerOrReference;
584    } else if (Current.Parent &&
585               Current.Parent->Type == TT_CtorInitializerColon) {
586      Contexts.back().IsExpression = true;
587    } else if (Current.is(tok::kw_new)) {
588      Contexts.back().CanBeExpression = false;
589    }
590
591    if (Current.Type == TT_Unknown) {
592      if (Current.Parent && Current.is(tok::identifier) &&
593          ((Current.Parent->is(tok::identifier) &&
594            Current.Parent->FormatTok.Tok.getIdentifierInfo()
595                ->getPPKeywordID() == tok::pp_not_keyword) ||
596           Current.Parent->Type == TT_PointerOrReference ||
597           Current.Parent->Type == TT_TemplateCloser)) {
598        Current.Type = TT_StartOfName;
599      } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
600        Current.Type =
601            determineStarAmpUsage(Current, Contexts.back().IsExpression);
602      } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
603        Current.Type = determinePlusMinusCaretUsage(Current);
604      } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
605        Current.Type = determineIncrementUsage(Current);
606      } else if (Current.is(tok::exclaim)) {
607        Current.Type = TT_UnaryOperator;
608      } else if (isBinaryOperator(Current)) {
609        Current.Type = TT_BinaryOperator;
610      } else if (Current.is(tok::comment)) {
611        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
612                                            Lex.getLangOpts()));
613        if (StringRef(Data).startswith("//"))
614          Current.Type = TT_LineComment;
615        else
616          Current.Type = TT_BlockComment;
617      } else if (Current.is(tok::r_paren)) {
618        bool ParensNotExpr = !Current.Parent ||
619                             Current.Parent->Type == TT_PointerOrReference ||
620                             Current.Parent->Type == TT_TemplateCloser;
621        bool ParensCouldEndDecl =
622            !Current.Children.empty() &&
623            Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
624        bool IsSizeOfOrAlignOf =
625            Current.MatchingParen && Current.MatchingParen->Parent &&
626            Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
627                                                   tok::kw_alignof);
628        if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
629            Contexts.back().IsExpression)
630          // FIXME: We need to get smarter and understand more cases of casts.
631          Current.Type = TT_CastRParen;
632      } else if (Current.is(tok::at) && Current.Children.size()) {
633        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
634        case tok::objc_interface:
635        case tok::objc_implementation:
636        case tok::objc_protocol:
637          Current.Type = TT_ObjCDecl;
638          break;
639        case tok::objc_property:
640          Current.Type = TT_ObjCProperty;
641          break;
642        default:
643          break;
644        }
645      }
646    }
647  }
648
649  /// \brief Return the type of the given token assuming it is * or &.
650  TokenType
651  determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
652    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
653    if (PrevToken == NULL)
654      return TT_UnaryOperator;
655
656    const AnnotatedToken *NextToken = getNextToken(Tok);
657    if (NextToken == NULL)
658      return TT_Unknown;
659
660    if (PrevToken->is(tok::l_paren) && !IsExpression)
661      return TT_PointerOrReference;
662
663    if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
664                           tok::comma, tok::semi, tok::kw_return, tok::colon,
665                           tok::equal) ||
666        PrevToken->Type == TT_BinaryOperator ||
667        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
668      return TT_UnaryOperator;
669
670    if (NextToken->is(tok::l_square))
671      return TT_PointerOrReference;
672
673    if (PrevToken->FormatTok.Tok.isLiteral() ||
674        PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
675        NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken) ||
676        NextToken->isOneOf(tok::l_paren, tok::l_square))
677      return TT_BinaryOperator;
678
679    // It is very unlikely that we are going to find a pointer or reference type
680    // definition on the RHS of an assignment.
681    if (IsExpression)
682      return TT_BinaryOperator;
683
684    return TT_PointerOrReference;
685  }
686
687  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
688    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
689    if (PrevToken == NULL)
690      return TT_UnaryOperator;
691
692    // Use heuristics to recognize unary operators.
693    if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
694                           tok::question, tok::colon, tok::kw_return,
695                           tok::kw_case, tok::at, tok::l_brace))
696      return TT_UnaryOperator;
697
698    // There can't be two consecutive binary operators.
699    if (PrevToken->Type == TT_BinaryOperator)
700      return TT_UnaryOperator;
701
702    // Fall back to marking the token as binary operator.
703    return TT_BinaryOperator;
704  }
705
706  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
707  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
708    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
709    if (PrevToken == NULL)
710      return TT_UnaryOperator;
711    if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
712      return TT_TrailingUnaryOperator;
713
714    return TT_UnaryOperator;
715  }
716
717  SmallVector<Context, 8> Contexts;
718
719  SourceManager &SourceMgr;
720  Lexer &Lex;
721  AnnotatedLine &Line;
722  AnnotatedToken *CurrentToken;
723  bool KeywordVirtualFound;
724  IdentifierInfo &Ident_in;
725};
726
727/// \brief Parses binary expressions by inserting fake parenthesis based on
728/// operator precedence.
729class ExpressionParser {
730public:
731  ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
732
733  /// \brief Parse expressions with the given operatore precedence.
734  void parse(int Precedence = 0) {
735    if (Precedence > prec::PointerToMember || Current == NULL)
736      return;
737
738    // Skip over "return" until we can properly parse it.
739    if (Current->is(tok::kw_return))
740      next();
741
742    // Eagerly consume trailing comments.
743    while (isTrailingComment(Current)) {
744      next();
745    }
746
747    AnnotatedToken *Start = Current;
748    bool OperatorFound = false;
749
750    while (Current) {
751      // Consume operators with higher precedence.
752      parse(prec::Level(Precedence + 1));
753
754      int CurrentPrecedence = 0;
755      if (Current) {
756        if (Current->Type == TT_ConditionalExpr)
757          CurrentPrecedence = 1 + (int) prec::Conditional;
758        else if (Current->is(tok::semi))
759          CurrentPrecedence = 1;
760        else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
761          CurrentPrecedence = 1 + (int) getPrecedence(*Current);
762      }
763
764      // At the end of the line or when an operator with higher precedence is
765      // found, insert fake parenthesis and return.
766      if (Current == NULL || closesScope(*Current) ||
767          (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
768        if (OperatorFound) {
769          ++Start->FakeLParens;
770          if (Current)
771            ++Current->Parent->FakeRParens;
772        }
773        return;
774      }
775
776      // Consume scopes: (), [], <> and {}
777      if (opensScope(*Current)) {
778        AnnotatedToken *Left = Current;
779        while (Current && !closesScope(*Current)) {
780          next();
781          parse();
782        }
783        // Remove fake parens that just duplicate the real parens.
784        if (Current && Left->Children[0].FakeLParens > 0 &&
785            Current->Parent->FakeRParens > 0) {
786          --Left->Children[0].FakeLParens;
787          --Current->Parent->FakeRParens;
788        }
789        next();
790      } else {
791        // Operator found.
792        if (CurrentPrecedence == Precedence)
793          OperatorFound = true;
794
795        next();
796      }
797    }
798  }
799
800private:
801  void next() {
802    if (Current != NULL)
803      Current = Current->Children.empty() ? NULL : &Current->Children[0];
804  }
805
806  AnnotatedToken *Current;
807};
808
809void TokenAnnotator::annotate(AnnotatedLine &Line) {
810  AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
811  Line.Type = Parser.parseLine();
812  if (Line.Type == LT_Invalid)
813    return;
814
815  ExpressionParser ExprParser(Line);
816  ExprParser.parse();
817
818  if (Line.First.Type == TT_ObjCMethodSpecifier)
819    Line.Type = LT_ObjCMethodDecl;
820  else if (Line.First.Type == TT_ObjCDecl)
821    Line.Type = LT_ObjCDecl;
822  else if (Line.First.Type == TT_ObjCProperty)
823    Line.Type = LT_ObjCProperty;
824
825  Line.First.SpacesRequiredBefore = 1;
826  Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
827  Line.First.CanBreakBefore = Line.First.MustBreakBefore;
828
829  Line.First.TotalLength = Line.First.FormatTok.TokenLength;
830}
831
832void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
833  if (Line.First.Children.empty())
834    return;
835  AnnotatedToken *Current = &Line.First.Children[0];
836  while (Current != NULL) {
837    if (Current->Type == TT_LineComment)
838      Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
839    else
840      Current->SpacesRequiredBefore =
841          spaceRequiredBefore(Line, *Current) ? 1 : 0;
842
843    if (Current->FormatTok.MustBreakBefore) {
844      Current->MustBreakBefore = true;
845    } else if (Current->Type == TT_LineComment) {
846      Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
847    } else if (isTrailingComment(Current->Parent) ||
848               (Current->is(tok::string_literal) &&
849                Current->Parent->is(tok::string_literal))) {
850      Current->MustBreakBefore = true;
851    } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
852               Current->Parent->is(tok::string_literal) &&
853               Current->Children[0].is(tok::string_literal)) {
854      Current->MustBreakBefore = true;
855    } else {
856      Current->MustBreakBefore = false;
857    }
858    Current->CanBreakBefore =
859        Current->MustBreakBefore || canBreakBefore(Line, *Current);
860    if (Current->MustBreakBefore)
861      Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
862    else
863      Current->TotalLength =
864          Current->Parent->TotalLength + Current->FormatTok.TokenLength +
865          Current->SpacesRequiredBefore;
866    // FIXME: Only calculate this if CanBreakBefore is true once static
867    // initializers etc. are sorted out.
868    // FIXME: Move magic numbers to a better place.
869    Current->SplitPenalty =
870        20 * Current->BindingStrength + splitPenalty(Line, *Current);
871
872    Current = Current->Children.empty() ? NULL : &Current->Children[0];
873  }
874}
875
876unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
877                                      const AnnotatedToken &Tok) {
878  const AnnotatedToken &Left = *Tok.Parent;
879  const AnnotatedToken &Right = Tok;
880
881  if (Right.Type == TT_StartOfName) {
882    if (Line.First.is(tok::kw_for))
883      return 3;
884    else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
885      // FIXME: Clean up hack of using BindingStrength to find top-level names.
886      return Style.PenaltyReturnTypeOnItsOwnLine;
887    else
888      return 100;
889  }
890  if (Left.is(tok::equal) && Right.is(tok::l_brace))
891    return 150;
892  if (Left.is(tok::coloncolon))
893    return 500;
894
895  if (Left.Type == TT_RangeBasedForLoopColon ||
896      Left.Type == TT_InheritanceColon)
897    return 2;
898
899  if (Right.isOneOf(tok::arrow, tok::period)) {
900    if (Line.Type == LT_BuilderTypeCall)
901      return prec::PointerToMember;
902    if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
903        Left.MatchingParen->ParameterCount > 0)
904      return 20; // Should be smaller than breaking at a nested comma.
905    return 150;
906  }
907
908  // In for-loops, prefer breaking at ',' and ';'.
909  if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
910    return 4;
911
912  if (Left.is(tok::semi))
913    return 0;
914  if (Left.is(tok::comma))
915    return 1;
916
917  // In Objective-C method expressions, prefer breaking before "param:" over
918  // breaking after it.
919  if (Right.Type == TT_ObjCSelectorName)
920    return 0;
921  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
922    return 20;
923
924  if (opensScope(Left))
925    return 20;
926
927  if (Right.is(tok::lessless)) {
928    if (Left.is(tok::string_literal)) {
929      char LastChar =
930          StringRef(Left.FormatTok.Tok.getLiteralData(),
931                    Left.FormatTok.TokenLength).drop_back(1).rtrim().back();
932      if (LastChar == ':' || LastChar == '=')
933        return 100;
934    }
935    return prec::Shift;
936  }
937  if (Left.Type == TT_ConditionalExpr)
938    return prec::Conditional;
939  prec::Level Level = getPrecedence(Left);
940
941  if (Level != prec::Unknown)
942    return Level;
943
944  return 3;
945}
946
947bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
948                                          const AnnotatedToken &Left,
949                                          const AnnotatedToken &Right) {
950  if (Right.is(tok::hashhash))
951    return Left.is(tok::hash);
952  if (Left.isOneOf(tok::hashhash, tok::hash))
953    return Right.is(tok::hash);
954  if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
955    return false;
956  if (Right.is(tok::less) &&
957      (Left.is(tok::kw_template) ||
958       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
959    return true;
960  if (Left.is(tok::arrow) || Right.is(tok::arrow))
961    return false;
962  if (Left.isOneOf(tok::exclaim, tok::tilde))
963    return false;
964  if (Left.is(tok::at) &&
965      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
966                    tok::numeric_constant, tok::l_paren, tok::l_brace,
967                    tok::kw_true, tok::kw_false))
968    return false;
969  if (Left.is(tok::coloncolon))
970    return false;
971  if (Right.is(tok::coloncolon))
972    return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
973  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
974    return false;
975  if (Right.Type == TT_PointerOrReference)
976    return Left.FormatTok.Tok.isLiteral() ||
977           ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
978            !Style.PointerBindsToType);
979  if (Left.Type == TT_PointerOrReference)
980    return Right.FormatTok.Tok.isLiteral() ||
981           ((Right.Type != TT_PointerOrReference) && Style.PointerBindsToType);
982  if (Right.is(tok::star) && Left.is(tok::l_paren))
983    return false;
984  if (Left.is(tok::l_square))
985    return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
986  if (Right.is(tok::r_square))
987    return Right.Type == TT_ObjCArrayLiteral;
988  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
989    return false;
990  if (Left.is(tok::period) || Right.is(tok::period))
991    return false;
992  if (Left.is(tok::colon))
993    return Left.Type != TT_ObjCMethodExpr;
994  if (Right.is(tok::colon))
995    return Right.Type != TT_ObjCMethodExpr;
996  if (Left.is(tok::l_paren))
997    return false;
998  if (Right.is(tok::l_paren)) {
999    return Line.Type == LT_ObjCDecl ||
1000           Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1001                        tok::kw_return, tok::kw_catch, tok::kw_new,
1002                        tok::kw_delete);
1003  }
1004  if (Left.is(tok::at) &&
1005      Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1006    return false;
1007  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1008    return false;
1009  return true;
1010}
1011
1012bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1013                                         const AnnotatedToken &Tok) {
1014  if (Tok.FormatTok.Tok.getIdentifierInfo() &&
1015      Tok.Parent->FormatTok.Tok.getIdentifierInfo())
1016    return true; // Never ever merge two identifiers.
1017  if (Line.Type == LT_ObjCMethodDecl) {
1018    if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1019      return true;
1020    if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1021      // Don't space between ')' and <id>
1022      return false;
1023  }
1024  if (Line.Type == LT_ObjCProperty &&
1025      (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1026    return false;
1027
1028  if (Tok.Parent->is(tok::comma))
1029    return true;
1030  if (Tok.is(tok::comma))
1031    return false;
1032  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1033    return true;
1034  if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
1035    return false;
1036  if (Tok.Type == TT_OverloadedOperatorLParen)
1037    return false;
1038  if (Tok.is(tok::colon))
1039    return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
1040           !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr;
1041  if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
1042      Tok.Children[0].Type == TT_PointerOrReference &&
1043      !Tok.Children[0].Children.empty() &&
1044      Tok.Children[0].Children[0].isNot(tok::r_paren))
1045    return true;
1046  if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
1047    return false;
1048  if (Tok.Type == TT_UnaryOperator)
1049    return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1050           (Tok.Parent->isNot(tok::colon) ||
1051            Tok.Parent->Type != TT_ObjCMethodExpr);
1052  if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1053    return Tok.Type == TT_TemplateCloser &&
1054           Tok.Parent->Type == TT_TemplateCloser &&
1055           Style.Standard != FormatStyle::LS_Cpp11;
1056  }
1057  if (Tok.is(tok::arrowstar) || Tok.Parent->is(tok::arrowstar))
1058    return false;
1059  if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1060    return true;
1061  if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1062    return false;
1063  if (Tok.is(tok::less) && Line.First.is(tok::hash))
1064    return true;
1065  if (Tok.Type == TT_TrailingUnaryOperator)
1066    return false;
1067  return spaceRequiredBetween(Line, *Tok.Parent, Tok);
1068}
1069
1070bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1071                                    const AnnotatedToken &Right) {
1072  const AnnotatedToken &Left = *Right.Parent;
1073  if (Right.Type == TT_StartOfName)
1074    return true;
1075  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1076    return false;
1077  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1078    return true;
1079  if (Right.Type == TT_ObjCSelectorName)
1080    return true;
1081  if (Left.ClosesTemplateDeclaration)
1082    return true;
1083  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1084    return true;
1085  if (Right.Type == TT_RangeBasedForLoopColon ||
1086      Right.Type == TT_InheritanceColon)
1087    return false;
1088  if (Left.Type == TT_RangeBasedForLoopColon ||
1089      Left.Type == TT_InheritanceColon)
1090    return true;
1091  if (Right.Type == TT_RangeBasedForLoopColon)
1092    return false;
1093  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1094      Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1095      Left.isOneOf(tok::question, tok::kw_operator))
1096    return false;
1097  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1098    return false;
1099  if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
1100      Left.Parent->is(tok::kw___attribute))
1101    return false;
1102
1103  if (Right.Type == TT_LineComment)
1104    // We rely on MustBreakBefore being set correctly here as we should not
1105    // change the "binding" behavior of a comment.
1106    return false;
1107
1108  // Allow breaking after a trailing 'const', e.g. after a method declaration,
1109  // unless it is follow by ';', '{' or '='.
1110  if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1111      Left.Parent->is(tok::r_paren))
1112    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1113
1114  if (Right.is(tok::kw___attribute))
1115    return true;
1116
1117  // We only break before r_brace if there was a corresponding break before
1118  // the l_brace, which is tracked by BreakBeforeClosingBrace.
1119  if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
1120    return false;
1121  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1122    return true;
1123  return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1124         Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) ||
1125         Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
1126         (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1127          Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
1128         (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1129         (Left.is(tok::l_square) && !Right.is(tok::r_square));
1130}
1131
1132} // namespace format
1133} // namespace clang
1134