TokenAnnotator.cpp revision 248497199bc56e86d1c089beb9529f3b3d77abb1
1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19
20namespace clang {
21namespace format {
22
23static bool isUnaryOperator(const AnnotatedToken &Tok) {
24  switch (Tok.FormatTok.Tok.getKind()) {
25  case tok::plus:
26  case tok::plusplus:
27  case tok::minus:
28  case tok::minusminus:
29  case tok::exclaim:
30  case tok::tilde:
31  case tok::kw_sizeof:
32  case tok::kw_alignof:
33    return true;
34  default:
35    return false;
36  }
37}
38
39static bool isBinaryOperator(const AnnotatedToken &Tok) {
40  // Comma is a binary operator, but does not behave as such wrt. formatting.
41  return getPrecedence(Tok) > prec::Comma;
42}
43
44// Returns the previous token ignoring comments.
45static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) {
46  AnnotatedToken *PrevToken = Tok.Parent;
47  while (PrevToken != NULL && PrevToken->is(tok::comment))
48    PrevToken = PrevToken->Parent;
49  return PrevToken;
50}
51static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
52  return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
53}
54
55static bool isTrailingComment(AnnotatedToken *Tok) {
56  return Tok != NULL && Tok->is(tok::comment) &&
57         (Tok->Children.empty() ||
58          Tok->Children[0].FormatTok.NewlinesBefore > 0);
59}
60
61// Returns the next token ignoring comments.
62static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
63  if (Tok.Children.empty())
64    return NULL;
65  const AnnotatedToken *NextToken = &Tok.Children[0];
66  while (NextToken->is(tok::comment)) {
67    if (NextToken->Children.empty())
68      return NULL;
69    NextToken = &NextToken->Children[0];
70  }
71  return NextToken;
72}
73
74/// \brief A parser that gathers additional information about tokens.
75///
76/// The \c TokenAnnotator tries to matches parenthesis and square brakets and
77/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
78/// into template parameter lists.
79class AnnotatingParser {
80public:
81  AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
82                   IdentifierInfo &Ident_in)
83      : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
84        KeywordVirtualFound(false), Ident_in(Ident_in) {
85    Contexts.push_back(Context(1, /*IsExpression=*/ false));
86  }
87
88private:
89  bool parseAngle() {
90    if (CurrentToken == NULL)
91      return false;
92    ScopedContextCreator ContextCreator(*this, 10);
93    AnnotatedToken *Left = CurrentToken->Parent;
94    Contexts.back().IsExpression = false;
95    while (CurrentToken != NULL) {
96      if (CurrentToken->is(tok::greater)) {
97        Left->MatchingParen = CurrentToken;
98        CurrentToken->MatchingParen = Left;
99        CurrentToken->Type = TT_TemplateCloser;
100        next();
101        return true;
102      }
103      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
104          CurrentToken->is(tok::r_brace))
105        return false;
106      if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
107          CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
108        return false;
109      updateParameterCount(Left, CurrentToken);
110      if (!consumeToken())
111        return false;
112    }
113    return false;
114  }
115
116  bool parseParens(bool LookForDecls = false) {
117    if (CurrentToken == NULL)
118      return false;
119    ScopedContextCreator ContextCreator(*this, 1);
120
121    // FIXME: This is a bit of a hack. Do better.
122    Contexts.back().ColonIsForRangeExpr =
123        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
124
125    bool StartsObjCMethodExpr = false;
126    AnnotatedToken *Left = CurrentToken->Parent;
127    if (CurrentToken->is(tok::caret)) {
128      // ^( starts a block.
129      Left->Type = TT_ObjCBlockLParen;
130    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
131      // @selector( starts a selector.
132      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
133          MaybeSel->Parent->is(tok::at)) {
134        StartsObjCMethodExpr = true;
135      }
136    }
137
138    if (StartsObjCMethodExpr) {
139      Contexts.back().ColonIsObjCMethodExpr = true;
140      Left->Type = TT_ObjCMethodExpr;
141    }
142
143    while (CurrentToken != NULL) {
144      // LookForDecls is set when "if (" has been seen. Check for
145      // 'identifier' '*' 'identifier' followed by not '=' -- this
146      // '*' has to be a binary operator but determineStarAmpUsage() will
147      // categorize it as an unary operator, so set the right type here.
148      if (LookForDecls && !CurrentToken->Children.empty()) {
149        AnnotatedToken &Prev = *CurrentToken->Parent;
150        AnnotatedToken &Next = CurrentToken->Children[0];
151        if (Prev.Parent->is(tok::identifier) &&
152            (Prev.is(tok::star) || Prev.is(tok::amp)) &&
153            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
154          Prev.Type = TT_BinaryOperator;
155          LookForDecls = false;
156        }
157      }
158
159      if (CurrentToken->is(tok::r_paren)) {
160        Left->MatchingParen = CurrentToken;
161        CurrentToken->MatchingParen = Left;
162
163        if (StartsObjCMethodExpr) {
164          CurrentToken->Type = TT_ObjCMethodExpr;
165          if (Contexts.back().FirstObjCSelectorName != NULL) {
166            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
167                Contexts.back().LongestObjCSelectorName;
168          }
169        }
170
171        next();
172        return true;
173      }
174      if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
175        return false;
176      updateParameterCount(Left, CurrentToken);
177      if (!consumeToken())
178        return false;
179    }
180    return false;
181  }
182
183  bool parseSquare() {
184    if (!CurrentToken)
185      return false;
186    ScopedContextCreator ContextCreator(*this, 10);
187
188    // A '[' could be an index subscript (after an indentifier or after
189    // ')' or ']'), it could be the start of an Objective-C method
190    // expression, or it could the the start of an Objective-C array literal.
191    AnnotatedToken *Left = CurrentToken->Parent;
192    AnnotatedToken *Parent = getPreviousToken(*Left);
193    Contexts.back().IsExpression = true;
194    bool StartsObjCMethodExpr =
195        !Parent || Parent->is(tok::colon) || Parent->is(tok::l_square) ||
196        Parent->is(tok::l_paren) || Parent->is(tok::kw_return) ||
197        Parent->is(tok::kw_throw) || isUnaryOperator(*Parent) ||
198        Parent->Type == TT_ObjCForIn || Parent->Type == TT_CastRParen ||
199        getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
200        prec::Unknown;
201    bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
202
203    if (StartsObjCMethodExpr) {
204      Contexts.back().ColonIsObjCMethodExpr = true;
205      Left->Type = TT_ObjCMethodExpr;
206    } else if (StartsObjCArrayLiteral) {
207      Left->Type = TT_ObjCArrayLiteral;
208    }
209
210    while (CurrentToken != NULL) {
211      if (CurrentToken->is(tok::r_square)) {
212        if (!CurrentToken->Children.empty() &&
213            CurrentToken->Children[0].is(tok::l_paren)) {
214          // An ObjC method call is rarely followed by an open parenthesis.
215          // FIXME: Do we incorrectly label ":" with this?
216          StartsObjCMethodExpr = false;
217          Left->Type = TT_Unknown;
218        }
219        if (StartsObjCMethodExpr) {
220          CurrentToken->Type = TT_ObjCMethodExpr;
221          // determineStarAmpUsage() thinks that '*' '[' is allocating an
222          // array of pointers, but if '[' starts a selector then '*' is a
223          // binary operator.
224          if (Parent != NULL &&
225              (Parent->is(tok::star) || Parent->is(tok::amp)) &&
226              Parent->Type == TT_PointerOrReference)
227            Parent->Type = TT_BinaryOperator;
228        } else if (StartsObjCArrayLiteral) {
229          CurrentToken->Type = TT_ObjCArrayLiteral;
230        }
231        Left->MatchingParen = CurrentToken;
232        CurrentToken->MatchingParen = Left;
233        if (Contexts.back().FirstObjCSelectorName != NULL)
234          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
235              Contexts.back().LongestObjCSelectorName;
236        next();
237        return true;
238      }
239      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
240        return false;
241      updateParameterCount(Left, CurrentToken);
242      if (!consumeToken())
243        return false;
244    }
245    return false;
246  }
247
248  bool parseBrace() {
249    // Lines are fine to end with '{'.
250    if (CurrentToken == NULL)
251      return true;
252    ScopedContextCreator ContextCreator(*this, 1);
253    AnnotatedToken *Left = CurrentToken->Parent;
254    while (CurrentToken != NULL) {
255      if (CurrentToken->is(tok::r_brace)) {
256        Left->MatchingParen = CurrentToken;
257        CurrentToken->MatchingParen = Left;
258        next();
259        return true;
260      }
261      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
262        return false;
263      updateParameterCount(Left, CurrentToken);
264      if (!consumeToken())
265        return false;
266    }
267    return true;
268  }
269
270  void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
271    if (Current->is(tok::comma))
272      ++Left->ParameterCount;
273    else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
274      Left->ParameterCount = 1;
275  }
276
277  bool parseConditional() {
278    while (CurrentToken != NULL) {
279      if (CurrentToken->is(tok::colon)) {
280        CurrentToken->Type = TT_ConditionalExpr;
281        next();
282        return true;
283      }
284      if (!consumeToken())
285        return false;
286    }
287    return false;
288  }
289
290  bool parseTemplateDeclaration() {
291    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
292      CurrentToken->Type = TT_TemplateOpener;
293      next();
294      if (!parseAngle())
295        return false;
296      if (CurrentToken != NULL)
297        CurrentToken->Parent->ClosesTemplateDeclaration = true;
298      return true;
299    }
300    return false;
301  }
302
303  bool consumeToken() {
304    AnnotatedToken *Tok = CurrentToken;
305    next();
306    switch (Tok->FormatTok.Tok.getKind()) {
307    case tok::plus:
308    case tok::minus:
309      // At the start of the line, +/- specific ObjectiveC method
310      // declarations.
311      if (Tok->Parent == NULL)
312        Tok->Type = TT_ObjCMethodSpecifier;
313      break;
314    case tok::colon:
315      // Colons from ?: are handled in parseConditional().
316      if (Tok->Parent->is(tok::r_paren)) {
317        Tok->Type = TT_CtorInitializerColon;
318      } else if (Contexts.back().ColonIsObjCMethodExpr ||
319                 Line.First.Type == TT_ObjCMethodSpecifier) {
320        Tok->Type = TT_ObjCMethodExpr;
321        Tok->Parent->Type = TT_ObjCSelectorName;
322        if (Tok->Parent->FormatTok.TokenLength >
323            Contexts.back().LongestObjCSelectorName)
324          Contexts.back().LongestObjCSelectorName =
325              Tok->Parent->FormatTok.TokenLength;
326        if (Contexts.back().FirstObjCSelectorName == NULL)
327          Contexts.back().FirstObjCSelectorName = Tok->Parent;
328      } else if (Contexts.back().ColonIsForRangeExpr) {
329        Tok->Type = TT_RangeBasedForLoopColon;
330      } else if (Contexts.size() == 1) {
331        Tok->Type = TT_InheritanceColon;
332      }
333      break;
334    case tok::kw_if:
335    case tok::kw_while:
336      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
337        next();
338        if (!parseParens(/*LookForDecls=*/ true))
339          return false;
340      }
341      break;
342    case tok::kw_for:
343      Contexts.back().ColonIsForRangeExpr = true;
344      next();
345      if (!parseParens())
346        return false;
347      break;
348    case tok::l_paren:
349      if (!parseParens())
350        return false;
351      if (Line.MustBeDeclaration)
352        Line.MightBeFunctionDecl = true;
353      break;
354    case tok::l_square:
355      if (!parseSquare())
356        return false;
357      break;
358    case tok::l_brace:
359      if (!parseBrace())
360        return false;
361      break;
362    case tok::less:
363      if (parseAngle())
364        Tok->Type = TT_TemplateOpener;
365      else {
366        Tok->Type = TT_BinaryOperator;
367        CurrentToken = Tok;
368        next();
369      }
370      break;
371    case tok::r_paren:
372    case tok::r_square:
373      return false;
374    case tok::r_brace:
375      // Lines can start with '}'.
376      if (Tok->Parent != NULL)
377        return false;
378      break;
379    case tok::greater:
380      Tok->Type = TT_BinaryOperator;
381      break;
382    case tok::kw_operator:
383      while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
384        if (CurrentToken->is(tok::star) || CurrentToken->is(tok::amp))
385          CurrentToken->Type = TT_PointerOrReference;
386        consumeToken();
387      }
388      if (CurrentToken)
389        CurrentToken->Type = TT_OverloadedOperatorLParen;
390      break;
391    case tok::question:
392      parseConditional();
393      break;
394    case tok::kw_template:
395      parseTemplateDeclaration();
396      break;
397    case tok::identifier:
398      if (Line.First.is(tok::kw_for) &&
399          Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
400        Tok->Type = TT_ObjCForIn;
401      break;
402    default:
403      break;
404    }
405    return true;
406  }
407
408  void parseIncludeDirective() {
409    next();
410    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
411      next();
412      while (CurrentToken != NULL) {
413        if (CurrentToken->isNot(tok::comment) ||
414            !CurrentToken->Children.empty())
415          CurrentToken->Type = TT_ImplicitStringLiteral;
416        next();
417      }
418    } else {
419      while (CurrentToken != NULL) {
420        if (CurrentToken->is(tok::string_literal))
421          // Mark these string literals as "implicit" literals, too, so that
422          // they are not split or line-wrapped.
423          CurrentToken->Type = TT_ImplicitStringLiteral;
424        next();
425      }
426    }
427  }
428
429  void parseWarningOrError() {
430    next();
431    // We still want to format the whitespace left of the first token of the
432    // warning or error.
433    next();
434    while (CurrentToken != NULL) {
435      CurrentToken->Type = TT_ImplicitStringLiteral;
436      next();
437    }
438  }
439
440  void parsePreprocessorDirective() {
441    next();
442    if (CurrentToken == NULL)
443      return;
444    // Hashes in the middle of a line can lead to any strange token
445    // sequence.
446    if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
447      return;
448    switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
449    case tok::pp_include:
450    case tok::pp_import:
451      parseIncludeDirective();
452      break;
453    case tok::pp_error:
454    case tok::pp_warning:
455      parseWarningOrError();
456      break;
457    default:
458      break;
459    }
460    while (CurrentToken != NULL)
461      next();
462  }
463
464public:
465  LineType parseLine() {
466    int PeriodsAndArrows = 0;
467    AnnotatedToken *LastPeriodOrArrow = NULL;
468    bool CanBeBuilderTypeStmt = true;
469    if (CurrentToken->is(tok::hash)) {
470      parsePreprocessorDirective();
471      return LT_PreprocessorDirective;
472    }
473    while (CurrentToken != NULL) {
474      if (CurrentToken->is(tok::kw_virtual))
475        KeywordVirtualFound = true;
476      if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow)) {
477        ++PeriodsAndArrows;
478        LastPeriodOrArrow = CurrentToken;
479      }
480      AnnotatedToken *TheToken = CurrentToken;
481      if (!consumeToken())
482        return LT_Invalid;
483      if (getPrecedence(*TheToken) > prec::Assignment &&
484          TheToken->Type == TT_BinaryOperator)
485        CanBeBuilderTypeStmt = false;
486    }
487    if (KeywordVirtualFound)
488      return LT_VirtualFunctionDecl;
489
490    // Assume a builder-type call if there are 2 or more "." and "->".
491    if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
492      LastPeriodOrArrow->LastInChainOfCalls = true;
493      return LT_BuilderTypeCall;
494    }
495
496    if (Line.First.Type == TT_ObjCMethodSpecifier) {
497      if (Contexts.back().FirstObjCSelectorName != NULL)
498        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
499            Contexts.back().LongestObjCSelectorName;
500      return LT_ObjCMethodDecl;
501    }
502
503    return LT_Other;
504  }
505
506private:
507  void next() {
508    if (CurrentToken != NULL) {
509      determineTokenType(*CurrentToken);
510      CurrentToken->BindingStrength = Contexts.back().BindingStrength;
511    }
512
513    if (CurrentToken != NULL && !CurrentToken->Children.empty())
514      CurrentToken = &CurrentToken->Children[0];
515    else
516      CurrentToken = NULL;
517
518    // Reset token type in case we have already looked at it and then recovered
519    // from an error (e.g. failure to find the matching >).
520    if (CurrentToken != NULL)
521      CurrentToken->Type = TT_Unknown;
522  }
523
524  /// \brief A struct to hold information valid in a specific context, e.g.
525  /// a pair of parenthesis.
526  struct Context {
527    Context(unsigned BindingStrength, bool IsExpression)
528        : BindingStrength(BindingStrength), LongestObjCSelectorName(0),
529          ColonIsForRangeExpr(false), ColonIsObjCMethodExpr(false),
530          FirstObjCSelectorName(NULL), IsExpression(IsExpression) {}
531
532    unsigned BindingStrength;
533    unsigned LongestObjCSelectorName;
534    bool ColonIsForRangeExpr;
535    bool ColonIsObjCMethodExpr;
536    AnnotatedToken *FirstObjCSelectorName;
537    bool IsExpression;
538  };
539
540  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
541  /// of each instance.
542  struct ScopedContextCreator {
543    AnnotatingParser &P;
544
545    ScopedContextCreator(AnnotatingParser &P, unsigned Increase) : P(P) {
546      P.Contexts.push_back(Context(P.Contexts.back().BindingStrength + Increase,
547                                   P.Contexts.back().IsExpression));
548    }
549
550    ~ScopedContextCreator() { P.Contexts.pop_back(); }
551  };
552
553  void determineTokenType(AnnotatedToken &Current) {
554    if (getPrecedence(Current) == prec::Assignment) {
555      Contexts.back().IsExpression = true;
556      for (AnnotatedToken *Previous = Current.Parent;
557           Previous && Previous->isNot(tok::comma);
558           Previous = Previous->Parent) {
559        if (Previous->is(tok::r_square))
560          Previous = Previous->MatchingParen;
561        if (Previous->Type == TT_BinaryOperator &&
562            (Previous->is(tok::star) || Previous->is(tok::amp))) {
563          Previous->Type = TT_PointerOrReference;
564        }
565      }
566    } else if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
567               (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
568                (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
569      Contexts.back().IsExpression = true;
570    } else if (Current.is(tok::r_paren) || Current.is(tok::greater) ||
571               Current.is(tok::comma)) {
572      for (AnnotatedToken *Previous = Current.Parent;
573           Previous && (Previous->is(tok::star) || Previous->is(tok::amp));
574           Previous = Previous->Parent)
575        Previous->Type = TT_PointerOrReference;
576    } else if (Current.Parent &&
577               Current.Parent->Type == TT_CtorInitializerColon) {
578      Contexts.back().IsExpression = true;
579    }
580
581    if (Current.Type == TT_Unknown) {
582      if (Current.Parent && Current.is(tok::identifier) &&
583          ((Current.Parent->is(tok::identifier) &&
584            Current.Parent->FormatTok.Tok.getIdentifierInfo()
585                ->getPPKeywordID() == tok::pp_not_keyword) ||
586           Current.Parent->Type == TT_PointerOrReference ||
587           Current.Parent->Type == TT_TemplateCloser)) {
588        Current.Type = TT_StartOfName;
589      } else if (Current.is(tok::star) || Current.is(tok::amp)) {
590        Current.Type =
591            determineStarAmpUsage(Current, Contexts.back().IsExpression);
592      } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
593                 Current.is(tok::caret)) {
594        Current.Type = determinePlusMinusCaretUsage(Current);
595      } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
596        Current.Type = determineIncrementUsage(Current);
597      } else if (Current.is(tok::exclaim)) {
598        Current.Type = TT_UnaryOperator;
599      } else if (isBinaryOperator(Current)) {
600        Current.Type = TT_BinaryOperator;
601      } else if (Current.is(tok::comment)) {
602        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
603                                            Lex.getLangOpts()));
604        if (StringRef(Data).startswith("//"))
605          Current.Type = TT_LineComment;
606        else
607          Current.Type = TT_BlockComment;
608      } else if (Current.is(tok::r_paren)) {
609        bool ParensNotExpr = !Current.Parent ||
610                             Current.Parent->Type == TT_PointerOrReference ||
611                             Current.Parent->Type == TT_TemplateCloser;
612        bool ParensCouldEndDecl =
613            !Current.Children.empty() && (Current.Children[0].is(tok::equal) ||
614                                          Current.Children[0].is(tok::semi) ||
615                                          Current.Children[0].is(tok::l_brace));
616        if (ParensNotExpr && !ParensCouldEndDecl &&
617            Contexts.back().IsExpression)
618          // FIXME: We need to get smarter and understand more cases of casts.
619          Current.Type = TT_CastRParen;
620      } else if (Current.is(tok::at) && Current.Children.size()) {
621        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
622        case tok::objc_interface:
623        case tok::objc_implementation:
624        case tok::objc_protocol:
625          Current.Type = TT_ObjCDecl;
626          break;
627        case tok::objc_property:
628          Current.Type = TT_ObjCProperty;
629          break;
630        default:
631          break;
632        }
633      }
634    }
635  }
636
637  /// \brief Return the type of the given token assuming it is * or &.
638  TokenType
639  determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
640    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
641    if (PrevToken == NULL)
642      return TT_UnaryOperator;
643
644    const AnnotatedToken *NextToken = getNextToken(Tok);
645    if (NextToken == NULL)
646      return TT_Unknown;
647
648    if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
649        PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
650        PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
651        PrevToken->is(tok::equal) || PrevToken->Type == TT_BinaryOperator ||
652        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
653      return TT_UnaryOperator;
654
655    if (NextToken->is(tok::l_square))
656      return TT_PointerOrReference;
657
658    if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
659        PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
660        isUnaryOperator(*NextToken) || NextToken->is(tok::l_paren) ||
661        NextToken->is(tok::l_square))
662      return TT_BinaryOperator;
663
664    // It is very unlikely that we are going to find a pointer or reference type
665    // definition on the RHS of an assignment.
666    if (IsExpression)
667      return TT_BinaryOperator;
668
669    return TT_PointerOrReference;
670  }
671
672  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
673    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
674    if (PrevToken == NULL)
675      return TT_UnaryOperator;
676
677    // Use heuristics to recognize unary operators.
678    if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
679        PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
680        PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
681        PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
682        PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
683      return TT_UnaryOperator;
684
685    // There can't be two consecutive binary operators.
686    if (PrevToken->Type == TT_BinaryOperator)
687      return TT_UnaryOperator;
688
689    // Fall back to marking the token as binary operator.
690    return TT_BinaryOperator;
691  }
692
693  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
694  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
695    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
696    if (PrevToken == NULL)
697      return TT_UnaryOperator;
698    if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
699        PrevToken->is(tok::identifier))
700      return TT_TrailingUnaryOperator;
701
702    return TT_UnaryOperator;
703  }
704
705  SmallVector<Context, 8> Contexts;
706
707  SourceManager &SourceMgr;
708  Lexer &Lex;
709  AnnotatedLine &Line;
710  AnnotatedToken *CurrentToken;
711  bool KeywordVirtualFound;
712  IdentifierInfo &Ident_in;
713};
714
715/// \brief Parses binary expressions by inserting fake parenthesis based on
716/// operator precedence.
717class ExpressionParser {
718public:
719  ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
720
721  /// \brief Parse expressions with the given operatore precedence.
722  void parse(int Precedence = 0) {
723    if (Precedence > prec::PointerToMember || Current == NULL)
724      return;
725
726    // Skip over "return" until we can properly parse it.
727    if (Current->is(tok::kw_return))
728      next();
729
730    // Eagerly consume trailing comments.
731    while (isTrailingComment(Current)) {
732      next();
733    }
734
735    AnnotatedToken *Start = Current;
736    bool OperatorFound = false;
737
738    while (Current) {
739      // Consume operators with higher precedence.
740      parse(prec::Level(Precedence + 1));
741
742      int CurrentPrecedence = 0;
743      if (Current) {
744        if (Current->Type == TT_ConditionalExpr)
745          CurrentPrecedence = 1 + (int) prec::Conditional;
746        else if (Current->is(tok::semi))
747          CurrentPrecedence = 1;
748        else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
749          CurrentPrecedence = 1 + (int) getPrecedence(*Current);
750      }
751
752      // At the end of the line or when an operator with higher precedence is
753      // found, insert fake parenthesis and return.
754      if (Current == NULL || closesScope(*Current) ||
755          (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
756        if (OperatorFound) {
757          ++Start->FakeLParens;
758          if (Current)
759            ++Current->Parent->FakeRParens;
760        }
761        return;
762      }
763
764      // Consume scopes: (), [], <> and {}
765      if (opensScope(*Current)) {
766        AnnotatedToken *Left = Current;
767        while (Current && !closesScope(*Current)) {
768          next();
769          parse();
770        }
771        // Remove fake parens that just duplicate the real parens.
772        if (Current && Left->Children[0].FakeLParens > 0 &&
773            Current->Parent->FakeRParens > 0) {
774          --Left->Children[0].FakeLParens;
775          --Current->Parent->FakeRParens;
776        }
777        next();
778      } else {
779        // Operator found.
780        if (CurrentPrecedence == Precedence)
781          OperatorFound = true;
782
783        next();
784      }
785    }
786  }
787
788private:
789  void next() {
790    if (Current != NULL)
791      Current = Current->Children.empty() ? NULL : &Current->Children[0];
792  }
793
794  bool closesScope(const AnnotatedToken &Tok) {
795    return Current->is(tok::r_paren) || Current->Type == TT_TemplateCloser ||
796           Current->is(tok::r_brace) || Current->is(tok::r_square);
797  }
798
799  bool opensScope(const AnnotatedToken &Tok) {
800    return Current->is(tok::l_paren) || Current->Type == TT_TemplateOpener ||
801           Current->is(tok::l_brace) || Current->is(tok::l_square);
802  }
803
804  AnnotatedToken *Current;
805};
806
807void TokenAnnotator::annotate(AnnotatedLine &Line) {
808  AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
809  Line.Type = Parser.parseLine();
810  if (Line.Type == LT_Invalid)
811    return;
812
813  ExpressionParser ExprParser(Line);
814  ExprParser.parse();
815
816  if (Line.First.Type == TT_ObjCMethodSpecifier)
817    Line.Type = LT_ObjCMethodDecl;
818  else if (Line.First.Type == TT_ObjCDecl)
819    Line.Type = LT_ObjCDecl;
820  else if (Line.First.Type == TT_ObjCProperty)
821    Line.Type = LT_ObjCProperty;
822
823  Line.First.SpacesRequiredBefore = 1;
824  Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
825  Line.First.CanBreakBefore = Line.First.MustBreakBefore;
826
827  Line.First.TotalLength = Line.First.FormatTok.TokenLength;
828}
829
830void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
831  if (Line.First.Children.empty())
832    return;
833  AnnotatedToken *Current = &Line.First.Children[0];
834  while (Current != NULL) {
835    if (Current->Type == TT_LineComment)
836      Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
837    else
838      Current->SpacesRequiredBefore =
839          spaceRequiredBefore(Line, *Current) ? 1 : 0;
840
841    if (Current->FormatTok.MustBreakBefore) {
842      Current->MustBreakBefore = true;
843    } else if (Current->Type == TT_LineComment) {
844      Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
845    } else if (isTrailingComment(Current->Parent) ||
846               (Current->is(tok::string_literal) &&
847                Current->Parent->is(tok::string_literal))) {
848      Current->MustBreakBefore = true;
849    } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
850               Current->Parent->is(tok::string_literal) &&
851               Current->Children[0].is(tok::string_literal)) {
852      Current->MustBreakBefore = true;
853    } else {
854      Current->MustBreakBefore = false;
855    }
856    Current->CanBreakBefore =
857        Current->MustBreakBefore || canBreakBefore(Line, *Current);
858    if (Current->MustBreakBefore)
859      Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
860    else
861      Current->TotalLength =
862          Current->Parent->TotalLength + Current->FormatTok.TokenLength +
863          Current->SpacesRequiredBefore;
864    // FIXME: Only calculate this if CanBreakBefore is true once static
865    // initializers etc. are sorted out.
866    // FIXME: Move magic numbers to a better place.
867    Current->SplitPenalty =
868        20 * Current->BindingStrength + splitPenalty(Line, *Current);
869
870    Current = Current->Children.empty() ? NULL : &Current->Children[0];
871  }
872}
873
874unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
875                                      const AnnotatedToken &Tok) {
876  const AnnotatedToken &Left = *Tok.Parent;
877  const AnnotatedToken &Right = Tok;
878
879  if (Right.Type == TT_StartOfName) {
880    if (Line.First.is(tok::kw_for))
881      return 3;
882    else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
883      // FIXME: Clean up hack of using BindingStrength to find top-level names.
884      return Style.PenaltyReturnTypeOnItsOwnLine;
885    else
886      return 100;
887  }
888  if (Left.is(tok::equal) && Right.is(tok::l_brace))
889    return 150;
890  if (Left.is(tok::coloncolon))
891    return 500;
892
893  if (Left.Type == TT_RangeBasedForLoopColon ||
894      Left.Type == TT_InheritanceColon)
895    return 2;
896
897  if (Right.is(tok::arrow) || Right.is(tok::period)) {
898    if (Line.Type == LT_BuilderTypeCall)
899      return 5;
900    if ((Left.is(tok::r_paren) || Left.is(tok::r_square)) &&
901        Left.MatchingParen && Left.MatchingParen->ParameterCount > 0)
902      return 20; // Should be smaller than breaking at a nested comma.
903    return 150;
904  }
905
906  // In for-loops, prefer breaking at ',' and ';'.
907  if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
908    return 4;
909
910  if (Left.is(tok::semi))
911    return 0;
912  if (Left.is(tok::comma))
913    return 1;
914
915  // In Objective-C method expressions, prefer breaking before "param:" over
916  // breaking after it.
917  if (Right.Type == TT_ObjCSelectorName)
918    return 0;
919  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
920    return 20;
921
922  if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
923      Left.is(tok::l_brace) || Left.Type == TT_TemplateOpener)
924    return 20;
925
926  if (Right.is(tok::lessless)) {
927    if (Left.is(tok::string_literal)) {
928      char LastChar =
929          StringRef(Left.FormatTok.Tok.getLiteralData(),
930                    Left.FormatTok.TokenLength).drop_back(1).rtrim().back();
931      if (LastChar == ':' || LastChar == '=')
932        return 100;
933    }
934    return prec::Shift;
935  }
936  if (Left.Type == TT_ConditionalExpr)
937    return prec::Conditional;
938  prec::Level Level = getPrecedence(Left);
939
940  if (Level != prec::Unknown)
941    return Level;
942
943  return 3;
944}
945
946bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
947                                          const AnnotatedToken &Left,
948                                          const AnnotatedToken &Right) {
949  if (Right.is(tok::hashhash))
950    return Left.is(tok::hash);
951  if (Left.is(tok::hashhash) || Left.is(tok::hash))
952    return Right.is(tok::hash);
953  if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
954    return false;
955  if (Right.is(tok::less) &&
956      (Left.is(tok::kw_template) ||
957       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
958    return true;
959  if (Left.is(tok::arrow) || Right.is(tok::arrow))
960    return false;
961  if (Left.is(tok::exclaim) || Left.is(tok::tilde))
962    return false;
963  if (Left.is(tok::at) &&
964      (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
965       Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
966       Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
967       Right.is(tok::kw_true) || Right.is(tok::kw_false)))
968    return false;
969  if (Left.is(tok::coloncolon))
970    return false;
971  if (Right.is(tok::coloncolon))
972    return Left.isNot(tok::identifier) && Left.isNot(tok::greater) &&
973           Left.isNot(tok::l_paren);
974  if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
975    return false;
976  if (Right.is(tok::amp) || Right.is(tok::star))
977    return Left.FormatTok.Tok.isLiteral() ||
978           (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
979            Left.isNot(tok::l_paren) && !Style.PointerBindsToType);
980  if (Left.is(tok::amp) || Left.is(tok::star))
981    return Right.FormatTok.Tok.isLiteral() ||
982           (Right.isNot(tok::star) && Right.isNot(tok::amp) &&
983            Style.PointerBindsToType);
984  if (Right.is(tok::star) && Left.is(tok::l_paren))
985    return false;
986  if (Left.is(tok::l_square))
987    return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
988  if (Right.is(tok::r_square))
989    return Right.Type == TT_ObjCArrayLiteral;
990  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
991    return false;
992  if (Left.is(tok::period) || Right.is(tok::period))
993    return false;
994  if (Left.is(tok::colon))
995    return Left.Type != TT_ObjCMethodExpr;
996  if (Right.is(tok::colon))
997    return Right.Type != TT_ObjCMethodExpr;
998  if (Left.is(tok::l_paren))
999    return false;
1000  if (Right.is(tok::l_paren)) {
1001    return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
1002           Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
1003           Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
1004           Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
1005           Left.is(tok::kw_delete);
1006  }
1007  if (Left.is(tok::at) &&
1008      Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1009    return false;
1010  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1011    return false;
1012  return true;
1013}
1014
1015bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1016                                         const AnnotatedToken &Tok) {
1017  if (Tok.FormatTok.Tok.getIdentifierInfo() &&
1018      Tok.Parent->FormatTok.Tok.getIdentifierInfo())
1019    return true; // Never ever merge two identifiers.
1020  if (Line.Type == LT_ObjCMethodDecl) {
1021    if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1022      return true;
1023    if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1024      // Don't space between ')' and <id>
1025      return false;
1026  }
1027  if (Line.Type == LT_ObjCProperty &&
1028      (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1029    return false;
1030
1031  if (Tok.Parent->is(tok::comma))
1032    return true;
1033  if (Tok.is(tok::comma))
1034    return false;
1035  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1036    return true;
1037  if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
1038    return false;
1039  if (Tok.Type == TT_OverloadedOperatorLParen)
1040    return false;
1041  if (Tok.is(tok::colon))
1042    return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
1043           Tok.Type != TT_ObjCMethodExpr;
1044  if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
1045    return false;
1046  if (Tok.Type == TT_UnaryOperator)
1047    return Tok.Parent->isNot(tok::l_paren) &&
1048           Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
1049           (Tok.Parent->isNot(tok::colon) ||
1050            Tok.Parent->Type != TT_ObjCMethodExpr);
1051  if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1052    return Tok.Type == TT_TemplateCloser &&
1053           Tok.Parent->Type == TT_TemplateCloser &&
1054           Style.Standard != FormatStyle::LS_Cpp11;
1055  }
1056  if (Tok.is(tok::arrowstar) || Tok.Parent->is(tok::arrowstar))
1057    return false;
1058  if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1059    return true;
1060  if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1061    return false;
1062  if (Tok.is(tok::less) && Line.First.is(tok::hash))
1063    return true;
1064  if (Tok.Type == TT_TrailingUnaryOperator)
1065    return false;
1066  return spaceRequiredBetween(Line, *Tok.Parent, Tok);
1067}
1068
1069bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1070                                    const AnnotatedToken &Right) {
1071  const AnnotatedToken &Left = *Right.Parent;
1072  if (Right.Type == TT_StartOfName)
1073    return true;
1074  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1075    return false;
1076  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1077    return true;
1078  if (Right.Type == TT_ObjCSelectorName)
1079    return true;
1080  if (Left.ClosesTemplateDeclaration)
1081    return true;
1082  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1083    return true;
1084  if (Right.Type == TT_RangeBasedForLoopColon ||
1085      Right.Type == TT_InheritanceColon)
1086    return false;
1087  if (Left.Type == TT_RangeBasedForLoopColon ||
1088      Left.Type == TT_InheritanceColon)
1089    return true;
1090  if (Right.Type == TT_RangeBasedForLoopColon)
1091    return false;
1092  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1093      Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1094      Left.is(tok::question) || Left.is(tok::kw_operator))
1095    return false;
1096  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1097    return false;
1098  if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
1099      Left.Parent->is(tok::kw___attribute))
1100    return false;
1101
1102  if (Right.Type == TT_LineComment)
1103    // We rely on MustBreakBefore being set correctly here as we should not
1104    // change the "binding" behavior of a comment.
1105    return false;
1106
1107  // Allow breaking after a trailing 'const', e.g. after a method declaration,
1108  // unless it is follow by ';', '{' or '='.
1109  if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1110      Left.Parent->is(tok::r_paren))
1111    return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
1112           Right.isNot(tok::equal);
1113
1114  // We only break before r_brace if there was a corresponding break before
1115  // the l_brace, which is tracked by BreakBeforeClosingBrace.
1116  if (Right.is(tok::r_brace))
1117    return false;
1118
1119  if (Right.is(tok::r_paren) || Right.is(tok::greater))
1120    return false;
1121  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1122    return true;
1123  return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1124         Left.is(tok::comma) || Right.is(tok::lessless) ||
1125         Right.is(tok::arrow) || Right.is(tok::period) ||
1126         Right.is(tok::colon) || Left.is(tok::coloncolon) ||
1127         Left.is(tok::semi) || Left.is(tok::l_brace) ||
1128         (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1129          (Right.is(tok::identifier) || Right.is(tok::kw___attribute))) ||
1130         (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1131         (Left.is(tok::l_square) && !Right.is(tok::r_square));
1132}
1133
1134} // namespace format
1135} // namespace clang
1136