1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include "llvm/Support/Debug.h"
20
21#define DEBUG_TYPE "format-token-annotator"
22
23namespace clang {
24namespace format {
25
26namespace {
27
28/// \brief A parser that gathers additional information about tokens.
29///
30/// The \c TokenAnnotator tries to match parenthesis and square brakets and
31/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
32/// into template parameter lists.
33class AnnotatingParser {
34public:
35  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
36                   const AdditionalKeywords &Keywords)
37      : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
38        Keywords(Keywords) {
39    Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
40    resetTokenMetadata(CurrentToken);
41  }
42
43private:
44  bool parseAngle() {
45    if (!CurrentToken)
46      return false;
47    FormatToken *Left = CurrentToken->Previous;
48    Left->ParentBracket = Contexts.back().ContextKind;
49    ScopedContextCreator ContextCreator(*this, tok::less, 10);
50
51    // If this angle is in the context of an expression, we need to be more
52    // hesitant to detect it as opening template parameters.
53    bool InExprContext = Contexts.back().IsExpression;
54
55    Contexts.back().IsExpression = false;
56    // If there's a template keyword before the opening angle bracket, this is a
57    // template parameter, not an argument.
58    Contexts.back().InTemplateArgument =
59        Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
60
61    if (Style.Language == FormatStyle::LK_Java &&
62        CurrentToken->is(tok::question))
63      next();
64
65    while (CurrentToken) {
66      if (CurrentToken->is(tok::greater)) {
67        Left->MatchingParen = CurrentToken;
68        CurrentToken->MatchingParen = Left;
69        CurrentToken->Type = TT_TemplateCloser;
70        next();
71        return true;
72      }
73      if (CurrentToken->is(tok::question) &&
74          Style.Language == FormatStyle::LK_Java) {
75        next();
76        continue;
77      }
78      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
79          (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
80        return false;
81      // If a && or || is found and interpreted as a binary operator, this set
82      // of angles is likely part of something like "a < b && c > d". If the
83      // angles are inside an expression, the ||/&& might also be a binary
84      // operator that was misinterpreted because we are parsing template
85      // parameters.
86      // FIXME: This is getting out of hand, write a decent parser.
87      if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
88          CurrentToken->Previous->is(TT_BinaryOperator) &&
89          Contexts[Contexts.size() - 2].IsExpression &&
90          !Line.startsWith(tok::kw_template))
91        return false;
92      updateParameterCount(Left, CurrentToken);
93      if (!consumeToken())
94        return false;
95    }
96    return false;
97  }
98
99  bool parseParens(bool LookForDecls = false) {
100    if (!CurrentToken)
101      return false;
102    FormatToken *Left = CurrentToken->Previous;
103    Left->ParentBracket = Contexts.back().ContextKind;
104    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
105
106    // FIXME: This is a bit of a hack. Do better.
107    Contexts.back().ColonIsForRangeExpr =
108        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
109
110    bool StartsObjCMethodExpr = false;
111    if (CurrentToken->is(tok::caret)) {
112      // (^ can start a block type.
113      Left->Type = TT_ObjCBlockLParen;
114    } else if (FormatToken *MaybeSel = Left->Previous) {
115      // @selector( starts a selector.
116      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
117          MaybeSel->Previous->is(tok::at)) {
118        StartsObjCMethodExpr = true;
119      }
120    }
121
122    if (Left->Previous &&
123        (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
124                                 tok::kw_if, tok::kw_while, tok::l_paren,
125                                 tok::comma) ||
126         Left->Previous->is(TT_BinaryOperator))) {
127      // static_assert, if and while usually contain expressions.
128      Contexts.back().IsExpression = true;
129    } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
130               Left->Previous->MatchingParen &&
131               Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
132      // This is a parameter list of a lambda expression.
133      Contexts.back().IsExpression = false;
134    } else if (Line.InPPDirective &&
135               (!Left->Previous ||
136                !Left->Previous->isOneOf(tok::identifier,
137                                         TT_OverloadedOperator))) {
138      Contexts.back().IsExpression = true;
139    } else if (Contexts[Contexts.size() - 2].CaretFound) {
140      // This is the parameter list of an ObjC block.
141      Contexts.back().IsExpression = false;
142    } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
143      Left->Type = TT_AttributeParen;
144    } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
145      // The first argument to a foreach macro is a declaration.
146      Contexts.back().IsForEachMacro = true;
147      Contexts.back().IsExpression = false;
148    } else if (Left->Previous && Left->Previous->MatchingParen &&
149               Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
150      Contexts.back().IsExpression = false;
151    }
152
153    if (StartsObjCMethodExpr) {
154      Contexts.back().ColonIsObjCMethodExpr = true;
155      Left->Type = TT_ObjCMethodExpr;
156    }
157
158    bool MightBeFunctionType = CurrentToken->isOneOf(tok::star, tok::amp);
159    bool HasMultipleLines = false;
160    bool HasMultipleParametersOnALine = false;
161    bool MightBeObjCForRangeLoop =
162        Left->Previous && Left->Previous->is(tok::kw_for);
163    while (CurrentToken) {
164      // LookForDecls is set when "if (" has been seen. Check for
165      // 'identifier' '*' 'identifier' followed by not '=' -- this
166      // '*' has to be a binary operator but determineStarAmpUsage() will
167      // categorize it as an unary operator, so set the right type here.
168      if (LookForDecls && CurrentToken->Next) {
169        FormatToken *Prev = CurrentToken->getPreviousNonComment();
170        if (Prev) {
171          FormatToken *PrevPrev = Prev->getPreviousNonComment();
172          FormatToken *Next = CurrentToken->Next;
173          if (PrevPrev && PrevPrev->is(tok::identifier) &&
174              Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
175              CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
176            Prev->Type = TT_BinaryOperator;
177            LookForDecls = false;
178          }
179        }
180      }
181
182      if (CurrentToken->Previous->is(TT_PointerOrReference) &&
183          CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
184                                                    tok::coloncolon))
185        MightBeFunctionType = true;
186      if (CurrentToken->Previous->is(TT_BinaryOperator))
187        Contexts.back().IsExpression = true;
188      if (CurrentToken->is(tok::r_paren)) {
189        if (MightBeFunctionType && CurrentToken->Next &&
190            (CurrentToken->Next->is(tok::l_paren) ||
191             (CurrentToken->Next->is(tok::l_square) &&
192              !Contexts.back().IsExpression)))
193          Left->Type = TT_FunctionTypeLParen;
194        Left->MatchingParen = CurrentToken;
195        CurrentToken->MatchingParen = Left;
196
197        if (StartsObjCMethodExpr) {
198          CurrentToken->Type = TT_ObjCMethodExpr;
199          if (Contexts.back().FirstObjCSelectorName) {
200            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
201                Contexts.back().LongestObjCSelectorName;
202          }
203        }
204
205        if (Left->is(TT_AttributeParen))
206          CurrentToken->Type = TT_AttributeParen;
207        if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
208          CurrentToken->Type = TT_JavaAnnotation;
209        if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
210          CurrentToken->Type = TT_LeadingJavaAnnotation;
211
212        if (!HasMultipleLines)
213          Left->PackingKind = PPK_Inconclusive;
214        else if (HasMultipleParametersOnALine)
215          Left->PackingKind = PPK_BinPacked;
216        else
217          Left->PackingKind = PPK_OnePerLine;
218
219        next();
220        return true;
221      }
222      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
223        return false;
224
225      if (CurrentToken->is(tok::l_brace))
226        Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
227      if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
228          !CurrentToken->Next->HasUnescapedNewline &&
229          !CurrentToken->Next->isTrailingComment())
230        HasMultipleParametersOnALine = true;
231      if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
232          CurrentToken->isSimpleTypeSpecifier())
233        Contexts.back().IsExpression = false;
234      if (CurrentToken->isOneOf(tok::semi, tok::colon))
235        MightBeObjCForRangeLoop = false;
236      if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
237        CurrentToken->Type = TT_ObjCForIn;
238      // When we discover a 'new', we set CanBeExpression to 'false' in order to
239      // parse the type correctly. Reset that after a comma.
240      if (CurrentToken->is(tok::comma))
241        Contexts.back().CanBeExpression = true;
242
243      FormatToken *Tok = CurrentToken;
244      if (!consumeToken())
245        return false;
246      updateParameterCount(Left, Tok);
247      if (CurrentToken && CurrentToken->HasUnescapedNewline)
248        HasMultipleLines = true;
249    }
250    return false;
251  }
252
253  bool parseSquare() {
254    if (!CurrentToken)
255      return false;
256
257    // A '[' could be an index subscript (after an identifier or after
258    // ')' or ']'), it could be the start of an Objective-C method
259    // expression, or it could the start of an Objective-C array literal.
260    FormatToken *Left = CurrentToken->Previous;
261    Left->ParentBracket = Contexts.back().ContextKind;
262    FormatToken *Parent = Left->getPreviousNonComment();
263    bool StartsObjCMethodExpr =
264        Style.Language == FormatStyle::LK_Cpp &&
265        Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
266        CurrentToken->isNot(tok::l_brace) &&
267        (!Parent ||
268         Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
269                         tok::kw_return, tok::kw_throw) ||
270         Parent->isUnaryOperator() ||
271         Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
272         getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
273    bool ColonFound = false;
274
275    unsigned BindingIncrease = 1;
276    if (Left->is(TT_Unknown)) {
277      if (StartsObjCMethodExpr) {
278        Left->Type = TT_ObjCMethodExpr;
279      } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
280                 Contexts.back().ContextKind == tok::l_brace &&
281                 Parent->isOneOf(tok::l_brace, tok::comma)) {
282        Left->Type = TT_JsComputedPropertyName;
283      } else if (Parent &&
284                 Parent->isOneOf(tok::at, tok::equal, tok::comma, tok::l_paren,
285                                 tok::l_square, tok::question, tok::colon,
286                                 tok::kw_return)) {
287        Left->Type = TT_ArrayInitializerLSquare;
288      } else {
289        BindingIncrease = 10;
290        Left->Type = TT_ArraySubscriptLSquare;
291      }
292    }
293
294    ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
295    Contexts.back().IsExpression = true;
296    Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
297
298    while (CurrentToken) {
299      if (CurrentToken->is(tok::r_square)) {
300        if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
301            Left->is(TT_ObjCMethodExpr)) {
302          // An ObjC method call is rarely followed by an open parenthesis.
303          // FIXME: Do we incorrectly label ":" with this?
304          StartsObjCMethodExpr = false;
305          Left->Type = TT_Unknown;
306        }
307        if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
308          CurrentToken->Type = TT_ObjCMethodExpr;
309          // determineStarAmpUsage() thinks that '*' '[' is allocating an
310          // array of pointers, but if '[' starts a selector then '*' is a
311          // binary operator.
312          if (Parent && Parent->is(TT_PointerOrReference))
313            Parent->Type = TT_BinaryOperator;
314        }
315        Left->MatchingParen = CurrentToken;
316        CurrentToken->MatchingParen = Left;
317        if (Contexts.back().FirstObjCSelectorName) {
318          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
319              Contexts.back().LongestObjCSelectorName;
320          if (Left->BlockParameterCount > 1)
321            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
322        }
323        next();
324        return true;
325      }
326      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
327        return false;
328      if (CurrentToken->is(tok::colon)) {
329        if (Left->is(TT_ArraySubscriptLSquare)) {
330          Left->Type = TT_ObjCMethodExpr;
331          StartsObjCMethodExpr = true;
332          Contexts.back().ColonIsObjCMethodExpr = true;
333          if (Parent && Parent->is(tok::r_paren))
334            Parent->Type = TT_CastRParen;
335        }
336        ColonFound = true;
337      }
338      if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
339          !ColonFound)
340        Left->Type = TT_ArrayInitializerLSquare;
341      FormatToken *Tok = CurrentToken;
342      if (!consumeToken())
343        return false;
344      updateParameterCount(Left, Tok);
345    }
346    return false;
347  }
348
349  bool parseBrace() {
350    if (CurrentToken) {
351      FormatToken *Left = CurrentToken->Previous;
352      Left->ParentBracket = Contexts.back().ContextKind;
353
354      if (Contexts.back().CaretFound)
355        Left->Type = TT_ObjCBlockLBrace;
356      Contexts.back().CaretFound = false;
357
358      ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
359      Contexts.back().ColonIsDictLiteral = true;
360      if (Left->BlockKind == BK_BracedInit)
361        Contexts.back().IsExpression = true;
362
363      while (CurrentToken) {
364        if (CurrentToken->is(tok::r_brace)) {
365          Left->MatchingParen = CurrentToken;
366          CurrentToken->MatchingParen = Left;
367          next();
368          return true;
369        }
370        if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
371          return false;
372        updateParameterCount(Left, CurrentToken);
373        if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
374          FormatToken *Previous = CurrentToken->getPreviousNonComment();
375          if (((CurrentToken->is(tok::colon) &&
376                (!Contexts.back().ColonIsDictLiteral ||
377                 Style.Language != FormatStyle::LK_Cpp)) ||
378               Style.Language == FormatStyle::LK_Proto) &&
379              Previous->Tok.getIdentifierInfo())
380            Previous->Type = TT_SelectorName;
381          if (CurrentToken->is(tok::colon) ||
382              Style.Language == FormatStyle::LK_JavaScript)
383            Left->Type = TT_DictLiteral;
384        }
385        if (!consumeToken())
386          return false;
387      }
388    }
389    return true;
390  }
391
392  void updateParameterCount(FormatToken *Left, FormatToken *Current) {
393    if (Current->is(tok::l_brace) && !Current->is(TT_DictLiteral))
394      ++Left->BlockParameterCount;
395    if (Current->is(tok::comma)) {
396      ++Left->ParameterCount;
397      if (!Left->Role)
398        Left->Role.reset(new CommaSeparatedList(Style));
399      Left->Role->CommaFound(Current);
400    } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
401      Left->ParameterCount = 1;
402    }
403  }
404
405  bool parseConditional() {
406    while (CurrentToken) {
407      if (CurrentToken->is(tok::colon)) {
408        CurrentToken->Type = TT_ConditionalExpr;
409        next();
410        return true;
411      }
412      if (!consumeToken())
413        return false;
414    }
415    return false;
416  }
417
418  bool parseTemplateDeclaration() {
419    if (CurrentToken && CurrentToken->is(tok::less)) {
420      CurrentToken->Type = TT_TemplateOpener;
421      next();
422      if (!parseAngle())
423        return false;
424      if (CurrentToken)
425        CurrentToken->Previous->ClosesTemplateDeclaration = true;
426      return true;
427    }
428    return false;
429  }
430
431  bool consumeToken() {
432    FormatToken *Tok = CurrentToken;
433    next();
434    switch (Tok->Tok.getKind()) {
435    case tok::plus:
436    case tok::minus:
437      if (!Tok->Previous && Line.MustBeDeclaration)
438        Tok->Type = TT_ObjCMethodSpecifier;
439      break;
440    case tok::colon:
441      if (!Tok->Previous)
442        return false;
443      // Colons from ?: are handled in parseConditional().
444      if (Style.Language == FormatStyle::LK_JavaScript) {
445        if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
446            (Contexts.size() == 1 &&               // switch/case labels
447             !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
448            Contexts.back().ContextKind == tok::l_paren ||  // function params
449            Contexts.back().ContextKind == tok::l_square || // array type
450            (Contexts.size() == 1 &&
451             Line.MustBeDeclaration)) { // method/property declaration
452          Tok->Type = TT_JsTypeColon;
453          break;
454        }
455      }
456      if (Contexts.back().ColonIsDictLiteral) {
457        Tok->Type = TT_DictLiteral;
458      } else if (Contexts.back().ColonIsObjCMethodExpr ||
459                 Line.startsWith(TT_ObjCMethodSpecifier)) {
460        Tok->Type = TT_ObjCMethodExpr;
461        Tok->Previous->Type = TT_SelectorName;
462        if (Tok->Previous->ColumnWidth >
463            Contexts.back().LongestObjCSelectorName) {
464          Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
465        }
466        if (!Contexts.back().FirstObjCSelectorName)
467          Contexts.back().FirstObjCSelectorName = Tok->Previous;
468      } else if (Contexts.back().ColonIsForRangeExpr) {
469        Tok->Type = TT_RangeBasedForLoopColon;
470      } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
471        Tok->Type = TT_BitFieldColon;
472      } else if (Contexts.size() == 1 &&
473                 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
474        if (Tok->Previous->is(tok::r_paren))
475          Tok->Type = TT_CtorInitializerColon;
476        else
477          Tok->Type = TT_InheritanceColon;
478      } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
479                 Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
480        // This handles a special macro in ObjC code where selectors including
481        // the colon are passed as macro arguments.
482        Tok->Type = TT_ObjCMethodExpr;
483      } else if (Contexts.back().ContextKind == tok::l_paren) {
484        Tok->Type = TT_InlineASMColon;
485      }
486      break;
487    case tok::kw_if:
488    case tok::kw_while:
489      if (CurrentToken && CurrentToken->is(tok::l_paren)) {
490        next();
491        if (!parseParens(/*LookForDecls=*/true))
492          return false;
493      }
494      break;
495    case tok::kw_for:
496      Contexts.back().ColonIsForRangeExpr = true;
497      next();
498      if (!parseParens())
499        return false;
500      break;
501    case tok::l_paren:
502      // When faced with 'operator()()', the kw_operator handler incorrectly
503      // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
504      // the first two parens OverloadedOperators and the second l_paren an
505      // OverloadedOperatorLParen.
506      if (Tok->Previous &&
507          Tok->Previous->is(tok::r_paren) &&
508          Tok->Previous->MatchingParen &&
509          Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
510        Tok->Previous->Type = TT_OverloadedOperator;
511        Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
512        Tok->Type = TT_OverloadedOperatorLParen;
513      }
514
515      if (!parseParens())
516        return false;
517      if (Line.MustBeDeclaration && Contexts.size() == 1 &&
518          !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
519          (!Tok->Previous ||
520           !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
521                                   TT_LeadingJavaAnnotation)))
522        Line.MightBeFunctionDecl = true;
523      break;
524    case tok::l_square:
525      if (!parseSquare())
526        return false;
527      break;
528    case tok::l_brace:
529      if (!parseBrace())
530        return false;
531      break;
532    case tok::less:
533      if (!NonTemplateLess.count(Tok) &&
534          (!Tok->Previous ||
535           (!Tok->Previous->Tok.isLiteral() &&
536            !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) &&
537          parseAngle()) {
538        Tok->Type = TT_TemplateOpener;
539      } else {
540        Tok->Type = TT_BinaryOperator;
541        NonTemplateLess.insert(Tok);
542        CurrentToken = Tok;
543        next();
544      }
545      break;
546    case tok::r_paren:
547    case tok::r_square:
548      return false;
549    case tok::r_brace:
550      // Lines can start with '}'.
551      if (Tok->Previous)
552        return false;
553      break;
554    case tok::greater:
555      Tok->Type = TT_BinaryOperator;
556      break;
557    case tok::kw_operator:
558      while (CurrentToken &&
559             !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
560        if (CurrentToken->isOneOf(tok::star, tok::amp))
561          CurrentToken->Type = TT_PointerOrReference;
562        consumeToken();
563        if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
564          CurrentToken->Previous->Type = TT_OverloadedOperator;
565      }
566      if (CurrentToken) {
567        CurrentToken->Type = TT_OverloadedOperatorLParen;
568        if (CurrentToken->Previous->is(TT_BinaryOperator))
569          CurrentToken->Previous->Type = TT_OverloadedOperator;
570      }
571      break;
572    case tok::question:
573      if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
574          Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
575                             tok::r_brace)) {
576        // Question marks before semicolons, colons, etc. indicate optional
577        // types (fields, parameters), e.g.
578        //   function(x?: string, y?) {...}
579        //   class X { y?; }
580        Tok->Type = TT_JsTypeOptionalQuestion;
581        break;
582      }
583      // Declarations cannot be conditional expressions, this can only be part
584      // of a type declaration.
585      if (Line.MustBeDeclaration &&
586          Style.Language == FormatStyle::LK_JavaScript)
587        break;
588      parseConditional();
589      break;
590    case tok::kw_template:
591      parseTemplateDeclaration();
592      break;
593    case tok::comma:
594      if (Contexts.back().InCtorInitializer)
595        Tok->Type = TT_CtorInitializerComma;
596      else if (Contexts.back().FirstStartOfName &&
597               (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
598        Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
599        Line.IsMultiVariableDeclStmt = true;
600      }
601      if (Contexts.back().IsForEachMacro)
602        Contexts.back().IsExpression = true;
603      break;
604    default:
605      break;
606    }
607    return true;
608  }
609
610  void parseIncludeDirective() {
611    if (CurrentToken && CurrentToken->is(tok::less)) {
612      next();
613      while (CurrentToken) {
614        if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
615          CurrentToken->Type = TT_ImplicitStringLiteral;
616        next();
617      }
618    }
619  }
620
621  void parseWarningOrError() {
622    next();
623    // We still want to format the whitespace left of the first token of the
624    // warning or error.
625    next();
626    while (CurrentToken) {
627      CurrentToken->Type = TT_ImplicitStringLiteral;
628      next();
629    }
630  }
631
632  void parsePragma() {
633    next(); // Consume "pragma".
634    if (CurrentToken &&
635        CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
636      bool IsMark = CurrentToken->is(Keywords.kw_mark);
637      next(); // Consume "mark".
638      next(); // Consume first token (so we fix leading whitespace).
639      while (CurrentToken) {
640        if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
641          CurrentToken->Type = TT_ImplicitStringLiteral;
642        next();
643      }
644    }
645  }
646
647  LineType parsePreprocessorDirective() {
648    LineType Type = LT_PreprocessorDirective;
649    next();
650    if (!CurrentToken)
651      return Type;
652    if (CurrentToken->Tok.is(tok::numeric_constant)) {
653      CurrentToken->SpacesRequiredBefore = 1;
654      return Type;
655    }
656    // Hashes in the middle of a line can lead to any strange token
657    // sequence.
658    if (!CurrentToken->Tok.getIdentifierInfo())
659      return Type;
660    switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
661    case tok::pp_include:
662    case tok::pp_include_next:
663    case tok::pp_import:
664      next();
665      parseIncludeDirective();
666      Type = LT_ImportStatement;
667      break;
668    case tok::pp_error:
669    case tok::pp_warning:
670      parseWarningOrError();
671      break;
672    case tok::pp_pragma:
673      parsePragma();
674      break;
675    case tok::pp_if:
676    case tok::pp_elif:
677      Contexts.back().IsExpression = true;
678      parseLine();
679      break;
680    default:
681      break;
682    }
683    while (CurrentToken)
684      next();
685    return Type;
686  }
687
688public:
689  LineType parseLine() {
690    NonTemplateLess.clear();
691    if (CurrentToken->is(tok::hash))
692      return parsePreprocessorDirective();
693
694    // Directly allow to 'import <string-literal>' to support protocol buffer
695    // definitions (code.google.com/p/protobuf) or missing "#" (either way we
696    // should not break the line).
697    IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
698    if ((Style.Language == FormatStyle::LK_Java &&
699         CurrentToken->is(Keywords.kw_package)) ||
700        (Info && Info->getPPKeywordID() == tok::pp_import &&
701         CurrentToken->Next &&
702         CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
703                                     tok::kw_static))) {
704      next();
705      parseIncludeDirective();
706      return LT_ImportStatement;
707    }
708
709    // If this line starts and ends in '<' and '>', respectively, it is likely
710    // part of "#define <a/b.h>".
711    if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
712      parseIncludeDirective();
713      return LT_ImportStatement;
714    }
715
716    // In .proto files, top-level options are very similar to import statements
717    // and should not be line-wrapped.
718    if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
719        CurrentToken->is(Keywords.kw_option)) {
720      next();
721      if (CurrentToken && CurrentToken->is(tok::identifier))
722        return LT_ImportStatement;
723    }
724
725    bool KeywordVirtualFound = false;
726    bool ImportStatement = false;
727    while (CurrentToken) {
728      if (CurrentToken->is(tok::kw_virtual))
729        KeywordVirtualFound = true;
730      if (isImportStatement(*CurrentToken))
731        ImportStatement = true;
732      if (!consumeToken())
733        return LT_Invalid;
734    }
735    if (KeywordVirtualFound)
736      return LT_VirtualFunctionDecl;
737    if (ImportStatement)
738      return LT_ImportStatement;
739
740    if (Line.startsWith(TT_ObjCMethodSpecifier)) {
741      if (Contexts.back().FirstObjCSelectorName)
742        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
743            Contexts.back().LongestObjCSelectorName;
744      return LT_ObjCMethodDecl;
745    }
746
747    return LT_Other;
748  }
749
750private:
751  bool isImportStatement(const FormatToken &Tok) {
752    // FIXME: Closure-library specific stuff should not be hard-coded but be
753    // configurable.
754    return Style.Language == FormatStyle::LK_JavaScript &&
755           Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
756           Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
757                              Tok.Next->Next->TokenText == "provide" ||
758                              Tok.Next->Next->TokenText == "require" ||
759                              Tok.Next->Next->TokenText == "setTestOnly") &&
760           Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
761  }
762
763  void resetTokenMetadata(FormatToken *Token) {
764    if (!Token)
765      return;
766
767    // Reset token type in case we have already looked at it and then
768    // recovered from an error (e.g. failure to find the matching >).
769    if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
770                               TT_FunctionLBrace, TT_ImplicitStringLiteral,
771                               TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
772                               TT_RegexLiteral))
773      CurrentToken->Type = TT_Unknown;
774    CurrentToken->Role.reset();
775    CurrentToken->MatchingParen = nullptr;
776    CurrentToken->FakeLParens.clear();
777    CurrentToken->FakeRParens = 0;
778  }
779
780  void next() {
781    if (CurrentToken) {
782      CurrentToken->NestingLevel = Contexts.size() - 1;
783      CurrentToken->BindingStrength = Contexts.back().BindingStrength;
784      modifyContext(*CurrentToken);
785      determineTokenType(*CurrentToken);
786      CurrentToken = CurrentToken->Next;
787    }
788
789    resetTokenMetadata(CurrentToken);
790  }
791
792  /// \brief A struct to hold information valid in a specific context, e.g.
793  /// a pair of parenthesis.
794  struct Context {
795    Context(tok::TokenKind ContextKind, unsigned BindingStrength,
796            bool IsExpression)
797        : ContextKind(ContextKind), BindingStrength(BindingStrength),
798          IsExpression(IsExpression) {}
799
800    tok::TokenKind ContextKind;
801    unsigned BindingStrength;
802    bool IsExpression;
803    unsigned LongestObjCSelectorName = 0;
804    bool ColonIsForRangeExpr = false;
805    bool ColonIsDictLiteral = false;
806    bool ColonIsObjCMethodExpr = false;
807    FormatToken *FirstObjCSelectorName = nullptr;
808    FormatToken *FirstStartOfName = nullptr;
809    bool CanBeExpression = true;
810    bool InTemplateArgument = false;
811    bool InCtorInitializer = false;
812    bool CaretFound = false;
813    bool IsForEachMacro = false;
814  };
815
816  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
817  /// of each instance.
818  struct ScopedContextCreator {
819    AnnotatingParser &P;
820
821    ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
822                         unsigned Increase)
823        : P(P) {
824      P.Contexts.push_back(Context(ContextKind,
825                                   P.Contexts.back().BindingStrength + Increase,
826                                   P.Contexts.back().IsExpression));
827    }
828
829    ~ScopedContextCreator() { P.Contexts.pop_back(); }
830  };
831
832  void modifyContext(const FormatToken &Current) {
833    if (Current.getPrecedence() == prec::Assignment &&
834        !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
835        (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
836      Contexts.back().IsExpression = true;
837      if (!Line.startsWith(TT_UnaryOperator)) {
838        for (FormatToken *Previous = Current.Previous;
839             Previous && Previous->Previous &&
840             !Previous->Previous->isOneOf(tok::comma, tok::semi);
841             Previous = Previous->Previous) {
842          if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
843            Previous = Previous->MatchingParen;
844            if (!Previous)
845              break;
846          }
847          if (Previous->opensScope())
848            break;
849          if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
850              Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
851              Previous->Previous && Previous->Previous->isNot(tok::equal))
852            Previous->Type = TT_PointerOrReference;
853        }
854      }
855    } else if (Current.is(tok::lessless) &&
856               (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
857      Contexts.back().IsExpression = true;
858    } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
859      Contexts.back().IsExpression = true;
860    } else if (Current.is(TT_TrailingReturnArrow)) {
861      Contexts.back().IsExpression = false;
862    } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
863      Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
864    } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
865               !Line.InPPDirective &&
866               (!Current.Previous ||
867                Current.Previous->isNot(tok::kw_decltype))) {
868      bool ParametersOfFunctionType =
869          Current.Previous && Current.Previous->is(tok::r_paren) &&
870          Current.Previous->MatchingParen &&
871          Current.Previous->MatchingParen->is(TT_FunctionTypeLParen);
872      bool IsForOrCatch = Current.Previous &&
873                          Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
874      Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
875    } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
876      for (FormatToken *Previous = Current.Previous;
877           Previous && Previous->isOneOf(tok::star, tok::amp);
878           Previous = Previous->Previous)
879        Previous->Type = TT_PointerOrReference;
880      if (Line.MustBeDeclaration)
881        Contexts.back().IsExpression = Contexts.front().InCtorInitializer;
882    } else if (Current.Previous &&
883               Current.Previous->is(TT_CtorInitializerColon)) {
884      Contexts.back().IsExpression = true;
885      Contexts.back().InCtorInitializer = true;
886    } else if (Current.is(tok::kw_new)) {
887      Contexts.back().CanBeExpression = false;
888    } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
889      // This should be the condition or increment in a for-loop.
890      Contexts.back().IsExpression = true;
891    }
892  }
893
894  void determineTokenType(FormatToken &Current) {
895    if (!Current.is(TT_Unknown))
896      // The token type is already known.
897      return;
898
899    // Line.MightBeFunctionDecl can only be true after the parentheses of a
900    // function declaration have been found. In this case, 'Current' is a
901    // trailing token of this declaration and thus cannot be a name.
902    if (Current.is(Keywords.kw_instanceof)) {
903      Current.Type = TT_BinaryOperator;
904    } else if (isStartOfName(Current) &&
905               (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
906      Contexts.back().FirstStartOfName = &Current;
907      Current.Type = TT_StartOfName;
908    } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
909      AutoFound = true;
910    } else if (Current.is(tok::arrow) &&
911               Style.Language == FormatStyle::LK_Java) {
912      Current.Type = TT_LambdaArrow;
913    } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
914               Current.NestingLevel == 0) {
915      Current.Type = TT_TrailingReturnArrow;
916    } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
917      Current.Type =
918          determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
919                                             Contexts.back().IsExpression,
920                                Contexts.back().InTemplateArgument);
921    } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
922      Current.Type = determinePlusMinusCaretUsage(Current);
923      if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
924        Contexts.back().CaretFound = true;
925    } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
926      Current.Type = determineIncrementUsage(Current);
927    } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
928      Current.Type = TT_UnaryOperator;
929    } else if (Current.is(tok::question)) {
930      if (Style.Language == FormatStyle::LK_JavaScript &&
931          Line.MustBeDeclaration) {
932        // In JavaScript, `interface X { foo?(): bar; }` is an optional method
933        // on the interface, not a ternary expression.
934        Current.Type = TT_JsTypeOptionalQuestion;
935      } else {
936        Current.Type = TT_ConditionalExpr;
937      }
938    } else if (Current.isBinaryOperator() &&
939               (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
940      Current.Type = TT_BinaryOperator;
941    } else if (Current.is(tok::comment)) {
942      if (Current.TokenText.startswith("/*")) {
943        if (Current.TokenText.endswith("*/"))
944          Current.Type = TT_BlockComment;
945        else
946          // The lexer has for some reason determined a comment here. But we
947          // cannot really handle it, if it isn't properly terminated.
948          Current.Tok.setKind(tok::unknown);
949      } else {
950        Current.Type = TT_LineComment;
951      }
952    } else if (Current.is(tok::r_paren)) {
953      if (rParenEndsCast(Current))
954        Current.Type = TT_CastRParen;
955      if (Current.MatchingParen && Current.Next &&
956          !Current.Next->isBinaryOperator() &&
957          !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace))
958        if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
959          if (BeforeParen->is(tok::identifier) &&
960              BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
961              (!BeforeParen->Previous ||
962               BeforeParen->Previous->ClosesTemplateDeclaration))
963            Current.Type = TT_FunctionAnnotationRParen;
964    } else if (Current.is(tok::at) && Current.Next) {
965      if (Current.Next->isStringLiteral()) {
966        Current.Type = TT_ObjCStringLiteral;
967      } else {
968        switch (Current.Next->Tok.getObjCKeywordID()) {
969        case tok::objc_interface:
970        case tok::objc_implementation:
971        case tok::objc_protocol:
972          Current.Type = TT_ObjCDecl;
973          break;
974        case tok::objc_property:
975          Current.Type = TT_ObjCProperty;
976          break;
977        default:
978          break;
979        }
980      }
981    } else if (Current.is(tok::period)) {
982      FormatToken *PreviousNoComment = Current.getPreviousNonComment();
983      if (PreviousNoComment &&
984          PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
985        Current.Type = TT_DesignatedInitializerPeriod;
986      else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
987               Current.Previous->isOneOf(TT_JavaAnnotation,
988                                         TT_LeadingJavaAnnotation)) {
989        Current.Type = Current.Previous->Type;
990      }
991    } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
992               Current.Previous &&
993               !Current.Previous->isOneOf(tok::equal, tok::at) &&
994               Line.MightBeFunctionDecl && Contexts.size() == 1) {
995      // Line.MightBeFunctionDecl can only be true after the parentheses of a
996      // function declaration have been found.
997      Current.Type = TT_TrailingAnnotation;
998    } else if ((Style.Language == FormatStyle::LK_Java ||
999                Style.Language == FormatStyle::LK_JavaScript) &&
1000               Current.Previous) {
1001      if (Current.Previous->is(tok::at) &&
1002          Current.isNot(Keywords.kw_interface)) {
1003        const FormatToken &AtToken = *Current.Previous;
1004        const FormatToken *Previous = AtToken.getPreviousNonComment();
1005        if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1006          Current.Type = TT_LeadingJavaAnnotation;
1007        else
1008          Current.Type = TT_JavaAnnotation;
1009      } else if (Current.Previous->is(tok::period) &&
1010                 Current.Previous->isOneOf(TT_JavaAnnotation,
1011                                           TT_LeadingJavaAnnotation)) {
1012        Current.Type = Current.Previous->Type;
1013      }
1014    }
1015  }
1016
1017  /// \brief Take a guess at whether \p Tok starts a name of a function or
1018  /// variable declaration.
1019  ///
1020  /// This is a heuristic based on whether \p Tok is an identifier following
1021  /// something that is likely a type.
1022  bool isStartOfName(const FormatToken &Tok) {
1023    if (Tok.isNot(tok::identifier) || !Tok.Previous)
1024      return false;
1025
1026    if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
1027      return false;
1028
1029    // Skip "const" as it does not have an influence on whether this is a name.
1030    FormatToken *PreviousNotConst = Tok.Previous;
1031    while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1032      PreviousNotConst = PreviousNotConst->Previous;
1033
1034    if (!PreviousNotConst)
1035      return false;
1036
1037    bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1038                       PreviousNotConst->Previous &&
1039                       PreviousNotConst->Previous->is(tok::hash);
1040
1041    if (PreviousNotConst->is(TT_TemplateCloser))
1042      return PreviousNotConst && PreviousNotConst->MatchingParen &&
1043             PreviousNotConst->MatchingParen->Previous &&
1044             PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1045             PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1046
1047    if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1048        PreviousNotConst->MatchingParen->Previous &&
1049        PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1050      return true;
1051
1052    return (!IsPPKeyword &&
1053            PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1054           PreviousNotConst->is(TT_PointerOrReference) ||
1055           PreviousNotConst->isSimpleTypeSpecifier();
1056  }
1057
1058  /// \brief Determine whether ')' is ending a cast.
1059  bool rParenEndsCast(const FormatToken &Tok) {
1060    // C-style casts are only used in C++ and Java.
1061    if (Style.Language != FormatStyle::LK_Cpp &&
1062        Style.Language != FormatStyle::LK_Java)
1063      return false;
1064
1065    // Empty parens aren't casts and there are no casts at the end of the line.
1066    if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1067      return false;
1068
1069    FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1070    if (LeftOfParens) {
1071      // If there is an opening parenthesis left of the current parentheses,
1072      // look past it as these might be chained casts.
1073      if (LeftOfParens->is(tok::r_paren)) {
1074        if (!LeftOfParens->MatchingParen ||
1075            !LeftOfParens->MatchingParen->Previous)
1076          return false;
1077        LeftOfParens = LeftOfParens->MatchingParen->Previous;
1078      }
1079
1080      // If there is an identifier (or with a few exceptions a keyword) right
1081      // before the parentheses, this is unlikely to be a cast.
1082      if (LeftOfParens->Tok.getIdentifierInfo() &&
1083          !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1084                                 tok::kw_delete))
1085        return false;
1086
1087      // Certain other tokens right before the parentheses are also signals that
1088      // this cannot be a cast.
1089      if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1090                                TT_TemplateCloser))
1091        return false;
1092    }
1093
1094    if (Tok.Next->is(tok::question))
1095      return false;
1096
1097    // As Java has no function types, a "(" after the ")" likely means that this
1098    // is a cast.
1099    if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1100      return true;
1101
1102    // If a (non-string) literal follows, this is likely a cast.
1103    if (Tok.Next->isNot(tok::string_literal) &&
1104        (Tok.Next->Tok.isLiteral() ||
1105         Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1106      return true;
1107
1108    // Heuristically try to determine whether the parentheses contain a type.
1109    bool ParensAreType =
1110        !Tok.Previous ||
1111        Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1112        Tok.Previous->isSimpleTypeSpecifier();
1113    bool ParensCouldEndDecl =
1114        Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1115    if (ParensAreType && !ParensCouldEndDecl &&
1116        (Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression))
1117      return true;
1118
1119    // At this point, we heuristically assume that there are no casts at the
1120    // start of the line. We assume that we have found most cases where there
1121    // are by the logic above, e.g. "(void)x;".
1122    if (!LeftOfParens)
1123      return false;
1124
1125    // If the following token is an identifier, this is a cast. All cases where
1126    // this can be something else are handled above.
1127    if (Tok.Next->is(tok::identifier))
1128      return true;
1129
1130    if (!Tok.Next->Next)
1131      return false;
1132
1133    // If the next token after the parenthesis is a unary operator, assume
1134    // that this is cast, unless there are unexpected tokens inside the
1135    // parenthesis.
1136    bool NextIsUnary =
1137        Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1138    if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1139        !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1140      return false;
1141    // Search for unexpected tokens.
1142    for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1143         Prev = Prev->Previous) {
1144      if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1145        return false;
1146    }
1147    return true;
1148  }
1149
1150  /// \brief Return the type of the given token assuming it is * or &.
1151  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1152                                  bool InTemplateArgument) {
1153    if (Style.Language == FormatStyle::LK_JavaScript)
1154      return TT_BinaryOperator;
1155
1156    const FormatToken *PrevToken = Tok.getPreviousNonComment();
1157    if (!PrevToken)
1158      return TT_UnaryOperator;
1159
1160    const FormatToken *NextToken = Tok.getNextNonComment();
1161    if (!NextToken ||
1162        NextToken->isOneOf(tok::arrow, Keywords.kw_final,
1163                           Keywords.kw_override) ||
1164        (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1165      return TT_PointerOrReference;
1166
1167    if (PrevToken->is(tok::coloncolon))
1168      return TT_PointerOrReference;
1169
1170    if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1171                           tok::comma, tok::semi, tok::kw_return, tok::colon,
1172                           tok::equal, tok::kw_delete, tok::kw_sizeof) ||
1173        PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1174                           TT_UnaryOperator, TT_CastRParen))
1175      return TT_UnaryOperator;
1176
1177    if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1178      return TT_PointerOrReference;
1179    if (NextToken->is(tok::kw_operator) && !IsExpression)
1180      return TT_PointerOrReference;
1181    if (NextToken->isOneOf(tok::comma, tok::semi))
1182      return TT_PointerOrReference;
1183
1184    if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
1185        PrevToken->MatchingParen->Previous &&
1186        PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
1187                                                    tok::kw_decltype))
1188      return TT_PointerOrReference;
1189
1190    if (PrevToken->Tok.isLiteral() ||
1191        PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1192                           tok::kw_false, tok::r_brace) ||
1193        NextToken->Tok.isLiteral() ||
1194        NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1195        NextToken->isUnaryOperator() ||
1196        // If we know we're in a template argument, there are no named
1197        // declarations. Thus, having an identifier on the right-hand side
1198        // indicates a binary operator.
1199        (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1200      return TT_BinaryOperator;
1201
1202    // "&&(" is quite unlikely to be two successive unary "&".
1203    if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1204      return TT_BinaryOperator;
1205
1206    // This catches some cases where evaluation order is used as control flow:
1207    //   aaa && aaa->f();
1208    const FormatToken *NextNextToken = NextToken->getNextNonComment();
1209    if (NextNextToken && NextNextToken->is(tok::arrow))
1210      return TT_BinaryOperator;
1211
1212    // It is very unlikely that we are going to find a pointer or reference type
1213    // definition on the RHS of an assignment.
1214    if (IsExpression && !Contexts.back().CaretFound)
1215      return TT_BinaryOperator;
1216
1217    return TT_PointerOrReference;
1218  }
1219
1220  TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1221    const FormatToken *PrevToken = Tok.getPreviousNonComment();
1222    if (!PrevToken || PrevToken->is(TT_CastRParen))
1223      return TT_UnaryOperator;
1224
1225    // Use heuristics to recognize unary operators.
1226    if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1227                           tok::question, tok::colon, tok::kw_return,
1228                           tok::kw_case, tok::at, tok::l_brace))
1229      return TT_UnaryOperator;
1230
1231    // There can't be two consecutive binary operators.
1232    if (PrevToken->is(TT_BinaryOperator))
1233      return TT_UnaryOperator;
1234
1235    // Fall back to marking the token as binary operator.
1236    return TT_BinaryOperator;
1237  }
1238
1239  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1240  TokenType determineIncrementUsage(const FormatToken &Tok) {
1241    const FormatToken *PrevToken = Tok.getPreviousNonComment();
1242    if (!PrevToken || PrevToken->is(TT_CastRParen))
1243      return TT_UnaryOperator;
1244    if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1245      return TT_TrailingUnaryOperator;
1246
1247    return TT_UnaryOperator;
1248  }
1249
1250  SmallVector<Context, 8> Contexts;
1251
1252  const FormatStyle &Style;
1253  AnnotatedLine &Line;
1254  FormatToken *CurrentToken;
1255  bool AutoFound;
1256  const AdditionalKeywords &Keywords;
1257
1258  // Set of "<" tokens that do not open a template parameter list. If parseAngle
1259  // determines that a specific token can't be a template opener, it will make
1260  // same decision irrespective of the decisions for tokens leading up to it.
1261  // Store this information to prevent this from causing exponential runtime.
1262  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1263};
1264
1265static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1266static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1267
1268/// \brief Parses binary expressions by inserting fake parenthesis based on
1269/// operator precedence.
1270class ExpressionParser {
1271public:
1272  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1273                   AnnotatedLine &Line)
1274      : Style(Style), Keywords(Keywords), Current(Line.First) {}
1275
1276  /// \brief Parse expressions with the given operatore precedence.
1277  void parse(int Precedence = 0) {
1278    // Skip 'return' and ObjC selector colons as they are not part of a binary
1279    // expression.
1280    while (Current && (Current->is(tok::kw_return) ||
1281                       (Current->is(tok::colon) &&
1282                        Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1283      next();
1284
1285    if (!Current || Precedence > PrecedenceArrowAndPeriod)
1286      return;
1287
1288    // Conditional expressions need to be parsed separately for proper nesting.
1289    if (Precedence == prec::Conditional) {
1290      parseConditionalExpr();
1291      return;
1292    }
1293
1294    // Parse unary operators, which all have a higher precedence than binary
1295    // operators.
1296    if (Precedence == PrecedenceUnaryOperator) {
1297      parseUnaryOperator();
1298      return;
1299    }
1300
1301    FormatToken *Start = Current;
1302    FormatToken *LatestOperator = nullptr;
1303    unsigned OperatorIndex = 0;
1304
1305    while (Current) {
1306      // Consume operators with higher precedence.
1307      parse(Precedence + 1);
1308
1309      int CurrentPrecedence = getCurrentPrecedence();
1310
1311      if (Current && Current->is(TT_SelectorName) &&
1312          Precedence == CurrentPrecedence) {
1313        if (LatestOperator)
1314          addFakeParenthesis(Start, prec::Level(Precedence));
1315        Start = Current;
1316      }
1317
1318      // At the end of the line or when an operator with higher precedence is
1319      // found, insert fake parenthesis and return.
1320      if (!Current || (Current->closesScope() && Current->MatchingParen) ||
1321          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1322          (CurrentPrecedence == prec::Conditional &&
1323           Precedence == prec::Assignment && Current->is(tok::colon))) {
1324        break;
1325      }
1326
1327      // Consume scopes: (), [], <> and {}
1328      if (Current->opensScope()) {
1329        while (Current && !Current->closesScope()) {
1330          next();
1331          parse();
1332        }
1333        next();
1334      } else {
1335        // Operator found.
1336        if (CurrentPrecedence == Precedence) {
1337          LatestOperator = Current;
1338          Current->OperatorIndex = OperatorIndex;
1339          ++OperatorIndex;
1340        }
1341        next(/*SkipPastLeadingComments=*/Precedence > 0);
1342      }
1343    }
1344
1345    if (LatestOperator && (Current || Precedence > 0)) {
1346      LatestOperator->LastOperator = true;
1347      if (Precedence == PrecedenceArrowAndPeriod) {
1348        // Call expressions don't have a binary operator precedence.
1349        addFakeParenthesis(Start, prec::Unknown);
1350      } else {
1351        addFakeParenthesis(Start, prec::Level(Precedence));
1352      }
1353    }
1354  }
1355
1356private:
1357  /// \brief Gets the precedence (+1) of the given token for binary operators
1358  /// and other tokens that we treat like binary operators.
1359  int getCurrentPrecedence() {
1360    if (Current) {
1361      const FormatToken *NextNonComment = Current->getNextNonComment();
1362      if (Current->is(TT_ConditionalExpr))
1363        return prec::Conditional;
1364      if (NextNonComment && NextNonComment->is(tok::colon) &&
1365          NextNonComment->is(TT_DictLiteral))
1366        return prec::Comma;
1367      if (Current->is(TT_LambdaArrow))
1368        return prec::Comma;
1369      if (Current->is(TT_JsFatArrow))
1370        return prec::Assignment;
1371      if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName,
1372                           TT_JsComputedPropertyName) ||
1373          (Current->is(tok::comment) && NextNonComment &&
1374           NextNonComment->is(TT_SelectorName)))
1375        return 0;
1376      if (Current->is(TT_RangeBasedForLoopColon))
1377        return prec::Comma;
1378      if ((Style.Language == FormatStyle::LK_Java ||
1379           Style.Language == FormatStyle::LK_JavaScript) &&
1380          Current->is(Keywords.kw_instanceof))
1381        return prec::Relational;
1382      if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1383        return Current->getPrecedence();
1384      if (Current->isOneOf(tok::period, tok::arrow))
1385        return PrecedenceArrowAndPeriod;
1386      if (Style.Language == FormatStyle::LK_Java &&
1387          Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1388                           Keywords.kw_throws))
1389        return 0;
1390    }
1391    return -1;
1392  }
1393
1394  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1395    Start->FakeLParens.push_back(Precedence);
1396    if (Precedence > prec::Unknown)
1397      Start->StartsBinaryExpression = true;
1398    if (Current) {
1399      FormatToken *Previous = Current->Previous;
1400      while (Previous->is(tok::comment) && Previous->Previous)
1401        Previous = Previous->Previous;
1402      ++Previous->FakeRParens;
1403      if (Precedence > prec::Unknown)
1404        Previous->EndsBinaryExpression = true;
1405    }
1406  }
1407
1408  /// \brief Parse unary operator expressions and surround them with fake
1409  /// parentheses if appropriate.
1410  void parseUnaryOperator() {
1411    if (!Current || Current->isNot(TT_UnaryOperator)) {
1412      parse(PrecedenceArrowAndPeriod);
1413      return;
1414    }
1415
1416    FormatToken *Start = Current;
1417    next();
1418    parseUnaryOperator();
1419
1420    // The actual precedence doesn't matter.
1421    addFakeParenthesis(Start, prec::Unknown);
1422  }
1423
1424  void parseConditionalExpr() {
1425    while (Current && Current->isTrailingComment()) {
1426      next();
1427    }
1428    FormatToken *Start = Current;
1429    parse(prec::LogicalOr);
1430    if (!Current || !Current->is(tok::question))
1431      return;
1432    next();
1433    parse(prec::Assignment);
1434    if (!Current || Current->isNot(TT_ConditionalExpr))
1435      return;
1436    next();
1437    parse(prec::Assignment);
1438    addFakeParenthesis(Start, prec::Conditional);
1439  }
1440
1441  void next(bool SkipPastLeadingComments = true) {
1442    if (Current)
1443      Current = Current->Next;
1444    while (Current &&
1445           (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1446           Current->isTrailingComment())
1447      Current = Current->Next;
1448  }
1449
1450  const FormatStyle &Style;
1451  const AdditionalKeywords &Keywords;
1452  FormatToken *Current;
1453};
1454
1455} // end anonymous namespace
1456
1457void TokenAnnotator::setCommentLineLevels(
1458    SmallVectorImpl<AnnotatedLine *> &Lines) {
1459  const AnnotatedLine *NextNonCommentLine = nullptr;
1460  for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1461                                                          E = Lines.rend();
1462       I != E; ++I) {
1463    if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1464        (*I)->First->Next == nullptr)
1465      (*I)->Level = NextNonCommentLine->Level;
1466    else
1467      NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1468
1469    setCommentLineLevels((*I)->Children);
1470  }
1471}
1472
1473void TokenAnnotator::annotate(AnnotatedLine &Line) {
1474  for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1475                                                  E = Line.Children.end();
1476       I != E; ++I) {
1477    annotate(**I);
1478  }
1479  AnnotatingParser Parser(Style, Line, Keywords);
1480  Line.Type = Parser.parseLine();
1481  if (Line.Type == LT_Invalid)
1482    return;
1483
1484  ExpressionParser ExprParser(Style, Keywords, Line);
1485  ExprParser.parse();
1486
1487  if (Line.startsWith(TT_ObjCMethodSpecifier))
1488    Line.Type = LT_ObjCMethodDecl;
1489  else if (Line.startsWith(TT_ObjCDecl))
1490    Line.Type = LT_ObjCDecl;
1491  else if (Line.startsWith(TT_ObjCProperty))
1492    Line.Type = LT_ObjCProperty;
1493
1494  Line.First->SpacesRequiredBefore = 1;
1495  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1496}
1497
1498// This function heuristically determines whether 'Current' starts the name of a
1499// function declaration.
1500static bool isFunctionDeclarationName(const FormatToken &Current) {
1501  auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
1502    for (; Next; Next = Next->Next) {
1503      if (Next->is(TT_OverloadedOperatorLParen))
1504        return Next;
1505      if (Next->is(TT_OverloadedOperator))
1506        continue;
1507      if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1508        // For 'new[]' and 'delete[]'.
1509        if (Next->Next && Next->Next->is(tok::l_square) &&
1510            Next->Next->Next && Next->Next->Next->is(tok::r_square))
1511          Next = Next->Next->Next;
1512        continue;
1513      }
1514
1515      break;
1516    }
1517    return nullptr;
1518  };
1519
1520  const FormatToken *Next = Current.Next;
1521  if (Current.is(tok::kw_operator)) {
1522    if (Current.Previous && Current.Previous->is(tok::coloncolon))
1523      return false;
1524    Next = skipOperatorName(Next);
1525  } else {
1526    if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1527      return false;
1528    for (; Next; Next = Next->Next) {
1529      if (Next->is(TT_TemplateOpener)) {
1530        Next = Next->MatchingParen;
1531      } else if (Next->is(tok::coloncolon)) {
1532        Next = Next->Next;
1533        if (!Next)
1534          return false;
1535        if (Next->is(tok::kw_operator)) {
1536          Next = skipOperatorName(Next->Next);
1537          break;
1538        }
1539        if (!Next->is(tok::identifier))
1540          return false;
1541      } else if (Next->is(tok::l_paren)) {
1542        break;
1543      } else {
1544        return false;
1545      }
1546    }
1547  }
1548
1549  if (!Next || !Next->is(tok::l_paren))
1550    return false;
1551  if (Next->Next == Next->MatchingParen)
1552    return true;
1553  for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
1554       Tok = Tok->Next) {
1555    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1556        Tok->isOneOf(TT_PointerOrReference, TT_StartOfName))
1557      return true;
1558    if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
1559        Tok->Tok.isLiteral())
1560      return false;
1561  }
1562  return false;
1563}
1564
1565bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
1566  assert(Line.MightBeFunctionDecl);
1567
1568  if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
1569       Style.AlwaysBreakAfterReturnType ==
1570           FormatStyle::RTBS_TopLevelDefinitions) &&
1571      Line.Level > 0)
1572    return false;
1573
1574  switch (Style.AlwaysBreakAfterReturnType) {
1575  case FormatStyle::RTBS_None:
1576    return false;
1577  case FormatStyle::RTBS_All:
1578  case FormatStyle::RTBS_TopLevel:
1579    return true;
1580  case FormatStyle::RTBS_AllDefinitions:
1581  case FormatStyle::RTBS_TopLevelDefinitions:
1582    return Line.mightBeFunctionDefinition();
1583  }
1584
1585  return false;
1586}
1587
1588void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1589  for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1590                                                  E = Line.Children.end();
1591       I != E; ++I) {
1592    calculateFormattingInformation(**I);
1593  }
1594
1595  Line.First->TotalLength =
1596      Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1597  if (!Line.First->Next)
1598    return;
1599  FormatToken *Current = Line.First->Next;
1600  bool InFunctionDecl = Line.MightBeFunctionDecl;
1601  while (Current) {
1602    if (isFunctionDeclarationName(*Current))
1603      Current->Type = TT_FunctionDeclarationName;
1604    if (Current->is(TT_LineComment)) {
1605      if (Current->Previous->BlockKind == BK_BracedInit &&
1606          Current->Previous->opensScope())
1607        Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1608      else
1609        Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1610
1611      // If we find a trailing comment, iterate backwards to determine whether
1612      // it seems to relate to a specific parameter. If so, break before that
1613      // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1614      // to the previous line in:
1615      //   SomeFunction(a,
1616      //                b, // comment
1617      //                c);
1618      if (!Current->HasUnescapedNewline) {
1619        for (FormatToken *Parameter = Current->Previous; Parameter;
1620             Parameter = Parameter->Previous) {
1621          if (Parameter->isOneOf(tok::comment, tok::r_brace))
1622            break;
1623          if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1624            if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
1625                Parameter->HasUnescapedNewline)
1626              Parameter->MustBreakBefore = true;
1627            break;
1628          }
1629        }
1630      }
1631    } else if (Current->SpacesRequiredBefore == 0 &&
1632               spaceRequiredBefore(Line, *Current)) {
1633      Current->SpacesRequiredBefore = 1;
1634    }
1635
1636    Current->MustBreakBefore =
1637        Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1638
1639    if (!Current->MustBreakBefore && InFunctionDecl &&
1640        Current->is(TT_FunctionDeclarationName))
1641      Current->MustBreakBefore = mustBreakForReturnType(Line);
1642
1643    Current->CanBreakBefore =
1644        Current->MustBreakBefore || canBreakBefore(Line, *Current);
1645    unsigned ChildSize = 0;
1646    if (Current->Previous->Children.size() == 1) {
1647      FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1648      ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1649                                                  : LastOfChild.TotalLength + 1;
1650    }
1651    const FormatToken *Prev = Current->Previous;
1652    if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
1653        (Prev->Children.size() == 1 &&
1654         Prev->Children[0]->First->MustBreakBefore) ||
1655        Current->IsMultiline)
1656      Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
1657    else
1658      Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
1659                             ChildSize + Current->SpacesRequiredBefore;
1660
1661    if (Current->is(TT_CtorInitializerColon))
1662      InFunctionDecl = false;
1663
1664    // FIXME: Only calculate this if CanBreakBefore is true once static
1665    // initializers etc. are sorted out.
1666    // FIXME: Move magic numbers to a better place.
1667    Current->SplitPenalty = 20 * Current->BindingStrength +
1668                            splitPenalty(Line, *Current, InFunctionDecl);
1669
1670    Current = Current->Next;
1671  }
1672
1673  calculateUnbreakableTailLengths(Line);
1674  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1675    if (Current->Role)
1676      Current->Role->precomputeFormattingInfos(Current);
1677  }
1678
1679  DEBUG({ printDebugInfo(Line); });
1680}
1681
1682void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1683  unsigned UnbreakableTailLength = 0;
1684  FormatToken *Current = Line.Last;
1685  while (Current) {
1686    Current->UnbreakableTailLength = UnbreakableTailLength;
1687    if (Current->CanBreakBefore ||
1688        Current->isOneOf(tok::comment, tok::string_literal)) {
1689      UnbreakableTailLength = 0;
1690    } else {
1691      UnbreakableTailLength +=
1692          Current->ColumnWidth + Current->SpacesRequiredBefore;
1693    }
1694    Current = Current->Previous;
1695  }
1696}
1697
1698unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1699                                      const FormatToken &Tok,
1700                                      bool InFunctionDecl) {
1701  const FormatToken &Left = *Tok.Previous;
1702  const FormatToken &Right = Tok;
1703
1704  if (Left.is(tok::semi))
1705    return 0;
1706
1707  if (Style.Language == FormatStyle::LK_Java) {
1708    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
1709      return 1;
1710    if (Right.is(Keywords.kw_implements))
1711      return 2;
1712    if (Left.is(tok::comma) && Left.NestingLevel == 0)
1713      return 3;
1714  } else if (Style.Language == FormatStyle::LK_JavaScript) {
1715    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
1716      return 100;
1717    if (Left.is(TT_JsTypeColon))
1718      return 100;
1719  }
1720
1721  if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
1722                              Right.Next->is(TT_DictLiteral)))
1723    return 1;
1724  if (Right.is(tok::l_square)) {
1725    if (Style.Language == FormatStyle::LK_Proto)
1726      return 1;
1727    // Slightly prefer formatting local lambda definitions like functions.
1728    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
1729      return 50;
1730    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
1731                       TT_ArrayInitializerLSquare))
1732      return 500;
1733  }
1734
1735  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
1736      Right.is(tok::kw_operator)) {
1737    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1738      return 3;
1739    if (Left.is(TT_StartOfName))
1740      return 110;
1741    if (InFunctionDecl && Right.NestingLevel == 0)
1742      return Style.PenaltyReturnTypeOnItsOwnLine;
1743    return 200;
1744  }
1745  if (Right.is(TT_PointerOrReference))
1746    return 190;
1747  if (Right.is(TT_LambdaArrow))
1748    return 110;
1749  if (Left.is(tok::equal) && Right.is(tok::l_brace))
1750    return 150;
1751  if (Left.is(TT_CastRParen))
1752    return 100;
1753  if (Left.is(tok::coloncolon) ||
1754      (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1755    return 500;
1756  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1757    return 5000;
1758
1759  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon))
1760    return 2;
1761
1762  if (Right.isMemberAccess()) {
1763    // Breaking before the "./->" of a chained call/member access is reasonably
1764    // cheap, as formatting those with one call per line is generally
1765    // desirable. In particular, it should be cheaper to break before the call
1766    // than it is to break inside a call's parameters, which could lead to weird
1767    // "hanging" indents. The exception is the very last "./->" to support this
1768    // frequent pattern:
1769    //
1770    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
1771    //       dddddddd);
1772    //
1773    // which might otherwise be blown up onto many lines. Here, clang-format
1774    // won't produce "hanging" indents anyway as there is no other trailing
1775    // call.
1776    return Right.LastOperator ? 150 : 40;
1777  }
1778
1779  if (Right.is(TT_TrailingAnnotation) &&
1780      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
1781    // Moving trailing annotations to the next line is fine for ObjC method
1782    // declarations.
1783    if (Line.startsWith(TT_ObjCMethodSpecifier))
1784      return 10;
1785    // Generally, breaking before a trailing annotation is bad unless it is
1786    // function-like. It seems to be especially preferable to keep standard
1787    // annotations (i.e. "const", "final" and "override") on the same line.
1788    // Use a slightly higher penalty after ")" so that annotations like
1789    // "const override" are kept together.
1790    bool is_short_annotation = Right.TokenText.size() < 10;
1791    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
1792  }
1793
1794  // In for-loops, prefer breaking at ',' and ';'.
1795  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
1796    return 4;
1797
1798  // In Objective-C method expressions, prefer breaking before "param:" over
1799  // breaking after it.
1800  if (Right.is(TT_SelectorName))
1801    return 0;
1802  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
1803    return Line.MightBeFunctionDecl ? 50 : 500;
1804
1805  if (Left.is(tok::l_paren) && InFunctionDecl &&
1806      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
1807    return 100;
1808  if (Left.is(tok::l_paren) && Left.Previous &&
1809      Left.Previous->isOneOf(tok::kw_if, tok::kw_for))
1810    return 1000;
1811  if (Left.is(tok::equal) && InFunctionDecl)
1812    return 110;
1813  if (Right.is(tok::r_brace))
1814    return 1;
1815  if (Left.is(TT_TemplateOpener))
1816    return 100;
1817  if (Left.opensScope()) {
1818    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
1819      return 0;
1820    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1821                                   : 19;
1822  }
1823  if (Left.is(TT_JavaAnnotation))
1824    return 50;
1825
1826  if (Right.is(tok::lessless)) {
1827    if (Left.is(tok::string_literal) &&
1828        (!Right.LastOperator || Right.OperatorIndex != 1)) {
1829      StringRef Content = Left.TokenText;
1830      if (Content.startswith("\""))
1831        Content = Content.drop_front(1);
1832      if (Content.endswith("\""))
1833        Content = Content.drop_back(1);
1834      Content = Content.trim();
1835      if (Content.size() > 1 &&
1836          (Content.back() == ':' || Content.back() == '='))
1837        return 25;
1838    }
1839    return 1; // Breaking at a << is really cheap.
1840  }
1841  if (Left.is(TT_ConditionalExpr))
1842    return prec::Conditional;
1843  prec::Level Level = Left.getPrecedence();
1844  if (Level != prec::Unknown)
1845    return Level;
1846  Level = Right.getPrecedence();
1847  if (Level != prec::Unknown)
1848    return Level;
1849
1850  return 3;
1851}
1852
1853bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1854                                          const FormatToken &Left,
1855                                          const FormatToken &Right) {
1856  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
1857    return true;
1858  if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1859      Left.Tok.getObjCKeywordID() == tok::objc_property)
1860    return true;
1861  if (Right.is(tok::hashhash))
1862    return Left.is(tok::hash);
1863  if (Left.isOneOf(tok::hashhash, tok::hash))
1864    return Right.is(tok::hash);
1865  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1866    return Style.SpaceInEmptyParentheses;
1867  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1868    return (Right.is(TT_CastRParen) ||
1869            (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
1870               ? Style.SpacesInCStyleCastParentheses
1871               : Style.SpacesInParentheses;
1872  if (Right.isOneOf(tok::semi, tok::comma))
1873    return false;
1874  if (Right.is(tok::less) &&
1875      (Left.is(tok::kw_template) ||
1876       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1877    return true;
1878  if (Left.isOneOf(tok::exclaim, tok::tilde))
1879    return false;
1880  if (Left.is(tok::at) &&
1881      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1882                    tok::numeric_constant, tok::l_paren, tok::l_brace,
1883                    tok::kw_true, tok::kw_false))
1884    return false;
1885  if (Left.is(tok::coloncolon))
1886    return false;
1887  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1888    return false;
1889  if (Right.is(tok::ellipsis))
1890    return Left.Tok.isLiteral();
1891  if (Left.is(tok::l_square) && Right.is(tok::amp))
1892    return false;
1893  if (Right.is(TT_PointerOrReference))
1894    return (Left.is(tok::r_paren) && Left.MatchingParen &&
1895            (Left.MatchingParen->is(TT_OverloadedOperatorLParen) ||
1896             (Left.MatchingParen->Previous &&
1897              Left.MatchingParen->Previous->is(TT_FunctionDeclarationName)))) ||
1898           (Left.Tok.isLiteral() ||
1899            (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
1900             (Style.PointerAlignment != FormatStyle::PAS_Left ||
1901              Line.IsMultiVariableDeclStmt)));
1902  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
1903      (!Left.is(TT_PointerOrReference) ||
1904       (Style.PointerAlignment != FormatStyle::PAS_Right &&
1905        !Line.IsMultiVariableDeclStmt)))
1906    return true;
1907  if (Left.is(TT_PointerOrReference))
1908    return Right.Tok.isLiteral() ||
1909           Right.isOneOf(TT_BlockComment, Keywords.kw_final,
1910                         Keywords.kw_override) ||
1911           (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
1912           (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
1913                           tok::l_paren) &&
1914            (Style.PointerAlignment != FormatStyle::PAS_Right &&
1915             !Line.IsMultiVariableDeclStmt) &&
1916            Left.Previous &&
1917            !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1918  if (Right.is(tok::star) && Left.is(tok::l_paren))
1919    return false;
1920  if (Left.is(tok::l_square))
1921    return (Left.is(TT_ArrayInitializerLSquare) &&
1922            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
1923           (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
1924            Right.isNot(tok::r_square));
1925  if (Right.is(tok::r_square))
1926    return Right.MatchingParen &&
1927           ((Style.SpacesInContainerLiterals &&
1928             Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
1929            (Style.SpacesInSquareBrackets &&
1930             Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
1931  if (Right.is(tok::l_square) &&
1932      !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) &&
1933      !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
1934    return false;
1935  if (Left.is(tok::colon))
1936    return !Left.is(TT_ObjCMethodExpr);
1937  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1938    return !Left.Children.empty(); // No spaces in "{}".
1939  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1940      (Right.is(tok::r_brace) && Right.MatchingParen &&
1941       Right.MatchingParen->BlockKind != BK_Block))
1942    return !Style.Cpp11BracedListStyle;
1943  if (Left.is(TT_BlockComment))
1944    return !Left.TokenText.endswith("=*/");
1945  if (Right.is(tok::l_paren)) {
1946    if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
1947      return true;
1948    return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
1949           (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1950            (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
1951                          tok::kw_switch, tok::kw_case, TT_ForEachMacro,
1952                          TT_ObjCForIn) ||
1953             (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
1954                           tok::kw_new, tok::kw_delete) &&
1955              (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
1956           (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1957            (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
1958             Left.is(tok::r_paren)) &&
1959            Line.Type != LT_PreprocessorDirective);
1960  }
1961  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1962    return false;
1963  if (Right.is(TT_UnaryOperator))
1964    return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1965           (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
1966  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
1967                    tok::r_paren) ||
1968       Left.isSimpleTypeSpecifier()) &&
1969      Right.is(tok::l_brace) && Right.getNextNonComment() &&
1970      Right.BlockKind != BK_Block)
1971    return false;
1972  if (Left.is(tok::period) || Right.is(tok::period))
1973    return false;
1974  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1975    return false;
1976  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
1977      Left.MatchingParen->Previous &&
1978      Left.MatchingParen->Previous->is(tok::period))
1979    // A.<B>DoSomething();
1980    return false;
1981  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
1982    return false;
1983  return true;
1984}
1985
1986bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1987                                         const FormatToken &Right) {
1988  const FormatToken &Left = *Right.Previous;
1989  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
1990    return true; // Never ever merge two identifiers.
1991  if (Style.Language == FormatStyle::LK_Cpp) {
1992    if (Left.is(tok::kw_operator))
1993      return Right.is(tok::coloncolon);
1994  } else if (Style.Language == FormatStyle::LK_Proto) {
1995    if (Right.is(tok::period) &&
1996        Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
1997                     Keywords.kw_repeated, Keywords.kw_extend))
1998      return true;
1999    if (Right.is(tok::l_paren) &&
2000        Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2001      return true;
2002  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2003    if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, TT_JsFatArrow,
2004                     Keywords.kw_in))
2005      return true;
2006    if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2007      return false;
2008    if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2009        Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2010      return false;
2011    if (Left.is(tok::ellipsis))
2012      return false;
2013    if (Left.is(TT_TemplateCloser) &&
2014        !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2015                       Keywords.kw_implements, Keywords.kw_extends))
2016      // Type assertions ('<type>expr') are not followed by whitespace. Other
2017      // locations that should have whitespace following are identified by the
2018      // above set of follower tokens.
2019      return false;
2020  } else if (Style.Language == FormatStyle::LK_Java) {
2021    if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2022      return true;
2023    if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2024      return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2025    if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2026                      tok::kw_protected) ||
2027         Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2028                      Keywords.kw_native)) &&
2029        Right.is(TT_TemplateOpener))
2030      return true;
2031  }
2032  if (Left.is(TT_ImplicitStringLiteral))
2033    return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2034  if (Line.Type == LT_ObjCMethodDecl) {
2035    if (Left.is(TT_ObjCMethodSpecifier))
2036      return true;
2037    if (Left.is(tok::r_paren) && Right.is(tok::identifier))
2038      // Don't space between ')' and <id>
2039      return false;
2040  }
2041  if (Line.Type == LT_ObjCProperty &&
2042      (Right.is(tok::equal) || Left.is(tok::equal)))
2043    return false;
2044
2045  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2046      Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2047    return true;
2048  if (Left.is(tok::comma))
2049    return true;
2050  if (Right.is(tok::comma))
2051    return false;
2052  if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
2053    return true;
2054  if (Right.is(TT_OverloadedOperatorLParen))
2055    return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2056  if (Right.is(tok::colon)) {
2057    if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2058        !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2059      return false;
2060    if (Right.is(TT_ObjCMethodExpr))
2061      return false;
2062    if (Left.is(tok::question))
2063      return false;
2064    if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2065      return false;
2066    if (Right.is(TT_DictLiteral))
2067      return Style.SpacesInContainerLiterals;
2068    return true;
2069  }
2070  if (Left.is(TT_UnaryOperator))
2071    return Right.is(TT_BinaryOperator);
2072
2073  // If the next token is a binary operator or a selector name, we have
2074  // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2075  if (Left.is(TT_CastRParen))
2076    return Style.SpaceAfterCStyleCast ||
2077           Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2078
2079  if (Left.is(tok::greater) && Right.is(tok::greater))
2080    return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2081           (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2082  if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2083      Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
2084    return false;
2085  if (!Style.SpaceBeforeAssignmentOperators &&
2086      Right.getPrecedence() == prec::Assignment)
2087    return false;
2088  if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
2089    return (Left.is(TT_TemplateOpener) &&
2090            Style.Standard == FormatStyle::LS_Cpp03) ||
2091           !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) ||
2092             Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener));
2093  if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2094    return Style.SpacesInAngles;
2095  if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2096      (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2097       !Right.is(tok::r_paren)))
2098    return true;
2099  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2100      Right.isNot(TT_FunctionTypeLParen))
2101    return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2102  if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2103      Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2104    return false;
2105  if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2106      Line.startsWith(tok::hash))
2107    return true;
2108  if (Right.is(TT_TrailingUnaryOperator))
2109    return false;
2110  if (Left.is(TT_RegexLiteral))
2111    return false;
2112  return spaceRequiredBetween(Line, Left, Right);
2113}
2114
2115// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2116static bool isAllmanBrace(const FormatToken &Tok) {
2117  return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2118         !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2119}
2120
2121bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2122                                     const FormatToken &Right) {
2123  const FormatToken &Left = *Right.Previous;
2124  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2125    return true;
2126
2127  if (Style.Language == FormatStyle::LK_JavaScript) {
2128    // FIXME: This might apply to other languages and token kinds.
2129    if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
2130        Left.Previous->is(tok::char_constant))
2131      return true;
2132    if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2133        Left.Previous && Left.Previous->is(tok::equal) &&
2134        Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2135                            tok::kw_const) &&
2136        // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2137        // above.
2138        !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2139      // Object literals on the top level of a file are treated as "enum-style".
2140      // Each key/value pair is put on a separate line, instead of bin-packing.
2141      return true;
2142    if (Left.is(tok::l_brace) && Line.Level == 0 &&
2143        (Line.startsWith(tok::kw_enum) ||
2144         Line.startsWith(tok::kw_export, tok::kw_enum)))
2145      // JavaScript top-level enum key/value pairs are put on separate lines
2146      // instead of bin-packing.
2147      return true;
2148    if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2149        !Left.Children.empty())
2150      // Support AllowShortFunctionsOnASingleLine for JavaScript.
2151      return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
2152             Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
2153             (Left.NestingLevel == 0 && Line.Level == 0 &&
2154              Style.AllowShortFunctionsOnASingleLine ==
2155                  FormatStyle::SFS_Inline);
2156  } else if (Style.Language == FormatStyle::LK_Java) {
2157    if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2158        Right.Next->is(tok::string_literal))
2159      return true;
2160  }
2161
2162  // If the last token before a '}' is a comma or a trailing comment, the
2163  // intention is to insert a line break after it in order to make shuffling
2164  // around entries easier.
2165  const FormatToken *BeforeClosingBrace = nullptr;
2166  if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
2167      Left.BlockKind != BK_Block && Left.MatchingParen)
2168    BeforeClosingBrace = Left.MatchingParen->Previous;
2169  else if (Right.MatchingParen &&
2170           Right.MatchingParen->isOneOf(tok::l_brace,
2171                                        TT_ArrayInitializerLSquare))
2172    BeforeClosingBrace = &Left;
2173  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2174                             BeforeClosingBrace->isTrailingComment()))
2175    return true;
2176
2177  if (Right.is(tok::comment))
2178    return Left.BlockKind != BK_BracedInit &&
2179           Left.isNot(TT_CtorInitializerColon) &&
2180           (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2181  if (Left.isTrailingComment())
2182    return true;
2183  if (Left.isStringLiteral() &&
2184      (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
2185    return true;
2186  if (Right.Previous->IsUnterminatedLiteral)
2187    return true;
2188  if (Right.is(tok::lessless) && Right.Next &&
2189      Right.Previous->is(tok::string_literal) &&
2190      Right.Next->is(tok::string_literal))
2191    return true;
2192  if (Right.Previous->ClosesTemplateDeclaration &&
2193      Right.Previous->MatchingParen &&
2194      Right.Previous->MatchingParen->NestingLevel == 0 &&
2195      Style.AlwaysBreakTemplateDeclarations)
2196    return true;
2197  if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) &&
2198      Style.BreakConstructorInitializersBeforeComma &&
2199      !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2200    return true;
2201  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2202    // Raw string literals are special wrt. line breaks. The author has made a
2203    // deliberate choice and might have aligned the contents of the string
2204    // literal accordingly. Thus, we try keep existing line breaks.
2205    return Right.NewlinesBefore > 0;
2206  if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
2207      Style.Language == FormatStyle::LK_Proto)
2208    // Don't put enums onto single lines in protocol buffers.
2209    return true;
2210  if (Right.is(TT_InlineASMBrace))
2211    return Right.HasUnescapedNewline;
2212  if (isAllmanBrace(Left) || isAllmanBrace(Right))
2213    return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2214           (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2215           (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2216  if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) &&
2217      Right.is(TT_SelectorName))
2218    return true;
2219  if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2220    return true;
2221
2222  if ((Style.Language == FormatStyle::LK_Java ||
2223       Style.Language == FormatStyle::LK_JavaScript) &&
2224      Left.is(TT_LeadingJavaAnnotation) &&
2225      Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2226      (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2227    return true;
2228
2229  return false;
2230}
2231
2232bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
2233                                    const FormatToken &Right) {
2234  const FormatToken &Left = *Right.Previous;
2235
2236  // Language-specific stuff.
2237  if (Style.Language == FormatStyle::LK_Java) {
2238    if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2239                     Keywords.kw_implements))
2240      return false;
2241    if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2242                      Keywords.kw_implements))
2243      return true;
2244  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2245    if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
2246      return false;
2247    if (Left.is(TT_JsTypeColon))
2248      return true;
2249  }
2250
2251  if (Left.is(tok::at))
2252    return false;
2253  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
2254    return false;
2255  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
2256    return !Right.is(tok::l_paren);
2257  if (Right.is(TT_PointerOrReference))
2258    return Line.IsMultiVariableDeclStmt ||
2259           (Style.PointerAlignment == FormatStyle::PAS_Right &&
2260            (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
2261  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2262      Right.is(tok::kw_operator))
2263    return true;
2264  if (Left.is(TT_PointerOrReference))
2265    return false;
2266  if (Right.isTrailingComment())
2267    // We rely on MustBreakBefore being set correctly here as we should not
2268    // change the "binding" behavior of a comment.
2269    // The first comment in a braced lists is always interpreted as belonging to
2270    // the first list element. Otherwise, it should be placed outside of the
2271    // list.
2272    return Left.BlockKind == BK_BracedInit;
2273  if (Left.is(tok::question) && Right.is(tok::colon))
2274    return false;
2275  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
2276    return Style.BreakBeforeTernaryOperators;
2277  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
2278    return !Style.BreakBeforeTernaryOperators;
2279  if (Right.is(TT_InheritanceColon))
2280    return true;
2281  if (Right.is(tok::colon) &&
2282      !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
2283    return false;
2284  if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))
2285    return true;
2286  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
2287                                    Right.Next->is(TT_ObjCMethodExpr)))
2288    return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
2289  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
2290    return true;
2291  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
2292    return true;
2293  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
2294                    TT_OverloadedOperator))
2295    return false;
2296  if (Left.is(TT_RangeBasedForLoopColon))
2297    return true;
2298  if (Right.is(TT_RangeBasedForLoopColon))
2299    return false;
2300  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
2301      Left.is(tok::kw_operator))
2302    return false;
2303  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
2304      Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
2305    return false;
2306  if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
2307    return false;
2308  if (Left.is(tok::l_paren) && Left.Previous &&
2309      (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
2310    return false;
2311  if (Right.is(TT_ImplicitStringLiteral))
2312    return false;
2313
2314  if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
2315    return false;
2316  if (Right.is(tok::r_square) && Right.MatchingParen &&
2317      Right.MatchingParen->is(TT_LambdaLSquare))
2318    return false;
2319
2320  // We only break before r_brace if there was a corresponding break before
2321  // the l_brace, which is tracked by BreakBeforeClosingBrace.
2322  if (Right.is(tok::r_brace))
2323    return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
2324
2325  // Allow breaking after a trailing annotation, e.g. after a method
2326  // declaration.
2327  if (Left.is(TT_TrailingAnnotation))
2328    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
2329                          tok::less, tok::coloncolon);
2330
2331  if (Right.is(tok::kw___attribute))
2332    return true;
2333
2334  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
2335    return true;
2336
2337  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2338    return true;
2339
2340  if (Left.is(TT_CtorInitializerComma) &&
2341      Style.BreakConstructorInitializersBeforeComma)
2342    return false;
2343  if (Right.is(TT_CtorInitializerComma) &&
2344      Style.BreakConstructorInitializersBeforeComma)
2345    return true;
2346  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
2347      (Left.is(tok::less) && Right.is(tok::less)))
2348    return false;
2349  if (Right.is(TT_BinaryOperator) &&
2350      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
2351      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
2352       Right.getPrecedence() != prec::Assignment))
2353    return true;
2354  if (Left.is(TT_ArrayInitializerLSquare))
2355    return true;
2356  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
2357    return true;
2358  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
2359      !Left.isOneOf(tok::arrowstar, tok::lessless) &&
2360      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
2361      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
2362       Left.getPrecedence() == prec::Assignment))
2363    return true;
2364  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
2365                      tok::kw_class, tok::kw_struct) ||
2366         Right.isMemberAccess() ||
2367         Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
2368                       tok::colon, tok::l_square, tok::at) ||
2369         (Left.is(tok::r_paren) &&
2370          Right.isOneOf(tok::identifier, tok::kw_const)) ||
2371         (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
2372}
2373
2374void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
2375  llvm::errs() << "AnnotatedTokens:\n";
2376  const FormatToken *Tok = Line.First;
2377  while (Tok) {
2378    llvm::errs() << " M=" << Tok->MustBreakBefore
2379                 << " C=" << Tok->CanBreakBefore
2380                 << " T=" << getTokenTypeName(Tok->Type)
2381                 << " S=" << Tok->SpacesRequiredBefore
2382                 << " B=" << Tok->BlockParameterCount
2383                 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
2384                 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
2385                 << " FakeLParens=";
2386    for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
2387      llvm::errs() << Tok->FakeLParens[i] << "/";
2388    llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
2389    if (!Tok->Next)
2390      assert(Tok == Line.Last);
2391    Tok = Tok->Next;
2392  }
2393  llvm::errs() << "----\n";
2394}
2395
2396} // namespace format
2397} // namespace clang
2398