Format.cpp revision 5f2173ee723fd17b758f2a35a5bb39ca74eca523
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
15/// where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "format-formatter"
20
21#include "UnwrappedLineParser.h"
22#include "clang/Basic/Diagnostic.h"
23#include "clang/Basic/OperatorPrecedence.h"
24#include "clang/Basic/SourceManager.h"
25#include "clang/Format/Format.h"
26#include "clang/Frontend/TextDiagnosticPrinter.h"
27#include "clang/Lex/Lexer.h"
28#include "llvm/Support/Debug.h"
29#include <string>
30
31// Uncomment to get debug output from tests:
32// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0)
33
34namespace clang {
35namespace format {
36
37enum TokenType {
38  TT_BinaryOperator,
39  TT_BlockComment,
40  TT_CastRParen,
41  TT_ConditionalExpr,
42  TT_CtorInitializerColon,
43  TT_ImplicitStringLiteral,
44  TT_LineComment,
45  TT_ObjCBlockLParen,
46  TT_ObjCDecl,
47  TT_ObjCMethodSpecifier,
48  TT_ObjCMethodExpr,
49  TT_ObjCProperty,
50  TT_OverloadedOperator,
51  TT_PointerOrReference,
52  TT_PureVirtualSpecifier,
53  TT_TemplateCloser,
54  TT_TemplateOpener,
55  TT_TrailingUnaryOperator,
56  TT_UnaryOperator,
57  TT_Unknown
58};
59
60enum LineType {
61  LT_Invalid,
62  LT_Other,
63  LT_BuilderTypeCall,
64  LT_PreprocessorDirective,
65  LT_VirtualFunctionDecl,
66  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
67  LT_ObjCMethodDecl,
68  LT_ObjCProperty // An @property line.
69};
70
71class AnnotatedToken {
72public:
73  explicit AnnotatedToken(const FormatToken &FormatTok)
74      : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
75        CanBreakBefore(false), MustBreakBefore(false),
76        ClosesTemplateDeclaration(false), MatchingParen(NULL),
77        ParameterCount(1), Parent(NULL) {
78  }
79
80  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
81  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
82
83  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
84    return FormatTok.Tok.isObjCAtKeyword(Kind);
85  }
86
87  FormatToken FormatTok;
88
89  TokenType Type;
90
91  bool SpaceRequiredBefore;
92  bool CanBreakBefore;
93  bool MustBreakBefore;
94
95  bool ClosesTemplateDeclaration;
96
97  AnnotatedToken *MatchingParen;
98
99  /// \brief Number of parameters, if this is "(", "[" or "<".
100  ///
101  /// This is initialized to 1 as we don't need to distinguish functions with
102  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
103  /// the number of commas.
104  unsigned ParameterCount;
105
106  /// \brief The total length of the line up to and including this token.
107  unsigned TotalLength;
108
109  std::vector<AnnotatedToken> Children;
110  AnnotatedToken *Parent;
111
112  const AnnotatedToken *getPreviousNoneComment() const {
113    AnnotatedToken *Tok = Parent;
114    while (Tok != NULL && Tok->is(tok::comment))
115      Tok = Tok->Parent;
116    return Tok;
117  }
118};
119
120class AnnotatedLine {
121public:
122  AnnotatedLine(const UnwrappedLine &Line)
123      : First(Line.Tokens.front()), Level(Line.Level),
124        InPPDirective(Line.InPPDirective),
125        MustBeDeclaration(Line.MustBeDeclaration) {
126    assert(!Line.Tokens.empty());
127    AnnotatedToken *Current = &First;
128    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
129                                                E = Line.Tokens.end();
130         I != E; ++I) {
131      Current->Children.push_back(AnnotatedToken(*I));
132      Current->Children[0].Parent = Current;
133      Current = &Current->Children[0];
134    }
135    Last = Current;
136  }
137  AnnotatedLine(const AnnotatedLine &Other)
138      : First(Other.First), Type(Other.Type), Level(Other.Level),
139        InPPDirective(Other.InPPDirective),
140        MustBeDeclaration(Other.MustBeDeclaration) {
141    Last = &First;
142    while (!Last->Children.empty()) {
143      Last->Children[0].Parent = Last;
144      Last = &Last->Children[0];
145    }
146  }
147
148  AnnotatedToken First;
149  AnnotatedToken *Last;
150
151  LineType Type;
152  unsigned Level;
153  bool InPPDirective;
154  bool MustBeDeclaration;
155};
156
157static prec::Level getPrecedence(const AnnotatedToken &Tok) {
158  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
159}
160
161FormatStyle getLLVMStyle() {
162  FormatStyle LLVMStyle;
163  LLVMStyle.ColumnLimit = 80;
164  LLVMStyle.MaxEmptyLinesToKeep = 1;
165  LLVMStyle.PointerAndReferenceBindToType = false;
166  LLVMStyle.AccessModifierOffset = -2;
167  LLVMStyle.SplitTemplateClosingGreater = true;
168  LLVMStyle.IndentCaseLabels = false;
169  LLVMStyle.SpacesBeforeTrailingComments = 1;
170  LLVMStyle.BinPackParameters = true;
171  LLVMStyle.AllowAllParametersOnNextLine = true;
172  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
173  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
174  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
175  return LLVMStyle;
176}
177
178FormatStyle getGoogleStyle() {
179  FormatStyle GoogleStyle;
180  GoogleStyle.ColumnLimit = 80;
181  GoogleStyle.MaxEmptyLinesToKeep = 1;
182  GoogleStyle.PointerAndReferenceBindToType = true;
183  GoogleStyle.AccessModifierOffset = -1;
184  GoogleStyle.SplitTemplateClosingGreater = false;
185  GoogleStyle.IndentCaseLabels = true;
186  GoogleStyle.SpacesBeforeTrailingComments = 2;
187  GoogleStyle.BinPackParameters = false;
188  GoogleStyle.AllowAllParametersOnNextLine = true;
189  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
190  GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
191  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
192  return GoogleStyle;
193}
194
195FormatStyle getChromiumStyle() {
196  FormatStyle ChromiumStyle = getGoogleStyle();
197  ChromiumStyle.AllowAllParametersOnNextLine = false;
198  return ChromiumStyle;
199}
200
201struct OptimizationParameters {
202  unsigned PenaltyIndentLevel;
203  unsigned PenaltyLevelDecrease;
204  unsigned PenaltyExcessCharacter;
205};
206
207/// \brief Manages the whitespaces around tokens and their replacements.
208///
209/// This includes special handling for certain constructs, e.g. the alignment of
210/// trailing line comments.
211class WhitespaceManager {
212public:
213  WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
214
215  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
216  /// each \c AnnotatedToken.
217  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
218                         unsigned Spaces, unsigned WhitespaceStartColumn,
219                         const FormatStyle &Style) {
220    // 2+ newlines mean an empty line separating logic scopes.
221    if (NewLines >= 2)
222      alignComments();
223
224    // Align line comments if they are trailing or if they continue other
225    // trailing comments.
226    if (Tok.Type == TT_LineComment &&
227        (Tok.Parent != NULL || !Comments.empty())) {
228      if (Style.ColumnLimit >=
229          Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) {
230        Comments.push_back(StoredComment());
231        Comments.back().Tok = Tok.FormatTok;
232        Comments.back().Spaces = Spaces;
233        Comments.back().NewLines = NewLines;
234        Comments.back().MinColumn = WhitespaceStartColumn + Spaces;
235        Comments.back().MaxColumn = Style.ColumnLimit -
236                                    Spaces - Tok.FormatTok.TokenLength;
237        return;
238      }
239    }
240
241    // If this line does not have a trailing comment, align the stored comments.
242    if (Tok.Children.empty() && Tok.Type != TT_LineComment)
243      alignComments();
244    storeReplacement(Tok.FormatTok,
245                     std::string(NewLines, '\n') + std::string(Spaces, ' '));
246  }
247
248  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
249  /// backslashes to escape newlines inside a preprocessor directive.
250  ///
251  /// This function and \c replaceWhitespace have the same behavior if
252  /// \c Newlines == 0.
253  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
254                           unsigned Spaces, unsigned WhitespaceStartColumn,
255                           const FormatStyle &Style) {
256    std::string NewLineText;
257    if (NewLines > 0) {
258      unsigned Offset = std::min<int>(Style.ColumnLimit - 1,
259                                      WhitespaceStartColumn);
260      for (unsigned i = 0; i < NewLines; ++i) {
261        NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
262        NewLineText += "\\\n";
263        Offset = 0;
264      }
265    }
266    storeReplacement(Tok.FormatTok, NewLineText + std::string(Spaces, ' '));
267  }
268
269  /// \brief Returns all the \c Replacements created during formatting.
270  const tooling::Replacements &generateReplacements() {
271    alignComments();
272    return Replaces;
273  }
274
275private:
276  /// \brief Structure to store a comment for later layout and alignment.
277  struct StoredComment {
278    FormatToken Tok;
279    unsigned MinColumn;
280    unsigned MaxColumn;
281    unsigned NewLines;
282    unsigned Spaces;
283  };
284  SmallVector<StoredComment, 16> Comments;
285  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
286
287  /// \brief Try to align all stashed comments.
288  void alignComments() {
289    unsigned MinColumn = 0;
290    unsigned MaxColumn = UINT_MAX;
291    comment_iterator Start = Comments.begin();
292    for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E;
293         ++I) {
294      if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
295        alignComments(Start, I, MinColumn);
296        MinColumn = I->MinColumn;
297        MaxColumn = I->MaxColumn;
298        Start = I;
299      } else {
300        MinColumn = std::max(MinColumn, I->MinColumn);
301        MaxColumn = std::min(MaxColumn, I->MaxColumn);
302      }
303    }
304    alignComments(Start, Comments.end(), MinColumn);
305    Comments.clear();
306  }
307
308  /// \brief Put all the comments between \p I and \p E into \p Column.
309  void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
310    while (I != E) {
311      unsigned Spaces = I->Spaces + Column - I->MinColumn;
312      storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
313                       std::string(Spaces, ' '));
314      ++I;
315    }
316  }
317
318  /// \brief Stores \p Text as the replacement for the whitespace in front of
319  /// \p Tok.
320  void storeReplacement(const FormatToken &Tok, const std::string Text) {
321    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
322                                         Tok.WhiteSpaceLength, Text));
323  }
324
325  SourceManager &SourceMgr;
326  tooling::Replacements Replaces;
327};
328
329/// \brief Returns if a token is an Objective-C selector name.
330///
331/// For example, "bar" is a selector name in [foo bar:(4 + 5)].
332static bool isObjCSelectorName(const AnnotatedToken &Tok) {
333  return Tok.is(tok::identifier) && !Tok.Children.empty() &&
334         Tok.Children[0].is(tok::colon) &&
335         Tok.Children[0].Type == TT_ObjCMethodExpr;
336}
337
338class UnwrappedLineFormatter {
339public:
340  UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
341                         const AnnotatedLine &Line, unsigned FirstIndent,
342                         const AnnotatedToken &RootToken,
343                         WhitespaceManager &Whitespaces, bool StructuralError)
344      : Style(Style), SourceMgr(SourceMgr), Line(Line),
345        FirstIndent(FirstIndent), RootToken(RootToken),
346        Whitespaces(Whitespaces) {
347    Parameters.PenaltyIndentLevel = 20;
348    Parameters.PenaltyLevelDecrease = 30;
349    Parameters.PenaltyExcessCharacter = 1000000;
350  }
351
352  /// \brief Formats an \c UnwrappedLine.
353  ///
354  /// \returns The column after the last token in the last line of the
355  /// \c UnwrappedLine.
356  unsigned format() {
357    // Initialize state dependent on indent.
358    LineState State;
359    State.Column = FirstIndent;
360    State.NextToken = &RootToken;
361    State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent));
362    State.ForLoopVariablePos = 0;
363    State.LineContainsContinuedForLoopSection = false;
364    State.StartOfLineLevel = 1;
365
366    DEBUG({
367      DebugTokenState(*State.NextToken);
368    });
369
370    // The first token has already been indented and thus consumed.
371    moveStateToNextToken(State);
372
373    // Start iterating at 1 as we have correctly formatted of Token #0 above.
374    while (State.NextToken != NULL) {
375      if (State.NextToken->Type == TT_ImplicitStringLiteral) {
376        // Calculating the column is important for aligning trailing comments.
377        // FIXME: This does not seem to happen in conjunction with escaped
378        // newlines. If it does, fix!
379        State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
380                        State.NextToken->FormatTok.TokenLength;
381        State.NextToken = State.NextToken->Children.empty() ? NULL :
382                          &State.NextToken->Children[0];
383      } else if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
384        addTokenToState(false, false, State);
385      } else {
386        unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
387        unsigned Break = calcPenalty(State, true, NoBreak);
388        DEBUG({
389          if (Break < NoBreak)
390            llvm::errs() << "\n";
391          else
392            llvm::errs() << " ";
393          llvm::errs() << "<";
394          DebugPenalty(Break, Break < NoBreak);
395          llvm::errs() << "/";
396          DebugPenalty(NoBreak, !(Break < NoBreak));
397          llvm::errs() << "> ";
398          DebugTokenState(*State.NextToken);
399        });
400        addTokenToState(Break < NoBreak, false, State);
401        if (State.NextToken != NULL &&
402            State.NextToken->Parent->Type == TT_CtorInitializerColon) {
403          if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine &&
404              Line.Last->TotalLength > getColumnLimit() - State.Column - 1)
405            State.Stack.back().BreakAfterComma = true;
406        }
407      }
408    }
409    DEBUG(llvm::errs() << "\n");
410    return State.Column;
411  }
412
413private:
414  void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
415    const Token &Tok = AnnotatedTok.FormatTok.Tok;
416    llvm::errs()
417        << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
418                     Tok.getLength());
419    llvm::errs();
420  }
421
422  void DebugPenalty(unsigned Penalty, bool Winner) {
423    llvm::errs().changeColor(Winner ? raw_ostream::GREEN : raw_ostream::RED);
424    if (Penalty == UINT_MAX)
425      llvm::errs() << "MAX";
426    else
427      llvm::errs() << Penalty;
428    llvm::errs().resetColor();
429  }
430
431  struct ParenState {
432    ParenState(unsigned Indent, unsigned LastSpace)
433        : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
434          FirstLessLess(0), BreakBeforeClosingBrace(false),
435          BreakAfterComma(false), HasMultiParameterLine(false) {}
436
437    /// \brief The position to which a specific parenthesis level needs to be
438    /// indented.
439    unsigned Indent;
440
441    /// \brief The position of the last space on each level.
442    ///
443    /// Used e.g. to break like:
444    /// functionCall(Parameter, otherCall(
445    ///                             OtherParameter));
446    unsigned LastSpace;
447
448    /// \brief This is the column of the first token after an assignment.
449    unsigned AssignmentColumn;
450
451    /// \brief The position the first "<<" operator encountered on each level.
452    ///
453    /// Used to align "<<" operators. 0 if no such operator has been encountered
454    /// on a level.
455    unsigned FirstLessLess;
456
457    /// \brief Whether a newline needs to be inserted before the block's closing
458    /// brace.
459    ///
460    /// We only want to insert a newline before the closing brace if there also
461    /// was a newline after the beginning left brace.
462    bool BreakBeforeClosingBrace;
463
464    bool BreakAfterComma;
465    bool HasMultiParameterLine;
466
467    bool operator<(const ParenState &Other) const {
468      if (Indent != Other.Indent)
469        return Indent < Other.Indent;
470      if (LastSpace != Other.LastSpace)
471        return LastSpace < Other.LastSpace;
472      if (AssignmentColumn != Other.AssignmentColumn)
473        return AssignmentColumn < Other.AssignmentColumn;
474      if (FirstLessLess != Other.FirstLessLess)
475        return FirstLessLess < Other.FirstLessLess;
476      if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
477        return BreakBeforeClosingBrace;
478      if (BreakAfterComma != Other.BreakAfterComma)
479        return BreakAfterComma;
480      if (HasMultiParameterLine != Other.HasMultiParameterLine)
481        return HasMultiParameterLine;
482      return false;
483    }
484  };
485
486  /// \brief The current state when indenting a unwrapped line.
487  ///
488  /// As the indenting tries different combinations this is copied by value.
489  struct LineState {
490    /// \brief The number of used columns in the current line.
491    unsigned Column;
492
493    /// \brief The token that needs to be next formatted.
494    const AnnotatedToken *NextToken;
495
496    /// \brief The parenthesis level of the first token on the current line.
497    unsigned StartOfLineLevel;
498
499    /// \brief The column of the first variable in a for-loop declaration.
500    ///
501    /// Used to align the second variable if necessary.
502    unsigned ForLoopVariablePos;
503
504    /// \brief \c true if this line contains a continued for-loop section.
505    bool LineContainsContinuedForLoopSection;
506
507    /// \brief A stack keeping track of properties applying to parenthesis
508    /// levels.
509    std::vector<ParenState> Stack;
510
511    /// \brief Comparison operator to be able to used \c LineState in \c map.
512    bool operator<(const LineState &Other) const {
513      if (Other.NextToken != NextToken)
514        return Other.NextToken > NextToken;
515      if (Other.Column != Column)
516        return Other.Column > Column;
517      if (Other.StartOfLineLevel != StartOfLineLevel)
518        return Other.StartOfLineLevel > StartOfLineLevel;
519      if (Other.ForLoopVariablePos != ForLoopVariablePos)
520        return Other.ForLoopVariablePos < ForLoopVariablePos;
521      if (Other.LineContainsContinuedForLoopSection !=
522          LineContainsContinuedForLoopSection)
523        return LineContainsContinuedForLoopSection;
524      return Other.Stack < Stack;
525    }
526  };
527
528  /// \brief Appends the next token to \p State and updates information
529  /// necessary for indentation.
530  ///
531  /// Puts the token on the current line if \p Newline is \c true and adds a
532  /// line break and necessary indentation otherwise.
533  ///
534  /// If \p DryRun is \c false, also creates and stores the required
535  /// \c Replacement.
536  void addTokenToState(bool Newline, bool DryRun, LineState &State) {
537    const AnnotatedToken &Current = *State.NextToken;
538    const AnnotatedToken &Previous = *State.NextToken->Parent;
539    assert(State.Stack.size());
540    unsigned ParenLevel = State.Stack.size() - 1;
541
542    if (Newline) {
543      unsigned WhitespaceStartColumn = State.Column;
544      if (Current.is(tok::r_brace)) {
545        State.Column = Line.Level * 2;
546      } else if (Current.is(tok::string_literal) &&
547                 Previous.is(tok::string_literal)) {
548        State.Column = State.Column - Previous.FormatTok.TokenLength;
549      } else if (Current.is(tok::lessless) &&
550                 State.Stack[ParenLevel].FirstLessLess != 0) {
551        State.Column = State.Stack[ParenLevel].FirstLessLess;
552      } else if (ParenLevel != 0 &&
553                 (Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
554                  Previous.is(tok::question) ||
555                  Previous.Type == TT_ConditionalExpr ||
556                  Current.is(tok::period) || Current.is(tok::arrow))) {
557        // Indent and extra 4 spaces after if we know the current expression is
558        // continued.  Don't do that on the top level, as we already indent 4
559        // there.
560        State.Column = State.Stack[ParenLevel].Indent + 4;
561      } else if (RootToken.is(tok::kw_for) && Previous.is(tok::comma)) {
562        State.Column = State.ForLoopVariablePos;
563      } else if (State.NextToken->Parent->ClosesTemplateDeclaration) {
564        State.Column = State.Stack[ParenLevel].Indent - 4;
565      } else if (Previous.Type == TT_BinaryOperator &&
566                 State.Stack.back().AssignmentColumn != 0) {
567        State.Column = State.Stack.back().AssignmentColumn;
568      } else {
569        State.Column = State.Stack[ParenLevel].Indent;
570      }
571
572      // A line starting with a closing brace is assumed to be correct for the
573      // same level as before the opening brace.
574      State.StartOfLineLevel = ParenLevel + (Current.is(tok::r_brace) ? 0 : 1);
575
576      if (RootToken.is(tok::kw_for))
577        State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi);
578
579      if (!DryRun) {
580        if (!Line.InPPDirective)
581          Whitespaces.replaceWhitespace(Current, 1, State.Column,
582                                        WhitespaceStartColumn, Style);
583        else
584          Whitespaces.replacePPWhitespace(Current, 1, State.Column,
585                                          WhitespaceStartColumn, Style);
586      }
587
588      State.Stack[ParenLevel].LastSpace = State.Column;
589      if (Current.is(tok::colon) && State.NextToken->Type != TT_ConditionalExpr)
590        State.Stack[ParenLevel].Indent += 2;
591    } else {
592      if (Current.is(tok::equal) && RootToken.is(tok::kw_for))
593        State.ForLoopVariablePos = State.Column -
594                                   Previous.FormatTok.TokenLength;
595
596      unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
597      if (State.NextToken->Type == TT_LineComment)
598        Spaces = Style.SpacesBeforeTrailingComments;
599
600      if (!DryRun)
601        Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style);
602
603      // FIXME: Do we need to do this for assignments nested in other
604      // expressions?
605      if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
606          (getPrecedence(Previous) == prec::Assignment ||
607           Previous.is(tok::kw_return)))
608        State.Stack.back().AssignmentColumn = State.Column + Spaces;
609      if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
610          State.NextToken->Parent->Type == TT_TemplateOpener)
611        State.Stack[ParenLevel].Indent = State.Column + Spaces;
612      if (Current.getPreviousNoneComment() != NULL &&
613          Current.getPreviousNoneComment()->is(tok::comma) &&
614          Current.isNot(tok::comment))
615        State.Stack[ParenLevel].HasMultiParameterLine = true;
616
617      State.Column += Spaces;
618      if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
619        // Treat the condition inside an if as if it was a second function
620        // parameter, i.e. let nested calls have an indent of 4.
621        State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
622      else if (Previous.is(tok::comma) && ParenLevel != 0)
623        // Top-level spaces are exempt as that mostly leads to better results.
624        State.Stack.back().LastSpace = State.Column;
625      else if (Previous.ParameterCount > 1 &&
626               (Previous.is(tok::l_paren) || Previous.is(tok::l_square) ||
627                Previous.Type == TT_TemplateOpener))
628        // If this function has multiple parameters, indent nested calls from
629        // the start of the first parameter.
630        State.Stack.back().LastSpace = State.Column;
631    }
632
633    // If we break after an {, we should also break before the corresponding }.
634    if (Newline && Previous.is(tok::l_brace))
635      State.Stack.back().BreakBeforeClosingBrace = true;
636
637    if (!Style.BinPackParameters && Newline) {
638      // If we are breaking after '(', '{', '<', this is not bin packing unless
639      // AllowAllParametersOnNextLine is false.
640      if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) &&
641           Previous.Type != TT_TemplateOpener) ||
642          !Style.AllowAllParametersOnNextLine)
643        State.Stack.back().BreakAfterComma = true;
644
645      // Any break on this level means that the parent level has been broken
646      // and we need to avoid bin packing there.
647      for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
648        State.Stack[i].BreakAfterComma = true;
649      }
650    }
651
652    moveStateToNextToken(State);
653  }
654
655  /// \brief Mark the next token as consumed in \p State and modify its stacks
656  /// accordingly.
657  void moveStateToNextToken(LineState &State) {
658    const AnnotatedToken &Current = *State.NextToken;
659    assert(State.Stack.size());
660
661    if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
662      State.Stack.back().FirstLessLess = State.Column;
663
664    // If we encounter an opening (, [, { or <, we add a level to our stacks to
665    // prepare for the following tokens.
666    if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
667        Current.is(tok::l_brace) ||
668        State.NextToken->Type == TT_TemplateOpener) {
669      unsigned NewIndent;
670      if (Current.is(tok::l_brace)) {
671        // FIXME: This does not work with nested static initializers.
672        // Implement a better handling for static initializers and similar
673        // constructs.
674        NewIndent = Line.Level * 2 + 2;
675      } else {
676        NewIndent = 4 + State.Stack.back().LastSpace;
677      }
678      State.Stack.push_back(
679          ParenState(NewIndent, State.Stack.back().LastSpace));
680    }
681
682    // If we encounter a closing ), ], } or >, we can remove a level from our
683    // stacks.
684    if (Current.is(tok::r_paren) || Current.is(tok::r_square) ||
685        (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
686        State.NextToken->Type == TT_TemplateCloser) {
687      State.Stack.pop_back();
688    }
689
690    if (State.NextToken->Children.empty())
691      State.NextToken = NULL;
692    else
693      State.NextToken = &State.NextToken->Children[0];
694
695    State.Column += Current.FormatTok.TokenLength;
696  }
697
698  /// \brief Calculate the penalty for splitting after the token at \p Index.
699  unsigned splitPenalty(const AnnotatedToken &Tok) {
700    const AnnotatedToken &Left = Tok;
701    const AnnotatedToken &Right = Tok.Children[0];
702
703    if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
704      return 50;
705    if (Left.is(tok::equal) && Right.is(tok::l_brace))
706      return 150;
707    if (Left.is(tok::coloncolon))
708      return 500;
709
710    // In for-loops, prefer breaking at ',' and ';'.
711    if (RootToken.is(tok::kw_for) &&
712        (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
713      return 20;
714
715    if (Left.is(tok::semi) || Left.is(tok::comma))
716      return 0;
717
718    // In Objective-C method expressions, prefer breaking before "param:" over
719    // breaking after it.
720    if (isObjCSelectorName(Right))
721      return 0;
722    if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
723      return 20;
724
725    if (Left.is(tok::l_paren))
726      return 20;
727    // FIXME: The penalty for a trailing "<" or "[" being higher than the
728    // penalty for a trainling "(" is a temporary workaround until we can
729    // properly avoid breaking in array subscripts or template parameters.
730    if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener)
731      return 50;
732
733    if (Left.is(tok::question) || Left.Type == TT_ConditionalExpr)
734      return prec::Assignment;
735    prec::Level Level = getPrecedence(Left);
736
737    if (Level != prec::Unknown)
738      return Level;
739
740    if (Right.is(tok::arrow) || Right.is(tok::period)) {
741      if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
742        return 15; // Should be smaller than breaking at a nested comma.
743      return 150;
744    }
745
746    return 3;
747  }
748
749  unsigned getColumnLimit() {
750    return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
751  }
752
753  /// \brief Calculate the number of lines needed to format the remaining part
754  /// of the unwrapped line.
755  ///
756  /// Assumes the formatting so far has led to
757  /// the \c LineSta \p State. If \p NewLine is set, a new line will be
758  /// added after the previous token.
759  ///
760  /// \param StopAt is used for optimization. If we can determine that we'll
761  /// definitely need at least \p StopAt additional lines, we already know of a
762  /// better solution.
763  unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) {
764    // We are at the end of the unwrapped line, so we don't need any more lines.
765    if (State.NextToken == NULL)
766      return 0;
767
768    if (!NewLine && State.NextToken->MustBreakBefore)
769      return UINT_MAX;
770    if (NewLine && !State.NextToken->CanBreakBefore &&
771        !(State.NextToken->is(tok::r_brace) &&
772          State.Stack.back().BreakBeforeClosingBrace))
773      return UINT_MAX;
774    if (!NewLine && State.NextToken->is(tok::r_brace) &&
775        State.Stack.back().BreakBeforeClosingBrace)
776      return UINT_MAX;
777    if (!NewLine && State.NextToken->Parent->is(tok::semi) &&
778        State.LineContainsContinuedForLoopSection)
779      return UINT_MAX;
780    if (!NewLine && State.NextToken->Parent->is(tok::comma) &&
781        State.NextToken->isNot(tok::comment) &&
782        State.Stack.back().BreakAfterComma)
783      return UINT_MAX;
784    // Trying to insert a parameter on a new line if there are already more than
785    // one parameter on the current line is bin packing.
786    if (NewLine && State.NextToken->Parent->is(tok::comma) &&
787        State.Stack.back().HasMultiParameterLine && !Style.BinPackParameters)
788      return UINT_MAX;
789    if (!NewLine && (State.NextToken->Type == TT_CtorInitializerColon ||
790                     (State.NextToken->Parent->ClosesTemplateDeclaration &&
791                      State.Stack.size() == 1)))
792      return UINT_MAX;
793
794    unsigned CurrentPenalty = 0;
795    if (NewLine) {
796      CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
797                        splitPenalty(*State.NextToken->Parent);
798    } else {
799      if (State.Stack.size() < State.StartOfLineLevel &&
800          State.NextToken->is(tok::identifier))
801        CurrentPenalty += Parameters.PenaltyLevelDecrease *
802                          (State.StartOfLineLevel - State.Stack.size());
803    }
804
805    addTokenToState(NewLine, true, State);
806
807    // Exceeding column limit is bad, assign penalty.
808    if (State.Column > getColumnLimit()) {
809      unsigned ExcessCharacters = State.Column - getColumnLimit();
810      CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
811    }
812
813    if (StopAt <= CurrentPenalty)
814      return UINT_MAX;
815    StopAt -= CurrentPenalty;
816    StateMap::iterator I = Memory.find(State);
817    if (I != Memory.end()) {
818      // If this state has already been examined, we can safely return the
819      // previous result if we
820      // - have not hit the optimatization (and thus returned UINT_MAX) OR
821      // - are now computing for a smaller or equal StopAt.
822      unsigned SavedResult = I->second.first;
823      unsigned SavedStopAt = I->second.second;
824      if (SavedResult != UINT_MAX)
825        return SavedResult + CurrentPenalty;
826      else if (StopAt <= SavedStopAt)
827        return UINT_MAX;
828    }
829
830    unsigned NoBreak = calcPenalty(State, false, StopAt);
831    unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
832    unsigned Result = std::min(NoBreak, WithBreak);
833
834    // We have to store 'Result' without adding 'CurrentPenalty' as the latter
835    // can depend on 'NewLine'.
836    Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
837
838    return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
839  }
840
841  FormatStyle Style;
842  SourceManager &SourceMgr;
843  const AnnotatedLine &Line;
844  const unsigned FirstIndent;
845  const AnnotatedToken &RootToken;
846  WhitespaceManager &Whitespaces;
847
848  // A map from an indent state to a pair (Result, Used-StopAt).
849  typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap;
850  StateMap Memory;
851
852  OptimizationParameters Parameters;
853};
854
855/// \brief Determines extra information about the tokens comprising an
856/// \c UnwrappedLine.
857class TokenAnnotator {
858public:
859  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
860                 AnnotatedLine &Line)
861      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {}
862
863  /// \brief A parser that gathers additional information about tokens.
864  ///
865  /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
866  /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
867  /// into template parameter lists.
868  class AnnotatingParser {
869  public:
870    AnnotatingParser(AnnotatedToken &RootToken)
871        : CurrentToken(&RootToken), KeywordVirtualFound(false),
872          ColonIsObjCMethodExpr(false) {}
873
874    /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
875    struct ObjCSelectorRAII {
876      AnnotatingParser &P;
877      bool ColonWasObjCMethodExpr;
878
879      ObjCSelectorRAII(AnnotatingParser &P)
880          : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {}
881
882      ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
883
884      void markStart(AnnotatedToken &Left) {
885        P.ColonIsObjCMethodExpr = true;
886        Left.Type = TT_ObjCMethodExpr;
887      }
888
889      void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
890    };
891
892
893    bool parseAngle() {
894      if (CurrentToken == NULL)
895        return false;
896      AnnotatedToken *Left = CurrentToken->Parent;
897      while (CurrentToken != NULL) {
898        if (CurrentToken->is(tok::greater)) {
899          Left->MatchingParen = CurrentToken;
900          CurrentToken->MatchingParen = Left;
901          CurrentToken->Type = TT_TemplateCloser;
902          next();
903          return true;
904        }
905        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
906            CurrentToken->is(tok::r_brace))
907          return false;
908        if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
909            CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
910          return false;
911        if (CurrentToken->is(tok::comma))
912          ++Left->ParameterCount;
913        if (!consumeToken())
914          return false;
915      }
916      return false;
917    }
918
919    bool parseParens(bool LookForDecls = false) {
920      if (CurrentToken == NULL)
921        return false;
922      bool StartsObjCMethodExpr = false;
923      AnnotatedToken *Left = CurrentToken->Parent;
924      if (CurrentToken->is(tok::caret)) {
925        // ^( starts a block.
926        Left->Type = TT_ObjCBlockLParen;
927      } else if (AnnotatedToken *MaybeSel = Left->Parent) {
928        // @selector( starts a selector.
929        if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
930            MaybeSel->Parent->is(tok::at)) {
931          StartsObjCMethodExpr = true;
932        }
933      }
934
935      ObjCSelectorRAII objCSelector(*this);
936      if (StartsObjCMethodExpr)
937        objCSelector.markStart(*Left);
938
939      while (CurrentToken != NULL) {
940        // LookForDecls is set when "if (" has been seen. Check for
941        // 'identifier' '*' 'identifier' followed by not '=' -- this
942        // '*' has to be a binary operator but determineStarAmpUsage() will
943        // categorize it as an unary operator, so set the right type here.
944        if (LookForDecls && !CurrentToken->Children.empty()) {
945          AnnotatedToken &Prev = *CurrentToken->Parent;
946          AnnotatedToken &Next = CurrentToken->Children[0];
947          if (Prev.Parent->is(tok::identifier) &&
948              (Prev.is(tok::star) || Prev.is(tok::amp)) &&
949              CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
950            Prev.Type = TT_BinaryOperator;
951            LookForDecls = false;
952          }
953        }
954
955        if (CurrentToken->is(tok::r_paren)) {
956          Left->MatchingParen = CurrentToken;
957          CurrentToken->MatchingParen = Left;
958
959          if (StartsObjCMethodExpr)
960            objCSelector.markEnd(*CurrentToken);
961
962          next();
963          return true;
964        }
965        if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
966          return false;
967        if (CurrentToken->is(tok::comma))
968          ++Left->ParameterCount;
969        if (!consumeToken())
970          return false;
971      }
972      return false;
973    }
974
975    bool parseSquare() {
976      if (!CurrentToken)
977        return false;
978
979      // A '[' could be an index subscript (after an indentifier or after
980      // ')' or ']'), or it could be the start of an Objective-C method
981      // expression.
982      AnnotatedToken *Left = CurrentToken->Parent;
983      bool StartsObjCMethodExpr =
984          !Left->Parent || Left->Parent->is(tok::colon) ||
985          Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
986          Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
987          getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(),
988                             true, true) > prec::Unknown;
989
990      ObjCSelectorRAII objCSelector(*this);
991      if (StartsObjCMethodExpr)
992        objCSelector.markStart(*Left);
993
994      while (CurrentToken != NULL) {
995        if (CurrentToken->is(tok::r_square)) {
996          if (!CurrentToken->Children.empty() &&
997              CurrentToken->Children[0].is(tok::l_paren)) {
998            // An ObjC method call can't be followed by an open parenthesis.
999            // FIXME: Do we incorrectly label ":" with this?
1000            StartsObjCMethodExpr = false;
1001            Left->Type = TT_Unknown;
1002	  }
1003          if (StartsObjCMethodExpr)
1004            objCSelector.markEnd(*CurrentToken);
1005          Left->MatchingParen = CurrentToken;
1006          CurrentToken->MatchingParen = Left;
1007          next();
1008          return true;
1009        }
1010        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
1011          return false;
1012        if (CurrentToken->is(tok::comma))
1013          ++Left->ParameterCount;
1014        if (!consumeToken())
1015          return false;
1016      }
1017      return false;
1018    }
1019
1020    bool parseBrace() {
1021      // Lines are fine to end with '{'.
1022      if (CurrentToken == NULL)
1023        return true;
1024      AnnotatedToken *Left = CurrentToken->Parent;
1025      while (CurrentToken != NULL) {
1026        if (CurrentToken->is(tok::r_brace)) {
1027          Left->MatchingParen = CurrentToken;
1028          CurrentToken->MatchingParen = Left;
1029          next();
1030          return true;
1031        }
1032        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
1033          return false;
1034        if (!consumeToken())
1035          return false;
1036      }
1037      return true;
1038    }
1039
1040    bool parseConditional() {
1041      while (CurrentToken != NULL) {
1042        if (CurrentToken->is(tok::colon)) {
1043          CurrentToken->Type = TT_ConditionalExpr;
1044          next();
1045          return true;
1046        }
1047        if (!consumeToken())
1048          return false;
1049      }
1050      return false;
1051    }
1052
1053    bool parseTemplateDeclaration() {
1054      if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
1055        CurrentToken->Type = TT_TemplateOpener;
1056        next();
1057        if (!parseAngle())
1058          return false;
1059        CurrentToken->Parent->ClosesTemplateDeclaration = true;
1060        return true;
1061      }
1062      return false;
1063    }
1064
1065    bool consumeToken() {
1066      AnnotatedToken *Tok = CurrentToken;
1067      next();
1068      switch (Tok->FormatTok.Tok.getKind()) {
1069      case tok::plus:
1070      case tok::minus:
1071        // At the start of the line, +/- specific ObjectiveC method
1072        // declarations.
1073        if (Tok->Parent == NULL)
1074          Tok->Type = TT_ObjCMethodSpecifier;
1075        break;
1076      case tok::colon:
1077        // Colons from ?: are handled in parseConditional().
1078        if (Tok->Parent->is(tok::r_paren))
1079          Tok->Type = TT_CtorInitializerColon;
1080        if (ColonIsObjCMethodExpr)
1081          Tok->Type = TT_ObjCMethodExpr;
1082        break;
1083      case tok::kw_if:
1084      case tok::kw_while:
1085        if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
1086          next();
1087          if (!parseParens(/*LookForDecls=*/true))
1088            return false;
1089        }
1090        break;
1091      case tok::l_paren:
1092        if (!parseParens())
1093          return false;
1094        break;
1095      case tok::l_square:
1096        if (!parseSquare())
1097          return false;
1098        break;
1099      case tok::l_brace:
1100        if (!parseBrace())
1101          return false;
1102        break;
1103      case tok::less:
1104        if (parseAngle())
1105          Tok->Type = TT_TemplateOpener;
1106        else {
1107          Tok->Type = TT_BinaryOperator;
1108          CurrentToken = Tok;
1109          next();
1110        }
1111        break;
1112      case tok::r_paren:
1113      case tok::r_square:
1114        return false;
1115      case tok::r_brace:
1116        // Lines can start with '}'.
1117        if (Tok->Parent != NULL)
1118          return false;
1119        break;
1120      case tok::greater:
1121        Tok->Type = TT_BinaryOperator;
1122        break;
1123      case tok::kw_operator:
1124        if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
1125          CurrentToken->Type = TT_OverloadedOperator;
1126          next();
1127          if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
1128            CurrentToken->Type = TT_OverloadedOperator;
1129            next();
1130          }
1131        } else {
1132          while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
1133            CurrentToken->Type = TT_OverloadedOperator;
1134            next();
1135          }
1136        }
1137        break;
1138      case tok::question:
1139        parseConditional();
1140        break;
1141      case tok::kw_template:
1142        parseTemplateDeclaration();
1143        break;
1144      default:
1145        break;
1146      }
1147      return true;
1148    }
1149
1150    void parseIncludeDirective() {
1151      next();
1152      if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
1153        next();
1154        while (CurrentToken != NULL) {
1155          if (CurrentToken->isNot(tok::comment) ||
1156              !CurrentToken->Children.empty())
1157            CurrentToken->Type = TT_ImplicitStringLiteral;
1158          next();
1159        }
1160      } else {
1161        while (CurrentToken != NULL) {
1162          next();
1163        }
1164      }
1165    }
1166
1167    void parseWarningOrError() {
1168      next();
1169      // We still want to format the whitespace left of the first token of the
1170      // warning or error.
1171      next();
1172      while (CurrentToken != NULL) {
1173        CurrentToken->Type = TT_ImplicitStringLiteral;
1174        next();
1175      }
1176    }
1177
1178    void parsePreprocessorDirective() {
1179      next();
1180      if (CurrentToken == NULL)
1181        return;
1182      // Hashes in the middle of a line can lead to any strange token
1183      // sequence.
1184      if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
1185        return;
1186      switch (
1187          CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
1188      case tok::pp_include:
1189      case tok::pp_import:
1190        parseIncludeDirective();
1191        break;
1192      case tok::pp_error:
1193      case tok::pp_warning:
1194        parseWarningOrError();
1195        break;
1196      default:
1197        break;
1198      }
1199    }
1200
1201    LineType parseLine() {
1202      int PeriodsAndArrows = 0;
1203      if (CurrentToken->is(tok::hash)) {
1204        parsePreprocessorDirective();
1205        return LT_PreprocessorDirective;
1206      }
1207      while (CurrentToken != NULL) {
1208
1209        if (CurrentToken->is(tok::kw_virtual))
1210          KeywordVirtualFound = true;
1211        if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
1212          ++PeriodsAndArrows;
1213        if (!consumeToken())
1214          return LT_Invalid;
1215      }
1216      if (KeywordVirtualFound)
1217        return LT_VirtualFunctionDecl;
1218
1219      // Assume a builder-type call if there are 2 or more "." and "->".
1220      if (PeriodsAndArrows >= 2)
1221        return LT_BuilderTypeCall;
1222
1223      return LT_Other;
1224    }
1225
1226    void next() {
1227      if (CurrentToken != NULL && !CurrentToken->Children.empty())
1228        CurrentToken = &CurrentToken->Children[0];
1229      else
1230        CurrentToken = NULL;
1231    }
1232
1233  private:
1234    AnnotatedToken *CurrentToken;
1235    bool KeywordVirtualFound;
1236    bool ColonIsObjCMethodExpr;
1237  };
1238
1239  void calculateExtraInformation(AnnotatedToken &Current) {
1240    Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
1241
1242    if (Current.FormatTok.MustBreakBefore) {
1243      Current.MustBreakBefore = true;
1244    } else {
1245      if (Current.Type == TT_LineComment) {
1246        Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
1247      } else if ((Current.Parent->is(tok::comment) &&
1248                  Current.FormatTok.NewlinesBefore > 0) ||
1249                 (Current.is(tok::string_literal) &&
1250                  Current.Parent->is(tok::string_literal))) {
1251        Current.MustBreakBefore = true;
1252      } else {
1253        Current.MustBreakBefore = false;
1254      }
1255    }
1256    Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
1257    if (Current.MustBreakBefore)
1258      Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
1259    else
1260      Current.TotalLength = Current.Parent->TotalLength +
1261                            Current.FormatTok.TokenLength +
1262                            (Current.SpaceRequiredBefore ? 1 : 0);
1263    if (!Current.Children.empty())
1264      calculateExtraInformation(Current.Children[0]);
1265  }
1266
1267  void annotate() {
1268    AnnotatingParser Parser(Line.First);
1269    Line.Type = Parser.parseLine();
1270    if (Line.Type == LT_Invalid)
1271      return;
1272
1273    determineTokenTypes(Line.First, /*IsExpression=*/ false);
1274
1275    if (Line.First.Type == TT_ObjCMethodSpecifier)
1276      Line.Type = LT_ObjCMethodDecl;
1277    else if (Line.First.Type == TT_ObjCDecl)
1278      Line.Type = LT_ObjCDecl;
1279    else if (Line.First.Type == TT_ObjCProperty)
1280      Line.Type = LT_ObjCProperty;
1281
1282    Line.First.SpaceRequiredBefore = true;
1283    Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
1284    Line.First.CanBreakBefore = Line.First.MustBreakBefore;
1285
1286    Line.First.TotalLength = Line.First.FormatTok.TokenLength;
1287    if (!Line.First.Children.empty())
1288      calculateExtraInformation(Line.First.Children[0]);
1289  }
1290
1291private:
1292  void determineTokenTypes(AnnotatedToken &Current, bool IsExpression) {
1293    if (getPrecedence(Current) == prec::Assignment) {
1294      IsExpression = true;
1295      AnnotatedToken *Previous = Current.Parent;
1296      while (Previous != NULL) {
1297        if (Previous->Type == TT_BinaryOperator &&
1298            (Previous->is(tok::star) || Previous->is(tok::amp))) {
1299          Previous->Type = TT_PointerOrReference;
1300        }
1301        Previous = Previous->Parent;
1302      }
1303    }
1304    if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
1305        (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
1306         (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
1307      IsExpression = true;
1308
1309    if (Current.Type == TT_Unknown) {
1310      if (Current.is(tok::star) || Current.is(tok::amp)) {
1311        Current.Type = determineStarAmpUsage(Current, IsExpression);
1312      } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
1313                 Current.is(tok::caret)) {
1314        Current.Type = determinePlusMinusCaretUsage(Current);
1315      } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
1316        Current.Type = determineIncrementUsage(Current);
1317      } else if (Current.is(tok::exclaim)) {
1318        Current.Type = TT_UnaryOperator;
1319      } else if (isBinaryOperator(Current)) {
1320        Current.Type = TT_BinaryOperator;
1321      } else if (Current.is(tok::comment)) {
1322        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
1323                                            Lex.getLangOpts()));
1324        if (StringRef(Data).startswith("//"))
1325          Current.Type = TT_LineComment;
1326        else
1327          Current.Type = TT_BlockComment;
1328      } else if (Current.is(tok::r_paren) &&
1329                 (Current.Parent->Type == TT_PointerOrReference ||
1330                  Current.Parent->Type == TT_TemplateCloser) &&
1331                 (Current.Children.empty() ||
1332                  (Current.Children[0].isNot(tok::equal) &&
1333                   Current.Children[0].isNot(tok::semi) &&
1334                   Current.Children[0].isNot(tok::l_brace)))) {
1335        // FIXME: We need to get smarter and understand more cases of casts.
1336        Current.Type = TT_CastRParen;
1337      } else if (Current.is(tok::at) && Current.Children.size()) {
1338        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
1339        case tok::objc_interface:
1340        case tok::objc_implementation:
1341        case tok::objc_protocol:
1342          Current.Type = TT_ObjCDecl;
1343          break;
1344        case tok::objc_property:
1345          Current.Type = TT_ObjCProperty;
1346          break;
1347        default:
1348          break;
1349        }
1350      }
1351    }
1352
1353    if (!Current.Children.empty())
1354      determineTokenTypes(Current.Children[0], IsExpression);
1355  }
1356
1357  bool isBinaryOperator(const AnnotatedToken &Tok) {
1358    // Comma is a binary operator, but does not behave as such wrt. formatting.
1359    return getPrecedence(Tok) > prec::Comma;
1360  }
1361
1362  /// \brief Returns the previous token ignoring comments.
1363  const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
1364    const AnnotatedToken *PrevToken = Tok.Parent;
1365    while (PrevToken != NULL && PrevToken->is(tok::comment))
1366      PrevToken = PrevToken->Parent;
1367    return PrevToken;
1368  }
1369
1370  /// \brief Returns the next token ignoring comments.
1371  const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
1372    if (Tok.Children.empty())
1373      return NULL;
1374    const AnnotatedToken *NextToken = &Tok.Children[0];
1375    while (NextToken->is(tok::comment)) {
1376      if (NextToken->Children.empty())
1377        return NULL;
1378      NextToken = &NextToken->Children[0];
1379    }
1380    return NextToken;
1381  }
1382
1383  /// \brief Return the type of the given token assuming it is * or &.
1384  TokenType determineStarAmpUsage(const AnnotatedToken &Tok,
1385                                  bool IsExpression) {
1386    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
1387    if (PrevToken == NULL)
1388      return TT_UnaryOperator;
1389
1390    const AnnotatedToken *NextToken = getNextToken(Tok);
1391    if (NextToken == NULL)
1392      return TT_Unknown;
1393
1394    if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr)
1395      return TT_PointerOrReference;
1396
1397    if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
1398        PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
1399        PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
1400        PrevToken->Type == TT_BinaryOperator ||
1401        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
1402      return TT_UnaryOperator;
1403
1404    if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
1405        PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
1406        NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
1407        NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
1408        NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
1409        NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
1410        NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
1411      return TT_BinaryOperator;
1412
1413    if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
1414        NextToken->is(tok::greater))
1415      return TT_PointerOrReference;
1416
1417    // It is very unlikely that we are going to find a pointer or reference type
1418    // definition on the RHS of an assignment.
1419    if (IsExpression)
1420      return TT_BinaryOperator;
1421
1422    return TT_PointerOrReference;
1423  }
1424
1425  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
1426    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
1427    if (PrevToken == NULL)
1428      return TT_UnaryOperator;
1429
1430    // Use heuristics to recognize unary operators.
1431    if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
1432        PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
1433        PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
1434        PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
1435        PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
1436      return TT_UnaryOperator;
1437
1438    // There can't be to consecutive binary operators.
1439    if (PrevToken->Type == TT_BinaryOperator)
1440      return TT_UnaryOperator;
1441
1442    // Fall back to marking the token as binary operator.
1443    return TT_BinaryOperator;
1444  }
1445
1446  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1447  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
1448    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
1449    if (PrevToken == NULL)
1450      return TT_UnaryOperator;
1451    if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
1452        PrevToken->is(tok::identifier))
1453      return TT_TrailingUnaryOperator;
1454
1455    return TT_UnaryOperator;
1456  }
1457
1458  bool spaceRequiredBetween(const AnnotatedToken &Left,
1459                            const AnnotatedToken &Right) {
1460    if (Right.is(tok::hashhash))
1461      return Left.is(tok::hash);
1462    if (Left.is(tok::hashhash) || Left.is(tok::hash))
1463      return Right.is(tok::hash);
1464    if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
1465      return false;
1466    if (Right.is(tok::less) &&
1467        (Left.is(tok::kw_template) ||
1468         (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1469      return true;
1470    if (Left.is(tok::arrow) || Right.is(tok::arrow))
1471      return false;
1472    if (Left.is(tok::exclaim) || Left.is(tok::tilde))
1473      return false;
1474    if (Left.is(tok::at) &&
1475        (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
1476         Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
1477         Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
1478         Right.is(tok::kw_true) || Right.is(tok::kw_false)))
1479      return false;
1480    if (Left.is(tok::coloncolon))
1481      return false;
1482    if (Right.is(tok::coloncolon))
1483      return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
1484    if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
1485      return false;
1486    if (Right.is(tok::amp) || Right.is(tok::star))
1487      return Left.FormatTok.Tok.isLiteral() ||
1488             (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
1489              !Style.PointerAndReferenceBindToType);
1490    if (Left.is(tok::amp) || Left.is(tok::star))
1491      return Right.FormatTok.Tok.isLiteral() ||
1492             Style.PointerAndReferenceBindToType;
1493    if (Right.is(tok::star) && Left.is(tok::l_paren))
1494      return false;
1495    if (Left.is(tok::l_square) || Right.is(tok::r_square))
1496      return false;
1497    if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1498      return false;
1499    if (Left.is(tok::period) || Right.is(tok::period))
1500      return false;
1501    if (Left.is(tok::colon))
1502      return Left.Type != TT_ObjCMethodExpr;
1503    if (Right.is(tok::colon))
1504      return Right.Type != TT_ObjCMethodExpr;
1505    if (Left.is(tok::l_paren))
1506      return false;
1507    if (Right.is(tok::l_paren)) {
1508      return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
1509             Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
1510             Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
1511             Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
1512             Left.is(tok::kw_delete);
1513    }
1514    if (Left.is(tok::at) &&
1515        Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1516      return false;
1517    if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1518      return false;
1519    return true;
1520  }
1521
1522  bool spaceRequiredBefore(const AnnotatedToken &Tok) {
1523    if (Line.Type == LT_ObjCMethodDecl) {
1524      if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
1525          Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
1526        return true;
1527      if (Tok.is(tok::colon))
1528        return false;
1529      if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1530        return true;
1531      if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1532        // Don't space between ')' and <id>
1533        return false;
1534      if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
1535        // Don't space between ':' and '('
1536        return false;
1537    }
1538    if (Line.Type == LT_ObjCProperty &&
1539        (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1540      return false;
1541
1542    if (Tok.Parent->is(tok::comma))
1543      return true;
1544    if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1545      return true;
1546    if (Tok.Type == TT_OverloadedOperator)
1547      return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
1548             Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
1549    if (Tok.Parent->Type == TT_OverloadedOperator)
1550      return false;
1551    if (Tok.is(tok::colon))
1552      return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
1553             Tok.Type != TT_ObjCMethodExpr;
1554    if (Tok.Parent->Type == TT_UnaryOperator ||
1555        Tok.Parent->Type == TT_CastRParen)
1556      return false;
1557    if (Tok.Type == TT_UnaryOperator)
1558      return Tok.Parent->isNot(tok::l_paren) &&
1559             Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
1560             (Tok.Parent->isNot(tok::colon) ||
1561              Tok.Parent->Type != TT_ObjCMethodExpr);
1562    if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1563      return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
1564             TT_TemplateCloser && Style.SplitTemplateClosingGreater;
1565    }
1566    if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1567      return true;
1568    if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1569      return false;
1570    if (Tok.is(tok::less) && Line.First.is(tok::hash))
1571      return true;
1572    if (Tok.Type == TT_TrailingUnaryOperator)
1573      return false;
1574    return spaceRequiredBetween(*Tok.Parent, Tok);
1575  }
1576
1577  bool canBreakBefore(const AnnotatedToken &Right) {
1578    const AnnotatedToken &Left = *Right.Parent;
1579    if (Line.Type == LT_ObjCMethodDecl) {
1580      if (Right.is(tok::identifier) && !Right.Children.empty() &&
1581          Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
1582        return true;
1583      if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
1584          Left.Parent->is(tok::colon))
1585        // Don't break this identifier as ':' or identifier
1586        // before it will break.
1587        return false;
1588      if (Right.is(tok::colon) && Left.is(tok::identifier) &&
1589          Left.CanBreakBefore)
1590        // Don't break at ':' if identifier before it can beak.
1591        return false;
1592    }
1593    if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1594      return false;
1595    if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1596      return true;
1597    if (isObjCSelectorName(Right))
1598      return true;
1599    if (Left.ClosesTemplateDeclaration)
1600      return true;
1601    if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1602        Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr)
1603      return false;
1604    if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1605      return false;
1606
1607    if (Right.Type == TT_LineComment)
1608      // We rely on MustBreakBefore being set correctly here as we should not
1609      // change the "binding" behavior of a comment.
1610      return false;
1611
1612    // Allow breaking after a trailing 'const', e.g. after a method declaration,
1613    // unless it is follow by ';', '{' or '='.
1614    if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1615        Left.Parent->is(tok::r_paren))
1616      return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
1617             Right.isNot(tok::equal);
1618
1619    // We only break before r_brace if there was a corresponding break before
1620    // the l_brace, which is tracked by BreakBeforeClosingBrace.
1621    if (Right.is(tok::r_brace))
1622      return false;
1623
1624    if (Right.is(tok::r_paren) || Right.is(tok::greater))
1625      return false;
1626    return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1627           Left.is(tok::comma) || Right.is(tok::lessless) ||
1628           Right.is(tok::arrow) || Right.is(tok::period) ||
1629           Right.is(tok::colon) || Left.is(tok::coloncolon) ||
1630           Left.is(tok::semi) || Left.is(tok::l_brace) ||
1631           Left.is(tok::question) || Left.Type == TT_ConditionalExpr ||
1632           (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1633            Right.is(tok::identifier)) ||
1634           (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1635           (Left.is(tok::l_square) && !Right.is(tok::r_square));
1636  }
1637
1638  FormatStyle Style;
1639  SourceManager &SourceMgr;
1640  Lexer &Lex;
1641  AnnotatedLine &Line;
1642};
1643
1644class LexerBasedFormatTokenSource : public FormatTokenSource {
1645public:
1646  LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1647      : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1648        IdentTable(Lex.getLangOpts()) {
1649    Lex.SetKeepWhitespaceMode(true);
1650  }
1651
1652  virtual FormatToken getNextToken() {
1653    if (GreaterStashed) {
1654      FormatTok.NewlinesBefore = 0;
1655      FormatTok.WhiteSpaceStart =
1656          FormatTok.Tok.getLocation().getLocWithOffset(1);
1657      FormatTok.WhiteSpaceLength = 0;
1658      GreaterStashed = false;
1659      return FormatTok;
1660    }
1661
1662    FormatTok = FormatToken();
1663    Lex.LexFromRawLexer(FormatTok.Tok);
1664    StringRef Text = rawTokenText(FormatTok.Tok);
1665    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1666    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1667      FormatTok.IsFirst = true;
1668
1669    // Consume and record whitespace until we find a significant token.
1670    while (FormatTok.Tok.is(tok::unknown)) {
1671      FormatTok.NewlinesBefore += Text.count('\n');
1672      FormatTok.HasUnescapedNewline = Text.count("\\\n") !=
1673                                      FormatTok.NewlinesBefore;
1674      FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1675
1676      if (FormatTok.Tok.is(tok::eof))
1677        return FormatTok;
1678      Lex.LexFromRawLexer(FormatTok.Tok);
1679      Text = rawTokenText(FormatTok.Tok);
1680    }
1681
1682    // Now FormatTok is the next non-whitespace token.
1683    FormatTok.TokenLength = Text.size();
1684
1685    // In case the token starts with escaped newlines, we want to
1686    // take them into account as whitespace - this pattern is quite frequent
1687    // in macro definitions.
1688    // FIXME: What do we want to do with other escaped spaces, and escaped
1689    // spaces or newlines in the middle of tokens?
1690    // FIXME: Add a more explicit test.
1691    unsigned i = 0;
1692    while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1693      // FIXME: ++FormatTok.NewlinesBefore is missing...
1694      FormatTok.WhiteSpaceLength += 2;
1695      FormatTok.TokenLength -= 2;
1696      i += 2;
1697    }
1698
1699    if (FormatTok.Tok.is(tok::raw_identifier)) {
1700      IdentifierInfo &Info = IdentTable.get(Text);
1701      FormatTok.Tok.setIdentifierInfo(&Info);
1702      FormatTok.Tok.setKind(Info.getTokenID());
1703    }
1704
1705    if (FormatTok.Tok.is(tok::greatergreater)) {
1706      FormatTok.Tok.setKind(tok::greater);
1707      GreaterStashed = true;
1708    }
1709
1710    return FormatTok;
1711  }
1712
1713private:
1714  FormatToken FormatTok;
1715  bool GreaterStashed;
1716  Lexer &Lex;
1717  SourceManager &SourceMgr;
1718  IdentifierTable IdentTable;
1719
1720  /// Returns the text of \c FormatTok.
1721  StringRef rawTokenText(Token &Tok) {
1722    return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1723                     Tok.getLength());
1724  }
1725};
1726
1727class Formatter : public UnwrappedLineConsumer {
1728public:
1729  Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1730            SourceManager &SourceMgr,
1731            const std::vector<CharSourceRange> &Ranges)
1732      : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1733        Whitespaces(SourceMgr), Ranges(Ranges) {}
1734
1735  virtual ~Formatter() {}
1736
1737  tooling::Replacements format() {
1738    LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1739    UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1740    StructuralError = Parser.parse();
1741    unsigned PreviousEndOfLineColumn = 0;
1742    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1743      TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
1744      Annotator.annotate();
1745    }
1746    for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1747                                              E = AnnotatedLines.end();
1748         I != E; ++I) {
1749      const AnnotatedLine &TheLine = *I;
1750      if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) {
1751        unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level,
1752                                           TheLine.InPPDirective,
1753                                           PreviousEndOfLineColumn);
1754        tryFitMultipleLinesInOne(Indent, I, E);
1755        UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1756                                         TheLine.First, Whitespaces,
1757                                         StructuralError);
1758        PreviousEndOfLineColumn = Formatter.format();
1759      } else {
1760        // If we did not reformat this unwrapped line, the column at the end of
1761        // the last token is unchanged - thus, we can calculate the end of the
1762        // last token, and return the result.
1763        PreviousEndOfLineColumn =
1764            SourceMgr.getSpellingColumnNumber(
1765                TheLine.Last->FormatTok.Tok.getLocation()) +
1766            Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
1767                                   SourceMgr, Lex.getLangOpts()) -
1768            1;
1769      }
1770    }
1771    return Whitespaces.generateReplacements();
1772  }
1773
1774private:
1775  /// \brief Tries to merge lines into one.
1776  ///
1777  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1778  /// if possible; note that \c I will be incremented when lines are merged.
1779  ///
1780  /// Returns whether the resulting \c Line can fit in a single line.
1781  void tryFitMultipleLinesInOne(unsigned Indent,
1782                                std::vector<AnnotatedLine>::iterator &I,
1783                                std::vector<AnnotatedLine>::iterator E) {
1784    unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
1785
1786    // We can never merge stuff if there are trailing line comments.
1787    if (I->Last->Type == TT_LineComment)
1788      return;
1789
1790    // Check whether the UnwrappedLine can be put onto a single line. If
1791    // so, this is bound to be the optimal solution (by definition) and we
1792    // don't need to analyze the entire solution space.
1793    if (I->Last->TotalLength > Limit)
1794      return;
1795    Limit -= I->Last->TotalLength;
1796
1797    if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1798      return;
1799
1800    if (I->Last->is(tok::l_brace)) {
1801      tryMergeSimpleBlock(I, E, Limit);
1802    } else if (I->First.is(tok::kw_if)) {
1803      tryMergeSimpleIf(I, E, Limit);
1804    } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1805                                    I->First.FormatTok.IsFirst)) {
1806      tryMergeSimplePPDirective(I, E, Limit);
1807    }
1808    return;
1809  }
1810
1811  void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1812                                 std::vector<AnnotatedLine>::iterator E,
1813                                 unsigned Limit) {
1814    AnnotatedLine &Line = *I;
1815    if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1816      return;
1817    if (I + 2 != E && (I + 2)->InPPDirective &&
1818        !(I + 2)->First.FormatTok.HasUnescapedNewline)
1819      return;
1820    if (1 + (I + 1)->Last->TotalLength > Limit)
1821      return;
1822    join(Line, *(++I));
1823  }
1824
1825  void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1826                        std::vector<AnnotatedLine>::iterator E,
1827                        unsigned Limit) {
1828    if (!Style.AllowShortIfStatementsOnASingleLine)
1829      return;
1830    if ((I + 1)->InPPDirective != I->InPPDirective ||
1831        ((I + 1)->InPPDirective &&
1832         (I + 1)->First.FormatTok.HasUnescapedNewline))
1833      return;
1834    AnnotatedLine &Line = *I;
1835    if (Line.Last->isNot(tok::r_paren))
1836      return;
1837    if (1 + (I + 1)->Last->TotalLength > Limit)
1838      return;
1839    if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1840      return;
1841    // Only inline simple if's (no nested if or else).
1842    if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1843      return;
1844    join(Line, *(++I));
1845  }
1846
1847  void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1848                        std::vector<AnnotatedLine>::iterator E,
1849                        unsigned Limit){
1850    // First, check that the current line allows merging. This is the case if
1851    // we're not in a control flow statement and the last token is an opening
1852    // brace.
1853    AnnotatedLine &Line = *I;
1854    bool AllowedTokens =
1855        Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
1856        Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
1857        Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
1858        Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
1859        // This gets rid of all ObjC @ keywords and methods.
1860        Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
1861        Line.First.isNot(tok::plus);
1862    if (!AllowedTokens)
1863      return;
1864
1865    AnnotatedToken *Tok = &(I + 1)->First;
1866    if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1867        !Tok->MustBreakBefore && Tok->TotalLength <= Limit) {
1868      Tok->SpaceRequiredBefore = false;
1869      join(Line, *(I + 1));
1870      I += 1;
1871    } else {
1872      // Check that we still have three lines and they fit into the limit.
1873      if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1874          !nextTwoLinesFitInto(I, Limit))
1875        return;
1876
1877      // Second, check that the next line does not contain any braces - if it
1878      // does, readability declines when putting it into a single line.
1879      if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1880        return;
1881      do {
1882        if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
1883          return;
1884        Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1885      } while (Tok != NULL);
1886
1887      // Last, check that the third line contains a single closing brace.
1888      Tok = &(I + 2)->First;
1889      if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1890          Tok->MustBreakBefore)
1891        return;
1892
1893      join(Line, *(I + 1));
1894      join(Line, *(I + 2));
1895      I += 2;
1896    }
1897  }
1898
1899  bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1900                           unsigned Limit) {
1901    return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1902           Limit;
1903  }
1904
1905  void join(AnnotatedLine &A, const AnnotatedLine &B) {
1906    A.Last->Children.push_back(B.First);
1907    while (!A.Last->Children.empty()) {
1908      A.Last->Children[0].Parent = A.Last;
1909      A.Last = &A.Last->Children[0];
1910    }
1911  }
1912
1913  bool touchesRanges(const AnnotatedLine &TheLine) {
1914    const FormatToken *First = &TheLine.First.FormatTok;
1915    const FormatToken *Last = &TheLine.Last->FormatTok;
1916    CharSourceRange LineRange = CharSourceRange::getTokenRange(
1917                                    First->Tok.getLocation(),
1918                                    Last->Tok.getLocation());
1919    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1920      if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
1921                                               Ranges[i].getBegin()) &&
1922          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1923                                               LineRange.getBegin()))
1924        return true;
1925    }
1926    return false;
1927  }
1928
1929  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1930    AnnotatedLines.push_back(AnnotatedLine(TheLine));
1931  }
1932
1933  /// \brief Add a new line and the required indent before the first Token
1934  /// of the \c UnwrappedLine if there was no structural parsing error.
1935  /// Returns the indent level of the \c UnwrappedLine.
1936  unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level,
1937                            bool InPPDirective,
1938                            unsigned PreviousEndOfLineColumn) {
1939    const FormatToken &Tok = RootToken.FormatTok;
1940    if (!Tok.WhiteSpaceStart.isValid() || StructuralError)
1941      return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1;
1942
1943    unsigned Newlines = std::min(Tok.NewlinesBefore,
1944                                 Style.MaxEmptyLinesToKeep + 1);
1945    if (Newlines == 0 && !Tok.IsFirst)
1946      Newlines = 1;
1947    unsigned Indent = Level * 2;
1948
1949    bool IsAccessModifier = false;
1950    if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) ||
1951        RootToken.is(tok::kw_private))
1952      IsAccessModifier = true;
1953    else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1954             (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1955              RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1956              RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1957              RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1958      IsAccessModifier = true;
1959
1960    if (IsAccessModifier &&
1961        static_cast<int>(Indent) + Style.AccessModifierOffset >= 0)
1962      Indent += Style.AccessModifierOffset;
1963    if (!InPPDirective || Tok.HasUnescapedNewline) {
1964      Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style);
1965    } else {
1966      Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
1967                                      PreviousEndOfLineColumn, Style);
1968    }
1969    return Indent;
1970  }
1971
1972  DiagnosticsEngine &Diag;
1973  FormatStyle Style;
1974  Lexer &Lex;
1975  SourceManager &SourceMgr;
1976  WhitespaceManager Whitespaces;
1977  std::vector<CharSourceRange> Ranges;
1978  std::vector<AnnotatedLine> AnnotatedLines;
1979  bool StructuralError;
1980};
1981
1982tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1983                               SourceManager &SourceMgr,
1984                               std::vector<CharSourceRange> Ranges,
1985                               DiagnosticConsumer *DiagClient) {
1986  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1987  OwningPtr<DiagnosticConsumer> DiagPrinter;
1988  if (DiagClient == 0) {
1989    DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1990    DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1991    DiagClient = DiagPrinter.get();
1992  }
1993  DiagnosticsEngine Diagnostics(
1994      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1995      DiagClient, false);
1996  Diagnostics.setSourceManager(&SourceMgr);
1997  Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1998  return formatter.format();
1999}
2000
2001LangOptions getFormattingLangOpts() {
2002  LangOptions LangOpts;
2003  LangOpts.CPlusPlus = 1;
2004  LangOpts.CPlusPlus11 = 1;
2005  LangOpts.Bool = 1;
2006  LangOpts.ObjC1 = 1;
2007  LangOpts.ObjC2 = 1;
2008  return LangOpts;
2009}
2010
2011} // namespace format
2012} // namespace clang
2013