Format.cpp revision 6f21a988990ff5872822dcb049bd8fc65ce3d236
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-formatter"
17
18#include "TokenAnnotator.h"
19#include "UnwrappedLineParser.h"
20#include "clang/Basic/Diagnostic.h"
21#include "clang/Basic/OperatorPrecedence.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Format/Format.h"
24#include "clang/Frontend/TextDiagnosticPrinter.h"
25#include "clang/Lex/Lexer.h"
26#include "llvm/Support/Allocator.h"
27#include "llvm/Support/Debug.h"
28#include <queue>
29#include <string>
30
31namespace clang {
32namespace format {
33
34FormatStyle getLLVMStyle() {
35  FormatStyle LLVMStyle;
36  LLVMStyle.ColumnLimit = 80;
37  LLVMStyle.MaxEmptyLinesToKeep = 1;
38  LLVMStyle.PointerBindsToType = false;
39  LLVMStyle.DerivePointerBinding = false;
40  LLVMStyle.AccessModifierOffset = -2;
41  LLVMStyle.Standard = FormatStyle::LS_Cpp03;
42  LLVMStyle.IndentCaseLabels = false;
43  LLVMStyle.SpacesBeforeTrailingComments = 1;
44  LLVMStyle.BinPackParameters = true;
45  LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
46  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
47  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
48  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
49  LLVMStyle.PenaltyExcessCharacter = 1000000;
50  LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5;
51  return LLVMStyle;
52}
53
54FormatStyle getGoogleStyle() {
55  FormatStyle GoogleStyle;
56  GoogleStyle.ColumnLimit = 80;
57  GoogleStyle.MaxEmptyLinesToKeep = 1;
58  GoogleStyle.PointerBindsToType = true;
59  GoogleStyle.DerivePointerBinding = true;
60  GoogleStyle.AccessModifierOffset = -1;
61  GoogleStyle.Standard = FormatStyle::LS_Auto;
62  GoogleStyle.IndentCaseLabels = true;
63  GoogleStyle.SpacesBeforeTrailingComments = 2;
64  GoogleStyle.BinPackParameters = true;
65  GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
66  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
67  GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
68  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
69  GoogleStyle.PenaltyExcessCharacter = 1000000;
70  GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100;
71  return GoogleStyle;
72}
73
74FormatStyle getChromiumStyle() {
75  FormatStyle ChromiumStyle = getGoogleStyle();
76  ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
77  ChromiumStyle.BinPackParameters = false;
78  ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
79  ChromiumStyle.DerivePointerBinding = false;
80  return ChromiumStyle;
81}
82
83static bool isTrailingComment(const AnnotatedToken &Tok) {
84  return Tok.is(tok::comment) &&
85         (Tok.Children.empty() || Tok.Children[0].MustBreakBefore);
86}
87
88// Returns the length of everything up to the first possible line break after
89// the ), ], } or > matching \c Tok.
90static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
91  if (Tok.MatchingParen == NULL)
92    return 0;
93  AnnotatedToken *End = Tok.MatchingParen;
94  while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
95    End = &End->Children[0];
96  }
97  return End->TotalLength - Tok.TotalLength + 1;
98}
99
100/// \brief Manages the whitespaces around tokens and their replacements.
101///
102/// This includes special handling for certain constructs, e.g. the alignment of
103/// trailing line comments.
104class WhitespaceManager {
105public:
106  WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
107
108  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
109  /// each \c AnnotatedToken.
110  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
111                         unsigned Spaces, unsigned WhitespaceStartColumn,
112                         const FormatStyle &Style) {
113    // 2+ newlines mean an empty line separating logic scopes.
114    if (NewLines >= 2)
115      alignComments();
116
117    // Align line comments if they are trailing or if they continue other
118    // trailing comments.
119    if (isTrailingComment(Tok)) {
120      // Remove the comment's trailing whitespace.
121      if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
122        Replaces.insert(tooling::Replacement(
123            SourceMgr, Tok.FormatTok.Tok.getLocation().getLocWithOffset(
124                           Tok.FormatTok.TokenLength),
125            Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
126
127      // Align comment with other comments.
128      if (Tok.Parent != NULL || !Comments.empty()) {
129        if (Style.ColumnLimit >=
130            Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) {
131          Comments.push_back(StoredComment());
132          Comments.back().Tok = Tok.FormatTok;
133          Comments.back().Spaces = Spaces;
134          Comments.back().NewLines = NewLines;
135          if (NewLines == 0)
136            Comments.back().MinColumn = WhitespaceStartColumn + Spaces;
137          else
138            Comments.back().MinColumn = Spaces;
139          Comments.back().MaxColumn =
140              Style.ColumnLimit - Tok.FormatTok.TokenLength;
141          return;
142        }
143      }
144    }
145
146    // If this line does not have a trailing comment, align the stored comments.
147    if (Tok.Children.empty() && !isTrailingComment(Tok))
148      alignComments();
149    storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
150  }
151
152  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
153  /// backslashes to escape newlines inside a preprocessor directive.
154  ///
155  /// This function and \c replaceWhitespace have the same behavior if
156  /// \c Newlines == 0.
157  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
158                           unsigned Spaces, unsigned WhitespaceStartColumn,
159                           const FormatStyle &Style) {
160    storeReplacement(
161        Tok.FormatTok,
162        getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style));
163  }
164
165  /// \brief Inserts a line break into the middle of a token.
166  ///
167  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
168  /// break and \p Postfix before the rest of the token starts in the next line.
169  ///
170  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
171  /// used to generate the correct line break.
172  void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix,
173                  StringRef Postfix, bool InPPDirective, unsigned Spaces,
174                  unsigned WhitespaceStartColumn, const FormatStyle &Style) {
175    std::string NewLineText;
176    if (!InPPDirective)
177      NewLineText = getNewLineText(1, Spaces);
178    else
179      NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
180    std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
181    SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
182        .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
183    Replaces.insert(
184        tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
185  }
186
187  /// \brief Returns all the \c Replacements created during formatting.
188  const tooling::Replacements &generateReplacements() {
189    alignComments();
190    return Replaces;
191  }
192
193private:
194  std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
195    return std::string(NewLines, '\n') + std::string(Spaces, ' ');
196  }
197
198  std::string
199  getNewLineText(unsigned NewLines, unsigned Spaces,
200                 unsigned WhitespaceStartColumn, const FormatStyle &Style) {
201    std::string NewLineText;
202    if (NewLines > 0) {
203      unsigned Offset =
204          std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
205      for (unsigned i = 0; i < NewLines; ++i) {
206        NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
207        NewLineText += "\\\n";
208        Offset = 0;
209      }
210    }
211    return NewLineText + std::string(Spaces, ' ');
212  }
213
214  /// \brief Structure to store a comment for later layout and alignment.
215  struct StoredComment {
216    FormatToken Tok;
217    unsigned MinColumn;
218    unsigned MaxColumn;
219    unsigned NewLines;
220    unsigned Spaces;
221  };
222  SmallVector<StoredComment, 16> Comments;
223  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
224
225  /// \brief Try to align all stashed comments.
226  void alignComments() {
227    unsigned MinColumn = 0;
228    unsigned MaxColumn = UINT_MAX;
229    comment_iterator Start = Comments.begin();
230    for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E;
231         ++I) {
232      if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
233        alignComments(Start, I, MinColumn);
234        MinColumn = I->MinColumn;
235        MaxColumn = I->MaxColumn;
236        Start = I;
237      } else {
238        MinColumn = std::max(MinColumn, I->MinColumn);
239        MaxColumn = std::min(MaxColumn, I->MaxColumn);
240      }
241    }
242    alignComments(Start, Comments.end(), MinColumn);
243    Comments.clear();
244  }
245
246  /// \brief Put all the comments between \p I and \p E into \p Column.
247  void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
248    while (I != E) {
249      unsigned Spaces = I->Spaces + Column - I->MinColumn;
250      storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
251                               std::string(Spaces, ' '));
252      ++I;
253    }
254  }
255
256  /// \brief Stores \p Text as the replacement for the whitespace in front of
257  /// \p Tok.
258  void storeReplacement(const FormatToken &Tok, const std::string Text) {
259    // Don't create a replacement, if it does not change anything.
260    if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
261                  Tok.WhiteSpaceLength) == Text)
262      return;
263
264    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
265                                         Tok.WhiteSpaceLength, Text));
266  }
267
268  SourceManager &SourceMgr;
269  tooling::Replacements Replaces;
270};
271
272class UnwrappedLineFormatter {
273public:
274  UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
275                         const AnnotatedLine &Line, unsigned FirstIndent,
276                         const AnnotatedToken &RootToken,
277                         WhitespaceManager &Whitespaces, bool StructuralError)
278      : Style(Style), SourceMgr(SourceMgr), Line(Line),
279        FirstIndent(FirstIndent), RootToken(RootToken),
280        Whitespaces(Whitespaces), Count(0) {}
281
282  /// \brief Formats an \c UnwrappedLine.
283  ///
284  /// \returns The column after the last token in the last line of the
285  /// \c UnwrappedLine.
286  unsigned format(const AnnotatedLine *NextLine) {
287    // Initialize state dependent on indent.
288    LineState State;
289    State.Column = FirstIndent;
290    State.NextToken = &RootToken;
291    State.Stack.push_back(
292        ParenState(FirstIndent + 4, FirstIndent, !Style.BinPackParameters,
293                   /*HasMultiParameterLine=*/ false));
294    State.VariablePos = 0;
295    State.LineContainsContinuedForLoopSection = false;
296    State.ParenLevel = 0;
297    State.StartOfStringLiteral = 0;
298    State.StartOfLineLevel = State.ParenLevel;
299
300    DEBUG({
301      DebugTokenState(*State.NextToken);
302    });
303
304    // The first token has already been indented and thus consumed.
305    moveStateToNextToken(State, /*DryRun=*/ false);
306
307    // If everything fits on a single line, just put it there.
308    unsigned ColumnLimit = Style.ColumnLimit;
309    if (NextLine && NextLine->InPPDirective &&
310        !NextLine->First.FormatTok.HasUnescapedNewline)
311      ColumnLimit = getColumnLimit();
312    if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
313      while (State.NextToken != NULL) {
314        addTokenToState(false, false, State);
315      }
316      return State.Column;
317    }
318
319    // If the ObjC method declaration does not fit on a line, we should format
320    // it with one arg per line.
321    if (Line.Type == LT_ObjCMethodDecl)
322      State.Stack.back().BreakBeforeParameter = true;
323
324    // Find best solution in solution space.
325    return analyzeSolutionSpace(State);
326  }
327
328private:
329  void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
330    const Token &Tok = AnnotatedTok.FormatTok.Tok;
331    llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
332                              Tok.getLength());
333    llvm::errs();
334  }
335
336  struct ParenState {
337    ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
338               bool HasMultiParameterLine)
339        : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
340          BreakBeforeClosingBrace(false), QuestionColumn(0),
341          AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
342          HasMultiParameterLine(HasMultiParameterLine), ColonPos(0),
343          StartOfFunctionCall(0) {}
344
345    /// \brief The position to which a specific parenthesis level needs to be
346    /// indented.
347    unsigned Indent;
348
349    /// \brief The position of the last space on each level.
350    ///
351    /// Used e.g. to break like:
352    /// functionCall(Parameter, otherCall(
353    ///                             OtherParameter));
354    unsigned LastSpace;
355
356    /// \brief The position the first "<<" operator encountered on each level.
357    ///
358    /// Used to align "<<" operators. 0 if no such operator has been encountered
359    /// on a level.
360    unsigned FirstLessLess;
361
362    /// \brief Whether a newline needs to be inserted before the block's closing
363    /// brace.
364    ///
365    /// We only want to insert a newline before the closing brace if there also
366    /// was a newline after the beginning left brace.
367    bool BreakBeforeClosingBrace;
368
369    /// \brief The column of a \c ? in a conditional expression;
370    unsigned QuestionColumn;
371
372    /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
373    /// lines, in this context.
374    bool AvoidBinPacking;
375
376    /// \brief Break after the next comma (or all the commas in this context if
377    /// \c AvoidBinPacking is \c true).
378    bool BreakBeforeParameter;
379
380    /// \brief This context already has a line with more than one parameter.
381    bool HasMultiParameterLine;
382
383    /// \brief The position of the colon in an ObjC method declaration/call.
384    unsigned ColonPos;
385
386    /// \brief The start of the most recent function in a builder-type call.
387    unsigned StartOfFunctionCall;
388
389    bool operator<(const ParenState &Other) const {
390      if (Indent != Other.Indent)
391        return Indent < Other.Indent;
392      if (LastSpace != Other.LastSpace)
393        return LastSpace < Other.LastSpace;
394      if (FirstLessLess != Other.FirstLessLess)
395        return FirstLessLess < Other.FirstLessLess;
396      if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
397        return BreakBeforeClosingBrace;
398      if (QuestionColumn != Other.QuestionColumn)
399        return QuestionColumn < Other.QuestionColumn;
400      if (AvoidBinPacking != Other.AvoidBinPacking)
401        return AvoidBinPacking;
402      if (BreakBeforeParameter != Other.BreakBeforeParameter)
403        return BreakBeforeParameter;
404      if (HasMultiParameterLine != Other.HasMultiParameterLine)
405        return HasMultiParameterLine;
406      if (ColonPos != Other.ColonPos)
407        return ColonPos < Other.ColonPos;
408      if (StartOfFunctionCall != Other.StartOfFunctionCall)
409        return StartOfFunctionCall < Other.StartOfFunctionCall;
410      return false;
411    }
412  };
413
414  /// \brief The current state when indenting a unwrapped line.
415  ///
416  /// As the indenting tries different combinations this is copied by value.
417  struct LineState {
418    /// \brief The number of used columns in the current line.
419    unsigned Column;
420
421    /// \brief The token that needs to be next formatted.
422    const AnnotatedToken *NextToken;
423
424    /// \brief The column of the first variable name in a variable declaration.
425    ///
426    /// Used to align further variables if necessary.
427    unsigned VariablePos;
428
429    /// \brief \c true if this line contains a continued for-loop section.
430    bool LineContainsContinuedForLoopSection;
431
432    /// \brief The level of nesting inside (), [], <> and {}.
433    unsigned ParenLevel;
434
435    /// \brief The \c ParenLevel at the start of this line.
436    unsigned StartOfLineLevel;
437
438    /// \brief The start column of the string literal, if we're in a string
439    /// literal sequence, 0 otherwise.
440    unsigned StartOfStringLiteral;
441
442    /// \brief A stack keeping track of properties applying to parenthesis
443    /// levels.
444    std::vector<ParenState> Stack;
445
446    /// \brief Comparison operator to be able to used \c LineState in \c map.
447    bool operator<(const LineState &Other) const {
448      if (NextToken != Other.NextToken)
449        return NextToken < Other.NextToken;
450      if (Column != Other.Column)
451        return Column < Other.Column;
452      if (VariablePos != Other.VariablePos)
453        return VariablePos < Other.VariablePos;
454      if (LineContainsContinuedForLoopSection !=
455          Other.LineContainsContinuedForLoopSection)
456        return LineContainsContinuedForLoopSection;
457      if (ParenLevel != Other.ParenLevel)
458        return ParenLevel < Other.ParenLevel;
459      if (StartOfLineLevel != Other.StartOfLineLevel)
460        return StartOfLineLevel < Other.StartOfLineLevel;
461      if (StartOfStringLiteral != Other.StartOfStringLiteral)
462        return StartOfStringLiteral < Other.StartOfStringLiteral;
463      return Stack < Other.Stack;
464    }
465  };
466
  /// \brief Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Adds a line break and the necessary indentation before the token if
  /// \p Newline is \c true and puts the token on the current line otherwise.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  ///
  /// \returns 0 for tokens of type \c TT_ImplicitStringLiteral, otherwise the
  /// value returned by \c moveStateToNextToken().
  unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
    const AnnotatedToken &Current = *State.NextToken;
    const AnnotatedToken &Previous = *State.NextToken->Parent;
    assert(State.Stack.size());

    // Implicit string literals keep their original whitespace: only advance
    // the column and move on to the next token.
    if (Current.Type == TT_ImplicitStringLiteral) {
      State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
                      State.NextToken->FormatTok.TokenLength;
      if (State.NextToken->Children.empty())
        State.NextToken = NULL;
      else
        State.NextToken = &State.NextToken->Children[0];
      return 0;
    }

    if (Newline) {
      // Remember where the previous line ended before State.Column is
      // overwritten with the indentation of the new line.
      unsigned WhitespaceStartColumn = State.Column;
      if (Current.is(tok::r_brace)) {
        State.Column = Line.Level * 2;
      } else if (Current.is(tok::string_literal) &&
                 State.StartOfStringLiteral != 0) {
        // Align with the start of the string literal sequence.
        State.Column = State.StartOfStringLiteral;
        State.Stack.back().BreakBeforeParameter = true;
      } else if (Current.is(tok::lessless) &&
                 State.Stack.back().FirstLessLess != 0) {
        // Align with the first "<<" on this level.
        State.Column = State.Stack.back().FirstLessLess;
      } else if (State.ParenLevel != 0 &&
                 (Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
                  Current.is(tok::period) || Current.is(tok::arrow) ||
                  Current.is(tok::question))) {
        // Indent an extra 4 spaces if we know the current expression is
        // continued.  Don't do that on the top level, as we already indent 4
        // there.
        State.Column = std::max(State.Stack.back().LastSpace,
                                State.Stack.back().Indent) + 4;
      } else if (Current.Type == TT_ConditionalExpr) {
        State.Column = State.Stack.back().QuestionColumn;
      } else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
                 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
                  State.ParenLevel == 0)) {
        State.Column = State.VariablePos;
      } else if (Previous.ClosesTemplateDeclaration ||
                 (Current.Type == TT_StartOfName && State.ParenLevel == 0)) {
        State.Column = State.Stack.back().Indent - 4;
      } else if (Current.Type == TT_ObjCSelectorName) {
        // Place the selector name so that it ends at the recorded colon
        // position if it fits; otherwise start at the indent and record a new
        // colon position.
        if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
          State.Column =
              State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
        } else {
          State.Column = State.Stack.back().Indent;
          State.Stack.back().ColonPos =
              State.Column + Current.FormatTok.TokenLength;
        }
      } else if (Previous.Type == TT_ObjCMethodExpr ||
                 Current.Type == TT_StartOfName) {
        State.Column = State.Stack.back().Indent + 4;
      } else {
        State.Column = State.Stack.back().Indent;
      }

      if (Current.is(tok::question))
        State.Stack.back().BreakBeforeParameter = true;
      if ((Previous.is(tok::comma) || Previous.is(tok::semi)) &&
          !State.Stack.back().AvoidBinPacking)
        State.Stack.back().BreakBeforeParameter = false;

      if (!DryRun) {
        unsigned NewLines = 1;
        // For line comments, keep the line breaks found in the source, capped
        // at Style.MaxEmptyLinesToKeep + 1.
        if (Current.Type == TT_LineComment)
          NewLines =
              std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
                                          Style.MaxEmptyLinesToKeep + 1));
        if (!Line.InPPDirective)
          Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
                                        WhitespaceStartColumn, Style);
        else
          Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
                                          WhitespaceStartColumn, Style);
      }

      State.Stack.back().LastSpace = State.Column;
      State.StartOfLineLevel = State.ParenLevel;
      if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr)
        State.Stack.back().Indent += 2;

      // Any break on this level means that the parent level has been broken
      // and we need to avoid bin packing there.
      for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
        State.Stack[i].BreakBeforeParameter = true;
      }
      if (Current.is(tok::period) || Current.is(tok::arrow))
        State.Stack.back().BreakBeforeParameter = true;

      // If we break after {, we should also break before the corresponding }.
      if (Previous.is(tok::l_brace))
        State.Stack.back().BreakBeforeClosingBrace = true;

      if (State.Stack.back().AvoidBinPacking) {
        // If we are breaking after '(', '{', '<', this is not bin packing
        // unless AllowAllParametersOfDeclarationOnNextLine is false.
        if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) ||
            (!Style.AllowAllParametersOfDeclarationOnNextLine &&
             Line.MustBeDeclaration))
          State.Stack.back().BreakBeforeParameter = true;
      }
    } else {
      // FIXME: Put VariablePos into ParenState and remove second part of if().
      if (Current.is(tok::equal) &&
          (RootToken.is(tok::kw_for) || State.ParenLevel == 0))
        State.VariablePos = State.Column - Previous.FormatTok.TokenLength;

      unsigned Spaces = State.NextToken->SpacesRequiredBefore;

      if (!DryRun)
        Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style);

      // Record the colon position for the first ObjC selector name on this
      // level: the larger of the column after this name and the indent plus
      // Current.LongestObjCSelectorName.
      if (Current.Type == TT_ObjCSelectorName &&
          State.Stack.back().ColonPos == 0) {
        if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
            State.Column + Spaces + Current.FormatTok.TokenLength)
          State.Stack.back().ColonPos =
              State.Stack.back().Indent + Current.LongestObjCSelectorName;
        else
          State.Stack.back().ColonPos =
              State.Column + Spaces + Current.FormatTok.TokenLength;
      }

      // The first token after an opening (, { or template < fixes the indent
      // of this level, unless it is a line comment.
      if (Current.Type != TT_LineComment &&
          (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
           State.NextToken->Parent->Type == TT_TemplateOpener))
        State.Stack.back().Indent = State.Column + Spaces;
      if (Previous.is(tok::comma) && !isTrailingComment(Current))
        State.Stack.back().HasMultiParameterLine = true;

      State.Column += Spaces;
      if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
        // Treat the condition inside an if as if it was a second function
        // parameter, i.e. let nested calls have an indent of 4.
        State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
      else if (Previous.is(tok::comma) && State.ParenLevel != 0)
        // Top-level spaces are exempt as that mostly leads to better results.
        State.Stack.back().LastSpace = State.Column;
      else if ((Previous.Type == TT_BinaryOperator ||
                Previous.Type == TT_ConditionalExpr ||
                Previous.Type == TT_CtorInitializerColon) &&
               getPrecedence(Previous) != prec::Assignment)
        State.Stack.back().LastSpace = State.Column;
      else if (Previous.Type == TT_InheritanceColon)
        State.Stack.back().Indent = State.Column;
      else if (Previous.ParameterCount > 1 &&
               (Previous.is(tok::l_paren) || Previous.is(tok::l_square) ||
                Previous.is(tok::l_brace) ||
                Previous.Type == TT_TemplateOpener))
        // If this function has multiple parameters, indent nested calls from
        // the start of the first parameter.
        State.Stack.back().LastSpace = State.Column;
    }

    return moveStateToNextToken(State, DryRun);
  }
635
  /// \brief Mark the next token as consumed in \p State and modify its stacks
  /// accordingly.
  ///
  /// Advances \c State.Column by the token's length and \c State.NextToken to
  /// the token's first child (or \c NULL if it has none).
  ///
  /// \p DryRun is forwarded to \c breakProtrudingToken().
  ///
  /// \returns The value returned by \c breakProtrudingToken() for the
  /// consumed token.
  unsigned moveStateToNextToken(LineState &State, bool DryRun) {
    const AnnotatedToken &Current = *State.NextToken;
    assert(State.Stack.size());

    if (Current.Type == TT_InheritanceColon)
      State.Stack.back().AvoidBinPacking = true;
    // Remember the column of the first "<<" on this level.
    if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
      State.Stack.back().FirstLessLess = State.Column;
    if (Current.is(tok::question))
      State.Stack.back().QuestionColumn = State.Column;
    // In a top-level builder-type call, track where the most recent call
    // starts; reset to 0 for the last call in the chain.
    if ((Current.is(tok::period) || Current.is(tok::arrow)) &&
        Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
      State.Stack.back().StartOfFunctionCall =
          Current.LastInChainOfCalls ? 0 : State.Column;
    if (Current.Type == TT_CtorInitializerColon) {
      if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
        State.Stack.back().AvoidBinPacking = true;
      State.Stack.back().BreakBeforeParameter = false;
    }

    // Insert scopes created by fake parenthesis.
    // Each one copies the current level, with the indent raised to at least
    // the current column and BreakBeforeParameter cleared.
    for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
      ParenState NewParenState = State.Stack.back();
      NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
      NewParenState.BreakBeforeParameter = false;
      State.Stack.push_back(NewParenState);
    }

    // If we encounter an opening (, [, { or <, we add a level to our stacks to
    // prepare for the following tokens.
    if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
        Current.is(tok::l_brace) ||
        State.NextToken->Type == TT_TemplateOpener) {
      unsigned NewIndent;
      bool AvoidBinPacking;
      if (Current.is(tok::l_brace)) {
        // Braces indent by 2 from the last space and never avoid bin packing.
        NewIndent = 2 + State.Stack.back().LastSpace;
        AvoidBinPacking = false;
      } else {
        // Other openers indent by 4 from the last space or the start of the
        // most recent builder-type call, whichever is further right.
        NewIndent = 4 + std::max(State.Stack.back().LastSpace,
                                 State.Stack.back().StartOfFunctionCall);
        AvoidBinPacking =
            !Style.BinPackParameters || State.Stack.back().AvoidBinPacking;
      }
      State.Stack.push_back(
          ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
                     State.Stack.back().HasMultiParameterLine));
      ++State.ParenLevel;
    }

    // If this '[' opens an ObjC call, determine whether all parameters fit into
    // one line and put one per line if they don't.
    if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
        Current.MatchingParen != NULL) {
      if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
        State.Stack.back().BreakBeforeParameter = true;
    }

    // If we encounter a closing ), ], } or >, we can remove a level from our
    // stacks.
    // A closing brace that is the root token of the line does not close a
    // level opened on this line, so it is skipped.
    if (Current.is(tok::r_paren) || Current.is(tok::r_square) ||
        (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
        State.NextToken->Type == TT_TemplateCloser) {
      State.Stack.pop_back();
      --State.ParenLevel;
    }

    // Remove scopes created by fake parenthesis.
    for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
      State.Stack.pop_back();
    }

    // Track the start of a string literal sequence; comments do not end the
    // sequence, any other token does.
    if (Current.is(tok::string_literal)) {
      State.StartOfStringLiteral = State.Column;
    } else if (Current.isNot(tok::comment)) {
      State.StartOfStringLiteral = 0;
    }

    State.Column += Current.FormatTok.TokenLength;

    if (State.NextToken->Children.empty())
      State.NextToken = NULL;
    else
      State.NextToken = &State.NextToken->Children[0];

    return breakProtrudingToken(Current, State, DryRun);
  }
725
726  /// \brief If the current token sticks out over the end of the line, break
727  /// it if possible.
728  unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
729                                bool DryRun) {
730    if (Current.isNot(tok::string_literal))
731      return 0;
732    // Only break up default narrow strings.
733    if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0)
734      return 0;
735
736    unsigned Penalty = 0;
737    unsigned TailOffset = 0;
738    unsigned TailLength = Current.FormatTok.TokenLength;
739    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
740    unsigned OffsetFromStart = 0;
741    while (StartColumn + TailLength > getColumnLimit()) {
742      StringRef Text = StringRef(
743          Current.FormatTok.Tok.getLiteralData() + TailOffset, TailLength);
744      if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
745        break;
746      StringRef::size_type SplitPoint = getSplitPoint(
747          Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
748      if (SplitPoint == StringRef::npos)
749        break;
750      assert(SplitPoint != 0);
751      // +2, because 'Text' starts after the opening quotes, and does not
752      // include the closing quote we need to insert.
753      unsigned WhitespaceStartColumn =
754          StartColumn + OffsetFromStart + SplitPoint + 2;
755      State.Stack.back().LastSpace = StartColumn;
756      if (!DryRun) {
757        Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"",
758                               Line.InPPDirective, StartColumn,
759                               WhitespaceStartColumn, Style);
760      }
761      TailOffset += SplitPoint + 1;
762      TailLength -= SplitPoint + 1;
763      OffsetFromStart = 1;
764      Penalty += Style.PenaltyExcessCharacter;
765      for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
766        State.Stack[i].BreakBeforeParameter = true;
767    }
768    State.Column = StartColumn + TailLength;
769    return Penalty;
770  }
771
772  StringRef::size_type
773  getSplitPoint(StringRef Text, StringRef::size_type Offset) {
774    StringRef::size_type SpaceOffset = Text.rfind(' ', Offset);
775    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
776      return SpaceOffset;
777    StringRef::size_type SlashOffset = Text.rfind('/', Offset);
778    if (SlashOffset != StringRef::npos && SlashOffset != 0)
779      return SlashOffset;
780    StringRef::size_type Split = getStartOfCharacter(Text, Offset);
781    if (Split != StringRef::npos && Split > 1)
782      // Do not split at 0.
783      return Split - 1;
784    return StringRef::npos;
785  }
786
787  StringRef::size_type
788  getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
789    StringRef::size_type NextEscape = Text.find('\\');
790    while (NextEscape != StringRef::npos && NextEscape < Offset) {
791      StringRef::size_type SequenceLength =
792          getEscapeSequenceLength(Text.substr(NextEscape));
793      if (Offset < NextEscape + SequenceLength)
794        return NextEscape;
795      NextEscape = Text.find('\\', NextEscape + SequenceLength);
796    }
797    return Offset;
798  }
799
800  unsigned getEscapeSequenceLength(StringRef Text) {
801    assert(Text[0] == '\\');
802    if (Text.size() < 2)
803      return 1;
804
805    switch (Text[1]) {
806    case 'u':
807      return 6;
808    case 'U':
809      return 10;
810    case 'x':
811      return getHexLength(Text);
812    default:
813      if (Text[1] >= '0' && Text[1] <= '7')
814        return getOctalLength(Text);
815      return 2;
816    }
817  }
818
819  unsigned getHexLength(StringRef Text) {
820    unsigned I = 2; // Point after '\x'.
821    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
822                               (Text[I] >= 'a' && Text[I] <= 'f') ||
823                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
824      ++I;
825    }
826    return I;
827  }
828
829  unsigned getOctalLength(StringRef Text) {
830    unsigned I = 1;
831    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
832      ++I;
833    }
834    return I;
835  }
836
837  unsigned getColumnLimit() {
838    return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
839  }
840
841  /// \brief An edge in the solution space from \c Previous->State to \c State,
842  /// inserting a newline dependent on the \c NewLine.
843  struct StateNode {
844    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
845        : State(State), NewLine(NewLine), Previous(Previous) {}
846    LineState State;
847    bool NewLine;
848    StateNode *Previous;
849  };
850
851  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
852  ///
853  /// In case of equal penalties, we want to prefer states that were inserted
854  /// first. During state generation we make sure that we insert states first
855  /// that break the line as late as possible.
856  typedef std::pair<unsigned, unsigned> OrderedPenalty;
857
858  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
859  /// \c State has the given \c OrderedPenalty.
860  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
861
862  /// \brief The BFS queue type.
863  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
864                              std::greater<QueueItem> > QueueType;
865
866  /// \brief Analyze the entire solution space starting from \p InitialState.
867  ///
868  /// This implements a variant of Dijkstra's algorithm on the graph that spans
869  /// the solution space (\c LineStates are the nodes). The algorithm tries to
870  /// find the shortest path (the one with lowest penalty) from \p InitialState
871  /// to a state where all tokens are placed.
872  unsigned analyzeSolutionSpace(LineState &InitialState) {
873    std::set<LineState> Seen;
874
875    // Insert start element into queue.
876    StateNode *Node =
877        new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
878    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
879    ++Count;
880
881    // While not empty, take first element and follow edges.
882    while (!Queue.empty()) {
883      unsigned Penalty = Queue.top().first.first;
884      StateNode *Node = Queue.top().second;
885      if (Node->State.NextToken == NULL) {
886        DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
887        break;
888      }
889      Queue.pop();
890
891      if (!Seen.insert(Node->State).second)
892        // State already examined with lower penalty.
893        continue;
894
895      addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
896      addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
897    }
898
899    if (Queue.empty())
900      // We were unable to find a solution, do nothing.
901      // FIXME: Add diagnostic?
902      return 0;
903
904    // Reconstruct the solution.
905    reconstructPath(InitialState, Queue.top().second);
906    DEBUG(llvm::errs() << "---\n");
907
908    // Return the column after the last token of the solution.
909    return Queue.top().second->State.Column;
910  }
911
912  void reconstructPath(LineState &State, StateNode *Current) {
913    // FIXME: This recursive implementation limits the possible number
914    // of tokens per line if compiled into a binary with small stack space.
915    // To become more independent of stack frame limitations we would need
916    // to also change the TokenAnnotator.
917    if (Current->Previous == NULL)
918      return;
919    reconstructPath(State, Current->Previous);
920    DEBUG({
921      if (Current->NewLine) {
922        llvm::errs()
923            << "Penalty for splitting before "
924            << Current->Previous->State.NextToken->FormatTok.Tok.getName()
925            << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
926      }
927    });
928    addTokenToState(Current->NewLine, false, State);
929  }
930
931  /// \brief Add the following state to the analysis queue \c Queue.
932  ///
933  /// Assume the current state is \p PreviousNode and has been reached with a
934  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
935  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
936                           bool NewLine) {
937    if (NewLine && !canBreak(PreviousNode->State))
938      return;
939    if (!NewLine && mustBreak(PreviousNode->State))
940      return;
941    if (NewLine)
942      Penalty += PreviousNode->State.NextToken->SplitPenalty;
943
944    StateNode *Node = new (Allocator.Allocate())
945        StateNode(PreviousNode->State, NewLine, PreviousNode);
946    Penalty += addTokenToState(NewLine, true, Node->State);
947    if (Node->State.Column > getColumnLimit()) {
948      unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
949      Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
950    }
951
952    Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
953    ++Count;
954  }
955
956  /// \brief Returns \c true, if a line break after \p State is allowed.
957  bool canBreak(const LineState &State) {
958    if (!State.NextToken->CanBreakBefore &&
959        !(State.NextToken->is(tok::r_brace) &&
960          State.Stack.back().BreakBeforeClosingBrace))
961      return false;
962    // This prevents breaks like:
963    //   ...
964    //   SomeParameter, OtherParameter).DoSomething(
965    //   ...
966    // As they hide "DoSomething" and generally bad for readability.
967    if (State.NextToken->Parent->is(tok::l_paren) &&
968        State.ParenLevel <= State.StartOfLineLevel)
969      return false;
970    // Trying to insert a parameter on a new line if there are already more than
971    // one parameter on the current line is bin packing.
972    if (State.Stack.back().HasMultiParameterLine &&
973        State.Stack.back().AvoidBinPacking)
974      return false;
975    return true;
976  }
977
978  /// \brief Returns \c true, if a line break after \p State is mandatory.
979  bool mustBreak(const LineState &State) {
980    if (State.NextToken->MustBreakBefore)
981      return true;
982    if (State.NextToken->is(tok::r_brace) &&
983        State.Stack.back().BreakBeforeClosingBrace)
984      return true;
985    if (State.NextToken->Parent->is(tok::semi) &&
986        State.LineContainsContinuedForLoopSection)
987      return true;
988    if ((State.NextToken->Parent->is(tok::comma) ||
989         State.NextToken->Parent->is(tok::semi) ||
990         State.NextToken->is(tok::question) ||
991         State.NextToken->Type == TT_ConditionalExpr) &&
992        State.Stack.back().BreakBeforeParameter &&
993        !isTrailingComment(*State.NextToken) &&
994        State.NextToken->isNot(tok::r_paren) &&
995        State.NextToken->isNot(tok::r_brace))
996      return true;
997    // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
998    // out whether it is the first parameter. Clean this up.
999    if (State.NextToken->Type == TT_ObjCSelectorName &&
1000        State.NextToken->LongestObjCSelectorName == 0 &&
1001        State.Stack.back().BreakBeforeParameter)
1002      return true;
1003    if ((State.NextToken->Type == TT_CtorInitializerColon ||
1004         (State.NextToken->Parent->ClosesTemplateDeclaration &&
1005          State.ParenLevel == 0)))
1006      return true;
1007    return false;
1008  }
1009
1010  FormatStyle Style;
1011  SourceManager &SourceMgr;
1012  const AnnotatedLine &Line;
1013  const unsigned FirstIndent;
1014  const AnnotatedToken &RootToken;
1015  WhitespaceManager &Whitespaces;
1016
1017  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1018  QueueType Queue;
1019  // Increasing count of \c StateNode items we have created. This is used
1020  // to create a deterministic order independent of the container.
1021  unsigned Count;
1022};
1023
1024class LexerBasedFormatTokenSource : public FormatTokenSource {
1025public:
1026  LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1027      : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1028        IdentTable(Lex.getLangOpts()) {
1029    Lex.SetKeepWhitespaceMode(true);
1030  }
1031
1032  virtual FormatToken getNextToken() {
1033    if (GreaterStashed) {
1034      FormatTok.NewlinesBefore = 0;
1035      FormatTok.WhiteSpaceStart =
1036          FormatTok.Tok.getLocation().getLocWithOffset(1);
1037      FormatTok.WhiteSpaceLength = 0;
1038      GreaterStashed = false;
1039      return FormatTok;
1040    }
1041
1042    FormatTok = FormatToken();
1043    Lex.LexFromRawLexer(FormatTok.Tok);
1044    StringRef Text = rawTokenText(FormatTok.Tok);
1045    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1046    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1047      FormatTok.IsFirst = true;
1048
1049    // Consume and record whitespace until we find a significant token.
1050    while (FormatTok.Tok.is(tok::unknown)) {
1051      unsigned Newlines = Text.count('\n');
1052      if (Newlines > 0)
1053        FormatTok.LastNewlineOffset =
1054            FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
1055      unsigned EscapedNewlines = Text.count("\\\n");
1056      FormatTok.NewlinesBefore += Newlines;
1057      FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
1058      FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1059
1060      if (FormatTok.Tok.is(tok::eof))
1061        return FormatTok;
1062      Lex.LexFromRawLexer(FormatTok.Tok);
1063      Text = rawTokenText(FormatTok.Tok);
1064    }
1065
1066    // Now FormatTok is the next non-whitespace token.
1067    FormatTok.TokenLength = Text.size();
1068
1069    // In case the token starts with escaped newlines, we want to
1070    // take them into account as whitespace - this pattern is quite frequent
1071    // in macro definitions.
1072    // FIXME: What do we want to do with other escaped spaces, and escaped
1073    // spaces or newlines in the middle of tokens?
1074    // FIXME: Add a more explicit test.
1075    unsigned i = 0;
1076    while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1077      // FIXME: ++FormatTok.NewlinesBefore is missing...
1078      FormatTok.WhiteSpaceLength += 2;
1079      FormatTok.TokenLength -= 2;
1080      i += 2;
1081    }
1082
1083    if (FormatTok.Tok.is(tok::raw_identifier)) {
1084      IdentifierInfo &Info = IdentTable.get(Text);
1085      FormatTok.Tok.setIdentifierInfo(&Info);
1086      FormatTok.Tok.setKind(Info.getTokenID());
1087    }
1088
1089    if (FormatTok.Tok.is(tok::greatergreater)) {
1090      FormatTok.Tok.setKind(tok::greater);
1091      FormatTok.TokenLength = 1;
1092      GreaterStashed = true;
1093    }
1094
1095    // If we reformat comments, we remove trailing whitespace. Update the length
1096    // accordingly.
1097    if (FormatTok.Tok.is(tok::comment))
1098      FormatTok.TokenLength = Text.rtrim().size();
1099
1100    return FormatTok;
1101  }
1102
1103  IdentifierTable &getIdentTable() { return IdentTable; }
1104
1105private:
1106  FormatToken FormatTok;
1107  bool GreaterStashed;
1108  Lexer &Lex;
1109  SourceManager &SourceMgr;
1110  IdentifierTable IdentTable;
1111
1112  /// Returns the text of \c FormatTok.
1113  StringRef rawTokenText(Token &Tok) {
1114    return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1115                     Tok.getLength());
1116  }
1117};
1118
1119class Formatter : public UnwrappedLineConsumer {
1120public:
1121  Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1122            SourceManager &SourceMgr,
1123            const std::vector<CharSourceRange> &Ranges)
1124      : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1125        Whitespaces(SourceMgr), Ranges(Ranges) {}
1126
1127  virtual ~Formatter() {}
1128
1129  void deriveLocalStyle() {
1130    unsigned CountBoundToVariable = 0;
1131    unsigned CountBoundToType = 0;
1132    bool HasCpp03IncompatibleFormat = false;
1133    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1134      if (AnnotatedLines[i].First.Children.empty())
1135        continue;
1136      AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
1137      while (!Tok->Children.empty()) {
1138        if (Tok->Type == TT_PointerOrReference) {
1139          bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
1140          bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
1141          if (SpacesBefore && !SpacesAfter)
1142            ++CountBoundToVariable;
1143          else if (!SpacesBefore && SpacesAfter)
1144            ++CountBoundToType;
1145        }
1146
1147        if (Tok->Type == TT_TemplateCloser &&
1148            Tok->Parent->Type == TT_TemplateCloser &&
1149            Tok->FormatTok.WhiteSpaceLength == 0)
1150          HasCpp03IncompatibleFormat = true;
1151        Tok = &Tok->Children[0];
1152      }
1153    }
1154    if (Style.DerivePointerBinding) {
1155      if (CountBoundToType > CountBoundToVariable)
1156        Style.PointerBindsToType = true;
1157      else if (CountBoundToType < CountBoundToVariable)
1158        Style.PointerBindsToType = false;
1159    }
1160    if (Style.Standard == FormatStyle::LS_Auto) {
1161      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1162                                                  : FormatStyle::LS_Cpp03;
1163    }
1164  }
1165
1166  tooling::Replacements format() {
1167    LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1168    UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1169    StructuralError = Parser.parse();
1170    unsigned PreviousEndOfLineColumn = 0;
1171    TokenAnnotator Annotator(Style, SourceMgr, Lex,
1172                             Tokens.getIdentTable().get("in"));
1173    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1174      Annotator.annotate(AnnotatedLines[i]);
1175    }
1176    deriveLocalStyle();
1177    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1178      Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1179    }
1180    std::vector<int> IndentForLevel;
1181    bool PreviousLineWasTouched = false;
1182    for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1183                                              E = AnnotatedLines.end();
1184         I != E; ++I) {
1185      const AnnotatedLine &TheLine = *I;
1186      int Offset = getIndentOffset(TheLine.First);
1187      while (IndentForLevel.size() <= TheLine.Level)
1188        IndentForLevel.push_back(-1);
1189      IndentForLevel.resize(TheLine.Level + 1);
1190      bool WasMoved =
1191          PreviousLineWasTouched && TheLine.First.FormatTok.NewlinesBefore == 0;
1192      if (TheLine.First.is(tok::eof)) {
1193        if (PreviousLineWasTouched) {
1194          unsigned NewLines =
1195              std::min(TheLine.First.FormatTok.NewlinesBefore, 1u);
1196          Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
1197                                        /*WhitespaceStartColumn*/ 0, Style);
1198        }
1199      } else if (TheLine.Type != LT_Invalid &&
1200                 (WasMoved || touchesLine(TheLine))) {
1201        unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1202        unsigned Indent = LevelIndent;
1203        if (static_cast<int>(Indent) + Offset >= 0)
1204          Indent += Offset;
1205        if (!TheLine.First.FormatTok.WhiteSpaceStart.isValid() ||
1206            StructuralError) {
1207          Indent = LevelIndent = SourceMgr.getSpellingColumnNumber(
1208              TheLine.First.FormatTok.Tok.getLocation()) - 1;
1209        } else {
1210          formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1211                           PreviousEndOfLineColumn);
1212        }
1213        tryFitMultipleLinesInOne(Indent, I, E);
1214        UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1215                                         TheLine.First, Whitespaces,
1216                                         StructuralError);
1217        PreviousEndOfLineColumn =
1218            Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
1219        IndentForLevel[TheLine.Level] = LevelIndent;
1220        PreviousLineWasTouched = true;
1221      } else {
1222        if (TheLine.First.FormatTok.NewlinesBefore > 0 ||
1223            TheLine.First.FormatTok.IsFirst) {
1224          unsigned Indent = SourceMgr.getSpellingColumnNumber(
1225              TheLine.First.FormatTok.Tok.getLocation()) - 1;
1226          unsigned LevelIndent = Indent;
1227          if (static_cast<int>(LevelIndent) - Offset >= 0)
1228            LevelIndent -= Offset;
1229          IndentForLevel[TheLine.Level] = LevelIndent;
1230
1231          // Remove trailing whitespace of the previous line if it was touched.
1232          if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
1233            formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1234                             PreviousEndOfLineColumn);
1235        }
1236        // If we did not reformat this unwrapped line, the column at the end of
1237        // the last token is unchanged - thus, we can calculate the end of the
1238        // last token.
1239        PreviousEndOfLineColumn =
1240            SourceMgr.getSpellingColumnNumber(
1241                TheLine.Last->FormatTok.Tok.getLocation()) +
1242            Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
1243                                   SourceMgr, Lex.getLangOpts()) - 1;
1244        PreviousLineWasTouched = false;
1245      }
1246    }
1247    return Whitespaces.generateReplacements();
1248  }
1249
1250private:
1251  /// \brief Get the indent of \p Level from \p IndentForLevel.
1252  ///
1253  /// \p IndentForLevel must contain the indent for the level \c l
1254  /// at \p IndentForLevel[l], or a value < 0 if the indent for
1255  /// that level is unknown.
1256  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1257    if (IndentForLevel[Level] != -1)
1258      return IndentForLevel[Level];
1259    if (Level == 0)
1260      return 0;
1261    return getIndent(IndentForLevel, Level - 1) + 2;
1262  }
1263
1264  /// \brief Get the offset of the line relatively to the level.
1265  ///
1266  /// For example, 'public:' labels in classes are offset by 1 or 2
1267  /// characters to the left from their level.
1268  int getIndentOffset(const AnnotatedToken &RootToken) {
1269    bool IsAccessModifier = false;
1270    if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) ||
1271        RootToken.is(tok::kw_private))
1272      IsAccessModifier = true;
1273    else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1274             (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1275              RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1276              RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1277              RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1278      IsAccessModifier = true;
1279
1280    if (IsAccessModifier)
1281      return Style.AccessModifierOffset;
1282    return 0;
1283  }
1284
1285  /// \brief Tries to merge lines into one.
1286  ///
1287  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1288  /// if possible; note that \c I will be incremented when lines are merged.
1289  ///
1290  /// Returns whether the resulting \c Line can fit in a single line.
1291  void tryFitMultipleLinesInOne(unsigned Indent,
1292                                std::vector<AnnotatedLine>::iterator &I,
1293                                std::vector<AnnotatedLine>::iterator E) {
1294    // We can never merge stuff if there are trailing line comments.
1295    if (I->Last->Type == TT_LineComment)
1296      return;
1297
1298    unsigned Limit = Style.ColumnLimit - Indent;
1299    // If we already exceed the column limit, we set 'Limit' to 0. The different
1300    // tryMerge..() functions can then decide whether to still do merging.
1301    Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1302
1303    if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1304      return;
1305
1306    if (I->Last->is(tok::l_brace)) {
1307      tryMergeSimpleBlock(I, E, Limit);
1308    } else if (I->First.is(tok::kw_if)) {
1309      tryMergeSimpleIf(I, E, Limit);
1310    } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1311                                    I->First.FormatTok.IsFirst)) {
1312      tryMergeSimplePPDirective(I, E, Limit);
1313    }
1314    return;
1315  }
1316
1317  void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1318                                 std::vector<AnnotatedLine>::iterator E,
1319                                 unsigned Limit) {
1320    if (Limit == 0)
1321      return;
1322    AnnotatedLine &Line = *I;
1323    if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1324      return;
1325    if (I + 2 != E && (I + 2)->InPPDirective &&
1326        !(I + 2)->First.FormatTok.HasUnescapedNewline)
1327      return;
1328    if (1 + (I + 1)->Last->TotalLength > Limit)
1329      return;
1330    join(Line, *(++I));
1331  }
1332
1333  void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1334                        std::vector<AnnotatedLine>::iterator E,
1335                        unsigned Limit) {
1336    if (Limit == 0)
1337      return;
1338    if (!Style.AllowShortIfStatementsOnASingleLine)
1339      return;
1340    if ((I + 1)->InPPDirective != I->InPPDirective ||
1341        ((I + 1)->InPPDirective &&
1342         (I + 1)->First.FormatTok.HasUnescapedNewline))
1343      return;
1344    AnnotatedLine &Line = *I;
1345    if (Line.Last->isNot(tok::r_paren))
1346      return;
1347    if (1 + (I + 1)->Last->TotalLength > Limit)
1348      return;
1349    if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1350      return;
1351    // Only inline simple if's (no nested if or else).
1352    if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1353      return;
1354    join(Line, *(++I));
1355  }
1356
1357  void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1358                           std::vector<AnnotatedLine>::iterator E,
1359                           unsigned Limit) {
1360    // First, check that the current line allows merging. This is the case if
1361    // we're not in a control flow statement and the last token is an opening
1362    // brace.
1363    AnnotatedLine &Line = *I;
1364    bool AllowedTokens =
1365        Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
1366        Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
1367        Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
1368        Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
1369        // This gets rid of all ObjC @ keywords and methods.
1370        Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
1371        Line.First.isNot(tok::plus);
1372    if (!AllowedTokens)
1373      return;
1374
1375    AnnotatedToken *Tok = &(I + 1)->First;
1376    if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1377        !Tok->MustBreakBefore) {
1378      // We merge empty blocks even if the line exceeds the column limit.
1379      Tok->SpacesRequiredBefore = 0;
1380      Tok->CanBreakBefore = true;
1381      join(Line, *(I + 1));
1382      I += 1;
1383    } else if (Limit != 0) {
1384      // Check that we still have three lines and they fit into the limit.
1385      if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1386          !nextTwoLinesFitInto(I, Limit))
1387        return;
1388
1389      // Second, check that the next line does not contain any braces - if it
1390      // does, readability declines when putting it into a single line.
1391      if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1392        return;
1393      do {
1394        if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
1395          return;
1396        Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1397      } while (Tok != NULL);
1398
1399      // Last, check that the third line contains a single closing brace.
1400      Tok = &(I + 2)->First;
1401      if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1402          Tok->MustBreakBefore)
1403        return;
1404
1405      join(Line, *(I + 1));
1406      join(Line, *(I + 2));
1407      I += 2;
1408    }
1409  }
1410
1411  bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1412                           unsigned Limit) {
1413    return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1414           Limit;
1415  }
1416
1417  void join(AnnotatedLine &A, const AnnotatedLine &B) {
1418    unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
1419    A.Last->Children.push_back(B.First);
1420    while (!A.Last->Children.empty()) {
1421      A.Last->Children[0].Parent = A.Last;
1422      A.Last->Children[0].TotalLength += LengthA;
1423      A.Last = &A.Last->Children[0];
1424    }
1425  }
1426
1427  bool touchesRanges(const CharSourceRange &Range) {
1428    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1429      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1430                                               Ranges[i].getBegin()) &&
1431          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1432                                               Range.getBegin()))
1433        return true;
1434    }
1435    return false;
1436  }
1437
1438  bool touchesLine(const AnnotatedLine &TheLine) {
1439    const FormatToken *First = &TheLine.First.FormatTok;
1440    const FormatToken *Last = &TheLine.Last->FormatTok;
1441    CharSourceRange LineRange = CharSourceRange::getTokenRange(
1442        First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
1443        Last->Tok.getLocation());
1444    return touchesRanges(LineRange);
1445  }
1446
1447  bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1448    const FormatToken *First = &TheLine.First.FormatTok;
1449    CharSourceRange LineRange = CharSourceRange::getCharRange(
1450        First->WhiteSpaceStart,
1451        First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
1452    return touchesRanges(LineRange);
1453  }
1454
1455  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1456    AnnotatedLines.push_back(AnnotatedLine(TheLine));
1457  }
1458
1459  /// \brief Add a new line and the required indent before the first Token
1460  /// of the \c UnwrappedLine if there was no structural parsing error.
1461  /// Returns the indent level of the \c UnwrappedLine.
1462  void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent,
1463                        bool InPPDirective, unsigned PreviousEndOfLineColumn) {
1464    const FormatToken &Tok = RootToken.FormatTok;
1465
1466    unsigned Newlines =
1467        std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1468    if (Newlines == 0 && !Tok.IsFirst)
1469      Newlines = 1;
1470
1471    if (!InPPDirective || Tok.HasUnescapedNewline) {
1472      Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style);
1473    } else {
1474      Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
1475                                      PreviousEndOfLineColumn, Style);
1476    }
1477  }
1478
1479  DiagnosticsEngine &Diag;
1480  FormatStyle Style;
1481  Lexer &Lex;
1482  SourceManager &SourceMgr;
1483  WhitespaceManager Whitespaces;
1484  std::vector<CharSourceRange> Ranges;
1485  std::vector<AnnotatedLine> AnnotatedLines;
1486  bool StructuralError;
1487};
1488
1489tooling::Replacements
1490reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1491         std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
1492  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1493  OwningPtr<DiagnosticConsumer> DiagPrinter;
1494  if (DiagClient == 0) {
1495    DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1496    DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1497    DiagClient = DiagPrinter.get();
1498  }
1499  DiagnosticsEngine Diagnostics(
1500      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1501      DiagClient, false);
1502  Diagnostics.setSourceManager(&SourceMgr);
1503  Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1504  return formatter.format();
1505}
1506
1507LangOptions getFormattingLangOpts() {
1508  LangOptions LangOpts;
1509  LangOpts.CPlusPlus = 1;
1510  LangOpts.CPlusPlus11 = 1;
1511  LangOpts.Bool = 1;
1512  LangOpts.ObjC1 = 1;
1513  LangOpts.ObjC2 = 1;
1514  return LangOpts;
1515}
1516
1517} // namespace format
1518} // namespace clang
1519