Format.cpp revision 237d4c1c785be13656bff6c09e5b7ccd066ff5ba
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-formatter"
17
18#include "TokenAnnotator.h"
19#include "UnwrappedLineParser.h"
20#include "clang/Basic/Diagnostic.h"
21#include "clang/Basic/OperatorPrecedence.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Format/Format.h"
24#include "clang/Frontend/TextDiagnosticPrinter.h"
25#include "clang/Lex/Lexer.h"
26#include "llvm/Support/Allocator.h"
27#include "llvm/Support/Debug.h"
28#include <queue>
29#include <string>
30
31namespace clang {
32namespace format {
33
34FormatStyle getLLVMStyle() {
35  FormatStyle LLVMStyle;
36  LLVMStyle.ColumnLimit = 80;
37  LLVMStyle.MaxEmptyLinesToKeep = 1;
38  LLVMStyle.PointerBindsToType = false;
39  LLVMStyle.DerivePointerBinding = false;
40  LLVMStyle.AccessModifierOffset = -2;
41  LLVMStyle.Standard = FormatStyle::LS_Cpp03;
42  LLVMStyle.IndentCaseLabels = false;
43  LLVMStyle.SpacesBeforeTrailingComments = 1;
44  LLVMStyle.BinPackParameters = true;
45  LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
46  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
47  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
48  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
49  LLVMStyle.PenaltyExcessCharacter = 1000000;
50  LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5;
51  return LLVMStyle;
52}
53
54FormatStyle getGoogleStyle() {
55  FormatStyle GoogleStyle;
56  GoogleStyle.ColumnLimit = 80;
57  GoogleStyle.MaxEmptyLinesToKeep = 1;
58  GoogleStyle.PointerBindsToType = true;
59  GoogleStyle.DerivePointerBinding = true;
60  GoogleStyle.AccessModifierOffset = -1;
61  GoogleStyle.Standard = FormatStyle::LS_Auto;
62  GoogleStyle.IndentCaseLabels = true;
63  GoogleStyle.SpacesBeforeTrailingComments = 2;
64  GoogleStyle.BinPackParameters = false;
65  GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
66  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
67  GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
68  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
69  GoogleStyle.PenaltyExcessCharacter = 1000000;
70  GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100;
71  return GoogleStyle;
72}
73
74FormatStyle getChromiumStyle() {
75  FormatStyle ChromiumStyle = getGoogleStyle();
76  ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
77  ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
78  ChromiumStyle.DerivePointerBinding = false;
79  return ChromiumStyle;
80}
81
82static bool isTrailingComment(const AnnotatedToken &Tok) {
83  return Tok.is(tok::comment) &&
84         (Tok.Children.empty() || Tok.Children[0].MustBreakBefore);
85}
86
87// Returns the length of everything up to the first possible line break after
88// the ), ], } or > matching \c Tok.
89static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
90  if (Tok.MatchingParen == NULL)
91    return 0;
92  AnnotatedToken *End = Tok.MatchingParen;
93  while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
94    End = &End->Children[0];
95  }
96  return End->TotalLength - Tok.TotalLength + 1;
97}
98
99/// \brief Manages the whitespaces around tokens and their replacements.
100///
101/// This includes special handling for certain constructs, e.g. the alignment of
102/// trailing line comments.
103class WhitespaceManager {
104public:
105  WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
106
107  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
108  /// each \c AnnotatedToken.
109  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
110                         unsigned Spaces, unsigned WhitespaceStartColumn,
111                         const FormatStyle &Style) {
112    // 2+ newlines mean an empty line separating logic scopes.
113    if (NewLines >= 2)
114      alignComments();
115
116    // Align line comments if they are trailing or if they continue other
117    // trailing comments.
118    if (isTrailingComment(Tok) && (Tok.Parent != NULL || !Comments.empty())) {
119      if (Style.ColumnLimit >=
120          Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) {
121        Comments.push_back(StoredComment());
122        Comments.back().Tok = Tok.FormatTok;
123        Comments.back().Spaces = Spaces;
124        Comments.back().NewLines = NewLines;
125        if (NewLines == 0)
126          Comments.back().MinColumn = WhitespaceStartColumn + Spaces;
127        else
128          Comments.back().MinColumn = Spaces;
129        Comments.back().MaxColumn =
130            Style.ColumnLimit - Tok.FormatTok.TokenLength;
131        return;
132      }
133    }
134
135    // If this line does not have a trailing comment, align the stored comments.
136    if (Tok.Children.empty() && !isTrailingComment(Tok))
137      alignComments();
138    storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
139  }
140
141  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
142  /// backslashes to escape newlines inside a preprocessor directive.
143  ///
144  /// This function and \c replaceWhitespace have the same behavior if
145  /// \c Newlines == 0.
146  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
147                           unsigned Spaces, unsigned WhitespaceStartColumn,
148                           const FormatStyle &Style) {
149    storeReplacement(
150        Tok.FormatTok,
151        getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style));
152  }
153
154  /// \brief Inserts a line break into the middle of a token.
155  ///
156  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
157  /// break and \p Postfix before the rest of the token starts in the next line.
158  ///
159  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
160  /// used to generate the correct line break.
161  void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix,
162                  StringRef Postfix, bool InPPDirective, unsigned Spaces,
163                  unsigned WhitespaceStartColumn, const FormatStyle &Style) {
164    std::string NewLineText;
165    if (!InPPDirective)
166      NewLineText = getNewLineText(1, Spaces);
167    else
168      NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
169    std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
170    SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
171        .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
172    Replaces.insert(
173        tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
174  }
175
176  /// \brief Returns all the \c Replacements created during formatting.
177  const tooling::Replacements &generateReplacements() {
178    alignComments();
179    return Replaces;
180  }
181
182private:
183  std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
184    return std::string(NewLines, '\n') + std::string(Spaces, ' ');
185  }
186
187  std::string
188  getNewLineText(unsigned NewLines, unsigned Spaces,
189                 unsigned WhitespaceStartColumn, const FormatStyle &Style) {
190    std::string NewLineText;
191    if (NewLines > 0) {
192      unsigned Offset =
193          std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
194      for (unsigned i = 0; i < NewLines; ++i) {
195        NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
196        NewLineText += "\\\n";
197        Offset = 0;
198      }
199    }
200    return NewLineText + std::string(Spaces, ' ');
201  }
202
203  /// \brief Structure to store a comment for later layout and alignment.
204  struct StoredComment {
205    FormatToken Tok;
206    unsigned MinColumn;
207    unsigned MaxColumn;
208    unsigned NewLines;
209    unsigned Spaces;
210  };
211  SmallVector<StoredComment, 16> Comments;
212  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
213
214  /// \brief Try to align all stashed comments.
215  void alignComments() {
216    unsigned MinColumn = 0;
217    unsigned MaxColumn = UINT_MAX;
218    comment_iterator Start = Comments.begin();
219    for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E;
220         ++I) {
221      if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
222        alignComments(Start, I, MinColumn);
223        MinColumn = I->MinColumn;
224        MaxColumn = I->MaxColumn;
225        Start = I;
226      } else {
227        MinColumn = std::max(MinColumn, I->MinColumn);
228        MaxColumn = std::min(MaxColumn, I->MaxColumn);
229      }
230    }
231    alignComments(Start, Comments.end(), MinColumn);
232    Comments.clear();
233  }
234
235  /// \brief Put all the comments between \p I and \p E into \p Column.
236  void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
237    while (I != E) {
238      unsigned Spaces = I->Spaces + Column - I->MinColumn;
239      storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
240                               std::string(Spaces, ' '));
241      ++I;
242    }
243  }
244
245  /// \brief Stores \p Text as the replacement for the whitespace in front of
246  /// \p Tok.
247  void storeReplacement(const FormatToken &Tok, const std::string Text) {
248    // Don't create a replacement, if it does not change anything.
249    if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
250                  Tok.WhiteSpaceLength) == Text)
251      return;
252
253    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
254                                         Tok.WhiteSpaceLength, Text));
255  }
256
257  SourceManager &SourceMgr;
258  tooling::Replacements Replaces;
259};
260
261static bool isVarDeclName(const AnnotatedToken &Tok) {
262  return Tok.Parent != NULL && Tok.is(tok::identifier) &&
263         (Tok.Parent->Type == TT_PointerOrReference ||
264          Tok.Parent->is(tok::identifier));
265}
266
267class UnwrappedLineFormatter {
268public:
269  UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
270                         const AnnotatedLine &Line, unsigned FirstIndent,
271                         const AnnotatedToken &RootToken,
272                         WhitespaceManager &Whitespaces, bool StructuralError)
273      : Style(Style), SourceMgr(SourceMgr), Line(Line),
274        FirstIndent(FirstIndent), RootToken(RootToken),
275        Whitespaces(Whitespaces), Count(0) {}
276
277  /// \brief Formats an \c UnwrappedLine.
278  ///
279  /// \returns The column after the last token in the last line of the
280  /// \c UnwrappedLine.
281  unsigned format() {
282    // Initialize state dependent on indent.
283    LineState State;
284    State.Column = FirstIndent;
285    State.NextToken = &RootToken;
286    State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent,
287                                     !Style.BinPackParameters,
288                                     /*HasMultiParameterLine=*/ false));
289    State.VariablePos = 0;
290    State.LineContainsContinuedForLoopSection = false;
291    State.ParenLevel = 0;
292    State.StartOfStringLiteral = 0;
293    State.StartOfLineLevel = State.ParenLevel;
294
295    DEBUG({
296      DebugTokenState(*State.NextToken);
297    });
298
299    // The first token has already been indented and thus consumed.
300    moveStateToNextToken(State, /*DryRun=*/ false);
301
302    // If everything fits on a single line, just put it there.
303    if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
304      while (State.NextToken != NULL) {
305        addTokenToState(false, false, State);
306      }
307      return State.Column;
308    }
309
310    // If the ObjC method declaration does not fit on a line, we should format
311    // it with one arg per line.
312    if (Line.Type == LT_ObjCMethodDecl)
313      State.Stack.back().BreakBeforeParameter = true;
314
315    // Find best solution in solution space.
316    return analyzeSolutionSpace(State);
317  }
318
319private:
320  void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
321    const Token &Tok = AnnotatedTok.FormatTok.Tok;
322    llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
323                              Tok.getLength());
324    llvm::errs();
325  }
326
327  struct ParenState {
328    ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
329               bool HasMultiParameterLine)
330        : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
331          BreakBeforeClosingBrace(false), QuestionColumn(0),
332          AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
333          HasMultiParameterLine(HasMultiParameterLine), ColonPos(0) {}
334
335    /// \brief The position to which a specific parenthesis level needs to be
336    /// indented.
337    unsigned Indent;
338
339    /// \brief The position of the last space on each level.
340    ///
341    /// Used e.g. to break like:
342    /// functionCall(Parameter, otherCall(
343    ///                             OtherParameter));
344    unsigned LastSpace;
345
346    /// \brief The position the first "<<" operator encountered on each level.
347    ///
348    /// Used to align "<<" operators. 0 if no such operator has been encountered
349    /// on a level.
350    unsigned FirstLessLess;
351
352    /// \brief Whether a newline needs to be inserted before the block's closing
353    /// brace.
354    ///
355    /// We only want to insert a newline before the closing brace if there also
356    /// was a newline after the beginning left brace.
357    bool BreakBeforeClosingBrace;
358
359    /// \brief The column of a \c ? in a conditional expression;
360    unsigned QuestionColumn;
361
362    /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
363    /// lines, in this context.
364    bool AvoidBinPacking;
365
366    /// \brief Break after the next comma (or all the commas in this context if
367    /// \c AvoidBinPacking is \c true).
368    bool BreakBeforeParameter;
369
370    /// \brief This context already has a line with more than one parameter.
371    bool HasMultiParameterLine;
372
373    /// \brief The position of the colon in an ObjC method declaration/call.
374    unsigned ColonPos;
375
376    bool operator<(const ParenState &Other) const {
377      if (Indent != Other.Indent)
378        return Indent < Other.Indent;
379      if (LastSpace != Other.LastSpace)
380        return LastSpace < Other.LastSpace;
381      if (FirstLessLess != Other.FirstLessLess)
382        return FirstLessLess < Other.FirstLessLess;
383      if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
384        return BreakBeforeClosingBrace;
385      if (QuestionColumn != Other.QuestionColumn)
386        return QuestionColumn < Other.QuestionColumn;
387      if (AvoidBinPacking != Other.AvoidBinPacking)
388        return AvoidBinPacking;
389      if (BreakBeforeParameter != Other.BreakBeforeParameter)
390        return BreakBeforeParameter;
391      if (HasMultiParameterLine != Other.HasMultiParameterLine)
392        return HasMultiParameterLine;
393      if (ColonPos != Other.ColonPos)
394        return ColonPos < Other.ColonPos;
395      return false;
396    }
397  };
398
399  /// \brief The current state when indenting a unwrapped line.
400  ///
401  /// As the indenting tries different combinations this is copied by value.
402  struct LineState {
403    /// \brief The number of used columns in the current line.
404    unsigned Column;
405
406    /// \brief The token that needs to be next formatted.
407    const AnnotatedToken *NextToken;
408
409    /// \brief The column of the first variable name in a variable declaration.
410    ///
411    /// Used to align further variables if necessary.
412    unsigned VariablePos;
413
414    /// \brief \c true if this line contains a continued for-loop section.
415    bool LineContainsContinuedForLoopSection;
416
417    /// \brief The level of nesting inside (), [], <> and {}.
418    unsigned ParenLevel;
419
420    /// \brief The \c ParenLevel at the start of this line.
421    unsigned StartOfLineLevel;
422
423    /// \brief The start column of the string literal, if we're in a string
424    /// literal sequence, 0 otherwise.
425    unsigned StartOfStringLiteral;
426
427    /// \brief A stack keeping track of properties applying to parenthesis
428    /// levels.
429    std::vector<ParenState> Stack;
430
431    /// \brief Comparison operator to be able to used \c LineState in \c map.
432    bool operator<(const LineState &Other) const {
433      if (NextToken != Other.NextToken)
434        return NextToken < Other.NextToken;
435      if (Column != Other.Column)
436        return Column < Other.Column;
437      if (VariablePos != Other.VariablePos)
438        return VariablePos < Other.VariablePos;
439      if (LineContainsContinuedForLoopSection !=
440          Other.LineContainsContinuedForLoopSection)
441        return LineContainsContinuedForLoopSection;
442      if (ParenLevel != Other.ParenLevel)
443        return ParenLevel < Other.ParenLevel;
444      if (StartOfLineLevel != Other.StartOfLineLevel)
445        return StartOfLineLevel < Other.StartOfLineLevel;
446      if (StartOfStringLiteral != Other.StartOfStringLiteral)
447        return StartOfStringLiteral < Other.StartOfStringLiteral;
448      return Stack < Other.Stack;
449    }
450  };
451
452  /// \brief Appends the next token to \p State and updates information
453  /// necessary for indentation.
454  ///
455  /// Puts the token on the current line if \p Newline is \c true and adds a
456  /// line break and necessary indentation otherwise.
457  ///
458  /// If \p DryRun is \c false, also creates and stores the required
459  /// \c Replacement.
460  unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
461    const AnnotatedToken &Current = *State.NextToken;
462    const AnnotatedToken &Previous = *State.NextToken->Parent;
463    assert(State.Stack.size());
464
465    if (Current.Type == TT_ImplicitStringLiteral) {
466      State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
467                      State.NextToken->FormatTok.TokenLength;
468      if (State.NextToken->Children.empty())
469        State.NextToken = NULL;
470      else
471        State.NextToken = &State.NextToken->Children[0];
472      return 0;
473    }
474
475    if (Newline) {
476      unsigned WhitespaceStartColumn = State.Column;
477      if (Current.is(tok::r_brace)) {
478        State.Column = Line.Level * 2;
479      } else if (Current.is(tok::string_literal) &&
480                 State.StartOfStringLiteral != 0) {
481        State.Column = State.StartOfStringLiteral;
482        State.Stack.back().BreakBeforeParameter = true;
483      } else if (Current.is(tok::lessless) &&
484                 State.Stack.back().FirstLessLess != 0) {
485        State.Column = State.Stack.back().FirstLessLess;
486      } else if (State.ParenLevel != 0 &&
487                 (Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
488                  Current.is(tok::period) || Current.is(tok::arrow) ||
489                  Current.is(tok::question))) {
490        // Indent and extra 4 spaces after if we know the current expression is
491        // continued.  Don't do that on the top level, as we already indent 4
492        // there.
493        State.Column = std::max(State.Stack.back().LastSpace,
494                                State.Stack.back().Indent) + 4;
495      } else if (Current.Type == TT_ConditionalExpr) {
496        State.Column = State.Stack.back().QuestionColumn;
497      } else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
498                 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
499                  State.ParenLevel == 0)) {
500        State.Column = State.VariablePos;
501      } else if (State.NextToken->Parent->ClosesTemplateDeclaration ||
502                 Current.Type == TT_StartOfName) {
503        State.Column = State.Stack.back().Indent - 4;
504      } else if (Current.Type == TT_ObjCSelectorName) {
505        if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
506          State.Column =
507              State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
508        } else {
509          State.Column = State.Stack.back().Indent;
510          State.Stack.back().ColonPos =
511              State.Column + Current.FormatTok.TokenLength;
512        }
513      } else if (Previous.Type == TT_ObjCMethodExpr || isVarDeclName(Current)) {
514        State.Column = State.Stack.back().Indent + 4;
515      } else {
516        State.Column = State.Stack.back().Indent;
517      }
518
519      if (Current.is(tok::question))
520        State.Stack.back().BreakBeforeParameter = true;
521      if ((Previous.is(tok::comma) || Previous.is(tok::semi)) &&
522          !State.Stack.back().AvoidBinPacking)
523        State.Stack.back().BreakBeforeParameter = false;
524
525      if (!DryRun) {
526        unsigned NewLines =
527            std::max(1u, std::min(Current.FormatTok.NewlinesBefore,
528                                  Style.MaxEmptyLinesToKeep + 1));
529        if (!Line.InPPDirective)
530          Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
531                                        WhitespaceStartColumn, Style);
532        else
533          Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
534                                          WhitespaceStartColumn, Style);
535      }
536
537      State.Stack.back().LastSpace = State.Column;
538      State.StartOfLineLevel = State.ParenLevel;
539      if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr)
540        State.Stack.back().Indent += 2;
541
542      // Any break on this level means that the parent level has been broken
543      // and we need to avoid bin packing there.
544      for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
545        State.Stack[i].BreakBeforeParameter = true;
546      }
547      // If we break after {, we should also break before the corresponding }.
548      if (Previous.is(tok::l_brace))
549        State.Stack.back().BreakBeforeClosingBrace = true;
550
551      if (State.Stack.back().AvoidBinPacking) {
552        // If we are breaking after '(', '{', '<', this is not bin packing
553        // unless AllowAllParametersOfDeclarationOnNextLine is false.
554        if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) &&
555             Previous.Type != TT_TemplateOpener) ||
556            (!Style.AllowAllParametersOfDeclarationOnNextLine &&
557             Line.MustBeDeclaration))
558          State.Stack.back().BreakBeforeParameter = true;
559      }
560    } else {
561      if (Current.is(tok::equal))
562        State.VariablePos = State.Column - Previous.FormatTok.TokenLength;
563
564      unsigned Spaces = State.NextToken->SpacesRequiredBefore;
565
566      if (!DryRun)
567        Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style);
568
569      if (Current.Type == TT_ObjCSelectorName &&
570          State.Stack.back().ColonPos == 0) {
571        if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
572            State.Column + Spaces + Current.FormatTok.TokenLength)
573          State.Stack.back().ColonPos =
574              State.Stack.back().Indent + Current.LongestObjCSelectorName;
575        else
576          State.Stack.back().ColonPos =
577              State.Column + Spaces + Current.FormatTok.TokenLength;
578      }
579
580      if (Current.Type != TT_LineComment &&
581          (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
582           State.NextToken->Parent->Type == TT_TemplateOpener))
583        State.Stack.back().Indent = State.Column + Spaces;
584      if (Previous.is(tok::comma) && !isTrailingComment(Current))
585        State.Stack.back().HasMultiParameterLine = true;
586
587      State.Column += Spaces;
588      if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
589        // Treat the condition inside an if as if it was a second function
590        // parameter, i.e. let nested calls have an indent of 4.
591        State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
592      else if (Previous.is(tok::comma) && State.ParenLevel != 0)
593        // Top-level spaces are exempt as that mostly leads to better results.
594        State.Stack.back().LastSpace = State.Column;
595      else if ((Previous.Type == TT_BinaryOperator ||
596                Previous.Type == TT_ConditionalExpr ||
597                Previous.Type == TT_CtorInitializerColon) &&
598               getPrecedence(Previous) != prec::Assignment)
599        State.Stack.back().LastSpace = State.Column;
600      else if (Previous.Type == TT_InheritanceColon)
601        State.Stack.back().Indent = State.Column;
602      else if (Previous.ParameterCount > 1 &&
603               (Previous.is(tok::l_paren) || Previous.is(tok::l_square) ||
604                Previous.is(tok::l_brace) ||
605                Previous.Type == TT_TemplateOpener))
606        // If this function has multiple parameters, indent nested calls from
607        // the start of the first parameter.
608        State.Stack.back().LastSpace = State.Column;
609      else if ((Current.is(tok::period) || Current.is(tok::arrow)) &&
610               Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
611        State.Stack.back().LastSpace = State.Column;
612    }
613
614    return moveStateToNextToken(State, DryRun);
615  }
616
617  /// \brief Mark the next token as consumed in \p State and modify its stacks
618  /// accordingly.
619  unsigned moveStateToNextToken(LineState &State, bool DryRun) {
620    const AnnotatedToken &Current = *State.NextToken;
621    assert(State.Stack.size());
622
623    if (Current.Type == TT_InheritanceColon)
624      State.Stack.back().AvoidBinPacking = true;
625    if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
626      State.Stack.back().FirstLessLess = State.Column;
627    if (Current.is(tok::question))
628      State.Stack.back().QuestionColumn = State.Column;
629    if (Current.Type == TT_CtorInitializerColon) {
630      if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
631        State.Stack.back().AvoidBinPacking = true;
632      State.Stack.back().BreakBeforeParameter = false;
633    }
634
635    // Insert scopes created by fake parenthesis.
636    for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
637      ParenState NewParenState = State.Stack.back();
638      NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
639      NewParenState.BreakBeforeParameter = false;
640      State.Stack.push_back(NewParenState);
641    }
642
643    // If we encounter an opening (, [, { or <, we add a level to our stacks to
644    // prepare for the following tokens.
645    if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
646        Current.is(tok::l_brace) ||
647        State.NextToken->Type == TT_TemplateOpener) {
648      unsigned NewIndent;
649      bool AvoidBinPacking;
650      if (Current.is(tok::l_brace)) {
651        NewIndent = 2 + State.Stack.back().LastSpace;
652        AvoidBinPacking = false;
653      } else {
654        NewIndent = 4 + State.Stack.back().LastSpace;
655        AvoidBinPacking = !Style.BinPackParameters;
656      }
657      State.Stack.push_back(
658          ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
659                     State.Stack.back().HasMultiParameterLine));
660      ++State.ParenLevel;
661    }
662
663    // If this '[' opens an ObjC call, determine whether all parameters fit into
664    // one line and put one per line if they don't.
665    if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
666        Current.MatchingParen != NULL) {
667      if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
668        State.Stack.back().BreakBeforeParameter = true;
669    }
670
671    // If we encounter a closing ), ], } or >, we can remove a level from our
672    // stacks.
673    if (Current.is(tok::r_paren) || Current.is(tok::r_square) ||
674        (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
675        State.NextToken->Type == TT_TemplateCloser) {
676      State.Stack.pop_back();
677      --State.ParenLevel;
678    }
679
680    // Remove scopes created by fake parenthesis.
681    for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
682      State.Stack.pop_back();
683    }
684
685    if (Current.is(tok::string_literal)) {
686      State.StartOfStringLiteral = State.Column;
687    } else if (Current.isNot(tok::comment)) {
688      State.StartOfStringLiteral = 0;
689    }
690
691    State.Column += Current.FormatTok.TokenLength;
692
693    if (State.NextToken->Children.empty())
694      State.NextToken = NULL;
695    else
696      State.NextToken = &State.NextToken->Children[0];
697
698    return breakProtrudingToken(Current, State, DryRun);
699  }
700
701  /// \brief If the current token sticks out over the end of the line, break
702  /// it if possible.
703  unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
704                                bool DryRun) {
705    if (Current.isNot(tok::string_literal))
706      return 0;
707
708    unsigned Penalty = 0;
709    unsigned TailOffset = 0;
710    unsigned TailLength = Current.FormatTok.TokenLength;
711    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
712    unsigned OffsetFromStart = 0;
713    while (StartColumn + TailLength > getColumnLimit()) {
714      StringRef Text = StringRef(Current.FormatTok.Tok.getLiteralData() +
715                                 TailOffset, TailLength);
716      StringRef::size_type SplitPoint =
717          getSplitPoint(Text, getColumnLimit() - StartColumn - 1);
718      if (SplitPoint == StringRef::npos)
719        break;
720      assert(SplitPoint != 0);
721      // +2, because 'Text' starts after the opening quotes, and does not
722      // include the closing quote we need to insert.
723      unsigned WhitespaceStartColumn =
724          StartColumn + OffsetFromStart + SplitPoint + 2;
725      State.Stack.back().LastSpace = StartColumn;
726      if (!DryRun) {
727        Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"",
728                               Line.InPPDirective, StartColumn,
729                               WhitespaceStartColumn, Style);
730      }
731      TailOffset += SplitPoint + 1;
732      TailLength -= SplitPoint + 1;
733      OffsetFromStart = 1;
734      Penalty += 100;
735    }
736    State.Column = StartColumn + TailLength;
737    return Penalty;
738  }
739
740  StringRef::size_type
741  getSplitPoint(StringRef Text, StringRef::size_type Offset) {
742    // FIXME: Implement more sophisticated splitting mechanism, and a fallback.
743    return Text.rfind(' ', Offset);
744  }
745
746  unsigned getColumnLimit() {
747    return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
748  }
749
750  /// \brief An edge in the solution space from \c Previous->State to \c State,
751  /// inserting a newline dependent on the \c NewLine.
752  struct StateNode {
753    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
754        : State(State), NewLine(NewLine), Previous(Previous) {}
755    LineState State;
756    bool NewLine;
757    StateNode *Previous;
758  };
759
  /// \brief A pair of <penalty, count> by which the search queue is ordered.
  ///
  /// In case of equal penalties, we want to prefer states that were inserted
  /// first (they have the smaller count). During state generation we make sure
  /// that we insert states first that break the line as late as possible.
  typedef std::pair<unsigned, unsigned> OrderedPenalty;

  /// \brief An item in the prioritized search queue. The \c StateNode's
  /// \c State has the given \c OrderedPenalty.
  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;

  /// \brief The search queue type.
  ///
  /// Ordering with \c std::greater turns the priority queue into a min-queue:
  /// \c top() is the item that compares smallest, i.e. the one with the lowest
  /// penalty and, among equal penalties, the lowest count.
  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
                              std::greater<QueueItem> > QueueType;
774
775  /// \brief Analyze the entire solution space starting from \p InitialState.
776  ///
777  /// This implements a variant of Dijkstra's algorithm on the graph that spans
778  /// the solution space (\c LineStates are the nodes). The algorithm tries to
779  /// find the shortest path (the one with lowest penalty) from \p InitialState
780  /// to a state where all tokens are placed.
781  unsigned analyzeSolutionSpace(LineState &InitialState) {
782    std::set<LineState> Seen;
783
784    // Insert start element into queue.
785    StateNode *Node =
786        new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
787    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
788    ++Count;
789
790    // While not empty, take first element and follow edges.
791    while (!Queue.empty()) {
792      unsigned Penalty = Queue.top().first.first;
793      StateNode *Node = Queue.top().second;
794      if (Node->State.NextToken == NULL) {
795        DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
796        break;
797      }
798      Queue.pop();
799
800      if (!Seen.insert(Node->State).second)
801        // State already examined with lower penalty.
802        continue;
803
804      addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
805      addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
806    }
807
808    if (Queue.empty())
809      // We were unable to find a solution, do nothing.
810      // FIXME: Add diagnostic?
811      return 0;
812
813    // Reconstruct the solution.
814    reconstructPath(InitialState, Queue.top().second);
815    DEBUG(llvm::errs() << "---\n");
816
817    // Return the column after the last token of the solution.
818    return Queue.top().second->State.Column;
819  }
820
821  void reconstructPath(LineState &State, StateNode *Current) {
822    // FIXME: This recursive implementation limits the possible number
823    // of tokens per line if compiled into a binary with small stack space.
824    // To become more independent of stack frame limitations we would need
825    // to also change the TokenAnnotator.
826    if (Current->Previous == NULL)
827      return;
828    reconstructPath(State, Current->Previous);
829    DEBUG({
830      if (Current->NewLine) {
831        llvm::errs()
832            << "Penalty for splitting before "
833            << Current->Previous->State.NextToken->FormatTok.Tok.getName()
834            << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
835      }
836    });
837    addTokenToState(Current->NewLine, false, State);
838  }
839
840  /// \brief Add the following state to the analysis queue \c Queue.
841  ///
842  /// Assume the current state is \p PreviousNode and has been reached with a
843  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
844  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
845                           bool NewLine) {
846    if (NewLine && !canBreak(PreviousNode->State))
847      return;
848    if (!NewLine && mustBreak(PreviousNode->State))
849      return;
850    if (NewLine)
851      Penalty += PreviousNode->State.NextToken->SplitPenalty;
852
853    StateNode *Node = new (Allocator.Allocate())
854        StateNode(PreviousNode->State, NewLine, PreviousNode);
855    Penalty += addTokenToState(NewLine, true, Node->State);
856    if (Node->State.Column > getColumnLimit()) {
857      unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
858      Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
859    }
860
861    Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
862    ++Count;
863  }
864
865  /// \brief Returns \c true, if a line break after \p State is allowed.
866  bool canBreak(const LineState &State) {
867    if (!State.NextToken->CanBreakBefore &&
868        !(State.NextToken->is(tok::r_brace) &&
869          State.Stack.back().BreakBeforeClosingBrace))
870      return false;
871    // This prevents breaks like:
872    //   ...
873    //   SomeParameter, OtherParameter).DoSomething(
874    //   ...
875    // As they hide "DoSomething" and generally bad for readability.
876    if (State.NextToken->Parent->is(tok::l_paren) &&
877        State.ParenLevel <= State.StartOfLineLevel)
878      return false;
879    // Trying to insert a parameter on a new line if there are already more than
880    // one parameter on the current line is bin packing.
881    if (State.Stack.back().HasMultiParameterLine &&
882        State.Stack.back().AvoidBinPacking)
883      return false;
884    return true;
885  }
886
887  /// \brief Returns \c true, if a line break after \p State is mandatory.
888  bool mustBreak(const LineState &State) {
889    if (State.NextToken->MustBreakBefore)
890      return true;
891    if (State.NextToken->is(tok::r_brace) &&
892        State.Stack.back().BreakBeforeClosingBrace)
893      return true;
894    if (State.NextToken->Parent->is(tok::semi) &&
895        State.LineContainsContinuedForLoopSection)
896      return true;
897    if ((State.NextToken->Parent->is(tok::comma) ||
898         State.NextToken->Parent->is(tok::semi) ||
899         State.NextToken->is(tok::question) ||
900         State.NextToken->Type == TT_ConditionalExpr) &&
901        State.Stack.back().BreakBeforeParameter &&
902        !isTrailingComment(*State.NextToken) &&
903        State.NextToken->isNot(tok::r_paren) &&
904        State.NextToken->isNot(tok::r_brace))
905      return true;
906    // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
907    // out whether it is the first parameter. Clean this up.
908    if (State.NextToken->Type == TT_ObjCSelectorName &&
909        State.NextToken->LongestObjCSelectorName == 0 &&
910        State.Stack.back().BreakBeforeParameter)
911      return true;
912    if ((State.NextToken->Type == TT_CtorInitializerColon ||
913         (State.NextToken->Parent->ClosesTemplateDeclaration &&
914          State.ParenLevel == 0)))
915      return true;
916    return false;
917  }
918
  /// The style options consulted while formatting (held by value).
  FormatStyle Style;
  SourceManager &SourceMgr;
  /// The line being formatted; its \c InPPDirective flag affects the column
  /// limit.
  const AnnotatedLine &Line;
  const unsigned FirstIndent;
  const AnnotatedToken &RootToken;
  /// Receives the whitespace changes, e.g. the breaks inserted into string
  /// literals.
  WhitespaceManager &Whitespaces;

  /// Allocator from which all \c StateNodes of the search are created.
  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
  /// The pending states of the search, cheapest first.
  QueueType Queue;
  // Increasing count of \c StateNode items we have created. This is used
  // to create a deterministic order independent of the container.
  unsigned Count;
931};
932
933class LexerBasedFormatTokenSource : public FormatTokenSource {
934public:
935  LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
936      : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
937        IdentTable(Lex.getLangOpts()) {
938    Lex.SetKeepWhitespaceMode(true);
939  }
940
941  virtual FormatToken getNextToken() {
942    if (GreaterStashed) {
943      FormatTok.NewlinesBefore = 0;
944      FormatTok.WhiteSpaceStart =
945          FormatTok.Tok.getLocation().getLocWithOffset(1);
946      FormatTok.WhiteSpaceLength = 0;
947      GreaterStashed = false;
948      return FormatTok;
949    }
950
951    FormatTok = FormatToken();
952    Lex.LexFromRawLexer(FormatTok.Tok);
953    StringRef Text = rawTokenText(FormatTok.Tok);
954    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
955    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
956      FormatTok.IsFirst = true;
957
958    // Consume and record whitespace until we find a significant token.
959    while (FormatTok.Tok.is(tok::unknown)) {
960      unsigned Newlines = Text.count('\n');
961      unsigned EscapedNewlines = Text.count("\\\n");
962      FormatTok.NewlinesBefore += Newlines;
963      FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
964      FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
965
966      if (FormatTok.Tok.is(tok::eof))
967        return FormatTok;
968      Lex.LexFromRawLexer(FormatTok.Tok);
969      Text = rawTokenText(FormatTok.Tok);
970    }
971
972    // Now FormatTok is the next non-whitespace token.
973    FormatTok.TokenLength = Text.size();
974
975    // In case the token starts with escaped newlines, we want to
976    // take them into account as whitespace - this pattern is quite frequent
977    // in macro definitions.
978    // FIXME: What do we want to do with other escaped spaces, and escaped
979    // spaces or newlines in the middle of tokens?
980    // FIXME: Add a more explicit test.
981    unsigned i = 0;
982    while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
983      // FIXME: ++FormatTok.NewlinesBefore is missing...
984      FormatTok.WhiteSpaceLength += 2;
985      FormatTok.TokenLength -= 2;
986      i += 2;
987    }
988
989    if (FormatTok.Tok.is(tok::raw_identifier)) {
990      IdentifierInfo &Info = IdentTable.get(Text);
991      FormatTok.Tok.setIdentifierInfo(&Info);
992      FormatTok.Tok.setKind(Info.getTokenID());
993    }
994
995    if (FormatTok.Tok.is(tok::greatergreater)) {
996      FormatTok.Tok.setKind(tok::greater);
997      GreaterStashed = true;
998    }
999
1000    return FormatTok;
1001  }
1002
1003  IdentifierTable &getIdentTable() { return IdentTable; }
1004
1005private:
1006  FormatToken FormatTok;
1007  bool GreaterStashed;
1008  Lexer &Lex;
1009  SourceManager &SourceMgr;
1010  IdentifierTable IdentTable;
1011
1012  /// Returns the text of \c FormatTok.
1013  StringRef rawTokenText(Token &Tok) {
1014    return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1015                     Tok.getLength());
1016  }
1017};
1018
1019class Formatter : public UnwrappedLineConsumer {
1020public:
1021  Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1022            SourceManager &SourceMgr,
1023            const std::vector<CharSourceRange> &Ranges)
1024      : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1025        Whitespaces(SourceMgr), Ranges(Ranges) {}
1026
1027  virtual ~Formatter() {}
1028
1029  void deriveLocalStyle() {
1030    unsigned CountBoundToVariable = 0;
1031    unsigned CountBoundToType = 0;
1032    bool HasCpp03IncompatibleFormat = false;
1033    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1034      if (AnnotatedLines[i].First.Children.empty())
1035        continue;
1036      AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
1037      while (!Tok->Children.empty()) {
1038        if (Tok->Type == TT_PointerOrReference) {
1039          bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
1040          bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
1041          if (SpacesBefore && !SpacesAfter)
1042            ++CountBoundToVariable;
1043          else if (!SpacesBefore && SpacesAfter)
1044            ++CountBoundToType;
1045        }
1046
1047        if (Tok->Type == TT_TemplateCloser &&
1048            Tok->Parent->Type == TT_TemplateCloser &&
1049            Tok->FormatTok.WhiteSpaceLength == 0)
1050          HasCpp03IncompatibleFormat = true;
1051        Tok = &Tok->Children[0];
1052      }
1053    }
1054    if (Style.DerivePointerBinding) {
1055      if (CountBoundToType > CountBoundToVariable)
1056        Style.PointerBindsToType = true;
1057      else if (CountBoundToType < CountBoundToVariable)
1058        Style.PointerBindsToType = false;
1059    }
1060    if (Style.Standard == FormatStyle::LS_Auto) {
1061      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1062                                                  : FormatStyle::LS_Cpp03;
1063    }
1064  }
1065
1066  tooling::Replacements format() {
1067    LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1068    UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1069    StructuralError = Parser.parse();
1070    unsigned PreviousEndOfLineColumn = 0;
1071    TokenAnnotator Annotator(Style, SourceMgr, Lex,
1072                             Tokens.getIdentTable().get("in"));
1073    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1074      Annotator.annotate(AnnotatedLines[i]);
1075    }
1076    deriveLocalStyle();
1077    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1078      Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1079    }
1080    std::vector<int> IndentForLevel;
1081    bool PreviousLineWasTouched = false;
1082    for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1083                                              E = AnnotatedLines.end();
1084         I != E; ++I) {
1085      const AnnotatedLine &TheLine = *I;
1086      int Offset = getIndentOffset(TheLine.First);
1087      while (IndentForLevel.size() <= TheLine.Level)
1088        IndentForLevel.push_back(-1);
1089      IndentForLevel.resize(TheLine.Level + 1);
1090      bool WasMoved =
1091          PreviousLineWasTouched && TheLine.First.FormatTok.NewlinesBefore == 0;
1092      if (TheLine.Type != LT_Invalid && (WasMoved || touchesRanges(TheLine))) {
1093        unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1094        unsigned Indent = LevelIndent;
1095        if (static_cast<int>(Indent) + Offset >= 0)
1096          Indent += Offset;
1097        if (!TheLine.First.FormatTok.WhiteSpaceStart.isValid() ||
1098            StructuralError) {
1099          Indent = LevelIndent = SourceMgr.getSpellingColumnNumber(
1100              TheLine.First.FormatTok.Tok.getLocation()) - 1;
1101        } else {
1102          formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1103                           PreviousEndOfLineColumn);
1104        }
1105        tryFitMultipleLinesInOne(Indent, I, E);
1106        UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1107                                         TheLine.First, Whitespaces,
1108                                         StructuralError);
1109        PreviousEndOfLineColumn = Formatter.format();
1110        IndentForLevel[TheLine.Level] = LevelIndent;
1111        PreviousLineWasTouched = true;
1112      } else {
1113        if (TheLine.First.FormatTok.NewlinesBefore > 0 ||
1114            TheLine.First.FormatTok.IsFirst) {
1115          unsigned Indent = SourceMgr.getSpellingColumnNumber(
1116              TheLine.First.FormatTok.Tok.getLocation()) - 1;
1117          unsigned LevelIndent = Indent;
1118          if (static_cast<int>(LevelIndent) - Offset >= 0)
1119            LevelIndent -= Offset;
1120          IndentForLevel[TheLine.Level] = LevelIndent;
1121
1122          // Remove trailing whitespace of the previous line if it was touched.
1123          if (PreviousLineWasTouched)
1124            formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective,
1125                             PreviousEndOfLineColumn);
1126        }
1127        // If we did not reformat this unwrapped line, the column at the end of
1128        // the last token is unchanged - thus, we can calculate the end of the
1129        // last token.
1130        PreviousEndOfLineColumn =
1131            SourceMgr.getSpellingColumnNumber(
1132                TheLine.Last->FormatTok.Tok.getLocation()) +
1133            Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
1134                                   SourceMgr, Lex.getLangOpts()) - 1;
1135        PreviousLineWasTouched = false;
1136      }
1137    }
1138    return Whitespaces.generateReplacements();
1139  }
1140
1141private:
1142  /// \brief Get the indent of \p Level from \p IndentForLevel.
1143  ///
1144  /// \p IndentForLevel must contain the indent for the level \c l
1145  /// at \p IndentForLevel[l], or a value < 0 if the indent for
1146  /// that level is unknown.
1147  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1148    if (IndentForLevel[Level] != -1)
1149      return IndentForLevel[Level];
1150    if (Level == 0)
1151      return 0;
1152    return getIndent(IndentForLevel, Level - 1) + 2;
1153  }
1154
1155  /// \brief Get the offset of the line relatively to the level.
1156  ///
1157  /// For example, 'public:' labels in classes are offset by 1 or 2
1158  /// characters to the left from their level.
1159  int getIndentOffset(const AnnotatedToken &RootToken) {
1160    bool IsAccessModifier = false;
1161    if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) ||
1162        RootToken.is(tok::kw_private))
1163      IsAccessModifier = true;
1164    else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1165             (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1166              RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1167              RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1168              RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1169      IsAccessModifier = true;
1170
1171    if (IsAccessModifier)
1172      return Style.AccessModifierOffset;
1173    return 0;
1174  }
1175
1176  /// \brief Tries to merge lines into one.
1177  ///
1178  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1179  /// if possible; note that \c I will be incremented when lines are merged.
1180  ///
1181  /// Returns whether the resulting \c Line can fit in a single line.
1182  void tryFitMultipleLinesInOne(unsigned Indent,
1183                                std::vector<AnnotatedLine>::iterator &I,
1184                                std::vector<AnnotatedLine>::iterator E) {
1185    // We can never merge stuff if there are trailing line comments.
1186    if (I->Last->Type == TT_LineComment)
1187      return;
1188
1189    unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
1190    // If we already exceed the column limit, we set 'Limit' to 0. The different
1191    // tryMerge..() functions can then decide whether to still do merging.
1192    Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1193
1194    if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1195      return;
1196
1197    if (I->Last->is(tok::l_brace)) {
1198      tryMergeSimpleBlock(I, E, Limit);
1199    } else if (I->First.is(tok::kw_if)) {
1200      tryMergeSimpleIf(I, E, Limit);
1201    } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1202                                    I->First.FormatTok.IsFirst)) {
1203      tryMergeSimplePPDirective(I, E, Limit);
1204    }
1205    return;
1206  }
1207
1208  void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1209                                 std::vector<AnnotatedLine>::iterator E,
1210                                 unsigned Limit) {
1211    if (Limit == 0)
1212      return;
1213    AnnotatedLine &Line = *I;
1214    if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1215      return;
1216    if (I + 2 != E && (I + 2)->InPPDirective &&
1217        !(I + 2)->First.FormatTok.HasUnescapedNewline)
1218      return;
1219    if (1 + (I + 1)->Last->TotalLength > Limit)
1220      return;
1221    join(Line, *(++I));
1222  }
1223
1224  void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1225                        std::vector<AnnotatedLine>::iterator E,
1226                        unsigned Limit) {
1227    if (Limit == 0)
1228      return;
1229    if (!Style.AllowShortIfStatementsOnASingleLine)
1230      return;
1231    if ((I + 1)->InPPDirective != I->InPPDirective ||
1232        ((I + 1)->InPPDirective &&
1233         (I + 1)->First.FormatTok.HasUnescapedNewline))
1234      return;
1235    AnnotatedLine &Line = *I;
1236    if (Line.Last->isNot(tok::r_paren))
1237      return;
1238    if (1 + (I + 1)->Last->TotalLength > Limit)
1239      return;
1240    if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1241      return;
1242    // Only inline simple if's (no nested if or else).
1243    if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1244      return;
1245    join(Line, *(++I));
1246  }
1247
1248  void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1249                           std::vector<AnnotatedLine>::iterator E,
1250                           unsigned Limit) {
1251    // First, check that the current line allows merging. This is the case if
1252    // we're not in a control flow statement and the last token is an opening
1253    // brace.
1254    AnnotatedLine &Line = *I;
1255    bool AllowedTokens =
1256        Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
1257        Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
1258        Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
1259        Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
1260        // This gets rid of all ObjC @ keywords and methods.
1261        Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
1262        Line.First.isNot(tok::plus);
1263    if (!AllowedTokens)
1264      return;
1265
1266    AnnotatedToken *Tok = &(I + 1)->First;
1267    if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1268        !Tok->MustBreakBefore) {
1269      // We merge empty blocks even if the line exceeds the column limit.
1270      Tok->SpacesRequiredBefore = 0;
1271      Tok->CanBreakBefore = true;
1272      join(Line, *(I + 1));
1273      I += 1;
1274    } else if (Limit != 0) {
1275      // Check that we still have three lines and they fit into the limit.
1276      if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1277          !nextTwoLinesFitInto(I, Limit))
1278        return;
1279
1280      // Second, check that the next line does not contain any braces - if it
1281      // does, readability declines when putting it into a single line.
1282      if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1283        return;
1284      do {
1285        if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
1286          return;
1287        Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1288      } while (Tok != NULL);
1289
1290      // Last, check that the third line contains a single closing brace.
1291      Tok = &(I + 2)->First;
1292      if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1293          Tok->MustBreakBefore)
1294        return;
1295
1296      join(Line, *(I + 1));
1297      join(Line, *(I + 2));
1298      I += 2;
1299    }
1300  }
1301
1302  bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1303                           unsigned Limit) {
1304    return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1305           Limit;
1306  }
1307
1308  void join(AnnotatedLine &A, const AnnotatedLine &B) {
1309    unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
1310    A.Last->Children.push_back(B.First);
1311    while (!A.Last->Children.empty()) {
1312      A.Last->Children[0].Parent = A.Last;
1313      A.Last->Children[0].TotalLength += LengthA;
1314      A.Last = &A.Last->Children[0];
1315    }
1316  }
1317
1318  bool touchesRanges(const AnnotatedLine &TheLine) {
1319    const FormatToken *First = &TheLine.First.FormatTok;
1320    const FormatToken *Last = &TheLine.Last->FormatTok;
1321    CharSourceRange LineRange = CharSourceRange::getTokenRange(
1322        First->Tok.getLocation(), Last->Tok.getLocation());
1323    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1324      if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
1325                                               Ranges[i].getBegin()) &&
1326          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1327                                               LineRange.getBegin()))
1328        return true;
1329    }
1330    return false;
1331  }
1332
1333  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1334    AnnotatedLines.push_back(AnnotatedLine(TheLine));
1335  }
1336
1337  /// \brief Add a new line and the required indent before the first Token
1338  /// of the \c UnwrappedLine if there was no structural parsing error.
1339  /// Returns the indent level of the \c UnwrappedLine.
1340  void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent,
1341                        bool InPPDirective, unsigned PreviousEndOfLineColumn) {
1342    const FormatToken &Tok = RootToken.FormatTok;
1343
1344    unsigned Newlines =
1345        std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1346    if (Newlines == 0 && !Tok.IsFirst)
1347      Newlines = 1;
1348
1349    if (!InPPDirective || Tok.HasUnescapedNewline) {
1350      Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style);
1351    } else {
1352      Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
1353                                      PreviousEndOfLineColumn, Style);
1354    }
1355  }
1356
1357  DiagnosticsEngine &Diag;
1358  FormatStyle Style;
1359  Lexer &Lex;
1360  SourceManager &SourceMgr;
1361  WhitespaceManager Whitespaces;
1362  std::vector<CharSourceRange> Ranges;
1363  std::vector<AnnotatedLine> AnnotatedLines;
1364  bool StructuralError;
1365};
1366
1367tooling::Replacements
1368reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1369         std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
1370  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1371  OwningPtr<DiagnosticConsumer> DiagPrinter;
1372  if (DiagClient == 0) {
1373    DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1374    DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1375    DiagClient = DiagPrinter.get();
1376  }
1377  DiagnosticsEngine Diagnostics(
1378      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1379      DiagClient, false);
1380  Diagnostics.setSourceManager(&SourceMgr);
1381  Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1382  return formatter.format();
1383}
1384
1385LangOptions getFormattingLangOpts() {
1386  LangOptions LangOpts;
1387  LangOpts.CPlusPlus = 1;
1388  LangOpts.CPlusPlus11 = 1;
1389  LangOpts.Bool = 1;
1390  LangOpts.ObjC1 = 1;
1391  LangOpts.ObjC2 = 1;
1392  return LangOpts;
1393}
1394
1395} // namespace format
1396} // namespace clang
1397