Format.cpp revision 1d82b1a33bcfe85f4834fb6920517ed07e9355d3
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-formatter"
17
18#include "ContinuationIndenter.h"
19#include "TokenAnnotator.h"
20#include "UnwrappedLineParser.h"
21#include "WhitespaceManager.h"
22#include "clang/Basic/Diagnostic.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Format/Format.h"
25#include "clang/Lex/Lexer.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/Support/Allocator.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/YAMLTraits.h"
30#include "llvm/Support/Path.h"
31#include <queue>
32#include <string>
33
34namespace llvm {
35namespace yaml {
36template <>
37struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
38  static void enumeration(IO &IO,
39                          clang::format::FormatStyle::LanguageStandard &Value) {
40    IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03);
41    IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
42    IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11);
43    IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
44    IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
45  }
46};
47
48template <>
49struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> {
50  static void
51  enumeration(IO &IO, clang::format::FormatStyle::UseTabStyle &Value) {
52    IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never);
53    IO.enumCase(Value, "false", clang::format::FormatStyle::UT_Never);
54    IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always);
55    IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always);
56    IO.enumCase(Value, "ForIndentation",
57                clang::format::FormatStyle::UT_ForIndentation);
58  }
59};
60
61template <>
62struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
63  static void
64  enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
65    IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
66    IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
67    IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
68    IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
69  }
70};
71
72template <>
73struct ScalarEnumerationTraits<
74    clang::format::FormatStyle::NamespaceIndentationKind> {
75  static void
76  enumeration(IO &IO,
77              clang::format::FormatStyle::NamespaceIndentationKind &Value) {
78    IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
79    IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
80    IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
81  }
82};
83
84template <> struct MappingTraits<clang::format::FormatStyle> {
85  static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
86    if (IO.outputting()) {
87      StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
88                                  "Mozilla", "WebKit" };
89      ArrayRef<StringRef> Styles(StylesArray);
90      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
91        StringRef StyleName(Styles[i]);
92        clang::format::FormatStyle PredefinedStyle;
93        if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
94            Style == PredefinedStyle) {
95          IO.mapOptional("# BasedOnStyle", StyleName);
96          break;
97        }
98      }
99    } else {
100      StringRef BasedOnStyle;
101      IO.mapOptional("BasedOnStyle", BasedOnStyle);
102      if (!BasedOnStyle.empty())
103        if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
104          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
105          return;
106        }
107    }
108
109    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
110    IO.mapOptional("ConstructorInitializerIndentWidth",
111                   Style.ConstructorInitializerIndentWidth);
112    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
113    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
114    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
115                   Style.AllowAllParametersOfDeclarationOnNextLine);
116    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
117                   Style.AllowShortIfStatementsOnASingleLine);
118    IO.mapOptional("AllowShortLoopsOnASingleLine",
119                   Style.AllowShortLoopsOnASingleLine);
120    IO.mapOptional("AlwaysBreakTemplateDeclarations",
121                   Style.AlwaysBreakTemplateDeclarations);
122    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
123                   Style.AlwaysBreakBeforeMultilineStrings);
124    IO.mapOptional("BreakBeforeBinaryOperators",
125                   Style.BreakBeforeBinaryOperators);
126    IO.mapOptional("BreakConstructorInitializersBeforeComma",
127                   Style.BreakConstructorInitializersBeforeComma);
128    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
129    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
130    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
131                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
132    IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
133    IO.mapOptional("ExperimentalAutoDetectBinPacking",
134                   Style.ExperimentalAutoDetectBinPacking);
135    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
136    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
137    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
138    IO.mapOptional("ObjCSpaceBeforeProtocolList",
139                   Style.ObjCSpaceBeforeProtocolList);
140    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
141    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
142    IO.mapOptional("PenaltyBreakFirstLessLess",
143                   Style.PenaltyBreakFirstLessLess);
144    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
145    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
146                   Style.PenaltyReturnTypeOnItsOwnLine);
147    IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
148    IO.mapOptional("SpacesBeforeTrailingComments",
149                   Style.SpacesBeforeTrailingComments);
150    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
151    IO.mapOptional("Standard", Style.Standard);
152    IO.mapOptional("IndentWidth", Style.IndentWidth);
153    IO.mapOptional("TabWidth", Style.TabWidth);
154    IO.mapOptional("UseTab", Style.UseTab);
155    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
156    IO.mapOptional("IndentFunctionDeclarationAfterType",
157                   Style.IndentFunctionDeclarationAfterType);
158    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
159    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
160    IO.mapOptional("SpacesInCStyleCastParentheses",
161                   Style.SpacesInCStyleCastParentheses);
162    IO.mapOptional("SpaceAfterControlStatementKeyword",
163                   Style.SpaceAfterControlStatementKeyword);
164    IO.mapOptional("SpaceBeforeAssignmentOperators",
165                   Style.SpaceBeforeAssignmentOperators);
166  }
167};
168}
169}
170
171namespace clang {
172namespace format {
173
174void setDefaultPenalties(FormatStyle &Style) {
175  Style.PenaltyBreakComment = 60;
176  Style.PenaltyBreakFirstLessLess = 120;
177  Style.PenaltyBreakString = 1000;
178  Style.PenaltyExcessCharacter = 1000000;
179}
180
181FormatStyle getLLVMStyle() {
182  FormatStyle LLVMStyle;
183  LLVMStyle.AccessModifierOffset = -2;
184  LLVMStyle.AlignEscapedNewlinesLeft = false;
185  LLVMStyle.AlignTrailingComments = true;
186  LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
187  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
188  LLVMStyle.AllowShortLoopsOnASingleLine = false;
189  LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
190  LLVMStyle.AlwaysBreakTemplateDeclarations = false;
191  LLVMStyle.BinPackParameters = true;
192  LLVMStyle.BreakBeforeBinaryOperators = false;
193  LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
194  LLVMStyle.BreakConstructorInitializersBeforeComma = false;
195  LLVMStyle.ColumnLimit = 80;
196  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
197  LLVMStyle.ConstructorInitializerIndentWidth = 4;
198  LLVMStyle.Cpp11BracedListStyle = false;
199  LLVMStyle.DerivePointerBinding = false;
200  LLVMStyle.ExperimentalAutoDetectBinPacking = false;
201  LLVMStyle.IndentCaseLabels = false;
202  LLVMStyle.IndentFunctionDeclarationAfterType = false;
203  LLVMStyle.IndentWidth = 2;
204  LLVMStyle.TabWidth = 8;
205  LLVMStyle.MaxEmptyLinesToKeep = 1;
206  LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
207  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
208  LLVMStyle.PointerBindsToType = false;
209  LLVMStyle.SpacesBeforeTrailingComments = 1;
210  LLVMStyle.Standard = FormatStyle::LS_Cpp03;
211  LLVMStyle.UseTab = FormatStyle::UT_Never;
212  LLVMStyle.SpacesInParentheses = false;
213  LLVMStyle.SpaceInEmptyParentheses = false;
214  LLVMStyle.SpacesInCStyleCastParentheses = false;
215  LLVMStyle.SpaceAfterControlStatementKeyword = true;
216  LLVMStyle.SpaceBeforeAssignmentOperators = true;
217
218  setDefaultPenalties(LLVMStyle);
219  LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
220
221  return LLVMStyle;
222}
223
224FormatStyle getGoogleStyle() {
225  FormatStyle GoogleStyle;
226  GoogleStyle.AccessModifierOffset = -1;
227  GoogleStyle.AlignEscapedNewlinesLeft = true;
228  GoogleStyle.AlignTrailingComments = true;
229  GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
230  GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
231  GoogleStyle.AllowShortLoopsOnASingleLine = true;
232  GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
233  GoogleStyle.AlwaysBreakTemplateDeclarations = true;
234  GoogleStyle.BinPackParameters = true;
235  GoogleStyle.BreakBeforeBinaryOperators = false;
236  GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
237  GoogleStyle.BreakConstructorInitializersBeforeComma = false;
238  GoogleStyle.ColumnLimit = 80;
239  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
240  GoogleStyle.ConstructorInitializerIndentWidth = 4;
241  GoogleStyle.Cpp11BracedListStyle = true;
242  GoogleStyle.DerivePointerBinding = true;
243  GoogleStyle.ExperimentalAutoDetectBinPacking = false;
244  GoogleStyle.IndentCaseLabels = true;
245  GoogleStyle.IndentFunctionDeclarationAfterType = true;
246  GoogleStyle.IndentWidth = 2;
247  GoogleStyle.TabWidth = 8;
248  GoogleStyle.MaxEmptyLinesToKeep = 1;
249  GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
250  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
251  GoogleStyle.PointerBindsToType = true;
252  GoogleStyle.SpacesBeforeTrailingComments = 2;
253  GoogleStyle.Standard = FormatStyle::LS_Auto;
254  GoogleStyle.UseTab = FormatStyle::UT_Never;
255  GoogleStyle.SpacesInParentheses = false;
256  GoogleStyle.SpaceInEmptyParentheses = false;
257  GoogleStyle.SpacesInCStyleCastParentheses = false;
258  GoogleStyle.SpaceAfterControlStatementKeyword = true;
259  GoogleStyle.SpaceBeforeAssignmentOperators = true;
260
261  setDefaultPenalties(GoogleStyle);
262  GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
263
264  return GoogleStyle;
265}
266
267FormatStyle getChromiumStyle() {
268  FormatStyle ChromiumStyle = getGoogleStyle();
269  ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
270  ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
271  ChromiumStyle.AllowShortLoopsOnASingleLine = false;
272  ChromiumStyle.BinPackParameters = false;
273  ChromiumStyle.DerivePointerBinding = false;
274  ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
275  return ChromiumStyle;
276}
277
278FormatStyle getMozillaStyle() {
279  FormatStyle MozillaStyle = getLLVMStyle();
280  MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
281  MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
282  MozillaStyle.DerivePointerBinding = true;
283  MozillaStyle.IndentCaseLabels = true;
284  MozillaStyle.ObjCSpaceBeforeProtocolList = false;
285  MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
286  MozillaStyle.PointerBindsToType = true;
287  return MozillaStyle;
288}
289
290FormatStyle getWebKitStyle() {
291  FormatStyle Style = getLLVMStyle();
292  Style.AccessModifierOffset = -4;
293  Style.AlignTrailingComments = false;
294  Style.BreakBeforeBinaryOperators = true;
295  Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
296  Style.BreakConstructorInitializersBeforeComma = true;
297  Style.ColumnLimit = 0;
298  Style.IndentWidth = 4;
299  Style.NamespaceIndentation = FormatStyle::NI_Inner;
300  Style.PointerBindsToType = true;
301  return Style;
302}
303
304bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
305  if (Name.equals_lower("llvm"))
306    *Style = getLLVMStyle();
307  else if (Name.equals_lower("chromium"))
308    *Style = getChromiumStyle();
309  else if (Name.equals_lower("mozilla"))
310    *Style = getMozillaStyle();
311  else if (Name.equals_lower("google"))
312    *Style = getGoogleStyle();
313  else if (Name.equals_lower("webkit"))
314    *Style = getWebKitStyle();
315  else
316    return false;
317
318  return true;
319}
320
321llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
322  if (Text.trim().empty())
323    return llvm::make_error_code(llvm::errc::invalid_argument);
324  llvm::yaml::Input Input(Text);
325  Input >> *Style;
326  return Input.error();
327}
328
329std::string configurationAsText(const FormatStyle &Style) {
330  std::string Text;
331  llvm::raw_string_ostream Stream(Text);
332  llvm::yaml::Output Output(Stream);
333  // We use the same mapping method for input and output, so we need a non-const
334  // reference here.
335  FormatStyle NonConstStyle = Style;
336  Output << NonConstStyle;
337  return Stream.str();
338}
339
340namespace {
341
342class NoColumnLimitFormatter {
343public:
344  NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
345
346  /// \brief Formats the line starting at \p State, simply keeping all of the
347  /// input's line breaking decisions.
348  void format(unsigned FirstIndent, const AnnotatedLine *Line) {
349    LineState State =
350        Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
351    while (State.NextToken != NULL) {
352      bool Newline =
353          Indenter->mustBreak(State) ||
354          (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
355      Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
356    }
357  }
358
359private:
360  ContinuationIndenter *Indenter;
361};
362
363class UnwrappedLineFormatter {
364public:
365  UnwrappedLineFormatter(ContinuationIndenter *Indenter,
366                         WhitespaceManager *Whitespaces,
367                         const FormatStyle &Style, const AnnotatedLine &Line)
368      : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), Line(Line),
369        Count(0) {}
370
371  /// \brief Formats an \c UnwrappedLine and returns the penalty.
372  ///
373  /// If \p DryRun is \c false, directly applies the changes.
374  unsigned format(unsigned FirstIndent, bool DryRun = false) {
375    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
376
377    // If the ObjC method declaration does not fit on a line, we should format
378    // it with one arg per line.
379    if (Line.Type == LT_ObjCMethodDecl)
380      State.Stack.back().BreakBeforeParameter = true;
381
382    // Find best solution in solution space.
383    return analyzeSolutionSpace(State, DryRun);
384  }
385
386private:
387  /// \brief An edge in the solution space from \c Previous->State to \c State,
388  /// inserting a newline dependent on the \c NewLine.
389  struct StateNode {
390    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
391        : State(State), NewLine(NewLine), Previous(Previous) {}
392    LineState State;
393    bool NewLine;
394    StateNode *Previous;
395  };
396
397  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
398  ///
399  /// In case of equal penalties, we want to prefer states that were inserted
400  /// first. During state generation we make sure that we insert states first
401  /// that break the line as late as possible.
402  typedef std::pair<unsigned, unsigned> OrderedPenalty;
403
404  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
405  /// \c State has the given \c OrderedPenalty.
406  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
407
408  /// \brief The BFS queue type.
409  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
410                              std::greater<QueueItem> > QueueType;
411
412  /// \brief Analyze the entire solution space starting from \p InitialState.
413  ///
414  /// This implements a variant of Dijkstra's algorithm on the graph that spans
415  /// the solution space (\c LineStates are the nodes). The algorithm tries to
416  /// find the shortest path (the one with lowest penalty) from \p InitialState
417  /// to a state where all tokens are placed. Returns the penalty.
418  ///
419  /// If \p DryRun is \c false, directly applies the changes.
420  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
421    std::set<LineState> Seen;
422
423    // Insert start element into queue.
424    StateNode *Node =
425        new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
426    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
427    ++Count;
428
429    unsigned Penalty = 0;
430
431    // While not empty, take first element and follow edges.
432    while (!Queue.empty()) {
433      Penalty = Queue.top().first.first;
434      StateNode *Node = Queue.top().second;
435      if (Node->State.NextToken == NULL) {
436        DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
437        break;
438      }
439      Queue.pop();
440
441      // Cut off the analysis of certain solutions if the analysis gets too
442      // complex. See description of IgnoreStackForComparison.
443      if (Count > 10000)
444        Node->State.IgnoreStackForComparison = true;
445
446      if (!Seen.insert(Node->State).second)
447        // State already examined with lower penalty.
448        continue;
449
450      addNextStateToQueue(Penalty, Node, /*NewLine=*/false);
451      addNextStateToQueue(Penalty, Node, /*NewLine=*/true);
452    }
453
454    if (Queue.empty())
455      // We were unable to find a solution, do nothing.
456      // FIXME: Add diagnostic?
457      return 0;
458
459    // Reconstruct the solution.
460    if (!DryRun)
461      reconstructPath(InitialState, Queue.top().second);
462
463    DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
464    DEBUG(llvm::dbgs() << "---\n");
465
466    return Penalty;
467  }
468
469  void reconstructPath(LineState &State, StateNode *Current) {
470    std::deque<StateNode *> Path;
471    // We do not need a break before the initial token.
472    while (Current->Previous) {
473      Path.push_front(Current);
474      Current = Current->Previous;
475    }
476    for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
477         I != E; ++I) {
478      unsigned Penalty = 0;
479      formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
480      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
481
482      DEBUG({
483        if ((*I)->NewLine) {
484          llvm::dbgs() << "Penalty for placing "
485                       << (*I)->Previous->State.NextToken->Tok.getName() << ": "
486                       << Penalty << "\n";
487        }
488      });
489    }
490  }
491
492  /// \brief Add the following state to the analysis queue \c Queue.
493  ///
494  /// Assume the current state is \p PreviousNode and has been reached with a
495  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
496  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
497                           bool NewLine) {
498    if (NewLine && !Indenter->canBreak(PreviousNode->State))
499      return;
500    if (!NewLine && Indenter->mustBreak(PreviousNode->State))
501      return;
502
503    StateNode *Node = new (Allocator.Allocate())
504        StateNode(PreviousNode->State, NewLine, PreviousNode);
505    if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
506      return;
507
508    Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
509
510    Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
511    ++Count;
512  }
513
514  /// \brief If the \p State's next token is an r_brace closing a nested block,
515  /// format the nested block before it.
516  ///
517  /// Returns \c true if all children could be placed successfully and adapts
518  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
519  /// creates changes using \c Whitespaces.
520  ///
521  /// The crucial idea here is that children always get formatted upon
522  /// encountering the closing brace right after the nested block. Now, if we
523  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
524  /// \c false), the entire block has to be kept on the same line (which is only
525  /// possible if it fits on the line, only contains a single statement, etc.
526  ///
527  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
528  /// break after the "{", format all lines with correct indentation and the put
529  /// the closing "}" on yet another new line.
530  ///
531  /// This enables us to keep the simple structure of the
532  /// \c UnwrappedLineFormatter, where we only have two options for each token:
533  /// break or don't break.
534  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
535                      unsigned &Penalty) {
536    const FormatToken &LBrace = *State.NextToken->Previous;
537    if (LBrace.isNot(tok::l_brace) || LBrace.BlockKind != BK_Block ||
538        LBrace.Children.size() == 0)
539      // The previous token does not open a block. Nothing to do. We don't
540      // assert so that we can simply call this function for all tokens.
541      return true;
542
543    if (NewLine) {
544      unsigned ParentIndent = State.Stack.back().Indent;
545      for (SmallVector<AnnotatedLine *, 1>::const_iterator
546               I = LBrace.Children.begin(),
547               E = LBrace.Children.end();
548           I != E; ++I) {
549        unsigned Indent =
550            ParentIndent + ((*I)->Level - Line.Level - 1) * Style.IndentWidth;
551        if (!DryRun) {
552          unsigned Newlines = std::min((*I)->First->NewlinesBefore,
553                                       Style.MaxEmptyLinesToKeep + 1);
554          Newlines = std::max(1u, Newlines);
555          Whitespaces->replaceWhitespace(
556              *(*I)->First, Newlines, (*I)->Level, /*Spaces=*/Indent,
557              /*StartOfTokenColumn=*/Indent, Line.InPPDirective);
558        }
559        UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style, **I);
560        Penalty += Formatter.format(Indent, DryRun);
561      }
562      return true;
563    }
564
565    if (LBrace.Children.size() > 1)
566      return false; // Cannot merge multiple statements into a single line.
567
568    // We can't put the closing "}" on a line with a trailing comment.
569    if (LBrace.Children[0]->Last->isTrailingComment())
570      return false;
571
572    if (!DryRun) {
573      Whitespaces->replaceWhitespace(
574          *LBrace.Children[0]->First,
575          /*Newlines=*/0, /*IndentLevel=*/1, /*Spaces=*/1,
576          /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
577      UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style,
578                                       *LBrace.Children[0]);
579      Penalty += Formatter.format(State.Column + 1, DryRun);
580    }
581
582    State.Column += 1 + LBrace.Children[0]->Last->TotalLength;
583    return true;
584  }
585
586  ContinuationIndenter *Indenter;
587  WhitespaceManager *Whitespaces;
588  FormatStyle Style;
589  const AnnotatedLine &Line;
590
591  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
592  QueueType Queue;
593  // Increasing count of \c StateNode items we have created. This is used
594  // to create a deterministic order independent of the container.
595  unsigned Count;
596};
597
598class FormatTokenLexer {
599public:
600  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
601                   encoding::Encoding Encoding)
602      : FormatTok(NULL), GreaterStashed(false), Column(0),
603        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
604        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
605    Lex.SetKeepWhitespaceMode(true);
606  }
607
608  ArrayRef<FormatToken *> lex() {
609    assert(Tokens.empty());
610    do {
611      Tokens.push_back(getNextToken());
612      maybeJoinPreviousTokens();
613    } while (Tokens.back()->Tok.isNot(tok::eof));
614    return Tokens;
615  }
616
617  IdentifierTable &getIdentTable() { return IdentTable; }
618
619private:
620  void maybeJoinPreviousTokens() {
621    if (Tokens.size() < 4)
622      return;
623    FormatToken *Last = Tokens.back();
624    if (!Last->is(tok::r_paren))
625      return;
626
627    FormatToken *String = Tokens[Tokens.size() - 2];
628    if (!String->is(tok::string_literal) || String->IsMultiline)
629      return;
630
631    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
632      return;
633
634    FormatToken *Macro = Tokens[Tokens.size() - 4];
635    if (Macro->TokenText != "_T")
636      return;
637
638    const char *Start = Macro->TokenText.data();
639    const char *End = Last->TokenText.data() + Last->TokenText.size();
640    String->TokenText = StringRef(Start, End - Start);
641    String->IsFirst = Macro->IsFirst;
642    String->LastNewlineOffset = Macro->LastNewlineOffset;
643    String->WhitespaceRange = Macro->WhitespaceRange;
644    String->OriginalColumn = Macro->OriginalColumn;
645    String->ColumnWidth = encoding::columnWidthWithTabs(
646        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
647
648    Tokens.pop_back();
649    Tokens.pop_back();
650    Tokens.pop_back();
651    Tokens.back() = String;
652  }
653
654  FormatToken *getNextToken() {
655    if (GreaterStashed) {
656      // Create a synthesized second '>' token.
657      // FIXME: Increment Column and set OriginalColumn.
658      Token Greater = FormatTok->Tok;
659      FormatTok = new (Allocator.Allocate()) FormatToken;
660      FormatTok->Tok = Greater;
661      SourceLocation GreaterLocation =
662          FormatTok->Tok.getLocation().getLocWithOffset(1);
663      FormatTok->WhitespaceRange =
664          SourceRange(GreaterLocation, GreaterLocation);
665      FormatTok->TokenText = ">";
666      FormatTok->ColumnWidth = 1;
667      GreaterStashed = false;
668      return FormatTok;
669    }
670
671    FormatTok = new (Allocator.Allocate()) FormatToken;
672    readRawToken(*FormatTok);
673    SourceLocation WhitespaceStart =
674        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
675    if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
676      FormatTok->IsFirst = true;
677
678    // Consume and record whitespace until we find a significant token.
679    unsigned WhitespaceLength = TrailingWhitespace;
680    while (FormatTok->Tok.is(tok::unknown)) {
681      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
682        switch (FormatTok->TokenText[i]) {
683        case '\n':
684          ++FormatTok->NewlinesBefore;
685          // FIXME: This is technically incorrect, as it could also
686          // be a literal backslash at the end of the line.
687          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
688                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
689                          FormatTok->TokenText[i - 2] != '\\')))
690            FormatTok->HasUnescapedNewline = true;
691          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
692          Column = 0;
693          break;
694        case '\r':
695        case '\f':
696        case '\v':
697          Column = 0;
698          break;
699        case ' ':
700          ++Column;
701          break;
702        case '\t':
703          Column += Style.TabWidth - Column % Style.TabWidth;
704          break;
705        case '\\':
706          ++Column;
707          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
708                             FormatTok->TokenText[i + 1] != '\n'))
709            FormatTok->Type = TT_ImplicitStringLiteral;
710          break;
711        default:
712          FormatTok->Type = TT_ImplicitStringLiteral;
713          ++Column;
714          break;
715        }
716      }
717
718      if (FormatTok->Type == TT_ImplicitStringLiteral)
719        break;
720      WhitespaceLength += FormatTok->Tok.getLength();
721
722      readRawToken(*FormatTok);
723    }
724
725    // In case the token starts with escaped newlines, we want to
726    // take them into account as whitespace - this pattern is quite frequent
727    // in macro definitions.
728    // FIXME: Add a more explicit test.
729    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
730           FormatTok->TokenText[1] == '\n') {
731      // FIXME: ++FormatTok->NewlinesBefore is missing...
732      WhitespaceLength += 2;
733      Column = 0;
734      FormatTok->TokenText = FormatTok->TokenText.substr(2);
735    }
736
737    FormatTok->WhitespaceRange = SourceRange(
738        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
739
740    FormatTok->OriginalColumn = Column;
741
742    TrailingWhitespace = 0;
743    if (FormatTok->Tok.is(tok::comment)) {
744      // FIXME: Add the trimmed whitespace to Column.
745      StringRef UntrimmedText = FormatTok->TokenText;
746      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
747      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
748    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
749      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
750      FormatTok->Tok.setIdentifierInfo(&Info);
751      FormatTok->Tok.setKind(Info.getTokenID());
752    } else if (FormatTok->Tok.is(tok::greatergreater)) {
753      FormatTok->Tok.setKind(tok::greater);
754      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
755      GreaterStashed = true;
756    }
757
758    // Now FormatTok is the next non-whitespace token.
759
760    StringRef Text = FormatTok->TokenText;
761    size_t FirstNewlinePos = Text.find('\n');
762    if (FirstNewlinePos == StringRef::npos) {
763      // FIXME: ColumnWidth actually depends on the start column, we need to
764      // take this into account when the token is moved.
765      FormatTok->ColumnWidth =
766          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
767      Column += FormatTok->ColumnWidth;
768    } else {
769      FormatTok->IsMultiline = true;
770      // FIXME: ColumnWidth actually depends on the start column, we need to
771      // take this into account when the token is moved.
772      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
773          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
774
775      // The last line of the token always starts in column 0.
776      // Thus, the length can be precomputed even in the presence of tabs.
777      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
778          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
779          Encoding);
780      Column = FormatTok->LastLineColumnWidth;
781    }
782
783    return FormatTok;
784  }
785
786  FormatToken *FormatTok;
787  bool GreaterStashed;
788  unsigned Column;
789  unsigned TrailingWhitespace;
790  Lexer &Lex;
791  SourceManager &SourceMgr;
792  FormatStyle &Style;
793  IdentifierTable IdentTable;
794  encoding::Encoding Encoding;
795  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
796  SmallVector<FormatToken *, 16> Tokens;
797
798  void readRawToken(FormatToken &Tok) {
799    Lex.LexFromRawLexer(Tok.Tok);
800    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
801                              Tok.Tok.getLength());
802    // For formatting, treat unterminated string literals like normal string
803    // literals.
804    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
805        Tok.TokenText[0] == '"') {
806      Tok.Tok.setKind(tok::string_literal);
807      Tok.IsUnterminatedLiteral = true;
808    }
809  }
810};
811
812class Formatter : public UnwrappedLineConsumer {
813public:
814  Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
815            const std::vector<CharSourceRange> &Ranges)
816      : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
817        Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
818        Ranges(Ranges), Encoding(encoding::detectEncoding(Lex.getBuffer())) {
819    DEBUG(llvm::dbgs() << "File encoding: "
820                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
821                                                               : "unknown")
822                       << "\n");
823  }
824
825  virtual ~Formatter() {
826    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
827      delete AnnotatedLines[i];
828    }
829  }
830
831  tooling::Replacements format() {
832    FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
833
834    UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
835    bool StructuralError = Parser.parse();
836    TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
837    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
838      Annotator.annotate(*AnnotatedLines[i]);
839    }
840    deriveLocalStyle();
841    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
842      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
843    }
844
845    Annotator.setCommentLineLevels(AnnotatedLines);
846
847    std::vector<int> IndentForLevel;
848    bool PreviousLineWasTouched = false;
849    const AnnotatedLine *PreviousLine = NULL;
850    bool FormatPPDirective = false;
851    for (SmallVectorImpl<AnnotatedLine *>::iterator I = AnnotatedLines.begin(),
852                                                    E = AnnotatedLines.end();
853         I != E; ++I) {
854      const AnnotatedLine &TheLine = **I;
855      const FormatToken *FirstTok = TheLine.First;
856      int Offset = getIndentOffset(*TheLine.First);
857
858      // Check whether this line is part of a formatted preprocessor directive.
859      if (FirstTok->HasUnescapedNewline)
860        FormatPPDirective = false;
861      if (!FormatPPDirective && TheLine.InPPDirective &&
862          (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
863        FormatPPDirective = true;
864
865      // Determine indent and try to merge multiple unwrapped lines.
866      while (IndentForLevel.size() <= TheLine.Level)
867        IndentForLevel.push_back(-1);
868      IndentForLevel.resize(TheLine.Level + 1);
869      unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
870      if (static_cast<int>(Indent) + Offset >= 0)
871        Indent += Offset;
872      tryFitMultipleLinesInOne(Indent, I, E);
873
874      bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
875      if (TheLine.First->is(tok::eof)) {
876        if (PreviousLineWasTouched) {
877          unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u);
878          Whitespaces.replaceWhitespace(*TheLine.First, NewLines,
879                                        /*IndentLevel=*/0, /*Spaces=*/0,
880                                        /*TargetColumn=*/0);
881        }
882      } else if (TheLine.Type != LT_Invalid &&
883                 (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
884        unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
885        if (FirstTok->WhitespaceRange.isValid() &&
886            // Insert a break even if there is a structural error in case where
887            // we break apart a line consisting of multiple unwrapped lines.
888            (FirstTok->NewlinesBefore == 0 || !StructuralError)) {
889          formatFirstToken(*TheLine.First, PreviousLine, Indent,
890                           TheLine.InPPDirective);
891        } else {
892          Indent = LevelIndent = FirstTok->OriginalColumn;
893        }
894        ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
895                                      BinPackInconclusiveFunctions);
896
897        // If everything fits on a single line, just put it there.
898        unsigned ColumnLimit = Style.ColumnLimit;
899        AnnotatedLine *NextLine = *(I + 1);
900        if ((I + 1) != E && NextLine->InPPDirective &&
901            !NextLine->First->HasUnescapedNewline)
902          ColumnLimit = getColumnLimit(TheLine.InPPDirective);
903
904        if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
905          LineState State =
906              Indenter.getInitialState(Indent, &TheLine, /*DryRun=*/false);
907          while (State.NextToken != NULL)
908            Indenter.addTokenToState(State, false, false);
909        } else if (Style.ColumnLimit == 0) {
910          NoColumnLimitFormatter Formatter(&Indenter);
911          Formatter.format(Indent, &TheLine);
912        } else {
913          UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style,
914                                           TheLine);
915          Formatter.format(Indent);
916        }
917
918        IndentForLevel[TheLine.Level] = LevelIndent;
919        PreviousLineWasTouched = true;
920      } else {
921        // Format the first token if necessary, and notify the WhitespaceManager
922        // about the unchanged whitespace.
923        for (const FormatToken *Tok = TheLine.First; Tok != NULL;
924             Tok = Tok->Next) {
925          if (Tok == TheLine.First &&
926              (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
927            unsigned LevelIndent = Tok->OriginalColumn;
928            // Remove trailing whitespace of the previous line if it was
929            // touched.
930            if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
931              formatFirstToken(*Tok, PreviousLine, LevelIndent,
932                               TheLine.InPPDirective);
933            } else {
934              Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
935            }
936
937            if (static_cast<int>(LevelIndent) - Offset >= 0)
938              LevelIndent -= Offset;
939            if (Tok->isNot(tok::comment))
940              IndentForLevel[TheLine.Level] = LevelIndent;
941          } else {
942            Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
943          }
944        }
945        // If we did not reformat this unwrapped line, the column at the end of
946        // the last token is unchanged - thus, we can calculate the end of the
947        // last token.
948        PreviousLineWasTouched = false;
949      }
950      PreviousLine = *I;
951    }
952    return Whitespaces.generateReplacements();
953  }
954
955private:
956  static bool inputUsesCRLF(StringRef Text) {
957    return Text.count('\r') * 2 > Text.count('\n');
958  }
959
960  void deriveLocalStyle() {
961    unsigned CountBoundToVariable = 0;
962    unsigned CountBoundToType = 0;
963    bool HasCpp03IncompatibleFormat = false;
964    bool HasBinPackedFunction = false;
965    bool HasOnePerLineFunction = false;
966    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
967      if (!AnnotatedLines[i]->First->Next)
968        continue;
969      FormatToken *Tok = AnnotatedLines[i]->First->Next;
970      while (Tok->Next) {
971        if (Tok->Type == TT_PointerOrReference) {
972          bool SpacesBefore =
973              Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
974          bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
975                             Tok->Next->WhitespaceRange.getEnd();
976          if (SpacesBefore && !SpacesAfter)
977            ++CountBoundToVariable;
978          else if (!SpacesBefore && SpacesAfter)
979            ++CountBoundToType;
980        }
981
982        if (Tok->Type == TT_TemplateCloser &&
983            Tok->Previous->Type == TT_TemplateCloser &&
984            Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd())
985          HasCpp03IncompatibleFormat = true;
986
987        if (Tok->PackingKind == PPK_BinPacked)
988          HasBinPackedFunction = true;
989        if (Tok->PackingKind == PPK_OnePerLine)
990          HasOnePerLineFunction = true;
991
992        Tok = Tok->Next;
993      }
994    }
995    if (Style.DerivePointerBinding) {
996      if (CountBoundToType > CountBoundToVariable)
997        Style.PointerBindsToType = true;
998      else if (CountBoundToType < CountBoundToVariable)
999        Style.PointerBindsToType = false;
1000    }
1001    if (Style.Standard == FormatStyle::LS_Auto) {
1002      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1003                                                  : FormatStyle::LS_Cpp03;
1004    }
1005    BinPackInconclusiveFunctions =
1006        HasBinPackedFunction || !HasOnePerLineFunction;
1007  }
1008
1009  /// \brief Get the indent of \p Level from \p IndentForLevel.
1010  ///
1011  /// \p IndentForLevel must contain the indent for the level \c l
1012  /// at \p IndentForLevel[l], or a value < 0 if the indent for
1013  /// that level is unknown.
1014  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1015    if (IndentForLevel[Level] != -1)
1016      return IndentForLevel[Level];
1017    if (Level == 0)
1018      return 0;
1019    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
1020  }
1021
1022  /// \brief Get the offset of the line relatively to the level.
1023  ///
1024  /// For example, 'public:' labels in classes are offset by 1 or 2
1025  /// characters to the left from their level.
1026  int getIndentOffset(const FormatToken &RootToken) {
1027    if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
1028      return Style.AccessModifierOffset;
1029    return 0;
1030  }
1031
1032  /// \brief Tries to merge lines into one.
1033  ///
1034  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1035  /// if possible; note that \c I will be incremented when lines are merged.
1036  void tryFitMultipleLinesInOne(unsigned Indent,
1037                                SmallVectorImpl<AnnotatedLine *>::iterator &I,
1038                                SmallVectorImpl<AnnotatedLine *>::iterator E) {
1039    // We can never merge stuff if there are trailing line comments.
1040    AnnotatedLine *TheLine = *I;
1041    if (TheLine->Last->Type == TT_LineComment)
1042      return;
1043
1044    if (Indent > Style.ColumnLimit)
1045      return;
1046
1047    unsigned Limit = Style.ColumnLimit - Indent;
1048    // If we already exceed the column limit, we set 'Limit' to 0. The different
1049    // tryMerge..() functions can then decide whether to still do merging.
1050    Limit = TheLine->Last->TotalLength > Limit
1051                ? 0
1052                : Limit - TheLine->Last->TotalLength;
1053
1054    if (I + 1 == E || (*(I + 1))->Type == LT_Invalid)
1055      return;
1056
1057    if (TheLine->Last->is(tok::l_brace)) {
1058      tryMergeSimpleBlock(I, E, Limit);
1059    } else if (Style.AllowShortIfStatementsOnASingleLine &&
1060               TheLine->First->is(tok::kw_if)) {
1061      tryMergeSimpleControlStatement(I, E, Limit);
1062    } else if (Style.AllowShortLoopsOnASingleLine &&
1063               TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
1064      tryMergeSimpleControlStatement(I, E, Limit);
1065    } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
1066                                          TheLine->First->IsFirst)) {
1067      tryMergeSimplePPDirective(I, E, Limit);
1068    }
1069  }
1070
1071  void tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1072                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
1073                                 unsigned Limit) {
1074    if (Limit == 0)
1075      return;
1076    AnnotatedLine &Line = **I;
1077    if (!(*(I + 1))->InPPDirective || (*(I + 1))->First->HasUnescapedNewline)
1078      return;
1079    if (I + 2 != E && (*(I + 2))->InPPDirective &&
1080        !(*(I + 2))->First->HasUnescapedNewline)
1081      return;
1082    if (1 + (*(I + 1))->Last->TotalLength > Limit)
1083      return;
1084    join(Line, **(++I));
1085  }
1086
1087  void
1088  tryMergeSimpleControlStatement(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1089                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
1090                                 unsigned Limit) {
1091    if (Limit == 0)
1092      return;
1093    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
1094        (*(I + 1))->First->is(tok::l_brace))
1095      return;
1096    if ((*(I + 1))->InPPDirective != (*I)->InPPDirective ||
1097        ((*(I + 1))->InPPDirective && (*(I + 1))->First->HasUnescapedNewline))
1098      return;
1099    AnnotatedLine &Line = **I;
1100    if (Line.Last->isNot(tok::r_paren))
1101      return;
1102    if (1 + (*(I + 1))->Last->TotalLength > Limit)
1103      return;
1104    if ((*(I + 1))->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
1105                                   tok::kw_while) ||
1106        (*(I + 1))->First->Type == TT_LineComment)
1107      return;
1108    // Only inline simple if's (no nested if or else).
1109    if (I + 2 != E && Line.First->is(tok::kw_if) &&
1110        (*(I + 2))->First->is(tok::kw_else))
1111      return;
1112    join(Line, **(++I));
1113  }
1114
1115  void tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1116                           SmallVectorImpl<AnnotatedLine *>::iterator E,
1117                           unsigned Limit) {
1118    // No merging if the brace already is on the next line.
1119    if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
1120      return;
1121
1122    // First, check that the current line allows merging. This is the case if
1123    // we're not in a control flow statement and the last token is an opening
1124    // brace.
1125    AnnotatedLine &Line = **I;
1126    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1127                            tok::kw_else, tok::kw_try, tok::kw_catch,
1128                            tok::kw_for,
1129                            // This gets rid of all ObjC @ keywords and methods.
1130                            tok::at, tok::minus, tok::plus))
1131      return;
1132
1133    FormatToken *Tok = (*(I + 1))->First;
1134    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
1135        (Tok->getNextNonComment() == NULL ||
1136         Tok->getNextNonComment()->is(tok::semi))) {
1137      // We merge empty blocks even if the line exceeds the column limit.
1138      Tok->SpacesRequiredBefore = 0;
1139      Tok->CanBreakBefore = true;
1140      join(Line, **(I + 1));
1141      I += 1;
1142    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
1143      // Check that we still have three lines and they fit into the limit.
1144      if (I + 2 == E || (*(I + 2))->Type == LT_Invalid ||
1145          !nextTwoLinesFitInto(I, Limit))
1146        return;
1147
1148      // Second, check that the next line does not contain any braces - if it
1149      // does, readability declines when putting it into a single line.
1150      if ((*(I + 1))->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1151        return;
1152      do {
1153        if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1154          return;
1155        Tok = Tok->Next;
1156      } while (Tok != NULL);
1157
1158      // Last, check that the third line contains a single closing brace.
1159      Tok = (*(I + 2))->First;
1160      if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
1161          Tok->MustBreakBefore)
1162        return;
1163
1164      join(Line, **(I + 1));
1165      join(Line, **(I + 2));
1166      I += 2;
1167    }
1168  }
1169
1170  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::iterator I,
1171                           unsigned Limit) {
1172    return 1 + (*(I + 1))->Last->TotalLength + 1 +
1173               (*(I + 2))->Last->TotalLength <=
1174           Limit;
1175  }
1176
1177  void join(AnnotatedLine &A, const AnnotatedLine &B) {
1178    assert(!A.Last->Next);
1179    assert(!B.First->Previous);
1180    A.Last->Next = B.First;
1181    B.First->Previous = A.Last;
1182    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
1183    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
1184      Tok->TotalLength += LengthA;
1185      A.Last = Tok;
1186    }
1187  }
1188
1189  bool touchesRanges(const CharSourceRange &Range) {
1190    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1191      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1192                                               Ranges[i].getBegin()) &&
1193          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1194                                               Range.getBegin()))
1195        return true;
1196    }
1197    return false;
1198  }
1199
1200  bool touchesLine(const AnnotatedLine &TheLine) {
1201    const FormatToken *First = TheLine.First;
1202    const FormatToken *Last = TheLine.Last;
1203    CharSourceRange LineRange = CharSourceRange::getCharRange(
1204        First->WhitespaceRange.getBegin().getLocWithOffset(
1205            First->LastNewlineOffset),
1206        Last->getStartOfNonWhitespace().getLocWithOffset(
1207            Last->TokenText.size() - 1));
1208    return touchesRanges(LineRange);
1209  }
1210
1211  bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::iterator I,
1212                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1213    for (; I != E; ++I) {
1214      if ((*I)->First->HasUnescapedNewline)
1215        return false;
1216      if (touchesLine(**I))
1217        return true;
1218    }
1219    return false;
1220  }
1221
1222  bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1223    const FormatToken *First = TheLine.First;
1224    CharSourceRange LineRange = CharSourceRange::getCharRange(
1225        First->WhitespaceRange.getBegin(),
1226        First->WhitespaceRange.getBegin().getLocWithOffset(
1227            First->LastNewlineOffset));
1228    return touchesRanges(LineRange);
1229  }
1230
1231  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1232    AnnotatedLines.push_back(new AnnotatedLine(TheLine));
1233  }
1234
1235  /// \brief Add a new line and the required indent before the first Token
1236  /// of the \c UnwrappedLine if there was no structural parsing error.
1237  /// Returns the indent level of the \c UnwrappedLine.
1238  void formatFirstToken(const FormatToken &RootToken,
1239                        const AnnotatedLine *PreviousLine, unsigned Indent,
1240                        bool InPPDirective) {
1241    unsigned Newlines =
1242        std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1243    // Remove empty lines before "}" where applicable.
1244    if (RootToken.is(tok::r_brace) &&
1245        (!RootToken.Next ||
1246         (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
1247      Newlines = std::min(Newlines, 1u);
1248    if (Newlines == 0 && !RootToken.IsFirst)
1249      Newlines = 1;
1250
1251    // Insert extra new line before access specifiers.
1252    if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
1253        RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
1254      ++Newlines;
1255
1256    // Remove empty lines after access specifiers.
1257    if (PreviousLine && PreviousLine->First->isAccessSpecifier())
1258      Newlines = std::min(1u, Newlines);
1259
1260    Whitespaces.replaceWhitespace(
1261        RootToken, Newlines, Indent / Style.IndentWidth, Indent, Indent,
1262        InPPDirective && !RootToken.HasUnescapedNewline);
1263  }
1264
1265  unsigned getColumnLimit(bool InPPDirective) const {
1266    // In preprocessor directives reserve two chars for trailing " \"
1267    return Style.ColumnLimit - (InPPDirective ? 2 : 0);
1268  }
1269
1270  FormatStyle Style;
1271  Lexer &Lex;
1272  SourceManager &SourceMgr;
1273  WhitespaceManager Whitespaces;
1274  std::vector<CharSourceRange> Ranges;
1275  SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1276
1277  encoding::Encoding Encoding;
1278  bool BinPackInconclusiveFunctions;
1279};
1280
1281} // end anonymous namespace
1282
1283tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1284                               SourceManager &SourceMgr,
1285                               std::vector<CharSourceRange> Ranges) {
1286  Formatter formatter(Style, Lex, SourceMgr, Ranges);
1287  return formatter.format();
1288}
1289
1290tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1291                               std::vector<tooling::Range> Ranges,
1292                               StringRef FileName) {
1293  FileManager Files((FileSystemOptions()));
1294  DiagnosticsEngine Diagnostics(
1295      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1296      new DiagnosticOptions);
1297  SourceManager SourceMgr(Diagnostics, Files);
1298  llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1299  const clang::FileEntry *Entry =
1300      Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1301  SourceMgr.overrideFileContents(Entry, Buf);
1302  FileID ID =
1303      SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1304  Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1305            getFormattingLangOpts(Style.Standard));
1306  SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1307  std::vector<CharSourceRange> CharRanges;
1308  for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1309    SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1310    SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1311    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1312  }
1313  return reformat(Style, Lex, SourceMgr, CharRanges);
1314}
1315
1316LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1317  LangOptions LangOpts;
1318  LangOpts.CPlusPlus = 1;
1319  LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1320  LangOpts.LineComment = 1;
1321  LangOpts.Bool = 1;
1322  LangOpts.ObjC1 = 1;
1323  LangOpts.ObjC2 = 1;
1324  return LangOpts;
1325}
1326
1327const char *StyleOptionHelpDescription =
1328    "Coding style, currently supports:\n"
1329    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1330    "Use -style=file to load style configuration from\n"
1331    ".clang-format file located in one of the parent\n"
1332    "directories of the source file (or current\n"
1333    "directory for stdin).\n"
1334    "Use -style=\"{key: value, ...}\" to set specific\n"
1335    "parameters, e.g.:\n"
1336    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1337
1338FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
1339  // Fallback style in case the rest of this function can't determine a style.
1340  StringRef FallbackStyle = "LLVM";
1341  FormatStyle Style;
1342  getPredefinedStyle(FallbackStyle, &Style);
1343
1344  if (StyleName.startswith("{")) {
1345    // Parse YAML/JSON style from the command line.
1346    if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1347      llvm::errs() << "Error parsing -style: " << ec.message()
1348                   << ", using " << FallbackStyle << " style\n";
1349    }
1350    return Style;
1351  }
1352
1353  if (!StyleName.equals_lower("file")) {
1354    if (!getPredefinedStyle(StyleName, &Style))
1355      llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1356                   << " style\n";
1357    return Style;
1358  }
1359
1360  SmallString<128> Path(FileName);
1361  llvm::sys::fs::make_absolute(Path);
1362  for (StringRef Directory = Path;
1363       !Directory.empty();
1364       Directory = llvm::sys::path::parent_path(Directory)) {
1365    if (!llvm::sys::fs::is_directory(Directory))
1366      continue;
1367    SmallString<128> ConfigFile(Directory);
1368
1369    llvm::sys::path::append(ConfigFile, ".clang-format");
1370    DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1371    bool IsFile = false;
1372    // Ignore errors from is_regular_file: we only need to know if we can read
1373    // the file or not.
1374    llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1375
1376    if (!IsFile) {
1377      // Try _clang-format too, since dotfiles are not commonly used on Windows.
1378      ConfigFile = Directory;
1379      llvm::sys::path::append(ConfigFile, "_clang-format");
1380      DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1381      llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1382    }
1383
1384    if (IsFile) {
1385      OwningPtr<llvm::MemoryBuffer> Text;
1386      if (llvm::error_code ec = llvm::MemoryBuffer::getFile(ConfigFile, Text)) {
1387        llvm::errs() << ec.message() << "\n";
1388        continue;
1389      }
1390      if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1391        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1392                     << "\n";
1393        continue;
1394      }
1395      DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1396      return Style;
1397    }
1398  }
1399  llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1400               << " style\n";
1401  return Style;
1402}
1403
1404} // namespace format
1405} // namespace clang
1406