Format.cpp revision 78a4e619e775b0dbaa10c9feaea0adf1d3dfe507
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-formatter"
17
18#include "ContinuationIndenter.h"
19#include "TokenAnnotator.h"
20#include "UnwrappedLineParser.h"
21#include "WhitespaceManager.h"
22#include "clang/Basic/Diagnostic.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Format/Format.h"
25#include "clang/Lex/Lexer.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/Support/Allocator.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/YAMLTraits.h"
30#include "llvm/Support/Path.h"
31#include <queue>
32#include <string>
33
34namespace llvm {
35namespace yaml {
36template <>
37struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
38  static void enumeration(IO &IO,
39                          clang::format::FormatStyle::LanguageStandard &Value) {
40    IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03);
41    IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
42    IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11);
43    IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
44    IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
45  }
46};
47
48template <>
49struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> {
50  static void
51  enumeration(IO &IO, clang::format::FormatStyle::UseTabStyle &Value) {
52    IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never);
53    IO.enumCase(Value, "false", clang::format::FormatStyle::UT_Never);
54    IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always);
55    IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always);
56    IO.enumCase(Value, "ForIndentation",
57                clang::format::FormatStyle::UT_ForIndentation);
58  }
59};
60
61template <>
62struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
63  static void
64  enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
65    IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
66    IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
67    IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
68    IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
69  }
70};
71
72template <>
73struct ScalarEnumerationTraits<
74    clang::format::FormatStyle::NamespaceIndentationKind> {
75  static void
76  enumeration(IO &IO,
77              clang::format::FormatStyle::NamespaceIndentationKind &Value) {
78    IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
79    IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
80    IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
81  }
82};
83
84template <> struct MappingTraits<clang::format::FormatStyle> {
85  static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
86    if (IO.outputting()) {
87      StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
88                                  "Mozilla", "WebKit" };
89      ArrayRef<StringRef> Styles(StylesArray);
90      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
91        StringRef StyleName(Styles[i]);
92        clang::format::FormatStyle PredefinedStyle;
93        if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
94            Style == PredefinedStyle) {
95          IO.mapOptional("# BasedOnStyle", StyleName);
96          break;
97        }
98      }
99    } else {
100      StringRef BasedOnStyle;
101      IO.mapOptional("BasedOnStyle", BasedOnStyle);
102      if (!BasedOnStyle.empty())
103        if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
104          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
105          return;
106        }
107    }
108
109    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
110    IO.mapOptional("ConstructorInitializerIndentWidth",
111                   Style.ConstructorInitializerIndentWidth);
112    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
113    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
114    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
115                   Style.AllowAllParametersOfDeclarationOnNextLine);
116    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
117                   Style.AllowShortIfStatementsOnASingleLine);
118    IO.mapOptional("AllowShortLoopsOnASingleLine",
119                   Style.AllowShortLoopsOnASingleLine);
120    IO.mapOptional("AlwaysBreakTemplateDeclarations",
121                   Style.AlwaysBreakTemplateDeclarations);
122    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
123                   Style.AlwaysBreakBeforeMultilineStrings);
124    IO.mapOptional("BreakBeforeBinaryOperators",
125                   Style.BreakBeforeBinaryOperators);
126    IO.mapOptional("BreakConstructorInitializersBeforeComma",
127                   Style.BreakConstructorInitializersBeforeComma);
128    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
129    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
130    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
131                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
132    IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
133    IO.mapOptional("ExperimentalAutoDetectBinPacking",
134                   Style.ExperimentalAutoDetectBinPacking);
135    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
136    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
137    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
138    IO.mapOptional("ObjCSpaceBeforeProtocolList",
139                   Style.ObjCSpaceBeforeProtocolList);
140    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
141    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
142    IO.mapOptional("PenaltyBreakFirstLessLess",
143                   Style.PenaltyBreakFirstLessLess);
144    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
145    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
146                   Style.PenaltyReturnTypeOnItsOwnLine);
147    IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
148    IO.mapOptional("SpacesBeforeTrailingComments",
149                   Style.SpacesBeforeTrailingComments);
150    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
151    IO.mapOptional("Standard", Style.Standard);
152    IO.mapOptional("IndentWidth", Style.IndentWidth);
153    IO.mapOptional("TabWidth", Style.TabWidth);
154    IO.mapOptional("UseTab", Style.UseTab);
155    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
156    IO.mapOptional("IndentFunctionDeclarationAfterType",
157                   Style.IndentFunctionDeclarationAfterType);
158    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
159    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
160    IO.mapOptional("SpacesInCStyleCastParentheses",
161                   Style.SpacesInCStyleCastParentheses);
162    IO.mapOptional("SpaceAfterControlStatementKeyword",
163                   Style.SpaceAfterControlStatementKeyword);
164    IO.mapOptional("SpaceBeforeAssignmentOperators",
165                   Style.SpaceBeforeAssignmentOperators);
166  }
167};
168}
169}
170
171namespace clang {
172namespace format {
173
174void setDefaultPenalties(FormatStyle &Style) {
175  Style.PenaltyBreakComment = 60;
176  Style.PenaltyBreakFirstLessLess = 120;
177  Style.PenaltyBreakString = 1000;
178  Style.PenaltyExcessCharacter = 1000000;
179}
180
181FormatStyle getLLVMStyle() {
182  FormatStyle LLVMStyle;
183  LLVMStyle.AccessModifierOffset = -2;
184  LLVMStyle.AlignEscapedNewlinesLeft = false;
185  LLVMStyle.AlignTrailingComments = true;
186  LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
187  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
188  LLVMStyle.AllowShortLoopsOnASingleLine = false;
189  LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
190  LLVMStyle.AlwaysBreakTemplateDeclarations = false;
191  LLVMStyle.BinPackParameters = true;
192  LLVMStyle.BreakBeforeBinaryOperators = false;
193  LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
194  LLVMStyle.BreakConstructorInitializersBeforeComma = false;
195  LLVMStyle.ColumnLimit = 80;
196  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
197  LLVMStyle.ConstructorInitializerIndentWidth = 4;
198  LLVMStyle.Cpp11BracedListStyle = false;
199  LLVMStyle.DerivePointerBinding = false;
200  LLVMStyle.ExperimentalAutoDetectBinPacking = false;
201  LLVMStyle.IndentCaseLabels = false;
202  LLVMStyle.IndentFunctionDeclarationAfterType = false;
203  LLVMStyle.IndentWidth = 2;
204  LLVMStyle.TabWidth = 8;
205  LLVMStyle.MaxEmptyLinesToKeep = 1;
206  LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
207  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
208  LLVMStyle.PointerBindsToType = false;
209  LLVMStyle.SpacesBeforeTrailingComments = 1;
210  LLVMStyle.Standard = FormatStyle::LS_Cpp03;
211  LLVMStyle.UseTab = FormatStyle::UT_Never;
212  LLVMStyle.SpacesInParentheses = false;
213  LLVMStyle.SpaceInEmptyParentheses = false;
214  LLVMStyle.SpacesInCStyleCastParentheses = false;
215  LLVMStyle.SpaceAfterControlStatementKeyword = true;
216  LLVMStyle.SpaceBeforeAssignmentOperators = true;
217
218  setDefaultPenalties(LLVMStyle);
219  LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
220
221  return LLVMStyle;
222}
223
224FormatStyle getGoogleStyle() {
225  FormatStyle GoogleStyle;
226  GoogleStyle.AccessModifierOffset = -1;
227  GoogleStyle.AlignEscapedNewlinesLeft = true;
228  GoogleStyle.AlignTrailingComments = true;
229  GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
230  GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
231  GoogleStyle.AllowShortLoopsOnASingleLine = true;
232  GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
233  GoogleStyle.AlwaysBreakTemplateDeclarations = true;
234  GoogleStyle.BinPackParameters = true;
235  GoogleStyle.BreakBeforeBinaryOperators = false;
236  GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
237  GoogleStyle.BreakConstructorInitializersBeforeComma = false;
238  GoogleStyle.ColumnLimit = 80;
239  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
240  GoogleStyle.ConstructorInitializerIndentWidth = 4;
241  GoogleStyle.Cpp11BracedListStyle = true;
242  GoogleStyle.DerivePointerBinding = true;
243  GoogleStyle.ExperimentalAutoDetectBinPacking = false;
244  GoogleStyle.IndentCaseLabels = true;
245  GoogleStyle.IndentFunctionDeclarationAfterType = true;
246  GoogleStyle.IndentWidth = 2;
247  GoogleStyle.TabWidth = 8;
248  GoogleStyle.MaxEmptyLinesToKeep = 1;
249  GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
250  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
251  GoogleStyle.PointerBindsToType = true;
252  GoogleStyle.SpacesBeforeTrailingComments = 2;
253  GoogleStyle.Standard = FormatStyle::LS_Auto;
254  GoogleStyle.UseTab = FormatStyle::UT_Never;
255  GoogleStyle.SpacesInParentheses = false;
256  GoogleStyle.SpaceInEmptyParentheses = false;
257  GoogleStyle.SpacesInCStyleCastParentheses = false;
258  GoogleStyle.SpaceAfterControlStatementKeyword = true;
259  GoogleStyle.SpaceBeforeAssignmentOperators = true;
260
261  setDefaultPenalties(GoogleStyle);
262  GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
263
264  return GoogleStyle;
265}
266
267FormatStyle getChromiumStyle() {
268  FormatStyle ChromiumStyle = getGoogleStyle();
269  ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
270  ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
271  ChromiumStyle.AllowShortLoopsOnASingleLine = false;
272  ChromiumStyle.BinPackParameters = false;
273  ChromiumStyle.DerivePointerBinding = false;
274  ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
275  return ChromiumStyle;
276}
277
278FormatStyle getMozillaStyle() {
279  FormatStyle MozillaStyle = getLLVMStyle();
280  MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
281  MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
282  MozillaStyle.DerivePointerBinding = true;
283  MozillaStyle.IndentCaseLabels = true;
284  MozillaStyle.ObjCSpaceBeforeProtocolList = false;
285  MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
286  MozillaStyle.PointerBindsToType = true;
287  return MozillaStyle;
288}
289
290FormatStyle getWebKitStyle() {
291  FormatStyle Style = getLLVMStyle();
292  Style.AccessModifierOffset = -4;
293  Style.AlignTrailingComments = false;
294  Style.BreakBeforeBinaryOperators = true;
295  Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
296  Style.BreakConstructorInitializersBeforeComma = true;
297  Style.ColumnLimit = 0;
298  Style.IndentWidth = 4;
299  Style.NamespaceIndentation = FormatStyle::NI_Inner;
300  Style.PointerBindsToType = true;
301  return Style;
302}
303
304bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
305  if (Name.equals_lower("llvm"))
306    *Style = getLLVMStyle();
307  else if (Name.equals_lower("chromium"))
308    *Style = getChromiumStyle();
309  else if (Name.equals_lower("mozilla"))
310    *Style = getMozillaStyle();
311  else if (Name.equals_lower("google"))
312    *Style = getGoogleStyle();
313  else if (Name.equals_lower("webkit"))
314    *Style = getWebKitStyle();
315  else
316    return false;
317
318  return true;
319}
320
321llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
322  if (Text.trim().empty())
323    return llvm::make_error_code(llvm::errc::invalid_argument);
324  llvm::yaml::Input Input(Text);
325  Input >> *Style;
326  return Input.error();
327}
328
329std::string configurationAsText(const FormatStyle &Style) {
330  std::string Text;
331  llvm::raw_string_ostream Stream(Text);
332  llvm::yaml::Output Output(Stream);
333  // We use the same mapping method for input and output, so we need a non-const
334  // reference here.
335  FormatStyle NonConstStyle = Style;
336  Output << NonConstStyle;
337  return Stream.str();
338}
339
340namespace {
341
342class NoColumnLimitFormatter {
343public:
344  NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
345
346  /// \brief Formats the line starting at \p State, simply keeping all of the
347  /// input's line breaking decisions.
348  void format(unsigned FirstIndent, const AnnotatedLine *Line) {
349    LineState State =
350        Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
351    while (State.NextToken != NULL) {
352      bool Newline =
353          Indenter->mustBreak(State) ||
354          (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
355      Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
356    }
357  }
358
359private:
360  ContinuationIndenter *Indenter;
361};
362
363class UnwrappedLineFormatter {
364public:
365  UnwrappedLineFormatter(ContinuationIndenter *Indenter,
366                         WhitespaceManager *Whitespaces,
367                         const FormatStyle &Style, const AnnotatedLine &Line)
368      : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), Line(Line),
369        Count(0) {}
370
371  /// \brief Formats an \c UnwrappedLine and returns the penalty.
372  ///
373  /// If \p DryRun is \c false, directly applies the changes.
374  unsigned format(unsigned FirstIndent, bool DryRun = false) {
375    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
376
377    // If the ObjC method declaration does not fit on a line, we should format
378    // it with one arg per line.
379    if (Line.Type == LT_ObjCMethodDecl)
380      State.Stack.back().BreakBeforeParameter = true;
381
382    // Find best solution in solution space.
383    return analyzeSolutionSpace(State, DryRun);
384  }
385
386private:
387  /// \brief An edge in the solution space from \c Previous->State to \c State,
388  /// inserting a newline dependent on the \c NewLine.
389  struct StateNode {
390    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
391        : State(State), NewLine(NewLine), Previous(Previous) {}
392    LineState State;
393    bool NewLine;
394    StateNode *Previous;
395  };
396
397  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
398  ///
399  /// In case of equal penalties, we want to prefer states that were inserted
400  /// first. During state generation we make sure that we insert states first
401  /// that break the line as late as possible.
402  typedef std::pair<unsigned, unsigned> OrderedPenalty;
403
404  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
405  /// \c State has the given \c OrderedPenalty.
406  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
407
408  /// \brief The BFS queue type.
409  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
410                              std::greater<QueueItem> > QueueType;
411
412  /// \brief Analyze the entire solution space starting from \p InitialState.
413  ///
414  /// This implements a variant of Dijkstra's algorithm on the graph that spans
415  /// the solution space (\c LineStates are the nodes). The algorithm tries to
416  /// find the shortest path (the one with lowest penalty) from \p InitialState
417  /// to a state where all tokens are placed. Returns the penalty.
418  ///
419  /// If \p DryRun is \c false, directly applies the changes.
420  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
421    std::set<LineState> Seen;
422
423    // Insert start element into queue.
424    StateNode *Node =
425        new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
426    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
427    ++Count;
428
429    unsigned Penalty = 0;
430
431    // While not empty, take first element and follow edges.
432    while (!Queue.empty()) {
433      Penalty = Queue.top().first.first;
434      StateNode *Node = Queue.top().second;
435      if (Node->State.NextToken == NULL) {
436        DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
437        break;
438      }
439      Queue.pop();
440
441      // Cut off the analysis of certain solutions if the analysis gets too
442      // complex. See description of IgnoreStackForComparison.
443      if (Count > 10000)
444        Node->State.IgnoreStackForComparison = true;
445
446      if (!Seen.insert(Node->State).second)
447        // State already examined with lower penalty.
448        continue;
449
450      FormatDecision LastFormat = Node->State.NextToken->Decision;
451      if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
452        addNextStateToQueue(Penalty, Node, /*NewLine=*/false);
453      if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
454        addNextStateToQueue(Penalty, Node, /*NewLine=*/true);
455    }
456
457    if (Queue.empty()) {
458      // We were unable to find a solution, do nothing.
459      // FIXME: Add diagnostic?
460      DEBUG(llvm::dbgs() << "Could not find a solution.\n");
461      return 0;
462    }
463
464    // Reconstruct the solution.
465    if (!DryRun)
466      reconstructPath(InitialState, Queue.top().second);
467
468    DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
469    DEBUG(llvm::dbgs() << "---\n");
470
471    return Penalty;
472  }
473
474  void reconstructPath(LineState &State, StateNode *Current) {
475    std::deque<StateNode *> Path;
476    // We do not need a break before the initial token.
477    while (Current->Previous) {
478      Path.push_front(Current);
479      Current = Current->Previous;
480    }
481    for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
482         I != E; ++I) {
483      unsigned Penalty = 0;
484      formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
485      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
486
487      DEBUG({
488        if ((*I)->NewLine) {
489          llvm::dbgs() << "Penalty for placing "
490                       << (*I)->Previous->State.NextToken->Tok.getName() << ": "
491                       << Penalty << "\n";
492        }
493      });
494    }
495  }
496
497  /// \brief Add the following state to the analysis queue \c Queue.
498  ///
499  /// Assume the current state is \p PreviousNode and has been reached with a
500  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
501  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
502                           bool NewLine) {
503    if (NewLine && !Indenter->canBreak(PreviousNode->State))
504      return;
505    if (!NewLine && Indenter->mustBreak(PreviousNode->State))
506      return;
507
508    StateNode *Node = new (Allocator.Allocate())
509        StateNode(PreviousNode->State, NewLine, PreviousNode);
510    if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
511      return;
512
513    Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
514
515    Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
516    ++Count;
517  }
518
519  /// \brief If the \p State's next token is an r_brace closing a nested block,
520  /// format the nested block before it.
521  ///
522  /// Returns \c true if all children could be placed successfully and adapts
523  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
524  /// creates changes using \c Whitespaces.
525  ///
526  /// The crucial idea here is that children always get formatted upon
527  /// encountering the closing brace right after the nested block. Now, if we
528  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
529  /// \c false), the entire block has to be kept on the same line (which is only
530  /// possible if it fits on the line, only contains a single statement, etc.
531  ///
532  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
533  /// break after the "{", format all lines with correct indentation and the put
534  /// the closing "}" on yet another new line.
535  ///
536  /// This enables us to keep the simple structure of the
537  /// \c UnwrappedLineFormatter, where we only have two options for each token:
538  /// break or don't break.
539  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
540                      unsigned &Penalty) {
541    const FormatToken &LBrace = *State.NextToken->Previous;
542    if (LBrace.isNot(tok::l_brace) || LBrace.BlockKind != BK_Block ||
543        LBrace.Children.size() == 0)
544      // The previous token does not open a block. Nothing to do. We don't
545      // assert so that we can simply call this function for all tokens.
546      return true;
547
548    if (NewLine) {
549      unsigned ParentIndent = State.Stack.back().Indent;
550      for (SmallVector<AnnotatedLine *, 1>::const_iterator
551               I = LBrace.Children.begin(),
552               E = LBrace.Children.end();
553           I != E; ++I) {
554        unsigned Indent =
555            ParentIndent + ((*I)->Level - Line.Level - 1) * Style.IndentWidth;
556        if (!DryRun) {
557          unsigned Newlines = std::min((*I)->First->NewlinesBefore,
558                                       Style.MaxEmptyLinesToKeep + 1);
559          Newlines = std::max(1u, Newlines);
560          Whitespaces->replaceWhitespace(
561              *(*I)->First, Newlines, (*I)->Level, /*Spaces=*/Indent,
562              /*StartOfTokenColumn=*/Indent, Line.InPPDirective);
563        }
564        UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style, **I);
565        Penalty += Formatter.format(Indent, DryRun);
566      }
567      return true;
568    }
569
570    if (LBrace.Children.size() > 1)
571      return false; // Cannot merge multiple statements into a single line.
572
573    // We can't put the closing "}" on a line with a trailing comment.
574    if (LBrace.Children[0]->Last->isTrailingComment())
575      return false;
576
577    if (!DryRun) {
578      Whitespaces->replaceWhitespace(
579          *LBrace.Children[0]->First,
580          /*Newlines=*/0, /*IndentLevel=*/1, /*Spaces=*/1,
581          /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
582      UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style,
583                                       *LBrace.Children[0]);
584      Penalty += Formatter.format(State.Column + 1, DryRun);
585    }
586
587    State.Column += 1 + LBrace.Children[0]->Last->TotalLength;
588    return true;
589  }
590
591  ContinuationIndenter *Indenter;
592  WhitespaceManager *Whitespaces;
593  FormatStyle Style;
594  const AnnotatedLine &Line;
595
596  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
597  QueueType Queue;
598  // Increasing count of \c StateNode items we have created. This is used
599  // to create a deterministic order independent of the container.
600  unsigned Count;
601};
602
603class FormatTokenLexer {
604public:
605  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
606                   encoding::Encoding Encoding)
607      : FormatTok(NULL), GreaterStashed(false), Column(0),
608        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
609        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
610    Lex.SetKeepWhitespaceMode(true);
611  }
612
613  ArrayRef<FormatToken *> lex() {
614    assert(Tokens.empty());
615    do {
616      Tokens.push_back(getNextToken());
617      maybeJoinPreviousTokens();
618    } while (Tokens.back()->Tok.isNot(tok::eof));
619    return Tokens;
620  }
621
622  IdentifierTable &getIdentTable() { return IdentTable; }
623
624private:
625  void maybeJoinPreviousTokens() {
626    if (Tokens.size() < 4)
627      return;
628    FormatToken *Last = Tokens.back();
629    if (!Last->is(tok::r_paren))
630      return;
631
632    FormatToken *String = Tokens[Tokens.size() - 2];
633    if (!String->is(tok::string_literal) || String->IsMultiline)
634      return;
635
636    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
637      return;
638
639    FormatToken *Macro = Tokens[Tokens.size() - 4];
640    if (Macro->TokenText != "_T")
641      return;
642
643    const char *Start = Macro->TokenText.data();
644    const char *End = Last->TokenText.data() + Last->TokenText.size();
645    String->TokenText = StringRef(Start, End - Start);
646    String->IsFirst = Macro->IsFirst;
647    String->LastNewlineOffset = Macro->LastNewlineOffset;
648    String->WhitespaceRange = Macro->WhitespaceRange;
649    String->OriginalColumn = Macro->OriginalColumn;
650    String->ColumnWidth = encoding::columnWidthWithTabs(
651        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
652
653    Tokens.pop_back();
654    Tokens.pop_back();
655    Tokens.pop_back();
656    Tokens.back() = String;
657  }
658
659  FormatToken *getNextToken() {
660    if (GreaterStashed) {
661      // Create a synthesized second '>' token.
662      // FIXME: Increment Column and set OriginalColumn.
663      Token Greater = FormatTok->Tok;
664      FormatTok = new (Allocator.Allocate()) FormatToken;
665      FormatTok->Tok = Greater;
666      SourceLocation GreaterLocation =
667          FormatTok->Tok.getLocation().getLocWithOffset(1);
668      FormatTok->WhitespaceRange =
669          SourceRange(GreaterLocation, GreaterLocation);
670      FormatTok->TokenText = ">";
671      FormatTok->ColumnWidth = 1;
672      GreaterStashed = false;
673      return FormatTok;
674    }
675
676    FormatTok = new (Allocator.Allocate()) FormatToken;
677    readRawToken(*FormatTok);
678    SourceLocation WhitespaceStart =
679        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
680    if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
681      FormatTok->IsFirst = true;
682
683    // Consume and record whitespace until we find a significant token.
684    unsigned WhitespaceLength = TrailingWhitespace;
685    while (FormatTok->Tok.is(tok::unknown)) {
686      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
687        switch (FormatTok->TokenText[i]) {
688        case '\n':
689          ++FormatTok->NewlinesBefore;
690          // FIXME: This is technically incorrect, as it could also
691          // be a literal backslash at the end of the line.
692          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
693                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
694                          FormatTok->TokenText[i - 2] != '\\')))
695            FormatTok->HasUnescapedNewline = true;
696          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
697          Column = 0;
698          break;
699        case '\r':
700        case '\f':
701        case '\v':
702          Column = 0;
703          break;
704        case ' ':
705          ++Column;
706          break;
707        case '\t':
708          Column += Style.TabWidth - Column % Style.TabWidth;
709          break;
710        case '\\':
711          ++Column;
712          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
713                             FormatTok->TokenText[i + 1] != '\n'))
714            FormatTok->Type = TT_ImplicitStringLiteral;
715          break;
716        default:
717          FormatTok->Type = TT_ImplicitStringLiteral;
718          ++Column;
719          break;
720        }
721      }
722
723      if (FormatTok->Type == TT_ImplicitStringLiteral)
724        break;
725      WhitespaceLength += FormatTok->Tok.getLength();
726
727      readRawToken(*FormatTok);
728    }
729
730    // In case the token starts with escaped newlines, we want to
731    // take them into account as whitespace - this pattern is quite frequent
732    // in macro definitions.
733    // FIXME: Add a more explicit test.
734    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
735           FormatTok->TokenText[1] == '\n') {
736      // FIXME: ++FormatTok->NewlinesBefore is missing...
737      WhitespaceLength += 2;
738      Column = 0;
739      FormatTok->TokenText = FormatTok->TokenText.substr(2);
740    }
741
742    FormatTok->WhitespaceRange = SourceRange(
743        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
744
745    FormatTok->OriginalColumn = Column;
746
747    TrailingWhitespace = 0;
748    if (FormatTok->Tok.is(tok::comment)) {
749      // FIXME: Add the trimmed whitespace to Column.
750      StringRef UntrimmedText = FormatTok->TokenText;
751      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
752      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
753    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
754      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
755      FormatTok->Tok.setIdentifierInfo(&Info);
756      FormatTok->Tok.setKind(Info.getTokenID());
757    } else if (FormatTok->Tok.is(tok::greatergreater)) {
758      FormatTok->Tok.setKind(tok::greater);
759      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
760      GreaterStashed = true;
761    }
762
763    // Now FormatTok is the next non-whitespace token.
764
765    StringRef Text = FormatTok->TokenText;
766    size_t FirstNewlinePos = Text.find('\n');
767    if (FirstNewlinePos == StringRef::npos) {
768      // FIXME: ColumnWidth actually depends on the start column, we need to
769      // take this into account when the token is moved.
770      FormatTok->ColumnWidth =
771          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
772      Column += FormatTok->ColumnWidth;
773    } else {
774      FormatTok->IsMultiline = true;
775      // FIXME: ColumnWidth actually depends on the start column, we need to
776      // take this into account when the token is moved.
777      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
778          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
779
780      // The last line of the token always starts in column 0.
781      // Thus, the length can be precomputed even in the presence of tabs.
782      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
783          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
784          Encoding);
785      Column = FormatTok->LastLineColumnWidth;
786    }
787
788    return FormatTok;
789  }
790
791  FormatToken *FormatTok;
792  bool GreaterStashed;
793  unsigned Column;
794  unsigned TrailingWhitespace;
795  Lexer &Lex;
796  SourceManager &SourceMgr;
797  FormatStyle &Style;
798  IdentifierTable IdentTable;
799  encoding::Encoding Encoding;
800  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
801  SmallVector<FormatToken *, 16> Tokens;
802
803  void readRawToken(FormatToken &Tok) {
804    Lex.LexFromRawLexer(Tok.Tok);
805    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
806                              Tok.Tok.getLength());
807    // For formatting, treat unterminated string literals like normal string
808    // literals.
809    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
810        Tok.TokenText[0] == '"') {
811      Tok.Tok.setKind(tok::string_literal);
812      Tok.IsUnterminatedLiteral = true;
813    }
814  }
815};
816
817class Formatter : public UnwrappedLineConsumer {
818public:
819  Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
820            const std::vector<CharSourceRange> &Ranges)
821      : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
822        Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
823        Ranges(Ranges), UnwrappedLines(1),
824        Encoding(encoding::detectEncoding(Lex.getBuffer())) {
825    DEBUG(llvm::dbgs() << "File encoding: "
826                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
827                                                               : "unknown")
828                       << "\n");
829  }
830
831  tooling::Replacements format() {
832    tooling::Replacements Result;
833    FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
834
835    UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
836    bool StructuralError = Parser.parse();
837    assert(UnwrappedLines.rbegin()->empty());
838    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
839         ++Run) {
840      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
841      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
842      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
843        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
844      }
845      tooling::Replacements RunResult =
846          format(AnnotatedLines, StructuralError, Tokens);
847      DEBUG({
848        llvm::dbgs() << "Replacements for run " << Run << ":\n";
849        for (tooling::Replacements::iterator I = RunResult.begin(),
850                                             E = RunResult.end();
851             I != E; ++I) {
852          llvm::dbgs() << I->toString() << "\n";
853        }
854      });
855      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
856        delete AnnotatedLines[i];
857      }
858      Result.insert(RunResult.begin(), RunResult.end());
859      Whitespaces.reset();
860    }
861    return Result;
862  }
863
864  tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
865                               bool StructuralError, FormatTokenLexer &Tokens) {
866    TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
867    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
868      Annotator.annotate(*AnnotatedLines[i]);
869    }
870    deriveLocalStyle(AnnotatedLines);
871    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
872      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
873    }
874
875    Annotator.setCommentLineLevels(AnnotatedLines);
876
877    std::vector<int> IndentForLevel;
878    bool PreviousLineWasTouched = false;
879    const AnnotatedLine *PreviousLine = NULL;
880    bool FormatPPDirective = false;
881    for (SmallVectorImpl<AnnotatedLine *>::iterator I = AnnotatedLines.begin(),
882                                                    E = AnnotatedLines.end();
883         I != E; ++I) {
884      const AnnotatedLine &TheLine = **I;
885      const FormatToken *FirstTok = TheLine.First;
886      int Offset = getIndentOffset(*TheLine.First);
887
888      // Check whether this line is part of a formatted preprocessor directive.
889      if (FirstTok->HasUnescapedNewline)
890        FormatPPDirective = false;
891      if (!FormatPPDirective && TheLine.InPPDirective &&
892          (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
893        FormatPPDirective = true;
894
895      // Determine indent and try to merge multiple unwrapped lines.
896      while (IndentForLevel.size() <= TheLine.Level)
897        IndentForLevel.push_back(-1);
898      IndentForLevel.resize(TheLine.Level + 1);
899      unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
900      if (static_cast<int>(Indent) + Offset >= 0)
901        Indent += Offset;
902      tryFitMultipleLinesInOne(Indent, I, E);
903
904      bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
905      if (TheLine.First->is(tok::eof)) {
906        if (PreviousLineWasTouched) {
907          unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u);
908          Whitespaces.replaceWhitespace(*TheLine.First, NewLines,
909                                        /*IndentLevel=*/0, /*Spaces=*/0,
910                                        /*TargetColumn=*/0);
911        }
912      } else if (TheLine.Type != LT_Invalid &&
913                 (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
914        unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
915        if (FirstTok->WhitespaceRange.isValid() &&
916            // Insert a break even if there is a structural error in case where
917            // we break apart a line consisting of multiple unwrapped lines.
918            (FirstTok->NewlinesBefore == 0 || !StructuralError)) {
919          formatFirstToken(*TheLine.First, PreviousLine, Indent,
920                           TheLine.InPPDirective);
921        } else {
922          Indent = LevelIndent = FirstTok->OriginalColumn;
923        }
924        ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
925                                      BinPackInconclusiveFunctions);
926
927        // If everything fits on a single line, just put it there.
928        unsigned ColumnLimit = Style.ColumnLimit;
929        AnnotatedLine *NextLine = *(I + 1);
930        if ((I + 1) != E && NextLine->InPPDirective &&
931            !NextLine->First->HasUnescapedNewline)
932          ColumnLimit = getColumnLimit(TheLine.InPPDirective);
933
934        if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
935          LineState State =
936              Indenter.getInitialState(Indent, &TheLine, /*DryRun=*/false);
937          while (State.NextToken != NULL)
938            Indenter.addTokenToState(State, false, false);
939        } else if (Style.ColumnLimit == 0) {
940          NoColumnLimitFormatter Formatter(&Indenter);
941          Formatter.format(Indent, &TheLine);
942        } else {
943          UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style,
944                                           TheLine);
945          Formatter.format(Indent);
946        }
947
948        IndentForLevel[TheLine.Level] = LevelIndent;
949        PreviousLineWasTouched = true;
950      } else {
951        // Format the first token if necessary, and notify the WhitespaceManager
952        // about the unchanged whitespace.
953        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
954          if (Tok == TheLine.First &&
955              (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
956            unsigned LevelIndent = Tok->OriginalColumn;
957            // Remove trailing whitespace of the previous line if it was
958            // touched.
959            if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
960              formatFirstToken(*Tok, PreviousLine, LevelIndent,
961                               TheLine.InPPDirective);
962            } else {
963              Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
964            }
965
966            if (static_cast<int>(LevelIndent) - Offset >= 0)
967              LevelIndent -= Offset;
968            if (Tok->isNot(tok::comment))
969              IndentForLevel[TheLine.Level] = LevelIndent;
970          } else {
971            Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
972          }
973        }
974        // If we did not reformat this unwrapped line, the column at the end of
975        // the last token is unchanged - thus, we can calculate the end of the
976        // last token.
977        PreviousLineWasTouched = false;
978      }
979      for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
980        Tok->Finalized = true;
981      }
982      PreviousLine = *I;
983    }
984    return Whitespaces.generateReplacements();
985  }
986
987private:
988  static bool inputUsesCRLF(StringRef Text) {
989    return Text.count('\r') * 2 > Text.count('\n');
990  }
991
992  void
993  deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
994    unsigned CountBoundToVariable = 0;
995    unsigned CountBoundToType = 0;
996    bool HasCpp03IncompatibleFormat = false;
997    bool HasBinPackedFunction = false;
998    bool HasOnePerLineFunction = false;
999    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1000      if (!AnnotatedLines[i]->First->Next)
1001        continue;
1002      FormatToken *Tok = AnnotatedLines[i]->First->Next;
1003      while (Tok->Next) {
1004        if (Tok->Type == TT_PointerOrReference) {
1005          bool SpacesBefore =
1006              Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1007          bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1008                             Tok->Next->WhitespaceRange.getEnd();
1009          if (SpacesBefore && !SpacesAfter)
1010            ++CountBoundToVariable;
1011          else if (!SpacesBefore && SpacesAfter)
1012            ++CountBoundToType;
1013        }
1014
1015        if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1016          if (Tok->is(tok::coloncolon) &&
1017              Tok->Previous->Type == TT_TemplateOpener)
1018            HasCpp03IncompatibleFormat = true;
1019          if (Tok->Type == TT_TemplateCloser &&
1020              Tok->Previous->Type == TT_TemplateCloser)
1021            HasCpp03IncompatibleFormat = true;
1022        }
1023
1024        if (Tok->PackingKind == PPK_BinPacked)
1025          HasBinPackedFunction = true;
1026        if (Tok->PackingKind == PPK_OnePerLine)
1027          HasOnePerLineFunction = true;
1028
1029        Tok = Tok->Next;
1030      }
1031    }
1032    if (Style.DerivePointerBinding) {
1033      if (CountBoundToType > CountBoundToVariable)
1034        Style.PointerBindsToType = true;
1035      else if (CountBoundToType < CountBoundToVariable)
1036        Style.PointerBindsToType = false;
1037    }
1038    if (Style.Standard == FormatStyle::LS_Auto) {
1039      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1040                                                  : FormatStyle::LS_Cpp03;
1041    }
1042    BinPackInconclusiveFunctions =
1043        HasBinPackedFunction || !HasOnePerLineFunction;
1044  }
1045
1046  /// \brief Get the indent of \p Level from \p IndentForLevel.
1047  ///
1048  /// \p IndentForLevel must contain the indent for the level \c l
1049  /// at \p IndentForLevel[l], or a value < 0 if the indent for
1050  /// that level is unknown.
1051  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1052    if (IndentForLevel[Level] != -1)
1053      return IndentForLevel[Level];
1054    if (Level == 0)
1055      return 0;
1056    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
1057  }
1058
1059  /// \brief Get the offset of the line relatively to the level.
1060  ///
1061  /// For example, 'public:' labels in classes are offset by 1 or 2
1062  /// characters to the left from their level.
1063  int getIndentOffset(const FormatToken &RootToken) {
1064    if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
1065      return Style.AccessModifierOffset;
1066    return 0;
1067  }
1068
1069  /// \brief Tries to merge lines into one.
1070  ///
1071  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1072  /// if possible; note that \c I will be incremented when lines are merged.
1073  void tryFitMultipleLinesInOne(unsigned Indent,
1074                                SmallVectorImpl<AnnotatedLine *>::iterator &I,
1075                                SmallVectorImpl<AnnotatedLine *>::iterator E) {
1076    // We can never merge stuff if there are trailing line comments.
1077    AnnotatedLine *TheLine = *I;
1078    if (TheLine->Last->Type == TT_LineComment)
1079      return;
1080
1081    if (Indent > Style.ColumnLimit)
1082      return;
1083
1084    unsigned Limit = Style.ColumnLimit - Indent;
1085    // If we already exceed the column limit, we set 'Limit' to 0. The different
1086    // tryMerge..() functions can then decide whether to still do merging.
1087    Limit = TheLine->Last->TotalLength > Limit
1088                ? 0
1089                : Limit - TheLine->Last->TotalLength;
1090
1091    if (I + 1 == E || (*(I + 1))->Type == LT_Invalid)
1092      return;
1093
1094    if (TheLine->Last->is(tok::l_brace)) {
1095      tryMergeSimpleBlock(I, E, Limit);
1096    } else if (Style.AllowShortIfStatementsOnASingleLine &&
1097               TheLine->First->is(tok::kw_if)) {
1098      tryMergeSimpleControlStatement(I, E, Limit);
1099    } else if (Style.AllowShortLoopsOnASingleLine &&
1100               TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
1101      tryMergeSimpleControlStatement(I, E, Limit);
1102    } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
1103                                          TheLine->First->IsFirst)) {
1104      tryMergeSimplePPDirective(I, E, Limit);
1105    }
1106  }
1107
1108  void tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1109                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
1110                                 unsigned Limit) {
1111    if (Limit == 0)
1112      return;
1113    AnnotatedLine &Line = **I;
1114    if (!(*(I + 1))->InPPDirective || (*(I + 1))->First->HasUnescapedNewline)
1115      return;
1116    if (I + 2 != E && (*(I + 2))->InPPDirective &&
1117        !(*(I + 2))->First->HasUnescapedNewline)
1118      return;
1119    if (1 + (*(I + 1))->Last->TotalLength > Limit)
1120      return;
1121    join(Line, **(++I));
1122  }
1123
1124  void
1125  tryMergeSimpleControlStatement(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1126                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
1127                                 unsigned Limit) {
1128    if (Limit == 0)
1129      return;
1130    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
1131        (*(I + 1))->First->is(tok::l_brace))
1132      return;
1133    if ((*(I + 1))->InPPDirective != (*I)->InPPDirective ||
1134        ((*(I + 1))->InPPDirective && (*(I + 1))->First->HasUnescapedNewline))
1135      return;
1136    AnnotatedLine &Line = **I;
1137    if (Line.Last->isNot(tok::r_paren))
1138      return;
1139    if (1 + (*(I + 1))->Last->TotalLength > Limit)
1140      return;
1141    if ((*(I + 1))->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
1142                                   tok::kw_while) ||
1143        (*(I + 1))->First->Type == TT_LineComment)
1144      return;
1145    // Only inline simple if's (no nested if or else).
1146    if (I + 2 != E && Line.First->is(tok::kw_if) &&
1147        (*(I + 2))->First->is(tok::kw_else))
1148      return;
1149    join(Line, **(++I));
1150  }
1151
1152  void tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1153                           SmallVectorImpl<AnnotatedLine *>::iterator E,
1154                           unsigned Limit) {
1155    // No merging if the brace already is on the next line.
1156    if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
1157      return;
1158
1159    // First, check that the current line allows merging. This is the case if
1160    // we're not in a control flow statement and the last token is an opening
1161    // brace.
1162    AnnotatedLine &Line = **I;
1163    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1164                            tok::kw_else, tok::kw_try, tok::kw_catch,
1165                            tok::kw_for,
1166                            // This gets rid of all ObjC @ keywords and methods.
1167                            tok::at, tok::minus, tok::plus))
1168      return;
1169
1170    FormatToken *Tok = (*(I + 1))->First;
1171    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
1172        (Tok->getNextNonComment() == NULL ||
1173         Tok->getNextNonComment()->is(tok::semi))) {
1174      // We merge empty blocks even if the line exceeds the column limit.
1175      Tok->SpacesRequiredBefore = 0;
1176      Tok->CanBreakBefore = true;
1177      join(Line, **(I + 1));
1178      I += 1;
1179    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
1180      // Check that we still have three lines and they fit into the limit.
1181      if (I + 2 == E || (*(I + 2))->Type == LT_Invalid ||
1182          !nextTwoLinesFitInto(I, Limit))
1183        return;
1184
1185      // Second, check that the next line does not contain any braces - if it
1186      // does, readability declines when putting it into a single line.
1187      if ((*(I + 1))->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1188        return;
1189      do {
1190        if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1191          return;
1192        Tok = Tok->Next;
1193      } while (Tok != NULL);
1194
1195      // Last, check that the third line contains a single closing brace.
1196      Tok = (*(I + 2))->First;
1197      if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
1198          Tok->MustBreakBefore)
1199        return;
1200
1201      join(Line, **(I + 1));
1202      join(Line, **(I + 2));
1203      I += 2;
1204    }
1205  }
1206
1207  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::iterator I,
1208                           unsigned Limit) {
1209    return 1 + (*(I + 1))->Last->TotalLength + 1 +
1210               (*(I + 2))->Last->TotalLength <=
1211           Limit;
1212  }
1213
1214  void join(AnnotatedLine &A, const AnnotatedLine &B) {
1215    assert(!A.Last->Next);
1216    assert(!B.First->Previous);
1217    A.Last->Next = B.First;
1218    B.First->Previous = A.Last;
1219    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
1220    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
1221      Tok->TotalLength += LengthA;
1222      A.Last = Tok;
1223    }
1224  }
1225
1226  bool touchesRanges(const CharSourceRange &Range) {
1227    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1228      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1229                                               Ranges[i].getBegin()) &&
1230          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1231                                               Range.getBegin()))
1232        return true;
1233    }
1234    return false;
1235  }
1236
1237  bool touchesLine(const AnnotatedLine &TheLine) {
1238    const FormatToken *First = TheLine.First;
1239    const FormatToken *Last = TheLine.Last;
1240    CharSourceRange LineRange = CharSourceRange::getCharRange(
1241        First->WhitespaceRange.getBegin().getLocWithOffset(
1242            First->LastNewlineOffset),
1243        Last->getStartOfNonWhitespace().getLocWithOffset(
1244            Last->TokenText.size() - 1));
1245    return touchesRanges(LineRange);
1246  }
1247
1248  bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::iterator I,
1249                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1250    for (; I != E; ++I) {
1251      if ((*I)->First->HasUnescapedNewline)
1252        return false;
1253      if (touchesLine(**I))
1254        return true;
1255    }
1256    return false;
1257  }
1258
1259  bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1260    const FormatToken *First = TheLine.First;
1261    CharSourceRange LineRange = CharSourceRange::getCharRange(
1262        First->WhitespaceRange.getBegin(),
1263        First->WhitespaceRange.getBegin().getLocWithOffset(
1264            First->LastNewlineOffset));
1265    return touchesRanges(LineRange);
1266  }
1267
1268  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1269    assert(!UnwrappedLines.empty());
1270    UnwrappedLines.back().push_back(TheLine);
1271  }
1272
1273  virtual void finishRun() {
1274    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1275  }
1276
1277  /// \brief Add a new line and the required indent before the first Token
1278  /// of the \c UnwrappedLine if there was no structural parsing error.
1279  /// Returns the indent level of the \c UnwrappedLine.
1280  void formatFirstToken(FormatToken &RootToken,
1281                        const AnnotatedLine *PreviousLine, unsigned Indent,
1282                        bool InPPDirective) {
1283    unsigned Newlines =
1284        std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1285    // Remove empty lines before "}" where applicable.
1286    if (RootToken.is(tok::r_brace) &&
1287        (!RootToken.Next ||
1288         (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
1289      Newlines = std::min(Newlines, 1u);
1290    if (Newlines == 0 && !RootToken.IsFirst)
1291      Newlines = 1;
1292
1293    // Insert extra new line before access specifiers.
1294    if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
1295        RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
1296      ++Newlines;
1297
1298    // Remove empty lines after access specifiers.
1299    if (PreviousLine && PreviousLine->First->isAccessSpecifier())
1300      Newlines = std::min(1u, Newlines);
1301
1302    Whitespaces.replaceWhitespace(
1303        RootToken, Newlines, Indent / Style.IndentWidth, Indent, Indent,
1304        InPPDirective && !RootToken.HasUnescapedNewline);
1305  }
1306
1307  unsigned getColumnLimit(bool InPPDirective) const {
1308    // In preprocessor directives reserve two chars for trailing " \"
1309    return Style.ColumnLimit - (InPPDirective ? 2 : 0);
1310  }
1311
1312  FormatStyle Style;
1313  Lexer &Lex;
1314  SourceManager &SourceMgr;
1315  WhitespaceManager Whitespaces;
1316  std::vector<CharSourceRange> Ranges;
1317  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1318
1319  encoding::Encoding Encoding;
1320  bool BinPackInconclusiveFunctions;
1321};
1322
1323} // end anonymous namespace
1324
1325tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1326                               SourceManager &SourceMgr,
1327                               std::vector<CharSourceRange> Ranges) {
1328  Formatter formatter(Style, Lex, SourceMgr, Ranges);
1329  return formatter.format();
1330}
1331
1332tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1333                               std::vector<tooling::Range> Ranges,
1334                               StringRef FileName) {
1335  FileManager Files((FileSystemOptions()));
1336  DiagnosticsEngine Diagnostics(
1337      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1338      new DiagnosticOptions);
1339  SourceManager SourceMgr(Diagnostics, Files);
1340  llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1341  const clang::FileEntry *Entry =
1342      Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1343  SourceMgr.overrideFileContents(Entry, Buf);
1344  FileID ID =
1345      SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1346  Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1347            getFormattingLangOpts(Style.Standard));
1348  SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1349  std::vector<CharSourceRange> CharRanges;
1350  for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1351    SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1352    SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1353    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1354  }
1355  return reformat(Style, Lex, SourceMgr, CharRanges);
1356}
1357
1358LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1359  LangOptions LangOpts;
1360  LangOpts.CPlusPlus = 1;
1361  LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1362  LangOpts.LineComment = 1;
1363  LangOpts.Bool = 1;
1364  LangOpts.ObjC1 = 1;
1365  LangOpts.ObjC2 = 1;
1366  return LangOpts;
1367}
1368
1369const char *StyleOptionHelpDescription =
1370    "Coding style, currently supports:\n"
1371    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1372    "Use -style=file to load style configuration from\n"
1373    ".clang-format file located in one of the parent\n"
1374    "directories of the source file (or current\n"
1375    "directory for stdin).\n"
1376    "Use -style=\"{key: value, ...}\" to set specific\n"
1377    "parameters, e.g.:\n"
1378    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1379
1380FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
1381  // Fallback style in case the rest of this function can't determine a style.
1382  StringRef FallbackStyle = "LLVM";
1383  FormatStyle Style;
1384  getPredefinedStyle(FallbackStyle, &Style);
1385
1386  if (StyleName.startswith("{")) {
1387    // Parse YAML/JSON style from the command line.
1388    if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1389      llvm::errs() << "Error parsing -style: " << ec.message()
1390                   << ", using " << FallbackStyle << " style\n";
1391    }
1392    return Style;
1393  }
1394
1395  if (!StyleName.equals_lower("file")) {
1396    if (!getPredefinedStyle(StyleName, &Style))
1397      llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1398                   << " style\n";
1399    return Style;
1400  }
1401
1402  SmallString<128> Path(FileName);
1403  llvm::sys::fs::make_absolute(Path);
1404  for (StringRef Directory = Path;
1405       !Directory.empty();
1406       Directory = llvm::sys::path::parent_path(Directory)) {
1407    if (!llvm::sys::fs::is_directory(Directory))
1408      continue;
1409    SmallString<128> ConfigFile(Directory);
1410
1411    llvm::sys::path::append(ConfigFile, ".clang-format");
1412    DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1413    bool IsFile = false;
1414    // Ignore errors from is_regular_file: we only need to know if we can read
1415    // the file or not.
1416    llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1417
1418    if (!IsFile) {
1419      // Try _clang-format too, since dotfiles are not commonly used on Windows.
1420      ConfigFile = Directory;
1421      llvm::sys::path::append(ConfigFile, "_clang-format");
1422      DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1423      llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1424    }
1425
1426    if (IsFile) {
1427      OwningPtr<llvm::MemoryBuffer> Text;
1428      if (llvm::error_code ec = llvm::MemoryBuffer::getFile(ConfigFile, Text)) {
1429        llvm::errs() << ec.message() << "\n";
1430        continue;
1431      }
1432      if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1433        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1434                     << "\n";
1435        continue;
1436      }
1437      DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1438      return Style;
1439    }
1440  }
1441  llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1442               << " style\n";
1443  return Style;
1444}
1445
1446} // namespace format
1447} // namespace clang
1448