Format.cpp revision 088dab50b76d235cd5b0e701048efccfcea3677e
1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
15/// where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "clang/Format/Format.h"
20#include "UnwrappedLineParser.h"
21#include "clang/Basic/Diagnostic.h"
22#include "clang/Basic/OperatorPrecedence.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Frontend/TextDiagnosticPrinter.h"
25#include "clang/Lex/Lexer.h"
26#include <string>
27
28namespace clang {
29namespace format {
30
31enum TokenType {
32  TT_BinaryOperator,
33  TT_BlockComment,
34  TT_CastRParen,
35  TT_ConditionalExpr,
36  TT_CtorInitializerColon,
37  TT_DirectorySeparator,
38  TT_LineComment,
39  TT_ObjCBlockLParen,
40  TT_ObjCDecl,
41  TT_ObjCMethodSpecifier,
42  TT_ObjCSelectorStart,
43  TT_ObjCProperty,
44  TT_OverloadedOperator,
45  TT_PointerOrReference,
46  TT_PureVirtualSpecifier,
47  TT_TemplateCloser,
48  TT_TemplateOpener,
49  TT_TrailingUnaryOperator,
50  TT_UnaryOperator,
51  TT_Unknown
52};
53
54enum LineType {
55  LT_Invalid,
56  LT_Other,
57  LT_PreprocessorDirective,
58  LT_VirtualFunctionDecl,
59  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
60  LT_ObjCMethodDecl,
61  LT_ObjCProperty // An @property line.
62};
63
64class AnnotatedToken {
65public:
66  AnnotatedToken(const FormatToken &FormatTok)
67      : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
68        CanBreakBefore(false), MustBreakBefore(false),
69        ClosesTemplateDeclaration(false), Parent(NULL) {}
70
71  bool is(tok::TokenKind Kind) const {
72    return FormatTok.Tok.is(Kind);
73  }
74  bool isNot(tok::TokenKind Kind) const {
75    return FormatTok.Tok.isNot(Kind);
76  }
77  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
78    return FormatTok.Tok.isObjCAtKeyword(Kind);
79  }
80
81  FormatToken FormatTok;
82
83  TokenType Type;
84
85  bool SpaceRequiredBefore;
86  bool CanBreakBefore;
87  bool MustBreakBefore;
88
89  bool ClosesTemplateDeclaration;
90
91  std::vector<AnnotatedToken> Children;
92  AnnotatedToken *Parent;
93};
94
95static prec::Level getPrecedence(const AnnotatedToken &Tok) {
96  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
97}
98
99FormatStyle getLLVMStyle() {
100  FormatStyle LLVMStyle;
101  LLVMStyle.ColumnLimit = 80;
102  LLVMStyle.MaxEmptyLinesToKeep = 1;
103  LLVMStyle.PointerAndReferenceBindToType = false;
104  LLVMStyle.AccessModifierOffset = -2;
105  LLVMStyle.SplitTemplateClosingGreater = true;
106  LLVMStyle.IndentCaseLabels = false;
107  LLVMStyle.SpacesBeforeTrailingComments = 1;
108  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
109  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
110  LLVMStyle.ObjCSpaceBeforeReturnType = true;
111  return LLVMStyle;
112}
113
114FormatStyle getGoogleStyle() {
115  FormatStyle GoogleStyle;
116  GoogleStyle.ColumnLimit = 80;
117  GoogleStyle.MaxEmptyLinesToKeep = 1;
118  GoogleStyle.PointerAndReferenceBindToType = true;
119  GoogleStyle.AccessModifierOffset = -1;
120  GoogleStyle.SplitTemplateClosingGreater = false;
121  GoogleStyle.IndentCaseLabels = true;
122  GoogleStyle.SpacesBeforeTrailingComments = 2;
123  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
124  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
125  GoogleStyle.ObjCSpaceBeforeReturnType = false;
126  return GoogleStyle;
127}
128
129struct OptimizationParameters {
130  unsigned PenaltyIndentLevel;
131  unsigned PenaltyLevelDecrease;
132  unsigned PenaltyExcessCharacter;
133};
134
135/// \brief Replaces the whitespace in front of \p Tok. Only call once for
136/// each \c FormatToken.
137static void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
138                              unsigned Spaces, const FormatStyle &Style,
139                              SourceManager &SourceMgr,
140                              tooling::Replacements &Replaces) {
141  Replaces.insert(tooling::Replacement(
142      SourceMgr, Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength,
143      std::string(NewLines, '\n') + std::string(Spaces, ' ')));
144}
145
146/// \brief Like \c replaceWhitespace, but additionally adds right-aligned
147/// backslashes to escape newlines inside a preprocessor directive.
148///
149/// This function and \c replaceWhitespace have the same behavior if
150/// \c Newlines == 0.
151static void replacePPWhitespace(
152    const AnnotatedToken &Tok, unsigned NewLines, unsigned Spaces,
153    unsigned WhitespaceStartColumn, const FormatStyle &Style,
154    SourceManager &SourceMgr, tooling::Replacements &Replaces) {
155  std::string NewLineText;
156  if (NewLines > 0) {
157    unsigned Offset = std::min<int>(Style.ColumnLimit - 1,
158                                    WhitespaceStartColumn);
159    for (unsigned i = 0; i < NewLines; ++i) {
160      NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
161      NewLineText += "\\\n";
162      Offset = 0;
163    }
164  }
165  Replaces.insert(tooling::Replacement(SourceMgr, Tok.FormatTok.WhiteSpaceStart,
166                                       Tok.FormatTok.WhiteSpaceLength,
167                                       NewLineText + std::string(Spaces, ' ')));
168}
169
170/// \brief Checks whether the (remaining) \c UnwrappedLine starting with
171/// \p RootToken fits into \p Limit columns.
172bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) {
173  unsigned Columns = RootToken.FormatTok.TokenLength;
174  bool FitsOnALine = true;
175  const AnnotatedToken *Tok = &RootToken;
176  while (!Tok->Children.empty()) {
177    Tok = &Tok->Children[0];
178    Columns += (Tok->SpaceRequiredBefore ? 1 : 0) + Tok->FormatTok.TokenLength;
179    // A special case for the colon of a constructor initializer as this only
180    // needs to be put on a new line if the line needs to be split.
181    if (Columns > Limit ||
182        (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) {
183      FitsOnALine = false;
184      break;
185    }
186  }
187  return FitsOnALine;
188}
189
190class UnwrappedLineFormatter {
191public:
192  UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
193                         const UnwrappedLine &Line, unsigned FirstIndent,
194                         bool FitsOnALine, LineType CurrentLineType,
195                         const AnnotatedToken &RootToken,
196                         tooling::Replacements &Replaces, bool StructuralError)
197      : Style(Style), SourceMgr(SourceMgr), Line(Line),
198        FirstIndent(FirstIndent), FitsOnALine(FitsOnALine),
199        CurrentLineType(CurrentLineType), RootToken(RootToken),
200        Replaces(Replaces) {
201    Parameters.PenaltyIndentLevel = 15;
202    Parameters.PenaltyLevelDecrease = 30;
203    Parameters.PenaltyExcessCharacter = 1000000;
204  }
205
206  /// \brief Formats an \c UnwrappedLine.
207  ///
208  /// \returns The column after the last token in the last line of the
209  /// \c UnwrappedLine.
210  unsigned format() {
211    // Initialize state dependent on indent.
212    LineState State;
213    State.Column = FirstIndent;
214    State.NextToken = &RootToken;
215    State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent));
216    State.ForLoopVariablePos = 0;
217    State.LineContainsContinuedForLoopSection = false;
218    State.StartOfLineLevel = 1;
219
220    // The first token has already been indented and thus consumed.
221    moveStateToNextToken(State);
222
223    // Start iterating at 1 as we have correctly formatted of Token #0 above.
224    while (State.NextToken != NULL) {
225      if (FitsOnALine) {
226        addTokenToState(false, false, State);
227      } else {
228        unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
229        unsigned Break = calcPenalty(State, true, NoBreak);
230        addTokenToState(Break < NoBreak, false, State);
231        if (State.NextToken != NULL &&
232            State.NextToken->Parent->Type == TT_CtorInitializerColon) {
233          if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine &&
234              !fitsIntoLimit(*State.NextToken,
235                             getColumnLimit() - State.Column - 1))
236            State.Stack.back().BreakAfterComma = true;
237        }
238      }
239    }
240    return State.Column;
241  }
242
243private:
244  struct ParenState {
245    ParenState(unsigned Indent, unsigned LastSpace)
246        : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
247          BreakBeforeClosingBrace(false), BreakAfterComma(false) {}
248
249    /// \brief The position to which a specific parenthesis level needs to be
250    /// indented.
251    unsigned Indent;
252
253    /// \brief The position of the last space on each level.
254    ///
255    /// Used e.g. to break like:
256    /// functionCall(Parameter, otherCall(
257    ///                             OtherParameter));
258    unsigned LastSpace;
259
260    /// \brief The position the first "<<" operator encountered on each level.
261    ///
262    /// Used to align "<<" operators. 0 if no such operator has been encountered
263    /// on a level.
264    unsigned FirstLessLess;
265
266    /// \brief Whether a newline needs to be inserted before the block's closing
267    /// brace.
268    ///
269    /// We only want to insert a newline before the closing brace if there also
270    /// was a newline after the beginning left brace.
271    bool BreakBeforeClosingBrace;
272
273    bool BreakAfterComma;
274
275    bool operator<(const ParenState &Other) const {
276      if (Indent != Other.Indent)
277        return Indent < Other.Indent;
278      if (LastSpace != Other.LastSpace)
279        return LastSpace < Other.LastSpace;
280      if (FirstLessLess != Other.FirstLessLess)
281        return FirstLessLess < Other.FirstLessLess;
282      if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
283        return BreakBeforeClosingBrace;
284      return BreakAfterComma;
285    }
286  };
287
288  /// \brief The current state when indenting a unwrapped line.
289  ///
290  /// As the indenting tries different combinations this is copied by value.
291  struct LineState {
292    /// \brief The number of used columns in the current line.
293    unsigned Column;
294
295    /// \brief The token that needs to be next formatted.
296    const AnnotatedToken *NextToken;
297
298    /// \brief The parenthesis level of the first token on the current line.
299    unsigned StartOfLineLevel;
300
301    /// \brief The column of the first variable in a for-loop declaration.
302    ///
303    /// Used to align the second variable if necessary.
304    unsigned ForLoopVariablePos;
305
306    /// \brief \c true if this line contains a continued for-loop section.
307    bool LineContainsContinuedForLoopSection;
308
309    /// \brief A stack keeping track of properties applying to parenthesis
310    /// levels.
311    std::vector<ParenState> Stack;
312
313    /// \brief Comparison operator to be able to used \c LineState in \c map.
314    bool operator<(const LineState &Other) const {
315      if (Other.NextToken != NextToken)
316        return Other.NextToken > NextToken;
317      if (Other.Column != Column)
318        return Other.Column > Column;
319      if (Other.StartOfLineLevel != StartOfLineLevel)
320        return Other.StartOfLineLevel > StartOfLineLevel;
321      if (Other.ForLoopVariablePos != ForLoopVariablePos)
322        return Other.ForLoopVariablePos < ForLoopVariablePos;
323      if (Other.LineContainsContinuedForLoopSection !=
324          LineContainsContinuedForLoopSection)
325        return LineContainsContinuedForLoopSection;
326      return Other.Stack < Stack;
327    }
328  };
329
330  /// \brief Appends the next token to \p State and updates information
331  /// necessary for indentation.
332  ///
333  /// Puts the token on the current line if \p Newline is \c true and adds a
334  /// line break and necessary indentation otherwise.
335  ///
336  /// If \p DryRun is \c false, also creates and stores the required
337  /// \c Replacement.
338  void addTokenToState(bool Newline, bool DryRun, LineState &State) {
339    const AnnotatedToken &Current = *State.NextToken;
340    const AnnotatedToken &Previous = *State.NextToken->Parent;
341    assert(State.Stack.size());
342    unsigned ParenLevel = State.Stack.size() - 1;
343
344    if (Newline) {
345      unsigned WhitespaceStartColumn = State.Column;
346      if (Current.is(tok::r_brace)) {
347        State.Column = Line.Level * 2;
348      } else if (Current.is(tok::string_literal) &&
349                 Previous.is(tok::string_literal)) {
350        State.Column = State.Column - Previous.FormatTok.TokenLength;
351      } else if (Current.is(tok::lessless) &&
352                 State.Stack[ParenLevel].FirstLessLess != 0) {
353        State.Column = State.Stack[ParenLevel].FirstLessLess;
354      } else if (ParenLevel != 0 &&
355                 (Previous.is(tok::equal) || Current.is(tok::arrow) ||
356                  Current.is(tok::period) || Previous.is(tok::question) ||
357                  Previous.Type == TT_ConditionalExpr)) {
358        // Indent and extra 4 spaces after if we know the current expression is
359        // continued.  Don't do that on the top level, as we already indent 4
360        // there.
361        State.Column = State.Stack[ParenLevel].Indent + 4;
362      } else if (RootToken.is(tok::kw_for) && Previous.is(tok::comma)) {
363        State.Column = State.ForLoopVariablePos;
364      } else if (State.NextToken->Parent->ClosesTemplateDeclaration) {
365        State.Column = State.Stack[ParenLevel].Indent - 4;
366      } else {
367        State.Column = State.Stack[ParenLevel].Indent;
368      }
369
370      // A line starting with a closing brace is assumed to be correct for the
371      // same level as before the opening brace.
372      State.StartOfLineLevel = ParenLevel + (Current.is(tok::r_brace) ? 0 : 1);
373
374      if (RootToken.is(tok::kw_for))
375        State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi);
376
377      if (!DryRun) {
378        if (!Line.InPPDirective)
379          replaceWhitespace(Current.FormatTok, 1, State.Column, Style,
380                            SourceMgr, Replaces);
381        else
382          replacePPWhitespace(Current.FormatTok, 1, State.Column,
383                              WhitespaceStartColumn, Style, SourceMgr,
384                              Replaces);
385      }
386
387      State.Stack[ParenLevel].LastSpace = State.Column;
388      if (Current.is(tok::colon) && CurrentLineType != LT_ObjCMethodDecl &&
389          State.NextToken->Type != TT_ConditionalExpr)
390        State.Stack[ParenLevel].Indent += 2;
391    } else {
392      if (Current.is(tok::equal) && RootToken.is(tok::kw_for))
393        State.ForLoopVariablePos = State.Column -
394                                   Previous.FormatTok.TokenLength;
395
396      unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
397      if (State.NextToken->Type == TT_LineComment)
398        Spaces = Style.SpacesBeforeTrailingComments;
399
400      if (!DryRun)
401        replaceWhitespace(Current, 0, Spaces, Style, SourceMgr, Replaces);
402
403      // FIXME: Do we need to do this for assignments nested in other
404      // expressions?
405      if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
406          (getPrecedence(Previous) == prec::Assignment ||
407           Previous.is(tok::kw_return)))
408        State.Stack[ParenLevel].Indent = State.Column + Spaces;
409      if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
410          State.NextToken->Parent->Type == TT_TemplateOpener)
411        State.Stack[ParenLevel].Indent = State.Column + Spaces;
412
413      // Top-level spaces that are not part of assignments are exempt as that
414      // mostly leads to better results.
415      State.Column += Spaces;
416      if (Spaces > 0 &&
417          (ParenLevel != 0 || getPrecedence(Previous) == prec::Assignment))
418        State.Stack[ParenLevel].LastSpace = State.Column;
419    }
420    moveStateToNextToken(State);
421    if (Newline && Previous.is(tok::l_brace)) {
422      State.Stack.back().BreakBeforeClosingBrace = true;
423    }
424  }
425
426  /// \brief Mark the next token as consumed in \p State and modify its stacks
427  /// accordingly.
428  void moveStateToNextToken(LineState &State) {
429    const AnnotatedToken &Current = *State.NextToken;
430    assert(State.Stack.size());
431
432    if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
433      State.Stack.back().FirstLessLess = State.Column;
434
435    // If we encounter an opening (, [, { or <, we add a level to our stacks to
436    // prepare for the following tokens.
437    if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
438        Current.is(tok::l_brace) ||
439        State.NextToken->Type == TT_TemplateOpener) {
440      unsigned NewIndent;
441      if (Current.is(tok::l_brace)) {
442        // FIXME: This does not work with nested static initializers.
443        // Implement a better handling for static initializers and similar
444        // constructs.
445        NewIndent = Line.Level * 2 + 2;
446      } else {
447        NewIndent = 4 + State.Stack.back().LastSpace;
448      }
449      State.Stack.push_back(
450          ParenState(NewIndent, State.Stack.back().LastSpace));
451    }
452
453    // If we encounter a closing ), ], } or >, we can remove a level from our
454    // stacks.
455    if (Current.is(tok::r_paren) || Current.is(tok::r_square) ||
456        (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
457        State.NextToken->Type == TT_TemplateCloser) {
458      State.Stack.pop_back();
459    }
460
461    if (State.NextToken->Children.empty())
462      State.NextToken = NULL;
463    else
464      State.NextToken = &State.NextToken->Children[0];
465
466    State.Column += Current.FormatTok.TokenLength;
467  }
468
469  /// \brief Calculate the penalty for splitting after the token at \p Index.
470  unsigned splitPenalty(const AnnotatedToken &Tok) {
471    const AnnotatedToken &Left = Tok;
472    const AnnotatedToken &Right = Tok.Children[0];
473
474    // In for-loops, prefer breaking at ',' and ';'.
475    if (RootToken.is(tok::kw_for) &&
476        (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
477      return 20;
478
479    if (Left.is(tok::semi) || Left.is(tok::comma) ||
480        Left.ClosesTemplateDeclaration)
481      return 0;
482    if (Left.is(tok::l_paren))
483      return 20;
484
485    if (Left.is(tok::question) || Left.Type == TT_ConditionalExpr)
486      return prec::Assignment;
487    prec::Level Level = getPrecedence(Left);
488
489    // Breaking after an assignment leads to a bad result as the two sides of
490    // the assignment are visually very close together.
491    if (Level == prec::Assignment)
492      return 50;
493
494    if (Level != prec::Unknown)
495      return Level;
496
497    if (Right.is(tok::arrow) || Right.is(tok::period))
498      return 150;
499
500    return 3;
501  }
502
503  unsigned getColumnLimit() {
504    return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
505  }
506
507  /// \brief Calculate the number of lines needed to format the remaining part
508  /// of the unwrapped line.
509  ///
510  /// Assumes the formatting so far has led to
511  /// the \c LineSta \p State. If \p NewLine is set, a new line will be
512  /// added after the previous token.
513  ///
514  /// \param StopAt is used for optimization. If we can determine that we'll
515  /// definitely need at least \p StopAt additional lines, we already know of a
516  /// better solution.
517  unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) {
518    // We are at the end of the unwrapped line, so we don't need any more lines.
519    if (State.NextToken == NULL)
520      return 0;
521
522    if (!NewLine && State.NextToken->MustBreakBefore)
523      return UINT_MAX;
524    if (NewLine && !State.NextToken->CanBreakBefore)
525      return UINT_MAX;
526    if (!NewLine && State.NextToken->is(tok::r_brace) &&
527        State.Stack.back().BreakBeforeClosingBrace)
528      return UINT_MAX;
529    if (!NewLine && State.NextToken->Parent->is(tok::semi) &&
530        State.LineContainsContinuedForLoopSection)
531      return UINT_MAX;
532    if (!NewLine && State.NextToken->Parent->is(tok::comma) &&
533        State.NextToken->Type != TT_LineComment &&
534        State.Stack.back().BreakAfterComma)
535      return UINT_MAX;
536
537    unsigned CurrentPenalty = 0;
538    if (NewLine) {
539      CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
540                        splitPenalty(*State.NextToken->Parent);
541    } else {
542      if (State.Stack.size() < State.StartOfLineLevel)
543        CurrentPenalty += Parameters.PenaltyLevelDecrease *
544                          (State.StartOfLineLevel - State.Stack.size());
545    }
546
547    addTokenToState(NewLine, true, State);
548
549    // Exceeding column limit is bad, assign penalty.
550    if (State.Column > getColumnLimit()) {
551      unsigned ExcessCharacters = State.Column - getColumnLimit();
552      CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
553    }
554
555    if (StopAt <= CurrentPenalty)
556      return UINT_MAX;
557    StopAt -= CurrentPenalty;
558    StateMap::iterator I = Memory.find(State);
559    if (I != Memory.end()) {
560      // If this state has already been examined, we can safely return the
561      // previous result if we
562      // - have not hit the optimatization (and thus returned UINT_MAX) OR
563      // - are now computing for a smaller or equal StopAt.
564      unsigned SavedResult = I->second.first;
565      unsigned SavedStopAt = I->second.second;
566      if (SavedResult != UINT_MAX)
567        return SavedResult + CurrentPenalty;
568      else if (StopAt <= SavedStopAt)
569        return UINT_MAX;
570    }
571
572    unsigned NoBreak = calcPenalty(State, false, StopAt);
573    unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
574    unsigned Result = std::min(NoBreak, WithBreak);
575
576    // We have to store 'Result' without adding 'CurrentPenalty' as the latter
577    // can depend on 'NewLine'.
578    Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
579
580    return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
581  }
582
583  FormatStyle Style;
584  SourceManager &SourceMgr;
585  const UnwrappedLine &Line;
586  const unsigned FirstIndent;
587  const bool FitsOnALine;
588  const LineType CurrentLineType;
589  const AnnotatedToken &RootToken;
590  tooling::Replacements &Replaces;
591
592  // A map from an indent state to a pair (Result, Used-StopAt).
593  typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap;
594  StateMap Memory;
595
596  OptimizationParameters Parameters;
597};
598
599/// \brief Determines extra information about the tokens comprising an
600/// \c UnwrappedLine.
601class TokenAnnotator {
602public:
603  TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style,
604                 SourceManager &SourceMgr, Lexer &Lex)
605      : Style(Style), SourceMgr(SourceMgr), Lex(Lex),
606        RootToken(Line.RootToken) {}
607
608  /// \brief A parser that gathers additional information about tokens.
609  ///
610  /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
611  /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
612  /// into template parameter lists.
613  class AnnotatingParser {
614  public:
615    AnnotatingParser(AnnotatedToken &RootToken)
616        : CurrentToken(&RootToken), KeywordVirtualFound(false) {}
617
618    bool parseAngle() {
619      while (CurrentToken != NULL) {
620        if (CurrentToken->is(tok::greater)) {
621          CurrentToken->Type = TT_TemplateCloser;
622          next();
623          return true;
624        }
625        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
626            CurrentToken->is(tok::r_brace))
627          return false;
628        if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
629            CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
630          return false;
631        if (!consumeToken())
632          return false;
633      }
634      return false;
635    }
636
637    bool parseParens() {
638      if (CurrentToken != NULL && CurrentToken->is(tok::caret))
639        CurrentToken->Parent->Type = TT_ObjCBlockLParen;
640      while (CurrentToken != NULL) {
641        if (CurrentToken->is(tok::r_paren)) {
642          next();
643          return true;
644        }
645        if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
646          return false;
647        if (!consumeToken())
648          return false;
649      }
650      return false;
651    }
652
653    bool parseSquare() {
654      while (CurrentToken != NULL) {
655        if (CurrentToken->is(tok::r_square)) {
656          next();
657          return true;
658        }
659        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
660          return false;
661        if (!consumeToken())
662          return false;
663      }
664      return false;
665    }
666
667    bool parseBrace() {
668      while (CurrentToken != NULL) {
669        if (CurrentToken->is(tok::r_brace)) {
670          next();
671          return true;
672        }
673        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
674          return false;
675        if (!consumeToken())
676          return false;
677      }
678      // Lines can currently end with '{'.
679      return true;
680    }
681
682    bool parseConditional() {
683      while (CurrentToken != NULL) {
684        if (CurrentToken->is(tok::colon)) {
685          CurrentToken->Type = TT_ConditionalExpr;
686          next();
687          return true;
688        }
689        if (!consumeToken())
690          return false;
691      }
692      return false;
693    }
694
695    bool parseTemplateDeclaration() {
696      if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
697        CurrentToken->Type = TT_TemplateOpener;
698        next();
699        if (!parseAngle())
700          return false;
701        CurrentToken->Parent->ClosesTemplateDeclaration = true;
702        parseLine();
703        return true;
704      }
705      return false;
706    }
707
708    bool consumeToken() {
709      AnnotatedToken *Tok = CurrentToken;
710      next();
711      switch (Tok->FormatTok.Tok.getKind()) {
712      case tok::plus:
713      case tok::minus:
714        // At the start of the line, +/- specific ObjectiveC method
715        // declarations.
716        if (Tok->Parent == NULL)
717          Tok->Type = TT_ObjCMethodSpecifier;
718        break;
719      case tok::l_paren: {
720        bool ParensWereObjCReturnType =
721            Tok->Parent && Tok->Parent->Type == TT_ObjCMethodSpecifier;
722        if (!parseParens())
723          return false;
724        if (CurrentToken != NULL && CurrentToken->is(tok::colon)) {
725          CurrentToken->Type = TT_CtorInitializerColon;
726          next();
727        } else if (CurrentToken != NULL && ParensWereObjCReturnType) {
728          CurrentToken->Type = TT_ObjCSelectorStart;
729          next();
730        }
731      } break;
732      case tok::l_square:
733        if (!parseSquare())
734          return false;
735        break;
736      case tok::l_brace:
737        if (!parseBrace())
738          return false;
739        break;
740      case tok::less:
741        if (parseAngle())
742          Tok->Type = TT_TemplateOpener;
743        else {
744          Tok->Type = TT_BinaryOperator;
745          CurrentToken = Tok;
746          next();
747        }
748        break;
749      case tok::r_paren:
750      case tok::r_square:
751        return false;
752      case tok::r_brace:
753        // Lines can start with '}'.
754        if (Tok->Parent != NULL)
755          return false;
756        break;
757      case tok::greater:
758        Tok->Type = TT_BinaryOperator;
759        break;
760      case tok::kw_operator:
761        if (CurrentToken->is(tok::l_paren)) {
762          CurrentToken->Type = TT_OverloadedOperator;
763          next();
764          if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
765            CurrentToken->Type = TT_OverloadedOperator;
766            next();
767          }
768        } else {
769          while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
770            CurrentToken->Type = TT_OverloadedOperator;
771            next();
772          }
773        }
774        break;
775      case tok::question:
776        parseConditional();
777        break;
778      case tok::kw_template:
779        parseTemplateDeclaration();
780        break;
781      default:
782        break;
783      }
784      return true;
785    }
786
787    void parseIncludeDirective() {
788      while (CurrentToken != NULL) {
789        if (CurrentToken->is(tok::slash))
790          CurrentToken->Type = TT_DirectorySeparator;
791        else if (CurrentToken->is(tok::less))
792          CurrentToken->Type = TT_TemplateOpener;
793        else if (CurrentToken->is(tok::greater))
794          CurrentToken->Type = TT_TemplateCloser;
795        next();
796      }
797    }
798
799    void parsePreprocessorDirective() {
800      next();
801      if (CurrentToken == NULL)
802        return;
803      // Hashes in the middle of a line can lead to any strange token
804      // sequence.
805      if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
806        return;
807      switch (
808          CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
809      case tok::pp_include:
810      case tok::pp_import:
811        parseIncludeDirective();
812        break;
813      default:
814        break;
815      }
816    }
817
818    LineType parseLine() {
819      if (CurrentToken->is(tok::hash)) {
820        parsePreprocessorDirective();
821        return LT_PreprocessorDirective;
822      }
823      while (CurrentToken != NULL) {
824        if (CurrentToken->is(tok::kw_virtual))
825          KeywordVirtualFound = true;
826        if (!consumeToken())
827          return LT_Invalid;
828      }
829      if (KeywordVirtualFound)
830        return LT_VirtualFunctionDecl;
831      return LT_Other;
832    }
833
834    void next() {
835      if (CurrentToken != NULL && !CurrentToken->Children.empty())
836        CurrentToken = &CurrentToken->Children[0];
837      else
838        CurrentToken = NULL;
839    }
840
841  private:
842    AnnotatedToken *CurrentToken;
843    bool KeywordVirtualFound;
844  };
845
846  void createAnnotatedTokens(AnnotatedToken &Current) {
847    if (!Current.FormatTok.Children.empty()) {
848      Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0]));
849      Current.Children.back().Parent = &Current;
850      createAnnotatedTokens(Current.Children.back());
851    }
852  }
853
854  void calculateExtraInformation(AnnotatedToken &Current) {
855    Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
856
857    if (Current.FormatTok.MustBreakBefore) {
858      Current.MustBreakBefore = true;
859    } else {
860      if (Current.Type == TT_CtorInitializerColon || Current.Parent->Type ==
861          TT_LineComment || (Current.is(tok::string_literal) &&
862                             Current.Parent->is(tok::string_literal))) {
863        Current.MustBreakBefore = true;
864      } else {
865        Current.MustBreakBefore = false;
866      }
867    }
868    Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
869    if (!Current.Children.empty())
870      calculateExtraInformation(Current.Children[0]);
871  }
872
873  bool annotate() {
874    createAnnotatedTokens(RootToken);
875
876    AnnotatingParser Parser(RootToken);
877    CurrentLineType = Parser.parseLine();
878    if (CurrentLineType == LT_Invalid)
879      return false;
880
881    determineTokenTypes(RootToken, /*IsRHS=*/false);
882
883    if (RootToken.Type == TT_ObjCMethodSpecifier)
884      CurrentLineType = LT_ObjCMethodDecl;
885    else if (RootToken.Type == TT_ObjCDecl)
886      CurrentLineType = LT_ObjCDecl;
887    else if (RootToken.Type == TT_ObjCProperty)
888      CurrentLineType = LT_ObjCProperty;
889
890    if (!RootToken.Children.empty())
891      calculateExtraInformation(RootToken.Children[0]);
892    return true;
893  }
894
895  LineType getLineType() {
896    return CurrentLineType;
897  }
898
899  const AnnotatedToken &getRootToken() {
900    return RootToken;
901  }
902
903private:
904  void determineTokenTypes(AnnotatedToken &Current, bool IsRHS) {
905    if (getPrecedence(Current) == prec::Assignment ||
906        Current.is(tok::kw_return) || Current.is(tok::kw_throw))
907      IsRHS = true;
908
909    if (Current.Type == TT_Unknown) {
910      if (Current.is(tok::star) || Current.is(tok::amp)) {
911        Current.Type = determineStarAmpUsage(Current, IsRHS);
912      } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
913                 Current.is(tok::caret)) {
914        Current.Type = determinePlusMinusCaretUsage(Current);
915      } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
916        Current.Type = determineIncrementUsage(Current);
917      } else if (Current.is(tok::exclaim)) {
918        Current.Type = TT_UnaryOperator;
919      } else if (isBinaryOperator(Current)) {
920        Current.Type = TT_BinaryOperator;
921      } else if (Current.is(tok::comment)) {
922        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
923                                            Lex.getLangOpts()));
924        if (StringRef(Data).startswith("//"))
925          Current.Type = TT_LineComment;
926        else
927          Current.Type = TT_BlockComment;
928      } else if (Current.is(tok::r_paren) &&
929                 (Current.Parent->Type == TT_PointerOrReference ||
930                  Current.Parent->Type == TT_TemplateCloser)) {
931        // FIXME: We need to get smarter and understand more cases of casts.
932        Current.Type = TT_CastRParen;
933      } else if (Current.is(tok::at) && Current.Children.size()) {
934        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
935        case tok::objc_interface:
936        case tok::objc_implementation:
937        case tok::objc_protocol:
938          Current.Type = TT_ObjCDecl;
939          break;
940        case tok::objc_property:
941          Current.Type = TT_ObjCProperty;
942          break;
943        default:
944          break;
945        }
946      }
947    }
948
949    if (!Current.Children.empty())
950      determineTokenTypes(Current.Children[0], IsRHS);
951  }
952
953  bool isBinaryOperator(const AnnotatedToken &Tok) {
954    // Comma is a binary operator, but does not behave as such wrt. formatting.
955    return getPrecedence(Tok) > prec::Comma;
956  }
957
958  TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsRHS) {
959    if (Tok.Parent == NULL)
960      return TT_UnaryOperator;
961    if (Tok.Children.size() == 0)
962      return TT_Unknown;
963    const FormatToken &PrevToken = Tok.Parent->FormatTok;
964    const FormatToken &NextToken = Tok.Children[0].FormatTok;
965
966    if (PrevToken.Tok.is(tok::l_paren) || PrevToken.Tok.is(tok::l_square) ||
967        PrevToken.Tok.is(tok::comma) || PrevToken.Tok.is(tok::kw_return) ||
968        PrevToken.Tok.is(tok::colon) || Tok.Parent->Type == TT_BinaryOperator ||
969        Tok.Parent->Type == TT_CastRParen)
970      return TT_UnaryOperator;
971
972    if (PrevToken.Tok.isLiteral() || NextToken.Tok.isLiteral() ||
973        NextToken.Tok.is(tok::plus) || NextToken.Tok.is(tok::minus) ||
974        NextToken.Tok.is(tok::plusplus) || NextToken.Tok.is(tok::minusminus) ||
975        NextToken.Tok.is(tok::tilde) || NextToken.Tok.is(tok::exclaim) ||
976        NextToken.Tok.is(tok::kw_alignof) || NextToken.Tok.is(tok::kw_sizeof))
977      return TT_BinaryOperator;
978
979    if (NextToken.Tok.is(tok::comma) || NextToken.Tok.is(tok::r_paren) ||
980        NextToken.Tok.is(tok::greater))
981      return TT_PointerOrReference;
982
983    // It is very unlikely that we are going to find a pointer or reference type
984    // definition on the RHS of an assignment.
985    if (IsRHS)
986      return TT_BinaryOperator;
987
988    return TT_PointerOrReference;
989  }
990
991  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
992    // Use heuristics to recognize unary operators.
993    if (Tok.Parent->is(tok::equal) || Tok.Parent->is(tok::l_paren) ||
994        Tok.Parent->is(tok::comma) || Tok.Parent->is(tok::l_square) ||
995        Tok.Parent->is(tok::question) || Tok.Parent->is(tok::colon) ||
996        Tok.Parent->is(tok::kw_return) || Tok.Parent->is(tok::kw_case) ||
997        Tok.Parent->is(tok::at))
998      return TT_UnaryOperator;
999
1000    // There can't be to consecutive binary operators.
1001    if (Tok.Parent->Type == TT_BinaryOperator)
1002      return TT_UnaryOperator;
1003
1004    // Fall back to marking the token as binary operator.
1005    return TT_BinaryOperator;
1006  }
1007
1008  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1009  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
1010    if (Tok.Parent != NULL && Tok.Parent->is(tok::identifier))
1011      return TT_TrailingUnaryOperator;
1012
1013    return TT_UnaryOperator;
1014  }
1015
1016  bool spaceRequiredBetween(const AnnotatedToken &Left,
1017                            const AnnotatedToken &Right) {
1018    if (Right.is(tok::hashhash))
1019      return Left.is(tok::hash);
1020    if (Left.is(tok::hashhash) || Left.is(tok::hash))
1021      return Right.is(tok::hash);
1022    if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
1023      return false;
1024    if (Right.is(tok::less) &&
1025        (Left.is(tok::kw_template) ||
1026         (CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1027      return true;
1028    if (Left.is(tok::arrow) || Right.is(tok::arrow))
1029      return false;
1030    if (Left.is(tok::exclaim) || Left.is(tok::tilde))
1031      return false;
1032    if (Left.is(tok::at) &&
1033        (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
1034         Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
1035         Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
1036         Right.is(tok::kw_true) || Right.is(tok::kw_false)))
1037      return false;
1038    if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
1039      return false;
1040    if (Right.is(tok::amp) || Right.is(tok::star))
1041      return Left.FormatTok.Tok.isLiteral() ||
1042             (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
1043              !Style.PointerAndReferenceBindToType);
1044    if (Left.is(tok::amp) || Left.is(tok::star))
1045      return Right.FormatTok.Tok.isLiteral() ||
1046             Style.PointerAndReferenceBindToType;
1047    if (Right.is(tok::star) && Left.is(tok::l_paren))
1048      return false;
1049    if (Left.is(tok::l_square) || Right.is(tok::l_square) ||
1050        Right.is(tok::r_square))
1051      return false;
1052    if (Left.is(tok::coloncolon) ||
1053        (Right.is(tok::coloncolon) &&
1054         (Left.is(tok::identifier) || Left.is(tok::greater))))
1055      return false;
1056    if (Left.is(tok::period) || Right.is(tok::period))
1057      return false;
1058    if (Left.is(tok::colon) || Right.is(tok::colon))
1059      return true;
1060    if (Left.is(tok::l_paren))
1061      return false;
1062    if (Right.is(tok::l_paren)) {
1063      return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) ||
1064             Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
1065             Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
1066             Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
1067             Left.is(tok::kw_delete);
1068    }
1069    if (Left.is(tok::at) &&
1070        Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1071      return false;
1072    if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1073      return false;
1074    return true;
1075  }
1076
1077  bool spaceRequiredBefore(const AnnotatedToken &Tok) {
1078    if (CurrentLineType == LT_ObjCMethodDecl) {
1079      if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
1080          Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
1081        return true;
1082      if (Tok.is(tok::colon))
1083        return false;
1084      if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1085        return Style.ObjCSpaceBeforeReturnType || Tok.isNot(tok::l_paren);
1086      if (Tok.Type == TT_ObjCSelectorStart)
1087        return !Style.ObjCSpaceBeforeReturnType;
1088      if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1089        // Don't space between ')' and <id>
1090        return false;
1091      if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
1092        // Don't space between ':' and '('
1093        return false;
1094    }
1095    if (CurrentLineType == LT_ObjCProperty &&
1096        (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1097      return false;
1098
1099    if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1100      return true;
1101    if (Tok.Type == TT_OverloadedOperator)
1102      return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
1103             Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
1104    if (Tok.Parent->Type == TT_OverloadedOperator)
1105      return false;
1106    if (Tok.is(tok::colon))
1107      return RootToken.isNot(tok::kw_case) && (!Tok.Children.empty());
1108    if (Tok.Parent->Type == TT_UnaryOperator ||
1109        Tok.Parent->Type == TT_CastRParen)
1110      return false;
1111    if (Tok.Type == TT_UnaryOperator)
1112      return Tok.Parent->isNot(tok::l_paren) &&
1113             Tok.Parent->isNot(tok::l_square) &&
1114             Tok.Parent->isNot(tok::at);
1115    if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1116      return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
1117             TT_TemplateCloser && Style.SplitTemplateClosingGreater;
1118    }
1119    if (Tok.Type == TT_DirectorySeparator ||
1120        Tok.Parent->Type == TT_DirectorySeparator)
1121      return false;
1122    if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1123      return true;
1124    if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1125      return false;
1126    if (Tok.is(tok::less) && RootToken.is(tok::hash))
1127      return true;
1128    if (Tok.Type == TT_TrailingUnaryOperator)
1129      return false;
1130    return spaceRequiredBetween(*Tok.Parent, Tok);
1131  }
1132
1133  bool canBreakBefore(const AnnotatedToken &Right) {
1134    const AnnotatedToken &Left = *Right.Parent;
1135    if (CurrentLineType == LT_ObjCMethodDecl) {
1136      if (Right.is(tok::identifier) && !Right.Children.empty() &&
1137          Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
1138        return true;
1139      if (CurrentLineType == LT_ObjCMethodDecl && Right.is(tok::identifier) &&
1140          Left.is(tok::l_paren) && Left.Parent->is(tok::colon))
1141        // Don't break this identifier as ':' or identifier
1142        // before it will break.
1143        return false;
1144      if (Right.is(tok::colon) && Left.is(tok::identifier) &&
1145          Left.CanBreakBefore)
1146        // Don't break at ':' if identifier before it can beak.
1147        return false;
1148    }
1149    if (Left.ClosesTemplateDeclaration)
1150      return true;
1151    if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1152        Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr)
1153      return false;
1154    if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl)
1155      return false;
1156
1157    if (Right.is(tok::comment))
1158      return !Right.Children.empty();
1159    if (Right.is(tok::r_paren) || Right.is(tok::l_brace) ||
1160        Right.is(tok::greater))
1161      return false;
1162    return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1163           Left.is(tok::comma) || Right.is(tok::lessless) ||
1164           Right.is(tok::arrow) || Right.is(tok::period) ||
1165           Right.is(tok::colon) || Left.is(tok::semi) ||
1166           Left.is(tok::l_brace) || Left.is(tok::question) ||
1167           Right.is(tok::r_brace) || Left.Type == TT_ConditionalExpr ||
1168           (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1169            Right.is(tok::identifier)) ||
1170           (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1171  }
1172
1173  FormatStyle Style;
1174  SourceManager &SourceMgr;
1175  Lexer &Lex;
1176  LineType CurrentLineType;
1177  AnnotatedToken RootToken;
1178};
1179
1180class LexerBasedFormatTokenSource : public FormatTokenSource {
1181public:
1182  LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1183      : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1184        IdentTable(Lex.getLangOpts()) {
1185    Lex.SetKeepWhitespaceMode(true);
1186  }
1187
1188  virtual FormatToken getNextToken() {
1189    if (GreaterStashed) {
1190      FormatTok.NewlinesBefore = 0;
1191      FormatTok.WhiteSpaceStart =
1192          FormatTok.Tok.getLocation().getLocWithOffset(1);
1193      FormatTok.WhiteSpaceLength = 0;
1194      GreaterStashed = false;
1195      return FormatTok;
1196    }
1197
1198    FormatTok = FormatToken();
1199    Lex.LexFromRawLexer(FormatTok.Tok);
1200    StringRef Text = rawTokenText(FormatTok.Tok);
1201    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1202    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1203      FormatTok.IsFirst = true;
1204
1205    // Consume and record whitespace until we find a significant token.
1206    while (FormatTok.Tok.is(tok::unknown)) {
1207      FormatTok.NewlinesBefore += Text.count('\n');
1208      FormatTok.HasUnescapedNewline = Text.count("\\\n") !=
1209                                      FormatTok.NewlinesBefore;
1210      FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1211
1212      if (FormatTok.Tok.is(tok::eof))
1213        return FormatTok;
1214      Lex.LexFromRawLexer(FormatTok.Tok);
1215      Text = rawTokenText(FormatTok.Tok);
1216    }
1217
1218    // Now FormatTok is the next non-whitespace token.
1219    FormatTok.TokenLength = Text.size();
1220
1221    // In case the token starts with escaped newlines, we want to
1222    // take them into account as whitespace - this pattern is quite frequent
1223    // in macro definitions.
1224    // FIXME: What do we want to do with other escaped spaces, and escaped
1225    // spaces or newlines in the middle of tokens?
1226    // FIXME: Add a more explicit test.
1227    unsigned i = 0;
1228    while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1229      FormatTok.WhiteSpaceLength += 2;
1230      FormatTok.TokenLength -= 2;
1231      i += 2;
1232    }
1233
1234    if (FormatTok.Tok.is(tok::raw_identifier)) {
1235      IdentifierInfo &Info = IdentTable.get(Text);
1236      FormatTok.Tok.setIdentifierInfo(&Info);
1237      FormatTok.Tok.setKind(Info.getTokenID());
1238    }
1239
1240    if (FormatTok.Tok.is(tok::greatergreater)) {
1241      FormatTok.Tok.setKind(tok::greater);
1242      GreaterStashed = true;
1243    }
1244
1245    return FormatTok;
1246  }
1247
1248private:
1249  FormatToken FormatTok;
1250  bool GreaterStashed;
1251  Lexer &Lex;
1252  SourceManager &SourceMgr;
1253  IdentifierTable IdentTable;
1254
1255  /// Returns the text of \c FormatTok.
1256  StringRef rawTokenText(Token &Tok) {
1257    return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1258                     Tok.getLength());
1259  }
1260};
1261
1262class Formatter : public UnwrappedLineConsumer {
1263public:
1264  Formatter(clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
1265            Lexer &Lex, SourceManager &SourceMgr,
1266            const std::vector<CharSourceRange> &Ranges)
1267      : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1268        Ranges(Ranges) {}
1269
1270  virtual ~Formatter() {}
1271
1272  tooling::Replacements format() {
1273    LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1274    UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1275    StructuralError = Parser.parse();
1276    unsigned PreviousEndOfLineColumn = 0;
1277    for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(),
1278                                              E = UnwrappedLines.end();
1279         I != E; ++I) {
1280      const UnwrappedLine &TheLine = *I;
1281      if (touchesRanges(TheLine)) {
1282        TokenAnnotator Annotator(TheLine, Style, SourceMgr, Lex);
1283        if (!Annotator.annotate())
1284          break;
1285        unsigned Indent = formatFirstToken(Annotator.getRootToken(),
1286                                           TheLine.Level, TheLine.InPPDirective,
1287                                           PreviousEndOfLineColumn);
1288        unsigned Limit = Style.ColumnLimit - (TheLine.InPPDirective ? 1 : 0) -
1289                         Indent;
1290        // Check whether the UnwrappedLine can be put onto a single line. If
1291        // so, this is bound to be the optimal solution (by definition) and we
1292        // don't need to analyze the entire solution space.
1293        bool FitsOnALine = fitsIntoLimit(Annotator.getRootToken(), Limit);
1294        UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1295                                         FitsOnALine, Annotator.getLineType(),
1296                                         Annotator.getRootToken(), Replaces,
1297                                         StructuralError);
1298        PreviousEndOfLineColumn = Formatter.format();
1299      } else {
1300        // If we did not reformat this unwrapped line, the column at the end of
1301        // the last token is unchanged - thus, we can calculate the end of the
1302        // last token, and return the result.
1303        const FormatToken *Last = getLastLine(TheLine);
1304        PreviousEndOfLineColumn =
1305            SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) +
1306            Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr,
1307                                   Lex.getLangOpts()) -
1308            1;
1309      }
1310    }
1311    return Replaces;
1312  }
1313
1314private:
1315  const FormatToken *getLastLine(const UnwrappedLine &TheLine) {
1316    const FormatToken *Last = &TheLine.RootToken;
1317    while (!Last->Children.empty())
1318      Last = &Last->Children.back();
1319    return Last;
1320  }
1321
1322  bool touchesRanges(const UnwrappedLine &TheLine) {
1323    const FormatToken *First = &TheLine.RootToken;
1324    const FormatToken *Last = getLastLine(TheLine);
1325    CharSourceRange LineRange = CharSourceRange::getTokenRange(
1326                                    First->Tok.getLocation(),
1327                                    Last->Tok.getLocation());
1328    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1329      if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
1330                                               Ranges[i].getBegin()) &&
1331          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1332                                               LineRange.getBegin()))
1333        return true;
1334    }
1335    return false;
1336  }
1337
1338  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1339    UnwrappedLines.push_back(TheLine);
1340  }
1341
1342  /// \brief Add a new line and the required indent before the first Token
1343  /// of the \c UnwrappedLine if there was no structural parsing error.
1344  /// Returns the indent level of the \c UnwrappedLine.
1345  unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level,
1346                            bool InPPDirective,
1347                            unsigned PreviousEndOfLineColumn) {
1348    const FormatToken &Tok = RootToken.FormatTok;
1349    if (!Tok.WhiteSpaceStart.isValid() || StructuralError)
1350      return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1;
1351
1352    unsigned Newlines = std::min(Tok.NewlinesBefore,
1353                                 Style.MaxEmptyLinesToKeep + 1);
1354    if (Newlines == 0 && !Tok.IsFirst)
1355      Newlines = 1;
1356    unsigned Indent = Level * 2;
1357
1358    bool IsAccessModifier = false;
1359    if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) ||
1360        RootToken.is(tok::kw_private))
1361      IsAccessModifier = true;
1362    else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1363             (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1364              RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1365              RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1366              RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1367      IsAccessModifier = true;
1368
1369    if (IsAccessModifier &&
1370        static_cast<int>(Indent) + Style.AccessModifierOffset >= 0)
1371      Indent += Style.AccessModifierOffset;
1372    if (!InPPDirective || Tok.HasUnescapedNewline) {
1373      replaceWhitespace(Tok, Newlines, Indent, Style, SourceMgr, Replaces);
1374    } else {
1375      replacePPWhitespace(Tok, Newlines, Indent, PreviousEndOfLineColumn, Style,
1376                          SourceMgr, Replaces);
1377    }
1378    return Indent;
1379  }
1380
1381  clang::DiagnosticsEngine &Diag;
1382  FormatStyle Style;
1383  Lexer &Lex;
1384  SourceManager &SourceMgr;
1385  tooling::Replacements Replaces;
1386  std::vector<CharSourceRange> Ranges;
1387  std::vector<UnwrappedLine> UnwrappedLines;
1388  bool StructuralError;
1389};
1390
1391tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1392                               SourceManager &SourceMgr,
1393                               std::vector<CharSourceRange> Ranges) {
1394  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1395  TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), &*DiagOpts);
1396  DiagnosticPrinter.BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1397  DiagnosticsEngine Diagnostics(
1398      llvm::IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1399      &DiagnosticPrinter, false);
1400  Diagnostics.setSourceManager(&SourceMgr);
1401  Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1402  return formatter.format();
1403}
1404
1405LangOptions getFormattingLangOpts() {
1406  LangOptions LangOpts;
1407  LangOpts.CPlusPlus = 1;
1408  LangOpts.CPlusPlus11 = 1;
1409  LangOpts.Bool = 1;
1410  LangOpts.ObjC1 = 1;
1411  LangOpts.ObjC2 = 1;
1412  return LangOpts;
1413}
1414
1415} // namespace format
1416} // namespace clang
1417