1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements an indenter that manages the indentation of
12/// continuations.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
17#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
18
19#include "Encoding.h"
20#include "clang/Format/Format.h"
21#include "llvm/Support/Regex.h"
22
23namespace clang {
24class SourceManager;
25
26namespace format {
27
28class AnnotatedLine;
29struct FormatToken;
30struct LineState;
31struct ParenState;
32class WhitespaceManager;
33
34class ContinuationIndenter {
35public:
36  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
37  /// column \p FirstIndent.
38  ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
39                       WhitespaceManager &Whitespaces,
40                       encoding::Encoding Encoding,
41                       bool BinPackInconclusiveFunctions);
42
43  /// \brief Get the initial state, i.e. the state after placing \p Line's
44  /// first token at \p FirstIndent.
45  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
46                            bool DryRun);
47
48  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
49  // better home.
50  /// \brief Returns \c true, if a line break after \p State is allowed.
51  bool canBreak(const LineState &State);
52
53  /// \brief Returns \c true, if a line break after \p State is mandatory.
54  bool mustBreak(const LineState &State);
55
56  /// \brief Appends the next token to \p State and updates information
57  /// necessary for indentation.
58  ///
59  /// Puts the token on the current line if \p Newline is \c false and adds a
60  /// line break and necessary indentation otherwise.
61  ///
62  /// If \p DryRun is \c false, also creates and stores the required
63  /// \c Replacement.
64  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
65                           unsigned ExtraSpaces = 0);
66
67  /// \brief Get the column limit for this line. This is the style's column
68  /// limit, potentially reduced for preprocessor definitions.
69  unsigned getColumnLimit(const LineState &State) const;
70
71private:
72  /// \brief Mark the next token as consumed in \p State and modify its stacks
73  /// accordingly.
74  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
75
76  /// \brief Update 'State' according to the next token's fake left parentheses.
77  void moveStatePastFakeLParens(LineState &State, bool Newline);
78  /// \brief Update 'State' according to the next token's fake r_parens.
79  void moveStatePastFakeRParens(LineState &State);
80
81  /// \brief Update 'State' according to the next token being one of "(<{[".
82  void moveStatePastScopeOpener(LineState &State, bool Newline);
83  /// \brief Update 'State' according to the next token being one of ")>}]".
84  void moveStatePastScopeCloser(LineState &State);
85  /// \brief Update 'State' with the next token opening a nested block.
86  void moveStateToNewBlock(LineState &State);
87
88  /// \brief If the current token sticks out over the end of the line, break
89  /// it if possible.
90  ///
91  /// \returns An extra penalty if a token was broken, otherwise 0.
92  ///
93  /// The returned penalty will cover the cost of the additional line breaks and
94  /// column limit violation in all lines except for the last one. The penalty
95  /// for the column limit violation in the last line (and in single line
96  /// tokens) is handled in \c addNextStateToQueue.
97  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
98                                bool DryRun);
99
100  /// \brief Appends the next token to \p State and updates information
101  /// necessary for indentation.
102  ///
103  /// Puts the token on the current line.
104  ///
105  /// If \p DryRun is \c false, also creates and stores the required
106  /// \c Replacement.
107  void addTokenOnCurrentLine(LineState &State, bool DryRun,
108                             unsigned ExtraSpaces);
109
110  /// \brief Appends the next token to \p State and updates information
111  /// necessary for indentation.
112  ///
113  /// Adds a line break and necessary indentation.
114  ///
115  /// If \p DryRun is \c false, also creates and stores the required
116  /// \c Replacement.
117  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
118
119  /// \brief Calculate the new column for a line wrap before the next token.
120  unsigned getNewLineColumn(const LineState &State);
121
122  /// \brief Adds a multiline token to the \p State.
123  ///
124  /// \returns Extra penalty for the first line of the literal: last line is
125  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
126  /// matter, as we don't change them.
127  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
128
129  /// \brief Returns \c true if the next token starts a multiline string
130  /// literal.
131  ///
132  /// This includes implicitly concatenated strings, strings that will be broken
133  /// by clang-format and string literals with escaped newlines.
134  bool nextIsMultilineString(const LineState &State);
135
136  FormatStyle Style;
137  SourceManager &SourceMgr;
138  WhitespaceManager &Whitespaces;
139  encoding::Encoding Encoding;
140  bool BinPackInconclusiveFunctions;
141  llvm::Regex CommentPragmasRegex;
142};
143
/// \brief Properties that apply to one parenthesis level of the line that is
/// being formatted.
struct ParenState {
  /// \brief Creates the state for one parenthesis level.
  ///
  /// \p Indent, \p IndentLevel, \p LastSpace, \p AvoidBinPacking and
  /// \p NoLineBreak seed the members of the same name. Every other position
  /// starts at 0 and every other flag at \c false, except for
  /// \c LastOperatorWrapped and \c AlignColons, which start at \c true.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
        StartOfFunctionCall(0), StartOfArraySubscripts(0),
        NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), JSFunctionInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief The position the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression;
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder;

  /// \brief \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks;

  /// \brief The previous JavaScript 'function' keyword is not wrapped to a new
  /// line.
  bool JSFunctionInlined;

  /// \brief Field-by-field ordering of two parenthesis states.
  ///
  /// \c LineState::operator< compares its stack of \c ParenState objects with
  /// this operator, so two states that compare equivalent here cannot be told
  /// apart there. Every member takes part in the comparison except
  /// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks,
  /// which are documented above as not considered for memoization.
  ///
  /// \returns \c true if this state is ordered before \p Other.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    // For the next few flags a set flag sorts first. The direction is
    // arbitrary; it only has to be consistent for each field.
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak < Other.ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    if (JSFunctionInlined != Other.JSFunctionInlined)
      return JSFunctionInlined < Other.JSFunctionInlined;
    // These two members used to be left out although nothing marks them as
    // irrelevant for memoization, so states differing only in them were
    // treated as identical. They are compared last so that the relative order
    // of all states that were already distinguishable stays the same.
    if (IndentLevel != Other.IndentLevel)
      return IndentLevel < Other.IndentLevel;
    if (NestedNameSpecifierContinuation !=
        Other.NestedNameSpecifierContinuation)
      return NestedNameSpecifierContinuation <
             Other.NestedNameSpecifierContinuation;
    return false;
  }
};
298
/// \brief The current state when indenting an unwrapped line.
300///
301/// As the indenting tries different combinations this is copied by value.
302struct LineState {
303  /// \brief The number of used columns in the current line.
304  unsigned Column;
305
306  /// \brief The token that needs to be next formatted.
307  FormatToken *NextToken;
308
309  /// \brief \c true if this line contains a continued for-loop section.
310  bool LineContainsContinuedForLoopSection;
311
312  /// \brief The \c NestingLevel at the start of this line.
313  unsigned StartOfLineLevel;
314
315  /// \brief The lowest \c NestingLevel on the current line.
316  unsigned LowestLevelOnLine;
317
318  /// \brief The start column of the string literal, if we're in a string
319  /// literal sequence, 0 otherwise.
320  unsigned StartOfStringLiteral;
321
322  /// \brief A stack keeping track of properties applying to parenthesis
323  /// levels.
324  std::vector<ParenState> Stack;
325
326  /// \brief Ignore the stack of \c ParenStates for state comparison.
327  ///
328  /// In long and deeply nested unwrapped lines, the current algorithm can
329  /// be insufficient for finding the best formatting with a reasonable amount
330  /// of time and memory. Setting this flag will effectively lead to the
331  /// algorithm not analyzing some combinations. However, these combinations
332  /// rarely contain the optimal solution: In short, accepting a higher
333  /// penalty early would need to lead to different values in the \c
334  /// ParenState stack (in an otherwise identical state) and these different
335  /// values would need to lead to a significant amount of avoided penalty
336  /// later.
337  ///
338  /// FIXME: Come up with a better algorithm instead.
339  bool IgnoreStackForComparison;
340
341  /// \brief The indent of the first token.
342  unsigned FirstIndent;
343
344  /// \brief The line that is being formatted.
345  ///
346  /// Does not need to be considered for memoization because it doesn't change.
347  const AnnotatedLine *Line;
348
349  /// \brief Comparison operator to be able to used \c LineState in \c map.
350  bool operator<(const LineState &Other) const {
351    if (NextToken != Other.NextToken)
352      return NextToken < Other.NextToken;
353    if (Column != Other.Column)
354      return Column < Other.Column;
355    if (LineContainsContinuedForLoopSection !=
356        Other.LineContainsContinuedForLoopSection)
357      return LineContainsContinuedForLoopSection;
358    if (StartOfLineLevel != Other.StartOfLineLevel)
359      return StartOfLineLevel < Other.StartOfLineLevel;
360    if (LowestLevelOnLine != Other.LowestLevelOnLine)
361      return LowestLevelOnLine < Other.LowestLevelOnLine;
362    if (StartOfStringLiteral != Other.StartOfStringLiteral)
363      return StartOfStringLiteral < Other.StartOfStringLiteral;
364    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
365      return false;
366    return Stack < Other.Stack;
367  }
368};
369
370} // end namespace format
371} // end namespace clang
372
373#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
374