BreakableToken.h revision 0b62cc30c9aa462184de0435dc083d944a41d67f
1//===--- BreakableToken.h - Format C++ code -------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
/// \brief Declares BreakableToken and its subclasses
/// (BreakableSingleLineToken, BreakableStringLiteral, BreakableLineComment
/// and BreakableBlockComment), which contain token type-specific logic to
/// break long lines in tokens.
14///
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
18#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
19
20#include "Encoding.h"
21#include "TokenAnnotator.h"
22#include "WhitespaceManager.h"
23#include <utility>
24
25namespace clang {
26namespace format {
27
28struct FormatStyle;
29
30/// \brief Base class for strategies on how to break tokens.
31///
32/// FIXME: The interface seems set in stone, so we might want to just pull the
33/// strategy into the class, instead of controlling it from the outside.
34class BreakableToken {
35public:
36  /// \brief Contains starting character index and length of split.
37  typedef std::pair<StringRef::size_type, unsigned> Split;
38
39  virtual ~BreakableToken() {}
40
41  /// \brief Returns the number of lines in this token in the original code.
42  virtual unsigned getLineCount() const = 0;
43
44  /// \brief Returns the number of columns required to format the piece of line
45  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46  ///
47  /// Note that previous breaks are not taken into account. \p Offset is always
48  /// specified from the start of the (original) line.
49  /// \p Length can be set to StringRef::npos, which means "to the end of line".
50  virtual unsigned
51  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52                          StringRef::size_type Length) const = 0;
53
54  /// \brief Returns a range (offset, length) at which to break the line at
55  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56  /// violate \p ColumnLimit.
57  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58                         unsigned ColumnLimit) const = 0;
59
60  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
61  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
62                           WhitespaceManager &Whitespaces) = 0;
63
64  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
65  virtual void replaceWhitespaceBefore(unsigned LineIndex,
66                                       WhitespaceManager &Whitespaces) {}
67
68protected:
69  BreakableToken(const FormatToken &Tok, bool InPPDirective,
70                 encoding::Encoding Encoding, const FormatStyle &Style)
71      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
72        Style(Style) {}
73
74  const FormatToken &Tok;
75  const bool InPPDirective;
76  const encoding::Encoding Encoding;
77  const FormatStyle &Style;
78};
79
80/// \brief Base class for single line tokens that can be broken.
81///
82/// \c getSplit() needs to be implemented by child classes.
83class BreakableSingleLineToken : public BreakableToken {
84public:
85  virtual unsigned getLineCount() const;
86  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
87                                           unsigned TailOffset,
88                                           StringRef::size_type Length) const;
89
90protected:
91  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
92                           StringRef Prefix, StringRef Postfix,
93                           bool InPPDirective, encoding::Encoding Encoding,
94                           const FormatStyle &Style);
95
96  // The column in which the token starts.
97  unsigned StartColumn;
98  // The prefix a line needs after a break in the token.
99  StringRef Prefix;
100  // The postfix a line needs before introducing a break.
101  StringRef Postfix;
102  // The token text excluding the prefix and postfix.
103  StringRef Line;
104};
105
106class BreakableStringLiteral : public BreakableSingleLineToken {
107public:
108  /// \brief Creates a breakable token for a single line string literal.
109  ///
110  /// \p StartColumn specifies the column in which the token will start
111  /// after formatting.
112  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
113                         bool InPPDirective, encoding::Encoding Encoding,
114                         const FormatStyle &Style);
115
116  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
117                         unsigned ColumnLimit) const;
118  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
119                           WhitespaceManager &Whitespaces);
120};
121
122class BreakableLineComment : public BreakableSingleLineToken {
123public:
124  /// \brief Creates a breakable token for a line comment.
125  ///
126  /// \p StartColumn specifies the column in which the comment will start
127  /// after formatting.
128  BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
129                       bool InPPDirective, encoding::Encoding Encoding,
130                       const FormatStyle &Style);
131
132  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
133                         unsigned ColumnLimit) const;
134  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
135                           WhitespaceManager &Whitespaces);
136  virtual void replaceWhitespaceBefore(unsigned LineIndex,
137                                       WhitespaceManager &Whitespaces);
138
139private:
140  // The prefix without an additional space if one was added.
141  StringRef OriginalPrefix;
142};
143
144class BreakableBlockComment : public BreakableToken {
145public:
146  /// \brief Creates a breakable token for a block comment.
147  ///
148  /// \p StartColumn specifies the column in which the comment will start
149  /// after formatting, while \p OriginalStartColumn specifies in which
150  /// column the comment started before formatting.
151  /// If the comment starts a line after formatting, set \p FirstInLine to true.
152  BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
153                        unsigned OriginaStartColumn, bool FirstInLine,
154                        bool InPPDirective, encoding::Encoding Encoding,
155                        const FormatStyle &Style);
156
157  virtual unsigned getLineCount() const;
158  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
159                                           unsigned TailOffset,
160                                           StringRef::size_type Length) const;
161  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
162                         unsigned ColumnLimit) const;
163  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
164                           WhitespaceManager &Whitespaces);
165  virtual void replaceWhitespaceBefore(unsigned LineIndex,
166                                       WhitespaceManager &Whitespaces);
167
168private:
169  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
170  // so that all whitespace between the lines is accounted to Lines[LineIndex]
171  // as leading whitespace:
172  // - Lines[LineIndex] points to the text after that whitespace
173  // - Lines[LineIndex-1] shrinks by its trailing whitespace
174  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
175  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
176  //
177  // Sets StartOfLineColumn to the intended column in which the text at
178  // Lines[LineIndex] starts (note that the decoration, if present, is not
179  // considered part of the text).
180  void adjustWhitespace(unsigned LineIndex, int IndentDelta);
181
182  // Returns the column at which the text in line LineIndex starts, when broken
183  // at TailOffset. Note that the decoration (if present) is not considered part
184  // of the text.
185  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
186
187  // Contains the text of the lines of the block comment, excluding the leading
188  // /* in the first line and trailing */ in the last line, and excluding all
189  // trailing whitespace between the lines. Note that the decoration (if
190  // present) is also not considered part of the text.
191  SmallVector<StringRef, 16> Lines;
192
193  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
194  // front of Lines[i]. Note that this can include "* " sequences, which we
195  // regard as whitespace when all lines have a "*" prefix.
196  SmallVector<unsigned, 16> LeadingWhitespace;
197
198  // StartOfLineColumn[i] is the target column at which Line[i] should be.
199  // Note that this excludes a leading "* " or "*" in case all lines have
200  // a "*" prefix.
201  SmallVector<unsigned, 16> StartOfLineColumn;
202
203  // The column at which the text of a broken line should start.
204  // Note that an optional decoration would go before that column.
205  // IndentAtLineBreak is a uniform position for all lines in a block comment,
206  // regardless of their relative position.
207  // FIXME: Revisit the decision to do this; the main reason was to support
208  // patterns like
209  // /**************//**
210  //  * Comment
211  // We could also support such patterns by special casing the first line
212  // instead.
213  unsigned IndentAtLineBreak;
214
215  // This is to distinguish between the case when the last line was empty and
216  // the case when it started with a decoration ("*" or "* ").
217  bool LastLineNeedsDecoration;
218
219  // Either "* " if all lines begin with a "*", or empty.
220  StringRef Decoration;
221};
222
223} // namespace format
224} // namespace clang
225
226#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
227