1//===--- BreakableToken.h - Format C++ code -------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
/// \brief Declares the BreakableToken, BreakableSingleLineToken,
/// BreakableStringLiteral, BreakableLineComment, and BreakableBlockComment
/// classes, which contain token type-specific logic to break long lines in
/// tokens.
14///
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
18#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
19
20#include "Encoding.h"
21#include "TokenAnnotator.h"
22#include "WhitespaceManager.h"
23#include <utility>
24
25namespace clang {
26namespace format {
27
28struct FormatStyle;
29
30/// \brief Base class for strategies on how to break tokens.
31///
32/// FIXME: The interface seems set in stone, so we might want to just pull the
33/// strategy into the class, instead of controlling it from the outside.
34class BreakableToken {
35public:
36  /// \brief Contains starting character index and length of split.
37  typedef std::pair<StringRef::size_type, unsigned> Split;
38
39  virtual ~BreakableToken() {}
40
41  /// \brief Returns the number of lines in this token in the original code.
42  virtual unsigned getLineCount() const = 0;
43
44  /// \brief Returns the number of columns required to format the piece of line
45  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46  ///
47  /// Note that previous breaks are not taken into account. \p Offset is always
48  /// specified from the start of the (original) line.
49  /// \p Length can be set to StringRef::npos, which means "to the end of line".
50  virtual unsigned
51  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52                          StringRef::size_type Length) const = 0;
53
54  /// \brief Returns a range (offset, length) at which to break the line at
55  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56  /// violate \p ColumnLimit.
57  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58                         unsigned ColumnLimit) const = 0;
59
60  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
61  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
62                           WhitespaceManager &Whitespaces) = 0;
63
64  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
65  virtual void replaceWhitespaceBefore(unsigned LineIndex,
66                                       WhitespaceManager &Whitespaces) {}
67
68protected:
69  BreakableToken(const FormatToken &Tok, bool InPPDirective,
70                 encoding::Encoding Encoding)
71      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
72
73  const FormatToken &Tok;
74  const bool InPPDirective;
75  const encoding::Encoding Encoding;
76};
77
78/// \brief Base class for single line tokens that can be broken.
79///
80/// \c getSplit() needs to be implemented by child classes.
81class BreakableSingleLineToken : public BreakableToken {
82public:
83  virtual unsigned getLineCount() const;
84  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
85                                           unsigned TailOffset,
86                                           StringRef::size_type Length) const;
87
88protected:
89  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
90                           StringRef Prefix, StringRef Postfix,
91                           bool InPPDirective, encoding::Encoding Encoding);
92
93  // The column in which the token starts.
94  unsigned StartColumn;
95  // The prefix a line needs after a break in the token.
96  StringRef Prefix;
97  // The postfix a line needs before introducing a break.
98  StringRef Postfix;
99  // The token text excluding the prefix and postfix.
100  StringRef Line;
101};
102
103class BreakableStringLiteral : public BreakableSingleLineToken {
104public:
105  /// \brief Creates a breakable token for a single line string literal.
106  ///
107  /// \p StartColumn specifies the column in which the token will start
108  /// after formatting.
109  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
110                         bool InPPDirective, encoding::Encoding Encoding);
111
112  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
113                         unsigned ColumnLimit) const;
114  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
115                           WhitespaceManager &Whitespaces);
116};
117
118class BreakableLineComment : public BreakableSingleLineToken {
119public:
120  /// \brief Creates a breakable token for a line comment.
121  ///
122  /// \p StartColumn specifies the column in which the comment will start
123  /// after formatting.
124  BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
125                       bool InPPDirective, encoding::Encoding Encoding);
126
127  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
128                         unsigned ColumnLimit) const;
129  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
130                           WhitespaceManager &Whitespaces);
131  virtual void replaceWhitespaceBefore(unsigned LineIndex,
132                                       WhitespaceManager &Whitespaces);
133
134private:
135  // The prefix without an additional space if one was added.
136  StringRef OriginalPrefix;
137};
138
139class BreakableBlockComment : public BreakableToken {
140public:
141  /// \brief Creates a breakable token for a block comment.
142  ///
143  /// \p StartColumn specifies the column in which the comment will start
144  /// after formatting, while \p OriginalStartColumn specifies in which
145  /// column the comment started before formatting.
146  /// If the comment starts a line after formatting, set \p FirstInLine to true.
147  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
148                        unsigned StartColumn, unsigned OriginaStartColumn,
149                        bool FirstInLine, bool InPPDirective,
150                        encoding::Encoding Encoding);
151
152  virtual unsigned getLineCount() const;
153  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
154                                           unsigned TailOffset,
155                                           StringRef::size_type Length) const;
156  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
157                         unsigned ColumnLimit) const;
158  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
159                           WhitespaceManager &Whitespaces);
160  virtual void replaceWhitespaceBefore(unsigned LineIndex,
161                                       WhitespaceManager &Whitespaces);
162
163private:
164  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
165  // so that all whitespace between the lines is accounted to Lines[LineIndex]
166  // as leading whitespace:
167  // - Lines[LineIndex] points to the text after that whitespace
168  // - Lines[LineIndex-1] shrinks by its trailing whitespace
169  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
170  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
171  //
172  // Sets StartOfLineColumn to the intended column in which the text at
173  // Lines[LineIndex] starts (note that the decoration, if present, is not
174  // considered part of the text).
175  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
176                        int IndentDelta);
177
178  // Returns the column at which the text in line LineIndex starts, when broken
179  // at TailOffset. Note that the decoration (if present) is not considered part
180  // of the text.
181  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
182
183  // Contains the text of the lines of the block comment, excluding the leading
184  // /* in the first line and trailing */ in the last line, and excluding all
185  // trailing whitespace between the lines. Note that the decoration (if
186  // present) is also not considered part of the text.
187  SmallVector<StringRef, 16> Lines;
188
189  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
190  // front of Lines[i]. Note that this can include "* " sequences, which we
191  // regard as whitespace when all lines have a "*" prefix.
192  SmallVector<unsigned, 16> LeadingWhitespace;
193
194  // StartOfLineColumn[i] is the target column at which Line[i] should be.
195  // Note that this excludes a leading "* " or "*" in case all lines have
196  // a "*" prefix.
197  SmallVector<unsigned, 16> StartOfLineColumn;
198
199  // The column at which the text of a broken line should start.
200  // Note that an optional decoration would go before that column.
201  // IndentAtLineBreak is a uniform position for all lines in a block comment,
202  // regardless of their relative position.
203  // FIXME: Revisit the decision to do this; the main reason was to support
204  // patterns like
205  // /**************//**
206  //  * Comment
207  // We could also support such patterns by special casing the first line
208  // instead.
209  unsigned IndentAtLineBreak;
210
211  // This is to distinguish between the case when the last line was empty and
212  // the case when it started with a decoration ("*" or "* ").
213  bool LastLineNeedsDecoration;
214
215  // Either "* " if all lines begin with a "*", or empty.
216  StringRef Decoration;
217};
218
219} // namespace format
220} // namespace clang
221
222#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
223