// BreakableToken.h revision 0b62cc30c9aa462184de0435dc083d944a41d67f
1//===--- BreakableToken.h - Format C++ code -------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief Declares BreakableToken, BreakableStringLiteral, and 12/// BreakableBlockComment classes, that contain token type-specific logic to 13/// break long lines in tokens. 14/// 15//===----------------------------------------------------------------------===// 16 17#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H 18#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H 19 20#include "Encoding.h" 21#include "TokenAnnotator.h" 22#include "WhitespaceManager.h" 23#include <utility> 24 25namespace clang { 26namespace format { 27 28struct FormatStyle; 29 30/// \brief Base class for strategies on how to break tokens. 31/// 32/// FIXME: The interface seems set in stone, so we might want to just pull the 33/// strategy into the class, instead of controlling it from the outside. 34class BreakableToken { 35public: 36 /// \brief Contains starting character index and length of split. 37 typedef std::pair<StringRef::size_type, unsigned> Split; 38 39 virtual ~BreakableToken() {} 40 41 /// \brief Returns the number of lines in this token in the original code. 42 virtual unsigned getLineCount() const = 0; 43 44 /// \brief Returns the number of columns required to format the piece of line 45 /// at \p LineIndex, from byte offset \p Offset with length \p Length. 46 /// 47 /// Note that previous breaks are not taken into account. \p Offset is always 48 /// specified from the start of the (original) line. 49 /// \p Length can be set to StringRef::npos, which means "to the end of line". 
50 virtual unsigned 51 getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, 52 StringRef::size_type Length) const = 0; 53 54 /// \brief Returns a range (offset, length) at which to break the line at 55 /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not 56 /// violate \p ColumnLimit. 57 virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, 58 unsigned ColumnLimit) const = 0; 59 60 /// \brief Emits the previously retrieved \p Split via \p Whitespaces. 61 virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 62 WhitespaceManager &Whitespaces) = 0; 63 64 /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. 65 virtual void replaceWhitespaceBefore(unsigned LineIndex, 66 WhitespaceManager &Whitespaces) {} 67 68protected: 69 BreakableToken(const FormatToken &Tok, bool InPPDirective, 70 encoding::Encoding Encoding, const FormatStyle &Style) 71 : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding), 72 Style(Style) {} 73 74 const FormatToken &Tok; 75 const bool InPPDirective; 76 const encoding::Encoding Encoding; 77 const FormatStyle &Style; 78}; 79 80/// \brief Base class for single line tokens that can be broken. 81/// 82/// \c getSplit() needs to be implemented by child classes. 83class BreakableSingleLineToken : public BreakableToken { 84public: 85 virtual unsigned getLineCount() const; 86 virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, 87 unsigned TailOffset, 88 StringRef::size_type Length) const; 89 90protected: 91 BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, 92 StringRef Prefix, StringRef Postfix, 93 bool InPPDirective, encoding::Encoding Encoding, 94 const FormatStyle &Style); 95 96 // The column in which the token starts. 97 unsigned StartColumn; 98 // The prefix a line needs after a break in the token. 99 StringRef Prefix; 100 // The postfix a line needs before introducing a break. 
101 StringRef Postfix; 102 // The token text excluding the prefix and postfix. 103 StringRef Line; 104}; 105 106class BreakableStringLiteral : public BreakableSingleLineToken { 107public: 108 /// \brief Creates a breakable token for a single line string literal. 109 /// 110 /// \p StartColumn specifies the column in which the token will start 111 /// after formatting. 112 BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, 113 bool InPPDirective, encoding::Encoding Encoding, 114 const FormatStyle &Style); 115 116 virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, 117 unsigned ColumnLimit) const; 118 virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 119 WhitespaceManager &Whitespaces); 120}; 121 122class BreakableLineComment : public BreakableSingleLineToken { 123public: 124 /// \brief Creates a breakable token for a line comment. 125 /// 126 /// \p StartColumn specifies the column in which the comment will start 127 /// after formatting. 128 BreakableLineComment(const FormatToken &Token, unsigned StartColumn, 129 bool InPPDirective, encoding::Encoding Encoding, 130 const FormatStyle &Style); 131 132 virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, 133 unsigned ColumnLimit) const; 134 virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 135 WhitespaceManager &Whitespaces); 136 virtual void replaceWhitespaceBefore(unsigned LineIndex, 137 WhitespaceManager &Whitespaces); 138 139private: 140 // The prefix without an additional space if one was added. 141 StringRef OriginalPrefix; 142}; 143 144class BreakableBlockComment : public BreakableToken { 145public: 146 /// \brief Creates a breakable token for a block comment. 147 /// 148 /// \p StartColumn specifies the column in which the comment will start 149 /// after formatting, while \p OriginalStartColumn specifies in which 150 /// column the comment started before formatting. 
151 /// If the comment starts a line after formatting, set \p FirstInLine to true. 152 BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, 153 unsigned OriginaStartColumn, bool FirstInLine, 154 bool InPPDirective, encoding::Encoding Encoding, 155 const FormatStyle &Style); 156 157 virtual unsigned getLineCount() const; 158 virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, 159 unsigned TailOffset, 160 StringRef::size_type Length) const; 161 virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, 162 unsigned ColumnLimit) const; 163 virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 164 WhitespaceManager &Whitespaces); 165 virtual void replaceWhitespaceBefore(unsigned LineIndex, 166 WhitespaceManager &Whitespaces); 167 168private: 169 // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], 170 // so that all whitespace between the lines is accounted to Lines[LineIndex] 171 // as leading whitespace: 172 // - Lines[LineIndex] points to the text after that whitespace 173 // - Lines[LineIndex-1] shrinks by its trailing whitespace 174 // - LeadingWhitespace[LineIndex] is updated with the complete whitespace 175 // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] 176 // 177 // Sets StartOfLineColumn to the intended column in which the text at 178 // Lines[LineIndex] starts (note that the decoration, if present, is not 179 // considered part of the text). 180 void adjustWhitespace(unsigned LineIndex, int IndentDelta); 181 182 // Returns the column at which the text in line LineIndex starts, when broken 183 // at TailOffset. Note that the decoration (if present) is not considered part 184 // of the text. 
185 unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; 186 187 // Contains the text of the lines of the block comment, excluding the leading 188 // /* in the first line and trailing */ in the last line, and excluding all 189 // trailing whitespace between the lines. Note that the decoration (if 190 // present) is also not considered part of the text. 191 SmallVector<StringRef, 16> Lines; 192 193 // LeadingWhitespace[i] is the number of characters regarded as whitespace in 194 // front of Lines[i]. Note that this can include "* " sequences, which we 195 // regard as whitespace when all lines have a "*" prefix. 196 SmallVector<unsigned, 16> LeadingWhitespace; 197 198 // StartOfLineColumn[i] is the target column at which Line[i] should be. 199 // Note that this excludes a leading "* " or "*" in case all lines have 200 // a "*" prefix. 201 SmallVector<unsigned, 16> StartOfLineColumn; 202 203 // The column at which the text of a broken line should start. 204 // Note that an optional decoration would go before that column. 205 // IndentAtLineBreak is a uniform position for all lines in a block comment, 206 // regardless of their relative position. 207 // FIXME: Revisit the decision to do this; the main reason was to support 208 // patterns like 209 // /**************//** 210 // * Comment 211 // We could also support such patterns by special casing the first line 212 // instead. 213 unsigned IndentAtLineBreak; 214 215 // This is to distinguish between the case when the last line was empty and 216 // the case when it started with a decoration ("*" or "* "). 217 bool LastLineNeedsDecoration; 218 219 // Either "* " if all lines begin with a "*", or empty. 220 StringRef Decoration; 221}; 222 223} // namespace format 224} // namespace clang 225 226#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H 227