1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Contains implementation of BreakableToken class and classes derived
12/// from it.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-token-breaker"
17
18#include "BreakableToken.h"
19#include "clang/Basic/CharInfo.h"
20#include "clang/Format/Format.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/Support/Debug.h"
23#include <algorithm>
24
25namespace clang {
26namespace format {
27
// The set of characters treated as blanks when searching for split points and
// when trimming lines: space, horizontal tab, vertical tab and form feed.
static const char *const Blanks = " \t\v\f";

// Returns true if \p C is a space, horizontal tab, vertical tab or form feed,
// i.e. one of the characters listed in Blanks. Newlines are not blanks.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f';
}
40
41static BreakableToken::Split getCommentSplit(StringRef Text,
42                                             unsigned ContentStartColumn,
43                                             unsigned ColumnLimit,
44                                             encoding::Encoding Encoding) {
45  if (ColumnLimit <= ContentStartColumn + 1)
46    return BreakableToken::Split(StringRef::npos, 0);
47
48  unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
49  unsigned MaxSplitBytes = 0;
50
51  for (unsigned NumChars = 0;
52       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
53    MaxSplitBytes +=
54        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
55
56  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
57  if (SpaceOffset == StringRef::npos ||
58      // Don't break at leading whitespace.
59      Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
60    // Make sure that we don't break at leading whitespace that
61    // reaches past MaxSplit.
62    StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
63    if (FirstNonWhitespace == StringRef::npos)
64      // If the comment is only whitespace, we cannot split.
65      return BreakableToken::Split(StringRef::npos, 0);
66    SpaceOffset = Text.find_first_of(
67        Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
68  }
69  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
70    StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
71    StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
72    return BreakableToken::Split(BeforeCut.size(),
73                                 AfterCut.begin() - BeforeCut.end());
74  }
75  return BreakableToken::Split(StringRef::npos, 0);
76}
77
78static BreakableToken::Split getStringSplit(StringRef Text,
79                                            unsigned ContentStartColumn,
80                                            unsigned ColumnLimit,
81                                            encoding::Encoding Encoding) {
82  // FIXME: Reduce unit test case.
83  if (Text.empty())
84    return BreakableToken::Split(StringRef::npos, 0);
85  if (ColumnLimit <= ContentStartColumn)
86    return BreakableToken::Split(StringRef::npos, 0);
87  unsigned MaxSplit =
88      std::min<unsigned>(ColumnLimit - ContentStartColumn,
89                         encoding::getCodePointCount(Text, Encoding) - 1);
90  StringRef::size_type SpaceOffset = 0;
91  StringRef::size_type SlashOffset = 0;
92  StringRef::size_type WordStartOffset = 0;
93  StringRef::size_type SplitPoint = 0;
94  for (unsigned Chars = 0;;) {
95    unsigned Advance;
96    if (Text[0] == '\\') {
97      Advance = encoding::getEscapeSequenceLength(Text);
98      Chars += Advance;
99    } else {
100      Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
101      Chars += 1;
102    }
103
104    if (Chars > MaxSplit)
105      break;
106
107    if (IsBlank(Text[0]))
108      SpaceOffset = SplitPoint;
109    if (Text[0] == '/')
110      SlashOffset = SplitPoint;
111    if (Advance == 1 && !isAlphanumeric(Text[0]))
112      WordStartOffset = SplitPoint;
113
114    SplitPoint += Advance;
115    Text = Text.substr(Advance);
116  }
117
118  if (SpaceOffset != 0)
119    return BreakableToken::Split(SpaceOffset + 1, 0);
120  if (SlashOffset != 0)
121    return BreakableToken::Split(SlashOffset + 1, 0);
122  if (WordStartOffset != 0)
123    return BreakableToken::Split(WordStartOffset + 1, 0);
124  if (SplitPoint != 0)
125    return BreakableToken::Split(SplitPoint, 0);
126  return BreakableToken::Split(StringRef::npos, 0);
127}
128
129unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
130
131unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
132    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
133  return StartColumn + Prefix.size() + Postfix.size() +
134         encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
135}
136
137BreakableSingleLineToken::BreakableSingleLineToken(
138    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
139    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
140    : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
141      Prefix(Prefix), Postfix(Postfix) {
142  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
143  Line = Tok.TokenText.substr(
144      Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
145}
146
147BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
148                                               unsigned StartColumn,
149                                               bool InPPDirective,
150                                               encoding::Encoding Encoding)
151    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
152                               Encoding) {}
153
154BreakableToken::Split
155BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
156                                 unsigned ColumnLimit) const {
157  return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
158                        Encoding);
159}
160
161void BreakableStringLiteral::insertBreak(unsigned LineIndex,
162                                         unsigned TailOffset, Split Split,
163                                         WhitespaceManager &Whitespaces) {
164  Whitespaces.replaceWhitespaceInToken(
165      Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
166      Prefix, InPPDirective, 1, StartColumn);
167}
168
169static StringRef getLineCommentPrefix(StringRef Comment) {
170  static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" };
171  for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
172    if (Comment.startswith(KnownPrefixes[i]))
173      return KnownPrefixes[i];
174  return "";
175}
176
177BreakableLineComment::BreakableLineComment(const FormatToken &Token,
178                                           unsigned StartColumn,
179                                           bool InPPDirective,
180                                           encoding::Encoding Encoding)
181    : BreakableSingleLineToken(Token, StartColumn,
182                               getLineCommentPrefix(Token.TokenText), "",
183                               InPPDirective, Encoding) {
184  OriginalPrefix = Prefix;
185  if (Token.TokenText.size() > Prefix.size() &&
186      isAlphanumeric(Token.TokenText[Prefix.size()])) {
187    if (Prefix == "//")
188      Prefix = "// ";
189    else if (Prefix == "///")
190      Prefix = "/// ";
191  }
192}
193
194BreakableToken::Split
195BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
196                               unsigned ColumnLimit) const {
197  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
198                         ColumnLimit, Encoding);
199}
200
201void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
202                                       Split Split,
203                                       WhitespaceManager &Whitespaces) {
204  Whitespaces.replaceWhitespaceInToken(
205      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
206      Postfix, Prefix, InPPDirective, 1, StartColumn);
207}
208
209void
210BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
211                                              WhitespaceManager &Whitespaces) {
212  if (OriginalPrefix != Prefix) {
213    Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
214                                         false, 0, 1);
215  }
216}
217
218BreakableBlockComment::BreakableBlockComment(
219    const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
220    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
221    encoding::Encoding Encoding)
222    : BreakableToken(Token, InPPDirective, Encoding) {
223  StringRef TokenText(Token.TokenText);
224  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
225  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
226
227  int IndentDelta = StartColumn - OriginalStartColumn;
228  LeadingWhitespace.resize(Lines.size());
229  StartOfLineColumn.resize(Lines.size());
230  StartOfLineColumn[0] = StartColumn + 2;
231  for (size_t i = 1; i < Lines.size(); ++i)
232    adjustWhitespace(Style, i, IndentDelta);
233
234  Decoration = "* ";
235  if (Lines.size() == 1 && !FirstInLine) {
236    // Comments for which FirstInLine is false can start on arbitrary column,
237    // and available horizontal space can be too small to align consecutive
238    // lines with the first one.
239    // FIXME: We could, probably, align them to current indentation level, but
240    // now we just wrap them without stars.
241    Decoration = "";
242  }
243  for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
244    // If the last line is empty, the closing "*/" will have a star.
245    if (i + 1 == e && Lines[i].empty())
246      break;
247    while (!Lines[i].startswith(Decoration))
248      Decoration = Decoration.substr(0, Decoration.size() - 1);
249  }
250
251  LastLineNeedsDecoration = true;
252  IndentAtLineBreak = StartOfLineColumn[0] + 1;
253  for (size_t i = 1; i < Lines.size(); ++i) {
254    if (Lines[i].empty()) {
255      if (i + 1 == Lines.size()) {
256        // Empty last line means that we already have a star as a part of the
257        // trailing */. We also need to preserve whitespace, so that */ is
258        // correctly indented.
259        LastLineNeedsDecoration = false;
260      } else if (Decoration.empty()) {
261        // For all other lines, set the start column to 0 if they're empty, so
262        // we do not insert trailing whitespace anywhere.
263        StartOfLineColumn[i] = 0;
264      }
265      continue;
266    }
267    // The first line already excludes the star.
268    // For all other lines, adjust the line to exclude the star and
269    // (optionally) the first whitespace.
270    StartOfLineColumn[i] += Decoration.size();
271    Lines[i] = Lines[i].substr(Decoration.size());
272    LeadingWhitespace[i] += Decoration.size();
273    IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
274  }
275  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
276  DEBUG({
277    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
278    for (size_t i = 0; i < Lines.size(); ++i) {
279      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
280                   << "\n";
281    }
282  });
283}
284
285void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
286                                             unsigned LineIndex,
287                                             int IndentDelta) {
288  // When in a preprocessor directive, the trailing backslash in a block comment
289  // is not needed, but can serve a purpose of uniformity with necessary escaped
290  // newlines outside the comment. In this case we remove it here before
291  // trimming the trailing whitespace. The backslash will be re-added later when
292  // inserting a line break.
293  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
294  if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
295    --EndOfPreviousLine;
296
297  // Calculate the end of the non-whitespace text in the previous line.
298  EndOfPreviousLine =
299      Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
300  if (EndOfPreviousLine == StringRef::npos)
301    EndOfPreviousLine = 0;
302  else
303    ++EndOfPreviousLine;
304  // Calculate the start of the non-whitespace text in the current line.
305  size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
306  if (StartOfLine == StringRef::npos)
307    StartOfLine = Lines[LineIndex].size();
308
309  // Adjust Lines to only contain relevant text.
310  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
311  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
312  // Adjust LeadingWhitespace to account all whitespace between the lines
313  // to the current line.
314  LeadingWhitespace[LineIndex] =
315      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
316
317  // FIXME: We currently count tabs as 1 character. To solve this, we need to
318  // get the correct indentation width of the start of the comment, which
319  // requires correct counting of the tab expansions before the comment, and
320  // a configurable tab width. Since the current implementation only breaks
321  // if leading tabs are intermixed with spaces, that is not a high priority.
322
323  // Adjust the start column uniformly accross all lines.
324  StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
325}
326
327unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
328
329unsigned BreakableBlockComment::getLineLengthAfterSplit(
330    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
331  return getContentStartColumn(LineIndex, Offset) +
332         encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
333                                     Encoding) +
334         // The last line gets a "*/" postfix.
335         (LineIndex + 1 == Lines.size() ? 2 : 0);
336}
337
338BreakableToken::Split
339BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
340                                unsigned ColumnLimit) const {
341  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
342                         getContentStartColumn(LineIndex, TailOffset),
343                         ColumnLimit, Encoding);
344}
345
346void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
347                                        Split Split,
348                                        WhitespaceManager &Whitespaces) {
349  StringRef Text = Lines[LineIndex].substr(TailOffset);
350  StringRef Prefix = Decoration;
351  if (LineIndex + 1 == Lines.size() &&
352      Text.size() == Split.first + Split.second) {
353    // For the last line we need to break before "*/", but not to add "* ".
354    Prefix = "";
355  }
356
357  unsigned BreakOffsetInToken =
358      Text.data() - Tok.TokenText.data() + Split.first;
359  unsigned CharsToRemove = Split.second;
360  assert(IndentAtLineBreak >= Decoration.size());
361  Whitespaces.replaceWhitespaceInToken(Tok, BreakOffsetInToken, CharsToRemove,
362                                       "", Prefix, InPPDirective, 1,
363                                       IndentAtLineBreak - Decoration.size());
364}
365
366void
367BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
368                                               WhitespaceManager &Whitespaces) {
369  if (LineIndex == 0)
370    return;
371  StringRef Prefix = Decoration;
372  if (Lines[LineIndex].empty()) {
373    if (LineIndex + 1 == Lines.size()) {
374      if (!LastLineNeedsDecoration) {
375        // If the last line was empty, we don't need a prefix, as the */ will
376        // line up with the decoration (if it exists).
377        Prefix = "";
378      }
379    } else if (!Decoration.empty()) {
380      // For other empty lines, if we do have a decoration, adapt it to not
381      // contain a trailing whitespace.
382      Prefix = Prefix.substr(0, 1);
383    }
384  } else {
385    if (StartOfLineColumn[LineIndex] == 1) {
386      // This line starts immediately after the decorating *.
387      Prefix = Prefix.substr(0, 1);
388    }
389  }
390
391  unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
392                                     Tok.TokenText.data() -
393                                     LeadingWhitespace[LineIndex];
394  assert(StartOfLineColumn[LineIndex] >= Prefix.size());
395  Whitespaces.replaceWhitespaceInToken(
396      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
397      InPPDirective, 1, StartOfLineColumn[LineIndex] - Prefix.size());
398}
399
400unsigned
401BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
402                                             unsigned TailOffset) const {
403  // If we break, we always break at the predefined indent.
404  if (TailOffset != 0)
405    return IndentAtLineBreak;
406  return StartOfLineColumn[LineIndex];
407}
408
409} // namespace format
410} // namespace clang
411