1//===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements WhitespaceManager class.
12///
13//===----------------------------------------------------------------------===//
14
15#include "WhitespaceManager.h"
16#include "llvm/ADT/STLExtras.h"
17
18namespace clang {
19namespace format {
20
21bool WhitespaceManager::Change::IsBeforeInFile::
22operator()(const Change &C1, const Change &C2) const {
23  return SourceMgr.isBeforeInTranslationUnit(
24      C1.OriginalWhitespaceRange.getBegin(),
25      C2.OriginalWhitespaceRange.getBegin());
26}
27
28WhitespaceManager::Change::Change(
29    bool CreateReplacement, SourceRange OriginalWhitespaceRange,
30    unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
31    unsigned NewlinesBefore, StringRef PreviousLinePostfix,
32    StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective,
33    bool IsStartOfDeclName)
34    : CreateReplacement(CreateReplacement),
35      OriginalWhitespaceRange(OriginalWhitespaceRange),
36      StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
37      PreviousLinePostfix(PreviousLinePostfix),
38      CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
39      ContinuesPPDirective(ContinuesPPDirective),
40      IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel),
41      Spaces(Spaces), IsTrailingComment(false), TokenLength(0),
42      PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
43      StartOfBlockComment(nullptr), IndentationOffset(0) {}
44
45void WhitespaceManager::reset() {
46  Changes.clear();
47  Replaces.clear();
48}
49
50void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
51                                          unsigned IndentLevel, unsigned Spaces,
52                                          unsigned StartOfTokenColumn,
53                                          bool InPPDirective) {
54  if (Tok.Finalized)
55    return;
56  Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
57  Changes.push_back(
58      Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, StartOfTokenColumn,
59             Newlines, "", "", Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst,
60             Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName)));
61}
62
63void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
64                                            bool InPPDirective) {
65  if (Tok.Finalized)
66    return;
67  Changes.push_back(
68      Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0,
69             /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
70             Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst,
71             Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName)));
72}
73
74void WhitespaceManager::replaceWhitespaceInToken(
75    const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
76    StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
77    unsigned Newlines, unsigned IndentLevel, int Spaces) {
78  if (Tok.Finalized)
79    return;
80  SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
81  Changes.push_back(Change(
82      true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
83      IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
84      CurrentPrefix,
85      // If we don't add a newline this change doesn't start a comment. Thus,
86      // when we align line comments, we don't need to treat this change as one.
87      // FIXME: We still need to take this change in account to properly
88      // calculate the new length of the comment and to calculate the changes
89      // for which to do the alignment when aligning comments.
90      Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown,
91      InPPDirective && !Tok.IsFirst,
92      Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName)));
93}
94
95const tooling::Replacements &WhitespaceManager::generateReplacements() {
96  if (Changes.empty())
97    return Replaces;
98
99  std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
100  calculateLineBreakInformation();
101  alignConsecutiveDeclarations();
102  alignConsecutiveAssignments();
103  alignTrailingComments();
104  alignEscapedNewlines();
105  generateChanges();
106
107  return Replaces;
108}
109
110void WhitespaceManager::calculateLineBreakInformation() {
111  Changes[0].PreviousEndOfTokenColumn = 0;
112  for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
113    unsigned OriginalWhitespaceStart =
114        SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
115    unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
116        Changes[i - 1].OriginalWhitespaceRange.getEnd());
117    Changes[i - 1].TokenLength = OriginalWhitespaceStart -
118                                 PreviousOriginalWhitespaceEnd +
119                                 Changes[i].PreviousLinePostfix.size() +
120                                 Changes[i - 1].CurrentLinePrefix.size();
121
122    Changes[i].PreviousEndOfTokenColumn =
123        Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
124
125    Changes[i - 1].IsTrailingComment =
126        (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) &&
127        Changes[i - 1].Kind == tok::comment;
128  }
129  // FIXME: The last token is currently not always an eof token; in those
130  // cases, setting TokenLength of the last token to 0 is wrong.
131  Changes.back().TokenLength = 0;
132  Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
133
134  const WhitespaceManager::Change *LastBlockComment = nullptr;
135  for (auto &Change : Changes) {
136    Change.StartOfBlockComment = nullptr;
137    Change.IndentationOffset = 0;
138    if (Change.Kind == tok::comment) {
139      LastBlockComment = &Change;
140    } else if (Change.Kind == tok::unknown) {
141      if ((Change.StartOfBlockComment = LastBlockComment))
142        Change.IndentationOffset =
143            Change.StartOfTokenColumn -
144            Change.StartOfBlockComment->StartOfTokenColumn;
145    } else {
146      LastBlockComment = nullptr;
147    }
148  }
149}
150
151// Align a single sequence of tokens, see AlignTokens below.
152template <typename F>
153static void
154AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
155                   SmallVector<WhitespaceManager::Change, 16> &Changes) {
156  bool FoundMatchOnLine = false;
157  int Shift = 0;
158  for (unsigned i = Start; i != End; ++i) {
159    if (Changes[i].NewlinesBefore > 0) {
160      FoundMatchOnLine = false;
161      Shift = 0;
162    }
163
164    // If this is the first matching token to be aligned, remember by how many
165    // spaces it has to be shifted, so the rest of the changes on the line are
166    // shifted by the same amount
167    if (!FoundMatchOnLine && Matches(Changes[i])) {
168      FoundMatchOnLine = true;
169      Shift = Column - Changes[i].StartOfTokenColumn;
170      Changes[i].Spaces += Shift;
171    }
172
173    assert(Shift >= 0);
174    Changes[i].StartOfTokenColumn += Shift;
175    if (i + 1 != Changes.size())
176      Changes[i + 1].PreviousEndOfTokenColumn += Shift;
177  }
178}
179
180// Walk through all of the changes and find sequences of matching tokens to
181// align. To do so, keep track of the lines and whether or not a matching token
182// was found on a line. If a matching token is found, extend the current
183// sequence. If the current line cannot be part of a sequence, e.g. because
184// there is an empty line before it or it contains only non-matching tokens,
185// finalize the previous sequence.
186template <typename F>
187static void AlignTokens(const FormatStyle &Style, F &&Matches,
188                        SmallVector<WhitespaceManager::Change, 16> &Changes) {
189  unsigned MinColumn = 0;
190  unsigned MaxColumn = UINT_MAX;
191
192  // Line number of the start and the end of the current token sequence.
193  unsigned StartOfSequence = 0;
194  unsigned EndOfSequence = 0;
195
196  // Keep track of the nesting level of matching tokens, i.e. the number of
197  // surrounding (), [], or {}. We will only align a sequence of matching
198  // token that share the same scope depth.
199  //
200  // FIXME: This could use FormatToken::NestingLevel information, but there is
201  // an outstanding issue wrt the brace scopes.
202  unsigned NestingLevelOfLastMatch = 0;
203  unsigned NestingLevel = 0;
204
205  // Keep track of the number of commas before the matching tokens, we will only
206  // align a sequence of matching tokens if they are preceded by the same number
207  // of commas.
208  unsigned CommasBeforeLastMatch = 0;
209  unsigned CommasBeforeMatch = 0;
210
211  // Whether a matching token has been found on the current line.
212  bool FoundMatchOnLine = false;
213
214  // Aligns a sequence of matching tokens, on the MinColumn column.
215  //
216  // Sequences start from the first matching token to align, and end at the
217  // first token of the first line that doesn't need to be aligned.
218  //
219  // We need to adjust the StartOfTokenColumn of each Change that is on a line
220  // containing any matching token to be aligned and located after such token.
221  auto AlignCurrentSequence = [&] {
222    if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
223      AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
224                         Changes);
225    MinColumn = 0;
226    MaxColumn = UINT_MAX;
227    StartOfSequence = 0;
228    EndOfSequence = 0;
229  };
230
231  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
232    if (Changes[i].NewlinesBefore != 0) {
233      CommasBeforeMatch = 0;
234      EndOfSequence = i;
235      // If there is a blank line, or if the last line didn't contain any
236      // matching token, the sequence ends here.
237      if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
238        AlignCurrentSequence();
239
240      FoundMatchOnLine = false;
241    }
242
243    if (Changes[i].Kind == tok::comma) {
244      ++CommasBeforeMatch;
245    } else if (Changes[i].Kind == tok::r_brace ||
246               Changes[i].Kind == tok::r_paren ||
247               Changes[i].Kind == tok::r_square) {
248      --NestingLevel;
249    } else if (Changes[i].Kind == tok::l_brace ||
250               Changes[i].Kind == tok::l_paren ||
251               Changes[i].Kind == tok::l_square) {
252      // We want sequences to skip over child scopes if possible, but not the
253      // other way around.
254      NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel);
255      ++NestingLevel;
256    }
257
258    if (!Matches(Changes[i]))
259      continue;
260
261    // If there is more than one matching token per line, or if the number of
262    // preceding commas, or the scope depth, do not match anymore, end the
263    // sequence.
264    if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch ||
265        NestingLevel != NestingLevelOfLastMatch)
266      AlignCurrentSequence();
267
268    CommasBeforeLastMatch = CommasBeforeMatch;
269    NestingLevelOfLastMatch = NestingLevel;
270    FoundMatchOnLine = true;
271
272    if (StartOfSequence == 0)
273      StartOfSequence = i;
274
275    unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
276    int LineLengthAfter = -Changes[i].Spaces;
277    for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
278      LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
279    unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
280
281    // If we are restricted by the maximum column width, end the sequence.
282    if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
283        CommasBeforeLastMatch != CommasBeforeMatch) {
284      AlignCurrentSequence();
285      StartOfSequence = i;
286    }
287
288    MinColumn = std::max(MinColumn, ChangeMinColumn);
289    MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
290  }
291
292  EndOfSequence = Changes.size();
293  AlignCurrentSequence();
294}
295
296void WhitespaceManager::alignConsecutiveAssignments() {
297  if (!Style.AlignConsecutiveAssignments)
298    return;
299
300  AlignTokens(Style,
301              [&](const Change &C) {
302                // Do not align on equal signs that are first on a line.
303                if (C.NewlinesBefore > 0)
304                  return false;
305
306                // Do not align on equal signs that are last on a line.
307                if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
308                  return false;
309
310                return C.Kind == tok::equal;
311              },
312              Changes);
313}
314
315void WhitespaceManager::alignConsecutiveDeclarations() {
316  if (!Style.AlignConsecutiveDeclarations)
317    return;
318
319  // FIXME: Currently we don't handle properly the PointerAlignment: Right
320  // The * and & are not aligned and are left dangling. Something has to be done
321  // about it, but it raises the question of alignment of code like:
322  //   const char* const* v1;
323  //   float const* v2;
324  //   SomeVeryLongType const& v3;
325
326  AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; },
327              Changes);
328}
329
330void WhitespaceManager::alignTrailingComments() {
331  unsigned MinColumn = 0;
332  unsigned MaxColumn = UINT_MAX;
333  unsigned StartOfSequence = 0;
334  bool BreakBeforeNext = false;
335  unsigned Newlines = 0;
336  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
337    if (Changes[i].StartOfBlockComment)
338      continue;
339    Newlines += Changes[i].NewlinesBefore;
340    if (!Changes[i].IsTrailingComment)
341      continue;
342
343    unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
344    unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
345    if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
346      ChangeMaxColumn -= 2;
347    // If this comment follows an } in column 0, it probably documents the
348    // closing of a namespace and we don't want to align it.
349    bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
350                                  Changes[i - 1].Kind == tok::r_brace &&
351                                  Changes[i - 1].StartOfTokenColumn == 0;
352    bool WasAlignedWithStartOfNextLine = false;
353    if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
354      unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
355          Changes[i].OriginalWhitespaceRange.getEnd());
356      for (unsigned j = i + 1; j != e; ++j) {
357        if (Changes[j].Kind != tok::comment) { // Skip over comments.
358          unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
359              Changes[j].OriginalWhitespaceRange.getEnd());
360          // The start of the next token was previously aligned with the
361          // start of this comment.
362          WasAlignedWithStartOfNextLine =
363              CommentColumn == NextColumn ||
364              CommentColumn == NextColumn + Style.IndentWidth;
365          break;
366        }
367      }
368    }
369    if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
370      alignTrailingComments(StartOfSequence, i, MinColumn);
371      MinColumn = ChangeMinColumn;
372      MaxColumn = ChangeMinColumn;
373      StartOfSequence = i;
374    } else if (BreakBeforeNext || Newlines > 1 ||
375               (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
376               // Break the comment sequence if the previous line did not end
377               // in a trailing comment.
378               (Changes[i].NewlinesBefore == 1 && i > 0 &&
379                !Changes[i - 1].IsTrailingComment) ||
380               WasAlignedWithStartOfNextLine) {
381      alignTrailingComments(StartOfSequence, i, MinColumn);
382      MinColumn = ChangeMinColumn;
383      MaxColumn = ChangeMaxColumn;
384      StartOfSequence = i;
385    } else {
386      MinColumn = std::max(MinColumn, ChangeMinColumn);
387      MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
388    }
389    BreakBeforeNext =
390        (i == 0) || (Changes[i].NewlinesBefore > 1) ||
391        // Never start a sequence with a comment at the beginning of
392        // the line.
393        (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
394    Newlines = 0;
395  }
396  alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
397}
398
399void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
400                                              unsigned Column) {
401  for (unsigned i = Start; i != End; ++i) {
402    int Shift = 0;
403    if (Changes[i].IsTrailingComment) {
404      Shift = Column - Changes[i].StartOfTokenColumn;
405    }
406    if (Changes[i].StartOfBlockComment) {
407      Shift = Changes[i].IndentationOffset +
408              Changes[i].StartOfBlockComment->StartOfTokenColumn -
409              Changes[i].StartOfTokenColumn;
410    }
411    assert(Shift >= 0);
412    Changes[i].Spaces += Shift;
413    if (i + 1 != End)
414      Changes[i + 1].PreviousEndOfTokenColumn += Shift;
415    Changes[i].StartOfTokenColumn += Shift;
416  }
417}
418
419void WhitespaceManager::alignEscapedNewlines() {
420  unsigned MaxEndOfLine =
421      Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
422  unsigned StartOfMacro = 0;
423  for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
424    Change &C = Changes[i];
425    if (C.NewlinesBefore > 0) {
426      if (C.ContinuesPPDirective) {
427        MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
428      } else {
429        alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
430        MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
431        StartOfMacro = i;
432      }
433    }
434  }
435  alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
436}
437
438void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
439                                             unsigned Column) {
440  for (unsigned i = Start; i < End; ++i) {
441    Change &C = Changes[i];
442    if (C.NewlinesBefore > 0) {
443      assert(C.ContinuesPPDirective);
444      if (C.PreviousEndOfTokenColumn + 1 > Column)
445        C.EscapedNewlineColumn = 0;
446      else
447        C.EscapedNewlineColumn = Column;
448    }
449  }
450}
451
452void WhitespaceManager::generateChanges() {
453  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
454    const Change &C = Changes[i];
455    if (i > 0) {
456      assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
457                 C.OriginalWhitespaceRange.getBegin() &&
458             "Generating two replacements for the same location");
459    }
460    if (C.CreateReplacement) {
461      std::string ReplacementText = C.PreviousLinePostfix;
462      if (C.ContinuesPPDirective)
463        appendNewlineText(ReplacementText, C.NewlinesBefore,
464                          C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
465      else
466        appendNewlineText(ReplacementText, C.NewlinesBefore);
467      appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
468                       C.StartOfTokenColumn - std::max(0, C.Spaces));
469      ReplacementText.append(C.CurrentLinePrefix);
470      storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
471    }
472  }
473}
474
475void WhitespaceManager::storeReplacement(SourceRange Range,
476                                         StringRef Text) {
477  unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
478                              SourceMgr.getFileOffset(Range.getBegin());
479  // Don't create a replacement, if it does not change anything.
480  if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
481                WhitespaceLength) == Text)
482    return;
483  Replaces.insert(tooling::Replacement(
484      SourceMgr, CharSourceRange::getCharRange(Range), Text));
485}
486
487void WhitespaceManager::appendNewlineText(std::string &Text,
488                                          unsigned Newlines) {
489  for (unsigned i = 0; i < Newlines; ++i)
490    Text.append(UseCRLF ? "\r\n" : "\n");
491}
492
493void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
494                                          unsigned PreviousEndOfTokenColumn,
495                                          unsigned EscapedNewlineColumn) {
496  if (Newlines > 0) {
497    unsigned Offset =
498        std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
499    for (unsigned i = 0; i < Newlines; ++i) {
500      Text.append(EscapedNewlineColumn - Offset - 1, ' ');
501      Text.append(UseCRLF ? "\\\r\n" : "\\\n");
502      Offset = 0;
503    }
504  }
505}
506
507void WhitespaceManager::appendIndentText(std::string &Text,
508                                         unsigned IndentLevel, unsigned Spaces,
509                                         unsigned WhitespaceStartColumn) {
510  switch (Style.UseTab) {
511  case FormatStyle::UT_Never:
512    Text.append(Spaces, ' ');
513    break;
514  case FormatStyle::UT_Always: {
515    unsigned FirstTabWidth =
516        Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
517    // Indent with tabs only when there's at least one full tab.
518    if (FirstTabWidth + Style.TabWidth <= Spaces) {
519      Spaces -= FirstTabWidth;
520      Text.append("\t");
521    }
522    Text.append(Spaces / Style.TabWidth, '\t');
523    Text.append(Spaces % Style.TabWidth, ' ');
524    break;
525  }
526  case FormatStyle::UT_ForIndentation:
527    if (WhitespaceStartColumn == 0) {
528      unsigned Indentation = IndentLevel * Style.IndentWidth;
529      // This happens, e.g. when a line in a block comment is indented less than
530      // the first one.
531      if (Indentation > Spaces)
532        Indentation = Spaces;
533      unsigned Tabs = Indentation / Style.TabWidth;
534      Text.append(Tabs, '\t');
535      Spaces -= Tabs * Style.TabWidth;
536    }
537    Text.append(Spaces, ' ');
538    break;
539  }
540}
541
542} // namespace format
543} // namespace clang
544