1//===--- WhitespaceManager.cpp - Format C++ code --------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements WhitespaceManager class. 12/// 13//===----------------------------------------------------------------------===// 14 15#include "WhitespaceManager.h" 16#include "llvm/ADT/STLExtras.h" 17 18namespace clang { 19namespace format { 20 21bool WhitespaceManager::Change::IsBeforeInFile:: 22operator()(const Change &C1, const Change &C2) const { 23 return SourceMgr.isBeforeInTranslationUnit( 24 C1.OriginalWhitespaceRange.getBegin(), 25 C2.OriginalWhitespaceRange.getBegin()); 26} 27 28WhitespaceManager::Change::Change( 29 bool CreateReplacement, SourceRange OriginalWhitespaceRange, 30 unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, 31 unsigned NewlinesBefore, StringRef PreviousLinePostfix, 32 StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, 33 bool IsStartOfDeclName) 34 : CreateReplacement(CreateReplacement), 35 OriginalWhitespaceRange(OriginalWhitespaceRange), 36 StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), 37 PreviousLinePostfix(PreviousLinePostfix), 38 CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), 39 ContinuesPPDirective(ContinuesPPDirective), 40 IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), 41 Spaces(Spaces), IsTrailingComment(false), TokenLength(0), 42 PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), 43 StartOfBlockComment(nullptr), IndentationOffset(0) {} 44 45void WhitespaceManager::reset() { 46 Changes.clear(); 47 Replaces.clear(); 48} 49 50void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, 51 unsigned IndentLevel, unsigned Spaces, 52 unsigned StartOfTokenColumn, 53 bool InPPDirective) { 54 if (Tok.Finalized) 55 return; 56 Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; 57 Changes.push_back( 58 Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, StartOfTokenColumn, 59 Newlines, "", "", Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, 60 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); 61} 62 63void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, 64 bool InPPDirective) { 65 if (Tok.Finalized) 66 return; 67 Changes.push_back( 68 Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0, 69 /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", 70 Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, 71 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); 72} 73 74void WhitespaceManager::replaceWhitespaceInToken( 75 const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, 76 StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, 77 unsigned Newlines, unsigned IndentLevel, int Spaces) { 78 if (Tok.Finalized) 79 return; 80 SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); 81 Changes.push_back(Change( 82 true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), 83 IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, 84 CurrentPrefix, 85 // If we don't add a newline this change doesn't start a comment. Thus, 86 // when we align line comments, we don't need to treat this change as one. 87 // FIXME: We still need to take this change in account to properly 88 // calculate the new length of the comment and to calculate the changes 89 // for which to do the alignment when aligning comments. 90 Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown, 91 InPPDirective && !Tok.IsFirst, 92 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); 93} 94 95const tooling::Replacements &WhitespaceManager::generateReplacements() { 96 if (Changes.empty()) 97 return Replaces; 98 99 std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); 100 calculateLineBreakInformation(); 101 alignConsecutiveDeclarations(); 102 alignConsecutiveAssignments(); 103 alignTrailingComments(); 104 alignEscapedNewlines(); 105 generateChanges(); 106 107 return Replaces; 108} 109 110void WhitespaceManager::calculateLineBreakInformation() { 111 Changes[0].PreviousEndOfTokenColumn = 0; 112 for (unsigned i = 1, e = Changes.size(); i != e; ++i) { 113 unsigned OriginalWhitespaceStart = 114 SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin()); 115 unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset( 116 Changes[i - 1].OriginalWhitespaceRange.getEnd()); 117 Changes[i - 1].TokenLength = OriginalWhitespaceStart - 118 PreviousOriginalWhitespaceEnd + 119 Changes[i].PreviousLinePostfix.size() + 120 Changes[i - 1].CurrentLinePrefix.size(); 121 122 Changes[i].PreviousEndOfTokenColumn = 123 Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; 124 125 Changes[i - 1].IsTrailingComment = 126 (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) && 127 Changes[i - 1].Kind == tok::comment; 128 } 129 // FIXME: The last token is currently not always an eof token; in those 130 // cases, setting TokenLength of the last token to 0 is wrong. 131 Changes.back().TokenLength = 0; 132 Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; 133 134 const WhitespaceManager::Change *LastBlockComment = nullptr; 135 for (auto &Change : Changes) { 136 Change.StartOfBlockComment = nullptr; 137 Change.IndentationOffset = 0; 138 if (Change.Kind == tok::comment) { 139 LastBlockComment = &Change; 140 } else if (Change.Kind == tok::unknown) { 141 if ((Change.StartOfBlockComment = LastBlockComment)) 142 Change.IndentationOffset = 143 Change.StartOfTokenColumn - 144 Change.StartOfBlockComment->StartOfTokenColumn; 145 } else { 146 LastBlockComment = nullptr; 147 } 148 } 149} 150 151// Align a single sequence of tokens, see AlignTokens below. 152template <typename F> 153static void 154AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, 155 SmallVector<WhitespaceManager::Change, 16> &Changes) { 156 bool FoundMatchOnLine = false; 157 int Shift = 0; 158 for (unsigned i = Start; i != End; ++i) { 159 if (Changes[i].NewlinesBefore > 0) { 160 FoundMatchOnLine = false; 161 Shift = 0; 162 } 163 164 // If this is the first matching token to be aligned, remember by how many 165 // spaces it has to be shifted, so the rest of the changes on the line are 166 // shifted by the same amount 167 if (!FoundMatchOnLine && Matches(Changes[i])) { 168 FoundMatchOnLine = true; 169 Shift = Column - Changes[i].StartOfTokenColumn; 170 Changes[i].Spaces += Shift; 171 } 172 173 assert(Shift >= 0); 174 Changes[i].StartOfTokenColumn += Shift; 175 if (i + 1 != Changes.size()) 176 Changes[i + 1].PreviousEndOfTokenColumn += Shift; 177 } 178} 179 180// Walk through all of the changes and find sequences of matching tokens to 181// align. To do so, keep track of the lines and whether or not a matching token 182// was found on a line. If a matching token is found, extend the current 183// sequence. If the current line cannot be part of a sequence, e.g. because 184// there is an empty line before it or it contains only non-matching tokens, 185// finalize the previous sequence. 186template <typename F> 187static void AlignTokens(const FormatStyle &Style, F &&Matches, 188 SmallVector<WhitespaceManager::Change, 16> &Changes) { 189 unsigned MinColumn = 0; 190 unsigned MaxColumn = UINT_MAX; 191 192 // Line number of the start and the end of the current token sequence. 193 unsigned StartOfSequence = 0; 194 unsigned EndOfSequence = 0; 195 196 // Keep track of the nesting level of matching tokens, i.e. the number of 197 // surrounding (), [], or {}. We will only align a sequence of matching 198 // token that share the same scope depth. 199 // 200 // FIXME: This could use FormatToken::NestingLevel information, but there is 201 // an outstanding issue wrt the brace scopes. 202 unsigned NestingLevelOfLastMatch = 0; 203 unsigned NestingLevel = 0; 204 205 // Keep track of the number of commas before the matching tokens, we will only 206 // align a sequence of matching tokens if they are preceded by the same number 207 // of commas. 208 unsigned CommasBeforeLastMatch = 0; 209 unsigned CommasBeforeMatch = 0; 210 211 // Whether a matching token has been found on the current line. 212 bool FoundMatchOnLine = false; 213 214 // Aligns a sequence of matching tokens, on the MinColumn column. 215 // 216 // Sequences start from the first matching token to align, and end at the 217 // first token of the first line that doesn't need to be aligned. 218 // 219 // We need to adjust the StartOfTokenColumn of each Change that is on a line 220 // containing any matching token to be aligned and located after such token. 221 auto AlignCurrentSequence = [&] { 222 if (StartOfSequence > 0 && StartOfSequence < EndOfSequence) 223 AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches, 224 Changes); 225 MinColumn = 0; 226 MaxColumn = UINT_MAX; 227 StartOfSequence = 0; 228 EndOfSequence = 0; 229 }; 230 231 for (unsigned i = 0, e = Changes.size(); i != e; ++i) { 232 if (Changes[i].NewlinesBefore != 0) { 233 CommasBeforeMatch = 0; 234 EndOfSequence = i; 235 // If there is a blank line, or if the last line didn't contain any 236 // matching token, the sequence ends here. 237 if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine) 238 AlignCurrentSequence(); 239 240 FoundMatchOnLine = false; 241 } 242 243 if (Changes[i].Kind == tok::comma) { 244 ++CommasBeforeMatch; 245 } else if (Changes[i].Kind == tok::r_brace || 246 Changes[i].Kind == tok::r_paren || 247 Changes[i].Kind == tok::r_square) { 248 --NestingLevel; 249 } else if (Changes[i].Kind == tok::l_brace || 250 Changes[i].Kind == tok::l_paren || 251 Changes[i].Kind == tok::l_square) { 252 // We want sequences to skip over child scopes if possible, but not the 253 // other way around. 254 NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); 255 ++NestingLevel; 256 } 257 258 if (!Matches(Changes[i])) 259 continue; 260 261 // If there is more than one matching token per line, or if the number of 262 // preceding commas, or the scope depth, do not match anymore, end the 263 // sequence. 264 if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || 265 NestingLevel != NestingLevelOfLastMatch) 266 AlignCurrentSequence(); 267 268 CommasBeforeLastMatch = CommasBeforeMatch; 269 NestingLevelOfLastMatch = NestingLevel; 270 FoundMatchOnLine = true; 271 272 if (StartOfSequence == 0) 273 StartOfSequence = i; 274 275 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; 276 int LineLengthAfter = -Changes[i].Spaces; 277 for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j) 278 LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength; 279 unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter; 280 281 // If we are restricted by the maximum column width, end the sequence. 282 if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn || 283 CommasBeforeLastMatch != CommasBeforeMatch) { 284 AlignCurrentSequence(); 285 StartOfSequence = i; 286 } 287 288 MinColumn = std::max(MinColumn, ChangeMinColumn); 289 MaxColumn = std::min(MaxColumn, ChangeMaxColumn); 290 } 291 292 EndOfSequence = Changes.size(); 293 AlignCurrentSequence(); 294} 295 296void WhitespaceManager::alignConsecutiveAssignments() { 297 if (!Style.AlignConsecutiveAssignments) 298 return; 299 300 AlignTokens(Style, 301 [&](const Change &C) { 302 // Do not align on equal signs that are first on a line. 303 if (C.NewlinesBefore > 0) 304 return false; 305 306 // Do not align on equal signs that are last on a line. 307 if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) 308 return false; 309 310 return C.Kind == tok::equal; 311 }, 312 Changes); 313} 314 315void WhitespaceManager::alignConsecutiveDeclarations() { 316 if (!Style.AlignConsecutiveDeclarations) 317 return; 318 319 // FIXME: Currently we don't handle properly the PointerAlignment: Right 320 // The * and & are not aligned and are left dangling. Something has to be done 321 // about it, but it raises the question of alignment of code like: 322 // const char* const* v1; 323 // float const* v2; 324 // SomeVeryLongType const& v3; 325 326 AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; }, 327 Changes); 328} 329 330void WhitespaceManager::alignTrailingComments() { 331 unsigned MinColumn = 0; 332 unsigned MaxColumn = UINT_MAX; 333 unsigned StartOfSequence = 0; 334 bool BreakBeforeNext = false; 335 unsigned Newlines = 0; 336 for (unsigned i = 0, e = Changes.size(); i != e; ++i) { 337 if (Changes[i].StartOfBlockComment) 338 continue; 339 Newlines += Changes[i].NewlinesBefore; 340 if (!Changes[i].IsTrailingComment) 341 continue; 342 343 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; 344 unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; 345 if (i + 1 != e && Changes[i + 1].ContinuesPPDirective) 346 ChangeMaxColumn -= 2; 347 // If this comment follows an } in column 0, it probably documents the 348 // closing of a namespace and we don't want to align it. 349 bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && 350 Changes[i - 1].Kind == tok::r_brace && 351 Changes[i - 1].StartOfTokenColumn == 0; 352 bool WasAlignedWithStartOfNextLine = false; 353 if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. 354 unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( 355 Changes[i].OriginalWhitespaceRange.getEnd()); 356 for (unsigned j = i + 1; j != e; ++j) { 357 if (Changes[j].Kind != tok::comment) { // Skip over comments. 358 unsigned NextColumn = SourceMgr.getSpellingColumnNumber( 359 Changes[j].OriginalWhitespaceRange.getEnd()); 360 // The start of the next token was previously aligned with the 361 // start of this comment. 362 WasAlignedWithStartOfNextLine = 363 CommentColumn == NextColumn || 364 CommentColumn == NextColumn + Style.IndentWidth; 365 break; 366 } 367 } 368 } 369 if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) { 370 alignTrailingComments(StartOfSequence, i, MinColumn); 371 MinColumn = ChangeMinColumn; 372 MaxColumn = ChangeMinColumn; 373 StartOfSequence = i; 374 } else if (BreakBeforeNext || Newlines > 1 || 375 (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) || 376 // Break the comment sequence if the previous line did not end 377 // in a trailing comment. 378 (Changes[i].NewlinesBefore == 1 && i > 0 && 379 !Changes[i - 1].IsTrailingComment) || 380 WasAlignedWithStartOfNextLine) { 381 alignTrailingComments(StartOfSequence, i, MinColumn); 382 MinColumn = ChangeMinColumn; 383 MaxColumn = ChangeMaxColumn; 384 StartOfSequence = i; 385 } else { 386 MinColumn = std::max(MinColumn, ChangeMinColumn); 387 MaxColumn = std::min(MaxColumn, ChangeMaxColumn); 388 } 389 BreakBeforeNext = 390 (i == 0) || (Changes[i].NewlinesBefore > 1) || 391 // Never start a sequence with a comment at the beginning of 392 // the line. 393 (Changes[i].NewlinesBefore == 1 && StartOfSequence == i); 394 Newlines = 0; 395 } 396 alignTrailingComments(StartOfSequence, Changes.size(), MinColumn); 397} 398 399void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, 400 unsigned Column) { 401 for (unsigned i = Start; i != End; ++i) { 402 int Shift = 0; 403 if (Changes[i].IsTrailingComment) { 404 Shift = Column - Changes[i].StartOfTokenColumn; 405 } 406 if (Changes[i].StartOfBlockComment) { 407 Shift = Changes[i].IndentationOffset + 408 Changes[i].StartOfBlockComment->StartOfTokenColumn - 409 Changes[i].StartOfTokenColumn; 410 } 411 assert(Shift >= 0); 412 Changes[i].Spaces += Shift; 413 if (i + 1 != End) 414 Changes[i + 1].PreviousEndOfTokenColumn += Shift; 415 Changes[i].StartOfTokenColumn += Shift; 416 } 417} 418 419void WhitespaceManager::alignEscapedNewlines() { 420 unsigned MaxEndOfLine = 421 Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit; 422 unsigned StartOfMacro = 0; 423 for (unsigned i = 1, e = Changes.size(); i < e; ++i) { 424 Change &C = Changes[i]; 425 if (C.NewlinesBefore > 0) { 426 if (C.ContinuesPPDirective) { 427 MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine); 428 } else { 429 alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine); 430 MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit; 431 StartOfMacro = i; 432 } 433 } 434 } 435 alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine); 436} 437 438void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End, 439 unsigned Column) { 440 for (unsigned i = Start; i < End; ++i) { 441 Change &C = Changes[i]; 442 if (C.NewlinesBefore > 0) { 443 assert(C.ContinuesPPDirective); 444 if (C.PreviousEndOfTokenColumn + 1 > Column) 445 C.EscapedNewlineColumn = 0; 446 else 447 C.EscapedNewlineColumn = Column; 448 } 449 } 450} 451 452void WhitespaceManager::generateChanges() { 453 for (unsigned i = 0, e = Changes.size(); i != e; ++i) { 454 const Change &C = Changes[i]; 455 if (i > 0) { 456 assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() != 457 C.OriginalWhitespaceRange.getBegin() && 458 "Generating two replacements for the same location"); 459 } 460 if (C.CreateReplacement) { 461 std::string ReplacementText = C.PreviousLinePostfix; 462 if (C.ContinuesPPDirective) 463 appendNewlineText(ReplacementText, C.NewlinesBefore, 464 C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); 465 else 466 appendNewlineText(ReplacementText, C.NewlinesBefore); 467 appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces), 468 C.StartOfTokenColumn - std::max(0, C.Spaces)); 469 ReplacementText.append(C.CurrentLinePrefix); 470 storeReplacement(C.OriginalWhitespaceRange, ReplacementText); 471 } 472 } 473} 474 475void WhitespaceManager::storeReplacement(SourceRange Range, 476 StringRef Text) { 477 unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - 478 SourceMgr.getFileOffset(Range.getBegin()); 479 // Don't create a replacement, if it does not change anything. 480 if (StringRef(SourceMgr.getCharacterData(Range.getBegin()), 481 WhitespaceLength) == Text) 482 return; 483 Replaces.insert(tooling::Replacement( 484 SourceMgr, CharSourceRange::getCharRange(Range), Text)); 485} 486 487void WhitespaceManager::appendNewlineText(std::string &Text, 488 unsigned Newlines) { 489 for (unsigned i = 0; i < Newlines; ++i) 490 Text.append(UseCRLF ? "\r\n" : "\n"); 491} 492 493void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines, 494 unsigned PreviousEndOfTokenColumn, 495 unsigned EscapedNewlineColumn) { 496 if (Newlines > 0) { 497 unsigned Offset = 498 std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn); 499 for (unsigned i = 0; i < Newlines; ++i) { 500 Text.append(EscapedNewlineColumn - Offset - 1, ' '); 501 Text.append(UseCRLF ? "\\\r\n" : "\\\n"); 502 Offset = 0; 503 } 504 } 505} 506 507void WhitespaceManager::appendIndentText(std::string &Text, 508 unsigned IndentLevel, unsigned Spaces, 509 unsigned WhitespaceStartColumn) { 510 switch (Style.UseTab) { 511 case FormatStyle::UT_Never: 512 Text.append(Spaces, ' '); 513 break; 514 case FormatStyle::UT_Always: { 515 unsigned FirstTabWidth = 516 Style.TabWidth - WhitespaceStartColumn % Style.TabWidth; 517 // Indent with tabs only when there's at least one full tab. 518 if (FirstTabWidth + Style.TabWidth <= Spaces) { 519 Spaces -= FirstTabWidth; 520 Text.append("\t"); 521 } 522 Text.append(Spaces / Style.TabWidth, '\t'); 523 Text.append(Spaces % Style.TabWidth, ' '); 524 break; 525 } 526 case FormatStyle::UT_ForIndentation: 527 if (WhitespaceStartColumn == 0) { 528 unsigned Indentation = IndentLevel * Style.IndentWidth; 529 // This happens, e.g. when a line in a block comment is indented less than 530 // the first one. 531 if (Indentation > Spaces) 532 Indentation = Spaces; 533 unsigned Tabs = Indentation / Style.TabWidth; 534 Text.append(Tabs, '\t'); 535 Spaces -= Tabs * Style.TabWidth; 536 } 537 Text.append(Spaces, ' '); 538 break; 539 } 540} 541 542} // namespace format 543} // namespace clang 544