1//===--- FormatToken.cpp - Format C++ code --------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements specific functions of \c FormatTokens and their 12/// roles. 13/// 14//===----------------------------------------------------------------------===// 15 16#include "ContinuationIndenter.h" 17#include "FormatToken.h" 18#include "clang/Format/Format.h" 19#include "llvm/ADT/SmallVector.h" 20#include "llvm/Support/Debug.h" 21#include <climits> 22 23namespace clang { 24namespace format { 25 26const char *getTokenTypeName(TokenType Type) { 27 static const char *const TokNames[] = { 28#define TYPE(X) #X, 29LIST_TOKEN_TYPES 30#undef TYPE 31 nullptr 32 }; 33 34 if (Type < NUM_TOKEN_TYPES) 35 return TokNames[Type]; 36 llvm_unreachable("unknown TokenType"); 37 return nullptr; 38} 39 40// FIXME: This is copy&pasted from Sema. Put it in a common place and remove 41// duplication. 42bool FormatToken::isSimpleTypeSpecifier() const { 43 switch (Tok.getKind()) { 44 case tok::kw_short: 45 case tok::kw_long: 46 case tok::kw___int64: 47 case tok::kw___int128: 48 case tok::kw_signed: 49 case tok::kw_unsigned: 50 case tok::kw_void: 51 case tok::kw_char: 52 case tok::kw_int: 53 case tok::kw_half: 54 case tok::kw_float: 55 case tok::kw_double: 56 case tok::kw_wchar_t: 57 case tok::kw_bool: 58 case tok::kw___underlying_type: 59 case tok::annot_typename: 60 case tok::kw_char16_t: 61 case tok::kw_char32_t: 62 case tok::kw_typeof: 63 case tok::kw_decltype: 64 return true; 65 default: 66 return false; 67 } 68} 69 70TokenRole::~TokenRole() {} 71 72void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} 73 74unsigned CommaSeparatedList::formatAfterToken(LineState &State, 75 ContinuationIndenter *Indenter, 76 bool DryRun) { 77 if (State.NextToken == nullptr || !State.NextToken->Previous) 78 return 0; 79 80 // Ensure that we start on the opening brace. 81 const FormatToken *LBrace = 82 State.NextToken->Previous->getPreviousNonComment(); 83 if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block || 84 LBrace->Type == TT_DictLiteral || 85 LBrace->Next->Type == TT_DesignatedInitializerPeriod) 86 return 0; 87 88 // Calculate the number of code points we have to format this list. As the 89 // first token is already placed, we have to subtract it. 90 unsigned RemainingCodePoints = 91 Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; 92 93 // Find the best ColumnFormat, i.e. the best number of columns to use. 94 const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); 95 // If no ColumnFormat can be used, the braced list would generally be 96 // bin-packed. Add a severe penalty to this so that column layouts are 97 // preferred if possible. 98 if (!Format) 99 return 10000; 100 101 // Format the entire list. 102 unsigned Penalty = 0; 103 unsigned Column = 0; 104 unsigned Item = 0; 105 while (State.NextToken != LBrace->MatchingParen) { 106 bool NewLine = false; 107 unsigned ExtraSpaces = 0; 108 109 // If the previous token was one of our commas, we are now on the next item. 110 if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { 111 if (!State.NextToken->isTrailingComment()) { 112 ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; 113 ++Column; 114 } 115 ++Item; 116 } 117 118 if (Column == Format->Columns || State.NextToken->MustBreakBefore) { 119 Column = 0; 120 NewLine = true; 121 } 122 123 // Place token using the continuation indenter and store the penalty. 124 Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); 125 } 126 return Penalty; 127} 128 129unsigned CommaSeparatedList::formatFromToken(LineState &State, 130 ContinuationIndenter *Indenter, 131 bool DryRun) { 132 if (HasNestedBracedList) 133 State.Stack.back().AvoidBinPacking = true; 134 return 0; 135} 136 137// Returns the lengths in code points between Begin and End (both included), 138// assuming that the entire sequence is put on a single line. 139static unsigned CodePointsBetween(const FormatToken *Begin, 140 const FormatToken *End) { 141 assert(End->TotalLength >= Begin->TotalLength); 142 return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; 143} 144 145void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { 146 // FIXME: At some point we might want to do this for other lists, too. 147 if (!Token->MatchingParen || Token->isNot(tok::l_brace)) 148 return; 149 150 // In C++11 braced list style, we should not format in columns unless they 151 // have many items (20 or more) or we allow bin-packing of function call 152 // arguments. 153 if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && 154 Commas.size() < 19) 155 return; 156 157 // Column format doesn't really make sense if we don't align after brackets. 158 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) 159 return; 160 161 FormatToken *ItemBegin = Token->Next; 162 while (ItemBegin->isTrailingComment()) 163 ItemBegin = ItemBegin->Next; 164 SmallVector<bool, 8> MustBreakBeforeItem; 165 166 // The lengths of an item if it is put at the end of the line. This includes 167 // trailing comments which are otherwise ignored for column alignment. 168 SmallVector<unsigned, 8> EndOfLineItemLength; 169 170 bool HasSeparatingComment = false; 171 for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { 172 // Skip comments on their own line. 173 while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { 174 ItemBegin = ItemBegin->Next; 175 HasSeparatingComment = i > 0; 176 } 177 178 MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); 179 if (ItemBegin->is(tok::l_brace)) 180 HasNestedBracedList = true; 181 const FormatToken *ItemEnd = nullptr; 182 if (i == Commas.size()) { 183 ItemEnd = Token->MatchingParen; 184 const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); 185 ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); 186 if (Style.Cpp11BracedListStyle && 187 !ItemEnd->Previous->isTrailingComment()) { 188 // In Cpp11 braced list style, the } and possibly other subsequent 189 // tokens will need to stay on a line with the last element. 190 while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) 191 ItemEnd = ItemEnd->Next; 192 } else { 193 // In other braced lists styles, the "}" can be wrapped to the new line. 194 ItemEnd = Token->MatchingParen->Previous; 195 } 196 } else { 197 ItemEnd = Commas[i]; 198 // The comma is counted as part of the item when calculating the length. 199 ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 200 201 // Consume trailing comments so the are included in EndOfLineItemLength. 202 if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && 203 ItemEnd->Next->isTrailingComment()) 204 ItemEnd = ItemEnd->Next; 205 } 206 EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 207 // If there is a trailing comma in the list, the next item will start at the 208 // closing brace. Don't create an extra item for this. 209 if (ItemEnd->getNextNonComment() == Token->MatchingParen) 210 break; 211 ItemBegin = ItemEnd->Next; 212 } 213 214 // Don't use column layout for nested lists, lists with few elements and in 215 // presence of separating comments. 216 if (Token->NestingLevel != 0 || Commas.size() < 5 || HasSeparatingComment) 217 return; 218 219 // We can never place more than ColumnLimit / 3 items in a row (because of the 220 // spaces and the comma). 221 unsigned MaxItems = Style.ColumnLimit / 3; 222 std::vector<unsigned> MinSizeInColumn; 223 MinSizeInColumn.reserve(MaxItems); 224 for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { 225 ColumnFormat Format; 226 Format.Columns = Columns; 227 Format.ColumnSizes.resize(Columns); 228 MinSizeInColumn.assign(Columns, UINT_MAX); 229 Format.LineCount = 1; 230 bool HasRowWithSufficientColumns = false; 231 unsigned Column = 0; 232 for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { 233 assert(i < MustBreakBeforeItem.size()); 234 if (MustBreakBeforeItem[i] || Column == Columns) { 235 ++Format.LineCount; 236 Column = 0; 237 } 238 if (Column == Columns - 1) 239 HasRowWithSufficientColumns = true; 240 unsigned Length = 241 (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; 242 Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); 243 MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); 244 ++Column; 245 } 246 // If all rows are terminated early (e.g. by trailing comments), we don't 247 // need to look further. 248 if (!HasRowWithSufficientColumns) 249 break; 250 Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. 251 252 for (unsigned i = 0; i < Columns; ++i) 253 Format.TotalWidth += Format.ColumnSizes[i]; 254 255 // Don't use this Format, if the difference between the longest and shortest 256 // element in a column exceeds a threshold to avoid excessive spaces. 257 if ([&] { 258 for (unsigned i = 0; i < Columns - 1; ++i) 259 if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) 260 return true; 261 return false; 262 }()) 263 continue; 264 265 // Ignore layouts that are bound to violate the column limit. 266 if (Format.TotalWidth > Style.ColumnLimit) 267 continue; 268 269 Formats.push_back(Format); 270 } 271} 272 273const CommaSeparatedList::ColumnFormat * 274CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { 275 const ColumnFormat *BestFormat = nullptr; 276 for (SmallVector<ColumnFormat, 4>::const_reverse_iterator 277 I = Formats.rbegin(), 278 E = Formats.rend(); 279 I != E; ++I) { 280 if (I->TotalWidth <= RemainingCharacters) { 281 if (BestFormat && I->LineCount > BestFormat->LineCount) 282 break; 283 BestFormat = &*I; 284 } 285 } 286 return BestFormat; 287} 288 289} // namespace format 290} // namespace clang 291