UnwrappedLineParser.cpp revision de7685487c5d628dd9fe64c4f861cd1888f50fc7
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#include "UnwrappedLineParser.h" 20#include "llvm/Support/raw_ostream.h" 21 22namespace clang { 23namespace format { 24 25class ScopedMacroState : public FormatTokenSource { 26public: 27 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 28 FormatToken &ResetToken) 29 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 30 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 31 TokenSource = this; 32 Line.Level = 0; 33 Line.InPPDirective = true; 34 } 35 36 ~ScopedMacroState() { 37 TokenSource = PreviousTokenSource; 38 ResetToken = Token; 39 Line.InPPDirective = false; 40 Line.Level = PreviousLineLevel; 41 } 42 43 virtual FormatToken getNextToken() { 44 // The \c UnwrappedLineParser guards against this by never calling 45 // \c getNextToken() after it has encountered the first eof token. 46 assert(!eof()); 47 Token = PreviousTokenSource->getNextToken(); 48 if (eof()) 49 return createEOF(); 50 return Token; 51 } 52 53private: 54 bool eof() { 55 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 56 } 57 58 FormatToken createEOF() { 59 FormatToken FormatTok; 60 FormatTok.Tok.startToken(); 61 FormatTok.Tok.setKind(tok::eof); 62 return FormatTok; 63 } 64 65 UnwrappedLine &Line; 66 FormatTokenSource *&TokenSource; 67 FormatToken &ResetToken; 68 unsigned PreviousLineLevel; 69 FormatTokenSource *PreviousTokenSource; 70 71 FormatToken Token; 72}; 73 74UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 75 FormatTokenSource &Tokens, 76 UnwrappedLineConsumer &Callback) 77 : Style(Style), Tokens(&Tokens), Callback(Callback) { 78} 79 80bool UnwrappedLineParser::parse() { 81 readToken(); 82 return parseFile(); 83} 84 85bool UnwrappedLineParser::parseFile() { 86 bool Error = parseLevel(/*HasOpeningBrace=*/false); 87 // Make sure to format the remaining tokens. 88 addUnwrappedLine(); 89 return Error; 90} 91 92bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 93 bool Error = false; 94 do { 95 switch (FormatTok.Tok.getKind()) { 96 case tok::comment: 97 nextToken(); 98 addUnwrappedLine(); 99 break; 100 case tok::l_brace: 101 Error |= parseBlock(); 102 addUnwrappedLine(); 103 break; 104 case tok::r_brace: 105 if (HasOpeningBrace) { 106 return false; 107 } else { 108 // Stray '}' is an error. 109 Error = true; 110 nextToken(); 111 addUnwrappedLine(); 112 } 113 break; 114 default: 115 parseStructuralElement(); 116 break; 117 } 118 } while (!eof()); 119 return Error; 120} 121 122bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 123 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 124 nextToken(); 125 126 addUnwrappedLine(); 127 128 Line.Level += AddLevels; 129 parseLevel(/*HasOpeningBrace=*/true); 130 Line.Level -= AddLevels; 131 132 if (!FormatTok.Tok.is(tok::r_brace)) 133 return true; 134 135 nextToken(); // Munch the closing brace. 136 return false; 137} 138 139void UnwrappedLineParser::parsePPDirective() { 140 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 141 ScopedMacroState MacroState(Line, Tokens, FormatTok); 142 nextToken(); 143 144 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 145 addUnwrappedLine(); 146 return; 147 } 148 149 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 150 case tok::pp_define: 151 parsePPDefine(); 152 break; 153 default: 154 parsePPUnknown(); 155 break; 156 } 157} 158 159void UnwrappedLineParser::parsePPDefine() { 160 nextToken(); 161 162 if (FormatTok.Tok.getKind() != tok::identifier) { 163 parsePPUnknown(); 164 return; 165 } 166 nextToken(); 167 if (FormatTok.Tok.getKind() == tok::l_paren) { 168 parseParens(); 169 } 170 addUnwrappedLine(); 171 Line.Level = 1; 172 173 // Errors during a preprocessor directive can only affect the layout of the 174 // preprocessor directive, and thus we ignore them. An alternative approach 175 // would be to use the same approach we use on the file level (no 176 // re-indentation if there was a structural error) within the macro 177 // definition. 178 parseFile(); 179} 180 181void UnwrappedLineParser::parsePPUnknown() { 182 do { 183 nextToken(); 184 } while (!eof()); 185 addUnwrappedLine(); 186} 187 188void UnwrappedLineParser::parseComments() { 189 // Consume leading line comments, e.g. for branches without compounds. 190 while (FormatTok.Tok.is(tok::comment)) { 191 nextToken(); 192 addUnwrappedLine(); 193 } 194} 195 196void UnwrappedLineParser::parseStructuralElement() { 197 parseComments(); 198 199 int TokenNumber = 0; 200 switch (FormatTok.Tok.getKind()) { 201 case tok::kw_namespace: 202 parseNamespace(); 203 return; 204 case tok::kw_inline: 205 nextToken(); 206 TokenNumber++; 207 if (FormatTok.Tok.is(tok::kw_namespace)) { 208 parseNamespace(); 209 return; 210 } 211 break; 212 case tok::kw_public: 213 case tok::kw_protected: 214 case tok::kw_private: 215 parseAccessSpecifier(); 216 return; 217 case tok::kw_if: 218 parseIfThenElse(); 219 return; 220 case tok::kw_for: 221 case tok::kw_while: 222 parseForOrWhileLoop(); 223 return; 224 case tok::kw_do: 225 parseDoWhile(); 226 return; 227 case tok::kw_switch: 228 parseSwitch(); 229 return; 230 case tok::kw_default: 231 nextToken(); 232 parseLabel(); 233 return; 234 case tok::kw_case: 235 parseCaseLabel(); 236 return; 237 default: 238 break; 239 } 240 do { 241 ++TokenNumber; 242 switch (FormatTok.Tok.getKind()) { 243 case tok::kw_enum: 244 parseEnum(); 245 return; 246 case tok::kw_struct: // fallthrough 247 case tok::kw_class: 248 parseStructOrClass(); 249 return; 250 case tok::semi: 251 nextToken(); 252 addUnwrappedLine(); 253 return; 254 case tok::l_paren: 255 parseParens(); 256 break; 257 case tok::l_brace: 258 parseBlock(); 259 addUnwrappedLine(); 260 return; 261 case tok::identifier: 262 nextToken(); 263 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 264 parseLabel(); 265 return; 266 } 267 break; 268 case tok::equal: 269 nextToken(); 270 // Skip initializers as they will be formatted by a later step. 271 if (FormatTok.Tok.is(tok::l_brace)) 272 nextToken(); 273 break; 274 default: 275 nextToken(); 276 break; 277 } 278 } while (!eof()); 279} 280 281void UnwrappedLineParser::parseParens() { 282 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 283 nextToken(); 284 do { 285 switch (FormatTok.Tok.getKind()) { 286 case tok::l_paren: 287 parseParens(); 288 break; 289 case tok::r_paren: 290 nextToken(); 291 return; 292 default: 293 nextToken(); 294 break; 295 } 296 } while (!eof()); 297} 298 299void UnwrappedLineParser::parseIfThenElse() { 300 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 301 nextToken(); 302 parseParens(); 303 bool NeedsUnwrappedLine = false; 304 if (FormatTok.Tok.is(tok::l_brace)) { 305 parseBlock(); 306 NeedsUnwrappedLine = true; 307 } else { 308 addUnwrappedLine(); 309 ++Line.Level; 310 parseStructuralElement(); 311 --Line.Level; 312 } 313 if (FormatTok.Tok.is(tok::kw_else)) { 314 nextToken(); 315 if (FormatTok.Tok.is(tok::l_brace)) { 316 parseBlock(); 317 addUnwrappedLine(); 318 } else if (FormatTok.Tok.is(tok::kw_if)) { 319 parseIfThenElse(); 320 } else { 321 addUnwrappedLine(); 322 ++Line.Level; 323 parseStructuralElement(); 324 --Line.Level; 325 } 326 } else if (NeedsUnwrappedLine) { 327 addUnwrappedLine(); 328 } 329} 330 331void UnwrappedLineParser::parseNamespace() { 332 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 333 nextToken(); 334 if (FormatTok.Tok.is(tok::identifier)) 335 nextToken(); 336 if (FormatTok.Tok.is(tok::l_brace)) { 337 parseBlock(0); 338 addUnwrappedLine(); 339 } 340 // FIXME: Add error handling. 341} 342 343void UnwrappedLineParser::parseForOrWhileLoop() { 344 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 345 "'for' or 'while' expected"); 346 nextToken(); 347 parseParens(); 348 if (FormatTok.Tok.is(tok::l_brace)) { 349 parseBlock(); 350 addUnwrappedLine(); 351 } else { 352 addUnwrappedLine(); 353 ++Line.Level; 354 parseStructuralElement(); 355 --Line.Level; 356 } 357} 358 359void UnwrappedLineParser::parseDoWhile() { 360 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 361 nextToken(); 362 if (FormatTok.Tok.is(tok::l_brace)) { 363 parseBlock(); 364 } else { 365 addUnwrappedLine(); 366 ++Line.Level; 367 parseStructuralElement(); 368 --Line.Level; 369 } 370 371 // FIXME: Add error handling. 372 if (!FormatTok.Tok.is(tok::kw_while)) { 373 addUnwrappedLine(); 374 return; 375 } 376 377 nextToken(); 378 parseStructuralElement(); 379} 380 381void UnwrappedLineParser::parseLabel() { 382 // FIXME: remove all asserts. 383 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 384 nextToken(); 385 unsigned OldLineLevel = Line.Level; 386 if (Line.Level > 0) 387 --Line.Level; 388 if (FormatTok.Tok.is(tok::l_brace)) { 389 parseBlock(); 390 } 391 addUnwrappedLine(); 392 Line.Level = OldLineLevel; 393} 394 395void UnwrappedLineParser::parseCaseLabel() { 396 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 397 // FIXME: fix handling of complex expressions here. 398 do { 399 nextToken(); 400 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 401 parseLabel(); 402} 403 404void UnwrappedLineParser::parseSwitch() { 405 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 406 nextToken(); 407 parseParens(); 408 if (FormatTok.Tok.is(tok::l_brace)) { 409 parseBlock(Style.IndentCaseLabels ? 2 : 1); 410 addUnwrappedLine(); 411 } else { 412 addUnwrappedLine(); 413 Line.Level += (Style.IndentCaseLabels ? 2 : 1); 414 parseStructuralElement(); 415 Line.Level -= (Style.IndentCaseLabels ? 2 : 1); 416 } 417} 418 419void UnwrappedLineParser::parseAccessSpecifier() { 420 nextToken(); 421 // Otherwise, we don't know what it is, and we'd better keep the next token. 422 if (FormatTok.Tok.is(tok::colon)) 423 nextToken(); 424 addUnwrappedLine(); 425} 426 427void UnwrappedLineParser::parseEnum() { 428 bool HasContents = false; 429 do { 430 switch (FormatTok.Tok.getKind()) { 431 case tok::l_brace: 432 nextToken(); 433 addUnwrappedLine(); 434 ++Line.Level; 435 parseComments(); 436 break; 437 case tok::l_paren: 438 parseParens(); 439 break; 440 case tok::comma: 441 nextToken(); 442 addUnwrappedLine(); 443 parseComments(); 444 break; 445 case tok::r_brace: 446 if (HasContents) 447 addUnwrappedLine(); 448 --Line.Level; 449 nextToken(); 450 break; 451 case tok::semi: 452 nextToken(); 453 addUnwrappedLine(); 454 return; 455 default: 456 HasContents = true; 457 nextToken(); 458 break; 459 } 460 } while (!eof()); 461} 462 463void UnwrappedLineParser::parseStructOrClass() { 464 nextToken(); 465 do { 466 switch (FormatTok.Tok.getKind()) { 467 case tok::l_brace: 468 // FIXME: Think about how to resolve the error handling here. 469 parseBlock(); 470 parseStructuralElement(); 471 return; 472 case tok::semi: 473 nextToken(); 474 addUnwrappedLine(); 475 return; 476 default: 477 nextToken(); 478 break; 479 } 480 } while (!eof()); 481} 482 483void UnwrappedLineParser::addUnwrappedLine() { 484 // Consume trailing comments. 485 while (!eof() && FormatTok.NewlinesBefore == 0 && 486 FormatTok.Tok.is(tok::comment)) { 487 nextToken(); 488 } 489 Callback.consumeUnwrappedLine(Line); 490 Line.Tokens.clear(); 491} 492 493bool UnwrappedLineParser::eof() const { 494 return FormatTok.Tok.is(tok::eof); 495} 496 497void UnwrappedLineParser::nextToken() { 498 if (eof()) 499 return; 500 Line.Tokens.push_back(FormatTok); 501 readToken(); 502} 503 504void UnwrappedLineParser::readToken() { 505 FormatTok = Tokens->getNextToken(); 506 while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) && 507 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 508 FormatTok.IsFirst)) { 509 // FIXME: This is incorrect - the correct way is to create a 510 // data structure that will construct the parts around the preprocessor 511 // directive as a structured \c UnwrappedLine. 512 addUnwrappedLine(); 513 parsePPDirective(); 514 } 515} 516 517} // end namespace format 518} // end namespace clang 519