UnwrappedLineParser.cpp revision 50767d8c8f2f667255bdb99692c0467ce992bc67
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#include "UnwrappedLineParser.h" 20#include "llvm/Support/raw_ostream.h" 21 22namespace clang { 23namespace format { 24 25class ScopedMacroState : public FormatTokenSource { 26public: 27 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 28 FormatToken &ResetToken) 29 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 30 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 31 TokenSource = this; 32 Line.Level = 0; 33 Line.InPPDirective = true; 34 } 35 36 ~ScopedMacroState() { 37 TokenSource = PreviousTokenSource; 38 ResetToken = Token; 39 Line.InPPDirective = false; 40 Line.Level = PreviousLineLevel; 41 } 42 43 virtual FormatToken getNextToken() { 44 // The \c UnwrappedLineParser guards against this by never calling 45 // \c getNextToken() after it has encountered the first eof token. 46 assert(!eof()); 47 Token = PreviousTokenSource->getNextToken(); 48 if (eof()) 49 return createEOF(); 50 return Token; 51 } 52 53private: 54 bool eof() { 55 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 56 } 57 58 FormatToken createEOF() { 59 FormatToken FormatTok; 60 FormatTok.Tok.startToken(); 61 FormatTok.Tok.setKind(tok::eof); 62 return FormatTok; 63 } 64 65 UnwrappedLine &Line; 66 FormatTokenSource *&TokenSource; 67 FormatToken &ResetToken; 68 unsigned PreviousLineLevel; 69 FormatTokenSource *PreviousTokenSource; 70 71 FormatToken Token; 72}; 73 74UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 75 FormatTokenSource &Tokens, 76 UnwrappedLineConsumer &Callback) 77 : Line(new UnwrappedLine), RootTokenInitialized(false), 78 LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Style(Style), 79 Tokens(&Tokens), Callback(Callback) { 80} 81 82bool UnwrappedLineParser::parse() { 83 readToken(); 84 return parseFile(); 85} 86 87bool UnwrappedLineParser::parseFile() { 88 bool Error = parseLevel(/*HasOpeningBrace=*/false); 89 // Make sure to format the remaining tokens. 90 addUnwrappedLine(); 91 return Error; 92} 93 94bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 95 bool Error = false; 96 do { 97 switch (FormatTok.Tok.getKind()) { 98 case tok::comment: 99 nextToken(); 100 addUnwrappedLine(); 101 break; 102 case tok::l_brace: 103 Error |= parseBlock(); 104 addUnwrappedLine(); 105 break; 106 case tok::r_brace: 107 if (HasOpeningBrace) { 108 return false; 109 } else { 110 // Stray '}' is an error. 111 Error = true; 112 nextToken(); 113 addUnwrappedLine(); 114 } 115 break; 116 default: 117 parseStructuralElement(); 118 break; 119 } 120 } while (!eof()); 121 return Error; 122} 123 124bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 125 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 126 nextToken(); 127 128 addUnwrappedLine(); 129 130 Line->Level += AddLevels; 131 parseLevel(/*HasOpeningBrace=*/true); 132 Line->Level -= AddLevels; 133 134 if (!FormatTok.Tok.is(tok::r_brace)) 135 return true; 136 137 nextToken(); // Munch the closing brace. 138 return false; 139} 140 141void UnwrappedLineParser::parsePPDirective() { 142 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 143 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 144 nextToken(); 145 146 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 147 addUnwrappedLine(); 148 return; 149 } 150 151 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 152 case tok::pp_define: 153 parsePPDefine(); 154 break; 155 default: 156 parsePPUnknown(); 157 break; 158 } 159} 160 161void UnwrappedLineParser::parsePPDefine() { 162 nextToken(); 163 164 if (FormatTok.Tok.getKind() != tok::identifier) { 165 parsePPUnknown(); 166 return; 167 } 168 nextToken(); 169 if (FormatTok.Tok.getKind() == tok::l_paren) { 170 parseParens(); 171 } 172 addUnwrappedLine(); 173 Line->Level = 1; 174 175 // Errors during a preprocessor directive can only affect the layout of the 176 // preprocessor directive, and thus we ignore them. An alternative approach 177 // would be to use the same approach we use on the file level (no 178 // re-indentation if there was a structural error) within the macro 179 // definition. 180 parseFile(); 181} 182 183void UnwrappedLineParser::parsePPUnknown() { 184 do { 185 nextToken(); 186 } while (!eof()); 187 addUnwrappedLine(); 188} 189 190void UnwrappedLineParser::parseComments() { 191 // Consume leading line comments, e.g. for branches without compounds. 192 while (FormatTok.Tok.is(tok::comment)) { 193 nextToken(); 194 addUnwrappedLine(); 195 } 196} 197 198void UnwrappedLineParser::parseStructuralElement() { 199 parseComments(); 200 201 int TokenNumber = 0; 202 switch (FormatTok.Tok.getKind()) { 203 case tok::at: 204 nextToken(); 205 switch (FormatTok.Tok.getObjCKeywordID()) { 206 case tok::objc_public: 207 case tok::objc_protected: 208 case tok::objc_package: 209 case tok::objc_private: 210 return parseAccessSpecifier(); 211 case tok::objc_interface: 212 case tok::objc_implementation: 213 return parseObjCInterfaceOrImplementation(); 214 case tok::objc_protocol: 215 return parseObjCProtocol(); 216 case tok::objc_end: 217 return; // Handled by the caller. 218 default: 219 break; 220 } 221 break; 222 case tok::kw_namespace: 223 parseNamespace(); 224 return; 225 case tok::kw_inline: 226 nextToken(); 227 TokenNumber++; 228 if (FormatTok.Tok.is(tok::kw_namespace)) { 229 parseNamespace(); 230 return; 231 } 232 break; 233 case tok::kw_public: 234 case tok::kw_protected: 235 case tok::kw_private: 236 parseAccessSpecifier(); 237 return; 238 case tok::kw_if: 239 parseIfThenElse(); 240 return; 241 case tok::kw_for: 242 case tok::kw_while: 243 parseForOrWhileLoop(); 244 return; 245 case tok::kw_do: 246 parseDoWhile(); 247 return; 248 case tok::kw_switch: 249 parseSwitch(); 250 return; 251 case tok::kw_default: 252 nextToken(); 253 parseLabel(); 254 return; 255 case tok::kw_case: 256 parseCaseLabel(); 257 return; 258 default: 259 break; 260 } 261 do { 262 ++TokenNumber; 263 switch (FormatTok.Tok.getKind()) { 264 case tok::kw_enum: 265 parseEnum(); 266 return; 267 case tok::kw_struct: // fallthrough 268 case tok::kw_class: 269 parseStructOrClass(); 270 return; 271 case tok::semi: 272 nextToken(); 273 addUnwrappedLine(); 274 return; 275 case tok::l_paren: 276 parseParens(); 277 break; 278 case tok::l_brace: 279 parseBlock(); 280 addUnwrappedLine(); 281 return; 282 case tok::identifier: 283 nextToken(); 284 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 285 parseLabel(); 286 return; 287 } 288 break; 289 case tok::equal: 290 nextToken(); 291 // Skip initializers as they will be formatted by a later step. 292 if (FormatTok.Tok.is(tok::l_brace)) 293 nextToken(); 294 break; 295 default: 296 nextToken(); 297 break; 298 } 299 } while (!eof()); 300} 301 302void UnwrappedLineParser::parseParens() { 303 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 304 nextToken(); 305 do { 306 switch (FormatTok.Tok.getKind()) { 307 case tok::l_paren: 308 parseParens(); 309 break; 310 case tok::r_paren: 311 nextToken(); 312 return; 313 default: 314 nextToken(); 315 break; 316 } 317 } while (!eof()); 318} 319 320void UnwrappedLineParser::parseIfThenElse() { 321 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 322 nextToken(); 323 parseParens(); 324 bool NeedsUnwrappedLine = false; 325 if (FormatTok.Tok.is(tok::l_brace)) { 326 parseBlock(); 327 NeedsUnwrappedLine = true; 328 } else { 329 addUnwrappedLine(); 330 ++Line->Level; 331 parseStructuralElement(); 332 --Line->Level; 333 } 334 if (FormatTok.Tok.is(tok::kw_else)) { 335 nextToken(); 336 if (FormatTok.Tok.is(tok::l_brace)) { 337 parseBlock(); 338 addUnwrappedLine(); 339 } else if (FormatTok.Tok.is(tok::kw_if)) { 340 parseIfThenElse(); 341 } else { 342 addUnwrappedLine(); 343 ++Line->Level; 344 parseStructuralElement(); 345 --Line->Level; 346 } 347 } else if (NeedsUnwrappedLine) { 348 addUnwrappedLine(); 349 } 350} 351 352void UnwrappedLineParser::parseNamespace() { 353 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 354 nextToken(); 355 if (FormatTok.Tok.is(tok::identifier)) 356 nextToken(); 357 if (FormatTok.Tok.is(tok::l_brace)) { 358 parseBlock(0); 359 addUnwrappedLine(); 360 } 361 // FIXME: Add error handling. 362} 363 364void UnwrappedLineParser::parseForOrWhileLoop() { 365 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 366 "'for' or 'while' expected"); 367 nextToken(); 368 parseParens(); 369 if (FormatTok.Tok.is(tok::l_brace)) { 370 parseBlock(); 371 addUnwrappedLine(); 372 } else { 373 addUnwrappedLine(); 374 ++Line->Level; 375 parseStructuralElement(); 376 --Line->Level; 377 } 378} 379 380void UnwrappedLineParser::parseDoWhile() { 381 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 382 nextToken(); 383 if (FormatTok.Tok.is(tok::l_brace)) { 384 parseBlock(); 385 } else { 386 addUnwrappedLine(); 387 ++Line->Level; 388 parseStructuralElement(); 389 --Line->Level; 390 } 391 392 // FIXME: Add error handling. 393 if (!FormatTok.Tok.is(tok::kw_while)) { 394 addUnwrappedLine(); 395 return; 396 } 397 398 nextToken(); 399 parseStructuralElement(); 400} 401 402void UnwrappedLineParser::parseLabel() { 403 // FIXME: remove all asserts. 404 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 405 nextToken(); 406 unsigned OldLineLevel = Line->Level; 407 if (Line->Level > 0) 408 --Line->Level; 409 if (FormatTok.Tok.is(tok::l_brace)) { 410 parseBlock(); 411 } 412 addUnwrappedLine(); 413 Line->Level = OldLineLevel; 414} 415 416void UnwrappedLineParser::parseCaseLabel() { 417 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 418 // FIXME: fix handling of complex expressions here. 419 do { 420 nextToken(); 421 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 422 parseLabel(); 423} 424 425void UnwrappedLineParser::parseSwitch() { 426 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 427 nextToken(); 428 parseParens(); 429 if (FormatTok.Tok.is(tok::l_brace)) { 430 parseBlock(Style.IndentCaseLabels ? 2 : 1); 431 addUnwrappedLine(); 432 } else { 433 addUnwrappedLine(); 434 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 435 parseStructuralElement(); 436 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 437 } 438} 439 440void UnwrappedLineParser::parseAccessSpecifier() { 441 nextToken(); 442 // Otherwise, we don't know what it is, and we'd better keep the next token. 443 if (FormatTok.Tok.is(tok::colon)) 444 nextToken(); 445 addUnwrappedLine(); 446} 447 448void UnwrappedLineParser::parseEnum() { 449 bool HasContents = false; 450 do { 451 switch (FormatTok.Tok.getKind()) { 452 case tok::l_brace: 453 nextToken(); 454 addUnwrappedLine(); 455 ++Line->Level; 456 parseComments(); 457 break; 458 case tok::l_paren: 459 parseParens(); 460 break; 461 case tok::comma: 462 nextToken(); 463 addUnwrappedLine(); 464 parseComments(); 465 break; 466 case tok::r_brace: 467 if (HasContents) 468 addUnwrappedLine(); 469 --Line->Level; 470 nextToken(); 471 break; 472 case tok::semi: 473 nextToken(); 474 addUnwrappedLine(); 475 return; 476 default: 477 HasContents = true; 478 nextToken(); 479 break; 480 } 481 } while (!eof()); 482} 483 484void UnwrappedLineParser::parseStructOrClass() { 485 nextToken(); 486 do { 487 switch (FormatTok.Tok.getKind()) { 488 case tok::l_brace: 489 // FIXME: Think about how to resolve the error handling here. 490 parseBlock(); 491 parseStructuralElement(); 492 return; 493 case tok::semi: 494 nextToken(); 495 addUnwrappedLine(); 496 return; 497 default: 498 nextToken(); 499 break; 500 } 501 } while (!eof()); 502} 503 504void UnwrappedLineParser::parseObjCProtocolList() { 505 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 506 do 507 nextToken(); 508 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 509 nextToken(); // Skip '>'. 510} 511 512void UnwrappedLineParser::parseObjCUntilAtEnd() { 513 do { 514 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 515 nextToken(); 516 addUnwrappedLine(); 517 break; 518 } 519 parseStructuralElement(); 520 } while (!eof()); 521} 522 523void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 524 nextToken(); 525 nextToken(); // interface name 526 527 // @interface can be followed by either a base class, or a category. 528 if (FormatTok.Tok.is(tok::colon)) { 529 nextToken(); 530 nextToken(); // base class name 531 } else if (FormatTok.Tok.is(tok::l_paren)) 532 // Skip category, if present. 533 parseParens(); 534 535 if (FormatTok.Tok.is(tok::less)) 536 parseObjCProtocolList(); 537 538 // If instance variables are present, keep the '{' on the first line too. 539 if (FormatTok.Tok.is(tok::l_brace)) 540 parseBlock(); 541 542 // With instance variables, this puts '}' on its own line. Without instance 543 // variables, this ends the @interface line. 544 addUnwrappedLine(); 545 546 parseObjCUntilAtEnd(); 547} 548 549void UnwrappedLineParser::parseObjCProtocol() { 550 nextToken(); 551 nextToken(); // protocol name 552 553 if (FormatTok.Tok.is(tok::less)) 554 parseObjCProtocolList(); 555 556 // Check for protocol declaration. 557 if (FormatTok.Tok.is(tok::semi)) { 558 nextToken(); 559 return addUnwrappedLine(); 560 } 561 562 addUnwrappedLine(); 563 parseObjCUntilAtEnd(); 564} 565 566void UnwrappedLineParser::addUnwrappedLine() { 567 if (!RootTokenInitialized) 568 return; 569 // Consume trailing comments. 570 while (!eof() && FormatTok.NewlinesBefore == 0 && 571 FormatTok.Tok.is(tok::comment)) { 572 nextToken(); 573 } 574 Callback.consumeUnwrappedLine(*Line); 575 RootTokenInitialized = false; 576 LastInCurrentLine = NULL; 577} 578 579bool UnwrappedLineParser::eof() const { 580 return FormatTok.Tok.is(tok::eof); 581} 582 583void UnwrappedLineParser::nextToken() { 584 if (eof()) 585 return; 586 if (RootTokenInitialized) { 587 assert(LastInCurrentLine->Children.empty()); 588 LastInCurrentLine->Children.push_back(FormatTok); 589 LastInCurrentLine = &LastInCurrentLine->Children.back(); 590 } else { 591 Line->RootToken = FormatTok; 592 RootTokenInitialized = true; 593 LastInCurrentLine = &Line->RootToken; 594 } 595 if (MustBreakBeforeNextToken) { 596 LastInCurrentLine->MustBreakBefore = true; 597 MustBreakBeforeNextToken = false; 598 } 599 readToken(); 600} 601 602void UnwrappedLineParser::readToken() { 603 FormatTok = Tokens->getNextToken(); 604 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 605 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 606 FormatTok.IsFirst)) { 607 UnwrappedLine* StoredLine = Line.take(); 608 Line.reset(new UnwrappedLine(*StoredLine)); 609 assert(LastInCurrentLine == NULL || LastInCurrentLine->Children.empty()); 610 FormatToken *StoredLastInCurrentLine = LastInCurrentLine; 611 bool PreviousInitialized = RootTokenInitialized; 612 RootTokenInitialized = false; 613 LastInCurrentLine = NULL; 614 615 parsePPDirective(); 616 617 assert(!RootTokenInitialized); 618 Line.reset(StoredLine); 619 RootTokenInitialized = PreviousInitialized; 620 LastInCurrentLine = StoredLastInCurrentLine; 621 assert(LastInCurrentLine == NULL || LastInCurrentLine->Children.empty()); 622 MustBreakBeforeNextToken = true; 623 } 624} 625 626} // end namespace format 627} // end namespace clang 628