UnwrappedLineParser.cpp revision 276a209451b8724807f2741f49c90bf27fc280c7
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#include "UnwrappedLineParser.h" 20#include "clang/Basic/Diagnostic.h" 21#include "llvm/Support/raw_ostream.h" 22 23// Uncomment to get debug output from the UnwrappedLineParser. 24// Use in combination with --gtest_filter=*TestName* to limit the output to a 25// single test. 26// #define UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT 27 28namespace clang { 29namespace format { 30 31class ScopedMacroState : public FormatTokenSource { 32public: 33 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 34 FormatToken &ResetToken) 35 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 36 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 37 TokenSource = this; 38 Line.Level = 0; 39 Line.InPPDirective = true; 40 } 41 42 ~ScopedMacroState() { 43 TokenSource = PreviousTokenSource; 44 ResetToken = Token; 45 Line.InPPDirective = false; 46 Line.Level = PreviousLineLevel; 47 } 48 49 virtual FormatToken getNextToken() { 50 // The \c UnwrappedLineParser guards against this by never calling 51 // \c getNextToken() after it has encountered the first eof token. 52 assert(!eof()); 53 Token = PreviousTokenSource->getNextToken(); 54 if (eof()) 55 return createEOF(); 56 return Token; 57 } 58 59private: 60 bool eof() { 61 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 62 } 63 64 FormatToken createEOF() { 65 FormatToken FormatTok; 66 FormatTok.Tok.startToken(); 67 FormatTok.Tok.setKind(tok::eof); 68 return FormatTok; 69 } 70 71 UnwrappedLine &Line; 72 FormatTokenSource *&TokenSource; 73 FormatToken &ResetToken; 74 unsigned PreviousLineLevel; 75 FormatTokenSource *PreviousTokenSource; 76 77 FormatToken Token; 78}; 79 80class ScopedLineState { 81public: 82 ScopedLineState(UnwrappedLineParser &Parser) : Parser(Parser) { 83 PreBlockLine = Parser.Line.take(); 84 Parser.Line.reset(new UnwrappedLine(*PreBlockLine)); 85 assert(Parser.LastInCurrentLine == NULL || 86 Parser.LastInCurrentLine->Children.empty()); 87 PreBlockLastToken = Parser.LastInCurrentLine; 88 PreBlockRootTokenInitialized = Parser.RootTokenInitialized; 89 Parser.RootTokenInitialized = false; 90 Parser.LastInCurrentLine = NULL; 91 } 92 93 ~ScopedLineState() { 94 if (Parser.RootTokenInitialized) { 95 Parser.addUnwrappedLine(); 96 } 97 assert(!Parser.RootTokenInitialized); 98 Parser.Line.reset(PreBlockLine); 99 Parser.RootTokenInitialized = PreBlockRootTokenInitialized; 100 Parser.LastInCurrentLine = PreBlockLastToken; 101 assert(Parser.LastInCurrentLine == NULL || 102 Parser.LastInCurrentLine->Children.empty()); 103 Parser.MustBreakBeforeNextToken = true; 104 } 105 106private: 107 UnwrappedLineParser &Parser; 108 109 UnwrappedLine *PreBlockLine; 110 FormatToken* PreBlockLastToken; 111 bool PreBlockRootTokenInitialized; 112}; 113 114UnwrappedLineParser::UnwrappedLineParser( 115 clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 116 FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) 117 : Line(new UnwrappedLine), RootTokenInitialized(false), 118 LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Diag(Diag), 119 Style(Style), Tokens(&Tokens), Callback(Callback) { 120} 121 122bool UnwrappedLineParser::parse() { 123#ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT 124 llvm::errs() << "----\n"; 125#endif 126 readToken(); 127 return parseFile(); 128} 129 130bool UnwrappedLineParser::parseFile() { 131 bool Error = parseLevel(/*HasOpeningBrace=*/false); 132 // Make sure to format the remaining tokens. 133 addUnwrappedLine(); 134 return Error; 135} 136 137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 138 bool Error = false; 139 do { 140 switch (FormatTok.Tok.getKind()) { 141 case tok::comment: 142 nextToken(); 143 addUnwrappedLine(); 144 break; 145 case tok::l_brace: 146 Error |= parseBlock(); 147 addUnwrappedLine(); 148 break; 149 case tok::r_brace: 150 if (HasOpeningBrace) { 151 return false; 152 } else { 153 Diag.Report(FormatTok.Tok.getLocation(), 154 Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, 155 "unexpected '}'")); 156 Error = true; 157 nextToken(); 158 addUnwrappedLine(); 159 } 160 break; 161 default: 162 parseStructuralElement(); 163 break; 164 } 165 } while (!eof()); 166 return Error; 167} 168 169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 170 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 171 nextToken(); 172 173 if (!FormatTok.Tok.is(tok::r_brace)) { 174 addUnwrappedLine(); 175 176 Line->Level += AddLevels; 177 parseLevel(/*HasOpeningBrace=*/true); 178 Line->Level -= AddLevels; 179 180 if (!FormatTok.Tok.is(tok::r_brace)) 181 return true; 182 183 } 184 nextToken(); // Munch the closing brace. 185 return false; 186} 187 188void UnwrappedLineParser::parsePPDirective() { 189 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 190 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 191 nextToken(); 192 193 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 194 addUnwrappedLine(); 195 return; 196 } 197 198 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 199 case tok::pp_define: 200 parsePPDefine(); 201 break; 202 default: 203 parsePPUnknown(); 204 break; 205 } 206} 207 208void UnwrappedLineParser::parsePPDefine() { 209 nextToken(); 210 211 if (FormatTok.Tok.getKind() != tok::identifier) { 212 parsePPUnknown(); 213 return; 214 } 215 nextToken(); 216 if (FormatTok.Tok.getKind() == tok::l_paren) { 217 parseParens(); 218 } 219 addUnwrappedLine(); 220 Line->Level = 1; 221 222 // Errors during a preprocessor directive can only affect the layout of the 223 // preprocessor directive, and thus we ignore them. An alternative approach 224 // would be to use the same approach we use on the file level (no 225 // re-indentation if there was a structural error) within the macro 226 // definition. 227 parseFile(); 228} 229 230void UnwrappedLineParser::parsePPUnknown() { 231 do { 232 nextToken(); 233 } while (!eof()); 234 addUnwrappedLine(); 235} 236 237void UnwrappedLineParser::parseComments() { 238 // Consume leading line comments, e.g. for branches without compounds. 239 while (FormatTok.Tok.is(tok::comment)) { 240 nextToken(); 241 addUnwrappedLine(); 242 } 243} 244 245void UnwrappedLineParser::parseStructuralElement() { 246 assert(!FormatTok.Tok.is(tok::l_brace)); 247 parseComments(); 248 249 int TokenNumber = 0; 250 switch (FormatTok.Tok.getKind()) { 251 case tok::at: 252 nextToken(); 253 switch (FormatTok.Tok.getObjCKeywordID()) { 254 case tok::objc_public: 255 case tok::objc_protected: 256 case tok::objc_package: 257 case tok::objc_private: 258 return parseAccessSpecifier(); 259 case tok::objc_interface: 260 case tok::objc_implementation: 261 return parseObjCInterfaceOrImplementation(); 262 case tok::objc_protocol: 263 return parseObjCProtocol(); 264 case tok::objc_end: 265 return; // Handled by the caller. 266 case tok::objc_optional: 267 case tok::objc_required: 268 nextToken(); 269 addUnwrappedLine(); 270 return; 271 default: 272 break; 273 } 274 break; 275 case tok::kw_namespace: 276 parseNamespace(); 277 return; 278 case tok::kw_inline: 279 nextToken(); 280 TokenNumber++; 281 if (FormatTok.Tok.is(tok::kw_namespace)) { 282 parseNamespace(); 283 return; 284 } 285 break; 286 case tok::kw_public: 287 case tok::kw_protected: 288 case tok::kw_private: 289 parseAccessSpecifier(); 290 return; 291 case tok::kw_if: 292 parseIfThenElse(); 293 return; 294 case tok::kw_for: 295 case tok::kw_while: 296 parseForOrWhileLoop(); 297 return; 298 case tok::kw_do: 299 parseDoWhile(); 300 return; 301 case tok::kw_switch: 302 parseSwitch(); 303 return; 304 case tok::kw_default: 305 nextToken(); 306 parseLabel(); 307 return; 308 case tok::kw_case: 309 parseCaseLabel(); 310 return; 311 default: 312 break; 313 } 314 do { 315 ++TokenNumber; 316 switch (FormatTok.Tok.getKind()) { 317 case tok::kw_enum: 318 parseEnum(); 319 return; 320 case tok::kw_struct: // fallthrough 321 case tok::kw_class: 322 parseStructOrClass(); 323 return; 324 case tok::semi: 325 nextToken(); 326 addUnwrappedLine(); 327 return; 328 case tok::l_paren: 329 parseParens(); 330 break; 331 case tok::l_brace: 332 // A block outside of parentheses must be the last part of a 333 // structural element. 334 // FIXME: Figure out cases where this is not true, and add projections for 335 // them (the one we know is missing are lambdas). 336 parseBlock(); 337 addUnwrappedLine(); 338 return; 339 case tok::identifier: 340 nextToken(); 341 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 342 parseLabel(); 343 return; 344 } 345 break; 346 case tok::equal: 347 nextToken(); 348 if (FormatTok.Tok.is(tok::l_brace)) { 349 parseBracedList(); 350 } 351 break; 352 default: 353 nextToken(); 354 break; 355 } 356 } while (!eof()); 357} 358 359void UnwrappedLineParser::parseBracedList() { 360 nextToken(); 361 362 do { 363 switch (FormatTok.Tok.getKind()) { 364 case tok::l_brace: 365 parseBracedList(); 366 break; 367 case tok::r_brace: 368 nextToken(); 369 return; 370 default: 371 nextToken(); 372 break; 373 } 374 } while (!eof()); 375} 376 377void UnwrappedLineParser::parseParens() { 378 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 379 nextToken(); 380 do { 381 switch (FormatTok.Tok.getKind()) { 382 case tok::l_paren: 383 parseParens(); 384 break; 385 case tok::r_paren: 386 nextToken(); 387 return; 388 case tok::l_brace: 389 { 390 nextToken(); 391 ScopedLineState LineState(*this); 392 Line->Level += 1; 393 parseLevel(/*HasOpeningBrace=*/true); 394 Line->Level -= 1; 395 } 396 break; 397 default: 398 nextToken(); 399 break; 400 } 401 } while (!eof()); 402} 403 404void UnwrappedLineParser::parseIfThenElse() { 405 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 406 nextToken(); 407 parseParens(); 408 bool NeedsUnwrappedLine = false; 409 if (FormatTok.Tok.is(tok::l_brace)) { 410 parseBlock(); 411 NeedsUnwrappedLine = true; 412 } else { 413 addUnwrappedLine(); 414 ++Line->Level; 415 parseStructuralElement(); 416 --Line->Level; 417 } 418 if (FormatTok.Tok.is(tok::kw_else)) { 419 nextToken(); 420 if (FormatTok.Tok.is(tok::l_brace)) { 421 parseBlock(); 422 addUnwrappedLine(); 423 } else if (FormatTok.Tok.is(tok::kw_if)) { 424 parseIfThenElse(); 425 } else { 426 addUnwrappedLine(); 427 ++Line->Level; 428 parseStructuralElement(); 429 --Line->Level; 430 } 431 } else if (NeedsUnwrappedLine) { 432 addUnwrappedLine(); 433 } 434} 435 436void UnwrappedLineParser::parseNamespace() { 437 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 438 nextToken(); 439 if (FormatTok.Tok.is(tok::identifier)) 440 nextToken(); 441 if (FormatTok.Tok.is(tok::l_brace)) { 442 parseBlock(0); 443 addUnwrappedLine(); 444 } 445 // FIXME: Add error handling. 446} 447 448void UnwrappedLineParser::parseForOrWhileLoop() { 449 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 450 "'for' or 'while' expected"); 451 nextToken(); 452 parseParens(); 453 if (FormatTok.Tok.is(tok::l_brace)) { 454 parseBlock(); 455 addUnwrappedLine(); 456 } else { 457 addUnwrappedLine(); 458 ++Line->Level; 459 parseStructuralElement(); 460 --Line->Level; 461 } 462} 463 464void UnwrappedLineParser::parseDoWhile() { 465 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 466 nextToken(); 467 if (FormatTok.Tok.is(tok::l_brace)) { 468 parseBlock(); 469 } else { 470 addUnwrappedLine(); 471 ++Line->Level; 472 parseStructuralElement(); 473 --Line->Level; 474 } 475 476 // FIXME: Add error handling. 477 if (!FormatTok.Tok.is(tok::kw_while)) { 478 addUnwrappedLine(); 479 return; 480 } 481 482 nextToken(); 483 parseStructuralElement(); 484} 485 486void UnwrappedLineParser::parseLabel() { 487 // FIXME: remove all asserts. 488 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 489 nextToken(); 490 unsigned OldLineLevel = Line->Level; 491 if (Line->Level > 0) 492 --Line->Level; 493 if (FormatTok.Tok.is(tok::l_brace)) { 494 parseBlock(); 495 } 496 addUnwrappedLine(); 497 Line->Level = OldLineLevel; 498} 499 500void UnwrappedLineParser::parseCaseLabel() { 501 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 502 // FIXME: fix handling of complex expressions here. 503 do { 504 nextToken(); 505 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 506 parseLabel(); 507} 508 509void UnwrappedLineParser::parseSwitch() { 510 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 511 nextToken(); 512 parseParens(); 513 if (FormatTok.Tok.is(tok::l_brace)) { 514 parseBlock(Style.IndentCaseLabels ? 2 : 1); 515 addUnwrappedLine(); 516 } else { 517 addUnwrappedLine(); 518 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 519 parseStructuralElement(); 520 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 521 } 522} 523 524void UnwrappedLineParser::parseAccessSpecifier() { 525 nextToken(); 526 // Otherwise, we don't know what it is, and we'd better keep the next token. 527 if (FormatTok.Tok.is(tok::colon)) 528 nextToken(); 529 addUnwrappedLine(); 530} 531 532void UnwrappedLineParser::parseEnum() { 533 bool HasContents = false; 534 do { 535 switch (FormatTok.Tok.getKind()) { 536 case tok::l_brace: 537 nextToken(); 538 addUnwrappedLine(); 539 ++Line->Level; 540 parseComments(); 541 break; 542 case tok::l_paren: 543 parseParens(); 544 break; 545 case tok::comma: 546 nextToken(); 547 addUnwrappedLine(); 548 parseComments(); 549 break; 550 case tok::r_brace: 551 if (HasContents) 552 addUnwrappedLine(); 553 --Line->Level; 554 nextToken(); 555 break; 556 case tok::semi: 557 nextToken(); 558 addUnwrappedLine(); 559 return; 560 default: 561 HasContents = true; 562 nextToken(); 563 break; 564 } 565 } while (!eof()); 566} 567 568void UnwrappedLineParser::parseStructOrClass() { 569 nextToken(); 570 do { 571 switch (FormatTok.Tok.getKind()) { 572 case tok::l_brace: 573 // FIXME: Think about how to resolve the error handling here. 574 parseBlock(); 575 parseStructuralElement(); 576 return; 577 case tok::semi: 578 nextToken(); 579 addUnwrappedLine(); 580 return; 581 default: 582 nextToken(); 583 break; 584 } 585 } while (!eof()); 586} 587 588void UnwrappedLineParser::parseObjCProtocolList() { 589 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 590 do 591 nextToken(); 592 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 593 nextToken(); // Skip '>'. 594} 595 596void UnwrappedLineParser::parseObjCUntilAtEnd() { 597 do { 598 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 599 nextToken(); 600 addUnwrappedLine(); 601 break; 602 } 603 parseStructuralElement(); 604 } while (!eof()); 605} 606 607void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 608 nextToken(); 609 nextToken(); // interface name 610 611 // @interface can be followed by either a base class, or a category. 612 if (FormatTok.Tok.is(tok::colon)) { 613 nextToken(); 614 nextToken(); // base class name 615 } else if (FormatTok.Tok.is(tok::l_paren)) 616 // Skip category, if present. 617 parseParens(); 618 619 if (FormatTok.Tok.is(tok::less)) 620 parseObjCProtocolList(); 621 622 // If instance variables are present, keep the '{' on the first line too. 623 if (FormatTok.Tok.is(tok::l_brace)) 624 parseBlock(); 625 626 // With instance variables, this puts '}' on its own line. Without instance 627 // variables, this ends the @interface line. 628 addUnwrappedLine(); 629 630 parseObjCUntilAtEnd(); 631} 632 633void UnwrappedLineParser::parseObjCProtocol() { 634 nextToken(); 635 nextToken(); // protocol name 636 637 if (FormatTok.Tok.is(tok::less)) 638 parseObjCProtocolList(); 639 640 // Check for protocol declaration. 641 if (FormatTok.Tok.is(tok::semi)) { 642 nextToken(); 643 return addUnwrappedLine(); 644 } 645 646 addUnwrappedLine(); 647 parseObjCUntilAtEnd(); 648} 649 650void UnwrappedLineParser::addUnwrappedLine() { 651 if (!RootTokenInitialized) 652 return; 653 // Consume trailing comments. 654 while (!eof() && FormatTok.NewlinesBefore == 0 && 655 FormatTok.Tok.is(tok::comment)) { 656 nextToken(); 657 } 658#ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT 659 FormatToken* NextToken = &Line->RootToken; 660 llvm::errs() << "Line: "; 661 while (NextToken) { 662 llvm::errs() << NextToken->Tok.getName() << " "; 663 NextToken = NextToken->Children.empty() ? NULL : &NextToken->Children[0]; 664 } 665 llvm::errs() << "\n"; 666#endif 667 Callback.consumeUnwrappedLine(*Line); 668 RootTokenInitialized = false; 669 LastInCurrentLine = NULL; 670} 671 672bool UnwrappedLineParser::eof() const { 673 return FormatTok.Tok.is(tok::eof); 674} 675 676void UnwrappedLineParser::nextToken() { 677 if (eof()) 678 return; 679 if (RootTokenInitialized) { 680 assert(LastInCurrentLine->Children.empty()); 681 LastInCurrentLine->Children.push_back(FormatTok); 682 LastInCurrentLine = &LastInCurrentLine->Children.back(); 683 } else { 684 Line->RootToken = FormatTok; 685 RootTokenInitialized = true; 686 LastInCurrentLine = &Line->RootToken; 687 } 688 if (MustBreakBeforeNextToken) { 689 LastInCurrentLine->MustBreakBefore = true; 690 MustBreakBeforeNextToken = false; 691 } 692 readToken(); 693} 694 695void UnwrappedLineParser::readToken() { 696 FormatTok = Tokens->getNextToken(); 697 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 698 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 699 FormatTok.IsFirst)) { 700 ScopedLineState BlockState(*this); 701 parsePPDirective(); 702 } 703} 704 705} // end namespace format 706} // end namespace clang 707