UnwrappedLineParser.cpp revision d19dc2ddf3f8630cfdbefec490c0000c14bee6bd
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a state
/// where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "format-parser"
20
21#include "UnwrappedLineParser.h"
22#include "clang/Basic/Diagnostic.h"
23#include "llvm/Support/Debug.h"
24
25// Uncomment to get debug output from tests:
26// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0)
27
28namespace clang {
29namespace format {
30
31class ScopedMacroState : public FormatTokenSource {
32public:
33  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
34                   FormatToken &ResetToken)
35      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
36        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
37    TokenSource = this;
38    Line.Level = 0;
39    Line.InPPDirective = true;
40  }
41
42  ~ScopedMacroState() {
43    TokenSource = PreviousTokenSource;
44    ResetToken = Token;
45    Line.InPPDirective = false;
46    Line.Level = PreviousLineLevel;
47  }
48
49  virtual FormatToken getNextToken() {
50    // The \c UnwrappedLineParser guards against this by never calling
51    // \c getNextToken() after it has encountered the first eof token.
52    assert(!eof());
53    Token = PreviousTokenSource->getNextToken();
54    if (eof())
55      return createEOF();
56    return Token;
57  }
58
59private:
60  bool eof() {
61    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
62  }
63
64  FormatToken createEOF() {
65    FormatToken FormatTok;
66    FormatTok.Tok.startToken();
67    FormatTok.Tok.setKind(tok::eof);
68    return FormatTok;
69  }
70
71  UnwrappedLine &Line;
72  FormatTokenSource *&TokenSource;
73  FormatToken &ResetToken;
74  unsigned PreviousLineLevel;
75  FormatTokenSource *PreviousTokenSource;
76
77  FormatToken Token;
78};
79
80class ScopedLineState {
81public:
82  ScopedLineState(UnwrappedLineParser &Parser,
83                  bool SwitchToPreprocessorLines = false)
84      : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
85    if (SwitchToPreprocessorLines)
86      Parser.CurrentLines = &Parser.PreprocessorDirectives;
87    PreBlockLine = Parser.Line.take();
88    Parser.Line.reset(new UnwrappedLine());
89    Parser.Line->Level = PreBlockLine->Level;
90    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
91  }
92
93  ~ScopedLineState() {
94    if (!Parser.Line->Tokens.empty()) {
95      Parser.addUnwrappedLine();
96    }
97    assert(Parser.Line->Tokens.empty());
98    Parser.Line.reset(PreBlockLine);
99    Parser.MustBreakBeforeNextToken = true;
100    if (SwitchToPreprocessorLines)
101      Parser.CurrentLines = &Parser.Lines;
102  }
103
104private:
105  UnwrappedLineParser &Parser;
106  const bool SwitchToPreprocessorLines;
107
108  UnwrappedLine *PreBlockLine;
109};
110
111UnwrappedLineParser::UnwrappedLineParser(
112    clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
113    FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
114    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
115      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
116      Callback(Callback) {}
117
118bool UnwrappedLineParser::parse() {
119  DEBUG(llvm::dbgs() << "----\n");
120  readToken();
121  bool Error = parseFile();
122  for (std::vector<UnwrappedLine>::iterator I = Lines.begin(),
123                                            E = Lines.end();
124       I != E; ++I) {
125    Callback.consumeUnwrappedLine(*I);
126  }
127  return Error;
128}
129
130bool UnwrappedLineParser::parseFile() {
131  bool Error = parseLevel(/*HasOpeningBrace=*/false);
132  // Make sure to format the remaining tokens.
133  addUnwrappedLine();
134  return Error;
135}
136
137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
138  bool Error = false;
139  do {
140    switch (FormatTok.Tok.getKind()) {
141    case tok::comment:
142      nextToken();
143      addUnwrappedLine();
144      break;
145    case tok::l_brace:
146      Error |= parseBlock();
147      addUnwrappedLine();
148      break;
149    case tok::r_brace:
150      if (HasOpeningBrace) {
151        return false;
152      } else {
153        Diag.Report(FormatTok.Tok.getLocation(),
154                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
155                                         "unexpected '}'"));
156        Error = true;
157        nextToken();
158        addUnwrappedLine();
159      }
160      break;
161    default:
162      parseStructuralElement();
163      break;
164    }
165  } while (!eof());
166  return Error;
167}
168
169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
170  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
171  nextToken();
172
173  if (!FormatTok.Tok.is(tok::r_brace)) {
174    addUnwrappedLine();
175
176    Line->Level += AddLevels;
177    parseLevel(/*HasOpeningBrace=*/true);
178    Line->Level -= AddLevels;
179
180    if (!FormatTok.Tok.is(tok::r_brace))
181      return true;
182
183  }
184  nextToken();  // Munch the closing brace.
185  return false;
186}
187
188void UnwrappedLineParser::parsePPDirective() {
189  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
190  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
191  nextToken();
192
193  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
194    addUnwrappedLine();
195    return;
196  }
197
198  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
199  case tok::pp_define:
200    parsePPDefine();
201    break;
202  default:
203    parsePPUnknown();
204    break;
205  }
206}
207
208void UnwrappedLineParser::parsePPDefine() {
209  nextToken();
210
211  if (FormatTok.Tok.getKind() != tok::identifier) {
212    parsePPUnknown();
213    return;
214  }
215  nextToken();
216  if (FormatTok.Tok.getKind() == tok::l_paren) {
217    parseParens();
218  }
219  addUnwrappedLine();
220  Line->Level = 1;
221
222  // Errors during a preprocessor directive can only affect the layout of the
223  // preprocessor directive, and thus we ignore them. An alternative approach
224  // would be to use the same approach we use on the file level (no
225  // re-indentation if there was a structural error) within the macro
226  // definition.
227  parseFile();
228}
229
230void UnwrappedLineParser::parsePPUnknown() {
231  do {
232    nextToken();
233  } while (!eof());
234  addUnwrappedLine();
235}
236
237void UnwrappedLineParser::parseComments() {
238  // Consume leading line comments, e.g. for branches without compounds.
239  while (FormatTok.Tok.is(tok::comment)) {
240    nextToken();
241    addUnwrappedLine();
242  }
243}
244
245void UnwrappedLineParser::parseStructuralElement() {
246  assert(!FormatTok.Tok.is(tok::l_brace));
247  parseComments();
248
249  int TokenNumber = 0;
250  switch (FormatTok.Tok.getKind()) {
251  case tok::at:
252    nextToken();
253    switch (FormatTok.Tok.getObjCKeywordID()) {
254    case tok::objc_public:
255    case tok::objc_protected:
256    case tok::objc_package:
257    case tok::objc_private:
258      return parseAccessSpecifier();
259    case tok::objc_interface:
260    case tok::objc_implementation:
261      return parseObjCInterfaceOrImplementation();
262    case tok::objc_protocol:
263      return parseObjCProtocol();
264    case tok::objc_end:
265      return; // Handled by the caller.
266    case tok::objc_optional:
267    case tok::objc_required:
268      nextToken();
269      addUnwrappedLine();
270      return;
271    default:
272      break;
273    }
274    break;
275  case tok::kw_namespace:
276    parseNamespace();
277    return;
278  case tok::kw_inline:
279    nextToken();
280    TokenNumber++;
281    if (FormatTok.Tok.is(tok::kw_namespace)) {
282      parseNamespace();
283      return;
284    }
285    break;
286  case tok::kw_public:
287  case tok::kw_protected:
288  case tok::kw_private:
289    parseAccessSpecifier();
290    return;
291  case tok::kw_if:
292    parseIfThenElse();
293    return;
294  case tok::kw_for:
295  case tok::kw_while:
296    parseForOrWhileLoop();
297    return;
298  case tok::kw_do:
299    parseDoWhile();
300    return;
301  case tok::kw_switch:
302    parseSwitch();
303    return;
304  case tok::kw_default:
305    nextToken();
306    parseLabel();
307    return;
308  case tok::kw_case:
309    parseCaseLabel();
310    return;
311  case tok::kw_return:
312    parseReturn();
313    return;
314  case tok::kw_extern:
315    nextToken();
316    if (FormatTok.Tok.is(tok::string_literal)) {
317      nextToken();
318      if (FormatTok.Tok.is(tok::l_brace)) {
319        parseBlock(0);
320        addUnwrappedLine();
321        return;
322      }
323    }
324    // In all other cases, parse the declaration.
325    break;
326  default:
327    break;
328  }
329  do {
330    ++TokenNumber;
331    switch (FormatTok.Tok.getKind()) {
332    case tok::kw_enum:
333      parseEnum();
334      return;
335    case tok::kw_struct:
336    case tok::kw_union:
337    case tok::kw_class:
338      parseRecord();
339      // A record declaration or definition is always the start of a structural
340      // element.
341      break;
342    case tok::semi:
343      nextToken();
344      addUnwrappedLine();
345      return;
346    case tok::r_brace:
347      addUnwrappedLine();
348      return;
349    case tok::l_paren:
350      parseParens();
351      break;
352    case tok::l_brace:
353      // A block outside of parentheses must be the last part of a
354      // structural element.
355      // FIXME: Figure out cases where this is not true, and add projections for
356      // them (the one we know is missing are lambdas).
357      parseBlock();
358      addUnwrappedLine();
359      return;
360    case tok::identifier:
361      nextToken();
362      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
363        parseLabel();
364        return;
365      }
366      break;
367    case tok::equal:
368      nextToken();
369      if (FormatTok.Tok.is(tok::l_brace)) {
370        parseBracedList();
371      }
372      break;
373    default:
374      nextToken();
375      break;
376    }
377  } while (!eof());
378}
379
380void UnwrappedLineParser::parseBracedList() {
381  nextToken();
382
383  do {
384    switch (FormatTok.Tok.getKind()) {
385    case tok::l_brace:
386      parseBracedList();
387      break;
388    case tok::r_brace:
389      nextToken();
390      return;
391    default:
392      nextToken();
393      break;
394    }
395  } while (!eof());
396}
397
398void UnwrappedLineParser::parseReturn() {
399  nextToken();
400
401  do {
402    switch (FormatTok.Tok.getKind()) {
403    case tok::l_brace:
404      parseBracedList();
405      break;
406    case tok::l_paren:
407      parseParens();
408      break;
409    case tok::r_brace:
410      // Assume missing ';'.
411      addUnwrappedLine();
412      return;
413    case tok::semi:
414      nextToken();
415      addUnwrappedLine();
416      return;
417    default:
418      nextToken();
419      break;
420    }
421  } while (!eof());
422}
423
424void UnwrappedLineParser::parseParens() {
425  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
426  nextToken();
427  do {
428    switch (FormatTok.Tok.getKind()) {
429    case tok::l_paren:
430      parseParens();
431      break;
432    case tok::r_paren:
433      nextToken();
434      return;
435    case tok::l_brace:
436      {
437        nextToken();
438        ScopedLineState LineState(*this);
439        Line->Level += 1;
440        parseLevel(/*HasOpeningBrace=*/true);
441        Line->Level -= 1;
442      }
443      break;
444    default:
445      nextToken();
446      break;
447    }
448  } while (!eof());
449}
450
451void UnwrappedLineParser::parseIfThenElse() {
452  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
453  nextToken();
454  if (FormatTok.Tok.is(tok::l_paren))
455    parseParens();
456  bool NeedsUnwrappedLine = false;
457  if (FormatTok.Tok.is(tok::l_brace)) {
458    parseBlock();
459    NeedsUnwrappedLine = true;
460  } else {
461    addUnwrappedLine();
462    ++Line->Level;
463    parseStructuralElement();
464    --Line->Level;
465  }
466  if (FormatTok.Tok.is(tok::kw_else)) {
467    nextToken();
468    if (FormatTok.Tok.is(tok::l_brace)) {
469      parseBlock();
470      addUnwrappedLine();
471    } else if (FormatTok.Tok.is(tok::kw_if)) {
472      parseIfThenElse();
473    } else {
474      addUnwrappedLine();
475      ++Line->Level;
476      parseStructuralElement();
477      --Line->Level;
478    }
479  } else if (NeedsUnwrappedLine) {
480    addUnwrappedLine();
481  }
482}
483
484void UnwrappedLineParser::parseNamespace() {
485  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
486  nextToken();
487  if (FormatTok.Tok.is(tok::identifier))
488    nextToken();
489  if (FormatTok.Tok.is(tok::l_brace)) {
490    parseBlock(0);
491    addUnwrappedLine();
492  }
493  // FIXME: Add error handling.
494}
495
496void UnwrappedLineParser::parseForOrWhileLoop() {
497  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
498         "'for' or 'while' expected");
499  nextToken();
500  if (FormatTok.Tok.is(tok::l_paren))
501    parseParens();
502  if (FormatTok.Tok.is(tok::l_brace)) {
503    parseBlock();
504    addUnwrappedLine();
505  } else {
506    addUnwrappedLine();
507    ++Line->Level;
508    parseStructuralElement();
509    --Line->Level;
510  }
511}
512
513void UnwrappedLineParser::parseDoWhile() {
514  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
515  nextToken();
516  if (FormatTok.Tok.is(tok::l_brace)) {
517    parseBlock();
518  } else {
519    addUnwrappedLine();
520    ++Line->Level;
521    parseStructuralElement();
522    --Line->Level;
523  }
524
525  // FIXME: Add error handling.
526  if (!FormatTok.Tok.is(tok::kw_while)) {
527    addUnwrappedLine();
528    return;
529  }
530
531  nextToken();
532  parseStructuralElement();
533}
534
535void UnwrappedLineParser::parseLabel() {
536  // FIXME: remove all asserts.
537  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
538  nextToken();
539  unsigned OldLineLevel = Line->Level;
540  if (Line->Level > 0)
541    --Line->Level;
542  if (FormatTok.Tok.is(tok::l_brace)) {
543    parseBlock();
544    if (FormatTok.Tok.is(tok::kw_break))
545      parseStructuralElement(); // "break;" after "}" goes on the same line.
546  }
547  addUnwrappedLine();
548  Line->Level = OldLineLevel;
549}
550
551void UnwrappedLineParser::parseCaseLabel() {
552  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
553  // FIXME: fix handling of complex expressions here.
554  do {
555    nextToken();
556  } while (!eof() && !FormatTok.Tok.is(tok::colon));
557  parseLabel();
558}
559
560void UnwrappedLineParser::parseSwitch() {
561  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
562  nextToken();
563  if (FormatTok.Tok.is(tok::l_paren))
564    parseParens();
565  if (FormatTok.Tok.is(tok::l_brace)) {
566    parseBlock(Style.IndentCaseLabels ? 2 : 1);
567    addUnwrappedLine();
568  } else {
569    addUnwrappedLine();
570    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
571    parseStructuralElement();
572    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
573  }
574}
575
576void UnwrappedLineParser::parseAccessSpecifier() {
577  nextToken();
578  // Otherwise, we don't know what it is, and we'd better keep the next token.
579  if (FormatTok.Tok.is(tok::colon))
580    nextToken();
581  addUnwrappedLine();
582}
583
584void UnwrappedLineParser::parseEnum() {
585  bool HasContents = false;
586  do {
587    switch (FormatTok.Tok.getKind()) {
588    case tok::l_brace:
589      nextToken();
590      addUnwrappedLine();
591      ++Line->Level;
592      parseComments();
593      break;
594    case tok::l_paren:
595      parseParens();
596      break;
597    case tok::comma:
598      nextToken();
599      addUnwrappedLine();
600      parseComments();
601      break;
602    case tok::r_brace:
603      if (HasContents)
604        addUnwrappedLine();
605      --Line->Level;
606      nextToken();
607      break;
608    case tok::semi:
609      nextToken();
610      addUnwrappedLine();
611      return;
612    default:
613      HasContents = true;
614      nextToken();
615      break;
616    }
617  } while (!eof());
618}
619
620void UnwrappedLineParser::parseRecord() {
621  nextToken();
622  if (FormatTok.Tok.is(tok::identifier) ||
623      FormatTok.Tok.is(tok::kw___attribute) ||
624      FormatTok.Tok.is(tok::kw___declspec)) {
625    nextToken();
626    // We can have macros or attributes in between 'class' and the class name.
627    if (FormatTok.Tok.is(tok::l_paren)) {
628      parseParens();
629    }
630    // The actual identifier can be a nested name specifier.
631    while (FormatTok.Tok.is(tok::identifier) ||
632           FormatTok.Tok.is(tok::coloncolon))
633      nextToken();
634
635    // Note that parsing away template declarations here leads to incorrectly
636    // accepting function declarations as record declarations.
637    // In general, we cannot solve this problem. Consider:
638    // class A<int> B() {}
639    // which can be a function definition or a class definition when B() is a
640    // macro. If we find enough real-world cases where this is a problem, we
641    // can parse for the 'template' keyword in the beginning of the statement,
642    // and thus rule out the record production in case there is no template
643    // (this would still leave us with an ambiguity between template function
644    // and class declarations).
645    if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) {
646      while (FormatTok.Tok.isNot(tok::l_brace)) {
647        if (FormatTok.Tok.is(tok::semi))
648          return;
649        nextToken();
650      }
651    }
652  }
653  if (FormatTok.Tok.is(tok::l_brace))
654    parseBlock();
655  // We fall through to parsing a structural element afterwards, so
656  // class A {} n, m;
657  // will end up in one unwrapped line.
658}
659
660void UnwrappedLineParser::parseObjCProtocolList() {
661  assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
662  do
663    nextToken();
664  while (!eof() && FormatTok.Tok.isNot(tok::greater));
665  nextToken(); // Skip '>'.
666}
667
668void UnwrappedLineParser::parseObjCUntilAtEnd() {
669  do {
670    if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
671      nextToken();
672      addUnwrappedLine();
673      break;
674    }
675    parseStructuralElement();
676  } while (!eof());
677}
678
679void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
680  nextToken();
681  nextToken();  // interface name
682
683  // @interface can be followed by either a base class, or a category.
684  if (FormatTok.Tok.is(tok::colon)) {
685    nextToken();
686    nextToken();  // base class name
687  } else if (FormatTok.Tok.is(tok::l_paren))
688    // Skip category, if present.
689    parseParens();
690
691  if (FormatTok.Tok.is(tok::less))
692    parseObjCProtocolList();
693
694  // If instance variables are present, keep the '{' on the first line too.
695  if (FormatTok.Tok.is(tok::l_brace))
696    parseBlock();
697
698  // With instance variables, this puts '}' on its own line.  Without instance
699  // variables, this ends the @interface line.
700  addUnwrappedLine();
701
702  parseObjCUntilAtEnd();
703}
704
705void UnwrappedLineParser::parseObjCProtocol() {
706  nextToken();
707  nextToken();  // protocol name
708
709  if (FormatTok.Tok.is(tok::less))
710    parseObjCProtocolList();
711
712  // Check for protocol declaration.
713  if (FormatTok.Tok.is(tok::semi)) {
714    nextToken();
715    return addUnwrappedLine();
716  }
717
718  addUnwrappedLine();
719  parseObjCUntilAtEnd();
720}
721
722void UnwrappedLineParser::addUnwrappedLine() {
723  if (Line->Tokens.empty())
724    return;
725  // Consume trailing comments.
726  while (!eof() && FormatTok.NewlinesBefore == 0 &&
727         FormatTok.Tok.is(tok::comment)) {
728    nextToken();
729  }
730  DEBUG({
731    llvm::dbgs() << "Line: ";
732    for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
733                                          E = Line->Tokens.end();
734         I != E; ++I) {
735      llvm::dbgs() << I->Tok.getName() << " ";
736
737    }
738    llvm::dbgs() << "\n";
739  });
740  CurrentLines->push_back(*Line);
741  Line->Tokens.clear();
742  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
743    for (std::vector<UnwrappedLine>::iterator I = PreprocessorDirectives
744             .begin(), E = PreprocessorDirectives.end();
745         I != E; ++I) {
746      CurrentLines->push_back(*I);
747    }
748    PreprocessorDirectives.clear();
749  }
750
751}
752
753bool UnwrappedLineParser::eof() const {
754  return FormatTok.Tok.is(tok::eof);
755}
756
757void UnwrappedLineParser::nextToken() {
758  if (eof())
759    return;
760  Line->Tokens.push_back(FormatTok);
761  if (MustBreakBeforeNextToken) {
762    Line->Tokens.back().MustBreakBefore = true;
763    MustBreakBeforeNextToken = false;
764  }
765  readToken();
766}
767
768void UnwrappedLineParser::readToken() {
769  FormatTok = Tokens->getNextToken();
770  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
771         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
772          FormatTok.IsFirst)) {
773    // If there is an unfinished unwrapped line, we flush the preprocessor
774    // directives only after that unwrapped line was finished later.
775    bool SwitchToPreprocessorLines = !Line->Tokens.empty() &&
776                                     CurrentLines == &Lines;
777    ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
778    parsePPDirective();
779  }
780}
781
782} // end namespace format
783} // end namespace clang
784