UnwrappedLineParser.cpp revision 3a3408cceb438af7cdebb5b3d938abb916162bb4
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
14/// This is EXPERIMENTAL code under heavy development. It is not yet in a
15/// state where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "format-parser"
20
21#include "UnwrappedLineParser.h"
22#include "clang/Basic/Diagnostic.h"
23#include "llvm/Support/Debug.h"
24
25// Uncomment to get debug output from tests:
26// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0)
27
28namespace clang {
29namespace format {
30
31class ScopedMacroState : public FormatTokenSource {
32public:
33  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
34                   FormatToken &ResetToken)
35      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
36        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
37    TokenSource = this;
38    Line.Level = 0;
39    Line.InPPDirective = true;
40  }
41
42  ~ScopedMacroState() {
43    TokenSource = PreviousTokenSource;
44    ResetToken = Token;
45    Line.InPPDirective = false;
46    Line.Level = PreviousLineLevel;
47  }
48
49  virtual FormatToken getNextToken() {
50    // The \c UnwrappedLineParser guards against this by never calling
51    // \c getNextToken() after it has encountered the first eof token.
52    assert(!eof());
53    Token = PreviousTokenSource->getNextToken();
54    if (eof())
55      return createEOF();
56    return Token;
57  }
58
59private:
60  bool eof() {
61    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
62  }
63
64  FormatToken createEOF() {
65    FormatToken FormatTok;
66    FormatTok.Tok.startToken();
67    FormatTok.Tok.setKind(tok::eof);
68    return FormatTok;
69  }
70
71  UnwrappedLine &Line;
72  FormatTokenSource *&TokenSource;
73  FormatToken &ResetToken;
74  unsigned PreviousLineLevel;
75  FormatTokenSource *PreviousTokenSource;
76
77  FormatToken Token;
78};
79
80class ScopedLineState {
81public:
82  ScopedLineState(UnwrappedLineParser &Parser,
83                  bool SwitchToPreprocessorLines = false)
84      : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
85    if (SwitchToPreprocessorLines)
86      Parser.CurrentLines = &Parser.PreprocessorDirectives;
87    PreBlockLine = Parser.Line.take();
88    Parser.Line.reset(new UnwrappedLine());
89    Parser.Line->Level = PreBlockLine->Level;
90    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
91  }
92
93  ~ScopedLineState() {
94    if (!Parser.Line->Tokens.empty()) {
95      Parser.addUnwrappedLine();
96    }
97    assert(Parser.Line->Tokens.empty());
98    Parser.Line.reset(PreBlockLine);
99    Parser.MustBreakBeforeNextToken = true;
100    if (SwitchToPreprocessorLines)
101      Parser.CurrentLines = &Parser.Lines;
102  }
103
104private:
105  UnwrappedLineParser &Parser;
106  const bool SwitchToPreprocessorLines;
107
108  UnwrappedLine *PreBlockLine;
109};
110
111UnwrappedLineParser::UnwrappedLineParser(
112    clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
113    FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
114    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
115      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
116      Callback(Callback) {}
117
118bool UnwrappedLineParser::parse() {
119  DEBUG(llvm::dbgs() << "----\n");
120  readToken();
121  bool Error = parseFile();
122  for (std::vector<UnwrappedLine>::iterator I = Lines.begin(),
123                                            E = Lines.end();
124       I != E; ++I) {
125    Callback.consumeUnwrappedLine(*I);
126  }
127  return Error;
128}
129
130bool UnwrappedLineParser::parseFile() {
131  bool Error = parseLevel(/*HasOpeningBrace=*/false);
132  // Make sure to format the remaining tokens.
133  addUnwrappedLine();
134  return Error;
135}
136
137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
138  bool Error = false;
139  do {
140    switch (FormatTok.Tok.getKind()) {
141    case tok::comment:
142      nextToken();
143      addUnwrappedLine();
144      break;
145    case tok::l_brace:
146      Error |= parseBlock();
147      addUnwrappedLine();
148      break;
149    case tok::r_brace:
150      if (HasOpeningBrace) {
151        return false;
152      } else {
153        Diag.Report(FormatTok.Tok.getLocation(),
154                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
155                                         "unexpected '}'"));
156        Error = true;
157        nextToken();
158        addUnwrappedLine();
159      }
160      break;
161    default:
162      parseStructuralElement();
163      break;
164    }
165  } while (!eof());
166  return Error;
167}
168
169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
170  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
171  nextToken();
172
173  if (!FormatTok.Tok.is(tok::r_brace)) {
174    addUnwrappedLine();
175
176    Line->Level += AddLevels;
177    parseLevel(/*HasOpeningBrace=*/true);
178    Line->Level -= AddLevels;
179
180    if (!FormatTok.Tok.is(tok::r_brace))
181      return true;
182
183  }
184  nextToken();  // Munch the closing brace.
185  return false;
186}
187
188void UnwrappedLineParser::parsePPDirective() {
189  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
190  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
191  nextToken();
192
193  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
194    addUnwrappedLine();
195    return;
196  }
197
198  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
199  case tok::pp_define:
200    parsePPDefine();
201    break;
202  default:
203    parsePPUnknown();
204    break;
205  }
206}
207
208void UnwrappedLineParser::parsePPDefine() {
209  nextToken();
210
211  if (FormatTok.Tok.getKind() != tok::identifier) {
212    parsePPUnknown();
213    return;
214  }
215  nextToken();
216  if (FormatTok.Tok.getKind() == tok::l_paren) {
217    parseParens();
218  }
219  addUnwrappedLine();
220  Line->Level = 1;
221
222  // Errors during a preprocessor directive can only affect the layout of the
223  // preprocessor directive, and thus we ignore them. An alternative approach
224  // would be to use the same approach we use on the file level (no
225  // re-indentation if there was a structural error) within the macro
226  // definition.
227  parseFile();
228}
229
230void UnwrappedLineParser::parsePPUnknown() {
231  do {
232    nextToken();
233  } while (!eof());
234  addUnwrappedLine();
235}
236
237void UnwrappedLineParser::parseComments() {
238  // Consume leading line comments, e.g. for branches without compounds.
239  while (FormatTok.Tok.is(tok::comment)) {
240    nextToken();
241    addUnwrappedLine();
242  }
243}
244
245void UnwrappedLineParser::parseStructuralElement() {
246  assert(!FormatTok.Tok.is(tok::l_brace));
247  parseComments();
248
249  int TokenNumber = 0;
250  switch (FormatTok.Tok.getKind()) {
251  case tok::at:
252    nextToken();
253    switch (FormatTok.Tok.getObjCKeywordID()) {
254    case tok::objc_public:
255    case tok::objc_protected:
256    case tok::objc_package:
257    case tok::objc_private:
258      return parseAccessSpecifier();
259    case tok::objc_interface:
260    case tok::objc_implementation:
261      return parseObjCInterfaceOrImplementation();
262    case tok::objc_protocol:
263      return parseObjCProtocol();
264    case tok::objc_end:
265      return; // Handled by the caller.
266    case tok::objc_optional:
267    case tok::objc_required:
268      nextToken();
269      addUnwrappedLine();
270      return;
271    default:
272      break;
273    }
274    break;
275  case tok::kw_namespace:
276    parseNamespace();
277    return;
278  case tok::kw_inline:
279    nextToken();
280    TokenNumber++;
281    if (FormatTok.Tok.is(tok::kw_namespace)) {
282      parseNamespace();
283      return;
284    }
285    break;
286  case tok::kw_public:
287  case tok::kw_protected:
288  case tok::kw_private:
289    parseAccessSpecifier();
290    return;
291  case tok::kw_if:
292    parseIfThenElse();
293    return;
294  case tok::kw_for:
295  case tok::kw_while:
296    parseForOrWhileLoop();
297    return;
298  case tok::kw_do:
299    parseDoWhile();
300    return;
301  case tok::kw_switch:
302    parseSwitch();
303    return;
304  case tok::kw_default:
305    nextToken();
306    parseLabel();
307    return;
308  case tok::kw_case:
309    parseCaseLabel();
310    return;
311  case tok::kw_return:
312    parseReturn();
313    return;
314  default:
315    break;
316  }
317  do {
318    ++TokenNumber;
319    switch (FormatTok.Tok.getKind()) {
320    case tok::kw_enum:
321      parseEnum();
322      return;
323    case tok::kw_struct:
324    case tok::kw_union:
325    case tok::kw_class:
326      parseRecord();
327      // A record declaration or definition is always the start of a structural
328      // element.
329      break;
330    case tok::semi:
331      nextToken();
332      addUnwrappedLine();
333      return;
334    case tok::r_brace:
335      addUnwrappedLine();
336      return;
337    case tok::l_paren:
338      parseParens();
339      break;
340    case tok::l_brace:
341      // A block outside of parentheses must be the last part of a
342      // structural element.
343      // FIXME: Figure out cases where this is not true, and add projections for
344      // them (the one we know is missing are lambdas).
345      parseBlock();
346      addUnwrappedLine();
347      return;
348    case tok::identifier:
349      nextToken();
350      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
351        parseLabel();
352        return;
353      }
354      break;
355    case tok::equal:
356      nextToken();
357      if (FormatTok.Tok.is(tok::l_brace)) {
358        parseBracedList();
359      }
360      break;
361    default:
362      nextToken();
363      break;
364    }
365  } while (!eof());
366}
367
368void UnwrappedLineParser::parseBracedList() {
369  nextToken();
370
371  do {
372    switch (FormatTok.Tok.getKind()) {
373    case tok::l_brace:
374      parseBracedList();
375      break;
376    case tok::r_brace:
377      nextToken();
378      return;
379    default:
380      nextToken();
381      break;
382    }
383  } while (!eof());
384}
385
386void UnwrappedLineParser::parseReturn() {
387  nextToken();
388
389  do {
390    switch (FormatTok.Tok.getKind()) {
391    case tok::l_brace:
392      parseBracedList();
393      break;
394    case tok::l_paren:
395      parseParens();
396      break;
397    case tok::r_brace:
398      // Assume missing ';'.
399      addUnwrappedLine();
400      return;
401    case tok::semi:
402      nextToken();
403      addUnwrappedLine();
404      return;
405    default:
406      nextToken();
407      break;
408    }
409  } while (!eof());
410}
411
412void UnwrappedLineParser::parseParens() {
413  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
414  nextToken();
415  do {
416    switch (FormatTok.Tok.getKind()) {
417    case tok::l_paren:
418      parseParens();
419      break;
420    case tok::r_paren:
421      nextToken();
422      return;
423    case tok::l_brace:
424      {
425        nextToken();
426        ScopedLineState LineState(*this);
427        Line->Level += 1;
428        parseLevel(/*HasOpeningBrace=*/true);
429        Line->Level -= 1;
430      }
431      break;
432    default:
433      nextToken();
434      break;
435    }
436  } while (!eof());
437}
438
439void UnwrappedLineParser::parseIfThenElse() {
440  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
441  nextToken();
442  if (FormatTok.Tok.is(tok::l_paren))
443    parseParens();
444  bool NeedsUnwrappedLine = false;
445  if (FormatTok.Tok.is(tok::l_brace)) {
446    parseBlock();
447    NeedsUnwrappedLine = true;
448  } else {
449    addUnwrappedLine();
450    ++Line->Level;
451    parseStructuralElement();
452    --Line->Level;
453  }
454  if (FormatTok.Tok.is(tok::kw_else)) {
455    nextToken();
456    if (FormatTok.Tok.is(tok::l_brace)) {
457      parseBlock();
458      addUnwrappedLine();
459    } else if (FormatTok.Tok.is(tok::kw_if)) {
460      parseIfThenElse();
461    } else {
462      addUnwrappedLine();
463      ++Line->Level;
464      parseStructuralElement();
465      --Line->Level;
466    }
467  } else if (NeedsUnwrappedLine) {
468    addUnwrappedLine();
469  }
470}
471
472void UnwrappedLineParser::parseNamespace() {
473  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
474  nextToken();
475  if (FormatTok.Tok.is(tok::identifier))
476    nextToken();
477  if (FormatTok.Tok.is(tok::l_brace)) {
478    parseBlock(0);
479    addUnwrappedLine();
480  }
481  // FIXME: Add error handling.
482}
483
484void UnwrappedLineParser::parseForOrWhileLoop() {
485  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
486         "'for' or 'while' expected");
487  nextToken();
488  if (FormatTok.Tok.is(tok::l_paren))
489    parseParens();
490  if (FormatTok.Tok.is(tok::l_brace)) {
491    parseBlock();
492    addUnwrappedLine();
493  } else {
494    addUnwrappedLine();
495    ++Line->Level;
496    parseStructuralElement();
497    --Line->Level;
498  }
499}
500
501void UnwrappedLineParser::parseDoWhile() {
502  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
503  nextToken();
504  if (FormatTok.Tok.is(tok::l_brace)) {
505    parseBlock();
506  } else {
507    addUnwrappedLine();
508    ++Line->Level;
509    parseStructuralElement();
510    --Line->Level;
511  }
512
513  // FIXME: Add error handling.
514  if (!FormatTok.Tok.is(tok::kw_while)) {
515    addUnwrappedLine();
516    return;
517  }
518
519  nextToken();
520  parseStructuralElement();
521}
522
523void UnwrappedLineParser::parseLabel() {
524  // FIXME: remove all asserts.
525  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
526  nextToken();
527  unsigned OldLineLevel = Line->Level;
528  if (Line->Level > 0)
529    --Line->Level;
530  if (FormatTok.Tok.is(tok::l_brace)) {
531    parseBlock();
532    if (FormatTok.Tok.is(tok::kw_break))
533      parseStructuralElement(); // "break;" after "}" goes on the same line.
534  }
535  addUnwrappedLine();
536  Line->Level = OldLineLevel;
537}
538
539void UnwrappedLineParser::parseCaseLabel() {
540  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
541  // FIXME: fix handling of complex expressions here.
542  do {
543    nextToken();
544  } while (!eof() && !FormatTok.Tok.is(tok::colon));
545  parseLabel();
546}
547
548void UnwrappedLineParser::parseSwitch() {
549  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
550  nextToken();
551  if (FormatTok.Tok.is(tok::l_paren))
552    parseParens();
553  if (FormatTok.Tok.is(tok::l_brace)) {
554    parseBlock(Style.IndentCaseLabels ? 2 : 1);
555    addUnwrappedLine();
556  } else {
557    addUnwrappedLine();
558    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
559    parseStructuralElement();
560    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
561  }
562}
563
564void UnwrappedLineParser::parseAccessSpecifier() {
565  nextToken();
566  // Otherwise, we don't know what it is, and we'd better keep the next token.
567  if (FormatTok.Tok.is(tok::colon))
568    nextToken();
569  addUnwrappedLine();
570}
571
572void UnwrappedLineParser::parseEnum() {
573  bool HasContents = false;
574  do {
575    switch (FormatTok.Tok.getKind()) {
576    case tok::l_brace:
577      nextToken();
578      addUnwrappedLine();
579      ++Line->Level;
580      parseComments();
581      break;
582    case tok::l_paren:
583      parseParens();
584      break;
585    case tok::comma:
586      nextToken();
587      addUnwrappedLine();
588      parseComments();
589      break;
590    case tok::r_brace:
591      if (HasContents)
592        addUnwrappedLine();
593      --Line->Level;
594      nextToken();
595      break;
596    case tok::semi:
597      nextToken();
598      addUnwrappedLine();
599      return;
600    default:
601      HasContents = true;
602      nextToken();
603      break;
604    }
605  } while (!eof());
606}
607
608void UnwrappedLineParser::parseRecord() {
609  nextToken();
610  if (FormatTok.Tok.is(tok::identifier) ||
611      FormatTok.Tok.is(tok::kw___attribute) ||
612      FormatTok.Tok.is(tok::kw___declspec)) {
613    nextToken();
614    // We can have macros or attributes in between 'class' and the class name.
615    if (FormatTok.Tok.is(tok::l_paren)) {
616      parseParens();
617    }
618    // The actual identifier can be a nested name specifier.
619    while (FormatTok.Tok.is(tok::identifier) ||
620           FormatTok.Tok.is(tok::coloncolon))
621      nextToken();
622
623    // Note that parsing away template declarations here leads to incorrectly
624    // accepting function declarations as record declarations.
625    // In general, we cannot solve this problem. Consider:
626    // class A<int> B() {}
627    // which can be a function definition or a class definition when B() is a
628    // macro. If we find enough real-world cases where this is a problem, we
629    // can parse for the 'template' keyword in the beginning of the statement,
630    // and thus rule out the record production in case there is no template
631    // (this would still leave us with an ambiguity between template function
632    // and class declarations).
633    if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) {
634      while (FormatTok.Tok.isNot(tok::l_brace)) {
635        if (FormatTok.Tok.is(tok::semi))
636          return;
637        nextToken();
638      }
639    }
640  }
641  if (FormatTok.Tok.is(tok::l_brace))
642    parseBlock();
643  // We fall through to parsing a structural element afterwards, so
644  // class A {} n, m;
645  // will end up in one unwrapped line.
646}
647
648void UnwrappedLineParser::parseObjCProtocolList() {
649  assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
650  do
651    nextToken();
652  while (!eof() && FormatTok.Tok.isNot(tok::greater));
653  nextToken(); // Skip '>'.
654}
655
656void UnwrappedLineParser::parseObjCUntilAtEnd() {
657  do {
658    if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
659      nextToken();
660      addUnwrappedLine();
661      break;
662    }
663    parseStructuralElement();
664  } while (!eof());
665}
666
667void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
668  nextToken();
669  nextToken();  // interface name
670
671  // @interface can be followed by either a base class, or a category.
672  if (FormatTok.Tok.is(tok::colon)) {
673    nextToken();
674    nextToken();  // base class name
675  } else if (FormatTok.Tok.is(tok::l_paren))
676    // Skip category, if present.
677    parseParens();
678
679  if (FormatTok.Tok.is(tok::less))
680    parseObjCProtocolList();
681
682  // If instance variables are present, keep the '{' on the first line too.
683  if (FormatTok.Tok.is(tok::l_brace))
684    parseBlock();
685
686  // With instance variables, this puts '}' on its own line.  Without instance
687  // variables, this ends the @interface line.
688  addUnwrappedLine();
689
690  parseObjCUntilAtEnd();
691}
692
693void UnwrappedLineParser::parseObjCProtocol() {
694  nextToken();
695  nextToken();  // protocol name
696
697  if (FormatTok.Tok.is(tok::less))
698    parseObjCProtocolList();
699
700  // Check for protocol declaration.
701  if (FormatTok.Tok.is(tok::semi)) {
702    nextToken();
703    return addUnwrappedLine();
704  }
705
706  addUnwrappedLine();
707  parseObjCUntilAtEnd();
708}
709
710void UnwrappedLineParser::addUnwrappedLine() {
711  if (Line->Tokens.empty())
712    return;
713  // Consume trailing comments.
714  while (!eof() && FormatTok.NewlinesBefore == 0 &&
715         FormatTok.Tok.is(tok::comment)) {
716    nextToken();
717  }
718  DEBUG({
719    llvm::dbgs() << "Line: ";
720    for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
721                                          E = Line->Tokens.end();
722         I != E; ++I) {
723      llvm::dbgs() << I->Tok.getName() << " ";
724
725    }
726    llvm::dbgs() << "\n";
727  });
728  CurrentLines->push_back(*Line);
729  Line->Tokens.clear();
730  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
731    for (std::vector<UnwrappedLine>::iterator I = PreprocessorDirectives
732             .begin(), E = PreprocessorDirectives.end();
733         I != E; ++I) {
734      CurrentLines->push_back(*I);
735    }
736    PreprocessorDirectives.clear();
737  }
738
739}
740
741bool UnwrappedLineParser::eof() const {
742  return FormatTok.Tok.is(tok::eof);
743}
744
745void UnwrappedLineParser::nextToken() {
746  if (eof())
747    return;
748  Line->Tokens.push_back(FormatTok);
749  if (MustBreakBeforeNextToken) {
750    Line->Tokens.back().MustBreakBefore = true;
751    MustBreakBeforeNextToken = false;
752  }
753  readToken();
754}
755
756void UnwrappedLineParser::readToken() {
757  FormatTok = Tokens->getNextToken();
758  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
759         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
760          FormatTok.IsFirst)) {
761    // If there is an unfinished unwrapped line, we flush the preprocessor
762    // directives only after that unwrapped line was finished later.
763    bool SwitchToPreprocessorLines = !Line->Tokens.empty() &&
764                                     CurrentLines == &Lines;
765    ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
766    parsePPDirective();
767  }
768}
769
770} // end namespace format
771} // end namespace clang
772