UnwrappedLineParser.cpp revision 308232c0aef773d24693f9e456dc3bc983934f7f
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
15/// where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "format-parser"
20
21#include "UnwrappedLineParser.h"
22#include "clang/Basic/Diagnostic.h"
23#include "llvm/Support/Debug.h"
24
25// Uncomment to get debug output from tests:
26// #define DEBUG_WITH_TYPE(T, X) do { X; } while(0)
27
28namespace clang {
29namespace format {
30
31class ScopedMacroState : public FormatTokenSource {
32public:
33  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
34                   FormatToken &ResetToken)
35      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
36        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
37    TokenSource = this;
38    Line.Level = 0;
39    Line.InPPDirective = true;
40  }
41
42  ~ScopedMacroState() {
43    TokenSource = PreviousTokenSource;
44    ResetToken = Token;
45    Line.InPPDirective = false;
46    Line.Level = PreviousLineLevel;
47  }
48
49  virtual FormatToken getNextToken() {
50    // The \c UnwrappedLineParser guards against this by never calling
51    // \c getNextToken() after it has encountered the first eof token.
52    assert(!eof());
53    Token = PreviousTokenSource->getNextToken();
54    if (eof())
55      return createEOF();
56    return Token;
57  }
58
59private:
60  bool eof() {
61    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
62  }
63
64  FormatToken createEOF() {
65    FormatToken FormatTok;
66    FormatTok.Tok.startToken();
67    FormatTok.Tok.setKind(tok::eof);
68    return FormatTok;
69  }
70
71  UnwrappedLine &Line;
72  FormatTokenSource *&TokenSource;
73  FormatToken &ResetToken;
74  unsigned PreviousLineLevel;
75  FormatTokenSource *PreviousTokenSource;
76
77  FormatToken Token;
78};
79
80class ScopedLineState {
81public:
82  ScopedLineState(UnwrappedLineParser &Parser,
83                  bool SwitchToPreprocessorLines = false)
84      : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
85    if (SwitchToPreprocessorLines)
86      Parser.CurrentLines = &Parser.PreprocessorDirectives;
87    PreBlockLine = Parser.Line.take();
88    Parser.Line.reset(new UnwrappedLine());
89    Parser.Line->Level = PreBlockLine->Level;
90    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
91  }
92
93  ~ScopedLineState() {
94    if (!Parser.Line->Tokens.empty()) {
95      Parser.addUnwrappedLine();
96    }
97    assert(Parser.Line->Tokens.empty());
98    Parser.Line.reset(PreBlockLine);
99    Parser.MustBreakBeforeNextToken = true;
100    if (SwitchToPreprocessorLines)
101      Parser.CurrentLines = &Parser.Lines;
102  }
103
104private:
105  UnwrappedLineParser &Parser;
106  const bool SwitchToPreprocessorLines;
107
108  UnwrappedLine *PreBlockLine;
109};
110
/// \brief Creates a parser that reads its tokens from \p Tokens and hands
/// finished lines to \p Callback.
///
/// The parser starts with an empty current line, no pending forced break and
/// with finished lines being collected in \c Lines.
UnwrappedLineParser::UnwrappedLineParser(
    clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
    FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
      Callback(Callback) {}
117
118bool UnwrappedLineParser::parse() {
119  DEBUG(llvm::dbgs() << "----\n");
120  readToken();
121  bool Error = parseFile();
122  for (std::vector<UnwrappedLine>::iterator I = Lines.begin(),
123                                            E = Lines.end();
124       I != E; ++I) {
125    Callback.consumeUnwrappedLine(*I);
126  }
127  return Error;
128}
129
130bool UnwrappedLineParser::parseFile() {
131  bool Error = parseLevel(/*HasOpeningBrace=*/false);
132  // Make sure to format the remaining tokens.
133  addUnwrappedLine();
134  return Error;
135}
136
137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
138  bool Error = false;
139  do {
140    switch (FormatTok.Tok.getKind()) {
141    case tok::comment:
142      nextToken();
143      addUnwrappedLine();
144      break;
145    case tok::l_brace:
146      Error |= parseBlock();
147      addUnwrappedLine();
148      break;
149    case tok::r_brace:
150      if (HasOpeningBrace) {
151        return false;
152      } else {
153        Diag.Report(FormatTok.Tok.getLocation(),
154                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
155                                         "unexpected '}'"));
156        Error = true;
157        nextToken();
158        addUnwrappedLine();
159      }
160      break;
161    default:
162      parseStructuralElement();
163      break;
164    }
165  } while (!eof());
166  return Error;
167}
168
169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
170  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
171  nextToken();
172
173  addUnwrappedLine();
174
175  Line->Level += AddLevels;
176  parseLevel(/*HasOpeningBrace=*/true);
177  Line->Level -= AddLevels;
178
179  if (!FormatTok.Tok.is(tok::r_brace))
180    return true;
181
182  nextToken();  // Munch the closing brace.
183  return false;
184}
185
186void UnwrappedLineParser::parsePPDirective() {
187  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
188  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
189  nextToken();
190
191  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
192    addUnwrappedLine();
193    return;
194  }
195
196  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
197  case tok::pp_define:
198    parsePPDefine();
199    break;
200  default:
201    parsePPUnknown();
202    break;
203  }
204}
205
206void UnwrappedLineParser::parsePPDefine() {
207  nextToken();
208
209  if (FormatTok.Tok.getKind() != tok::identifier) {
210    parsePPUnknown();
211    return;
212  }
213  nextToken();
214  if (FormatTok.Tok.getKind() == tok::l_paren) {
215    parseParens();
216  }
217  addUnwrappedLine();
218  Line->Level = 1;
219
220  // Errors during a preprocessor directive can only affect the layout of the
221  // preprocessor directive, and thus we ignore them. An alternative approach
222  // would be to use the same approach we use on the file level (no
223  // re-indentation if there was a structural error) within the macro
224  // definition.
225  parseFile();
226}
227
228void UnwrappedLineParser::parsePPUnknown() {
229  do {
230    nextToken();
231  } while (!eof());
232  addUnwrappedLine();
233}
234
235void UnwrappedLineParser::parseComments() {
236  // Consume leading line comments, e.g. for branches without compounds.
237  while (FormatTok.Tok.is(tok::comment)) {
238    nextToken();
239    addUnwrappedLine();
240  }
241}
242
/// \brief Parses one structural element (statement, declaration, label, ...)
/// starting at the current token, which must not be '{'.
///
/// Leading comments are emitted first. Constructs that are recognized by their
/// first token are delegated to the dedicated parse functions; everything else
/// is consumed token by token until a ';', a block, a '}' or eof ends the
/// element.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok.Tok.is(tok::l_brace));
  parseComments();

  // Counts the tokens of this element seen so far; an identifier directly
  // followed by ':' is only treated as a label when it is the first token.
  int TokenNumber = 0;
  switch (FormatTok.Tok.getKind()) {
  case tok::at:
    // Consume the '@' and dispatch on the Objective-C keyword that follows.
    nextToken();
    switch (FormatTok.Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    default:
      break;
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    // 'inline namespace' is a namespace; any other 'inline' falls through to
    // the generic loop below with 'inline' counted as the first token.
    nextToken();
    TokenNumber++;
    if (FormatTok.Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    // parseLabel() expects to start on the ':' after 'default'.
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_return:
    parseReturn();
    return;
  case tok::kw_extern:
    // 'extern "..." {' opens a block whose contents are not indented.
    nextToken();
    if (FormatTok.Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok.Tok.is(tok::l_brace)) {
        parseBlock(0);
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic case: consume tokens until something ends the element.
  do {
    ++TokenNumber;
    switch (FormatTok.Tok.getKind()) {
    case tok::kw_enum:
      parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      parseRecord();
      // A record declaration or definition is always the start of a structural
      // element.
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; it is left for the caller.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_brace:
      // A block outside of parentheses must be the last part of a
      // structural element.
      // FIXME: Figure out cases where this is not true, and add productions
      // for them (the one we know is missing are lambdas).
      parseBlock();
      addUnwrappedLine();
      return;
    case tok::identifier:
      nextToken();
      // "identifier:" at the very start of the element is a label.
      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
        parseLabel();
        return;
      }
      break;
    case tok::equal:
      // A '{' after '=' starts a braced list rather than a block.
      nextToken();
      if (FormatTok.Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
377
378void UnwrappedLineParser::parseBracedList() {
379  nextToken();
380
381  do {
382    switch (FormatTok.Tok.getKind()) {
383    case tok::l_brace:
384      parseBracedList();
385      break;
386    case tok::r_brace:
387      nextToken();
388      return;
389    default:
390      nextToken();
391      break;
392    }
393  } while (!eof());
394}
395
396void UnwrappedLineParser::parseReturn() {
397  nextToken();
398
399  do {
400    switch (FormatTok.Tok.getKind()) {
401    case tok::l_brace:
402      parseBracedList();
403      break;
404    case tok::l_paren:
405      parseParens();
406      break;
407    case tok::r_brace:
408      // Assume missing ';'.
409      addUnwrappedLine();
410      return;
411    case tok::semi:
412      nextToken();
413      addUnwrappedLine();
414      return;
415    default:
416      nextToken();
417      break;
418    }
419  } while (!eof());
420}
421
422void UnwrappedLineParser::parseParens() {
423  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
424  nextToken();
425  do {
426    switch (FormatTok.Tok.getKind()) {
427    case tok::l_paren:
428      parseParens();
429      break;
430    case tok::r_paren:
431      nextToken();
432      return;
433    case tok::l_brace:
434      {
435        nextToken();
436        ScopedLineState LineState(*this);
437        Line->Level += 1;
438        parseLevel(/*HasOpeningBrace=*/true);
439        Line->Level -= 1;
440      }
441      break;
442    default:
443      nextToken();
444      break;
445    }
446  } while (!eof());
447}
448
449void UnwrappedLineParser::parseIfThenElse() {
450  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
451  nextToken();
452  if (FormatTok.Tok.is(tok::l_paren))
453    parseParens();
454  bool NeedsUnwrappedLine = false;
455  if (FormatTok.Tok.is(tok::l_brace)) {
456    parseBlock();
457    NeedsUnwrappedLine = true;
458  } else {
459    addUnwrappedLine();
460    ++Line->Level;
461    parseStructuralElement();
462    --Line->Level;
463  }
464  if (FormatTok.Tok.is(tok::kw_else)) {
465    nextToken();
466    if (FormatTok.Tok.is(tok::l_brace)) {
467      parseBlock();
468      addUnwrappedLine();
469    } else if (FormatTok.Tok.is(tok::kw_if)) {
470      parseIfThenElse();
471    } else {
472      addUnwrappedLine();
473      ++Line->Level;
474      parseStructuralElement();
475      --Line->Level;
476    }
477  } else if (NeedsUnwrappedLine) {
478    addUnwrappedLine();
479  }
480}
481
482void UnwrappedLineParser::parseNamespace() {
483  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
484  nextToken();
485  if (FormatTok.Tok.is(tok::identifier))
486    nextToken();
487  if (FormatTok.Tok.is(tok::l_brace)) {
488    parseBlock(0);
489    addUnwrappedLine();
490  }
491  // FIXME: Add error handling.
492}
493
494void UnwrappedLineParser::parseForOrWhileLoop() {
495  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
496         "'for' or 'while' expected");
497  nextToken();
498  if (FormatTok.Tok.is(tok::l_paren))
499    parseParens();
500  if (FormatTok.Tok.is(tok::l_brace)) {
501    parseBlock();
502    addUnwrappedLine();
503  } else {
504    addUnwrappedLine();
505    ++Line->Level;
506    parseStructuralElement();
507    --Line->Level;
508  }
509}
510
511void UnwrappedLineParser::parseDoWhile() {
512  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
513  nextToken();
514  if (FormatTok.Tok.is(tok::l_brace)) {
515    parseBlock();
516  } else {
517    addUnwrappedLine();
518    ++Line->Level;
519    parseStructuralElement();
520    --Line->Level;
521  }
522
523  // FIXME: Add error handling.
524  if (!FormatTok.Tok.is(tok::kw_while)) {
525    addUnwrappedLine();
526    return;
527  }
528
529  nextToken();
530  parseStructuralElement();
531}
532
533void UnwrappedLineParser::parseLabel() {
534  // FIXME: remove all asserts.
535  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
536  nextToken();
537  unsigned OldLineLevel = Line->Level;
538  if (Line->Level > 0)
539    --Line->Level;
540  if (FormatTok.Tok.is(tok::l_brace)) {
541    parseBlock();
542    if (FormatTok.Tok.is(tok::kw_break))
543      parseStructuralElement(); // "break;" after "}" goes on the same line.
544  }
545  addUnwrappedLine();
546  Line->Level = OldLineLevel;
547}
548
549void UnwrappedLineParser::parseCaseLabel() {
550  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
551  // FIXME: fix handling of complex expressions here.
552  do {
553    nextToken();
554  } while (!eof() && !FormatTok.Tok.is(tok::colon));
555  parseLabel();
556}
557
558void UnwrappedLineParser::parseSwitch() {
559  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
560  nextToken();
561  if (FormatTok.Tok.is(tok::l_paren))
562    parseParens();
563  if (FormatTok.Tok.is(tok::l_brace)) {
564    parseBlock(Style.IndentCaseLabels ? 2 : 1);
565    addUnwrappedLine();
566  } else {
567    addUnwrappedLine();
568    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
569    parseStructuralElement();
570    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
571  }
572}
573
574void UnwrappedLineParser::parseAccessSpecifier() {
575  nextToken();
576  // Otherwise, we don't know what it is, and we'd better keep the next token.
577  if (FormatTok.Tok.is(tok::colon))
578    nextToken();
579  addUnwrappedLine();
580}
581
582void UnwrappedLineParser::parseEnum() {
583  nextToken();
584  if (FormatTok.Tok.is(tok::identifier) ||
585      FormatTok.Tok.is(tok::kw___attribute) ||
586      FormatTok.Tok.is(tok::kw___declspec)) {
587    nextToken();
588    // We can have macros or attributes in between 'enum' and the enum name.
589    if (FormatTok.Tok.is(tok::l_paren)) {
590      parseParens();
591    }
592    if (FormatTok.Tok.is(tok::identifier))
593      nextToken();
594  }
595  if (FormatTok.Tok.is(tok::l_brace)) {
596    nextToken();
597    addUnwrappedLine();
598    ++Line->Level;
599    do {
600      switch (FormatTok.Tok.getKind()) {
601      case tok::comment:
602        // FIXME: Handle comments centrally, instead of special casing
603        // them everywhere.
604        parseComments();
605        break;
606      case tok::l_paren:
607        parseParens();
608        break;
609      case tok::r_brace:
610        addUnwrappedLine();
611        nextToken();
612        --Line->Level;
613        return;
614      case tok::comma:
615        nextToken();
616        addUnwrappedLine();
617        break;
618      default:
619        nextToken();
620        break;
621      }
622    } while (!eof());
623  }
624  // We fall through to parsing a structural element afterwards, so that in
625  // enum A {} n, m;
626  // "} n, m;" will end up in one unwrapped line.
627}
628
629void UnwrappedLineParser::parseRecord() {
630  nextToken();
631  if (FormatTok.Tok.is(tok::identifier) ||
632      FormatTok.Tok.is(tok::kw___attribute) ||
633      FormatTok.Tok.is(tok::kw___declspec)) {
634    nextToken();
635    // We can have macros or attributes in between 'class' and the class name.
636    if (FormatTok.Tok.is(tok::l_paren)) {
637      parseParens();
638    }
639    // The actual identifier can be a nested name specifier.
640    while (FormatTok.Tok.is(tok::identifier) ||
641           FormatTok.Tok.is(tok::coloncolon))
642      nextToken();
643
644    // Note that parsing away template declarations here leads to incorrectly
645    // accepting function declarations as record declarations.
646    // In general, we cannot solve this problem. Consider:
647    // class A<int> B() {}
648    // which can be a function definition or a class definition when B() is a
649    // macro. If we find enough real-world cases where this is a problem, we
650    // can parse for the 'template' keyword in the beginning of the statement,
651    // and thus rule out the record production in case there is no template
652    // (this would still leave us with an ambiguity between template function
653    // and class declarations).
654    if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) {
655      while (FormatTok.Tok.isNot(tok::l_brace)) {
656        if (FormatTok.Tok.is(tok::semi))
657          return;
658        nextToken();
659      }
660    }
661  }
662  if (FormatTok.Tok.is(tok::l_brace))
663    parseBlock();
664  // We fall through to parsing a structural element afterwards, so
665  // class A {} n, m;
666  // will end up in one unwrapped line.
667}
668
669void UnwrappedLineParser::parseObjCProtocolList() {
670  assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
671  do
672    nextToken();
673  while (!eof() && FormatTok.Tok.isNot(tok::greater));
674  nextToken(); // Skip '>'.
675}
676
677void UnwrappedLineParser::parseObjCUntilAtEnd() {
678  do {
679    if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
680      nextToken();
681      addUnwrappedLine();
682      break;
683    }
684    parseStructuralElement();
685  } while (!eof());
686}
687
688void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
689  nextToken();
690  nextToken();  // interface name
691
692  // @interface can be followed by either a base class, or a category.
693  if (FormatTok.Tok.is(tok::colon)) {
694    nextToken();
695    nextToken();  // base class name
696  } else if (FormatTok.Tok.is(tok::l_paren))
697    // Skip category, if present.
698    parseParens();
699
700  if (FormatTok.Tok.is(tok::less))
701    parseObjCProtocolList();
702
703  // If instance variables are present, keep the '{' on the first line too.
704  if (FormatTok.Tok.is(tok::l_brace))
705    parseBlock();
706
707  // With instance variables, this puts '}' on its own line.  Without instance
708  // variables, this ends the @interface line.
709  addUnwrappedLine();
710
711  parseObjCUntilAtEnd();
712}
713
714void UnwrappedLineParser::parseObjCProtocol() {
715  nextToken();
716  nextToken();  // protocol name
717
718  if (FormatTok.Tok.is(tok::less))
719    parseObjCProtocolList();
720
721  // Check for protocol declaration.
722  if (FormatTok.Tok.is(tok::semi)) {
723    nextToken();
724    return addUnwrappedLine();
725  }
726
727  addUnwrappedLine();
728  parseObjCUntilAtEnd();
729}
730
731void UnwrappedLineParser::addUnwrappedLine() {
732  if (Line->Tokens.empty())
733    return;
734  // Consume trailing comments.
735  while (!eof() && FormatTok.NewlinesBefore == 0 &&
736         FormatTok.Tok.is(tok::comment)) {
737    nextToken();
738  }
739  DEBUG({
740    llvm::dbgs() << "Line: ";
741    for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
742                                          E = Line->Tokens.end();
743         I != E; ++I) {
744      llvm::dbgs() << I->Tok.getName() << " ";
745
746    }
747    llvm::dbgs() << "\n";
748  });
749  CurrentLines->push_back(*Line);
750  Line->Tokens.clear();
751  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
752    for (std::vector<UnwrappedLine>::iterator I = PreprocessorDirectives
753             .begin(), E = PreprocessorDirectives.end();
754         I != E; ++I) {
755      CurrentLines->push_back(*I);
756    }
757    PreprocessorDirectives.clear();
758  }
759
760}
761
762bool UnwrappedLineParser::eof() const {
763  return FormatTok.Tok.is(tok::eof);
764}
765
766void UnwrappedLineParser::nextToken() {
767  if (eof())
768    return;
769  Line->Tokens.push_back(FormatTok);
770  if (MustBreakBeforeNextToken) {
771    Line->Tokens.back().MustBreakBefore = true;
772    MustBreakBeforeNextToken = false;
773  }
774  readToken();
775}
776
/// \brief Reads the next token from the token source into \c FormatTok,
/// parsing any preprocessor directives that start at that token.
void UnwrappedLineParser::readToken() {
  FormatTok = Tokens->getNextToken();
  // A '#' starts a directive only when we are not already inside one and the
  // '#' is either the first token or preceded by an unescaped newline.
  // parsePPDirective() leaves the token that follows the directive in
  // FormatTok, so the condition is re-evaluated for consecutive directives.
  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
          FormatTok.IsFirst)) {
    // If there is an unfinished unwrapped line, we flush the preprocessor
    // directives only after that unwrapped line was finished later.
    bool SwitchToPreprocessorLines = !Line->Tokens.empty() &&
                                     CurrentLines == &Lines;
    // Sets the unfinished line aside while the directive is parsed and
    // restores it when BlockState goes out of scope.
    ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
    parsePPDirective();
  }
}
790
791} // end namespace format
792} // end namespace clang
793