UnwrappedLineParser.cpp revision 276a209451b8724807f2741f49c90bf27fc280c7
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a
/// state where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "UnwrappedLineParser.h"
20#include "clang/Basic/Diagnostic.h"
21#include "llvm/Support/raw_ostream.h"
22
23// Uncomment to get debug output from the UnwrappedLineParser.
24// Use in combination with --gtest_filter=*TestName* to limit the output to a
25// single test.
26// #define UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT
27
28namespace clang {
29namespace format {
30
31class ScopedMacroState : public FormatTokenSource {
32public:
33  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
34                   FormatToken &ResetToken)
35      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
36        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
37    TokenSource = this;
38    Line.Level = 0;
39    Line.InPPDirective = true;
40  }
41
42  ~ScopedMacroState() {
43    TokenSource = PreviousTokenSource;
44    ResetToken = Token;
45    Line.InPPDirective = false;
46    Line.Level = PreviousLineLevel;
47  }
48
49  virtual FormatToken getNextToken() {
50    // The \c UnwrappedLineParser guards against this by never calling
51    // \c getNextToken() after it has encountered the first eof token.
52    assert(!eof());
53    Token = PreviousTokenSource->getNextToken();
54    if (eof())
55      return createEOF();
56    return Token;
57  }
58
59private:
60  bool eof() {
61    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
62  }
63
64  FormatToken createEOF() {
65    FormatToken FormatTok;
66    FormatTok.Tok.startToken();
67    FormatTok.Tok.setKind(tok::eof);
68    return FormatTok;
69  }
70
71  UnwrappedLine &Line;
72  FormatTokenSource *&TokenSource;
73  FormatToken &ResetToken;
74  unsigned PreviousLineLevel;
75  FormatTokenSource *PreviousTokenSource;
76
77  FormatToken Token;
78};
79
80class ScopedLineState {
81public:
82  ScopedLineState(UnwrappedLineParser &Parser) : Parser(Parser) {
83    PreBlockLine = Parser.Line.take();
84    Parser.Line.reset(new UnwrappedLine(*PreBlockLine));
85    assert(Parser.LastInCurrentLine == NULL ||
86           Parser.LastInCurrentLine->Children.empty());
87    PreBlockLastToken = Parser.LastInCurrentLine;
88    PreBlockRootTokenInitialized = Parser.RootTokenInitialized;
89    Parser.RootTokenInitialized = false;
90    Parser.LastInCurrentLine = NULL;
91  }
92
93  ~ScopedLineState() {
94    if (Parser.RootTokenInitialized) {
95      Parser.addUnwrappedLine();
96    }
97    assert(!Parser.RootTokenInitialized);
98    Parser.Line.reset(PreBlockLine);
99    Parser.RootTokenInitialized = PreBlockRootTokenInitialized;
100    Parser.LastInCurrentLine = PreBlockLastToken;
101    assert(Parser.LastInCurrentLine == NULL ||
102           Parser.LastInCurrentLine->Children.empty());
103    Parser.MustBreakBeforeNextToken = true;
104  }
105
106private:
107  UnwrappedLineParser &Parser;
108
109  UnwrappedLine *PreBlockLine;
110  FormatToken* PreBlockLastToken;
111  bool PreBlockRootTokenInitialized;
112};
113
114UnwrappedLineParser::UnwrappedLineParser(
115    clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
116    FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
117    : Line(new UnwrappedLine), RootTokenInitialized(false),
118      LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Diag(Diag),
119      Style(Style), Tokens(&Tokens), Callback(Callback) {
120}
121
122bool UnwrappedLineParser::parse() {
123#ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT
124  llvm::errs() << "----\n";
125#endif
126  readToken();
127  return parseFile();
128}
129
130bool UnwrappedLineParser::parseFile() {
131  bool Error = parseLevel(/*HasOpeningBrace=*/false);
132  // Make sure to format the remaining tokens.
133  addUnwrappedLine();
134  return Error;
135}
136
137bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
138  bool Error = false;
139  do {
140    switch (FormatTok.Tok.getKind()) {
141    case tok::comment:
142      nextToken();
143      addUnwrappedLine();
144      break;
145    case tok::l_brace:
146      Error |= parseBlock();
147      addUnwrappedLine();
148      break;
149    case tok::r_brace:
150      if (HasOpeningBrace) {
151        return false;
152      } else {
153        Diag.Report(FormatTok.Tok.getLocation(),
154                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
155                                         "unexpected '}'"));
156        Error = true;
157        nextToken();
158        addUnwrappedLine();
159      }
160      break;
161    default:
162      parseStructuralElement();
163      break;
164    }
165  } while (!eof());
166  return Error;
167}
168
169bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
170  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
171  nextToken();
172
173  if (!FormatTok.Tok.is(tok::r_brace)) {
174    addUnwrappedLine();
175
176    Line->Level += AddLevels;
177    parseLevel(/*HasOpeningBrace=*/true);
178    Line->Level -= AddLevels;
179
180    if (!FormatTok.Tok.is(tok::r_brace))
181      return true;
182
183  }
184  nextToken();  // Munch the closing brace.
185  return false;
186}
187
188void UnwrappedLineParser::parsePPDirective() {
189  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
190  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
191  nextToken();
192
193  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
194    addUnwrappedLine();
195    return;
196  }
197
198  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
199  case tok::pp_define:
200    parsePPDefine();
201    break;
202  default:
203    parsePPUnknown();
204    break;
205  }
206}
207
208void UnwrappedLineParser::parsePPDefine() {
209  nextToken();
210
211  if (FormatTok.Tok.getKind() != tok::identifier) {
212    parsePPUnknown();
213    return;
214  }
215  nextToken();
216  if (FormatTok.Tok.getKind() == tok::l_paren) {
217    parseParens();
218  }
219  addUnwrappedLine();
220  Line->Level = 1;
221
222  // Errors during a preprocessor directive can only affect the layout of the
223  // preprocessor directive, and thus we ignore them. An alternative approach
224  // would be to use the same approach we use on the file level (no
225  // re-indentation if there was a structural error) within the macro
226  // definition.
227  parseFile();
228}
229
230void UnwrappedLineParser::parsePPUnknown() {
231  do {
232    nextToken();
233  } while (!eof());
234  addUnwrappedLine();
235}
236
237void UnwrappedLineParser::parseComments() {
238  // Consume leading line comments, e.g. for branches without compounds.
239  while (FormatTok.Tok.is(tok::comment)) {
240    nextToken();
241    addUnwrappedLine();
242  }
243}
244
245void UnwrappedLineParser::parseStructuralElement() {
246  assert(!FormatTok.Tok.is(tok::l_brace));
247  parseComments();
248
249  int TokenNumber = 0;
250  switch (FormatTok.Tok.getKind()) {
251  case tok::at:
252    nextToken();
253    switch (FormatTok.Tok.getObjCKeywordID()) {
254    case tok::objc_public:
255    case tok::objc_protected:
256    case tok::objc_package:
257    case tok::objc_private:
258      return parseAccessSpecifier();
259    case tok::objc_interface:
260    case tok::objc_implementation:
261      return parseObjCInterfaceOrImplementation();
262    case tok::objc_protocol:
263      return parseObjCProtocol();
264    case tok::objc_end:
265      return; // Handled by the caller.
266    case tok::objc_optional:
267    case tok::objc_required:
268      nextToken();
269      addUnwrappedLine();
270      return;
271    default:
272      break;
273    }
274    break;
275  case tok::kw_namespace:
276    parseNamespace();
277    return;
278  case tok::kw_inline:
279    nextToken();
280    TokenNumber++;
281    if (FormatTok.Tok.is(tok::kw_namespace)) {
282      parseNamespace();
283      return;
284    }
285    break;
286  case tok::kw_public:
287  case tok::kw_protected:
288  case tok::kw_private:
289    parseAccessSpecifier();
290    return;
291  case tok::kw_if:
292    parseIfThenElse();
293    return;
294  case tok::kw_for:
295  case tok::kw_while:
296    parseForOrWhileLoop();
297    return;
298  case tok::kw_do:
299    parseDoWhile();
300    return;
301  case tok::kw_switch:
302    parseSwitch();
303    return;
304  case tok::kw_default:
305    nextToken();
306    parseLabel();
307    return;
308  case tok::kw_case:
309    parseCaseLabel();
310    return;
311  default:
312    break;
313  }
314  do {
315    ++TokenNumber;
316    switch (FormatTok.Tok.getKind()) {
317    case tok::kw_enum:
318      parseEnum();
319      return;
320    case tok::kw_struct:  // fallthrough
321    case tok::kw_class:
322      parseStructOrClass();
323      return;
324    case tok::semi:
325      nextToken();
326      addUnwrappedLine();
327      return;
328    case tok::l_paren:
329      parseParens();
330      break;
331    case tok::l_brace:
332      // A block outside of parentheses must be the last part of a
333      // structural element.
334      // FIXME: Figure out cases where this is not true, and add projections for
335      // them (the one we know is missing are lambdas).
336      parseBlock();
337      addUnwrappedLine();
338      return;
339    case tok::identifier:
340      nextToken();
341      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
342        parseLabel();
343        return;
344      }
345      break;
346    case tok::equal:
347      nextToken();
348      if (FormatTok.Tok.is(tok::l_brace)) {
349        parseBracedList();
350      }
351      break;
352    default:
353      nextToken();
354      break;
355    }
356  } while (!eof());
357}
358
359void UnwrappedLineParser::parseBracedList() {
360  nextToken();
361
362  do {
363    switch (FormatTok.Tok.getKind()) {
364    case tok::l_brace:
365      parseBracedList();
366      break;
367    case tok::r_brace:
368      nextToken();
369      return;
370    default:
371      nextToken();
372      break;
373    }
374  } while (!eof());
375}
376
377void UnwrappedLineParser::parseParens() {
378  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
379  nextToken();
380  do {
381    switch (FormatTok.Tok.getKind()) {
382    case tok::l_paren:
383      parseParens();
384      break;
385    case tok::r_paren:
386      nextToken();
387      return;
388    case tok::l_brace:
389      {
390        nextToken();
391        ScopedLineState LineState(*this);
392        Line->Level += 1;
393        parseLevel(/*HasOpeningBrace=*/true);
394        Line->Level -= 1;
395      }
396      break;
397    default:
398      nextToken();
399      break;
400    }
401  } while (!eof());
402}
403
404void UnwrappedLineParser::parseIfThenElse() {
405  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
406  nextToken();
407  parseParens();
408  bool NeedsUnwrappedLine = false;
409  if (FormatTok.Tok.is(tok::l_brace)) {
410    parseBlock();
411    NeedsUnwrappedLine = true;
412  } else {
413    addUnwrappedLine();
414    ++Line->Level;
415    parseStructuralElement();
416    --Line->Level;
417  }
418  if (FormatTok.Tok.is(tok::kw_else)) {
419    nextToken();
420    if (FormatTok.Tok.is(tok::l_brace)) {
421      parseBlock();
422      addUnwrappedLine();
423    } else if (FormatTok.Tok.is(tok::kw_if)) {
424      parseIfThenElse();
425    } else {
426      addUnwrappedLine();
427      ++Line->Level;
428      parseStructuralElement();
429      --Line->Level;
430    }
431  } else if (NeedsUnwrappedLine) {
432    addUnwrappedLine();
433  }
434}
435
436void UnwrappedLineParser::parseNamespace() {
437  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
438  nextToken();
439  if (FormatTok.Tok.is(tok::identifier))
440    nextToken();
441  if (FormatTok.Tok.is(tok::l_brace)) {
442    parseBlock(0);
443    addUnwrappedLine();
444  }
445  // FIXME: Add error handling.
446}
447
448void UnwrappedLineParser::parseForOrWhileLoop() {
449  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
450         "'for' or 'while' expected");
451  nextToken();
452  parseParens();
453  if (FormatTok.Tok.is(tok::l_brace)) {
454    parseBlock();
455    addUnwrappedLine();
456  } else {
457    addUnwrappedLine();
458    ++Line->Level;
459    parseStructuralElement();
460    --Line->Level;
461  }
462}
463
464void UnwrappedLineParser::parseDoWhile() {
465  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
466  nextToken();
467  if (FormatTok.Tok.is(tok::l_brace)) {
468    parseBlock();
469  } else {
470    addUnwrappedLine();
471    ++Line->Level;
472    parseStructuralElement();
473    --Line->Level;
474  }
475
476  // FIXME: Add error handling.
477  if (!FormatTok.Tok.is(tok::kw_while)) {
478    addUnwrappedLine();
479    return;
480  }
481
482  nextToken();
483  parseStructuralElement();
484}
485
486void UnwrappedLineParser::parseLabel() {
487  // FIXME: remove all asserts.
488  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
489  nextToken();
490  unsigned OldLineLevel = Line->Level;
491  if (Line->Level > 0)
492    --Line->Level;
493  if (FormatTok.Tok.is(tok::l_brace)) {
494    parseBlock();
495  }
496  addUnwrappedLine();
497  Line->Level = OldLineLevel;
498}
499
500void UnwrappedLineParser::parseCaseLabel() {
501  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
502  // FIXME: fix handling of complex expressions here.
503  do {
504    nextToken();
505  } while (!eof() && !FormatTok.Tok.is(tok::colon));
506  parseLabel();
507}
508
509void UnwrappedLineParser::parseSwitch() {
510  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
511  nextToken();
512  parseParens();
513  if (FormatTok.Tok.is(tok::l_brace)) {
514    parseBlock(Style.IndentCaseLabels ? 2 : 1);
515    addUnwrappedLine();
516  } else {
517    addUnwrappedLine();
518    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
519    parseStructuralElement();
520    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
521  }
522}
523
524void UnwrappedLineParser::parseAccessSpecifier() {
525  nextToken();
526  // Otherwise, we don't know what it is, and we'd better keep the next token.
527  if (FormatTok.Tok.is(tok::colon))
528    nextToken();
529  addUnwrappedLine();
530}
531
532void UnwrappedLineParser::parseEnum() {
533  bool HasContents = false;
534  do {
535    switch (FormatTok.Tok.getKind()) {
536    case tok::l_brace:
537      nextToken();
538      addUnwrappedLine();
539      ++Line->Level;
540      parseComments();
541      break;
542    case tok::l_paren:
543      parseParens();
544      break;
545    case tok::comma:
546      nextToken();
547      addUnwrappedLine();
548      parseComments();
549      break;
550    case tok::r_brace:
551      if (HasContents)
552        addUnwrappedLine();
553      --Line->Level;
554      nextToken();
555      break;
556    case tok::semi:
557      nextToken();
558      addUnwrappedLine();
559      return;
560    default:
561      HasContents = true;
562      nextToken();
563      break;
564    }
565  } while (!eof());
566}
567
568void UnwrappedLineParser::parseStructOrClass() {
569  nextToken();
570  do {
571    switch (FormatTok.Tok.getKind()) {
572    case tok::l_brace:
573      // FIXME: Think about how to resolve the error handling here.
574      parseBlock();
575      parseStructuralElement();
576      return;
577    case tok::semi:
578      nextToken();
579      addUnwrappedLine();
580      return;
581    default:
582      nextToken();
583      break;
584    }
585  } while (!eof());
586}
587
588void UnwrappedLineParser::parseObjCProtocolList() {
589  assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
590  do
591    nextToken();
592  while (!eof() && FormatTok.Tok.isNot(tok::greater));
593  nextToken(); // Skip '>'.
594}
595
596void UnwrappedLineParser::parseObjCUntilAtEnd() {
597  do {
598    if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
599      nextToken();
600      addUnwrappedLine();
601      break;
602    }
603    parseStructuralElement();
604  } while (!eof());
605}
606
607void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
608  nextToken();
609  nextToken();  // interface name
610
611  // @interface can be followed by either a base class, or a category.
612  if (FormatTok.Tok.is(tok::colon)) {
613    nextToken();
614    nextToken();  // base class name
615  } else if (FormatTok.Tok.is(tok::l_paren))
616    // Skip category, if present.
617    parseParens();
618
619  if (FormatTok.Tok.is(tok::less))
620    parseObjCProtocolList();
621
622  // If instance variables are present, keep the '{' on the first line too.
623  if (FormatTok.Tok.is(tok::l_brace))
624    parseBlock();
625
626  // With instance variables, this puts '}' on its own line.  Without instance
627  // variables, this ends the @interface line.
628  addUnwrappedLine();
629
630  parseObjCUntilAtEnd();
631}
632
633void UnwrappedLineParser::parseObjCProtocol() {
634  nextToken();
635  nextToken();  // protocol name
636
637  if (FormatTok.Tok.is(tok::less))
638    parseObjCProtocolList();
639
640  // Check for protocol declaration.
641  if (FormatTok.Tok.is(tok::semi)) {
642    nextToken();
643    return addUnwrappedLine();
644  }
645
646  addUnwrappedLine();
647  parseObjCUntilAtEnd();
648}
649
650void UnwrappedLineParser::addUnwrappedLine() {
651  if (!RootTokenInitialized)
652    return;
653  // Consume trailing comments.
654  while (!eof() && FormatTok.NewlinesBefore == 0 &&
655         FormatTok.Tok.is(tok::comment)) {
656    nextToken();
657  }
658#ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT
659  FormatToken* NextToken = &Line->RootToken;
660  llvm::errs() << "Line: ";
661  while (NextToken) {
662    llvm::errs() << NextToken->Tok.getName() << " ";
663    NextToken = NextToken->Children.empty() ? NULL : &NextToken->Children[0];
664  }
665  llvm::errs() << "\n";
666#endif
667  Callback.consumeUnwrappedLine(*Line);
668  RootTokenInitialized = false;
669  LastInCurrentLine = NULL;
670}
671
672bool UnwrappedLineParser::eof() const {
673  return FormatTok.Tok.is(tok::eof);
674}
675
676void UnwrappedLineParser::nextToken() {
677  if (eof())
678    return;
679  if (RootTokenInitialized) {
680    assert(LastInCurrentLine->Children.empty());
681    LastInCurrentLine->Children.push_back(FormatTok);
682    LastInCurrentLine = &LastInCurrentLine->Children.back();
683  } else {
684    Line->RootToken = FormatTok;
685    RootTokenInitialized = true;
686    LastInCurrentLine = &Line->RootToken;
687  }
688  if (MustBreakBeforeNextToken) {
689    LastInCurrentLine->MustBreakBefore = true;
690    MustBreakBeforeNextToken = false;
691  }
692  readToken();
693}
694
695void UnwrappedLineParser::readToken() {
696  FormatTok = Tokens->getNextToken();
697  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
698         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
699          FormatTok.IsFirst)) {
700    ScopedLineState BlockState(*this);
701    parsePPDirective();
702  }
703}
704
705} // end namespace format
706} // end namespace clang
707