UnwrappedLineParser.cpp revision 50767d8c8f2f667255bdb99692c0467ce992bc67
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a state
/// where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "UnwrappedLineParser.h"
20#include "llvm/Support/raw_ostream.h"
21
22namespace clang {
23namespace format {
24
25class ScopedMacroState : public FormatTokenSource {
26public:
27  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
28                   FormatToken &ResetToken)
29      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
30        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
31    TokenSource = this;
32    Line.Level = 0;
33    Line.InPPDirective = true;
34  }
35
36  ~ScopedMacroState() {
37    TokenSource = PreviousTokenSource;
38    ResetToken = Token;
39    Line.InPPDirective = false;
40    Line.Level = PreviousLineLevel;
41  }
42
43  virtual FormatToken getNextToken() {
44    // The \c UnwrappedLineParser guards against this by never calling
45    // \c getNextToken() after it has encountered the first eof token.
46    assert(!eof());
47    Token = PreviousTokenSource->getNextToken();
48    if (eof())
49      return createEOF();
50    return Token;
51  }
52
53private:
54  bool eof() {
55    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
56  }
57
58  FormatToken createEOF() {
59    FormatToken FormatTok;
60    FormatTok.Tok.startToken();
61    FormatTok.Tok.setKind(tok::eof);
62    return FormatTok;
63  }
64
65  UnwrappedLine &Line;
66  FormatTokenSource *&TokenSource;
67  FormatToken &ResetToken;
68  unsigned PreviousLineLevel;
69  FormatTokenSource *PreviousTokenSource;
70
71  FormatToken Token;
72};
73
74UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
75                                         FormatTokenSource &Tokens,
76                                         UnwrappedLineConsumer &Callback)
77    : Line(new UnwrappedLine), RootTokenInitialized(false),
78      LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Style(Style),
79      Tokens(&Tokens), Callback(Callback) {
80}
81
82bool UnwrappedLineParser::parse() {
83  readToken();
84  return parseFile();
85}
86
87bool UnwrappedLineParser::parseFile() {
88  bool Error = parseLevel(/*HasOpeningBrace=*/false);
89  // Make sure to format the remaining tokens.
90  addUnwrappedLine();
91  return Error;
92}
93
94bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
95  bool Error = false;
96  do {
97    switch (FormatTok.Tok.getKind()) {
98    case tok::comment:
99      nextToken();
100      addUnwrappedLine();
101      break;
102    case tok::l_brace:
103      Error |= parseBlock();
104      addUnwrappedLine();
105      break;
106    case tok::r_brace:
107      if (HasOpeningBrace) {
108        return false;
109      } else {
110        // Stray '}' is an error.
111        Error = true;
112        nextToken();
113        addUnwrappedLine();
114      }
115      break;
116    default:
117      parseStructuralElement();
118      break;
119    }
120  } while (!eof());
121  return Error;
122}
123
124bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
125  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
126  nextToken();
127
128  addUnwrappedLine();
129
130  Line->Level += AddLevels;
131  parseLevel(/*HasOpeningBrace=*/true);
132  Line->Level -= AddLevels;
133
134  if (!FormatTok.Tok.is(tok::r_brace))
135    return true;
136
137  nextToken();  // Munch the closing brace.
138  return false;
139}
140
141void UnwrappedLineParser::parsePPDirective() {
142  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
143  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
144  nextToken();
145
146  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
147    addUnwrappedLine();
148    return;
149  }
150
151  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
152  case tok::pp_define:
153    parsePPDefine();
154    break;
155  default:
156    parsePPUnknown();
157    break;
158  }
159}
160
161void UnwrappedLineParser::parsePPDefine() {
162  nextToken();
163
164  if (FormatTok.Tok.getKind() != tok::identifier) {
165    parsePPUnknown();
166    return;
167  }
168  nextToken();
169  if (FormatTok.Tok.getKind() == tok::l_paren) {
170    parseParens();
171  }
172  addUnwrappedLine();
173  Line->Level = 1;
174
175  // Errors during a preprocessor directive can only affect the layout of the
176  // preprocessor directive, and thus we ignore them. An alternative approach
177  // would be to use the same approach we use on the file level (no
178  // re-indentation if there was a structural error) within the macro
179  // definition.
180  parseFile();
181}
182
183void UnwrappedLineParser::parsePPUnknown() {
184  do {
185    nextToken();
186  } while (!eof());
187  addUnwrappedLine();
188}
189
190void UnwrappedLineParser::parseComments() {
191  // Consume leading line comments, e.g. for branches without compounds.
192  while (FormatTok.Tok.is(tok::comment)) {
193    nextToken();
194    addUnwrappedLine();
195  }
196}
197
/// Parses one structural element starting at the current token.
///
/// Leading comments are consumed first. The first switch dispatches on the
/// token that starts the element (Objective-C '@' keywords, namespaces, access
/// specifiers, control-flow statements and labels). Anything not handled there
/// is scanned token by token in the loop below until a ';', a '{...}' block,
/// an enum, a struct/class, a label or eof ends the element.
void UnwrappedLineParser::parseStructuralElement() {
  parseComments();

  // Counts the tokens looked at so far; used below to recognize
  // 'identifier:' labels only when the identifier is the first token.
  int TokenNumber = 0;
  switch (FormatTok.Tok.getKind()) {
  case tok::at:
    // Consume the '@' and dispatch on the Objective-C keyword that follows.
    nextToken();
    switch (FormatTok.Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    default:
      break;
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    // 'inline' has been consumed, so it counts as the first token.
    nextToken();
    TokenNumber++;
    if (FormatTok.Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    // Consume 'default'; parseLabel() then handles the ':'.
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  default:
    break;
  }
  // Generic scan over the remaining tokens of the element.
  do {
    ++TokenNumber;
    switch (FormatTok.Tok.getKind()) {
    case tok::kw_enum:
      parseEnum();
      return;
    case tok::kw_struct:  // fallthrough
    case tok::kw_class:
      parseStructOrClass();
      return;
    case tok::semi:
      // A ';' ends the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_brace:
      // A block ends the element.
      parseBlock();
      addUnwrappedLine();
      return;
    case tok::identifier:
      nextToken();
      // An identifier that is the first token and is directly followed by ':'
      // is treated as a label.
      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
        parseLabel();
        return;
      }
      break;
    case tok::equal:
      nextToken();
      // Skip initializers as they will be formatted by a later step.
      if (FormatTok.Tok.is(tok::l_brace))
        nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
301
302void UnwrappedLineParser::parseParens() {
303  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
304  nextToken();
305  do {
306    switch (FormatTok.Tok.getKind()) {
307    case tok::l_paren:
308      parseParens();
309      break;
310    case tok::r_paren:
311      nextToken();
312      return;
313    default:
314      nextToken();
315      break;
316    }
317  } while (!eof());
318}
319
320void UnwrappedLineParser::parseIfThenElse() {
321  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
322  nextToken();
323  parseParens();
324  bool NeedsUnwrappedLine = false;
325  if (FormatTok.Tok.is(tok::l_brace)) {
326    parseBlock();
327    NeedsUnwrappedLine = true;
328  } else {
329    addUnwrappedLine();
330    ++Line->Level;
331    parseStructuralElement();
332    --Line->Level;
333  }
334  if (FormatTok.Tok.is(tok::kw_else)) {
335    nextToken();
336    if (FormatTok.Tok.is(tok::l_brace)) {
337      parseBlock();
338      addUnwrappedLine();
339    } else if (FormatTok.Tok.is(tok::kw_if)) {
340      parseIfThenElse();
341    } else {
342      addUnwrappedLine();
343      ++Line->Level;
344      parseStructuralElement();
345      --Line->Level;
346    }
347  } else if (NeedsUnwrappedLine) {
348    addUnwrappedLine();
349  }
350}
351
352void UnwrappedLineParser::parseNamespace() {
353  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
354  nextToken();
355  if (FormatTok.Tok.is(tok::identifier))
356    nextToken();
357  if (FormatTok.Tok.is(tok::l_brace)) {
358    parseBlock(0);
359    addUnwrappedLine();
360  }
361  // FIXME: Add error handling.
362}
363
364void UnwrappedLineParser::parseForOrWhileLoop() {
365  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
366         "'for' or 'while' expected");
367  nextToken();
368  parseParens();
369  if (FormatTok.Tok.is(tok::l_brace)) {
370    parseBlock();
371    addUnwrappedLine();
372  } else {
373    addUnwrappedLine();
374    ++Line->Level;
375    parseStructuralElement();
376    --Line->Level;
377  }
378}
379
380void UnwrappedLineParser::parseDoWhile() {
381  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
382  nextToken();
383  if (FormatTok.Tok.is(tok::l_brace)) {
384    parseBlock();
385  } else {
386    addUnwrappedLine();
387    ++Line->Level;
388    parseStructuralElement();
389    --Line->Level;
390  }
391
392  // FIXME: Add error handling.
393  if (!FormatTok.Tok.is(tok::kw_while)) {
394    addUnwrappedLine();
395    return;
396  }
397
398  nextToken();
399  parseStructuralElement();
400}
401
402void UnwrappedLineParser::parseLabel() {
403  // FIXME: remove all asserts.
404  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
405  nextToken();
406  unsigned OldLineLevel = Line->Level;
407  if (Line->Level > 0)
408    --Line->Level;
409  if (FormatTok.Tok.is(tok::l_brace)) {
410    parseBlock();
411  }
412  addUnwrappedLine();
413  Line->Level = OldLineLevel;
414}
415
416void UnwrappedLineParser::parseCaseLabel() {
417  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
418  // FIXME: fix handling of complex expressions here.
419  do {
420    nextToken();
421  } while (!eof() && !FormatTok.Tok.is(tok::colon));
422  parseLabel();
423}
424
425void UnwrappedLineParser::parseSwitch() {
426  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
427  nextToken();
428  parseParens();
429  if (FormatTok.Tok.is(tok::l_brace)) {
430    parseBlock(Style.IndentCaseLabels ? 2 : 1);
431    addUnwrappedLine();
432  } else {
433    addUnwrappedLine();
434    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
435    parseStructuralElement();
436    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
437  }
438}
439
440void UnwrappedLineParser::parseAccessSpecifier() {
441  nextToken();
442  // Otherwise, we don't know what it is, and we'd better keep the next token.
443  if (FormatTok.Tok.is(tok::colon))
444    nextToken();
445  addUnwrappedLine();
446}
447
448void UnwrappedLineParser::parseEnum() {
449  bool HasContents = false;
450  do {
451    switch (FormatTok.Tok.getKind()) {
452    case tok::l_brace:
453      nextToken();
454      addUnwrappedLine();
455      ++Line->Level;
456      parseComments();
457      break;
458    case tok::l_paren:
459      parseParens();
460      break;
461    case tok::comma:
462      nextToken();
463      addUnwrappedLine();
464      parseComments();
465      break;
466    case tok::r_brace:
467      if (HasContents)
468        addUnwrappedLine();
469      --Line->Level;
470      nextToken();
471      break;
472    case tok::semi:
473      nextToken();
474      addUnwrappedLine();
475      return;
476    default:
477      HasContents = true;
478      nextToken();
479      break;
480    }
481  } while (!eof());
482}
483
484void UnwrappedLineParser::parseStructOrClass() {
485  nextToken();
486  do {
487    switch (FormatTok.Tok.getKind()) {
488    case tok::l_brace:
489      // FIXME: Think about how to resolve the error handling here.
490      parseBlock();
491      parseStructuralElement();
492      return;
493    case tok::semi:
494      nextToken();
495      addUnwrappedLine();
496      return;
497    default:
498      nextToken();
499      break;
500    }
501  } while (!eof());
502}
503
504void UnwrappedLineParser::parseObjCProtocolList() {
505  assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
506  do
507    nextToken();
508  while (!eof() && FormatTok.Tok.isNot(tok::greater));
509  nextToken(); // Skip '>'.
510}
511
512void UnwrappedLineParser::parseObjCUntilAtEnd() {
513  do {
514    if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
515      nextToken();
516      addUnwrappedLine();
517      break;
518    }
519    parseStructuralElement();
520  } while (!eof());
521}
522
523void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
524  nextToken();
525  nextToken();  // interface name
526
527  // @interface can be followed by either a base class, or a category.
528  if (FormatTok.Tok.is(tok::colon)) {
529    nextToken();
530    nextToken();  // base class name
531  } else if (FormatTok.Tok.is(tok::l_paren))
532    // Skip category, if present.
533    parseParens();
534
535  if (FormatTok.Tok.is(tok::less))
536    parseObjCProtocolList();
537
538  // If instance variables are present, keep the '{' on the first line too.
539  if (FormatTok.Tok.is(tok::l_brace))
540    parseBlock();
541
542  // With instance variables, this puts '}' on its own line.  Without instance
543  // variables, this ends the @interface line.
544  addUnwrappedLine();
545
546  parseObjCUntilAtEnd();
547}
548
549void UnwrappedLineParser::parseObjCProtocol() {
550  nextToken();
551  nextToken();  // protocol name
552
553  if (FormatTok.Tok.is(tok::less))
554    parseObjCProtocolList();
555
556  // Check for protocol declaration.
557  if (FormatTok.Tok.is(tok::semi)) {
558    nextToken();
559    return addUnwrappedLine();
560  }
561
562  addUnwrappedLine();
563  parseObjCUntilAtEnd();
564}
565
566void UnwrappedLineParser::addUnwrappedLine() {
567  if (!RootTokenInitialized)
568    return;
569  // Consume trailing comments.
570  while (!eof() && FormatTok.NewlinesBefore == 0 &&
571         FormatTok.Tok.is(tok::comment)) {
572    nextToken();
573  }
574  Callback.consumeUnwrappedLine(*Line);
575  RootTokenInitialized = false;
576  LastInCurrentLine = NULL;
577}
578
579bool UnwrappedLineParser::eof() const {
580  return FormatTok.Tok.is(tok::eof);
581}
582
583void UnwrappedLineParser::nextToken() {
584  if (eof())
585    return;
586  if (RootTokenInitialized) {
587    assert(LastInCurrentLine->Children.empty());
588    LastInCurrentLine->Children.push_back(FormatTok);
589    LastInCurrentLine = &LastInCurrentLine->Children.back();
590  } else {
591    Line->RootToken = FormatTok;
592    RootTokenInitialized = true;
593    LastInCurrentLine = &Line->RootToken;
594  }
595  if (MustBreakBeforeNextToken) {
596    LastInCurrentLine->MustBreakBefore = true;
597    MustBreakBeforeNextToken = false;
598  }
599  readToken();
600}
601
/// Reads the next token into FormatTok, parsing any preprocessor directives
/// encountered on the way.
///
/// A '#' starts a directive if we are not already inside one and the '#' is
/// either the first token or preceded by an unescaped newline. Each directive
/// is parsed into a separate line object; the state of the line that was in
/// progress is saved before and restored afterwards. The loop repeats because
/// the token following a directive may start another directive.
void UnwrappedLineParser::readToken() {
  FormatTok = Tokens->getNextToken();
  while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
          FormatTok.IsFirst)) {
    // Set the in-progress line aside and continue on a copy of it.
    // NOTE(review): take() presumably releases ownership without deleting the
    // line -- confirm against the smart pointer type of Line.
    UnwrappedLine* StoredLine = Line.take();
    Line.reset(new UnwrappedLine(*StoredLine));
    assert(LastInCurrentLine == NULL || LastInCurrentLine->Children.empty());
    // Save the token-chain state of the in-progress line and start the
    // directive with an empty line.
    FormatToken *StoredLastInCurrentLine = LastInCurrentLine;
    bool PreviousInitialized = RootTokenInitialized;
    RootTokenInitialized = false;
    LastInCurrentLine = NULL;

    parsePPDirective();

    // The directive must have been emitted completely before the saved line
    // is put back.
    assert(!RootTokenInitialized);
    Line.reset(StoredLine);
    RootTokenInitialized = PreviousInitialized;
    LastInCurrentLine = StoredLastInCurrentLine;
    assert(LastInCurrentLine == NULL || LastInCurrentLine->Children.empty());
    // The next token added to a line is marked as requiring a break before it.
    MustBreakBeforeNextToken = true;
  }
}
625
626} // end namespace format
627} // end namespace clang
628