UnwrappedLineParser.cpp revision de7685487c5d628dd9fe64c4f861cd1888f50fc7
//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a
/// state where it can be used to format real code.
///
//===----------------------------------------------------------------------===//
18
19#include "UnwrappedLineParser.h"
20#include "llvm/Support/raw_ostream.h"
21
22namespace clang {
23namespace format {
24
25class ScopedMacroState : public FormatTokenSource {
26public:
27  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
28                   FormatToken &ResetToken)
29      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
30        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
31    TokenSource = this;
32    Line.Level = 0;
33    Line.InPPDirective = true;
34  }
35
36  ~ScopedMacroState() {
37    TokenSource = PreviousTokenSource;
38    ResetToken = Token;
39    Line.InPPDirective = false;
40    Line.Level = PreviousLineLevel;
41  }
42
43  virtual FormatToken getNextToken() {
44    // The \c UnwrappedLineParser guards against this by never calling
45    // \c getNextToken() after it has encountered the first eof token.
46    assert(!eof());
47    Token = PreviousTokenSource->getNextToken();
48    if (eof())
49      return createEOF();
50    return Token;
51  }
52
53private:
54  bool eof() {
55    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
56  }
57
58  FormatToken createEOF() {
59    FormatToken FormatTok;
60    FormatTok.Tok.startToken();
61    FormatTok.Tok.setKind(tok::eof);
62    return FormatTok;
63  }
64
65  UnwrappedLine &Line;
66  FormatTokenSource *&TokenSource;
67  FormatToken &ResetToken;
68  unsigned PreviousLineLevel;
69  FormatTokenSource *PreviousTokenSource;
70
71  FormatToken Token;
72};
73
74UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
75                                         FormatTokenSource &Tokens,
76                                         UnwrappedLineConsumer &Callback)
77    : Style(Style), Tokens(&Tokens), Callback(Callback) {
78}
79
80bool UnwrappedLineParser::parse() {
81  readToken();
82  return parseFile();
83}
84
85bool UnwrappedLineParser::parseFile() {
86  bool Error = parseLevel(/*HasOpeningBrace=*/false);
87  // Make sure to format the remaining tokens.
88  addUnwrappedLine();
89  return Error;
90}
91
92bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
93  bool Error = false;
94  do {
95    switch (FormatTok.Tok.getKind()) {
96    case tok::comment:
97      nextToken();
98      addUnwrappedLine();
99      break;
100    case tok::l_brace:
101      Error |= parseBlock();
102      addUnwrappedLine();
103      break;
104    case tok::r_brace:
105      if (HasOpeningBrace) {
106        return false;
107      } else {
108        // Stray '}' is an error.
109        Error = true;
110        nextToken();
111        addUnwrappedLine();
112      }
113      break;
114    default:
115      parseStructuralElement();
116      break;
117    }
118  } while (!eof());
119  return Error;
120}
121
122bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
123  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
124  nextToken();
125
126  addUnwrappedLine();
127
128  Line.Level += AddLevels;
129  parseLevel(/*HasOpeningBrace=*/true);
130  Line.Level -= AddLevels;
131
132  if (!FormatTok.Tok.is(tok::r_brace))
133    return true;
134
135  nextToken();  // Munch the closing brace.
136  return false;
137}
138
139void UnwrappedLineParser::parsePPDirective() {
140  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
141  ScopedMacroState MacroState(Line, Tokens, FormatTok);
142  nextToken();
143
144  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
145    addUnwrappedLine();
146    return;
147  }
148
149  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
150  case tok::pp_define:
151    parsePPDefine();
152    break;
153  default:
154    parsePPUnknown();
155    break;
156  }
157}
158
159void UnwrappedLineParser::parsePPDefine() {
160  nextToken();
161
162  if (FormatTok.Tok.getKind() != tok::identifier) {
163    parsePPUnknown();
164    return;
165  }
166  nextToken();
167  if (FormatTok.Tok.getKind() == tok::l_paren) {
168    parseParens();
169  }
170  addUnwrappedLine();
171  Line.Level = 1;
172
173  // Errors during a preprocessor directive can only affect the layout of the
174  // preprocessor directive, and thus we ignore them. An alternative approach
175  // would be to use the same approach we use on the file level (no
176  // re-indentation if there was a structural error) within the macro
177  // definition.
178  parseFile();
179}
180
181void UnwrappedLineParser::parsePPUnknown() {
182  do {
183    nextToken();
184  } while (!eof());
185  addUnwrappedLine();
186}
187
188void UnwrappedLineParser::parseComments() {
189  // Consume leading line comments, e.g. for branches without compounds.
190  while (FormatTok.Tok.is(tok::comment)) {
191    nextToken();
192    addUnwrappedLine();
193  }
194}
195
196void UnwrappedLineParser::parseStructuralElement() {
197  parseComments();
198
199  int TokenNumber = 0;
200  switch (FormatTok.Tok.getKind()) {
201  case tok::kw_namespace:
202    parseNamespace();
203    return;
204  case tok::kw_inline:
205    nextToken();
206    TokenNumber++;
207    if (FormatTok.Tok.is(tok::kw_namespace)) {
208      parseNamespace();
209      return;
210    }
211    break;
212  case tok::kw_public:
213  case tok::kw_protected:
214  case tok::kw_private:
215    parseAccessSpecifier();
216    return;
217  case tok::kw_if:
218    parseIfThenElse();
219    return;
220  case tok::kw_for:
221  case tok::kw_while:
222    parseForOrWhileLoop();
223    return;
224  case tok::kw_do:
225    parseDoWhile();
226    return;
227  case tok::kw_switch:
228    parseSwitch();
229    return;
230  case tok::kw_default:
231    nextToken();
232    parseLabel();
233    return;
234  case tok::kw_case:
235    parseCaseLabel();
236    return;
237  default:
238    break;
239  }
240  do {
241    ++TokenNumber;
242    switch (FormatTok.Tok.getKind()) {
243    case tok::kw_enum:
244      parseEnum();
245      return;
246    case tok::kw_struct:  // fallthrough
247    case tok::kw_class:
248      parseStructOrClass();
249      return;
250    case tok::semi:
251      nextToken();
252      addUnwrappedLine();
253      return;
254    case tok::l_paren:
255      parseParens();
256      break;
257    case tok::l_brace:
258      parseBlock();
259      addUnwrappedLine();
260      return;
261    case tok::identifier:
262      nextToken();
263      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
264        parseLabel();
265        return;
266      }
267      break;
268    case tok::equal:
269      nextToken();
270      // Skip initializers as they will be formatted by a later step.
271      if (FormatTok.Tok.is(tok::l_brace))
272        nextToken();
273      break;
274    default:
275      nextToken();
276      break;
277    }
278  } while (!eof());
279}
280
281void UnwrappedLineParser::parseParens() {
282  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
283  nextToken();
284  do {
285    switch (FormatTok.Tok.getKind()) {
286    case tok::l_paren:
287      parseParens();
288      break;
289    case tok::r_paren:
290      nextToken();
291      return;
292    default:
293      nextToken();
294      break;
295    }
296  } while (!eof());
297}
298
299void UnwrappedLineParser::parseIfThenElse() {
300  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
301  nextToken();
302  parseParens();
303  bool NeedsUnwrappedLine = false;
304  if (FormatTok.Tok.is(tok::l_brace)) {
305    parseBlock();
306    NeedsUnwrappedLine = true;
307  } else {
308    addUnwrappedLine();
309    ++Line.Level;
310    parseStructuralElement();
311    --Line.Level;
312  }
313  if (FormatTok.Tok.is(tok::kw_else)) {
314    nextToken();
315    if (FormatTok.Tok.is(tok::l_brace)) {
316      parseBlock();
317      addUnwrappedLine();
318    } else if (FormatTok.Tok.is(tok::kw_if)) {
319      parseIfThenElse();
320    } else {
321      addUnwrappedLine();
322      ++Line.Level;
323      parseStructuralElement();
324      --Line.Level;
325    }
326  } else if (NeedsUnwrappedLine) {
327    addUnwrappedLine();
328  }
329}
330
331void UnwrappedLineParser::parseNamespace() {
332  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
333  nextToken();
334  if (FormatTok.Tok.is(tok::identifier))
335    nextToken();
336  if (FormatTok.Tok.is(tok::l_brace)) {
337    parseBlock(0);
338    addUnwrappedLine();
339  }
340  // FIXME: Add error handling.
341}
342
343void UnwrappedLineParser::parseForOrWhileLoop() {
344  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
345         "'for' or 'while' expected");
346  nextToken();
347  parseParens();
348  if (FormatTok.Tok.is(tok::l_brace)) {
349    parseBlock();
350    addUnwrappedLine();
351  } else {
352    addUnwrappedLine();
353    ++Line.Level;
354    parseStructuralElement();
355    --Line.Level;
356  }
357}
358
359void UnwrappedLineParser::parseDoWhile() {
360  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
361  nextToken();
362  if (FormatTok.Tok.is(tok::l_brace)) {
363    parseBlock();
364  } else {
365    addUnwrappedLine();
366    ++Line.Level;
367    parseStructuralElement();
368    --Line.Level;
369  }
370
371  // FIXME: Add error handling.
372  if (!FormatTok.Tok.is(tok::kw_while)) {
373    addUnwrappedLine();
374    return;
375  }
376
377  nextToken();
378  parseStructuralElement();
379}
380
381void UnwrappedLineParser::parseLabel() {
382  // FIXME: remove all asserts.
383  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
384  nextToken();
385  unsigned OldLineLevel = Line.Level;
386  if (Line.Level > 0)
387    --Line.Level;
388  if (FormatTok.Tok.is(tok::l_brace)) {
389    parseBlock();
390  }
391  addUnwrappedLine();
392  Line.Level = OldLineLevel;
393}
394
395void UnwrappedLineParser::parseCaseLabel() {
396  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
397  // FIXME: fix handling of complex expressions here.
398  do {
399    nextToken();
400  } while (!eof() && !FormatTok.Tok.is(tok::colon));
401  parseLabel();
402}
403
404void UnwrappedLineParser::parseSwitch() {
405  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
406  nextToken();
407  parseParens();
408  if (FormatTok.Tok.is(tok::l_brace)) {
409    parseBlock(Style.IndentCaseLabels ? 2 : 1);
410    addUnwrappedLine();
411  } else {
412    addUnwrappedLine();
413    Line.Level += (Style.IndentCaseLabels ? 2 : 1);
414    parseStructuralElement();
415    Line.Level -= (Style.IndentCaseLabels ? 2 : 1);
416  }
417}
418
419void UnwrappedLineParser::parseAccessSpecifier() {
420  nextToken();
421  // Otherwise, we don't know what it is, and we'd better keep the next token.
422  if (FormatTok.Tok.is(tok::colon))
423    nextToken();
424  addUnwrappedLine();
425}
426
427void UnwrappedLineParser::parseEnum() {
428  bool HasContents = false;
429  do {
430    switch (FormatTok.Tok.getKind()) {
431    case tok::l_brace:
432      nextToken();
433      addUnwrappedLine();
434      ++Line.Level;
435      parseComments();
436      break;
437    case tok::l_paren:
438      parseParens();
439      break;
440    case tok::comma:
441      nextToken();
442      addUnwrappedLine();
443      parseComments();
444      break;
445    case tok::r_brace:
446      if (HasContents)
447        addUnwrappedLine();
448      --Line.Level;
449      nextToken();
450      break;
451    case tok::semi:
452      nextToken();
453      addUnwrappedLine();
454      return;
455    default:
456      HasContents = true;
457      nextToken();
458      break;
459    }
460  } while (!eof());
461}
462
463void UnwrappedLineParser::parseStructOrClass() {
464  nextToken();
465  do {
466    switch (FormatTok.Tok.getKind()) {
467    case tok::l_brace:
468      // FIXME: Think about how to resolve the error handling here.
469      parseBlock();
470      parseStructuralElement();
471      return;
472    case tok::semi:
473      nextToken();
474      addUnwrappedLine();
475      return;
476    default:
477      nextToken();
478      break;
479    }
480  } while (!eof());
481}
482
483void UnwrappedLineParser::addUnwrappedLine() {
484  // Consume trailing comments.
485  while (!eof() && FormatTok.NewlinesBefore == 0 &&
486         FormatTok.Tok.is(tok::comment)) {
487    nextToken();
488  }
489  Callback.consumeUnwrappedLine(Line);
490  Line.Tokens.clear();
491}
492
493bool UnwrappedLineParser::eof() const {
494  return FormatTok.Tok.is(tok::eof);
495}
496
497void UnwrappedLineParser::nextToken() {
498  if (eof())
499    return;
500  Line.Tokens.push_back(FormatTok);
501  readToken();
502}
503
504void UnwrappedLineParser::readToken() {
505  FormatTok = Tokens->getNextToken();
506  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
507         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
508          FormatTok.IsFirst)) {
509    // FIXME: This is incorrect - the correct way is to create a
510    // data structure that will construct the parts around the preprocessor
511    // directive as a structured \c UnwrappedLine.
512    addUnwrappedLine();
513    parsePPDirective();
514  }
515}
516
517} // end namespace format
518} // end namespace clang
519