UnwrappedLineParser.cpp revision f6fd00b12ae7d89436d32851c9bcc8dd3d046ad3
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a
/// state where it can be used to format real code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "UnwrappedLineParser.h"
20#include "llvm/Support/raw_ostream.h"
21
22namespace clang {
23namespace format {
24
25class ScopedMacroState : public FormatTokenSource {
26public:
27  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
28                   FormatToken &ResetToken)
29      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
30        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
31    TokenSource = this;
32    // FIXME: Back up all other state (errors, line indent, etc) and reset after
33    // parsing the macro.
34    Line.Level = 0;
35    Line.InPPDirective = true;
36  }
37
38  ~ScopedMacroState() {
39    TokenSource = PreviousTokenSource;
40    ResetToken = Token;
41    Line.InPPDirective = false;
42    Line.Level = PreviousLineLevel;
43  }
44
45  virtual FormatToken getNextToken() {
46    // FIXME: Write test that breaks due to a missing
47    // if (eof()) return createEOF();
48    Token = PreviousTokenSource->getNextToken();
49    if (eof())
50      return createEOF();
51    return Token;
52  }
53
54private:
55  bool eof() {
56    return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
57  }
58
59  FormatToken createEOF() {
60    FormatToken FormatTok;
61    FormatTok.Tok.startToken();
62    FormatTok.Tok.setKind(tok::eof);
63    return FormatTok;
64  }
65
66  UnwrappedLine &Line;
67  FormatTokenSource *&TokenSource;
68  FormatToken &ResetToken;
69  unsigned PreviousLineLevel;
70  FormatTokenSource *PreviousTokenSource;
71
72  FormatToken Token;
73};
74
75UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
76                                         FormatTokenSource &Tokens,
77                                         UnwrappedLineConsumer &Callback)
78    : Style(Style), Tokens(&Tokens), Callback(Callback) {
79}
80
81bool UnwrappedLineParser::parse() {
82  readToken();
83  return parseFile();
84}
85
86bool UnwrappedLineParser::parseFile() {
87  bool Error = parseLevel();
88  // Make sure to format the remaining tokens.
89  addUnwrappedLine();
90  return Error;
91}
92
93bool UnwrappedLineParser::parseLevel() {
94  bool Error = false;
95  do {
96    switch (FormatTok.Tok.getKind()) {
97    case tok::comment:
98      nextToken();
99      addUnwrappedLine();
100      break;
101    case tok::l_brace:
102      Error |= parseBlock();
103      addUnwrappedLine();
104      break;
105    case tok::r_brace:
106      // Stray '}' is an error.
107      return true;
108    default:
109      parseStatement();
110      break;
111    }
112  } while (!eof());
113  return Error;
114}
115
116bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
117  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
118  nextToken();
119
120  addUnwrappedLine();
121
122  Line.Level += AddLevels;
123  parseLevel();
124  Line.Level -= AddLevels;
125
126  // FIXME: Add error handling.
127  if (!FormatTok.Tok.is(tok::r_brace))
128    return true;
129
130  nextToken();
131  if (FormatTok.Tok.is(tok::semi))
132    nextToken();
133  return false;
134}
135
136void UnwrappedLineParser::parsePPDirective() {
137  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
138  ScopedMacroState MacroState(Line, Tokens, FormatTok);
139  nextToken();
140
141  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
142    addUnwrappedLine();
143    return;
144  }
145
146  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
147  case tok::pp_define:
148    parsePPDefine();
149    break;
150  default:
151    parsePPUnknown();
152    break;
153  }
154}
155
156void UnwrappedLineParser::parsePPDefine() {
157  nextToken();
158
159  if (FormatTok.Tok.getKind() != tok::identifier) {
160    parsePPUnknown();
161    return;
162  }
163  nextToken();
164  if (FormatTok.Tok.getKind() == tok::l_paren) {
165    parseParens();
166  }
167  addUnwrappedLine();
168  Line.Level = 1;
169  parseFile();
170}
171
172void UnwrappedLineParser::parsePPUnknown() {
173  do {
174    nextToken();
175  } while (!eof());
176  addUnwrappedLine();
177}
178
179void UnwrappedLineParser::parseComments() {
180  // Consume leading line comments, e.g. for branches without compounds.
181  while (FormatTok.Tok.is(tok::comment)) {
182    nextToken();
183    addUnwrappedLine();
184  }
185}
186
187void UnwrappedLineParser::parseStatement() {
188  parseComments();
189
190  int TokenNumber = 0;
191  switch (FormatTok.Tok.getKind()) {
192  case tok::kw_namespace:
193    parseNamespace();
194    return;
195  case tok::kw_inline:
196    nextToken();
197    TokenNumber++;
198    if (FormatTok.Tok.is(tok::kw_namespace)) {
199      parseNamespace();
200      return;
201    }
202    break;
203  case tok::kw_public:
204  case tok::kw_protected:
205  case tok::kw_private:
206    parseAccessSpecifier();
207    return;
208  case tok::kw_if:
209    parseIfThenElse();
210    return;
211  case tok::kw_for:
212  case tok::kw_while:
213    parseForOrWhileLoop();
214    return;
215  case tok::kw_do:
216    parseDoWhile();
217    return;
218  case tok::kw_switch:
219    parseSwitch();
220    return;
221  case tok::kw_default:
222    nextToken();
223    parseLabel();
224    return;
225  case tok::kw_case:
226    parseCaseLabel();
227    return;
228  default:
229    break;
230  }
231  do {
232    ++TokenNumber;
233    switch (FormatTok.Tok.getKind()) {
234    case tok::kw_enum:
235      parseEnum();
236      return;
237    case tok::semi:
238      nextToken();
239      addUnwrappedLine();
240      return;
241    case tok::l_paren:
242      parseParens();
243      break;
244    case tok::l_brace:
245      parseBlock();
246      addUnwrappedLine();
247      return;
248    case tok::identifier:
249      nextToken();
250      if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
251        parseLabel();
252        return;
253      }
254      break;
255    case tok::equal:
256      nextToken();
257      // Skip initializers as they will be formatted by a later step.
258      if (FormatTok.Tok.is(tok::l_brace))
259        nextToken();
260      break;
261    default:
262      nextToken();
263      break;
264    }
265  } while (!eof());
266}
267
268void UnwrappedLineParser::parseParens() {
269  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
270  nextToken();
271  do {
272    switch (FormatTok.Tok.getKind()) {
273    case tok::l_paren:
274      parseParens();
275      break;
276    case tok::r_paren:
277      nextToken();
278      return;
279    default:
280      nextToken();
281      break;
282    }
283  } while (!eof());
284}
285
286void UnwrappedLineParser::parseIfThenElse() {
287  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
288  nextToken();
289  parseParens();
290  bool NeedsUnwrappedLine = false;
291  if (FormatTok.Tok.is(tok::l_brace)) {
292    parseBlock();
293    NeedsUnwrappedLine = true;
294  } else {
295    addUnwrappedLine();
296    ++Line.Level;
297    parseStatement();
298    --Line.Level;
299  }
300  if (FormatTok.Tok.is(tok::kw_else)) {
301    nextToken();
302    if (FormatTok.Tok.is(tok::l_brace)) {
303      parseBlock();
304      addUnwrappedLine();
305    } else if (FormatTok.Tok.is(tok::kw_if)) {
306      parseIfThenElse();
307    } else {
308      addUnwrappedLine();
309      ++Line.Level;
310      parseStatement();
311      --Line.Level;
312    }
313  } else if (NeedsUnwrappedLine) {
314    addUnwrappedLine();
315  }
316}
317
318void UnwrappedLineParser::parseNamespace() {
319  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
320  nextToken();
321  if (FormatTok.Tok.is(tok::identifier))
322    nextToken();
323  if (FormatTok.Tok.is(tok::l_brace)) {
324    parseBlock(0);
325    addUnwrappedLine();
326  }
327  // FIXME: Add error handling.
328}
329
330void UnwrappedLineParser::parseForOrWhileLoop() {
331  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
332         "'for' or 'while' expected");
333  nextToken();
334  parseParens();
335  if (FormatTok.Tok.is(tok::l_brace)) {
336    parseBlock();
337    addUnwrappedLine();
338  } else {
339    addUnwrappedLine();
340    ++Line.Level;
341    parseStatement();
342    --Line.Level;
343  }
344}
345
346void UnwrappedLineParser::parseDoWhile() {
347  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
348  nextToken();
349  if (FormatTok.Tok.is(tok::l_brace)) {
350    parseBlock();
351  } else {
352    addUnwrappedLine();
353    ++Line.Level;
354    parseStatement();
355    --Line.Level;
356  }
357
358  // FIXME: Add error handling.
359  if (!FormatTok.Tok.is(tok::kw_while)) {
360    addUnwrappedLine();
361    return;
362  }
363
364  nextToken();
365  parseStatement();
366}
367
368void UnwrappedLineParser::parseLabel() {
369  // FIXME: remove all asserts.
370  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
371  nextToken();
372  unsigned OldLineLevel = Line.Level;
373  if (Line.Level > 0)
374    --Line.Level;
375  if (FormatTok.Tok.is(tok::l_brace)) {
376    parseBlock();
377  }
378  addUnwrappedLine();
379  Line.Level = OldLineLevel;
380}
381
382void UnwrappedLineParser::parseCaseLabel() {
383  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
384  // FIXME: fix handling of complex expressions here.
385  do {
386    nextToken();
387  } while (!eof() && !FormatTok.Tok.is(tok::colon));
388  parseLabel();
389}
390
391void UnwrappedLineParser::parseSwitch() {
392  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
393  nextToken();
394  parseParens();
395  if (FormatTok.Tok.is(tok::l_brace)) {
396    parseBlock(Style.IndentCaseLabels ? 2 : 1);
397    addUnwrappedLine();
398  } else {
399    addUnwrappedLine();
400    Line.Level += (Style.IndentCaseLabels ? 2 : 1);
401    parseStatement();
402    Line.Level -= (Style.IndentCaseLabels ? 2 : 1);
403  }
404}
405
406void UnwrappedLineParser::parseAccessSpecifier() {
407  nextToken();
408  // Otherwise, we don't know what it is, and we'd better keep the next token.
409  if (FormatTok.Tok.is(tok::colon))
410    nextToken();
411  addUnwrappedLine();
412}
413
414void UnwrappedLineParser::parseEnum() {
415  bool HasContents = false;
416  do {
417    switch (FormatTok.Tok.getKind()) {
418    case tok::l_brace:
419      nextToken();
420      addUnwrappedLine();
421      ++Line.Level;
422      parseComments();
423      break;
424    case tok::l_paren:
425      parseParens();
426      break;
427    case tok::comma:
428      nextToken();
429      addUnwrappedLine();
430      parseComments();
431      break;
432    case tok::r_brace:
433      if (HasContents)
434        addUnwrappedLine();
435      --Line.Level;
436      nextToken();
437      break;
438    case tok::semi:
439      nextToken();
440      addUnwrappedLine();
441      return;
442    default:
443      HasContents = true;
444      nextToken();
445      break;
446    }
447  } while (!eof());
448}
449
450void UnwrappedLineParser::addUnwrappedLine() {
451  // Consume trailing comments.
452  while (!eof() && FormatTok.NewlinesBefore == 0 &&
453         FormatTok.Tok.is(tok::comment)) {
454    nextToken();
455  }
456  Callback.consumeUnwrappedLine(Line);
457  Line.Tokens.clear();
458}
459
460bool UnwrappedLineParser::eof() const {
461  return FormatTok.Tok.is(tok::eof);
462}
463
464void UnwrappedLineParser::nextToken() {
465  if (eof())
466    return;
467  Line.Tokens.push_back(FormatTok);
468  readToken();
469}
470
471void UnwrappedLineParser::readToken() {
472  FormatTok = Tokens->getNextToken();
473  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
474         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
475          FormatTok.IsFirst)) {
476    // FIXME: This is incorrect - the correct way is to create a
477    // data structure that will construct the parts around the preprocessor
478    // directive as a structured \c UnwrappedLine.
479    addUnwrappedLine();
480    parsePPDirective();
481  }
482}
483
484}  // end namespace format
485}  // end namespace clang
486