UnwrappedLineParser.cpp revision ad3094b7d85a4f8eeaf3f60364d5e8796537f061
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-parser"
17
18#include "UnwrappedLineParser.h"
19#include "llvm/Support/Debug.h"
20
21namespace clang {
22namespace format {
23
24class ScopedDeclarationState {
25public:
26  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
27                         bool MustBeDeclaration)
28      : Line(Line), Stack(Stack) {
29    Line.MustBeDeclaration = MustBeDeclaration;
30    Stack.push_back(MustBeDeclaration);
31  }
32  ~ScopedDeclarationState() {
33    Stack.pop_back();
34    if (!Stack.empty())
35      Line.MustBeDeclaration = Stack.back();
36    else
37      Line.MustBeDeclaration = true;
38  }
39private:
40  UnwrappedLine &Line;
41  std::vector<bool> &Stack;
42};
43
44class ScopedMacroState : public FormatTokenSource {
45public:
46  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
47                   FormatToken &ResetToken, bool &StructuralError)
48      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
49        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
50        StructuralError(StructuralError),
51        PreviousStructuralError(StructuralError) {
52    TokenSource = this;
53    Line.Level = 0;
54    Line.InPPDirective = true;
55  }
56
57  ~ScopedMacroState() {
58    TokenSource = PreviousTokenSource;
59    ResetToken = Token;
60    Line.InPPDirective = false;
61    Line.Level = PreviousLineLevel;
62    StructuralError = PreviousStructuralError;
63  }
64
65  virtual FormatToken getNextToken() {
66    // The \c UnwrappedLineParser guards against this by never calling
67    // \c getNextToken() after it has encountered the first eof token.
68    assert(!eof());
69    Token = PreviousTokenSource->getNextToken();
70    if (eof())
71      return createEOF();
72    return Token;
73  }
74
75  virtual unsigned getPosition() {
76    return PreviousTokenSource->getPosition();
77  }
78
79  virtual FormatToken setPosition(unsigned Position) {
80    Token = PreviousTokenSource->setPosition(Position);
81    return Token;
82  }
83
84private:
85  bool eof() { return Token.HasUnescapedNewline; }
86
87  FormatToken createEOF() {
88    FormatToken FormatTok;
89    FormatTok.Tok.startToken();
90    FormatTok.Tok.setKind(tok::eof);
91    return FormatTok;
92  }
93
94  UnwrappedLine &Line;
95  FormatTokenSource *&TokenSource;
96  FormatToken &ResetToken;
97  unsigned PreviousLineLevel;
98  FormatTokenSource *PreviousTokenSource;
99  bool &StructuralError;
100  bool PreviousStructuralError;
101
102  FormatToken Token;
103};
104
105class ScopedLineState {
106public:
107  ScopedLineState(UnwrappedLineParser &Parser,
108                  bool SwitchToPreprocessorLines = false)
109      : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
110    if (SwitchToPreprocessorLines)
111      Parser.CurrentLines = &Parser.PreprocessorDirectives;
112    PreBlockLine = Parser.Line.take();
113    Parser.Line.reset(new UnwrappedLine());
114    Parser.Line->Level = PreBlockLine->Level;
115    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
116  }
117
118  ~ScopedLineState() {
119    if (!Parser.Line->Tokens.empty()) {
120      Parser.addUnwrappedLine();
121    }
122    assert(Parser.Line->Tokens.empty());
123    Parser.Line.reset(PreBlockLine);
124    Parser.MustBreakBeforeNextToken = true;
125    if (SwitchToPreprocessorLines)
126      Parser.CurrentLines = &Parser.Lines;
127  }
128
129private:
130  UnwrappedLineParser &Parser;
131  const bool SwitchToPreprocessorLines;
132
133  UnwrappedLine *PreBlockLine;
134};
135
136class IndexedTokenSource : public FormatTokenSource {
137public:
138  IndexedTokenSource(ArrayRef<FormatToken> Tokens)
139      : Tokens(Tokens), Position(-1) {}
140
141  virtual FormatToken getNextToken() {
142    ++Position;
143    return Tokens[Position];
144  }
145
146  virtual unsigned getPosition() {
147    assert(Position >= 0);
148    return Position;
149  }
150
151  virtual FormatToken setPosition(unsigned P) {
152    Position = P;
153    return Tokens[Position];
154  }
155
156private:
157  ArrayRef<FormatToken> Tokens;
158  int Position;
159};
160
161UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
162                                         FormatTokenSource &Tokens,
163                                         UnwrappedLineConsumer &Callback)
164    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
165      CurrentLines(&Lines), StructuralError(false), Style(Style),
166      Tokens(NULL), Callback(Callback) {
167  FormatToken Tok;
168  do {
169    Tok = Tokens.getNextToken();
170    AllTokens.push_back(Tok);
171  } while (Tok.Tok.isNot(tok::eof));
172  LBraces.resize(AllTokens.size(), BS_Unknown);
173}
174
175bool UnwrappedLineParser::parse() {
176  DEBUG(llvm::dbgs() << "----\n");
177  IndexedTokenSource TokenSource(AllTokens);
178  Tokens = &TokenSource;
179  readToken();
180  parseFile();
181  for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
182       I != E; ++I) {
183    Callback.consumeUnwrappedLine(*I);
184  }
185
186  // Create line with eof token.
187  pushToken(FormatTok);
188  Callback.consumeUnwrappedLine(*Line);
189  return StructuralError;
190}
191
192void UnwrappedLineParser::parseFile() {
193  ScopedDeclarationState DeclarationState(
194      *Line, DeclarationScopeStack,
195      /*MustBeDeclaration=*/ !Line->InPPDirective);
196  parseLevel(/*HasOpeningBrace=*/ false);
197  // Make sure to format the remaining tokens.
198  flushComments(true);
199  addUnwrappedLine();
200}
201
202void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
203  do {
204    switch (FormatTok.Tok.getKind()) {
205    case tok::comment:
206      nextToken();
207      addUnwrappedLine();
208      break;
209    case tok::l_brace:
210      // FIXME: Add parameter whether this can happen - if this happens, we must
211      // be in a non-declaration context.
212      parseBlock(/*MustBeDeclaration=*/ false);
213      addUnwrappedLine();
214      break;
215    case tok::r_brace:
216      if (HasOpeningBrace)
217        return;
218      StructuralError = true;
219      nextToken();
220      addUnwrappedLine();
221      break;
222    default:
223      parseStructuralElement();
224      break;
225    }
226  } while (!eof());
227}
228
229void UnwrappedLineParser::calculateBraceTypes() {
230  // We'll parse forward through the tokens until we hit
231  // a closing brace or eof - note that getNextToken() will
232  // parse macros, so this will magically work inside macro
233  // definitions, too.
234  unsigned StoredPosition = Tokens->getPosition();
235  unsigned Position = StoredPosition;
236  FormatToken Tok = FormatTok;
237  // Keep a stack of positions of lbrace tokens. We will
238  // update information about whether an lbrace starts a
239  // braced init list or a different block during the loop.
240  SmallVector<unsigned, 8> LBraceStack;
241  assert(Tok.Tok.is(tok::l_brace));
242  do {
243    FormatToken NextTok = Tokens->getNextToken();
244    switch (Tok.Tok.getKind()) {
245    case tok::l_brace:
246      LBraceStack.push_back(Position);
247      break;
248    case tok::r_brace:
249      if (!LBraceStack.empty()) {
250        if (LBraces[LBraceStack.back()] == BS_Unknown) {
251          // If there is a comma, semicolon or right paren after the closing
252          // brace, we assume this is a braced initializer list.
253
254          // FIXME: Note that this currently works only because we do not
255          // use the brace information while inside a braced init list.
256          // Thus, if the parent is a braced init list, we consider all
257          // brace blocks inside it braced init list. That works good enough
258          // for now, but we will need to fix it to correctly handle lambdas.
259          if (NextTok.Tok.is(tok::comma) || NextTok.Tok.is(tok::semi) ||
260              NextTok.Tok.is(tok::r_paren))
261            LBraces[LBraceStack.back()] = BS_BracedInit;
262          else
263            LBraces[LBraceStack.back()] = BS_Block;
264        }
265        LBraceStack.pop_back();
266      }
267      break;
268    case tok::semi:
269    case tok::kw_if:
270    case tok::kw_while:
271    case tok::kw_for:
272    case tok::kw_switch:
273    case tok::kw_try:
274      if (!LBraceStack.empty())
275        LBraces[LBraceStack.back()] = BS_Block;
276      break;
277    default:
278      break;
279    }
280    Tok = NextTok;
281    ++Position;
282  } while (Tok.Tok.isNot(tok::eof));
283  // Assume other blocks for all unclosed opening braces.
284  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
285    if (LBraces[LBraceStack[i]] == BS_Unknown)
286      LBraces[LBraceStack[i]] = BS_Block;
287  }
288  FormatTok = Tokens->setPosition(StoredPosition);
289}
290
291void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
292                                     unsigned AddLevels) {
293  assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
294  nextToken();
295
296  addUnwrappedLine();
297
298  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
299                                          MustBeDeclaration);
300  Line->Level += AddLevels;
301  parseLevel(/*HasOpeningBrace=*/ true);
302
303  if (!FormatTok.Tok.is(tok::r_brace)) {
304    Line->Level -= AddLevels;
305    StructuralError = true;
306    return;
307  }
308
309  nextToken(); // Munch the closing brace.
310  Line->Level -= AddLevels;
311}
312
313void UnwrappedLineParser::parsePPDirective() {
314  assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
315  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
316  nextToken();
317
318  if (FormatTok.Tok.getIdentifierInfo() == NULL) {
319    parsePPUnknown();
320    return;
321  }
322
323  switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
324  case tok::pp_define:
325    parsePPDefine();
326    break;
327  default:
328    parsePPUnknown();
329    break;
330  }
331}
332
333void UnwrappedLineParser::parsePPDefine() {
334  nextToken();
335
336  if (FormatTok.Tok.getKind() != tok::identifier) {
337    parsePPUnknown();
338    return;
339  }
340  nextToken();
341  if (FormatTok.Tok.getKind() == tok::l_paren &&
342      FormatTok.WhitespaceRange.getBegin() ==
343          FormatTok.WhitespaceRange.getEnd()) {
344    parseParens();
345  }
346  addUnwrappedLine();
347  Line->Level = 1;
348
349  // Errors during a preprocessor directive can only affect the layout of the
350  // preprocessor directive, and thus we ignore them. An alternative approach
351  // would be to use the same approach we use on the file level (no
352  // re-indentation if there was a structural error) within the macro
353  // definition.
354  parseFile();
355}
356
357void UnwrappedLineParser::parsePPUnknown() {
358  do {
359    nextToken();
360  } while (!eof());
361  addUnwrappedLine();
362}
363
364// Here we blacklist certain tokens that are not usually the first token in an
365// unwrapped line. This is used in attempt to distinguish macro calls without
366// trailing semicolons from other constructs split to several lines.
367bool tokenCanStartNewLine(clang::Token Tok) {
368  // Semicolon can be a null-statement, l_square can be a start of a macro or
369  // a C++11 attribute, but this doesn't seem to be common.
370  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
371         Tok.isNot(tok::l_square) &&
372         // Tokens that can only be used as binary operators and a part of
373         // overloaded operator names.
374         Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
375         Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
376         Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
377         Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
378         Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
379         Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
380         Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
381         Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
382         Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
383         Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
384         Tok.isNot(tok::lesslessequal) &&
385         // Colon is used in labels, base class lists, initializer lists,
386         // range-based for loops, ternary operator, but should never be the
387         // first token in an unwrapped line.
388         Tok.isNot(tok::colon);
389}
390
391void UnwrappedLineParser::parseStructuralElement() {
392  assert(!FormatTok.Tok.is(tok::l_brace));
393  switch (FormatTok.Tok.getKind()) {
394  case tok::at:
395    nextToken();
396    if (FormatTok.Tok.is(tok::l_brace)) {
397      parseBracedList();
398      break;
399    }
400    switch (FormatTok.Tok.getObjCKeywordID()) {
401    case tok::objc_public:
402    case tok::objc_protected:
403    case tok::objc_package:
404    case tok::objc_private:
405      return parseAccessSpecifier();
406    case tok::objc_interface:
407    case tok::objc_implementation:
408      return parseObjCInterfaceOrImplementation();
409    case tok::objc_protocol:
410      return parseObjCProtocol();
411    case tok::objc_end:
412      return; // Handled by the caller.
413    case tok::objc_optional:
414    case tok::objc_required:
415      nextToken();
416      addUnwrappedLine();
417      return;
418    default:
419      break;
420    }
421    break;
422  case tok::kw_namespace:
423    parseNamespace();
424    return;
425  case tok::kw_inline:
426    nextToken();
427    if (FormatTok.Tok.is(tok::kw_namespace)) {
428      parseNamespace();
429      return;
430    }
431    break;
432  case tok::kw_public:
433  case tok::kw_protected:
434  case tok::kw_private:
435    parseAccessSpecifier();
436    return;
437  case tok::kw_if:
438    parseIfThenElse();
439    return;
440  case tok::kw_for:
441  case tok::kw_while:
442    parseForOrWhileLoop();
443    return;
444  case tok::kw_do:
445    parseDoWhile();
446    return;
447  case tok::kw_switch:
448    parseSwitch();
449    return;
450  case tok::kw_default:
451    nextToken();
452    parseLabel();
453    return;
454  case tok::kw_case:
455    parseCaseLabel();
456    return;
457  case tok::kw_return:
458    parseReturn();
459    return;
460  case tok::kw_extern:
461    nextToken();
462    if (FormatTok.Tok.is(tok::string_literal)) {
463      nextToken();
464      if (FormatTok.Tok.is(tok::l_brace)) {
465        parseBlock(/*MustBeDeclaration=*/ true, 0);
466        addUnwrappedLine();
467        return;
468      }
469    }
470    // In all other cases, parse the declaration.
471    break;
472  default:
473    break;
474  }
475  do {
476    switch (FormatTok.Tok.getKind()) {
477    case tok::at:
478      nextToken();
479      if (FormatTok.Tok.is(tok::l_brace))
480        parseBracedList();
481      break;
482    case tok::kw_enum:
483      parseEnum();
484      break;
485    case tok::kw_struct:
486    case tok::kw_union:
487    case tok::kw_class:
488      parseRecord();
489      // A record declaration or definition is always the start of a structural
490      // element.
491      break;
492    case tok::semi:
493      nextToken();
494      addUnwrappedLine();
495      return;
496    case tok::r_brace:
497      addUnwrappedLine();
498      return;
499    case tok::l_paren:
500      parseParens();
501      break;
502    case tok::l_brace:
503      if (!tryToParseBracedList()) {
504        // A block outside of parentheses must be the last part of a
505        // structural element.
506        // FIXME: Figure out cases where this is not true, and add projections
507        // for them (the one we know is missing are lambdas).
508        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
509            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
510          addUnwrappedLine();
511        parseBlock(/*MustBeDeclaration=*/ false);
512        addUnwrappedLine();
513        return;
514      }
515      // Otherwise this was a braced init list, and the structural
516      // element continues.
517      break;
518    case tok::identifier:
519      nextToken();
520      if (Line->Tokens.size() == 1) {
521        if (FormatTok.Tok.is(tok::colon)) {
522          parseLabel();
523          return;
524        }
525        // Recognize function-like macro usages without trailing semicolon.
526        if (FormatTok.Tok.is(tok::l_paren)) {
527          parseParens();
528          if (FormatTok.HasUnescapedNewline &&
529              tokenCanStartNewLine(FormatTok.Tok)) {
530            addUnwrappedLine();
531            return;
532          }
533        }
534      }
535      break;
536    case tok::equal:
537      nextToken();
538      if (FormatTok.Tok.is(tok::l_brace)) {
539        parseBracedList();
540      }
541      break;
542    default:
543      nextToken();
544      break;
545    }
546  } while (!eof());
547}
548
549bool UnwrappedLineParser::tryToParseBracedList() {
550  if (LBraces[Tokens->getPosition()] == BS_Unknown)
551    calculateBraceTypes();
552  assert(LBraces[Tokens->getPosition()] != BS_Unknown);
553  if (LBraces[Tokens->getPosition()] == BS_Block)
554    return false;
555  parseBracedList();
556  return true;
557}
558
559void UnwrappedLineParser::parseBracedList() {
560  nextToken();
561
562  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
563  // replace this by using parseAssigmentExpression() inside.
564  bool StartOfExpression = true;
565  do {
566    // FIXME: When we start to support lambdas, we'll want to parse them away
567    // here, otherwise our bail-out scenarios below break. The better solution
568    // might be to just implement a more or less complete expression parser.
569    switch (FormatTok.Tok.getKind()) {
570    case tok::l_brace:
571      if (!StartOfExpression) {
572        // Probably a missing closing brace. Bail out.
573        addUnwrappedLine();
574        return;
575      }
576      parseBracedList();
577      StartOfExpression = false;
578      break;
579    case tok::r_brace:
580      nextToken();
581      return;
582    case tok::semi:
583      // Probably a missing closing brace. Bail out.
584      return;
585    case tok::comma:
586      nextToken();
587      StartOfExpression = true;
588      break;
589    default:
590      nextToken();
591      StartOfExpression = false;
592      break;
593    }
594  } while (!eof());
595}
596
597void UnwrappedLineParser::parseReturn() {
598  nextToken();
599
600  do {
601    switch (FormatTok.Tok.getKind()) {
602    case tok::l_brace:
603      parseBracedList();
604      if (FormatTok.Tok.isNot(tok::semi)) {
605        // Assume missing ';'.
606        addUnwrappedLine();
607        return;
608      }
609      break;
610    case tok::l_paren:
611      parseParens();
612      break;
613    case tok::r_brace:
614      // Assume missing ';'.
615      addUnwrappedLine();
616      return;
617    case tok::semi:
618      nextToken();
619      addUnwrappedLine();
620      return;
621    default:
622      nextToken();
623      break;
624    }
625  } while (!eof());
626}
627
628void UnwrappedLineParser::parseParens() {
629  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
630  nextToken();
631  do {
632    switch (FormatTok.Tok.getKind()) {
633    case tok::l_paren:
634      parseParens();
635      break;
636    case tok::r_paren:
637      nextToken();
638      return;
639    case tok::l_brace: {
640      if (!tryToParseBracedList()) {
641        nextToken();
642        ScopedLineState LineState(*this);
643        ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
644                                                /*MustBeDeclaration=*/ false);
645        Line->Level += 1;
646        parseLevel(/*HasOpeningBrace=*/ true);
647        Line->Level -= 1;
648      }
649      break;
650    }
651    case tok::at:
652      nextToken();
653      if (FormatTok.Tok.is(tok::l_brace))
654        parseBracedList();
655      break;
656    default:
657      nextToken();
658      break;
659    }
660  } while (!eof());
661}
662
663void UnwrappedLineParser::parseIfThenElse() {
664  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
665  nextToken();
666  if (FormatTok.Tok.is(tok::l_paren))
667    parseParens();
668  bool NeedsUnwrappedLine = false;
669  if (FormatTok.Tok.is(tok::l_brace)) {
670    parseBlock(/*MustBeDeclaration=*/ false);
671    NeedsUnwrappedLine = true;
672  } else {
673    addUnwrappedLine();
674    ++Line->Level;
675    parseStructuralElement();
676    --Line->Level;
677  }
678  if (FormatTok.Tok.is(tok::kw_else)) {
679    nextToken();
680    if (FormatTok.Tok.is(tok::l_brace)) {
681      parseBlock(/*MustBeDeclaration=*/ false);
682      addUnwrappedLine();
683    } else if (FormatTok.Tok.is(tok::kw_if)) {
684      parseIfThenElse();
685    } else {
686      addUnwrappedLine();
687      ++Line->Level;
688      parseStructuralElement();
689      --Line->Level;
690    }
691  } else if (NeedsUnwrappedLine) {
692    addUnwrappedLine();
693  }
694}
695
696void UnwrappedLineParser::parseNamespace() {
697  assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
698  nextToken();
699  if (FormatTok.Tok.is(tok::identifier))
700    nextToken();
701  if (FormatTok.Tok.is(tok::l_brace)) {
702    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
703      addUnwrappedLine();
704
705    parseBlock(/*MustBeDeclaration=*/ true, 0);
706    // Munch the semicolon after a namespace. This is more common than one would
707    // think. Puttin the semicolon into its own line is very ugly.
708    if (FormatTok.Tok.is(tok::semi))
709      nextToken();
710    addUnwrappedLine();
711  }
712  // FIXME: Add error handling.
713}
714
715void UnwrappedLineParser::parseForOrWhileLoop() {
716  assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
717         "'for' or 'while' expected");
718  nextToken();
719  if (FormatTok.Tok.is(tok::l_paren))
720    parseParens();
721  if (FormatTok.Tok.is(tok::l_brace)) {
722    parseBlock(/*MustBeDeclaration=*/ false);
723    addUnwrappedLine();
724  } else {
725    addUnwrappedLine();
726    ++Line->Level;
727    parseStructuralElement();
728    --Line->Level;
729  }
730}
731
732void UnwrappedLineParser::parseDoWhile() {
733  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
734  nextToken();
735  if (FormatTok.Tok.is(tok::l_brace)) {
736    parseBlock(/*MustBeDeclaration=*/ false);
737  } else {
738    addUnwrappedLine();
739    ++Line->Level;
740    parseStructuralElement();
741    --Line->Level;
742  }
743
744  // FIXME: Add error handling.
745  if (!FormatTok.Tok.is(tok::kw_while)) {
746    addUnwrappedLine();
747    return;
748  }
749
750  nextToken();
751  parseStructuralElement();
752}
753
754void UnwrappedLineParser::parseLabel() {
755  if (FormatTok.Tok.isNot(tok::colon))
756    return;
757  nextToken();
758  unsigned OldLineLevel = Line->Level;
759  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
760    --Line->Level;
761  if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) {
762    parseBlock(/*MustBeDeclaration=*/ false);
763    if (FormatTok.Tok.is(tok::kw_break))
764      parseStructuralElement(); // "break;" after "}" goes on the same line.
765  }
766  addUnwrappedLine();
767  Line->Level = OldLineLevel;
768}
769
770void UnwrappedLineParser::parseCaseLabel() {
771  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
772  // FIXME: fix handling of complex expressions here.
773  do {
774    nextToken();
775  } while (!eof() && !FormatTok.Tok.is(tok::colon));
776  parseLabel();
777}
778
779void UnwrappedLineParser::parseSwitch() {
780  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
781  nextToken();
782  if (FormatTok.Tok.is(tok::l_paren))
783    parseParens();
784  if (FormatTok.Tok.is(tok::l_brace)) {
785    parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1);
786    addUnwrappedLine();
787  } else {
788    addUnwrappedLine();
789    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
790    parseStructuralElement();
791    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
792  }
793}
794
795void UnwrappedLineParser::parseAccessSpecifier() {
796  nextToken();
797  // Otherwise, we don't know what it is, and we'd better keep the next token.
798  if (FormatTok.Tok.is(tok::colon))
799    nextToken();
800  addUnwrappedLine();
801}
802
803void UnwrappedLineParser::parseEnum() {
804  nextToken();
805  if (FormatTok.Tok.is(tok::identifier) ||
806      FormatTok.Tok.is(tok::kw___attribute) ||
807      FormatTok.Tok.is(tok::kw___declspec)) {
808    nextToken();
809    // We can have macros or attributes in between 'enum' and the enum name.
810    if (FormatTok.Tok.is(tok::l_paren)) {
811      parseParens();
812    }
813    if (FormatTok.Tok.is(tok::identifier))
814      nextToken();
815  }
816  if (FormatTok.Tok.is(tok::l_brace)) {
817    nextToken();
818    addUnwrappedLine();
819    ++Line->Level;
820    do {
821      switch (FormatTok.Tok.getKind()) {
822      case tok::l_paren:
823        parseParens();
824        break;
825      case tok::r_brace:
826        addUnwrappedLine();
827        nextToken();
828        --Line->Level;
829        return;
830      case tok::comma:
831        nextToken();
832        addUnwrappedLine();
833        break;
834      default:
835        nextToken();
836        break;
837      }
838    } while (!eof());
839  }
840  // We fall through to parsing a structural element afterwards, so that in
841  // enum A {} n, m;
842  // "} n, m;" will end up in one unwrapped line.
843}
844
845void UnwrappedLineParser::parseRecord() {
846  nextToken();
847  if (FormatTok.Tok.is(tok::identifier) ||
848      FormatTok.Tok.is(tok::kw___attribute) ||
849      FormatTok.Tok.is(tok::kw___declspec)) {
850    nextToken();
851    // We can have macros or attributes in between 'class' and the class name.
852    if (FormatTok.Tok.is(tok::l_paren)) {
853      parseParens();
854    }
855    // The actual identifier can be a nested name specifier, and in macros
856    // it is often token-pasted.
857    while (FormatTok.Tok.is(tok::identifier) ||
858           FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash))
859      nextToken();
860
861    // Note that parsing away template declarations here leads to incorrectly
862    // accepting function declarations as record declarations.
863    // In general, we cannot solve this problem. Consider:
864    // class A<int> B() {}
865    // which can be a function definition or a class definition when B() is a
866    // macro. If we find enough real-world cases where this is a problem, we
867    // can parse for the 'template' keyword in the beginning of the statement,
868    // and thus rule out the record production in case there is no template
869    // (this would still leave us with an ambiguity between template function
870    // and class declarations).
871    if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) {
872      while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) {
873        if (FormatTok.Tok.is(tok::semi))
874          return;
875        nextToken();
876      }
877    }
878  }
879  if (FormatTok.Tok.is(tok::l_brace)) {
880    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
881      addUnwrappedLine();
882
883    parseBlock(/*MustBeDeclaration=*/ true);
884  }
885  // We fall through to parsing a structural element afterwards, so
886  // class A {} n, m;
887  // will end up in one unwrapped line.
888}
889
890void UnwrappedLineParser::parseObjCProtocolList() {
891  assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
892  do
893    nextToken();
894  while (!eof() && FormatTok.Tok.isNot(tok::greater));
895  nextToken(); // Skip '>'.
896}
897
898void UnwrappedLineParser::parseObjCUntilAtEnd() {
899  do {
900    if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
901      nextToken();
902      addUnwrappedLine();
903      break;
904    }
905    parseStructuralElement();
906  } while (!eof());
907}
908
909void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
910  nextToken();
911  nextToken(); // interface name
912
913  // @interface can be followed by either a base class, or a category.
914  if (FormatTok.Tok.is(tok::colon)) {
915    nextToken();
916    nextToken(); // base class name
917  } else if (FormatTok.Tok.is(tok::l_paren))
918    // Skip category, if present.
919    parseParens();
920
921  if (FormatTok.Tok.is(tok::less))
922    parseObjCProtocolList();
923
924  // If instance variables are present, keep the '{' on the first line too.
925  if (FormatTok.Tok.is(tok::l_brace))
926    parseBlock(/*MustBeDeclaration=*/ true);
927
928  // With instance variables, this puts '}' on its own line.  Without instance
929  // variables, this ends the @interface line.
930  addUnwrappedLine();
931
932  parseObjCUntilAtEnd();
933}
934
935void UnwrappedLineParser::parseObjCProtocol() {
936  nextToken();
937  nextToken(); // protocol name
938
939  if (FormatTok.Tok.is(tok::less))
940    parseObjCProtocolList();
941
942  // Check for protocol declaration.
943  if (FormatTok.Tok.is(tok::semi)) {
944    nextToken();
945    return addUnwrappedLine();
946  }
947
948  addUnwrappedLine();
949  parseObjCUntilAtEnd();
950}
951
952void UnwrappedLineParser::addUnwrappedLine() {
953  if (Line->Tokens.empty())
954    return;
955  DEBUG({
956    llvm::dbgs() << "Line(" << Line->Level << ")"
957                 << (Line->InPPDirective ? " MACRO" : "") << ": ";
958    for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
959                                          E = Line->Tokens.end();
960         I != E; ++I) {
961      llvm::dbgs() << I->Tok.getName() << " ";
962
963    }
964    llvm::dbgs() << "\n";
965  });
966  CurrentLines->push_back(*Line);
967  Line->Tokens.clear();
968  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
969    for (std::vector<UnwrappedLine>::iterator
970             I = PreprocessorDirectives.begin(),
971             E = PreprocessorDirectives.end();
972         I != E; ++I) {
973      CurrentLines->push_back(*I);
974    }
975    PreprocessorDirectives.clear();
976  }
977}
978
979bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); }
980
981void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
982  bool JustComments = Line->Tokens.empty();
983  for (SmallVectorImpl<FormatToken>::const_iterator
984           I = CommentsBeforeNextToken.begin(),
985           E = CommentsBeforeNextToken.end();
986       I != E; ++I) {
987    if (I->NewlinesBefore && JustComments) {
988      addUnwrappedLine();
989    }
990    pushToken(*I);
991  }
992  if (NewlineBeforeNext && JustComments) {
993    addUnwrappedLine();
994  }
995  CommentsBeforeNextToken.clear();
996}
997
998void UnwrappedLineParser::nextToken() {
999  if (eof())
1000    return;
1001  flushComments(FormatTok.NewlinesBefore > 0);
1002  pushToken(FormatTok);
1003  readToken();
1004}
1005
1006void UnwrappedLineParser::readToken() {
1007  bool CommentsInCurrentLine = true;
1008  do {
1009    FormatTok = Tokens->getNextToken();
1010    while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
1011           (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) {
1012      // If there is an unfinished unwrapped line, we flush the preprocessor
1013      // directives only after that unwrapped line was finished later.
1014      bool SwitchToPreprocessorLines =
1015          !Line->Tokens.empty() && CurrentLines == &Lines;
1016      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1017      // Comments stored before the preprocessor directive need to be output
1018      // before the preprocessor directive, at the same level as the
1019      // preprocessor directive, as we consider them to apply to the directive.
1020      flushComments(FormatTok.NewlinesBefore > 0);
1021      parsePPDirective();
1022    }
1023    if (!FormatTok.Tok.is(tok::comment))
1024      return;
1025    if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) {
1026      CommentsInCurrentLine = false;
1027    }
1028    if (CommentsInCurrentLine) {
1029      pushToken(FormatTok);
1030    } else {
1031      CommentsBeforeNextToken.push_back(FormatTok);
1032    }
1033  } while (!eof());
1034}
1035
1036void UnwrappedLineParser::pushToken(const FormatToken &Tok) {
1037  Line->Tokens.push_back(Tok);
1038  if (MustBreakBeforeNextToken) {
1039    Line->Tokens.back().MustBreakBefore = true;
1040    MustBreakBeforeNextToken = false;
1041  }
1042}
1043
1044} // end namespace format
1045} // end namespace clang
1046