UnwrappedLineParser.cpp revision 02eacc2cf85fe11577bf6ff20f6c203e87147c57
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-parser"
17
18#include "UnwrappedLineParser.h"
19#include "llvm/Support/Debug.h"
20
21namespace clang {
22namespace format {
23
/// \brief Abstract, position-addressable source of \c FormatToken pointers.
///
/// The parser pulls its tokens through this interface, which allows the
/// active source to be swapped for a wrapper while a preprocessor directive is
/// being parsed (see \c ScopedMacroState below).
class FormatTokenSource {
public:
  virtual ~FormatTokenSource() {}

  /// \brief Advances the source and returns the token it then refers to.
  virtual FormatToken *getNextToken() = 0;

  /// \brief Returns the current position of the source.
  virtual unsigned getPosition() = 0;

  /// \brief Moves the source to \p Position and returns the token found there.
  virtual FormatToken *setPosition(unsigned Position) = 0;
};
32
33namespace {
34
35class ScopedDeclarationState {
36public:
37  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38                         bool MustBeDeclaration)
39      : Line(Line), Stack(Stack) {
40    Line.MustBeDeclaration = MustBeDeclaration;
41    Stack.push_back(MustBeDeclaration);
42  }
43  ~ScopedDeclarationState() {
44    Stack.pop_back();
45    if (!Stack.empty())
46      Line.MustBeDeclaration = Stack.back();
47    else
48      Line.MustBeDeclaration = true;
49  }
50
51private:
52  UnwrappedLine &Line;
53  std::vector<bool> &Stack;
54};
55
56class ScopedMacroState : public FormatTokenSource {
57public:
58  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59                   FormatToken *&ResetToken, bool &StructuralError)
60      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62        StructuralError(StructuralError),
63        PreviousStructuralError(StructuralError), Token(NULL) {
64    TokenSource = this;
65    Line.Level = 0;
66    Line.InPPDirective = true;
67  }
68
69  ~ScopedMacroState() {
70    TokenSource = PreviousTokenSource;
71    ResetToken = Token;
72    Line.InPPDirective = false;
73    Line.Level = PreviousLineLevel;
74    StructuralError = PreviousStructuralError;
75  }
76
77  virtual FormatToken *getNextToken() {
78    // The \c UnwrappedLineParser guards against this by never calling
79    // \c getNextToken() after it has encountered the first eof token.
80    assert(!eof());
81    Token = PreviousTokenSource->getNextToken();
82    if (eof())
83      return getFakeEOF();
84    return Token;
85  }
86
87  virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
88
89  virtual FormatToken *setPosition(unsigned Position) {
90    Token = PreviousTokenSource->setPosition(Position);
91    return Token;
92  }
93
94private:
95  bool eof() { return Token && Token->HasUnescapedNewline; }
96
97  FormatToken *getFakeEOF() {
98    static bool EOFInitialized = false;
99    static FormatToken FormatTok;
100    if (!EOFInitialized) {
101      FormatTok.Tok.startToken();
102      FormatTok.Tok.setKind(tok::eof);
103      EOFInitialized = true;
104    }
105    return &FormatTok;
106  }
107
108  UnwrappedLine &Line;
109  FormatTokenSource *&TokenSource;
110  FormatToken *&ResetToken;
111  unsigned PreviousLineLevel;
112  FormatTokenSource *PreviousTokenSource;
113  bool &StructuralError;
114  bool PreviousStructuralError;
115
116  FormatToken *Token;
117};
118
119} // end anonymous namespace
120
121class ScopedLineState {
122public:
123  ScopedLineState(UnwrappedLineParser &Parser,
124                  bool SwitchToPreprocessorLines = false)
125      : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
126    if (SwitchToPreprocessorLines)
127      Parser.CurrentLines = &Parser.PreprocessorDirectives;
128    PreBlockLine = Parser.Line.take();
129    Parser.Line.reset(new UnwrappedLine());
130    Parser.Line->Level = PreBlockLine->Level;
131    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132  }
133
134  ~ScopedLineState() {
135    if (!Parser.Line->Tokens.empty()) {
136      Parser.addUnwrappedLine();
137    }
138    assert(Parser.Line->Tokens.empty());
139    Parser.Line.reset(PreBlockLine);
140    Parser.MustBreakBeforeNextToken = true;
141    if (SwitchToPreprocessorLines)
142      Parser.CurrentLines = &Parser.Lines;
143  }
144
145private:
146  UnwrappedLineParser &Parser;
147  const bool SwitchToPreprocessorLines;
148
149  UnwrappedLine *PreBlockLine;
150};
151
152namespace {
153
154class IndexedTokenSource : public FormatTokenSource {
155public:
156  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
157      : Tokens(Tokens), Position(-1) {}
158
159  virtual FormatToken *getNextToken() {
160    ++Position;
161    return Tokens[Position];
162  }
163
164  virtual unsigned getPosition() {
165    assert(Position >= 0);
166    return Position;
167  }
168
169  virtual FormatToken *setPosition(unsigned P) {
170    Position = P;
171    return Tokens[Position];
172  }
173
174private:
175  ArrayRef<FormatToken *> Tokens;
176  int Position;
177};
178
179} // end anonymous namespace
180
/// \brief Sets up a parser over \p Tokens that reports to \p Callback.
///
/// The token source pointer stays NULL until \c parse() installs one.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
      Callback(Callback), AllTokens(Tokens) {
  // One brace-state entry per input token; all start out unclassified.
  LBraces.resize(Tokens.size(), BS_Unknown);
}
189
190bool UnwrappedLineParser::parse() {
191  DEBUG(llvm::dbgs() << "----\n");
192  IndexedTokenSource TokenSource(AllTokens);
193  Tokens = &TokenSource;
194  readToken();
195  parseFile();
196  for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
197       I != E; ++I) {
198    Callback.consumeUnwrappedLine(*I);
199  }
200
201  // Create line with eof token.
202  pushToken(FormatTok);
203  Callback.consumeUnwrappedLine(*Line);
204  return StructuralError;
205}
206
207void UnwrappedLineParser::parseFile() {
208  ScopedDeclarationState DeclarationState(
209      *Line, DeclarationScopeStack,
210      /*MustBeDeclaration=*/ !Line->InPPDirective);
211  parseLevel(/*HasOpeningBrace=*/false);
212  // Make sure to format the remaining tokens.
213  flushComments(true);
214  addUnwrappedLine();
215}
216
217void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
218  do {
219    switch (FormatTok->Tok.getKind()) {
220    case tok::comment:
221      nextToken();
222      addUnwrappedLine();
223      break;
224    case tok::l_brace:
225      // FIXME: Add parameter whether this can happen - if this happens, we must
226      // be in a non-declaration context.
227      parseBlock(/*MustBeDeclaration=*/false);
228      addUnwrappedLine();
229      break;
230    case tok::r_brace:
231      if (HasOpeningBrace)
232        return;
233      StructuralError = true;
234      nextToken();
235      addUnwrappedLine();
236      break;
237    default:
238      parseStructuralElement();
239      break;
240    }
241  } while (!eof());
242}
243
244void UnwrappedLineParser::calculateBraceTypes() {
245  // We'll parse forward through the tokens until we hit
246  // a closing brace or eof - note that getNextToken() will
247  // parse macros, so this will magically work inside macro
248  // definitions, too.
249  unsigned StoredPosition = Tokens->getPosition();
250  unsigned Position = StoredPosition;
251  FormatToken *Tok = FormatTok;
252  // Keep a stack of positions of lbrace tokens. We will
253  // update information about whether an lbrace starts a
254  // braced init list or a different block during the loop.
255  SmallVector<unsigned, 8> LBraceStack;
256  assert(Tok->Tok.is(tok::l_brace));
257  do {
258    // Get next none-comment token.
259    FormatToken *NextTok;
260    do {
261      NextTok = Tokens->getNextToken();
262    } while (NextTok->is(tok::comment));
263
264    switch (Tok->Tok.getKind()) {
265    case tok::l_brace:
266      LBraceStack.push_back(Position);
267      break;
268    case tok::r_brace:
269      if (!LBraceStack.empty()) {
270        if (LBraces[LBraceStack.back()] == BS_Unknown) {
271          // If there is a comma, semicolon or right paren after the closing
272          // brace, we assume this is a braced initializer list.
273
274          // FIXME: Note that this currently works only because we do not
275          // use the brace information while inside a braced init list.
276          // Thus, if the parent is a braced init list, we consider all
277          // brace blocks inside it braced init list. That works good enough
278          // for now, but we will need to fix it to correctly handle lambdas.
279          if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
280                               tok::l_brace, tok::colon))
281            LBraces[LBraceStack.back()] = BS_BracedInit;
282          else
283            LBraces[LBraceStack.back()] = BS_Block;
284        }
285        LBraceStack.pop_back();
286      }
287      break;
288    case tok::semi:
289    case tok::kw_if:
290    case tok::kw_while:
291    case tok::kw_for:
292    case tok::kw_switch:
293    case tok::kw_try:
294      if (!LBraceStack.empty())
295        LBraces[LBraceStack.back()] = BS_Block;
296      break;
297    default:
298      break;
299    }
300    Tok = NextTok;
301    ++Position;
302  } while (Tok->Tok.isNot(tok::eof));
303  // Assume other blocks for all unclosed opening braces.
304  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
305    if (LBraces[LBraceStack[i]] == BS_Unknown)
306      LBraces[LBraceStack[i]] = BS_Block;
307  }
308  FormatTok = Tokens->setPosition(StoredPosition);
309}
310
311void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
312                                     unsigned AddLevels) {
313  assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
314  nextToken();
315
316  addUnwrappedLine();
317
318  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
319                                          MustBeDeclaration);
320  Line->Level += AddLevels;
321  parseLevel(/*HasOpeningBrace=*/true);
322
323  if (!FormatTok->Tok.is(tok::r_brace)) {
324    Line->Level -= AddLevels;
325    StructuralError = true;
326    return;
327  }
328
329  nextToken(); // Munch the closing brace.
330  Line->Level -= AddLevels;
331}
332
333void UnwrappedLineParser::parsePPDirective() {
334  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
335  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
336  nextToken();
337
338  if (FormatTok->Tok.getIdentifierInfo() == NULL) {
339    parsePPUnknown();
340    return;
341  }
342
343  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
344  case tok::pp_define:
345    parsePPDefine();
346    return;
347  case tok::pp_if:
348    parsePPIf();
349    break;
350  case tok::pp_ifdef:
351  case tok::pp_ifndef:
352    parsePPIfdef();
353    break;
354  case tok::pp_else:
355    parsePPElse();
356    break;
357  case tok::pp_elif:
358    parsePPElIf();
359    break;
360  case tok::pp_endif:
361    parsePPEndIf();
362    break;
363  default:
364    parsePPUnknown();
365    break;
366  }
367}
368
369void UnwrappedLineParser::pushPPConditional() {
370  if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
371    PPStack.push_back(PP_Unreachable);
372  else
373    PPStack.push_back(PP_Conditional);
374}
375
376void UnwrappedLineParser::parsePPIf() {
377  nextToken();
378  if ((FormatTok->Tok.isLiteral() &&
379       StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
380           "0") ||
381      FormatTok->Tok.is(tok::kw_false)) {
382    PPStack.push_back(PP_Unreachable);
383  } else {
384    pushPPConditional();
385  }
386  parsePPUnknown();
387}
388
389void UnwrappedLineParser::parsePPIfdef() {
390  pushPPConditional();
391  parsePPUnknown();
392}
393
394void UnwrappedLineParser::parsePPElse() {
395  if (!PPStack.empty())
396    PPStack.pop_back();
397  pushPPConditional();
398  parsePPUnknown();
399}
400
401void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
402
403void UnwrappedLineParser::parsePPEndIf() {
404  if (!PPStack.empty())
405    PPStack.pop_back();
406  parsePPUnknown();
407}
408
409void UnwrappedLineParser::parsePPDefine() {
410  nextToken();
411
412  if (FormatTok->Tok.getKind() != tok::identifier) {
413    parsePPUnknown();
414    return;
415  }
416  nextToken();
417  if (FormatTok->Tok.getKind() == tok::l_paren &&
418      FormatTok->WhitespaceRange.getBegin() ==
419          FormatTok->WhitespaceRange.getEnd()) {
420    parseParens();
421  }
422  addUnwrappedLine();
423  Line->Level = 1;
424
425  // Errors during a preprocessor directive can only affect the layout of the
426  // preprocessor directive, and thus we ignore them. An alternative approach
427  // would be to use the same approach we use on the file level (no
428  // re-indentation if there was a structural error) within the macro
429  // definition.
430  parseFile();
431}
432
433void UnwrappedLineParser::parsePPUnknown() {
434  do {
435    nextToken();
436  } while (!eof());
437  addUnwrappedLine();
438}
439
440// Here we blacklist certain tokens that are not usually the first token in an
441// unwrapped line. This is used in attempt to distinguish macro calls without
442// trailing semicolons from other constructs split to several lines.
443bool tokenCanStartNewLine(clang::Token Tok) {
444  // Semicolon can be a null-statement, l_square can be a start of a macro or
445  // a C++11 attribute, but this doesn't seem to be common.
446  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
447         Tok.isNot(tok::l_square) &&
448         // Tokens that can only be used as binary operators and a part of
449         // overloaded operator names.
450         Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
451         Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
452         Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
453         Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
454         Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
455         Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
456         Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
457         Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
458         Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
459         Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
460         Tok.isNot(tok::lesslessequal) &&
461         // Colon is used in labels, base class lists, initializer lists,
462         // range-based for loops, ternary operator, but should never be the
463         // first token in an unwrapped line.
464         Tok.isNot(tok::colon);
465}
466
/// \brief Parses one structural element (statement, declaration, label, ...)
/// starting at the current token.
///
/// The first switch dispatches constructs that are recognized by their leading
/// token to dedicated parse functions. Everything else is handled by the
/// generic loop below, which consumes tokens until it finds the end of the
/// element (";", "}", a block, a label, or something that looks like a macro
/// invocation) or reaches eof. Must not be called with "{" as current token.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->Tok.is(tok::l_brace));
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // "@{" starts a braced list; afterwards the element continues below.
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      // These keywords form a line of their own.
      nextToken();
      addUnwrappedLine();
      return;
    default:
      break;
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // "inline namespace" is parsed like a namespace; any other use of
    // "inline" continues in the generic loop below.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_return:
    parseReturn();
    return;
  case tok::kw_extern:
    nextToken();
    // extern "..." { ... } is parsed as a block without extra indentation.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, 0);
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic loop: consume tokens until the end of the element is found.
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      parseRecord();
      // A record declaration or definition is always the start of a structural
      // element.
      break;
    case tok::semi:
      // The semicolon ends the element and belongs to its line.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is left for the caller; only finish the line.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::identifier: {
      // Remember the spelling before advancing; it is needed for the
      // free-standing macro heuristic below.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      // The heuristics below only apply if the identifier was the first token
      // of the line.
      if (Line->Tokens.size() == 1) {
        if (FormatTok->Tok.is(tok::colon)) {
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon.
        if (FormatTok->Tok.is(tok::l_paren)) {
          parseParens();
          if (FormatTok->HasUnescapedNewline &&
              tokenCanStartNewLine(FormatTok->Tok)) {
            addUnwrappedLine();
            return;
          }
        } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
                   Text == Text.upper()) {
          // Recognize free-standing macros like Q_OBJECT.
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      nextToken();
      // "= {" starts a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
631
632bool UnwrappedLineParser::tryToParseBracedList() {
633  if (LBraces[Tokens->getPosition()] == BS_Unknown)
634    calculateBraceTypes();
635  assert(LBraces[Tokens->getPosition()] != BS_Unknown);
636  if (LBraces[Tokens->getPosition()] == BS_Block)
637    return false;
638  parseBracedList();
639  return true;
640}
641
642void UnwrappedLineParser::parseBracedList() {
643  nextToken();
644
645  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
646  // replace this by using parseAssigmentExpression() inside.
647  do {
648    // FIXME: When we start to support lambdas, we'll want to parse them away
649    // here, otherwise our bail-out scenarios below break. The better solution
650    // might be to just implement a more or less complete expression parser.
651    switch (FormatTok->Tok.getKind()) {
652    case tok::l_brace:
653      parseBracedList();
654      break;
655    case tok::r_brace:
656      nextToken();
657      return;
658    case tok::semi:
659      // Probably a missing closing brace. Bail out.
660      return;
661    case tok::comma:
662      nextToken();
663      break;
664    default:
665      nextToken();
666      break;
667    }
668  } while (!eof());
669}
670
671void UnwrappedLineParser::parseReturn() {
672  nextToken();
673
674  do {
675    switch (FormatTok->Tok.getKind()) {
676    case tok::l_brace:
677      parseBracedList();
678      if (FormatTok->Tok.isNot(tok::semi)) {
679        // Assume missing ';'.
680        addUnwrappedLine();
681        return;
682      }
683      break;
684    case tok::l_paren:
685      parseParens();
686      break;
687    case tok::r_brace:
688      // Assume missing ';'.
689      addUnwrappedLine();
690      return;
691    case tok::semi:
692      nextToken();
693      addUnwrappedLine();
694      return;
695    default:
696      nextToken();
697      break;
698    }
699  } while (!eof());
700}
701
702void UnwrappedLineParser::parseParens() {
703  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
704  nextToken();
705  do {
706    switch (FormatTok->Tok.getKind()) {
707    case tok::l_paren:
708      parseParens();
709      break;
710    case tok::r_paren:
711      nextToken();
712      return;
713    case tok::r_brace:
714      // A "}" inside parenthesis is an error if there wasn't a matching "{".
715      return;
716    case tok::l_brace: {
717      if (!tryToParseBracedList()) {
718        nextToken();
719        {
720          ScopedLineState LineState(*this);
721          ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
722                                                  /*MustBeDeclaration=*/false);
723          Line->Level += 1;
724          parseLevel(/*HasOpeningBrace=*/true);
725          Line->Level -= 1;
726        }
727        nextToken();
728      }
729      break;
730    }
731    case tok::at:
732      nextToken();
733      if (FormatTok->Tok.is(tok::l_brace))
734        parseBracedList();
735      break;
736    default:
737      nextToken();
738      break;
739    }
740  } while (!eof());
741}
742
743void UnwrappedLineParser::parseIfThenElse() {
744  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
745  nextToken();
746  if (FormatTok->Tok.is(tok::l_paren))
747    parseParens();
748  bool NeedsUnwrappedLine = false;
749  if (FormatTok->Tok.is(tok::l_brace)) {
750    parseBlock(/*MustBeDeclaration=*/false);
751    NeedsUnwrappedLine = true;
752  } else {
753    addUnwrappedLine();
754    ++Line->Level;
755    parseStructuralElement();
756    --Line->Level;
757  }
758  if (FormatTok->Tok.is(tok::kw_else)) {
759    nextToken();
760    if (FormatTok->Tok.is(tok::l_brace)) {
761      parseBlock(/*MustBeDeclaration=*/false);
762      addUnwrappedLine();
763    } else if (FormatTok->Tok.is(tok::kw_if)) {
764      parseIfThenElse();
765    } else {
766      addUnwrappedLine();
767      ++Line->Level;
768      parseStructuralElement();
769      --Line->Level;
770    }
771  } else if (NeedsUnwrappedLine) {
772    addUnwrappedLine();
773  }
774}
775
776void UnwrappedLineParser::parseNamespace() {
777  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
778  nextToken();
779  if (FormatTok->Tok.is(tok::identifier))
780    nextToken();
781  if (FormatTok->Tok.is(tok::l_brace)) {
782    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
783      addUnwrappedLine();
784
785    parseBlock(/*MustBeDeclaration=*/true, 0);
786    // Munch the semicolon after a namespace. This is more common than one would
787    // think. Puttin the semicolon into its own line is very ugly.
788    if (FormatTok->Tok.is(tok::semi))
789      nextToken();
790    addUnwrappedLine();
791  }
792  // FIXME: Add error handling.
793}
794
795void UnwrappedLineParser::parseForOrWhileLoop() {
796  assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
797         "'for' or 'while' expected");
798  nextToken();
799  if (FormatTok->Tok.is(tok::l_paren))
800    parseParens();
801  if (FormatTok->Tok.is(tok::l_brace)) {
802    parseBlock(/*MustBeDeclaration=*/false);
803    addUnwrappedLine();
804  } else {
805    addUnwrappedLine();
806    ++Line->Level;
807    parseStructuralElement();
808    --Line->Level;
809  }
810}
811
812void UnwrappedLineParser::parseDoWhile() {
813  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
814  nextToken();
815  if (FormatTok->Tok.is(tok::l_brace)) {
816    parseBlock(/*MustBeDeclaration=*/false);
817  } else {
818    addUnwrappedLine();
819    ++Line->Level;
820    parseStructuralElement();
821    --Line->Level;
822  }
823
824  // FIXME: Add error handling.
825  if (!FormatTok->Tok.is(tok::kw_while)) {
826    addUnwrappedLine();
827    return;
828  }
829
830  nextToken();
831  parseStructuralElement();
832}
833
834void UnwrappedLineParser::parseLabel() {
835  if (FormatTok->Tok.isNot(tok::colon))
836    return;
837  nextToken();
838  unsigned OldLineLevel = Line->Level;
839  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
840    --Line->Level;
841  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
842    parseBlock(/*MustBeDeclaration=*/false);
843    if (FormatTok->Tok.is(tok::kw_break))
844      parseStructuralElement(); // "break;" after "}" goes on the same line.
845  }
846  addUnwrappedLine();
847  Line->Level = OldLineLevel;
848}
849
850void UnwrappedLineParser::parseCaseLabel() {
851  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
852  // FIXME: fix handling of complex expressions here.
853  do {
854    nextToken();
855  } while (!eof() && !FormatTok->Tok.is(tok::colon));
856  parseLabel();
857}
858
859void UnwrappedLineParser::parseSwitch() {
860  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
861  nextToken();
862  if (FormatTok->Tok.is(tok::l_paren))
863    parseParens();
864  if (FormatTok->Tok.is(tok::l_brace)) {
865    parseBlock(/*MustBeDeclaration=*/false, Style.IndentCaseLabels ? 2 : 1);
866    addUnwrappedLine();
867  } else {
868    addUnwrappedLine();
869    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
870    parseStructuralElement();
871    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
872  }
873}
874
875void UnwrappedLineParser::parseAccessSpecifier() {
876  nextToken();
877  // Otherwise, we don't know what it is, and we'd better keep the next token.
878  if (FormatTok->Tok.is(tok::colon))
879    nextToken();
880  addUnwrappedLine();
881}
882
883void UnwrappedLineParser::parseEnum() {
884  nextToken();
885  if (FormatTok->Tok.is(tok::identifier) ||
886      FormatTok->Tok.is(tok::kw___attribute) ||
887      FormatTok->Tok.is(tok::kw___declspec)) {
888    nextToken();
889    // We can have macros or attributes in between 'enum' and the enum name.
890    if (FormatTok->Tok.is(tok::l_paren)) {
891      parseParens();
892    }
893    if (FormatTok->Tok.is(tok::identifier))
894      nextToken();
895  }
896  if (FormatTok->Tok.is(tok::l_brace)) {
897    nextToken();
898    addUnwrappedLine();
899    ++Line->Level;
900    do {
901      switch (FormatTok->Tok.getKind()) {
902      case tok::l_paren:
903        parseParens();
904        break;
905      case tok::r_brace:
906        addUnwrappedLine();
907        nextToken();
908        --Line->Level;
909        return;
910      case tok::comma:
911        nextToken();
912        addUnwrappedLine();
913        break;
914      default:
915        nextToken();
916        break;
917      }
918    } while (!eof());
919  }
920  // We fall through to parsing a structural element afterwards, so that in
921  // enum A {} n, m;
922  // "} n, m;" will end up in one unwrapped line.
923}
924
925void UnwrappedLineParser::parseRecord() {
926  nextToken();
927  if (FormatTok->Tok.is(tok::identifier) ||
928      FormatTok->Tok.is(tok::kw___attribute) ||
929      FormatTok->Tok.is(tok::kw___declspec)) {
930    nextToken();
931    // We can have macros or attributes in between 'class' and the class name.
932    if (FormatTok->Tok.is(tok::l_paren)) {
933      parseParens();
934    }
935    // The actual identifier can be a nested name specifier, and in macros
936    // it is often token-pasted.
937    while (FormatTok->Tok.is(tok::identifier) ||
938           FormatTok->Tok.is(tok::coloncolon) ||
939           FormatTok->Tok.is(tok::hashhash))
940      nextToken();
941
942    // Note that parsing away template declarations here leads to incorrectly
943    // accepting function declarations as record declarations.
944    // In general, we cannot solve this problem. Consider:
945    // class A<int> B() {}
946    // which can be a function definition or a class definition when B() is a
947    // macro. If we find enough real-world cases where this is a problem, we
948    // can parse for the 'template' keyword in the beginning of the statement,
949    // and thus rule out the record production in case there is no template
950    // (this would still leave us with an ambiguity between template function
951    // and class declarations).
952    if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
953      while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
954        if (FormatTok->Tok.is(tok::semi))
955          return;
956        nextToken();
957      }
958    }
959  }
960  if (FormatTok->Tok.is(tok::l_brace)) {
961    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
962      addUnwrappedLine();
963
964    parseBlock(/*MustBeDeclaration=*/true);
965  }
966  // We fall through to parsing a structural element afterwards, so
967  // class A {} n, m;
968  // will end up in one unwrapped line.
969}
970
971void UnwrappedLineParser::parseObjCProtocolList() {
972  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
973  do
974    nextToken();
975  while (!eof() && FormatTok->Tok.isNot(tok::greater));
976  nextToken(); // Skip '>'.
977}
978
979void UnwrappedLineParser::parseObjCUntilAtEnd() {
980  do {
981    if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
982      nextToken();
983      addUnwrappedLine();
984      break;
985    }
986    parseStructuralElement();
987  } while (!eof());
988}
989
990void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
991  nextToken();
992  nextToken(); // interface name
993
994  // @interface can be followed by either a base class, or a category.
995  if (FormatTok->Tok.is(tok::colon)) {
996    nextToken();
997    nextToken(); // base class name
998  } else if (FormatTok->Tok.is(tok::l_paren))
999    // Skip category, if present.
1000    parseParens();
1001
1002  if (FormatTok->Tok.is(tok::less))
1003    parseObjCProtocolList();
1004
1005  // If instance variables are present, keep the '{' on the first line too.
1006  if (FormatTok->Tok.is(tok::l_brace))
1007    parseBlock(/*MustBeDeclaration=*/true);
1008
1009  // With instance variables, this puts '}' on its own line.  Without instance
1010  // variables, this ends the @interface line.
1011  addUnwrappedLine();
1012
1013  parseObjCUntilAtEnd();
1014}
1015
1016void UnwrappedLineParser::parseObjCProtocol() {
1017  nextToken();
1018  nextToken(); // protocol name
1019
1020  if (FormatTok->Tok.is(tok::less))
1021    parseObjCProtocolList();
1022
1023  // Check for protocol declaration.
1024  if (FormatTok->Tok.is(tok::semi)) {
1025    nextToken();
1026    return addUnwrappedLine();
1027  }
1028
1029  addUnwrappedLine();
1030  parseObjCUntilAtEnd();
1031}
1032
1033void UnwrappedLineParser::addUnwrappedLine() {
1034  if (Line->Tokens.empty())
1035    return;
1036  DEBUG({
1037    llvm::dbgs() << "Line(" << Line->Level << ")"
1038                 << (Line->InPPDirective ? " MACRO" : "") << ": ";
1039    for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(),
1040                                            E = Line->Tokens.end();
1041         I != E; ++I) {
1042      llvm::dbgs() << (*I)->Tok.getName() << " ";
1043    }
1044    llvm::dbgs() << "\n";
1045  });
1046  CurrentLines->push_back(*Line);
1047  Line->Tokens.clear();
1048  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1049    for (std::vector<UnwrappedLine>::iterator
1050             I = PreprocessorDirectives.begin(),
1051             E = PreprocessorDirectives.end();
1052         I != E; ++I) {
1053      CurrentLines->push_back(*I);
1054    }
1055    PreprocessorDirectives.clear();
1056  }
1057}
1058
1059bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1060
1061void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1062  bool JustComments = Line->Tokens.empty();
1063  for (SmallVectorImpl<FormatToken *>::const_iterator
1064           I = CommentsBeforeNextToken.begin(),
1065           E = CommentsBeforeNextToken.end();
1066       I != E; ++I) {
1067    if ((*I)->NewlinesBefore && JustComments) {
1068      addUnwrappedLine();
1069    }
1070    pushToken(*I);
1071  }
1072  if (NewlineBeforeNext && JustComments) {
1073    addUnwrappedLine();
1074  }
1075  CommentsBeforeNextToken.clear();
1076}
1077
1078void UnwrappedLineParser::nextToken() {
1079  if (eof())
1080    return;
1081  flushComments(FormatTok->NewlinesBefore > 0);
1082  pushToken(FormatTok);
1083  readToken();
1084}
1085
/// \brief Reads tokens from the token source until \c FormatTok is the next
/// token the parser should look at.
///
/// Preprocessor directives encountered on the way are parsed immediately,
/// tokens inside unreachable preprocessor branches are skipped, and comments
/// are either appended to the current line or stored in
/// \c CommentsBeforeNextToken.
void UnwrappedLineParser::readToken() {
  // Stays true as long as the comments read still belong to the current line.
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    // A "#" that is the first token or follows an unescaped newline starts a
    // directive, unless a directive is already being parsed.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines =
          !Line->Tokens.empty() && CurrentLines == &Lines;
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(FormatTok->NewlinesBefore > 0);
      parsePPDirective();
    }

    // Skip tokens of unreachable branches; tokens of the directives
    // themselves are still processed.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // Any non-comment token is handed to the parser.
    if (!FormatTok->Tok.is(tok::comment))
      return;
    // From the first comment that starts a new line on, comments are stored
    // for the next token instead of being appended to the current line.
    if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      pushToken(FormatTok);
    } else {
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}
1121
1122void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1123  Line->Tokens.push_back(Tok);
1124  if (MustBreakBeforeNextToken) {
1125    Line->Tokens.back()->MustBreakBefore = true;
1126    MustBreakBeforeNextToken = false;
1127  }
1128}
1129
1130} // end namespace format
1131} // end namespace clang
1132