1//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the declaration of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
17#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
18
#include "FormatToken.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
#include <list>
#include <memory>
#include <stack>
#include <vector>
23
24namespace clang {
25namespace format {
26
27struct UnwrappedLineNode;
28
/// \brief An unwrapped line is a sequence of \c Token objects that we would
/// like to put on a single line if there were no column limit.
31///
32/// This is used as a main interface between the \c UnwrappedLineParser and the
33/// \c UnwrappedLineFormatter. The key property is that changing the formatting
34/// within an unwrapped line does not affect any other unwrapped lines.
35struct UnwrappedLine {
36  UnwrappedLine();
37
38  // FIXME: Don't use std::list here.
39  /// \brief The \c Tokens comprising this \c UnwrappedLine.
40  std::list<UnwrappedLineNode> Tokens;
41
42  /// \brief The indent level of the \c UnwrappedLine.
43  unsigned Level;
44
45  /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
46  bool InPPDirective;
47
48  bool MustBeDeclaration;
49};
50
51class UnwrappedLineConsumer {
52public:
53  virtual ~UnwrappedLineConsumer() {}
54  virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
55  virtual void finishRun() = 0;
56};
57
58class FormatTokenSource;
59
60class UnwrappedLineParser {
61public:
62  UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens,
63                      UnwrappedLineConsumer &Callback);
64
65  /// Returns true in case of a structural error.
66  bool parse();
67
68private:
69  void reset();
70  void parseFile();
71  void parseLevel(bool HasOpeningBrace);
72  void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
73                  bool MunchSemi = true);
74  void parseChildBlock();
75  void parsePPDirective();
76  void parsePPDefine();
77  void parsePPIf(bool IfDef);
78  void parsePPElIf();
79  void parsePPElse();
80  void parsePPEndIf();
81  void parsePPUnknown();
82  void parseStructuralElement();
83  bool tryToParseBracedList();
84  bool parseBracedList(bool ContinueOnSemicolons = false);
85  void parseParens();
86  void parseSquare();
87  void parseIfThenElse();
88  void parseTryCatch();
89  void parseForOrWhileLoop();
90  void parseDoWhile();
91  void parseLabel();
92  void parseCaseLabel();
93  void parseSwitch();
94  void parseNamespace();
95  void parseAccessSpecifier();
96  void parseEnum();
97  void parseRecord();
98  void parseObjCProtocolList();
99  void parseObjCUntilAtEnd();
100  void parseObjCInterfaceOrImplementation();
101  void parseObjCProtocol();
102  bool tryToParseLambda();
103  bool tryToParseLambdaIntroducer();
104  void tryToParseJSFunction();
105  void addUnwrappedLine();
106  bool eof() const;
107  void nextToken();
108  void readToken();
109  void flushComments(bool NewlineBeforeNext);
110  void pushToken(FormatToken *Tok);
111  void calculateBraceTypes();
112
113  // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
114  // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
115  // this branch either cannot be taken (for example '#if false'), or should
116  // not be taken in this round.
117  void conditionalCompilationCondition(bool Unreachable);
118  void conditionalCompilationStart(bool Unreachable);
119  void conditionalCompilationAlternative();
120  void conditionalCompilationEnd();
121
122  bool isOnNewLine(const FormatToken &FormatTok);
123
124  // FIXME: We are constantly running into bugs where Line.Level is incorrectly
125  // subtracted from beyond 0. Introduce a method to subtract from Line.Level
126  // and use that everywhere in the Parser.
127  std::unique_ptr<UnwrappedLine> Line;
128
129  // Comments are sorted into unwrapped lines by whether they are in the same
130  // line as the previous token, or not. If not, they belong to the next token.
131  // Since the next token might already be in a new unwrapped line, we need to
132  // store the comments belonging to that token.
133  SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
134  FormatToken *FormatTok;
135  bool MustBreakBeforeNextToken;
136
137  // The parsed lines. Only added to through \c CurrentLines.
138  SmallVector<UnwrappedLine, 8> Lines;
139
140  // Preprocessor directives are parsed out-of-order from other unwrapped lines.
141  // Thus, we need to keep a list of preprocessor directives to be reported
142  // after an unwarpped line that has been started was finished.
143  SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
144
145  // New unwrapped lines are added via CurrentLines.
146  // Usually points to \c &Lines. While parsing a preprocessor directive when
147  // there is an unfinished previous unwrapped line, will point to
148  // \c &PreprocessorDirectives.
149  SmallVectorImpl<UnwrappedLine> *CurrentLines;
150
151  // We store for each line whether it must be a declaration depending on
152  // whether we are in a compound statement or not.
153  std::vector<bool> DeclarationScopeStack;
154
155  // Will be true if we encounter an error that leads to possibily incorrect
156  // indentation levels.
157  bool StructuralError;
158
159  const FormatStyle &Style;
160  FormatTokenSource *Tokens;
161  UnwrappedLineConsumer &Callback;
162
163  // FIXME: This is a temporary measure until we have reworked the ownership
164  // of the format tokens. The goal is to have the actual tokens created and
165  // owned outside of and handed into the UnwrappedLineParser.
166  ArrayRef<FormatToken *> AllTokens;
167
168  // Represents preprocessor branch type, so we can find matching
169  // #if/#else/#endif directives.
170  enum PPBranchKind {
171    PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
172    PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
173  };
174
175  // Keeps a stack of currently active preprocessor branching directives.
176  SmallVector<PPBranchKind, 16> PPStack;
177
178  // The \c UnwrappedLineParser re-parses the code for each combination
179  // of preprocessor branches that can be taken.
180  // To that end, we take the same branch (#if, #else, or one of the #elif
181  // branches) for each nesting level of preprocessor branches.
182  // \c PPBranchLevel stores the current nesting level of preprocessor
183  // branches during one pass over the code.
184  int PPBranchLevel;
185
186  // Contains the current branch (#if, #else or one of the #elif branches)
187  // for each nesting level.
188  SmallVector<int, 8> PPLevelBranchIndex;
189
190  // Contains the maximum number of branches at each nesting level.
191  SmallVector<int, 8> PPLevelBranchCount;
192
193  // Contains the number of branches per nesting level we are currently
194  // in while parsing a preprocessor branch sequence.
195  // This is used to update PPLevelBranchCount at the end of a branch
196  // sequence.
197  std::stack<int> PPChainBranchIndex;
198
199  friend class ScopedLineState;
200  friend class CompoundStatementIndenter;
201};
202
203struct UnwrappedLineNode {
204  UnwrappedLineNode() : Tok(nullptr) {}
205  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
206
207  FormatToken *Tok;
208  SmallVector<UnwrappedLine, 0> Children;
209};
210
211inline UnwrappedLine::UnwrappedLine()
212    : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
213
214} // end namespace format
215} // end namespace clang
216
217#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
218