TokenAnnotator.h revision 3c08a818a6ac9115fe8880af9bbf5a0a87bdffaa
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
18
19#include "UnwrappedLineParser.h"
20#include "clang/Basic/OperatorPrecedence.h"
21#include "clang/Format/Format.h"
22#include <string>
23
24namespace clang {
25class Lexer;
26class SourceManager;
27
28namespace format {
29
/// \brief Classification attached to an \c AnnotatedToken.
///
/// Enumerators are kept in alphabetical order; \c TT_Unknown comes last and is
/// the value every \c AnnotatedToken is constructed with.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_ImplicitStringLiteral,
  TT_InheritanceColon,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown
};
58
/// \brief Classification attached to an \c AnnotatedLine.
enum LineType {
  LT_Invalid,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty
};
69
70class AnnotatedToken {
71public:
72  explicit AnnotatedToken(const FormatToken &FormatTok)
73      : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
74        CanBreakBefore(false), MustBreakBefore(false),
75        ClosesTemplateDeclaration(false), MatchingParen(NULL),
76        ParameterCount(0), BindingStrength(0), SplitPenalty(0),
77        LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
78        FakeRParens(0) {
79  }
80
81  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
82  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
83
84  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
85    return FormatTok.Tok.isObjCAtKeyword(Kind);
86  }
87
88  FormatToken FormatTok;
89
90  TokenType Type;
91
92  unsigned SpacesRequiredBefore;
93  bool CanBreakBefore;
94  bool MustBreakBefore;
95
96  bool ClosesTemplateDeclaration;
97
98  AnnotatedToken *MatchingParen;
99
100  /// \brief Number of parameters, if this is "(", "[" or "<".
101  ///
102  /// This is initialized to 1 as we don't need to distinguish functions with
103  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
104  /// the number of commas.
105  unsigned ParameterCount;
106
107  /// \brief The total length of the line up to and including this token.
108  unsigned TotalLength;
109
110  // FIXME: Come up with a 'cleaner' concept.
111  /// \brief The binding strength of a token. This is a combined value of
112  /// operator precedence, parenthesis nesting, etc.
113  unsigned BindingStrength;
114
115  /// \brief Penalty for inserting a line break before this token.
116  unsigned SplitPenalty;
117
118  /// \brief If this is the first ObjC selector name in an ObjC method
119  /// definition or call, this contains the length of the longest name.
120  unsigned LongestObjCSelectorName;
121
122  std::vector<AnnotatedToken> Children;
123  AnnotatedToken *Parent;
124
125  /// \brief Insert this many fake ( before this token for correct indentation.
126  unsigned FakeLParens;
127  /// \brief Insert this many fake ) after this token for correct indentation.
128  unsigned FakeRParens;
129
130  const AnnotatedToken *getPreviousNoneComment() const {
131    AnnotatedToken *Tok = Parent;
132    while (Tok != NULL && Tok->is(tok::comment))
133      Tok = Tok->Parent;
134    return Tok;
135  }
136};
137
138class AnnotatedLine {
139public:
140  AnnotatedLine(const UnwrappedLine &Line)
141      : First(Line.Tokens.front()), Level(Line.Level),
142        InPPDirective(Line.InPPDirective),
143        MustBeDeclaration(Line.MustBeDeclaration),
144        MightBeFunctionDecl(false) {
145    assert(!Line.Tokens.empty());
146    AnnotatedToken *Current = &First;
147    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
148                                                E = Line.Tokens.end();
149         I != E; ++I) {
150      Current->Children.push_back(AnnotatedToken(*I));
151      Current->Children[0].Parent = Current;
152      Current = &Current->Children[0];
153    }
154    Last = Current;
155  }
156  AnnotatedLine(const AnnotatedLine &Other)
157      : First(Other.First), Type(Other.Type), Level(Other.Level),
158        InPPDirective(Other.InPPDirective),
159        MustBeDeclaration(Other.MustBeDeclaration),
160        MightBeFunctionDecl(Other.MightBeFunctionDecl) {
161    Last = &First;
162    while (!Last->Children.empty()) {
163      Last->Children[0].Parent = Last;
164      Last = &Last->Children[0];
165    }
166  }
167
168  AnnotatedToken First;
169  AnnotatedToken *Last;
170
171  LineType Type;
172  unsigned Level;
173  bool InPPDirective;
174  bool MustBeDeclaration;
175  bool MightBeFunctionDecl;
176};
177
178inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
179  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
180}
181
182/// \brief Determines extra information about the tokens comprising an
183/// \c UnwrappedLine.
184class TokenAnnotator {
185public:
186  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
187                 IdentifierInfo &Ident_in)
188      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
189  }
190
191  void annotate(AnnotatedLine &Line);
192  void calculateFormattingInformation(AnnotatedLine &Line);
193
194private:
195  /// \brief Calculate the penalty for splitting before \c Tok.
196  unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
197
198  bool spaceRequiredBetween(const AnnotatedLine &Line,
199                            const AnnotatedToken &Left,
200                            const AnnotatedToken &Right);
201
202  bool spaceRequiredBefore(const AnnotatedLine &Line,
203                           const AnnotatedToken &Tok);
204
205  bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
206
207  const FormatStyle &Style;
208  SourceManager &SourceMgr;
209  Lexer &Lex;
210
211  // Contextual keywords:
212  IdentifierInfo &Ident_in;
213};
214
215} // end namespace format
216} // end namespace clang
217
218#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
219