TokenAnnotator.h revision 395228fdc343df39c2507e414dc1406a185c6d37
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a token annotator, i.e. creates 12/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13/// 14//===----------------------------------------------------------------------===// 15 16#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 17#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 18 19#include "UnwrappedLineParser.h" 20#include "clang/Basic/OperatorPrecedence.h" 21#include "clang/Format/Format.h" 22#include <string> 23 24namespace clang { 25class Lexer; 26class SourceManager; 27 28namespace format { 29 30enum TokenType { 31 TT_BinaryOperator, 32 TT_BlockComment, 33 TT_CastRParen, 34 TT_ConditionalExpr, 35 TT_CtorInitializerColon, 36 TT_ImplicitStringLiteral, 37 TT_InlineASMColon, 38 TT_InheritanceColon, 39 TT_LineComment, 40 TT_ObjCArrayLiteral, 41 TT_ObjCBlockLParen, 42 TT_ObjCDecl, 43 TT_ObjCForIn, 44 TT_ObjCMethodExpr, 45 TT_ObjCMethodSpecifier, 46 TT_ObjCProperty, 47 TT_ObjCSelectorName, 48 TT_OverloadedOperatorLParen, 49 TT_PointerOrReference, 50 TT_PureVirtualSpecifier, 51 TT_RangeBasedForLoopColon, 52 TT_StartOfName, 53 TT_TemplateCloser, 54 TT_TemplateOpener, 55 TT_TrailingUnaryOperator, 56 TT_UnaryOperator, 57 TT_Unknown 58}; 59 60enum LineType { 61 LT_Invalid, 62 LT_Other, 63 LT_BuilderTypeCall, 64 LT_PreprocessorDirective, 65 LT_VirtualFunctionDecl, 66 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 67 LT_ObjCMethodDecl, 68 LT_ObjCProperty // An @property line. 69}; 70 71class AnnotatedToken { 72public: 73 explicit AnnotatedToken(const FormatToken &FormatTok) 74 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), 75 CanBreakBefore(false), MustBreakBefore(false), 76 ClosesTemplateDeclaration(false), MatchingParen(NULL), 77 ParameterCount(0), BindingStrength(0), SplitPenalty(0), 78 LongestObjCSelectorName(0), DefinesFunctionType(false), Parent(NULL), 79 FakeRParens(0), LastInChainOfCalls(false), 80 PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {} 81 82 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 83 84 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 85 return is(K1) || is(K2); 86 } 87 88 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { 89 return is(K1) || is(K2) || is(K3); 90 } 91 92 bool isOneOf( 93 tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, 94 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, 95 tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS, 96 tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS, 97 tok::TokenKind K10 = tok::NUM_TOKENS, 98 tok::TokenKind K11 = tok::NUM_TOKENS, 99 tok::TokenKind K12 = tok::NUM_TOKENS) const { 100 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 101 is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 102 } 103 104 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 105 106 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 107 return FormatTok.Tok.isObjCAtKeyword(Kind); 108 } 109 110 bool isAccessSpecifier(bool ColonRequired = true) const { 111 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 112 (!ColonRequired || 113 (!Children.empty() && Children[0].is(tok::colon))); 114 } 115 116 bool isObjCAccessSpecifier() const { 117 return is(tok::at) && !Children.empty() && 118 (Children[0].isObjCAtKeyword(tok::objc_public) || 119 Children[0].isObjCAtKeyword(tok::objc_protected) || 120 Children[0].isObjCAtKeyword(tok::objc_package) || 121 Children[0].isObjCAtKeyword(tok::objc_private)); 122 } 123 124 /// \brief Returns whether \p Tok is ([{ or a template opening <. 125 bool opensScope() const; 126 /// \brief Returns whether \p Tok is )]} or a template opening >. 127 bool closesScope() const; 128 129 bool isUnaryOperator() const; 130 bool isBinaryOperator() const; 131 bool isTrailingComment() const; 132 133 FormatToken FormatTok; 134 135 TokenType Type; 136 137 unsigned SpacesRequiredBefore; 138 bool CanBreakBefore; 139 bool MustBreakBefore; 140 141 bool ClosesTemplateDeclaration; 142 143 AnnotatedToken *MatchingParen; 144 145 /// \brief Number of parameters, if this is "(", "[" or "<". 146 /// 147 /// This is initialized to 1 as we don't need to distinguish functions with 148 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 149 /// the number of commas. 150 unsigned ParameterCount; 151 152 /// \brief The total length of the line up to and including this token. 153 unsigned TotalLength; 154 155 // FIXME: Come up with a 'cleaner' concept. 156 /// \brief The binding strength of a token. This is a combined value of 157 /// operator precedence, parenthesis nesting, etc. 158 unsigned BindingStrength; 159 160 /// \brief Penalty for inserting a line break before this token. 161 unsigned SplitPenalty; 162 163 /// \brief If this is the first ObjC selector name in an ObjC method 164 /// definition or call, this contains the length of the longest name. 165 unsigned LongestObjCSelectorName; 166 167 /// \brief \c true if this is a "(" that starts a function type definition. 168 bool DefinesFunctionType; 169 170 std::vector<AnnotatedToken> Children; 171 AnnotatedToken *Parent; 172 173 /// \brief Stores the number of required fake parentheses and the 174 /// corresponding operator precedence. 175 /// 176 /// If multiple fake parentheses start at a token, this vector stores them in 177 /// reverse order, i.e. inner fake parenthesis first. 178 SmallVector<prec::Level, 4> FakeLParens; 179 /// \brief Insert this many fake ) after this token for correct indentation. 180 unsigned FakeRParens; 181 182 /// \brief Is this the last "." or "->" in a builder-type call? 183 bool LastInChainOfCalls; 184 185 /// \brief Is this token part of a \c DeclStmt defining multiple variables? 186 /// 187 /// Only set if \c Type == \c TT_StartOfName. 188 bool PartOfMultiVariableDeclStmt; 189 190 /// \brief Set to \c true for "("-tokens if this is the last token other than 191 /// ")" in the next higher parenthesis level. 192 /// 193 /// If this is \c true, no more formatting decisions have to be made on the 194 /// next higher parenthesis level, enabling optimizations. 195 /// 196 /// Example: 197 /// \code 198 /// aaaaaa(aaaaaa()); 199 /// ^ // Set to true for this parenthesis. 200 /// \endcode 201 bool NoMoreTokensOnLevel; 202 203 /// \brief Returns the previous token ignoring comments. 204 AnnotatedToken *getPreviousNoneComment() const; 205 206 /// \brief Returns the next token ignoring comments. 207 const AnnotatedToken *getNextNoneComment() const; 208}; 209 210class AnnotatedLine { 211public: 212 AnnotatedLine(const UnwrappedLine &Line) 213 : First(Line.Tokens.front()), Level(Line.Level), 214 InPPDirective(Line.InPPDirective), 215 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 216 StartsDefinition(false) { 217 assert(!Line.Tokens.empty()); 218 AnnotatedToken *Current = &First; 219 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 220 E = Line.Tokens.end(); 221 I != E; ++I) { 222 Current->Children.push_back(AnnotatedToken(*I)); 223 Current->Children[0].Parent = Current; 224 Current = &Current->Children[0]; 225 } 226 Last = Current; 227 } 228 AnnotatedLine(const AnnotatedLine &Other) 229 : First(Other.First), Type(Other.Type), Level(Other.Level), 230 InPPDirective(Other.InPPDirective), 231 MustBeDeclaration(Other.MustBeDeclaration), 232 MightBeFunctionDecl(Other.MightBeFunctionDecl), 233 StartsDefinition(Other.StartsDefinition) { 234 Last = &First; 235 while (!Last->Children.empty()) { 236 Last->Children[0].Parent = Last; 237 Last = &Last->Children[0]; 238 } 239 } 240 241 AnnotatedToken First; 242 AnnotatedToken *Last; 243 244 LineType Type; 245 unsigned Level; 246 bool InPPDirective; 247 bool MustBeDeclaration; 248 bool MightBeFunctionDecl; 249 bool StartsDefinition; 250}; 251 252inline prec::Level getPrecedence(const AnnotatedToken &Tok) { 253 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 254} 255 256/// \brief Determines extra information about the tokens comprising an 257/// \c UnwrappedLine. 258class TokenAnnotator { 259public: 260 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 261 IdentifierInfo &Ident_in) 262 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { 263 } 264 265 void annotate(AnnotatedLine &Line); 266 void calculateFormattingInformation(AnnotatedLine &Line); 267 268private: 269 /// \brief Calculate the penalty for splitting before \c Tok. 270 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); 271 272 bool spaceRequiredBetween(const AnnotatedLine &Line, 273 const AnnotatedToken &Left, 274 const AnnotatedToken &Right); 275 276 bool spaceRequiredBefore(const AnnotatedLine &Line, 277 const AnnotatedToken &Tok); 278 279 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); 280 281 void printDebugInfo(const AnnotatedLine &Line); 282 283 const FormatStyle &Style; 284 SourceManager &SourceMgr; 285 Lexer &Lex; 286 287 // Contextual keywords: 288 IdentifierInfo &Ident_in; 289}; 290 291} // end namespace format 292} // end namespace clang 293 294#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 295