TokenAnnotator.h revision 3c08a818a6ac9115fe8880af9bbf5a0a87bdffaa
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a token annotator, i.e. creates 12/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13/// 14//===----------------------------------------------------------------------===// 15 16#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 17#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 18 19#include "UnwrappedLineParser.h" 20#include "clang/Basic/OperatorPrecedence.h" 21#include "clang/Format/Format.h" 22#include <string> 23 24namespace clang { 25class Lexer; 26class SourceManager; 27 28namespace format { 29 30enum TokenType { 31 TT_BinaryOperator, 32 TT_BlockComment, 33 TT_CastRParen, 34 TT_ConditionalExpr, 35 TT_CtorInitializerColon, 36 TT_ImplicitStringLiteral, 37 TT_InheritanceColon, 38 TT_LineComment, 39 TT_ObjCArrayLiteral, 40 TT_ObjCBlockLParen, 41 TT_ObjCDecl, 42 TT_ObjCForIn, 43 TT_ObjCMethodExpr, 44 TT_ObjCMethodSpecifier, 45 TT_ObjCProperty, 46 TT_ObjCSelectorName, 47 TT_OverloadedOperatorLParen, 48 TT_PointerOrReference, 49 TT_PureVirtualSpecifier, 50 TT_RangeBasedForLoopColon, 51 TT_StartOfName, 52 TT_TemplateCloser, 53 TT_TemplateOpener, 54 TT_TrailingUnaryOperator, 55 TT_UnaryOperator, 56 TT_Unknown 57}; 58 59enum LineType { 60 LT_Invalid, 61 LT_Other, 62 LT_BuilderTypeCall, 63 LT_PreprocessorDirective, 64 LT_VirtualFunctionDecl, 65 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 66 LT_ObjCMethodDecl, 67 LT_ObjCProperty // An @property line. 68}; 69 70class AnnotatedToken { 71public: 72 explicit AnnotatedToken(const FormatToken &FormatTok) 73 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), 74 CanBreakBefore(false), MustBreakBefore(false), 75 ClosesTemplateDeclaration(false), MatchingParen(NULL), 76 ParameterCount(0), BindingStrength(0), SplitPenalty(0), 77 LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0), 78 FakeRParens(0) { 79 } 80 81 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 82 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 83 84 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 85 return FormatTok.Tok.isObjCAtKeyword(Kind); 86 } 87 88 FormatToken FormatTok; 89 90 TokenType Type; 91 92 unsigned SpacesRequiredBefore; 93 bool CanBreakBefore; 94 bool MustBreakBefore; 95 96 bool ClosesTemplateDeclaration; 97 98 AnnotatedToken *MatchingParen; 99 100 /// \brief Number of parameters, if this is "(", "[" or "<". 101 /// 102 /// This is initialized to 1 as we don't need to distinguish functions with 103 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 104 /// the number of commas. 105 unsigned ParameterCount; 106 107 /// \brief The total length of the line up to and including this token. 108 unsigned TotalLength; 109 110 // FIXME: Come up with a 'cleaner' concept. 111 /// \brief The binding strength of a token. This is a combined value of 112 /// operator precedence, parenthesis nesting, etc. 113 unsigned BindingStrength; 114 115 /// \brief Penalty for inserting a line break before this token. 116 unsigned SplitPenalty; 117 118 /// \brief If this is the first ObjC selector name in an ObjC method 119 /// definition or call, this contains the length of the longest name. 120 unsigned LongestObjCSelectorName; 121 122 std::vector<AnnotatedToken> Children; 123 AnnotatedToken *Parent; 124 125 /// \brief Insert this many fake ( before this token for correct indentation. 126 unsigned FakeLParens; 127 /// \brief Insert this many fake ) after this token for correct indentation. 128 unsigned FakeRParens; 129 130 const AnnotatedToken *getPreviousNoneComment() const { 131 AnnotatedToken *Tok = Parent; 132 while (Tok != NULL && Tok->is(tok::comment)) 133 Tok = Tok->Parent; 134 return Tok; 135 } 136}; 137 138class AnnotatedLine { 139public: 140 AnnotatedLine(const UnwrappedLine &Line) 141 : First(Line.Tokens.front()), Level(Line.Level), 142 InPPDirective(Line.InPPDirective), 143 MustBeDeclaration(Line.MustBeDeclaration), 144 MightBeFunctionDecl(false) { 145 assert(!Line.Tokens.empty()); 146 AnnotatedToken *Current = &First; 147 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 148 E = Line.Tokens.end(); 149 I != E; ++I) { 150 Current->Children.push_back(AnnotatedToken(*I)); 151 Current->Children[0].Parent = Current; 152 Current = &Current->Children[0]; 153 } 154 Last = Current; 155 } 156 AnnotatedLine(const AnnotatedLine &Other) 157 : First(Other.First), Type(Other.Type), Level(Other.Level), 158 InPPDirective(Other.InPPDirective), 159 MustBeDeclaration(Other.MustBeDeclaration), 160 MightBeFunctionDecl(Other.MightBeFunctionDecl) { 161 Last = &First; 162 while (!Last->Children.empty()) { 163 Last->Children[0].Parent = Last; 164 Last = &Last->Children[0]; 165 } 166 } 167 168 AnnotatedToken First; 169 AnnotatedToken *Last; 170 171 LineType Type; 172 unsigned Level; 173 bool InPPDirective; 174 bool MustBeDeclaration; 175 bool MightBeFunctionDecl; 176}; 177 178inline prec::Level getPrecedence(const AnnotatedToken &Tok) { 179 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 180} 181 182/// \brief Determines extra information about the tokens comprising an 183/// \c UnwrappedLine. 184class TokenAnnotator { 185public: 186 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 187 IdentifierInfo &Ident_in) 188 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { 189 } 190 191 void annotate(AnnotatedLine &Line); 192 void calculateFormattingInformation(AnnotatedLine &Line); 193 194private: 195 /// \brief Calculate the penalty for splitting before \c Tok. 196 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); 197 198 bool spaceRequiredBetween(const AnnotatedLine &Line, 199 const AnnotatedToken &Left, 200 const AnnotatedToken &Right); 201 202 bool spaceRequiredBefore(const AnnotatedLine &Line, 203 const AnnotatedToken &Tok); 204 205 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); 206 207 const FormatStyle &Style; 208 SourceManager &SourceMgr; 209 Lexer &Lex; 210 211 // Contextual keywords: 212 IdentifierInfo &Ident_in; 213}; 214 215} // end namespace format 216} // end namespace clang 217 218#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 219