Token.h revision 39a8de10c18365bde7062d8959b7ed525449c561
1//===--- Token.h - Token interface ------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the Token interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_TOKEN_H 15#define LLVM_CLANG_TOKEN_H 16 17#include "clang/Basic/TemplateKinds.h" 18#include "clang/Basic/TokenKinds.h" 19#include "clang/Basic/SourceLocation.h" 20 21namespace clang { 22 23class IdentifierInfo; 24 25/// Token - This structure provides full information about a lexed token. 26/// It is not intended to be space efficient, it is intended to return as much 27/// information as possible about each returned token. This is expected to be 28/// compressed into a smaller form if memory footprint is important. 29/// 30/// The parser can create a special "annotation token" representing a stream of 31/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" 32/// can be represented by a single typename annotation token that carries 33/// information about the SourceRange of the tokens and the type object. 34class Token { 35 /// The location of the token. 36 SourceLocation Loc; 37 38 // Conceptually these next two fields could be in a union. However, this 39 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical 40 // routine. Keeping as separate members with casts until a more beautiful fix 41 // presents itself. 42 43 /// UintData - This holds either the length of the token text, when 44 /// a normal token, or the end of the SourceRange when an annotation 45 /// token. 46 unsigned UintData; 47 48 /// PtrData - This is a union of four different pointer types, which depends 49 /// on what type of token this is: 50 /// Identifiers, keywords, etc: 51 /// This is an IdentifierInfo*, which contains the uniqued identifier 52 /// spelling. 53 /// Literals: isLiteral() returns true. 54 /// This is a pointer to the start of the token in a text buffer, which 55 /// may be dirty (have trigraphs / escaped newlines). 56 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). 57 /// This is a pointer to sema-specific data for the annotation token. 58 /// Other: 59 /// This is null. 60 void *PtrData; 61 62 /// Kind - The actual flavor of token this is. 63 /// 64 unsigned Kind : 8; // DON'T make Kind a 'tok::TokenKind'; 65 // MSVC will treat it as a signed char and 66 // TokenKinds > 127 won't be handled correctly. 67 68 /// Flags - Bits we track about this token, members of the TokenFlags enum. 69 unsigned Flags : 8; 70public: 71 72 // Various flags set per token: 73 enum TokenFlags { 74 StartOfLine = 0x01, // At start of line or only after whitespace. 75 LeadingSpace = 0x02, // Whitespace exists before this token. 76 DisableExpand = 0x04, // This identifier may never be macro expanded. 77 NeedsCleaning = 0x08 // Contained an escaped newline or trigraph. 78 }; 79 80 tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } 81 void setKind(tok::TokenKind K) { Kind = K; } 82 83 /// is/isNot - Predicates to check if this token is a specific kind, as in 84 /// "if (Tok.is(tok::l_brace)) {...}". 85 bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } 86 bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } 87 88 /// isLiteral - Return true if this is a "literal", like a numeric 89 /// constant, string, etc. 90 bool isLiteral() const { 91 return is(tok::numeric_constant) || is(tok::char_constant) || 92 is(tok::string_literal) || is(tok::wide_string_literal) || 93 is(tok::angle_string_literal); 94 } 95 96 bool isAnnotation() const { 97 return is(tok::annot_typename) || 98 is(tok::annot_cxxscope) || 99 is(tok::annot_template_id); 100 } 101 102 /// getLocation - Return a source location identifier for the specified 103 /// offset in the current file. 104 SourceLocation getLocation() const { return Loc; } 105 unsigned getLength() const { 106 assert(!isAnnotation() && "Annotation tokens have no length field"); 107 return UintData; 108 } 109 110 void setLocation(SourceLocation L) { Loc = L; } 111 void setLength(unsigned Len) { 112 assert(!isAnnotation() && "Annotation tokens have no length field"); 113 UintData = Len; 114 } 115 116 SourceLocation getAnnotationEndLoc() const { 117 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 118 return SourceLocation::getFromRawEncoding(UintData); 119 } 120 void setAnnotationEndLoc(SourceLocation L) { 121 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 122 UintData = L.getRawEncoding(); 123 } 124 125 /// getAnnotationRange - SourceRange of the group of tokens that this 126 /// annotation token represents. 127 SourceRange getAnnotationRange() const { 128 return SourceRange(getLocation(), getAnnotationEndLoc()); 129 } 130 void setAnnotationRange(SourceRange R) { 131 setLocation(R.getBegin()); 132 setAnnotationEndLoc(R.getEnd()); 133 } 134 135 const char *getName() const { 136 return tok::getTokenName( (tok::TokenKind) Kind); 137 } 138 139 /// startToken - Reset all flags to cleared. 140 /// 141 void startToken() { 142 Kind = tok::unknown; 143 Flags = 0; 144 PtrData = 0; 145 Loc = SourceLocation(); 146 } 147 148 IdentifierInfo *getIdentifierInfo() const { 149 assert(!isAnnotation() && "Used IdentInfo on annotation token!"); 150 if (isLiteral()) return 0; 151 return (IdentifierInfo*) PtrData; 152 } 153 void setIdentifierInfo(IdentifierInfo *II) { 154 PtrData = (void*) II; 155 } 156 157 /// getLiteralData - For a literal token (numeric constant, string, etc), this 158 /// returns a pointer to the start of it in the text buffer if known, null 159 /// otherwise. 160 const char *getLiteralData() const { 161 assert(isLiteral() && "Cannot get literal data of non-literal"); 162 return reinterpret_cast<const char*>(PtrData); 163 } 164 void setLiteralData(const char *Ptr) { 165 assert(isLiteral() && "Cannot set literal data of non-literal"); 166 PtrData = (void*)Ptr; 167 } 168 169 void *getAnnotationValue() const { 170 assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 171 return PtrData; 172 } 173 void setAnnotationValue(void *val) { 174 assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 175 PtrData = val; 176 } 177 178 /// setFlag - Set the specified flag. 179 void setFlag(TokenFlags Flag) { 180 Flags |= Flag; 181 } 182 183 /// clearFlag - Unset the specified flag. 184 void clearFlag(TokenFlags Flag) { 185 Flags &= ~Flag; 186 } 187 188 /// getFlags - Return the internal represtation of the flags. 189 /// Only intended for low-level operations such as writing tokens to 190 // disk. 191 unsigned getFlags() const { 192 return Flags; 193 } 194 195 /// setFlagValue - Set a flag to either true or false. 196 void setFlagValue(TokenFlags Flag, bool Val) { 197 if (Val) 198 setFlag(Flag); 199 else 200 clearFlag(Flag); 201 } 202 203 /// isAtStartOfLine - Return true if this token is at the start of a line. 204 /// 205 bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; } 206 207 /// hasLeadingSpace - Return true if this token has whitespace before it. 208 /// 209 bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; } 210 211 /// isExpandDisabled - Return true if this identifier token should never 212 /// be expanded in the future, due to C99 6.10.3.4p2. 213 bool isExpandDisabled() const { 214 return (Flags & DisableExpand) ? true : false; 215 } 216 217 /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. 218 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; 219 220 /// getObjCKeywordID - Return the ObjC keyword kind. 221 tok::ObjCKeywordKind getObjCKeywordID() const; 222 223 /// needsCleaning - Return true if this token has trigraphs or escaped 224 /// newlines in it. 225 /// 226 bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; } 227}; 228 229/// PPConditionalInfo - Information about the conditional stack (#if directives) 230/// currently active. 231struct PPConditionalInfo { 232 /// IfLoc - Location where the conditional started. 233 /// 234 SourceLocation IfLoc; 235 236 /// WasSkipping - True if this was contained in a skipping directive, e.g. 237 /// in a "#if 0" block. 238 bool WasSkipping; 239 240 /// FoundNonSkip - True if we have emitted tokens already, and now we're in 241 /// an #else block or something. Only useful in Skipping blocks. 242 bool FoundNonSkip; 243 244 /// FoundElse - True if we've seen a #else in this block. If so, 245 /// #elif/#else directives are not allowed. 246 bool FoundElse; 247}; 248 249/// TemplateIdAnnotation - Information about a template-id annotation 250/// token, which contains the template declaration, template 251/// arguments, whether those template arguments were types or 252/// expressions, and the source locations for important tokens. All of 253/// the information about template arguments is allocated directly 254/// after this structure. 255struct TemplateIdAnnotation { 256 /// TemplateNameLoc - The location of the template name within the 257 /// source. 258 SourceLocation TemplateNameLoc; 259 260 /// FIXME: Temporarily stores the name of a specialization 261 IdentifierInfo *Name; 262 263 /// The declaration of the template corresponding to the 264 /// template-name. This is an Action::DeclTy*. 265 void *Template; 266 267 /// The kind of template that Template refers to. 268 TemplateNameKind Kind; 269 270 /// The location of the '<' before the template argument 271 /// list. 272 SourceLocation LAngleLoc; 273 274 /// The location of the '>' after the template argument 275 /// list. 276 SourceLocation RAngleLoc; 277 278 /// NumArgs - The number of template arguments. 279 unsigned NumArgs; 280 281 /// \brief Retrieves a pointer to the template arguments 282 void **getTemplateArgs() { return (void **)(this + 1); } 283 284 /// \brief Retrieves a pointer to the array of template argument 285 /// locations. 286 SourceLocation *getTemplateArgLocations() { 287 return (SourceLocation *)(getTemplateArgs() + NumArgs); 288 } 289 290 /// \brief Retrieves a pointer to the array of flags that states 291 /// whether the template arguments are types. 292 bool *getTemplateArgIsType() { 293 return (bool *)(getTemplateArgLocations() + NumArgs); 294 } 295 296 static TemplateIdAnnotation* Allocate(unsigned NumArgs) { 297 TemplateIdAnnotation *TemplateId 298 = (TemplateIdAnnotation *)malloc(sizeof(TemplateIdAnnotation) + 299 sizeof(void*) * NumArgs + 300 sizeof(SourceLocation) * NumArgs + 301 sizeof(bool) * NumArgs); 302 TemplateId->NumArgs = NumArgs; 303 return TemplateId; 304 } 305 306 void Destroy() { free(this); } 307}; 308 309} // end namespace clang 310 311#endif 312