Token.h revision 0016d519b831859526b79405cdae4c64c73731c8
1//===--- Token.h - Token interface ------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the Token interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_TOKEN_H 15#define LLVM_CLANG_TOKEN_H 16 17#include "llvm/Support/Allocator.h" 18#include "clang/Basic/TemplateKinds.h" 19#include "clang/Basic/TokenKinds.h" 20#include "clang/Basic/SourceLocation.h" 21#include "clang/Basic/OperatorKinds.h" 22#include "clang/Basic/IdentifierTable.h" 23#include <cstdlib> 24 25namespace clang { 26 27/// Token - This structure provides full information about a lexed token. 28/// It is not intended to be space efficient, it is intended to return as much 29/// information as possible about each returned token. This is expected to be 30/// compressed into a smaller form if memory footprint is important. 31/// 32/// The parser can create a special "annotation token" representing a stream of 33/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" 34/// can be represented by a single typename annotation token that carries 35/// information about the SourceRange of the tokens and the type object. 36class Token { 37 /// An extra-large structure for storing the data needed for a user-defined 38 /// literal - the raw literal, and the identifier suffix. 39 struct UDLData { 40 IdentifierInfo *II; 41 const char *LiteralData; 42 unsigned LiteralLength; 43 }; 44 45 /// The location of the token. 46 SourceLocation Loc; 47 48 // Conceptually these next two fields could be in a union. However, this 49 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical 50 // routine. Keeping as separate members with casts until a more beautiful fix 51 // presents itself. 52 53 /// UintData - This holds either the length of the token text, when 54 /// a normal token, or the end of the SourceRange when an annotation 55 /// token. 56 unsigned UintData; 57 58 /// PtrData - This is a union of five different pointer types, which depends 59 /// on what type of token this is: 60 /// Identifiers, keywords, etc: 61 /// This is an IdentifierInfo*, which contains the uniqued identifier 62 /// spelling. 63 /// Literals: isLiteral() returns true. 64 /// This is a pointer to the start of the token in a text buffer, which 65 /// may be dirty (have trigraphs / escaped newlines). 66 /// User-defined literals: isUserDefinedLiteral() returns true. 67 /// This is a pointer to a UDLData. 68 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). 69 /// This is a pointer to sema-specific data for the annotation token. 70 /// Other: 71 /// This is null. 72 void *PtrData; 73 74 /// Kind - The actual flavor of token this is. 75 /// 76 unsigned char Kind; // DON'T make Kind a 'tok::TokenKind'; 77 // MSVC will treat it as a signed char and 78 // TokenKinds > 127 won't be handled correctly. 79 80 /// Flags - Bits we track about this token, members of the TokenFlags enum. 81 unsigned char Flags; 82public: 83 84 /// Various flags set per token: 85 enum TokenFlags { 86 StartOfLine = 0x01, ///< At start of line or only after whitespace 87 LeadingSpace = 0x02, ///< Whitespace exists before this token 88 DisableExpand = 0x04, ///< This identifier may never be macro expanded 89 NeedsCleaning = 0x08, ///< Contained an escaped newline or trigraph 90 UserDefinedLiteral = 0x10, ///< This literal has a ud-suffix 91 LiteralPortionClean = 0x20 ///< A UDL's literal portion needs no cleaning 92 }; 93 94 tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } 95 void setKind(tok::TokenKind K) { Kind = K; } 96 97 /// is/isNot - Predicates to check if this token is a specific kind, as in 98 /// "if (Tok.is(tok::l_brace)) {...}". 99 bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } 100 bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } 101 102 /// isLiteral - Return true if this is a "literal", like a numeric 103 /// constant, string, etc. 104 bool isLiteral() const { 105 return is(tok::numeric_constant) || is(tok::char_constant) || 106 is(tok::string_literal) || is(tok::wide_string_literal) || 107 is(tok::angle_string_literal); 108 } 109 110 bool isAnnotation() const { 111 return is(tok::annot_typename) || 112 is(tok::annot_cxxscope) || 113 is(tok::annot_template_id); 114 } 115 116 /// getLocation - Return a source location identifier for the specified 117 /// offset in the current file. 118 SourceLocation getLocation() const { return Loc; } 119 unsigned getLength() const { 120 assert(!isAnnotation() && "Annotation tokens have no length field"); 121 return UintData; 122 } 123 /// getLiteralLength - Return the length of the literal portion of the token, 124 /// which may not be the token length if this is a user-defined literal. 125 unsigned getLiteralLength() const { 126 assert(isLiteral() && "Using getLiteralLength on a non-literal token"); 127 if (isUserDefinedLiteral()) 128 return reinterpret_cast<UDLData*>(PtrData)->LiteralLength; 129 else 130 return UintData; 131 } 132 133 void setLocation(SourceLocation L) { Loc = L; } 134 void setLength(unsigned Len) { 135 assert(!isAnnotation() && "Annotation tokens have no length field"); 136 UintData = Len; 137 } 138 void setLiteralLength(unsigned Len) { 139 assert(isLiteral() && "Using setLiteralLength on a non-literal token"); 140 if (isUserDefinedLiteral()) 141 reinterpret_cast<UDLData*>(PtrData)->LiteralLength = Len; 142 else 143 UintData = Len; 144 } 145 146 /// makeUserDefinedLiteral - Set this token to be a user-defined literal 147 void makeUserDefinedLiteral(llvm::BumpPtrAllocator &Alloc) { 148 PtrData = Alloc.Allocate(sizeof(UDLData), 4); 149 setFlag(UserDefinedLiteral); 150 } 151 152 SourceLocation getAnnotationEndLoc() const { 153 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 154 return SourceLocation::getFromRawEncoding(UintData); 155 } 156 void setAnnotationEndLoc(SourceLocation L) { 157 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 158 UintData = L.getRawEncoding(); 159 } 160 161 SourceLocation getLastLoc() const { 162 return isAnnotation() ? getAnnotationEndLoc() : getLocation(); 163 } 164 165 /// getAnnotationRange - SourceRange of the group of tokens that this 166 /// annotation token represents. 167 SourceRange getAnnotationRange() const { 168 return SourceRange(getLocation(), getAnnotationEndLoc()); 169 } 170 void setAnnotationRange(SourceRange R) { 171 setLocation(R.getBegin()); 172 setAnnotationEndLoc(R.getEnd()); 173 } 174 175 const char *getName() const { 176 return tok::getTokenName( (tok::TokenKind) Kind); 177 } 178 179 /// startToken - Reset all flags to cleared. 180 /// 181 void startToken() { 182 Kind = tok::unknown; 183 Flags = 0; 184 PtrData = 0; 185 UintData = 0; 186 Loc = SourceLocation(); 187 } 188 189 IdentifierInfo *getIdentifierInfo() const { 190 assert(!isAnnotation() && "Used IdentInfo on annotation token!"); 191 if (isUserDefinedLiteral()) 192 return reinterpret_cast<UDLData*>(PtrData)->II; 193 else if (isLiteral()) 194 return 0; 195 else 196 return reinterpret_cast<IdentifierInfo*>(PtrData); 197 } 198 void setIdentifierInfo(IdentifierInfo *II) { 199 if (isUserDefinedLiteral()) 200 reinterpret_cast<UDLData*>(PtrData)->II = II; 201 else 202 PtrData = (void*)II; 203 } 204 205 /// getLiteralData - For a literal token (numeric constant, string, etc), this 206 /// returns a pointer to the start of it in the text buffer if known, null 207 /// otherwise. 208 const char *getLiteralData() const { 209 assert(isLiteral() && "Cannot get literal data of non-literal"); 210 if (isUserDefinedLiteral()) 211 return reinterpret_cast<UDLData*>(PtrData)->LiteralData; 212 else 213 return reinterpret_cast<const char*>(PtrData); 214 } 215 void setLiteralData(const char *Ptr) { 216 assert(isLiteral() && "Cannot set literal data of non-literal"); 217 if (isUserDefinedLiteral()) 218 reinterpret_cast<UDLData*>(PtrData)->LiteralData = Ptr; 219 else 220 PtrData = const_cast<char*>(Ptr); 221 } 222 223 void *getAnnotationValue() const { 224 assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 225 return PtrData; 226 } 227 void setAnnotationValue(void *val) { 228 assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 229 PtrData = val; 230 } 231 232 /// setFlag - Set the specified flag. 233 void setFlag(TokenFlags Flag) { 234 Flags |= Flag; 235 } 236 237 /// clearFlag - Unset the specified flag. 238 void clearFlag(TokenFlags Flag) { 239 Flags &= ~Flag; 240 } 241 242 /// getFlags - Return the internal represtation of the flags. 243 /// Only intended for low-level operations such as writing tokens to 244 // disk. 245 unsigned getFlags() const { 246 return Flags; 247 } 248 249 /// setFlagValue - Set a flag to either true or false. 250 void setFlagValue(TokenFlags Flag, bool Val) { 251 if (Val) 252 setFlag(Flag); 253 else 254 clearFlag(Flag); 255 } 256 257 /// isAtStartOfLine - Return true if this token is at the start of a line. 258 /// 259 bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; } 260 261 /// hasLeadingSpace - Return true if this token has whitespace before it. 262 /// 263 bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; } 264 265 /// isExpandDisabled - Return true if this identifier token should never 266 /// be expanded in the future, due to C99 6.10.3.4p2. 267 bool isExpandDisabled() const { 268 return (Flags & DisableExpand) ? true : false; 269 } 270 271 /// isUserDefinedLiteral - Return true if this is a C++0x user-defined literal 272 /// token. 273 bool isUserDefinedLiteral() const { 274 return (Flags & UserDefinedLiteral) ? true : false; 275 } 276 277 /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. 278 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; 279 280 /// getObjCKeywordID - Return the ObjC keyword kind. 281 tok::ObjCKeywordKind getObjCKeywordID() const; 282 283 /// needsCleaning - Return true if this token has trigraphs or escaped 284 /// newlines in it. 285 bool needsCleaning() const { 286 return (Flags & NeedsCleaning) ? true : false; 287 } 288 289 /// literalNeedsCleaning - Return true if the literal portion of this token 290 /// needs cleaning. 291 bool literalNeedsCleaning() const { 292 assert(isLiteral() && "Using literalNeedsCleaning on a non-literal token"); 293 return (Flags & NeedsCleaning) ? ((Flags & LiteralPortionClean) ? false : true) 294 : false; 295 } 296}; 297 298/// PPConditionalInfo - Information about the conditional stack (#if directives) 299/// currently active. 300struct PPConditionalInfo { 301 /// IfLoc - Location where the conditional started. 302 /// 303 SourceLocation IfLoc; 304 305 /// WasSkipping - True if this was contained in a skipping directive, e.g. 306 /// in a "#if 0" block. 307 bool WasSkipping; 308 309 /// FoundNonSkip - True if we have emitted tokens already, and now we're in 310 /// an #else block or something. Only useful in Skipping blocks. 311 bool FoundNonSkip; 312 313 /// FoundElse - True if we've seen a #else in this block. If so, 314 /// #elif/#else directives are not allowed. 315 bool FoundElse; 316}; 317 318} // end namespace clang 319 320namespace llvm { 321 template <> 322 struct isPodLike<clang::Token> { static const bool value = true; }; 323} // end namespace llvm 324 325#endif 326