// IdentifierTable.h revision ea684e699ea84e61711e279f5fa7a1b9f3d46bc2
1//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the IdentifierInfo, IdentifierTable, and Selector 11// interfaces. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 16#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 17 18#include "clang/Basic/OperatorKinds.h" 19#include "clang/Basic/TokenKinds.h" 20#include "llvm/ADT/StringMap.h" 21#include "llvm/ADT/SmallString.h" 22#include "llvm/ADT/OwningPtr.h" 23#include "llvm/Bitcode/SerializationFwd.h" 24#include <string> 25#include <cassert> 26 27namespace llvm { 28 template <typename T> struct DenseMapInfo; 29} 30 31namespace clang { 32 class LangOptions; 33 class IdentifierInfo; 34 class IdentifierTable; 35 class SourceLocation; 36 class MultiKeywordSelector; // private class used by Selector 37 class DeclarationName; // AST class that stores declaration names 38 39 /// IdentifierLocPair - A simple pair of identifier info and location. 40 typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair; 41 42 43/// IdentifierInfo - One of these records is kept for each identifier that 44/// is lexed. This contains information about whether the token was #define'd, 45/// is a language keyword, or if it is a front-end token of some sort (e.g. a 46/// variable or function name). The preprocessor keeps this information in a 47/// set, and all tok::identifier tokens have a pointer to one of these. 48class IdentifierInfo { 49 // Note: DON'T make TokenID a 'tok::TokenKind'; MSVC will treat it as a 50 // signed char and TokenKinds > 127 won't be handled correctly. 51 unsigned TokenID : 8; // Front-end token ID or tok::identifier. 
52 // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 53 // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values 54 // are for builtins. 55 unsigned ObjCOrBuiltinID :10; 56 bool HasMacro : 1; // True if there is a #define for this. 57 bool IsExtension : 1; // True if identifier is a lang extension. 58 bool IsPoisoned : 1; // True if identifier is poisoned. 59 bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword. 60 bool NeedsHandleIdentifier : 1; // See "RecomputeNeedsHandleIdentifier". 61 // 9 bits left in 32-bit word. 62 void *FETokenInfo; // Managed by the language front-end. 63 llvm::StringMapEntry<IdentifierInfo*> *Entry; 64 65 IdentifierInfo(const IdentifierInfo&); // NONCOPYABLE. 66 void operator=(const IdentifierInfo&); // NONASSIGNABLE. 67 68 friend class IdentifierTable; 69 70public: 71 IdentifierInfo(); 72 73 74 /// isStr - Return true if this is the identifier for the specified string. 75 /// This is intended to be used for string literals only: II->isStr("foo"). 76 template <std::size_t StrLen> 77 bool isStr(const char (&Str)[StrLen]) const { 78 return getLength() == StrLen-1 && !memcmp(getName(), Str, StrLen-1); 79 } 80 81 /// getName - Return the actual string for this identifier. The returned 82 /// string is properly null terminated. 83 /// 84 const char *getName() const { 85 if (Entry) return Entry->getKeyData(); 86 // FIXME: This is gross. It would be best not to embed specific details 87 // of the PTH file format here. 88 // The 'this' pointer really points to a 89 // std::pair<IdentifierInfo, const char*>, where internal pointer 90 // points to the external string data. 91 return ((std::pair<IdentifierInfo, const char*>*) this)->second; 92 } 93 94 /// getLength - Efficiently return the length of this identifier info. 95 /// 96 unsigned getLength() const { 97 if (Entry) return Entry->getKeyLength(); 98 // FIXME: This is gross. 
It would be best not to embed specific details 99 // of the PTH file format here. 100 // The 'this' pointer really points to a 101 // std::pair<IdentifierInfo, const char*>, where internal pointer 102 // points to the external string data. 103 const char* p = ((std::pair<IdentifierInfo, const char*>*) this)->second-2; 104 return (((unsigned) p[0]) 105 | (((unsigned) p[1]) << 8)) - 1; 106 } 107 108 /// hasMacroDefinition - Return true if this identifier is #defined to some 109 /// other value. 110 bool hasMacroDefinition() const { 111 return HasMacro; 112 } 113 void setHasMacroDefinition(bool Val) { 114 if (HasMacro == Val) return; 115 116 HasMacro = Val; 117 if (Val) 118 NeedsHandleIdentifier = 1; 119 else 120 RecomputeNeedsHandleIdentifier(); 121 } 122 123 /// get/setTokenID - If this is a source-language token (e.g. 'for'), this API 124 /// can be used to cause the lexer to map identifiers to source-language 125 /// tokens. 126 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 127 void setTokenID(tok::TokenKind ID) { TokenID = ID; } 128 129 /// getPPKeywordID - Return the preprocessor keyword ID for this identifier. 130 /// For example, "define" will return tok::pp_define. 131 tok::PPKeywordKind getPPKeywordID() const; 132 133 /// getObjCKeywordID - Return the Objective-C keyword ID for the this 134 /// identifier. For example, 'class' will return tok::objc_class if ObjC is 135 /// enabled. 136 tok::ObjCKeywordKind getObjCKeywordID() const { 137 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 138 return tok::ObjCKeywordKind(ObjCOrBuiltinID); 139 else 140 return tok::objc_not_keyword; 141 } 142 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 143 144 /// getBuiltinID - Return a value indicating whether this is a builtin 145 /// function. 0 is not-built-in. 1 is builtin-for-some-nonprimary-target. 146 /// 2+ are specific builtin functions. 
147 unsigned getBuiltinID() const { 148 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 149 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 150 else 151 return 0; 152 } 153 void setBuiltinID(unsigned ID) { 154 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 155 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 156 && "ID too large for field!"); 157 } 158 159 /// get/setExtension - Initialize information about whether or not this 160 /// language token is an extension. This controls extension warnings, and is 161 /// only valid if a custom token ID is set. 162 bool isExtensionToken() const { return IsExtension; } 163 void setIsExtensionToken(bool Val) { 164 IsExtension = Val; 165 if (Val) 166 NeedsHandleIdentifier = 1; 167 else 168 RecomputeNeedsHandleIdentifier(); 169 } 170 171 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 172 /// Preprocessor will emit an error every time this token is used. 173 void setIsPoisoned(bool Value = true) { 174 IsPoisoned = Value; 175 if (Value) 176 NeedsHandleIdentifier = 1; 177 else 178 RecomputeNeedsHandleIdentifier(); 179 } 180 181 /// isPoisoned - Return true if this token has been poisoned. 182 bool isPoisoned() const { return IsPoisoned; } 183 184 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 185 /// this identifier is a C++ alternate representation of an operator. 186 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 187 IsCPPOperatorKeyword = Val; 188 if (Val) 189 NeedsHandleIdentifier = 1; 190 else 191 RecomputeNeedsHandleIdentifier(); 192 } 193 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 194 195 /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to 196 /// associate arbitrary metadata with this token. 
197 template<typename T> 198 T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); } 199 void setFETokenInfo(void *T) { FETokenInfo = T; } 200 201 /// isHandleIdentifierCase - Return true if the Preprocessor::HandleIdentifier 202 /// must be called on a token of this identifier. If this returns false, we 203 /// know that HandleIdentifier will not affect the token. 204 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 205 206 /// Emit - Serialize this IdentifierInfo to a bitstream. 207 void Emit(llvm::Serializer& S) const; 208 209 /// Read - Deserialize an IdentifierInfo object from a bitstream. 210 void Read(llvm::Deserializer& D); 211 212private: 213 /// RecomputeNeedsHandleIdentifier - The Preprocessor::HandleIdentifier does 214 /// several special (but rare) things to identifiers of various sorts. For 215 /// example, it changes the "for" keyword token from tok::identifier to 216 /// tok::for. 217 /// 218 /// This method is very tied to the definition of HandleIdentifier. Any 219 /// change to it should be reflected here. 220 void RecomputeNeedsHandleIdentifier() { 221 NeedsHandleIdentifier = 222 (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() | 223 isExtensionToken()); 224 } 225}; 226 227/// IdentifierInfoLookup - An abstract class used by IdentifierTable that 228/// provides an interface for for performing lookups from strings 229/// (const char *) to IdentiferInfo objects. 230class IdentifierInfoLookup { 231public: 232 virtual ~IdentifierInfoLookup(); 233 234 /// get - Return the identifier token info for the specified named identifier. 235 /// Unlike the version in IdentifierTable, this returns a pointer instead 236 /// of a reference. If the pointer is NULL then the IdentifierInfo cannot 237 /// be found. 238 virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd) = 0; 239}; 240 241/// IdentifierTable - This table implements an efficient mapping from strings to 242/// IdentifierInfo nodes. 
It has no other purpose, but this is an 243/// extremely performance-critical piece of the code, as each occurrance of 244/// every identifier goes through here when lexed. 245class IdentifierTable { 246 // Shark shows that using MallocAllocator is *much* slower than using this 247 // BumpPtrAllocator! 248 typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy; 249 HashTableTy HashTable; 250 251 IdentifierInfoLookup* ExternalLookup; 252 253public: 254 /// IdentifierTable ctor - Create the identifier table, populating it with 255 /// info about the language keywords for the language specified by LangOpts. 256 IdentifierTable(const LangOptions &LangOpts, 257 IdentifierInfoLookup* externalLookup = 0); 258 259 llvm::BumpPtrAllocator& getAllocator() { 260 return HashTable.getAllocator(); 261 } 262 263 /// get - Return the identifier token info for the specified named identifier. 264 /// 265 IdentifierInfo &get(const char *NameStart, const char *NameEnd) { 266 llvm::StringMapEntry<IdentifierInfo*> &Entry = 267 HashTable.GetOrCreateValue(NameStart, NameEnd); 268 269 IdentifierInfo *II = Entry.getValue(); 270 271 if (!II) { 272 while (1) { 273 if (ExternalLookup) { 274 II = ExternalLookup->get(NameStart, NameEnd); 275 if (II) break; 276 } 277 278 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 279 II = new (Mem) IdentifierInfo(); 280 break; 281 } 282 283 Entry.setValue(II); 284 II->Entry = &Entry; 285 } 286 287 assert(II->Entry != 0); 288 return *II; 289 } 290 291 IdentifierInfo &get(const char *Name) { 292 return get(Name, Name+strlen(Name)); 293 } 294 IdentifierInfo &get(const std::string &Name) { 295 // Don't use c_str() here: no need to be null terminated. 
296 const char *NameBytes = &Name[0]; 297 return get(NameBytes, NameBytes+Name.size()); 298 } 299 300private: 301 typedef HashTableTy::const_iterator iterator; 302 typedef HashTableTy::const_iterator const_iterator; 303 304 iterator begin() const { return HashTable.begin(); } 305 iterator end() const { return HashTable.end(); } 306public: 307 308 unsigned size() const { return HashTable.size(); } 309 310 /// PrintStats - Print some statistics to stderr that indicate how well the 311 /// hashing is doing. 312 void PrintStats() const; 313 314 void AddKeywords(const LangOptions &LangOpts); 315 316 /// Emit - Serialize this IdentifierTable to a bitstream. This should 317 /// be called AFTER objects that externally reference the identifiers in the 318 /// table have been serialized. This is because only the identifiers that 319 /// are actually referenced are serialized. 320 void Emit(llvm::Serializer& S) const; 321 322 /// Create - Deserialize an IdentifierTable from a bitstream. 323 static IdentifierTable* CreateAndRegister(llvm::Deserializer& D); 324 325private: 326 /// This ctor is not intended to be used by anyone except for object 327 /// serialization. 328 IdentifierTable(); 329}; 330 331/// Selector - This smart pointer class efficiently represents Objective-C 332/// method names. This class will either point to an IdentifierInfo or a 333/// MultiKeywordSelector (which is private). This enables us to optimize 334/// selectors that take no arguments and selectors that take 1 argument, which 335/// accounts for 78% of all selectors in Cocoa.h. 336class Selector { 337 enum IdentifierInfoFlag { 338 // MultiKeywordSelector = 0. 339 ZeroArg = 0x1, 340 OneArg = 0x2, 341 ArgFlags = ZeroArg|OneArg 342 }; 343 uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo. 
344 345 Selector(IdentifierInfo *II, unsigned nArgs) { 346 InfoPtr = reinterpret_cast<uintptr_t>(II); 347 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 348 assert(nArgs < 2 && "nArgs not equal to 0/1"); 349 InfoPtr |= nArgs+1; 350 } 351 Selector(MultiKeywordSelector *SI) { 352 InfoPtr = reinterpret_cast<uintptr_t>(SI); 353 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 354 } 355 Selector(uintptr_t V) : InfoPtr(V) {} 356public: 357 friend class SelectorTable; // only the SelectorTable can create these 358 friend class DeclarationName; // and the AST's DeclarationName. 359 360 /// The default ctor should only be used when creating data structures that 361 /// will contain selectors. 362 Selector() : InfoPtr(0) {} 363 364 IdentifierInfo *getAsIdentifierInfo() const { 365 if (getIdentifierInfoFlag()) 366 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 367 return 0; 368 } 369 unsigned getIdentifierInfoFlag() const { 370 return InfoPtr & ArgFlags; 371 } 372 /// operator==/!= - Indicate whether the specified selectors are identical. 373 bool operator==(Selector RHS) const { 374 return InfoPtr == RHS.InfoPtr; 375 } 376 bool operator!=(Selector RHS) const { 377 return InfoPtr != RHS.InfoPtr; 378 } 379 void *getAsOpaquePtr() const { 380 return reinterpret_cast<void*>(InfoPtr); 381 } 382 // Predicates to identify the selector type. 383 bool isKeywordSelector() const { 384 return getIdentifierInfoFlag() != ZeroArg; 385 } 386 bool isUnarySelector() const { 387 return getIdentifierInfoFlag() == ZeroArg; 388 } 389 unsigned getNumArgs() const; 390 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 391 392 /// getAsString - Derive the full selector name (e.g. "foo:bar:") and return 393 /// it as an std::string. 
394 std::string getAsString() const; 395 396 static Selector getEmptyMarker() { 397 return Selector(uintptr_t(-1)); 398 } 399 static Selector getTombstoneMarker() { 400 return Selector(uintptr_t(-2)); 401 } 402 403 // Emit - Emit a selector to bitcode. 404 void Emit(llvm::Serializer& S) const; 405 406 // ReadVal - Read a selector from bitcode. 407 static Selector ReadVal(llvm::Deserializer& D); 408}; 409 410/// SelectorTable - This table allows us to fully hide how we implement 411/// multi-keyword caching. 412class SelectorTable { 413 void *Impl; // Actually a FoldingSet<MultiKeywordSelector>* 414 SelectorTable(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT 415 void operator=(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT 416public: 417 SelectorTable(); 418 ~SelectorTable(); 419 420 /// getSelector - This can create any sort of selector. NumArgs indicates 421 /// whether this is a no argument selector "foo", a single argument selector 422 /// "foo:" or multi-argument "foo:bar:". 423 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 424 425 Selector getUnarySelector(IdentifierInfo *ID) { 426 return Selector(ID, 1); 427 } 428 Selector getNullarySelector(IdentifierInfo *ID) { 429 return Selector(ID, 0); 430 } 431 432 // Emit - Emit a SelectorTable to bitcode. 433 void Emit(llvm::Serializer& S) const; 434 435 // Create - Reconstitute a SelectorTable from bitcode. 436 static SelectorTable* CreateAndRegister(llvm::Deserializer& D); 437}; 438 439/// DeclarationNameExtra - Common base of the MultiKeywordSelector, 440/// CXXSpecialName, and CXXOperatorIdName classes, all of which are 441/// private classes that describe different kinds of names. 442class DeclarationNameExtra { 443public: 444 /// ExtraKind - The kind of "extra" information stored in the 445 /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of 446 /// how these enumerator values are used. 
447 enum ExtraKind { 448 CXXConstructor = 0, 449 CXXDestructor, 450 CXXConversionFunction, 451#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \ 452 CXXOperator##Name, 453#include "clang/Basic/OperatorKinds.def" 454 CXXUsingDirective, 455 NUM_EXTRA_KINDS 456 }; 457 458 /// ExtraKindOrNumArgs - Either the kind of C++ special name or 459 /// operator-id (if the value is one of the CXX* enumerators of 460 /// ExtraKind), in which case the DeclarationNameExtra is also a 461 /// CXXSpecialName (for CXXConstructor, CXXDestructor, or 462 /// CXXConversionFunction) or CXXOperatorIdName, it may be also 463 /// name common to C++ using-directives (CXXUsingDirective), otherwise 464 /// it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of 465 /// arguments in the Objective-C selector, in which case the 466 /// DeclarationNameExtra is also a MultiKeywordSelector. 467 unsigned ExtraKindOrNumArgs; 468}; 469 470} // end namespace clang 471 472namespace llvm { 473/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 474/// DenseSets. 475template <> 476struct DenseMapInfo<clang::Selector> { 477 static inline clang::Selector getEmptyKey() { 478 return clang::Selector::getEmptyMarker(); 479 } 480 static inline clang::Selector getTombstoneKey() { 481 return clang::Selector::getTombstoneMarker(); 482 } 483 484 static unsigned getHashValue(clang::Selector S); 485 486 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 487 return LHS == RHS; 488 } 489 490 static bool isPod() { return true; } 491}; 492 493} // end namespace llvm 494#endif 495