IdentifierTable.cpp revision 8c5a760b82e73ed90b560090772db97e2ae27b09
1//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the IdentifierInfo, IdentifierVisitor, and 11// IdentifierTable interfaces. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Basic/IdentifierTable.h" 16#include "clang/Basic/LangOptions.h" 17#include "llvm/ADT/FoldingSet.h" 18#include "llvm/ADT/DenseMap.h" 19#include <cstdio> 20 21using namespace clang; 22 23//===----------------------------------------------------------------------===// 24// IdentifierInfo Implementation 25//===----------------------------------------------------------------------===// 26 27IdentifierInfo::IdentifierInfo() { 28 TokenID = tok::identifier; 29 ObjCOrBuiltinID = 0; 30 HasMacro = false; 31 IsExtension = false; 32 IsPoisoned = false; 33 IsCPPOperatorKeyword = false; 34 NeedsHandleIdentifier = false; 35 FETokenInfo = 0; 36 Entry = 0; 37} 38 39//===----------------------------------------------------------------------===// 40// IdentifierTable Implementation 41//===----------------------------------------------------------------------===// 42 43IdentifierInfoLookup::~IdentifierInfoLookup() {} 44 45ExternalIdentifierLookup::~ExternalIdentifierLookup() {} 46 47IdentifierTable::IdentifierTable(const LangOptions &LangOpts, 48 IdentifierInfoLookup* externalLookup) 49 : HashTable(8192), // Start with space for 8K identifiers. 50 ExternalLookup(externalLookup) { 51 52 // Populate the identifier table with info about keywords for the current 53 // language. 54 AddKeywords(LangOpts); 55} 56 57//===----------------------------------------------------------------------===// 58// Language Keyword Implementation 59//===----------------------------------------------------------------------===// 60 61/// AddKeyword - This method is used to associate a token ID with specific 62/// identifiers because they are language keywords. This causes the lexer to 63/// automatically map matching identifiers to specialized token codes. 64/// 65/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be 66/// enabled in the specified langauge, set to 1 if it is an extension 67/// in the specified language, and set to 2 if disabled in the 68/// specified language. 69static void AddKeyword(const char *Keyword, unsigned KWLen, 70 tok::TokenKind TokenCode, 71 int C90, int C99, int CXX, int CXX0x, int BoolSupport, 72 const LangOptions &LangOpts, IdentifierTable &Table) { 73 int Flags = 0; 74 if (BoolSupport != 0) { 75 Flags = LangOpts.CPlusPlus? 0 : LangOpts.Boolean ? BoolSupport : 2; 76 } else if (LangOpts.CPlusPlus) { 77 Flags = LangOpts.CPlusPlus0x ? CXX0x : CXX; 78 } else if (LangOpts.C99) { 79 Flags = C99; 80 } else { 81 Flags = C90; 82 } 83 84 // Don't add this keyword if disabled in this language or if an extension 85 // and extensions are disabled. 86 if (Flags + LangOpts.NoExtensions >= 2) return; 87 88 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen); 89 Info.setTokenID(TokenCode); 90 Info.setIsExtensionToken(Flags == 1); 91} 92 93static void AddAlias(const char *Keyword, unsigned KWLen, 94 tok::TokenKind AliaseeID, 95 const char *AliaseeKeyword, unsigned AliaseeKWLen, 96 const LangOptions &LangOpts, IdentifierTable &Table) { 97 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen); 98 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword, 99 AliaseeKeyword+AliaseeKWLen); 100 AliasInfo.setTokenID(AliaseeID); 101 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken()); 102} 103 104/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative 105/// representations. 106static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen, 107 tok::TokenKind TokenCode, 108 IdentifierTable &Table) { 109 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen); 110 Info.setTokenID(TokenCode); 111 Info.setIsCPlusPlusOperatorKeyword(); 112} 113 114/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or 115/// "property". 116static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID, 117 const char *Name, unsigned NameLen, 118 IdentifierTable &Table) { 119 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID); 120} 121 122/// AddKeywords - Add all keywords to the symbol table. 123/// 124void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { 125 enum { 126 C90Shift = 0, 127 EXTC90 = 1 << C90Shift, 128 NOTC90 = 2 << C90Shift, 129 C99Shift = 2, 130 EXTC99 = 1 << C99Shift, 131 NOTC99 = 2 << C99Shift, 132 CPPShift = 4, 133 EXTCPP = 1 << CPPShift, 134 NOTCPP = 2 << CPPShift, 135 CPP0xShift = 6, 136 EXTCPP0x = 1 << CPP0xShift, 137 NOTCPP0x = 2 << CPP0xShift, 138 BoolShift = 8, 139 BOOLSUPPORT = 1 << BoolShift, 140 Mask = 3 141 }; 142 143 // Add keywords and tokens for the current language. 144#define KEYWORD(NAME, FLAGS) \ 145 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \ 146 ((FLAGS) >> C90Shift) & Mask, \ 147 ((FLAGS) >> C99Shift) & Mask, \ 148 ((FLAGS) >> CPPShift) & Mask, \ 149 ((FLAGS) >> CPP0xShift) & Mask, \ 150 ((FLAGS) >> BoolShift) & Mask, LangOpts, *this); 151#define ALIAS(NAME, TOK) \ 152 AddAlias(NAME, strlen(NAME), tok::kw_ ## TOK, #TOK, strlen(#TOK), \ 153 LangOpts, *this); 154#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ 155 if (LangOpts.CXXOperatorNames) \ 156 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this); 157#define OBJC1_AT_KEYWORD(NAME) \ 158 if (LangOpts.ObjC1) \ 159 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this); 160#define OBJC2_AT_KEYWORD(NAME) \ 161 if (LangOpts.ObjC2) \ 162 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this); 163#include "clang/Basic/TokenKinds.def" 164} 165 166tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { 167 // We use a perfect hash function here involving the length of the keyword, 168 // the first and third character. For preprocessor ID's there are no 169 // collisions (if there were, the switch below would complain about duplicate 170 // case values). Note that this depends on 'if' being null terminated. 171 172#define HASH(LEN, FIRST, THIRD) \ 173 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) 174#define CASE(LEN, FIRST, THIRD, NAME) \ 175 case HASH(LEN, FIRST, THIRD): \ 176 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME 177 178 unsigned Len = getLength(); 179 if (Len < 2) return tok::pp_not_keyword; 180 const char *Name = getName(); 181 switch (HASH(Len, Name[0], Name[2])) { 182 default: return tok::pp_not_keyword; 183 CASE( 2, 'i', '\0', if); 184 CASE( 4, 'e', 'i', elif); 185 CASE( 4, 'e', 's', else); 186 CASE( 4, 'l', 'n', line); 187 CASE( 4, 's', 'c', sccs); 188 CASE( 5, 'e', 'd', endif); 189 CASE( 5, 'e', 'r', error); 190 CASE( 5, 'i', 'e', ident); 191 CASE( 5, 'i', 'd', ifdef); 192 CASE( 5, 'u', 'd', undef); 193 194 CASE( 6, 'a', 's', assert); 195 CASE( 6, 'd', 'f', define); 196 CASE( 6, 'i', 'n', ifndef); 197 CASE( 6, 'i', 'p', import); 198 CASE( 6, 'p', 'a', pragma); 199 200 CASE( 7, 'd', 'f', defined); 201 CASE( 7, 'i', 'c', include); 202 CASE( 7, 'w', 'r', warning); 203 204 CASE( 8, 'u', 'a', unassert); 205 CASE(12, 'i', 'c', include_next); 206 207 CASE(16, '_', 'i', __include_macros); 208#undef CASE 209#undef HASH 210 } 211} 212 213//===----------------------------------------------------------------------===// 214// Stats Implementation 215//===----------------------------------------------------------------------===// 216 217/// PrintStats - Print statistics about how well the identifier table is doing 218/// at hashing identifiers. 219void IdentifierTable::PrintStats() const { 220 unsigned NumBuckets = HashTable.getNumBuckets(); 221 unsigned NumIdentifiers = HashTable.getNumItems(); 222 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers; 223 unsigned AverageIdentifierSize = 0; 224 unsigned MaxIdentifierLength = 0; 225 226 // TODO: Figure out maximum times an identifier had to probe for -stats. 227 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator 228 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) { 229 unsigned IdLen = I->getKeyLength(); 230 AverageIdentifierSize += IdLen; 231 if (MaxIdentifierLength < IdLen) 232 MaxIdentifierLength = IdLen; 233 } 234 235 fprintf(stderr, "\n*** Identifier Table Stats:\n"); 236 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers); 237 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets); 238 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n", 239 NumIdentifiers/(double)NumBuckets); 240 fprintf(stderr, "Ave identifier length: %f\n", 241 (AverageIdentifierSize/(double)NumIdentifiers)); 242 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength); 243 244 // Compute statistics about the memory allocated for identifiers. 245 HashTable.getAllocator().PrintStats(); 246} 247 248//===----------------------------------------------------------------------===// 249// SelectorTable Implementation 250//===----------------------------------------------------------------------===// 251 252unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) { 253 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr()); 254} 255 256namespace clang { 257/// MultiKeywordSelector - One of these variable length records is kept for each 258/// selector containing more than one keyword. We use a folding set 259/// to unique aggregate names (keyword selectors in ObjC parlance). Access to 260/// this class is provided strictly through Selector. 261class MultiKeywordSelector 262 : public DeclarationNameExtra, public llvm::FoldingSetNode { 263 MultiKeywordSelector(unsigned nKeys) { 264 ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys; 265 } 266public: 267 // Constructor for keyword selectors. 268 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) { 269 assert((nKeys > 1) && "not a multi-keyword selector"); 270 ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys; 271 272 // Fill in the trailing keyword array. 273 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1); 274 for (unsigned i = 0; i != nKeys; ++i) 275 KeyInfo[i] = IIV[i]; 276 } 277 278 // getName - Derive the full selector name and return it. 279 std::string getName() const; 280 281 unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; } 282 283 typedef IdentifierInfo *const *keyword_iterator; 284 keyword_iterator keyword_begin() const { 285 return reinterpret_cast<keyword_iterator>(this+1); 286 } 287 keyword_iterator keyword_end() const { 288 return keyword_begin()+getNumArgs(); 289 } 290 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { 291 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index"); 292 return keyword_begin()[i]; 293 } 294 static void Profile(llvm::FoldingSetNodeID &ID, 295 keyword_iterator ArgTys, unsigned NumArgs) { 296 ID.AddInteger(NumArgs); 297 for (unsigned i = 0; i != NumArgs; ++i) 298 ID.AddPointer(ArgTys[i]); 299 } 300 void Profile(llvm::FoldingSetNodeID &ID) { 301 Profile(ID, keyword_begin(), getNumArgs()); 302 } 303}; 304} // end namespace clang. 305 306unsigned Selector::getNumArgs() const { 307 unsigned IIF = getIdentifierInfoFlag(); 308 if (IIF == ZeroArg) 309 return 0; 310 if (IIF == OneArg) 311 return 1; 312 // We point to a MultiKeywordSelector (pointer doesn't contain any flags). 313 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr); 314 return SI->getNumArgs(); 315} 316 317IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { 318 if (IdentifierInfo *II = getAsIdentifierInfo()) { 319 assert(argIndex == 0 && "illegal keyword index"); 320 return II; 321 } 322 // We point to a MultiKeywordSelector (pointer doesn't contain any flags). 323 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr); 324 return SI->getIdentifierInfoForSlot(argIndex); 325} 326 327std::string MultiKeywordSelector::getName() const { 328 std::string Result; 329 unsigned Length = 0; 330 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { 331 if (*I) 332 Length += (*I)->getLength(); 333 ++Length; // : 334 } 335 336 Result.reserve(Length); 337 338 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { 339 if (*I) 340 Result.insert(Result.end(), (*I)->getName(), 341 (*I)->getName()+(*I)->getLength()); 342 Result.push_back(':'); 343 } 344 345 return Result; 346} 347 348std::string Selector::getAsString() const { 349 if (InfoPtr & ArgFlags) { 350 IdentifierInfo *II = getAsIdentifierInfo(); 351 352 // If the number of arguments is 0 then II is guaranteed to not be null. 353 if (getNumArgs() == 0) 354 return II->getName(); 355 356 std::string Res = II ? II->getName() : ""; 357 Res += ":"; 358 return Res; 359 } 360 361 // We have a multiple keyword selector (no embedded flags). 362 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName(); 363} 364 365 366namespace { 367 struct SelectorTableImpl { 368 llvm::FoldingSet<MultiKeywordSelector> Table; 369 llvm::BumpPtrAllocator Allocator; 370 }; 371} // end anonymous namespace. 372 373static SelectorTableImpl &getSelectorTableImpl(void *P) { 374 return *static_cast<SelectorTableImpl*>(P); 375} 376 377 378Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) { 379 if (nKeys < 2) 380 return Selector(IIV[0], nKeys); 381 382 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl); 383 384 // Unique selector, to guarantee there is one per name. 385 llvm::FoldingSetNodeID ID; 386 MultiKeywordSelector::Profile(ID, IIV, nKeys); 387 388 void *InsertPos = 0; 389 if (MultiKeywordSelector *SI = 390 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos)) 391 return Selector(SI); 392 393 // MultiKeywordSelector objects are not allocated with new because they have a 394 // variable size array (for parameter types) at the end of them. 395 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *); 396 MultiKeywordSelector *SI = 397 (MultiKeywordSelector*)SelTabImpl.Allocator.Allocate(Size, 398 llvm::alignof<MultiKeywordSelector>()); 399 new (SI) MultiKeywordSelector(nKeys, IIV); 400 SelTabImpl.Table.InsertNode(SI, InsertPos); 401 return Selector(SI); 402} 403 404SelectorTable::SelectorTable() { 405 Impl = new SelectorTableImpl(); 406} 407 408SelectorTable::~SelectorTable() { 409 delete &getSelectorTableImpl(Impl); 410} 411 412