IdentifierTable.cpp revision 8c5a760b82e73ed90b560090772db97e2ae27b09
1//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/IdentifierTable.h"
16#include "clang/Basic/LangOptions.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/DenseMap.h"
19#include <cstdio>
20
21using namespace clang;
22
23//===----------------------------------------------------------------------===//
24// IdentifierInfo Implementation
25//===----------------------------------------------------------------------===//
26
27IdentifierInfo::IdentifierInfo() {
28  TokenID = tok::identifier;
29  ObjCOrBuiltinID = 0;
30  HasMacro = false;
31  IsExtension = false;
32  IsPoisoned = false;
33  IsCPPOperatorKeyword = false;
34  NeedsHandleIdentifier = false;
35  FETokenInfo = 0;
36  Entry = 0;
37}
38
39//===----------------------------------------------------------------------===//
40// IdentifierTable Implementation
41//===----------------------------------------------------------------------===//
42
43IdentifierInfoLookup::~IdentifierInfoLookup() {}
44
45ExternalIdentifierLookup::~ExternalIdentifierLookup() {}
46
47IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
48                                 IdentifierInfoLookup* externalLookup)
49  : HashTable(8192), // Start with space for 8K identifiers.
50    ExternalLookup(externalLookup) {
51
52  // Populate the identifier table with info about keywords for the current
53  // language.
54  AddKeywords(LangOpts);
55}
56
57//===----------------------------------------------------------------------===//
58// Language Keyword Implementation
59//===----------------------------------------------------------------------===//
60
61/// AddKeyword - This method is used to associate a token ID with specific
62/// identifiers because they are language keywords.  This causes the lexer to
63/// automatically map matching identifiers to specialized token codes.
64///
65/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
66/// enabled in the specified langauge, set to 1 if it is an extension
67/// in the specified language, and set to 2 if disabled in the
68/// specified language.
69static void AddKeyword(const char *Keyword, unsigned KWLen,
70                       tok::TokenKind TokenCode,
71                       int C90, int C99, int CXX, int CXX0x, int BoolSupport,
72                       const LangOptions &LangOpts, IdentifierTable &Table) {
73  int Flags = 0;
74  if (BoolSupport != 0) {
75    Flags = LangOpts.CPlusPlus? 0 : LangOpts.Boolean ? BoolSupport : 2;
76  } else if (LangOpts.CPlusPlus) {
77    Flags = LangOpts.CPlusPlus0x ? CXX0x : CXX;
78  } else if (LangOpts.C99) {
79    Flags = C99;
80  } else {
81    Flags = C90;
82  }
83
84  // Don't add this keyword if disabled in this language or if an extension
85  // and extensions are disabled.
86  if (Flags + LangOpts.NoExtensions >= 2) return;
87
88  IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
89  Info.setTokenID(TokenCode);
90  Info.setIsExtensionToken(Flags == 1);
91}
92
93static void AddAlias(const char *Keyword, unsigned KWLen,
94                     tok::TokenKind AliaseeID,
95                     const char *AliaseeKeyword, unsigned AliaseeKWLen,
96                     const LangOptions &LangOpts, IdentifierTable &Table) {
97  IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
98  IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
99                                          AliaseeKeyword+AliaseeKWLen);
100  AliasInfo.setTokenID(AliaseeID);
101  AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
102}
103
104/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
105/// representations.
106static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
107                                  tok::TokenKind TokenCode,
108                                  IdentifierTable &Table) {
109  IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
110  Info.setTokenID(TokenCode);
111  Info.setIsCPlusPlusOperatorKeyword();
112}
113
114/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
115/// "property".
116static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
117                           const char *Name, unsigned NameLen,
118                           IdentifierTable &Table) {
119  Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
120}
121
122/// AddKeywords - Add all keywords to the symbol table.
123///
124void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
125  enum {
126    C90Shift = 0,
127    EXTC90   = 1 << C90Shift,
128    NOTC90   = 2 << C90Shift,
129    C99Shift = 2,
130    EXTC99   = 1 << C99Shift,
131    NOTC99   = 2 << C99Shift,
132    CPPShift = 4,
133    EXTCPP   = 1 << CPPShift,
134    NOTCPP   = 2 << CPPShift,
135    CPP0xShift = 6,
136    EXTCPP0x   = 1 << CPP0xShift,
137    NOTCPP0x   = 2 << CPP0xShift,
138    BoolShift = 8,
139    BOOLSUPPORT = 1 << BoolShift,
140    Mask     = 3
141  };
142
143  // Add keywords and tokens for the current language.
144#define KEYWORD(NAME, FLAGS) \
145  AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME,  \
146             ((FLAGS) >> C90Shift) & Mask, \
147             ((FLAGS) >> C99Shift) & Mask, \
148             ((FLAGS) >> CPPShift) & Mask, \
149             ((FLAGS) >> CPP0xShift) & Mask, \
150             ((FLAGS) >> BoolShift) & Mask, LangOpts, *this);
151#define ALIAS(NAME, TOK) \
152  AddAlias(NAME, strlen(NAME), tok::kw_ ## TOK, #TOK, strlen(#TOK),  \
153           LangOpts, *this);
154#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
155  if (LangOpts.CXXOperatorNames)          \
156    AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
157#define OBJC1_AT_KEYWORD(NAME) \
158  if (LangOpts.ObjC1)          \
159    AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
160#define OBJC2_AT_KEYWORD(NAME) \
161  if (LangOpts.ObjC2)          \
162    AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
163#include "clang/Basic/TokenKinds.def"
164}
165
166tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
167  // We use a perfect hash function here involving the length of the keyword,
168  // the first and third character.  For preprocessor ID's there are no
169  // collisions (if there were, the switch below would complain about duplicate
170  // case values).  Note that this depends on 'if' being null terminated.
171
172#define HASH(LEN, FIRST, THIRD) \
173  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
174#define CASE(LEN, FIRST, THIRD, NAME) \
175  case HASH(LEN, FIRST, THIRD): \
176    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
177
178  unsigned Len = getLength();
179  if (Len < 2) return tok::pp_not_keyword;
180  const char *Name = getName();
181  switch (HASH(Len, Name[0], Name[2])) {
182  default: return tok::pp_not_keyword;
183  CASE( 2, 'i', '\0', if);
184  CASE( 4, 'e', 'i', elif);
185  CASE( 4, 'e', 's', else);
186  CASE( 4, 'l', 'n', line);
187  CASE( 4, 's', 'c', sccs);
188  CASE( 5, 'e', 'd', endif);
189  CASE( 5, 'e', 'r', error);
190  CASE( 5, 'i', 'e', ident);
191  CASE( 5, 'i', 'd', ifdef);
192  CASE( 5, 'u', 'd', undef);
193
194  CASE( 6, 'a', 's', assert);
195  CASE( 6, 'd', 'f', define);
196  CASE( 6, 'i', 'n', ifndef);
197  CASE( 6, 'i', 'p', import);
198  CASE( 6, 'p', 'a', pragma);
199
200  CASE( 7, 'd', 'f', defined);
201  CASE( 7, 'i', 'c', include);
202  CASE( 7, 'w', 'r', warning);
203
204  CASE( 8, 'u', 'a', unassert);
205  CASE(12, 'i', 'c', include_next);
206
207  CASE(16, '_', 'i', __include_macros);
208#undef CASE
209#undef HASH
210  }
211}
212
213//===----------------------------------------------------------------------===//
214// Stats Implementation
215//===----------------------------------------------------------------------===//
216
217/// PrintStats - Print statistics about how well the identifier table is doing
218/// at hashing identifiers.
219void IdentifierTable::PrintStats() const {
220  unsigned NumBuckets = HashTable.getNumBuckets();
221  unsigned NumIdentifiers = HashTable.getNumItems();
222  unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
223  unsigned AverageIdentifierSize = 0;
224  unsigned MaxIdentifierLength = 0;
225
226  // TODO: Figure out maximum times an identifier had to probe for -stats.
227  for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
228       I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
229    unsigned IdLen = I->getKeyLength();
230    AverageIdentifierSize += IdLen;
231    if (MaxIdentifierLength < IdLen)
232      MaxIdentifierLength = IdLen;
233  }
234
235  fprintf(stderr, "\n*** Identifier Table Stats:\n");
236  fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
237  fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
238  fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
239          NumIdentifiers/(double)NumBuckets);
240  fprintf(stderr, "Ave identifier length: %f\n",
241          (AverageIdentifierSize/(double)NumIdentifiers));
242  fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
243
244  // Compute statistics about the memory allocated for identifiers.
245  HashTable.getAllocator().PrintStats();
246}
247
248//===----------------------------------------------------------------------===//
249// SelectorTable Implementation
250//===----------------------------------------------------------------------===//
251
252unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
253  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
254}
255
256namespace clang {
257/// MultiKeywordSelector - One of these variable length records is kept for each
258/// selector containing more than one keyword. We use a folding set
259/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
260/// this class is provided strictly through Selector.
261class MultiKeywordSelector
262  : public DeclarationNameExtra, public llvm::FoldingSetNode {
263  MultiKeywordSelector(unsigned nKeys) {
264    ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
265  }
266public:
267  // Constructor for keyword selectors.
268  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
269    assert((nKeys > 1) && "not a multi-keyword selector");
270    ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
271
272    // Fill in the trailing keyword array.
273    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
274    for (unsigned i = 0; i != nKeys; ++i)
275      KeyInfo[i] = IIV[i];
276  }
277
278  // getName - Derive the full selector name and return it.
279  std::string getName() const;
280
281  unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; }
282
283  typedef IdentifierInfo *const *keyword_iterator;
284  keyword_iterator keyword_begin() const {
285    return reinterpret_cast<keyword_iterator>(this+1);
286  }
287  keyword_iterator keyword_end() const {
288    return keyword_begin()+getNumArgs();
289  }
290  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
291    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
292    return keyword_begin()[i];
293  }
294  static void Profile(llvm::FoldingSetNodeID &ID,
295                      keyword_iterator ArgTys, unsigned NumArgs) {
296    ID.AddInteger(NumArgs);
297    for (unsigned i = 0; i != NumArgs; ++i)
298      ID.AddPointer(ArgTys[i]);
299  }
300  void Profile(llvm::FoldingSetNodeID &ID) {
301    Profile(ID, keyword_begin(), getNumArgs());
302  }
303};
304} // end namespace clang.
305
306unsigned Selector::getNumArgs() const {
307  unsigned IIF = getIdentifierInfoFlag();
308  if (IIF == ZeroArg)
309    return 0;
310  if (IIF == OneArg)
311    return 1;
312  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
313  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
314  return SI->getNumArgs();
315}
316
317IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
318  if (IdentifierInfo *II = getAsIdentifierInfo()) {
319    assert(argIndex == 0 && "illegal keyword index");
320    return II;
321  }
322  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
323  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
324  return SI->getIdentifierInfoForSlot(argIndex);
325}
326
327std::string MultiKeywordSelector::getName() const {
328  std::string Result;
329  unsigned Length = 0;
330  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
331    if (*I)
332      Length += (*I)->getLength();
333    ++Length;  // :
334  }
335
336  Result.reserve(Length);
337
338  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
339    if (*I)
340      Result.insert(Result.end(), (*I)->getName(),
341                    (*I)->getName()+(*I)->getLength());
342    Result.push_back(':');
343  }
344
345  return Result;
346}
347
348std::string Selector::getAsString() const {
349  if (InfoPtr & ArgFlags) {
350    IdentifierInfo *II = getAsIdentifierInfo();
351
352    // If the number of arguments is 0 then II is guaranteed to not be null.
353    if (getNumArgs() == 0)
354      return II->getName();
355
356    std::string Res = II ? II->getName() : "";
357    Res += ":";
358    return Res;
359  }
360
361  // We have a multiple keyword selector (no embedded flags).
362  return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
363}
364
365
366namespace {
367  struct SelectorTableImpl {
368    llvm::FoldingSet<MultiKeywordSelector> Table;
369    llvm::BumpPtrAllocator Allocator;
370  };
371} // end anonymous namespace.
372
373static SelectorTableImpl &getSelectorTableImpl(void *P) {
374  return *static_cast<SelectorTableImpl*>(P);
375}
376
377
378Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
379  if (nKeys < 2)
380    return Selector(IIV[0], nKeys);
381
382  SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
383
384  // Unique selector, to guarantee there is one per name.
385  llvm::FoldingSetNodeID ID;
386  MultiKeywordSelector::Profile(ID, IIV, nKeys);
387
388  void *InsertPos = 0;
389  if (MultiKeywordSelector *SI =
390        SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
391    return Selector(SI);
392
393  // MultiKeywordSelector objects are not allocated with new because they have a
394  // variable size array (for parameter types) at the end of them.
395  unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
396  MultiKeywordSelector *SI =
397    (MultiKeywordSelector*)SelTabImpl.Allocator.Allocate(Size,
398                                         llvm::alignof<MultiKeywordSelector>());
399  new (SI) MultiKeywordSelector(nKeys, IIV);
400  SelTabImpl.Table.InsertNode(SI, InsertPos);
401  return Selector(SI);
402}
403
404SelectorTable::SelectorTable() {
405  Impl = new SelectorTableImpl();
406}
407
408SelectorTable::~SelectorTable() {
409  delete &getSelectorTableImpl(Impl);
410}
411
412