IdentifierTable.cpp revision 5142af38ed0dd2f592cbfa00fa6e2e14dd6cc516
1//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/IdentifierTable.h"
16#include "clang/Basic/LangOptions.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21
22using namespace clang;
23
24//===----------------------------------------------------------------------===//
25// IdentifierInfo Implementation
26//===----------------------------------------------------------------------===//
27
28IdentifierInfo::IdentifierInfo() {
29  TokenID = tok::identifier;
30  ObjCOrBuiltinID = 0;
31  HasMacro = false;
32  IsExtension = false;
33  IsPoisoned = false;
34  IsCPPOperatorKeyword = false;
35  FETokenInfo = 0;
36}
37
38//===----------------------------------------------------------------------===//
39// IdentifierTable Implementation
40//===----------------------------------------------------------------------===//
41
42IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
43  // Start with space for 8K identifiers.
44  : HashTable(8192) {
45
46  // Populate the identifier table with info about keywords for the current
47  // language.
48  AddKeywords(LangOpts);
49}
50
51// This cstor is intended to be used only for serialization.
52IdentifierTable::IdentifierTable() : HashTable(8192) {}
53
54//===----------------------------------------------------------------------===//
55// Language Keyword Implementation
56//===----------------------------------------------------------------------===//
57
58/// AddKeyword - This method is used to associate a token ID with specific
59/// identifiers because they are language keywords.  This causes the lexer to
60/// automatically map matching identifiers to specialized token codes.
61///
62/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
63/// enabled in the specified langauge, set to 1 if it is an extension
64/// in the specified language, and set to 2 if disabled in the
65/// specified language.
66static void AddKeyword(const char *Keyword, unsigned KWLen,
67                       tok::TokenKind TokenCode,
68                       int C90, int C99, int CXX, int CXX0x, int BoolSupport,
69                       const LangOptions &LangOpts, IdentifierTable &Table) {
70  int Flags = 0;
71  if (BoolSupport != 0) {
72    Flags = LangOpts.CPlusPlus? 0 : LangOpts.Boolean ? BoolSupport : 2;
73  } else if (LangOpts.CPlusPlus) {
74    Flags = LangOpts.CPlusPlus0x ? CXX0x : CXX;
75  } else if (LangOpts.C99) {
76    Flags = C99;
77  } else {
78    Flags = C90;
79  }
80
81  // Don't add this keyword if disabled in this language or if an extension
82  // and extensions are disabled.
83  if (Flags + LangOpts.NoExtensions >= 2) return;
84
85  IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
86  Info.setTokenID(TokenCode);
87  Info.setIsExtensionToken(Flags == 1);
88}
89
90static void AddAlias(const char *Keyword, unsigned KWLen,
91                     tok::TokenKind AliaseeID,
92                     const char *AliaseeKeyword, unsigned AliaseeKWLen,
93                     const LangOptions &LangOpts, IdentifierTable &Table) {
94  IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
95  IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
96                                          AliaseeKeyword+AliaseeKWLen);
97  AliasInfo.setTokenID(AliaseeID);
98  AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
99}
100
101/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
102/// representations.
103static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
104                                  tok::TokenKind TokenCode,
105                                  IdentifierTable &Table) {
106  IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
107  Info.setTokenID(TokenCode);
108  Info.setIsCPlusPlusOperatorKeyword();
109}
110
111/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
112/// "property".
113static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
114                           const char *Name, unsigned NameLen,
115                           IdentifierTable &Table) {
116  Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
117}
118
119/// AddKeywords - Add all keywords to the symbol table.
120///
121void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
122  enum {
123    C90Shift = 0,
124    EXTC90   = 1 << C90Shift,
125    NOTC90   = 2 << C90Shift,
126    C99Shift = 2,
127    EXTC99   = 1 << C99Shift,
128    NOTC99   = 2 << C99Shift,
129    CPPShift = 4,
130    EXTCPP   = 1 << CPPShift,
131    NOTCPP   = 2 << CPPShift,
132    CPP0xShift = 6,
133    EXTCPP0x   = 1 << CPP0xShift,
134    NOTCPP0x   = 2 << CPP0xShift,
135    BoolShift = 8,
136    BOOLSUPPORT = 1 << BoolShift,
137    Mask     = 3
138  };
139
140  // Add keywords and tokens for the current language.
141#define KEYWORD(NAME, FLAGS) \
142  AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME,  \
143             ((FLAGS) >> C90Shift) & Mask, \
144             ((FLAGS) >> C99Shift) & Mask, \
145             ((FLAGS) >> CPPShift) & Mask, \
146             ((FLAGS) >> CPP0xShift) & Mask, \
147             ((FLAGS) >> BoolShift) & Mask, LangOpts, *this);
148#define ALIAS(NAME, TOK) \
149  AddAlias(NAME, strlen(NAME), tok::kw_ ## TOK, #TOK, strlen(#TOK),  \
150           LangOpts, *this);
151#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
152  if (LangOpts.CXXOperatorNames)          \
153    AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
154#define OBJC1_AT_KEYWORD(NAME) \
155  if (LangOpts.ObjC1)          \
156    AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
157#define OBJC2_AT_KEYWORD(NAME) \
158  if (LangOpts.ObjC2)          \
159    AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
160#include "clang/Basic/TokenKinds.def"
161}
162
163tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
164  // We use a perfect hash function here involving the length of the keyword,
165  // the first and third character.  For preprocessor ID's there are no
166  // collisions (if there were, the switch below would complain about duplicate
167  // case values).  Note that this depends on 'if' being null terminated.
168
169#define HASH(LEN, FIRST, THIRD) \
170  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
171#define CASE(LEN, FIRST, THIRD, NAME) \
172  case HASH(LEN, FIRST, THIRD): \
173    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
174
175  unsigned Len = getLength();
176  if (Len < 2) return tok::pp_not_keyword;
177  const char *Name = getName();
178  switch (HASH(Len, Name[0], Name[2])) {
179  default: return tok::pp_not_keyword;
180  CASE( 2, 'i', '\0', if);
181  CASE( 4, 'e', 'i', elif);
182  CASE( 4, 'e', 's', else);
183  CASE( 4, 'l', 'n', line);
184  CASE( 4, 's', 'c', sccs);
185  CASE( 5, 'e', 'd', endif);
186  CASE( 5, 'e', 'r', error);
187  CASE( 5, 'i', 'e', ident);
188  CASE( 5, 'i', 'd', ifdef);
189  CASE( 5, 'u', 'd', undef);
190
191  CASE( 6, 'a', 's', assert);
192  CASE( 6, 'd', 'f', define);
193  CASE( 6, 'i', 'n', ifndef);
194  CASE( 6, 'i', 'p', import);
195  CASE( 6, 'p', 'a', pragma);
196
197  CASE( 7, 'd', 'f', defined);
198  CASE( 7, 'i', 'c', include);
199  CASE( 7, 'w', 'r', warning);
200
201  CASE( 8, 'u', 'a', unassert);
202  CASE(12, 'i', 'c', include_next);
203#undef CASE
204#undef HASH
205  }
206}
207
208//===----------------------------------------------------------------------===//
209// Stats Implementation
210//===----------------------------------------------------------------------===//
211
212/// PrintStats - Print statistics about how well the identifier table is doing
213/// at hashing identifiers.
214void IdentifierTable::PrintStats() const {
215  unsigned NumBuckets = HashTable.getNumBuckets();
216  unsigned NumIdentifiers = HashTable.getNumItems();
217  unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
218  unsigned AverageIdentifierSize = 0;
219  unsigned MaxIdentifierLength = 0;
220
221  // TODO: Figure out maximum times an identifier had to probe for -stats.
222  for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
223       I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
224    unsigned IdLen = I->getKeyLength();
225    AverageIdentifierSize += IdLen;
226    if (MaxIdentifierLength < IdLen)
227      MaxIdentifierLength = IdLen;
228  }
229
230  fprintf(stderr, "\n*** Identifier Table Stats:\n");
231  fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
232  fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
233  fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
234          NumIdentifiers/(double)NumBuckets);
235  fprintf(stderr, "Ave identifier length: %f\n",
236          (AverageIdentifierSize/(double)NumIdentifiers));
237  fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
238
239  // Compute statistics about the memory allocated for identifiers.
240  HashTable.getAllocator().PrintStats();
241}
242
243//===----------------------------------------------------------------------===//
244// SelectorTable Implementation
245//===----------------------------------------------------------------------===//
246
247unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
248  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
249}
250
251
252/// MultiKeywordSelector - One of these variable length records is kept for each
253/// selector containing more than one keyword. We use a folding set
254/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
255/// this class is provided strictly through Selector.
256namespace clang {
257class MultiKeywordSelector : public llvm::FoldingSetNode {
258  friend SelectorTable* SelectorTable::CreateAndRegister(llvm::Deserializer&);
259  MultiKeywordSelector(unsigned nKeys) : NumArgs(nKeys) {}
260public:
261  unsigned NumArgs;
262
263  // Constructor for keyword selectors.
264  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
265    assert((nKeys > 1) && "not a multi-keyword selector");
266    NumArgs = nKeys;
267
268    // Fill in the trailing keyword array.
269    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
270    for (unsigned i = 0; i != nKeys; ++i)
271      KeyInfo[i] = IIV[i];
272  }
273
274  // getName - Derive the full selector name and return it.
275  std::string getName() const;
276
277  unsigned getNumArgs() const { return NumArgs; }
278
279  typedef IdentifierInfo *const *keyword_iterator;
280  keyword_iterator keyword_begin() const {
281    return reinterpret_cast<keyword_iterator>(this+1);
282  }
283  keyword_iterator keyword_end() const {
284    return keyword_begin()+NumArgs;
285  }
286  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
287    assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
288    return keyword_begin()[i];
289  }
290  static void Profile(llvm::FoldingSetNodeID &ID,
291                      keyword_iterator ArgTys, unsigned NumArgs) {
292    ID.AddInteger(NumArgs);
293    for (unsigned i = 0; i != NumArgs; ++i)
294      ID.AddPointer(ArgTys[i]);
295  }
296  void Profile(llvm::FoldingSetNodeID &ID) {
297    Profile(ID, keyword_begin(), NumArgs);
298  }
299};
300} // end namespace clang.
301
302unsigned Selector::getNumArgs() const {
303  unsigned IIF = getIdentifierInfoFlag();
304  if (IIF == ZeroArg)
305    return 0;
306  if (IIF == OneArg)
307    return 1;
308  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
309  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
310  return SI->getNumArgs();
311}
312
313IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
314  if (IdentifierInfo *II = getAsIdentifierInfo()) {
315    assert(argIndex == 0 && "illegal keyword index");
316    return II;
317  }
318  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
319  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
320  return SI->getIdentifierInfoForSlot(argIndex);
321}
322
323std::string MultiKeywordSelector::getName() const {
324  std::string Result;
325  unsigned Length = 0;
326  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
327    if (*I)
328      Length += (*I)->getLength();
329    ++Length;  // :
330  }
331
332  Result.reserve(Length);
333
334  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
335    if (*I)
336      Result.insert(Result.end(), (*I)->getName(),
337                    (*I)->getName()+(*I)->getLength());
338    Result.push_back(':');
339  }
340
341  return Result;
342}
343
344std::string Selector::getName() const {
345  if (IdentifierInfo *II = getAsIdentifierInfo()) {
346    if (getNumArgs() == 0)
347      return II->getName();
348
349    std::string Res = II->getName();
350    Res += ":";
351    return Res;
352  }
353
354  // We have a multiple keyword selector (no embedded flags).
355  return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
356}
357
358
359Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
360  if (nKeys < 2)
361    return Selector(IIV[0], nKeys);
362
363  llvm::FoldingSet<MultiKeywordSelector> *SelTab;
364
365  SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
366
367  // Unique selector, to guarantee there is one per name.
368  llvm::FoldingSetNodeID ID;
369  MultiKeywordSelector::Profile(ID, IIV, nKeys);
370
371  void *InsertPos = 0;
372  if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
373    return Selector(SI);
374
375  // MultiKeywordSelector objects are not allocated with new because they have a
376  // variable size array (for parameter types) at the end of them.
377  MultiKeywordSelector *SI =
378    (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
379                                  nKeys*sizeof(IdentifierInfo *));
380  new (SI) MultiKeywordSelector(nKeys, IIV);
381  SelTab->InsertNode(SI, InsertPos);
382  return Selector(SI);
383}
384
385SelectorTable::SelectorTable() {
386  Impl = new llvm::FoldingSet<MultiKeywordSelector>;
387}
388
389SelectorTable::~SelectorTable() {
390  delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
391}
392
393//===----------------------------------------------------------------------===//
394// Serialization for IdentifierInfo and IdentifierTable.
395//===----------------------------------------------------------------------===//
396
397void IdentifierInfo::Emit(llvm::Serializer& S) const {
398  S.EmitInt(getTokenID());
399  S.EmitInt(getBuiltinID());
400  S.EmitInt(getObjCKeywordID());
401  S.EmitBool(hasMacroDefinition());
402  S.EmitBool(isExtensionToken());
403  S.EmitBool(isPoisoned());
404  S.EmitBool(isCPlusPlusOperatorKeyword());
405  // FIXME: FETokenInfo
406}
407
408void IdentifierInfo::Read(llvm::Deserializer& D) {
409  setTokenID((tok::TokenKind) D.ReadInt());
410  setBuiltinID(D.ReadInt());
411  setObjCKeywordID((tok::ObjCKeywordKind) D.ReadInt());
412  setHasMacroDefinition(D.ReadBool());
413  setIsExtensionToken(D.ReadBool());
414  setIsPoisoned(D.ReadBool());
415  setIsCPlusPlusOperatorKeyword(D.ReadBool());
416  // FIXME: FETokenInfo
417}
418
419void IdentifierTable::Emit(llvm::Serializer& S) const {
420  S.EnterBlock();
421
422  S.EmitPtr(this);
423
424  for (iterator I=begin(), E=end(); I != E; ++I) {
425    const char* Key = I->getKeyData();
426    const IdentifierInfo* Info = &I->getValue();
427
428    bool KeyRegistered = S.isRegistered(Key);
429    bool InfoRegistered = S.isRegistered(Info);
430
431    if (KeyRegistered || InfoRegistered) {
432      // These acrobatics are so that we don't incur the cost of registering
433      // a pointer with the backpatcher during deserialization if nobody
434      // references the object.
435      S.EmitPtr(InfoRegistered ? Info : NULL);
436      S.EmitPtr(KeyRegistered ? Key : NULL);
437      S.EmitCStr(Key);
438      S.Emit(*Info);
439    }
440  }
441
442  S.ExitBlock();
443}
444
445IdentifierTable* IdentifierTable::CreateAndRegister(llvm::Deserializer& D) {
446  llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
447
448  std::vector<char> buff;
449  buff.reserve(200);
450
451  IdentifierTable* t = new IdentifierTable();
452  D.RegisterPtr(t);
453
454  while (!D.FinishedBlock(BLoc)) {
455    llvm::SerializedPtrID InfoPtrID = D.ReadPtrID();
456    llvm::SerializedPtrID KeyPtrID = D.ReadPtrID();
457
458    D.ReadCStr(buff);
459
460    llvm::StringMapEntry<IdentifierInfo>& Entry =
461      t->HashTable.GetOrCreateValue(&buff[0],&buff[0]+buff.size());
462
463    D.Read(Entry.getValue());
464
465    if (InfoPtrID)
466      D.RegisterRef(InfoPtrID,Entry.getValue());
467
468    if (KeyPtrID)
469      D.RegisterPtr(KeyPtrID,Entry.getKeyData());
470  }
471
472  return t;
473}
474
475//===----------------------------------------------------------------------===//
476// Serialization for Selector and SelectorTable.
477//===----------------------------------------------------------------------===//
478
479void Selector::Emit(llvm::Serializer& S) const {
480  S.EmitInt(getIdentifierInfoFlag());
481  S.EmitPtr(reinterpret_cast<void*>(InfoPtr & ~ArgFlags));
482}
483
484Selector Selector::ReadVal(llvm::Deserializer& D) {
485  unsigned flag = D.ReadInt();
486
487  uintptr_t ptr;
488  D.ReadUIntPtr(ptr,false); // No backpatching.
489
490  return Selector(ptr | flag);
491}
492
493void SelectorTable::Emit(llvm::Serializer& S) const {
494  typedef llvm::FoldingSet<MultiKeywordSelector>::iterator iterator;
495  llvm::FoldingSet<MultiKeywordSelector> *SelTab;
496  SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
497
498  S.EnterBlock();
499
500  S.EmitPtr(this);
501
502  for (iterator I=SelTab->begin(), E=SelTab->end(); I != E; ++I) {
503    if (!S.isRegistered(&*I))
504      continue;
505
506    S.FlushRecord(); // Start a new record.
507
508    S.EmitPtr(&*I);
509    S.EmitInt(I->getNumArgs());
510
511    for (MultiKeywordSelector::keyword_iterator KI = I->keyword_begin(),
512         KE = I->keyword_end(); KI != KE; ++KI)
513      S.EmitPtr(*KI);
514  }
515
516  S.ExitBlock();
517}
518
519SelectorTable* SelectorTable::CreateAndRegister(llvm::Deserializer& D) {
520  llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
521
522  SelectorTable* t = new SelectorTable();
523  D.RegisterPtr(t);
524
525  llvm::FoldingSet<MultiKeywordSelector>& SelTab =
526    *static_cast<llvm::FoldingSet<MultiKeywordSelector>*>(t->Impl);
527
528  while (!D.FinishedBlock(BLoc)) {
529
530    llvm::SerializedPtrID PtrID = D.ReadPtrID();
531    unsigned nKeys = D.ReadInt();
532
533    MultiKeywordSelector *SI =
534      (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
535                                    nKeys*sizeof(IdentifierInfo *));
536
537    new (SI) MultiKeywordSelector(nKeys);
538
539    D.RegisterPtr(PtrID,SI);
540
541    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(SI+1);
542
543    for (unsigned i = 0; i != nKeys; ++i)
544      D.ReadPtr(KeyInfo[i],false);
545
546    SelTab.GetOrInsertNode(SI);
547  }
548
549  return t;
550}
551