IdentifierTable.cpp revision 3c7f4134603d04b44f997b43c0a9def270f25386
1//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/IdentifierTable.h"
16#include "clang/Basic/LangOptions.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/raw_ostream.h"
21#include <cstdio>
22
23using namespace clang;
24
25//===----------------------------------------------------------------------===//
26// IdentifierInfo Implementation
27//===----------------------------------------------------------------------===//
28
29IdentifierInfo::IdentifierInfo() {
30  TokenID = tok::identifier;
31  ObjCOrBuiltinID = 0;
32  HasMacro = false;
33  IsExtension = false;
34  IsPoisoned = false;
35  IsCPPOperatorKeyword = false;
36  NeedsHandleIdentifier = false;
37  IsFromAST = false;
38  RevertedTokenID = false;
39  FETokenInfo = 0;
40  Entry = 0;
41}
42
43//===----------------------------------------------------------------------===//
44// IdentifierTable Implementation
45//===----------------------------------------------------------------------===//
46
47IdentifierInfoLookup::~IdentifierInfoLookup() {}
48
49ExternalIdentifierLookup::~ExternalIdentifierLookup() {}
50
51IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
52                                 IdentifierInfoLookup* externalLookup)
53  : HashTable(8192), // Start with space for 8K identifiers.
54    ExternalLookup(externalLookup) {
55
56  // Populate the identifier table with info about keywords for the current
57  // language.
58  AddKeywords(LangOpts);
59}
60
61//===----------------------------------------------------------------------===//
62// Language Keyword Implementation
63//===----------------------------------------------------------------------===//
64
// Bit flags used by the keyword entries of TokenKinds.def.  Each flag names a
// condition under which AddKeyword() registers the keyword:
//   KEYALL      - always registered.
//   KEYC99      - registered when LangOpts.C99 is set.
//   KEYCXX      - registered when LangOpts.CPlusPlus is set.
//   KEYCXX0X    - registered when LangOpts.CPlusPlus0x is set.
//   KEYGNU      - registered as an extension when LangOpts.GNUKeywords is set.
//   KEYMS       - registered as an extension when LangOpts.Microsoft is set.
//   BOOLSUPPORT - registered when LangOpts.Bool is set.
//   KEYALTIVEC  - registered when LangOpts.AltiVec is set.
//   KEYNOMS     - registered when LangOpts.Microsoft is NOT set.
namespace {
  enum {
    KEYALL      = 0x001,
    KEYC99      = 0x002,
    KEYCXX      = 0x004,
    KEYCXX0X    = 0x008,
    KEYGNU      = 0x010,
    KEYMS       = 0x020,
    BOOLSUPPORT = 0x040,
    KEYALTIVEC  = 0x080,
    KEYNOMS     = 0x100
  };
}
79
80/// AddKeyword - This method is used to associate a token ID with specific
81/// identifiers because they are language keywords.  This causes the lexer to
82/// automatically map matching identifiers to specialized token codes.
83///
84/// The C90/C99/CPP/CPP0x flags are set to 2 if the token should be
85/// enabled in the specified langauge, set to 1 if it is an extension
86/// in the specified language, and set to 0 if disabled in the
87/// specified language.
88static void AddKeyword(llvm::StringRef Keyword,
89                       tok::TokenKind TokenCode, unsigned Flags,
90                       const LangOptions &LangOpts, IdentifierTable &Table) {
91  unsigned AddResult = 0;
92  if (Flags & KEYALL) AddResult = 2;
93  else if (LangOpts.CPlusPlus && (Flags & KEYCXX)) AddResult = 2;
94  else if (LangOpts.CPlusPlus0x && (Flags & KEYCXX0X)) AddResult = 2;
95  else if (LangOpts.C99 && (Flags & KEYC99)) AddResult = 2;
96  else if (LangOpts.GNUKeywords && (Flags & KEYGNU)) AddResult = 1;
97  else if (LangOpts.Microsoft && (Flags & KEYMS)) AddResult = 1;
98  else if (LangOpts.Bool && (Flags & BOOLSUPPORT)) AddResult = 2;
99  else if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) AddResult = 2;
100  else if (!LangOpts.Microsoft && (Flags & KEYNOMS)) AddResult = 2;
101
102  // Don't add this keyword if disabled in this language.
103  if (AddResult == 0) return;
104
105  IdentifierInfo &Info = Table.get(Keyword, TokenCode);
106  Info.setIsExtensionToken(AddResult == 1);
107}
108
109/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
110/// representations.
111static void AddCXXOperatorKeyword(llvm::StringRef Keyword,
112                                  tok::TokenKind TokenCode,
113                                  IdentifierTable &Table) {
114  IdentifierInfo &Info = Table.get(Keyword, TokenCode);
115  Info.setIsCPlusPlusOperatorKeyword();
116}
117
118/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
119/// "property".
120static void AddObjCKeyword(llvm::StringRef Name,
121                           tok::ObjCKeywordKind ObjCID,
122                           IdentifierTable &Table) {
123  Table.get(Name).setObjCKeywordID(ObjCID);
124}
125
126/// AddKeywords - Add all keywords to the symbol table.
127///
128void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
129  // Add keywords and tokens for the current language.
130#define KEYWORD(NAME, FLAGS) \
131  AddKeyword(llvm::StringRef(#NAME), tok::kw_ ## NAME,  \
132             FLAGS, LangOpts, *this);
133#define ALIAS(NAME, TOK, FLAGS) \
134  AddKeyword(llvm::StringRef(NAME), tok::kw_ ## TOK,  \
135             FLAGS, LangOpts, *this);
136#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
137  if (LangOpts.CXXOperatorNames)          \
138    AddCXXOperatorKeyword(llvm::StringRef(#NAME), tok::ALIAS, *this);
139#define OBJC1_AT_KEYWORD(NAME) \
140  if (LangOpts.ObjC1)          \
141    AddObjCKeyword(llvm::StringRef(#NAME), tok::objc_##NAME, *this);
142#define OBJC2_AT_KEYWORD(NAME) \
143  if (LangOpts.ObjC2)          \
144    AddObjCKeyword(llvm::StringRef(#NAME), tok::objc_##NAME, *this);
145#include "clang/Basic/TokenKinds.def"
146}
147
148tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
149  // We use a perfect hash function here involving the length of the keyword,
150  // the first and third character.  For preprocessor ID's there are no
151  // collisions (if there were, the switch below would complain about duplicate
152  // case values).  Note that this depends on 'if' being null terminated.
153
154#define HASH(LEN, FIRST, THIRD) \
155  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
156#define CASE(LEN, FIRST, THIRD, NAME) \
157  case HASH(LEN, FIRST, THIRD): \
158    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
159
160  unsigned Len = getLength();
161  if (Len < 2) return tok::pp_not_keyword;
162  const char *Name = getNameStart();
163  switch (HASH(Len, Name[0], Name[2])) {
164  default: return tok::pp_not_keyword;
165  CASE( 2, 'i', '\0', if);
166  CASE( 4, 'e', 'i', elif);
167  CASE( 4, 'e', 's', else);
168  CASE( 4, 'l', 'n', line);
169  CASE( 4, 's', 'c', sccs);
170  CASE( 5, 'e', 'd', endif);
171  CASE( 5, 'e', 'r', error);
172  CASE( 5, 'i', 'e', ident);
173  CASE( 5, 'i', 'd', ifdef);
174  CASE( 5, 'u', 'd', undef);
175
176  CASE( 6, 'a', 's', assert);
177  CASE( 6, 'd', 'f', define);
178  CASE( 6, 'i', 'n', ifndef);
179  CASE( 6, 'i', 'p', import);
180  CASE( 6, 'p', 'a', pragma);
181
182  CASE( 7, 'd', 'f', defined);
183  CASE( 7, 'i', 'c', include);
184  CASE( 7, 'w', 'r', warning);
185
186  CASE( 8, 'u', 'a', unassert);
187  CASE(12, 'i', 'c', include_next);
188
189  CASE(16, '_', 'i', __include_macros);
190#undef CASE
191#undef HASH
192  }
193}
194
195//===----------------------------------------------------------------------===//
196// Stats Implementation
197//===----------------------------------------------------------------------===//
198
199/// PrintStats - Print statistics about how well the identifier table is doing
200/// at hashing identifiers.
201void IdentifierTable::PrintStats() const {
202  unsigned NumBuckets = HashTable.getNumBuckets();
203  unsigned NumIdentifiers = HashTable.getNumItems();
204  unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
205  unsigned AverageIdentifierSize = 0;
206  unsigned MaxIdentifierLength = 0;
207
208  // TODO: Figure out maximum times an identifier had to probe for -stats.
209  for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
210       I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
211    unsigned IdLen = I->getKeyLength();
212    AverageIdentifierSize += IdLen;
213    if (MaxIdentifierLength < IdLen)
214      MaxIdentifierLength = IdLen;
215  }
216
217  fprintf(stderr, "\n*** Identifier Table Stats:\n");
218  fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
219  fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
220  fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
221          NumIdentifiers/(double)NumBuckets);
222  fprintf(stderr, "Ave identifier length: %f\n",
223          (AverageIdentifierSize/(double)NumIdentifiers));
224  fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
225
226  // Compute statistics about the memory allocated for identifiers.
227  HashTable.getAllocator().PrintStats();
228}
229
230//===----------------------------------------------------------------------===//
231// SelectorTable Implementation
232//===----------------------------------------------------------------------===//
233
234unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
235  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
236}
237
238namespace clang {
239/// MultiKeywordSelector - One of these variable length records is kept for each
240/// selector containing more than one keyword. We use a folding set
241/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
242/// this class is provided strictly through Selector.
243class MultiKeywordSelector
244  : public DeclarationNameExtra, public llvm::FoldingSetNode {
245  MultiKeywordSelector(unsigned nKeys) {
246    ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
247  }
248public:
249  // Constructor for keyword selectors.
250  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
251    assert((nKeys > 1) && "not a multi-keyword selector");
252    ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
253
254    // Fill in the trailing keyword array.
255    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
256    for (unsigned i = 0; i != nKeys; ++i)
257      KeyInfo[i] = IIV[i];
258  }
259
260  // getName - Derive the full selector name and return it.
261  std::string getName() const;
262
263  unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; }
264
265  typedef IdentifierInfo *const *keyword_iterator;
266  keyword_iterator keyword_begin() const {
267    return reinterpret_cast<keyword_iterator>(this+1);
268  }
269  keyword_iterator keyword_end() const {
270    return keyword_begin()+getNumArgs();
271  }
272  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
273    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
274    return keyword_begin()[i];
275  }
276  static void Profile(llvm::FoldingSetNodeID &ID,
277                      keyword_iterator ArgTys, unsigned NumArgs) {
278    ID.AddInteger(NumArgs);
279    for (unsigned i = 0; i != NumArgs; ++i)
280      ID.AddPointer(ArgTys[i]);
281  }
282  void Profile(llvm::FoldingSetNodeID &ID) {
283    Profile(ID, keyword_begin(), getNumArgs());
284  }
285};
286} // end namespace clang.
287
288unsigned Selector::getNumArgs() const {
289  unsigned IIF = getIdentifierInfoFlag();
290  if (IIF == ZeroArg)
291    return 0;
292  if (IIF == OneArg)
293    return 1;
294  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
295  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
296  return SI->getNumArgs();
297}
298
299IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
300  if (getIdentifierInfoFlag()) {
301    assert(argIndex == 0 && "illegal keyword index");
302    return getAsIdentifierInfo();
303  }
304  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
305  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
306  return SI->getIdentifierInfoForSlot(argIndex);
307}
308
309std::string MultiKeywordSelector::getName() const {
310  llvm::SmallString<256> Str;
311  llvm::raw_svector_ostream OS(Str);
312  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
313    if (*I)
314      OS << (*I)->getName();
315    OS << ':';
316  }
317
318  return OS.str();
319}
320
321std::string Selector::getAsString() const {
322  if (InfoPtr == 0)
323    return "<null selector>";
324
325  if (InfoPtr & ArgFlags) {
326    IdentifierInfo *II = getAsIdentifierInfo();
327
328    // If the number of arguments is 0 then II is guaranteed to not be null.
329    if (getNumArgs() == 0)
330      return II->getName();
331
332    if (!II)
333      return ":";
334
335    return II->getName().str() + ":";
336  }
337
338  // We have a multiple keyword selector (no embedded flags).
339  return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
340}
341
342
343namespace {
344  struct SelectorTableImpl {
345    llvm::FoldingSet<MultiKeywordSelector> Table;
346    llvm::BumpPtrAllocator Allocator;
347  };
348} // end anonymous namespace.
349
350static SelectorTableImpl &getSelectorTableImpl(void *P) {
351  return *static_cast<SelectorTableImpl*>(P);
352}
353
354
355Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
356  if (nKeys < 2)
357    return Selector(IIV[0], nKeys);
358
359  SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
360
361  // Unique selector, to guarantee there is one per name.
362  llvm::FoldingSetNodeID ID;
363  MultiKeywordSelector::Profile(ID, IIV, nKeys);
364
365  void *InsertPos = 0;
366  if (MultiKeywordSelector *SI =
367        SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
368    return Selector(SI);
369
370  // MultiKeywordSelector objects are not allocated with new because they have a
371  // variable size array (for parameter types) at the end of them.
372  unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
373  MultiKeywordSelector *SI =
374    (MultiKeywordSelector*)SelTabImpl.Allocator.Allocate(Size,
375                                         llvm::alignof<MultiKeywordSelector>());
376  new (SI) MultiKeywordSelector(nKeys, IIV);
377  SelTabImpl.Table.InsertNode(SI, InsertPos);
378  return Selector(SI);
379}
380
381SelectorTable::SelectorTable() {
382  Impl = new SelectorTableImpl();
383}
384
385SelectorTable::~SelectorTable() {
386  delete &getSelectorTableImpl(Impl);
387}
388
389const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
390  switch (Operator) {
391  case OO_None:
392  case NUM_OVERLOADED_OPERATORS:
393    return 0;
394
395#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
396  case OO_##Name: return Spelling;
397#include "clang/Basic/OperatorKinds.def"
398  }
399
400  return 0;
401}
402
403