IdentifierTable.h revision ea684e699ea84e61711e279f5fa7a1b9f3d46bc2
1//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the IdentifierInfo, IdentifierTable, and Selector
11// interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
18#include "clang/Basic/OperatorKinds.h"
19#include "clang/Basic/TokenKinds.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/OwningPtr.h"
23#include "llvm/Bitcode/SerializationFwd.h"
24#include <string>
25#include <cassert>
26
27namespace llvm {
28  template <typename T> struct DenseMapInfo;
29}
30
31namespace clang {
32  class LangOptions;
33  class IdentifierInfo;
34  class IdentifierTable;
35  class SourceLocation;
36  class MultiKeywordSelector; // private class used by Selector
37  class DeclarationName;      // AST class that stores declaration names
38
39  /// IdentifierLocPair - A simple pair of identifier info and location.
40  typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
41
42
43/// IdentifierInfo - One of these records is kept for each identifier that
44/// is lexed.  This contains information about whether the token was #define'd,
45/// is a language keyword, or if it is a front-end token of some sort (e.g. a
46/// variable or function name).  The preprocessor keeps this information in a
47/// set, and all tok::identifier tokens have a pointer to one of these.
48class IdentifierInfo {
49  // Note: DON'T make TokenID a 'tok::TokenKind'; MSVC will treat it as a
50  //       signed char and TokenKinds > 127 won't be handled correctly.
51  unsigned TokenID            : 8; // Front-end token ID or tok::identifier.
52  // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
53  // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
54  // are for builtins.
55  unsigned ObjCOrBuiltinID    :10;
56  bool HasMacro               : 1; // True if there is a #define for this.
57  bool IsExtension            : 1; // True if identifier is a lang extension.
58  bool IsPoisoned             : 1; // True if identifier is poisoned.
59  bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
60  bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
61  // 9 bits left in 32-bit word.
62  void *FETokenInfo;               // Managed by the language front-end.
63  llvm::StringMapEntry<IdentifierInfo*> *Entry;
64
65  IdentifierInfo(const IdentifierInfo&);  // NONCOPYABLE.
66  void operator=(const IdentifierInfo&);  // NONASSIGNABLE.
67
68  friend class IdentifierTable;
69
70public:
71  IdentifierInfo();
72
73
74  /// isStr - Return true if this is the identifier for the specified string.
75  /// This is intended to be used for string literals only: II->isStr("foo").
76  template <std::size_t StrLen>
77  bool isStr(const char (&Str)[StrLen]) const {
78    return getLength() == StrLen-1 && !memcmp(getName(), Str, StrLen-1);
79  }
80
81  /// getName - Return the actual string for this identifier.  The returned
82  /// string is properly null terminated.
83  ///
84  const char *getName() const {
85    if (Entry) return Entry->getKeyData();
86    // FIXME: This is gross. It would be best not to embed specific details
87    // of the PTH file format here.
88    // The 'this' pointer really points to a
89    // std::pair<IdentifierInfo, const char*>, where internal pointer
90    // points to the external string data.
91    return ((std::pair<IdentifierInfo, const char*>*) this)->second;
92  }
93
94  /// getLength - Efficiently return the length of this identifier info.
95  ///
96  unsigned getLength() const {
97    if (Entry) return Entry->getKeyLength();
98    // FIXME: This is gross. It would be best not to embed specific details
99    // of the PTH file format here.
100    // The 'this' pointer really points to a
101    // std::pair<IdentifierInfo, const char*>, where internal pointer
102    // points to the external string data.
103    const char* p = ((std::pair<IdentifierInfo, const char*>*) this)->second-2;
104    return (((unsigned) p[0])
105        | (((unsigned) p[1]) << 8)) - 1;
106  }
107
108  /// hasMacroDefinition - Return true if this identifier is #defined to some
109  /// other value.
110  bool hasMacroDefinition() const {
111    return HasMacro;
112  }
113  void setHasMacroDefinition(bool Val) {
114    if (HasMacro == Val) return;
115
116    HasMacro = Val;
117    if (Val)
118      NeedsHandleIdentifier = 1;
119    else
120      RecomputeNeedsHandleIdentifier();
121  }
122
123  /// get/setTokenID - If this is a source-language token (e.g. 'for'), this API
124  /// can be used to cause the lexer to map identifiers to source-language
125  /// tokens.
126  tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
127  void setTokenID(tok::TokenKind ID) { TokenID = ID; }
128
129  /// getPPKeywordID - Return the preprocessor keyword ID for this identifier.
130  /// For example, "define" will return tok::pp_define.
131  tok::PPKeywordKind getPPKeywordID() const;
132
133  /// getObjCKeywordID - Return the Objective-C keyword ID for the this
134  /// identifier.  For example, 'class' will return tok::objc_class if ObjC is
135  /// enabled.
136  tok::ObjCKeywordKind getObjCKeywordID() const {
137    if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
138      return tok::ObjCKeywordKind(ObjCOrBuiltinID);
139    else
140      return tok::objc_not_keyword;
141  }
142  void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
143
144  /// getBuiltinID - Return a value indicating whether this is a builtin
145  /// function.  0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
146  /// 2+ are specific builtin functions.
147  unsigned getBuiltinID() const {
148    if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
149      return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
150    else
151      return 0;
152  }
153  void setBuiltinID(unsigned ID) {
154    ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
155    assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
156           && "ID too large for field!");
157  }
158
159  /// get/setExtension - Initialize information about whether or not this
160  /// language token is an extension.  This controls extension warnings, and is
161  /// only valid if a custom token ID is set.
162  bool isExtensionToken() const { return IsExtension; }
163  void setIsExtensionToken(bool Val) {
164    IsExtension = Val;
165    if (Val)
166      NeedsHandleIdentifier = 1;
167    else
168      RecomputeNeedsHandleIdentifier();
169  }
170
171  /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
172  /// Preprocessor will emit an error every time this token is used.
173  void setIsPoisoned(bool Value = true) {
174    IsPoisoned = Value;
175    if (Value)
176      NeedsHandleIdentifier = 1;
177    else
178      RecomputeNeedsHandleIdentifier();
179  }
180
181  /// isPoisoned - Return true if this token has been poisoned.
182  bool isPoisoned() const { return IsPoisoned; }
183
184  /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
185  /// this identifier is a C++ alternate representation of an operator.
186  void setIsCPlusPlusOperatorKeyword(bool Val = true) {
187    IsCPPOperatorKeyword = Val;
188    if (Val)
189      NeedsHandleIdentifier = 1;
190    else
191      RecomputeNeedsHandleIdentifier();
192  }
193  bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
194
195  /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
196  /// associate arbitrary metadata with this token.
197  template<typename T>
198  T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
199  void setFETokenInfo(void *T) { FETokenInfo = T; }
200
201  /// isHandleIdentifierCase - Return true if the Preprocessor::HandleIdentifier
202  /// must be called on a token of this identifier.  If this returns false, we
203  /// know that HandleIdentifier will not affect the token.
204  bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
205
206  /// Emit - Serialize this IdentifierInfo to a bitstream.
207  void Emit(llvm::Serializer& S) const;
208
209  /// Read - Deserialize an IdentifierInfo object from a bitstream.
210  void Read(llvm::Deserializer& D);
211
212private:
213  /// RecomputeNeedsHandleIdentifier - The Preprocessor::HandleIdentifier does
214  /// several special (but rare) things to identifiers of various sorts.  For
215  /// example, it changes the "for" keyword token from tok::identifier to
216  /// tok::for.
217  ///
218  /// This method is very tied to the definition of HandleIdentifier.  Any
219  /// change to it should be reflected here.
220  void RecomputeNeedsHandleIdentifier() {
221    NeedsHandleIdentifier =
222      (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() |
223       isExtensionToken());
224  }
225};
226
227/// IdentifierInfoLookup - An abstract class used by IdentifierTable that
228///  provides an interface for for performing lookups from strings
229/// (const char *) to IdentiferInfo objects.
230class IdentifierInfoLookup {
231public:
232  virtual ~IdentifierInfoLookup();
233
234  /// get - Return the identifier token info for the specified named identifier.
235  ///  Unlike the version in IdentifierTable, this returns a pointer instead
236  ///  of a reference.  If the pointer is NULL then the IdentifierInfo cannot
237  ///  be found.
238  virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd) = 0;
239};
240
241/// IdentifierTable - This table implements an efficient mapping from strings to
242/// IdentifierInfo nodes.  It has no other purpose, but this is an
243/// extremely performance-critical piece of the code, as each occurrance of
244/// every identifier goes through here when lexed.
245class IdentifierTable {
246  // Shark shows that using MallocAllocator is *much* slower than using this
247  // BumpPtrAllocator!
248  typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
249  HashTableTy HashTable;
250
251  IdentifierInfoLookup* ExternalLookup;
252
253public:
254  /// IdentifierTable ctor - Create the identifier table, populating it with
255  /// info about the language keywords for the language specified by LangOpts.
256  IdentifierTable(const LangOptions &LangOpts,
257                  IdentifierInfoLookup* externalLookup = 0);
258
259  llvm::BumpPtrAllocator& getAllocator() {
260    return HashTable.getAllocator();
261  }
262
263  /// get - Return the identifier token info for the specified named identifier.
264  ///
265  IdentifierInfo &get(const char *NameStart, const char *NameEnd) {
266    llvm::StringMapEntry<IdentifierInfo*> &Entry =
267      HashTable.GetOrCreateValue(NameStart, NameEnd);
268
269    IdentifierInfo *II = Entry.getValue();
270
271    if (!II) {
272      while (1) {
273        if (ExternalLookup) {
274          II = ExternalLookup->get(NameStart, NameEnd);
275          if (II) break;
276        }
277
278        void *Mem = getAllocator().Allocate<IdentifierInfo>();
279        II = new (Mem) IdentifierInfo();
280        break;
281      }
282
283      Entry.setValue(II);
284      II->Entry = &Entry;
285    }
286
287    assert(II->Entry != 0);
288    return *II;
289  }
290
291  IdentifierInfo &get(const char *Name) {
292    return get(Name, Name+strlen(Name));
293  }
294  IdentifierInfo &get(const std::string &Name) {
295    // Don't use c_str() here: no need to be null terminated.
296    const char *NameBytes = &Name[0];
297    return get(NameBytes, NameBytes+Name.size());
298  }
299
300private:
301  typedef HashTableTy::const_iterator iterator;
302  typedef HashTableTy::const_iterator const_iterator;
303
304  iterator begin() const { return HashTable.begin(); }
305  iterator end() const   { return HashTable.end(); }
306public:
307
308  unsigned size() const { return HashTable.size(); }
309
310  /// PrintStats - Print some statistics to stderr that indicate how well the
311  /// hashing is doing.
312  void PrintStats() const;
313
314  void AddKeywords(const LangOptions &LangOpts);
315
316  /// Emit - Serialize this IdentifierTable to a bitstream.  This should
317  ///  be called AFTER objects that externally reference the identifiers in the
318  ///  table have been serialized.  This is because only the identifiers that
319  ///  are actually referenced are serialized.
320  void Emit(llvm::Serializer& S) const;
321
322  /// Create - Deserialize an IdentifierTable from a bitstream.
323  static IdentifierTable* CreateAndRegister(llvm::Deserializer& D);
324
325private:
326  /// This ctor is not intended to be used by anyone except for object
327  /// serialization.
328  IdentifierTable();
329};
330
331/// Selector - This smart pointer class efficiently represents Objective-C
332/// method names. This class will either point to an IdentifierInfo or a
333/// MultiKeywordSelector (which is private). This enables us to optimize
334/// selectors that take no arguments and selectors that take 1 argument, which
335/// accounts for 78% of all selectors in Cocoa.h.
336class Selector {
337  enum IdentifierInfoFlag {
338    // MultiKeywordSelector = 0.
339    ZeroArg  = 0x1,
340    OneArg   = 0x2,
341    ArgFlags = ZeroArg|OneArg
342  };
343  uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
344
345  Selector(IdentifierInfo *II, unsigned nArgs) {
346    InfoPtr = reinterpret_cast<uintptr_t>(II);
347    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
348    assert(nArgs < 2 && "nArgs not equal to 0/1");
349    InfoPtr |= nArgs+1;
350  }
351  Selector(MultiKeywordSelector *SI) {
352    InfoPtr = reinterpret_cast<uintptr_t>(SI);
353    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
354  }
355  Selector(uintptr_t V) : InfoPtr(V) {}
356public:
357  friend class SelectorTable; // only the SelectorTable can create these
358  friend class DeclarationName; // and the AST's DeclarationName.
359
360  /// The default ctor should only be used when creating data structures that
361  ///  will contain selectors.
362  Selector() : InfoPtr(0) {}
363
364  IdentifierInfo *getAsIdentifierInfo() const {
365    if (getIdentifierInfoFlag())
366      return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
367    return 0;
368  }
369  unsigned getIdentifierInfoFlag() const {
370    return InfoPtr & ArgFlags;
371  }
372  /// operator==/!= - Indicate whether the specified selectors are identical.
373  bool operator==(Selector RHS) const {
374    return InfoPtr == RHS.InfoPtr;
375  }
376  bool operator!=(Selector RHS) const {
377    return InfoPtr != RHS.InfoPtr;
378  }
379  void *getAsOpaquePtr() const {
380    return reinterpret_cast<void*>(InfoPtr);
381  }
382  // Predicates to identify the selector type.
383  bool isKeywordSelector() const {
384    return getIdentifierInfoFlag() != ZeroArg;
385  }
386  bool isUnarySelector() const {
387    return getIdentifierInfoFlag() == ZeroArg;
388  }
389  unsigned getNumArgs() const;
390  IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
391
392  /// getAsString - Derive the full selector name (e.g. "foo:bar:") and return
393  /// it as an std::string.
394  std::string getAsString() const;
395
396  static Selector getEmptyMarker() {
397    return Selector(uintptr_t(-1));
398  }
399  static Selector getTombstoneMarker() {
400    return Selector(uintptr_t(-2));
401  }
402
403  // Emit - Emit a selector to bitcode.
404  void Emit(llvm::Serializer& S) const;
405
406  // ReadVal - Read a selector from bitcode.
407  static Selector ReadVal(llvm::Deserializer& D);
408};
409
410/// SelectorTable - This table allows us to fully hide how we implement
411/// multi-keyword caching.
412class SelectorTable {
413  void *Impl;  // Actually a FoldingSet<MultiKeywordSelector>*
414  SelectorTable(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT
415  void operator=(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT
416public:
417  SelectorTable();
418  ~SelectorTable();
419
420  /// getSelector - This can create any sort of selector.  NumArgs indicates
421  /// whether this is a no argument selector "foo", a single argument selector
422  /// "foo:" or multi-argument "foo:bar:".
423  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
424
425  Selector getUnarySelector(IdentifierInfo *ID) {
426    return Selector(ID, 1);
427  }
428  Selector getNullarySelector(IdentifierInfo *ID) {
429    return Selector(ID, 0);
430  }
431
432  // Emit - Emit a SelectorTable to bitcode.
433  void Emit(llvm::Serializer& S) const;
434
435  // Create - Reconstitute a SelectorTable from bitcode.
436  static SelectorTable* CreateAndRegister(llvm::Deserializer& D);
437};
438
439/// DeclarationNameExtra - Common base of the MultiKeywordSelector,
440/// CXXSpecialName, and CXXOperatorIdName classes, all of which are
441/// private classes that describe different kinds of names.
442class DeclarationNameExtra {
443public:
444  /// ExtraKind - The kind of "extra" information stored in the
445  /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
446  /// how these enumerator values are used.
447  enum ExtraKind {
448    CXXConstructor = 0,
449    CXXDestructor,
450    CXXConversionFunction,
451#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
452    CXXOperator##Name,
453#include "clang/Basic/OperatorKinds.def"
454    CXXUsingDirective,
455    NUM_EXTRA_KINDS
456  };
457
458  /// ExtraKindOrNumArgs - Either the kind of C++ special name or
459  /// operator-id (if the value is one of the CXX* enumerators of
460  /// ExtraKind), in which case the DeclarationNameExtra is also a
461  /// CXXSpecialName (for CXXConstructor, CXXDestructor, or
462  /// CXXConversionFunction) or CXXOperatorIdName, it may be also
463  /// name common to C++ using-directives (CXXUsingDirective), otherwise
464  /// it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
465  /// arguments in the Objective-C selector, in which case the
466  /// DeclarationNameExtra is also a MultiKeywordSelector.
467  unsigned ExtraKindOrNumArgs;
468};
469
470}  // end namespace clang
471
472namespace llvm {
473/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
474/// DenseSets.
475template <>
476struct DenseMapInfo<clang::Selector> {
477  static inline clang::Selector getEmptyKey() {
478    return clang::Selector::getEmptyMarker();
479  }
480  static inline clang::Selector getTombstoneKey() {
481    return clang::Selector::getTombstoneMarker();
482  }
483
484  static unsigned getHashValue(clang::Selector S);
485
486  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
487    return LHS == RHS;
488  }
489
490  static bool isPod() { return true; }
491};
492
493}  // end namespace llvm
494#endif
495