IdentifierTable.h revision 2d29581d2b7ad5ec5df6ff3947fb0711339361a4
1//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the IdentifierInfo, IdentifierTable, and Selector
11// interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include <cassert>
#include <cstring>
#include <string>
23
24namespace llvm {
25  template <typename T> struct DenseMapInfo;
26}
27
28namespace clang {
29  struct LangOptions;
30  class MultiKeywordSelector; // a private class used by Selector.
31
32/// IdentifierInfo - One of these records is kept for each identifier that
33/// is lexed.  This contains information about whether the token was #define'd,
34/// is a language keyword, or if it is a front-end token of some sort (e.g. a
35/// variable or function name).  The preprocessor keeps this information in a
36/// set, and all tok::identifier tokens have a pointer to one of these.
37class IdentifierInfo {
38  tok::TokenKind TokenID      : 8; // Front-end token ID or tok::identifier.
39  unsigned BuiltinID          : 9; // ID if this is a builtin (__builtin_inf).
40  tok::ObjCKeywordKind ObjCID : 5; // ID for objc @ keyword like @'protocol'.
41  bool HasMacro               : 1; // True if there is a #define for this.
42  bool IsExtension            : 1; // True if identifier is a lang extension.
43  bool IsPoisoned             : 1; // True if identifier is poisoned.
44  bool IsOtherTargetMacro     : 1; // True if ident is macro on another target.
45  bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
46  bool IsNonPortableBuiltin   : 1; // True if builtin varies across targets.
47  // 4 bits left in 32-bit word.
48  void *FETokenInfo;               // Managed by the language front-end.
49  IdentifierInfo(const IdentifierInfo&);  // NONCOPYABLE.
50  void operator=(const IdentifierInfo&);  // NONASSIGNABLE.
51public:
52  IdentifierInfo();
53
54  /// getName - Return the actual string for this identifier.  The returned
55  /// string is properly null terminated.
56  ///
57  const char *getName() const {
58    // We know that this is embedded into a StringMapEntry, and it knows how to
59    // efficiently find the string.
60    return llvm::StringMapEntry<IdentifierInfo>::
61                  GetStringMapEntryFromValue(*this).getKeyData();
62  }
63
64  /// getLength - Efficiently return the length of this identifier info.
65  ///
66  unsigned getLength() const {
67    return llvm::StringMapEntry<IdentifierInfo>::
68                    GetStringMapEntryFromValue(*this).getKeyLength();
69  }
70
71  /// hasMacroDefinition - Return true if this identifier is #defined to some
72  /// other value.
73  bool hasMacroDefinition() const {
74    return HasMacro;
75  }
76  void setHasMacroDefinition(bool Val) { HasMacro = Val; }
77
78  /// get/setTokenID - If this is a source-language token (e.g. 'for'), this API
79  /// can be used to cause the lexer to map identifiers to source-language
80  /// tokens.
81  tok::TokenKind getTokenID() const { return TokenID; }
82  void setTokenID(tok::TokenKind ID) { TokenID = ID; }
83
84  /// getPPKeywordID - Return the preprocessor keyword ID for this identifier.
85  /// For example, "define" will return tok::pp_define.
86  tok::PPKeywordKind getPPKeywordID() const;
87
88  /// getObjCKeywordID - Return the Objective-C keyword ID for the this
89  /// identifier.  For example, 'class' will return tok::objc_class if ObjC is
90  /// enabled.
91  tok::ObjCKeywordKind getObjCKeywordID() const { return ObjCID; }
92  void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCID = ID; }
93
94  /// getBuiltinID - Return a value indicating whether this is a builtin
95  /// function.  0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
96  /// 2+ are specific builtin functions.
97  unsigned getBuiltinID() const { return BuiltinID; }
98  void setBuiltinID(unsigned ID) {
99    BuiltinID = ID;
100    assert(BuiltinID == ID && "ID too large for field!");
101  }
102
103  /// isNonPortableBuiltin - Return true if this identifier corresponds to a
104  /// builtin on some other target, but isn't one on this target, or if it is on
105  /// the target but not on another, or if it is on both but it differs somehow
106  /// in behavior.
107  bool isNonPortableBuiltin() const { return IsNonPortableBuiltin; }
108  void setNonPortableBuiltin(bool Val) { IsNonPortableBuiltin = Val; }
109
110  /// get/setExtension - Initialize information about whether or not this
111  /// language token is an extension.  This controls extension warnings, and is
112  /// only valid if a custom token ID is set.
113  bool isExtensionToken() const { return IsExtension; }
114  void setIsExtensionToken(bool Val) { IsExtension = Val; }
115
116  /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
117  /// Preprocessor will emit an error every time this token is used.
118  void setIsPoisoned(bool Value = true) { IsPoisoned = Value; }
119
120  /// isPoisoned - Return true if this token has been poisoned.
121  bool isPoisoned() const { return IsPoisoned; }
122
123  /// setIsOtherTargetMacro/isOtherTargetMacro control whether this identifier
124  /// is seen as being a macro on some other target.
125  void setIsOtherTargetMacro(bool Val = true) { IsOtherTargetMacro = Val; }
126  bool isOtherTargetMacro() const { return IsOtherTargetMacro; }
127
128  /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
129  /// this identifier is a C++ alternate representation of an operator.
130  void setIsCPlusplusOperatorKeyword(bool Val = true)
131    { IsCPPOperatorKeyword = Val; }
132  bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
133
134  /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
135  /// associate arbitrary metadata with this token.
136  template<typename T>
137  T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
138  void setFETokenInfo(void *T) { FETokenInfo = T; }
139};
140
/// IdentifierTable - This table implements an efficient mapping from strings to
/// IdentifierInfo nodes.  It has no other purpose, but this is an
/// extremely performance-critical piece of the code, as each occurrence of
/// every identifier goes through here when lexed.
145class IdentifierTable {
146  // Shark shows that using MallocAllocator is *much* slower than using this
147  // BumpPtrAllocator!
148  typedef llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator> HashTableTy;
149  HashTableTy HashTable;
150public:
151  /// IdentifierTable ctor - Create the identifier table, populating it with
152  /// info about the language keywords for the language specified by LangOpts.
153  IdentifierTable(const LangOptions &LangOpts);
154
155  /// get - Return the identifier token info for the specified named identifier.
156  ///
157  IdentifierInfo &get(const char *NameStart, const char *NameEnd) {
158    return HashTable.GetOrCreateValue(NameStart, NameEnd).getValue();
159  }
160
161  IdentifierInfo &get(const char *Name) {
162    return get(Name, Name+strlen(Name));
163  }
164  IdentifierInfo &get(const std::string &Name) {
165    // Don't use c_str() here: no need to be null terminated.
166    const char *NameBytes = &Name[0];
167    return get(NameBytes, NameBytes+Name.size());
168  }
169
170  typedef HashTableTy::const_iterator iterator;
171  typedef HashTableTy::const_iterator const_iterator;
172
173  iterator begin() const { return HashTable.begin(); }
174  iterator end() const   { return HashTable.end(); }
175
176  /// PrintStats - Print some statistics to stderr that indicate how well the
177  /// hashing is doing.
178  void PrintStats() const;
179private:
180  void AddKeywords(const LangOptions &LangOpts);
181};
182
/// Selector - This smart pointer class efficiently represents Objective-C
/// method names. This class will either point to an IdentifierInfo or a
/// MultiKeywordSelector (which is private). This enables us to optimize
/// selectors that take no arguments and selectors that take 1 argument, which
/// account for 78% of all selectors in Cocoa.h.
188class Selector {
189  enum IdentifierInfoFlag {
190    // MultiKeywordSelector = 0.
191    ZeroArg  = 0x1,
192    OneArg   = 0x2,
193    ArgFlags = ZeroArg|OneArg
194  };
195  uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
196
197  Selector(IdentifierInfo *II, unsigned nArgs) {
198    InfoPtr = reinterpret_cast<uintptr_t>(II);
199    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
200    assert(nArgs < 2 && "nArgs not equal to 0/1");
201    InfoPtr |= nArgs+1;
202  }
203  Selector(MultiKeywordSelector *SI) {
204    InfoPtr = reinterpret_cast<uintptr_t>(SI);
205    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
206  }
207  Selector(intptr_t V) : InfoPtr(V) {}
208public:
209  friend class SelectorTable; // only the SelectorTable can create these.
210
211  IdentifierInfo *getAsIdentifierInfo() const {
212    if (getIdentifierInfoFlag())
213      return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
214    return 0;
215  }
216  unsigned getIdentifierInfoFlag() const {
217    return InfoPtr & ArgFlags;
218  }
219  /// operator==/!= - Indicate whether the specified selectors are identical.
220  bool operator==(const Selector &RHS) const {
221    return InfoPtr == RHS.InfoPtr;
222  }
223  bool operator!=(const Selector &RHS) const {
224    return InfoPtr != RHS.InfoPtr;
225  }
226  void *getAsOpaquePtr() const {
227    return reinterpret_cast<void*>(InfoPtr);
228  }
229  // Predicates to identify the selector type.
230  bool isKeywordSelector() const {
231    return getIdentifierInfoFlag() != ZeroArg;
232  }
233  bool isUnarySelector() const {
234    return getIdentifierInfoFlag() == ZeroArg;
235  }
236  unsigned getNumArgs() const;
237  IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
238
239  /// getName - Derive the full selector name (e.g. "foo:bar:") and return it.
240  ///
241  std::string getName() const;
242
243  static Selector getEmptyMarker() {
244    return Selector(uintptr_t(-1));
245  }
246  static Selector getTombstoneMarker() {
247    return Selector(uintptr_t(-2));
248  }
249};
250
251/// SelectorTable - This table allows us to fully hide how we implement
252/// multi-keyword caching.
253class SelectorTable {
254  void *Impl;  // Actually a FoldingSet<MultiKeywordSelector>*
255  SelectorTable(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT
256  void operator=(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT
257public:
258  SelectorTable();
259  ~SelectorTable();
260
261  /// getSelector - This can create any sort of selector.  NumArgs indicates
262  /// whether this is a no argument selector "foo", a single argument selector
263  /// "foo:" or multi-argument "foo:bar:".
264  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
265
266  Selector getUnarySelector(IdentifierInfo *ID) {
267    return Selector(ID, 1);
268  }
269  Selector getNullarySelector(IdentifierInfo *ID) {
270    return Selector(ID, 0);
271  }
272};
273
274}  // end namespace clang
275
276
277/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
278/// DenseSets.
279namespace llvm {
280template <>
281struct DenseMapInfo<clang::Selector> {
282  static inline clang::Selector getEmptyKey() {
283    return clang::Selector::getEmptyMarker();
284  }
285  static inline clang::Selector getTombstoneKey() {
286    return clang::Selector::getTombstoneMarker();
287  }
288
289  static unsigned getHashValue(clang::Selector S);
290
291  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
292    return LHS == RHS;
293  }
294
295  static bool isPod() { return true; }
296};
297
298}  // end namespace llvm
299
300#endif
301