1//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12/// clang::Selector interfaces.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18
19#include "clang/Basic/LLVM.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringMap.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Allocator.h"
25#include <cassert>
26#include <cstddef>
27#include <cstdint>
28#include <cstring>
29#include <new>
30#include <string>
31#include <utility>
32
namespace llvm {

  // Forward declaration of the primary template only, so that this header can
  // name and specialize DenseMapInfo without including its full definition.
  template <typename T> struct DenseMapInfo;

} // end namespace llvm
38
39namespace clang {
40
41  class LangOptions;
42  class IdentifierInfo;
43  class IdentifierTable;
44  class SourceLocation;
45  class MultiKeywordSelector; // private class used by Selector
46  class DeclarationName;      // AST class that stores declaration names
47
48  /// \brief A simple pair of identifier info and location.
49  typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
50
51/// One of these records is kept for each identifier that
52/// is lexed.  This contains information about whether the token was \#define'd,
53/// is a language keyword, or if it is a front-end token of some sort (e.g. a
54/// variable or function name).  The preprocessor keeps this information in a
55/// set, and all tok::identifier tokens have a pointer to one of these.
56class IdentifierInfo {
57  friend class IdentifierTable;
58
59  unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
60  // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
61  // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
62  // are for builtins.
63  unsigned ObjCOrBuiltinID    :13;
64  bool HasMacro               : 1; // True if there is a #define for this.
65  bool HadMacro               : 1; // True if there was a #define for this.
66  bool IsExtension            : 1; // True if identifier is a lang extension.
67  bool IsFutureCompatKeyword  : 1; // True if identifier is a keyword in a
68                                   // newer Standard or proposed Standard.
69  bool IsPoisoned             : 1; // True if identifier is poisoned.
70  bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
71  bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
72  bool IsFromAST              : 1; // True if identifier was loaded (at least
73                                   // partially) from an AST file.
74  bool ChangedAfterLoad       : 1; // True if identifier has changed from the
75                                   // definition loaded from an AST file.
76  bool FEChangedAfterLoad     : 1; // True if identifier's frontend information
77                                   // has changed from the definition loaded
78                                   // from an AST file.
79  bool RevertedTokenID        : 1; // True if revertTokenIDToIdentifier was
80                                   // called.
81  bool OutOfDate              : 1; // True if there may be additional
82                                   // information about this identifier
83                                   // stored externally.
84  bool IsModulesImport        : 1; // True if this is the 'import' contextual
85                                   // keyword.
86  // 29 bit left in 64-bit word.
87
88  void *FETokenInfo;               // Managed by the language front-end.
89  llvm::StringMapEntry<IdentifierInfo*> *Entry;
90
91public:
92  IdentifierInfo();
93  IdentifierInfo(const IdentifierInfo &) = delete;
94  IdentifierInfo &operator=(const IdentifierInfo &) = delete;
95
96  /// \brief Return true if this is the identifier for the specified string.
97  ///
98  /// This is intended to be used for string literals only: II->isStr("foo").
99  template <std::size_t StrLen>
100  bool isStr(const char (&Str)[StrLen]) const {
101    return getLength() == StrLen-1 &&
102           memcmp(getNameStart(), Str, StrLen-1) == 0;
103  }
104
105  /// \brief Return the beginning of the actual null-terminated string for this
106  /// identifier.
107  ///
108  const char *getNameStart() const {
109    if (Entry) return Entry->getKeyData();
110    // FIXME: This is gross. It would be best not to embed specific details
111    // of the PTH file format here.
112    // The 'this' pointer really points to a
113    // std::pair<IdentifierInfo, const char*>, where internal pointer
114    // points to the external string data.
115    typedef std::pair<IdentifierInfo, const char*> actualtype;
116    return ((const actualtype*) this)->second;
117  }
118
119  /// \brief Efficiently return the length of this identifier info.
120  ///
121  unsigned getLength() const {
122    if (Entry) return Entry->getKeyLength();
123    // FIXME: This is gross. It would be best not to embed specific details
124    // of the PTH file format here.
125    // The 'this' pointer really points to a
126    // std::pair<IdentifierInfo, const char*>, where internal pointer
127    // points to the external string data.
128    typedef std::pair<IdentifierInfo, const char*> actualtype;
129    const char* p = ((const actualtype*) this)->second - 2;
130    return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
131  }
132
133  /// \brief Return the actual identifier string.
134  StringRef getName() const {
135    return StringRef(getNameStart(), getLength());
136  }
137
138  /// \brief Return true if this identifier is \#defined to some other value.
139  /// \note The current definition may be in a module and not currently visible.
140  bool hasMacroDefinition() const {
141    return HasMacro;
142  }
143  void setHasMacroDefinition(bool Val) {
144    if (HasMacro == Val) return;
145
146    HasMacro = Val;
147    if (Val) {
148      NeedsHandleIdentifier = true;
149      HadMacro = true;
150    } else {
151      RecomputeNeedsHandleIdentifier();
152    }
153  }
154  /// \brief Returns true if this identifier was \#defined to some value at any
155  /// moment. In this case there should be an entry for the identifier in the
156  /// macro history table in Preprocessor.
157  bool hadMacroDefinition() const {
158    return HadMacro;
159  }
160
161  /// If this is a source-language token (e.g. 'for'), this API
162  /// can be used to cause the lexer to map identifiers to source-language
163  /// tokens.
164  tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
165
166  /// \brief True if revertTokenIDToIdentifier() was called.
167  bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
168
169  /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
170  /// compatibility.
171  ///
172  /// TokenID is normally read-only but there are 2 instances where we revert it
173  /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
174  /// using this method so we can inform serialization about it.
175  void revertTokenIDToIdentifier() {
176    assert(TokenID != tok::identifier && "Already at tok::identifier");
177    TokenID = tok::identifier;
178    RevertedTokenID = true;
179  }
180  void revertIdentifierToTokenID(tok::TokenKind TK) {
181    assert(TokenID == tok::identifier && "Should be at tok::identifier");
182    TokenID = TK;
183    RevertedTokenID = false;
184  }
185
186  /// \brief Return the preprocessor keyword ID for this identifier.
187  ///
188  /// For example, "define" will return tok::pp_define.
189  tok::PPKeywordKind getPPKeywordID() const;
190
191  /// \brief Return the Objective-C keyword ID for the this identifier.
192  ///
193  /// For example, 'class' will return tok::objc_class if ObjC is enabled.
194  tok::ObjCKeywordKind getObjCKeywordID() const {
195    if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
196      return tok::ObjCKeywordKind(ObjCOrBuiltinID);
197    else
198      return tok::objc_not_keyword;
199  }
200  void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
201
202  /// \brief True if setNotBuiltin() was called.
203  bool hasRevertedBuiltin() const {
204    return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
205  }
206
207  /// \brief Revert the identifier to a non-builtin identifier. We do this if
208  /// the name of a known builtin library function is used to declare that
209  /// function, but an unexpected type is specified.
210  void revertBuiltin() {
211    setBuiltinID(0);
212  }
213
214  /// \brief Return a value indicating whether this is a builtin function.
215  ///
216  /// 0 is not-built-in. 1+ are specific builtin functions.
217  unsigned getBuiltinID() const {
218    if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
219      return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
220    else
221      return 0;
222  }
223  void setBuiltinID(unsigned ID) {
224    ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
225    assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
226           && "ID too large for field!");
227  }
228
229  unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
230  void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
231
232  /// get/setExtension - Initialize information about whether or not this
233  /// language token is an extension.  This controls extension warnings, and is
234  /// only valid if a custom token ID is set.
235  bool isExtensionToken() const { return IsExtension; }
236  void setIsExtensionToken(bool Val) {
237    IsExtension = Val;
238    if (Val)
239      NeedsHandleIdentifier = true;
240    else
241      RecomputeNeedsHandleIdentifier();
242  }
243
244  /// is/setIsFutureCompatKeyword - Initialize information about whether or not
245  /// this language token is a keyword in a newer or proposed Standard. This
246  /// controls compatibility warnings, and is only true when not parsing the
247  /// corresponding Standard. Once a compatibility problem has been diagnosed
248  /// with this keyword, the flag will be cleared.
249  bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
250  void setIsFutureCompatKeyword(bool Val) {
251    IsFutureCompatKeyword = Val;
252    if (Val)
253      NeedsHandleIdentifier = true;
254    else
255      RecomputeNeedsHandleIdentifier();
256  }
257
258  /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
259  /// Preprocessor will emit an error every time this token is used.
260  void setIsPoisoned(bool Value = true) {
261    IsPoisoned = Value;
262    if (Value)
263      NeedsHandleIdentifier = true;
264    else
265      RecomputeNeedsHandleIdentifier();
266  }
267
268  /// \brief Return true if this token has been poisoned.
269  bool isPoisoned() const { return IsPoisoned; }
270
271  /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
272  /// this identifier is a C++ alternate representation of an operator.
273  void setIsCPlusPlusOperatorKeyword(bool Val = true) {
274    IsCPPOperatorKeyword = Val;
275  }
276  bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
277
278  /// \brief Return true if this token is a keyword in the specified language.
279  bool isKeyword(const LangOptions &LangOpts) const;
280
281  /// \brief Return true if this token is a C++ keyword in the specified
282  /// language.
283  bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
284
285  /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
286  /// associate arbitrary metadata with this token.
287  template<typename T>
288  T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
289  void setFETokenInfo(void *T) { FETokenInfo = T; }
290
291  /// \brief Return true if the Preprocessor::HandleIdentifier must be called
292  /// on a token of this identifier.
293  ///
294  /// If this returns false, we know that HandleIdentifier will not affect
295  /// the token.
296  bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
297
298  /// \brief Return true if the identifier in its current state was loaded
299  /// from an AST file.
300  bool isFromAST() const { return IsFromAST; }
301
302  void setIsFromAST() { IsFromAST = true; }
303
304  /// \brief Determine whether this identifier has changed since it was loaded
305  /// from an AST file.
306  bool hasChangedSinceDeserialization() const {
307    return ChangedAfterLoad;
308  }
309
310  /// \brief Note that this identifier has changed since it was loaded from
311  /// an AST file.
312  void setChangedSinceDeserialization() {
313    ChangedAfterLoad = true;
314  }
315
316  /// \brief Determine whether the frontend token information for this
317  /// identifier has changed since it was loaded from an AST file.
318  bool hasFETokenInfoChangedSinceDeserialization() const {
319    return FEChangedAfterLoad;
320  }
321
322  /// \brief Note that the frontend token information for this identifier has
323  /// changed since it was loaded from an AST file.
324  void setFETokenInfoChangedSinceDeserialization() {
325    FEChangedAfterLoad = true;
326  }
327
328  /// \brief Determine whether the information for this identifier is out of
329  /// date with respect to the external source.
330  bool isOutOfDate() const { return OutOfDate; }
331
332  /// \brief Set whether the information for this identifier is out of
333  /// date with respect to the external source.
334  void setOutOfDate(bool OOD) {
335    OutOfDate = OOD;
336    if (OOD)
337      NeedsHandleIdentifier = true;
338    else
339      RecomputeNeedsHandleIdentifier();
340  }
341
342  /// \brief Determine whether this is the contextual keyword \c import.
343  bool isModulesImport() const { return IsModulesImport; }
344
345  /// \brief Set whether this identifier is the contextual keyword \c import.
346  void setModulesImport(bool I) {
347    IsModulesImport = I;
348    if (I)
349      NeedsHandleIdentifier = true;
350    else
351      RecomputeNeedsHandleIdentifier();
352  }
353
354  /// Return true if this identifier is an editor placeholder.
355  ///
356  /// Editor placeholders are produced by the code-completion engine and are
357  /// represented as characters between '<#' and '#>' in the source code. An
358  /// example of auto-completed call with a placeholder parameter is shown
359  /// below:
360  /// \code
361  ///   function(<#int x#>);
362  /// \endcode
363  bool isEditorPlaceholder() const {
364    return getName().startswith("<#") && getName().endswith("#>");
365  }
366
367  /// \brief Provide less than operator for lexicographical sorting.
368  bool operator<(const IdentifierInfo &RHS) const {
369    return getName() < RHS.getName();
370  }
371
372private:
373  /// The Preprocessor::HandleIdentifier does several special (but rare)
374  /// things to identifiers of various sorts.  For example, it changes the
375  /// \c for keyword token from tok::identifier to tok::for.
376  ///
377  /// This method is very tied to the definition of HandleIdentifier.  Any
378  /// change to it should be reflected here.
379  void RecomputeNeedsHandleIdentifier() {
380    NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
381                            isExtensionToken() || isFutureCompatKeyword() ||
382                            isOutOfDate() || isModulesImport();
383  }
384};
385
386/// \brief An RAII object for [un]poisoning an identifier within a scope.
387///
388/// \p II is allowed to be null, in which case objects of this type have
389/// no effect.
390class PoisonIdentifierRAIIObject {
391  IdentifierInfo *const II;
392  const bool OldValue;
393
394public:
395  PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
396    : II(II), OldValue(II ? II->isPoisoned() : false) {
397    if(II)
398      II->setIsPoisoned(NewValue);
399  }
400
401  ~PoisonIdentifierRAIIObject() {
402    if(II)
403      II->setIsPoisoned(OldValue);
404  }
405};
406
/// \brief An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
class IdentifierIterator {
protected:
  // Only subclasses may construct an iterator.
  IdentifierIterator() = default;

public:
  // Iterators are neither copyable nor copy-assignable.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  // Virtual so that a concrete iterator can be destroyed through a pointer to
  // this base class.
  virtual ~IdentifierIterator();

  /// \brief Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
434
/// \brief Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// IdentifierTable consults an object of this type (when one is installed)
/// before creating a new IdentifierInfo for a name it has not seen.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// \brief Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// \brief Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
459
460/// \brief Implements an efficient mapping from strings to IdentifierInfo nodes.
461///
462/// This has no other purpose, but this is an extremely performance-critical
463/// piece of the code, as each occurrence of every identifier goes through
464/// here when lexed.
465class IdentifierTable {
466  // Shark shows that using MallocAllocator is *much* slower than using this
467  // BumpPtrAllocator!
468  typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
469  HashTableTy HashTable;
470
471  IdentifierInfoLookup* ExternalLookup;
472
473public:
474  /// \brief Create the identifier table, populating it with info about the
475  /// language keywords for the language specified by \p LangOpts.
476  IdentifierTable(const LangOptions &LangOpts,
477                  IdentifierInfoLookup* externalLookup = nullptr);
478
479  /// \brief Set the external identifier lookup mechanism.
480  void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
481    ExternalLookup = IILookup;
482  }
483
484  /// \brief Retrieve the external identifier lookup object, if any.
485  IdentifierInfoLookup *getExternalIdentifierLookup() const {
486    return ExternalLookup;
487  }
488
489  llvm::BumpPtrAllocator& getAllocator() {
490    return HashTable.getAllocator();
491  }
492
493  /// \brief Return the identifier token info for the specified named
494  /// identifier.
495  IdentifierInfo &get(StringRef Name) {
496    auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
497
498    IdentifierInfo *&II = Entry.second;
499    if (II) return *II;
500
501    // No entry; if we have an external lookup, look there first.
502    if (ExternalLookup) {
503      II = ExternalLookup->get(Name);
504      if (II)
505        return *II;
506    }
507
508    // Lookups failed, make a new IdentifierInfo.
509    void *Mem = getAllocator().Allocate<IdentifierInfo>();
510    II = new (Mem) IdentifierInfo();
511
512    // Make sure getName() knows how to find the IdentifierInfo
513    // contents.
514    II->Entry = &Entry;
515
516    return *II;
517  }
518
519  IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
520    IdentifierInfo &II = get(Name);
521    II.TokenID = TokenCode;
522    assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
523    return II;
524  }
525
526  /// \brief Gets an IdentifierInfo for the given name without consulting
527  ///        external sources.
528  ///
529  /// This is a version of get() meant for external sources that want to
530  /// introduce or modify an identifier. If they called get(), they would
531  /// likely end up in a recursion.
532  IdentifierInfo &getOwn(StringRef Name) {
533    auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
534
535    IdentifierInfo *&II = Entry.second;
536    if (II)
537      return *II;
538
539    // Lookups failed, make a new IdentifierInfo.
540    void *Mem = getAllocator().Allocate<IdentifierInfo>();
541    II = new (Mem) IdentifierInfo();
542
543    // Make sure getName() knows how to find the IdentifierInfo
544    // contents.
545    II->Entry = &Entry;
546
547    // If this is the 'import' contextual keyword, mark it as such.
548    if (Name.equals("import"))
549      II->setModulesImport(true);
550
551    return *II;
552  }
553
554  typedef HashTableTy::const_iterator iterator;
555  typedef HashTableTy::const_iterator const_iterator;
556
557  iterator begin() const { return HashTable.begin(); }
558  iterator end() const   { return HashTable.end(); }
559  unsigned size() const  { return HashTable.size(); }
560
561  /// \brief Print some statistics to stderr that indicate how well the
562  /// hashing is doing.
563  void PrintStats() const;
564
565  void AddKeywords(const LangOptions &LangOpts);
566};
567
/// \brief A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// \brief No particular method family.
  OMF_None = 0,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc = 1,
  OMF_copy = 2,
  OMF_init = 3,
  OMF_mutableCopy = 4,
  OMF_new = 5,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease = 6,
  OMF_dealloc = 7,
  OMF_finalize = 8,
  OMF_release = 9,
  OMF_retain = 10,
  OMF_retainCount = 11,
  OMF_self = 12,
  OMF_initialize = 13,

  // performSelector families
  OMF_performSelector = 14
};
613
/// Number of bits needed to hold any ObjCMethodFamily enumerator as well as
/// InvalidObjCMethodFamily.
enum {
  ObjCMethodFamilyBitWidth = 4
};

/// \brief An invalid value of ObjCMethodFamily: the largest value that fits
/// in ObjCMethodFamilyBitWidth bits.
enum {
  InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1
};
620
/// \brief A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array = 1,
  OIT_Dictionary = 2,
  OIT_Singleton = 3,
  OIT_Init = 4,
  OIT_ReturnsSelf = 5
};
633
/// \brief String-format family reported by Selector::getStringFormatFamily().
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString = 1,
  SFF_CFString = 2
};
639
640/// \brief Smart pointer class that efficiently represents Objective-C method
641/// names.
642///
643/// This class will either point to an IdentifierInfo or a
644/// MultiKeywordSelector (which is private). This enables us to optimize
645/// selectors that take no arguments and selectors that take 1 argument, which
646/// accounts for 78% of all selectors in Cocoa.h.
647class Selector {
648  friend class Diagnostic;
649
650  enum IdentifierInfoFlag {
651    // Empty selector = 0.
652    ZeroArg  = 0x1,
653    OneArg   = 0x2,
654    MultiArg = 0x3,
655    ArgFlags = ZeroArg|OneArg
656  };
657  uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
658
659  Selector(IdentifierInfo *II, unsigned nArgs) {
660    InfoPtr = reinterpret_cast<uintptr_t>(II);
661    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
662    assert(nArgs < 2 && "nArgs not equal to 0/1");
663    InfoPtr |= nArgs+1;
664  }
665  Selector(MultiKeywordSelector *SI) {
666    InfoPtr = reinterpret_cast<uintptr_t>(SI);
667    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
668    InfoPtr |= MultiArg;
669  }
670
671  IdentifierInfo *getAsIdentifierInfo() const {
672    if (getIdentifierInfoFlag() < MultiArg)
673      return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
674    return nullptr;
675  }
676
677  MultiKeywordSelector *getMultiKeywordSelector() const {
678    return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
679  }
680
681  unsigned getIdentifierInfoFlag() const {
682    return InfoPtr & ArgFlags;
683  }
684
685  static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
686
687  static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
688
689public:
690  friend class SelectorTable; // only the SelectorTable can create these
691  friend class DeclarationName; // and the AST's DeclarationName.
692
693  /// The default ctor should only be used when creating data structures that
694  ///  will contain selectors.
695  Selector() : InfoPtr(0) {}
696  Selector(uintptr_t V) : InfoPtr(V) {}
697
698  /// operator==/!= - Indicate whether the specified selectors are identical.
699  bool operator==(Selector RHS) const {
700    return InfoPtr == RHS.InfoPtr;
701  }
702  bool operator!=(Selector RHS) const {
703    return InfoPtr != RHS.InfoPtr;
704  }
705
706  void *getAsOpaquePtr() const {
707    return reinterpret_cast<void*>(InfoPtr);
708  }
709
710  /// \brief Determine whether this is the empty selector.
711  bool isNull() const { return InfoPtr == 0; }
712
713  // Predicates to identify the selector type.
714  bool isKeywordSelector() const {
715    return getIdentifierInfoFlag() != ZeroArg;
716  }
717
718  bool isUnarySelector() const {
719    return getIdentifierInfoFlag() == ZeroArg;
720  }
721
722  unsigned getNumArgs() const;
723
724  /// \brief Retrieve the identifier at a given position in the selector.
725  ///
726  /// Note that the identifier pointer returned may be NULL. Clients that only
727  /// care about the text of the identifier string, and not the specific,
728  /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
729  /// an empty string when the identifier pointer would be NULL.
730  ///
731  /// \param argIndex The index for which we want to retrieve the identifier.
732  /// This index shall be less than \c getNumArgs() unless this is a keyword
733  /// selector, in which case 0 is the only permissible value.
734  ///
735  /// \returns the uniqued identifier for this slot, or NULL if this slot has
736  /// no corresponding identifier.
737  IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
738
739  /// \brief Retrieve the name at a given position in the selector.
740  ///
741  /// \param argIndex The index for which we want to retrieve the name.
742  /// This index shall be less than \c getNumArgs() unless this is a keyword
743  /// selector, in which case 0 is the only permissible value.
744  ///
745  /// \returns the name for this slot, which may be the empty string if no
746  /// name was supplied.
747  StringRef getNameForSlot(unsigned argIndex) const;
748
749  /// \brief Derive the full selector name (e.g. "foo:bar:") and return
750  /// it as an std::string.
751  std::string getAsString() const;
752
753  /// \brief Prints the full selector name (e.g. "foo:bar:").
754  void print(llvm::raw_ostream &OS) const;
755
756  /// \brief Derive the conventional family of this method.
757  ObjCMethodFamily getMethodFamily() const {
758    return getMethodFamilyImpl(*this);
759  }
760
761  ObjCStringFormatFamily getStringFormatFamily() const {
762    return getStringFormatFamilyImpl(*this);
763  }
764
765  static Selector getEmptyMarker() {
766    return Selector(uintptr_t(-1));
767  }
768
769  static Selector getTombstoneMarker() {
770    return Selector(uintptr_t(-2));
771  }
772
773  static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
774};
775
/// \brief This table allows us to fully hide how we implement
/// multi-keyword caching.
class SelectorTable {
  void *Impl;  // Actually a SelectorTableImpl

public:
  SelectorTable();
  // The table is neither copyable nor copy-assignable.
  SelectorTable(const SelectorTable &) = delete;
  SelectorTable &operator=(const SelectorTable &) = delete;
  ~SelectorTable();

  /// \brief Can create any sort of selector.
  ///
  /// \p NumArgs indicates whether this is a no argument selector "foo", a
  /// single argument selector "foo:" or multi-argument "foo:bar:".
  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);

  /// \brief Build the one-argument selector for \p ID.
  ///
  /// Note the naming: this yields a selector tagged as taking one argument,
  /// whereas Selector::isUnarySelector() tests for the zero-argument tag.
  Selector getUnarySelector(IdentifierInfo *ID) {
    return Selector(ID, 1);
  }
  /// \brief Build the zero-argument selector for \p ID.
  Selector getNullarySelector(IdentifierInfo *ID) {
    return Selector(ID, 0);
  }

  /// \brief Return the total amount of memory allocated for managing selectors.
  size_t getTotalMemory() const;

  /// \brief Return the default setter name for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static SmallString<64> constructSetterName(StringRef Name);

  /// \brief Return the default setter selector for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static Selector constructSetterSelector(IdentifierTable &Idents,
                                          SelectorTable &SelTable,
                                          const IdentifierInfo *Name);
};
817
/// DeclarationNameExtra - Common base of the MultiKeywordSelector,
/// CXXSpecialName, and CXXOperatorIdName classes, all of which are
/// private classes that describe different kinds of names.
class DeclarationNameExtra {
public:
  /// ExtraKind - The kind of "extra" information stored in the
  /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
  /// how these enumerator values are used.
  enum ExtraKind {
    CXXConstructor = 0,
    CXXDestructor,
    CXXConversionFunction,
// Each OVERLOADED_OPERATOR entry of the included .def file expands to one
// CXXOperator<Name> enumerator.
// NOTE(review): presumably OperatorKinds.def #undefs the macro afterwards --
// confirm.
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
    CXXOperator##Name,
#include "clang/Basic/OperatorKinds.def"
    CXXDeductionGuide,
    CXXLiteralOperator,
    CXXUsingDirective,
    // One past the last kind; also the offset added to selector argument
    // counts (see ExtraKindOrNumArgs).
    NUM_EXTRA_KINDS
  };

  /// ExtraKindOrNumArgs - Either the kind of C++ special name or
  /// operator-id (if the value is one of the CXX* enumerators of
  /// ExtraKind), in which case the DeclarationNameExtra is also a
  /// CXXSpecialName, (for CXXConstructor, CXXDestructor, or
  /// CXXConversionFunction) CXXOperatorIdName, or CXXLiteralOperatorName,
  /// it may be also name common to C++ using-directives (CXXUsingDirective),
  /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
  /// arguments in the Objective-C selector, in which case the
  /// DeclarationNameExtra is also a MultiKeywordSelector.
  unsigned ExtraKindOrNumArgs;
};
850
851}  // end namespace clang
852
853namespace llvm {
854
855/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
856/// DenseSets.
857template <>
858struct DenseMapInfo<clang::Selector> {
859  static inline clang::Selector getEmptyKey() {
860    return clang::Selector::getEmptyMarker();
861  }
862
863  static inline clang::Selector getTombstoneKey() {
864    return clang::Selector::getTombstoneMarker();
865  }
866
867  static unsigned getHashValue(clang::Selector S);
868
869  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
870    return LHS == RHS;
871  }
872};
873
// Mark clang::Selector as POD-like for LLVM's containers; its only data
// member is a single uintptr_t.
template <>
struct isPodLike<clang::Selector> { static const bool value = true; };
876
877template <typename T> struct PointerLikeTypeTraits;
878
879template<>
880struct PointerLikeTypeTraits<clang::Selector> {
881  static inline const void *getAsVoidPointer(clang::Selector P) {
882    return P.getAsOpaquePtr();
883  }
884
885  static inline clang::Selector getFromVoidPointer(const void *P) {
886    return clang::Selector(reinterpret_cast<uintptr_t>(P));
887  }
888
889  enum { NumLowBitsAvailable = 0 };
890};
891
892// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
893// are not guaranteed to be 8-byte aligned.
894template<>
895struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
896  static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
897    return P;
898  }
899
900  static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
901    return static_cast<clang::IdentifierInfo*>(P);
902  }
903
904  enum { NumLowBitsAvailable = 1 };
905};
906
907template<>
908struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
909  static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
910    return P;
911  }
912
913  static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
914    return static_cast<const clang::IdentifierInfo*>(P);
915  }
916
917  enum { NumLowBitsAvailable = 1 };
918};
919
920} // end namespace llvm
921
922#endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
923