CIndexUSRs.cpp revision 28a7f258aefdd58db0bbf3a903f053bf2cb69c90
//===- CIndexUSRs.cpp - Clang-C Source Indexing Library -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the generation and use of USRs from CXEntities.
11//
12//===----------------------------------------------------------------------===//
13
14#include "CIndexer.h"
15#include "CXCursor.h"
16#include "clang/AST/DeclVisitor.h"
17#include "clang/Frontend/ASTUnit.h"
18#include "clang/Lex/PreprocessingRecord.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/raw_ostream.h"
21
22using namespace clang;
23using namespace clang::cxstring;
24
25//===----------------------------------------------------------------------===//
26// USR generation.
27//===----------------------------------------------------------------------===//
28
29namespace {
30class USRGenerator : public DeclVisitor<USRGenerator> {
31  llvm::SmallString<1024> Buf;
32  llvm::raw_svector_ostream Out;
33  bool IgnoreResults;
34  ASTUnit *AU;
35  bool generatedLoc;
36public:
37  USRGenerator(const CXCursor *C = 0)
38    : Out(Buf),
39      IgnoreResults(false),
40      AU(C ? cxcursor::getCursorASTUnit(*C) : 0),
41      generatedLoc(false)
42  {
43    // Add the USR space prefix.
44    Out << "c:";
45  }
46
47  llvm::StringRef str() {
48    return Out.str();
49  }
50
51  USRGenerator* operator->() { return this; }
52
53  template <typename T>
54  llvm::raw_svector_ostream &operator<<(const T &x) {
55    Out << x;
56    return Out;
57  }
58
59  bool ignoreResults() const { return IgnoreResults; }
60
61  // Visitation methods from generating USRs from AST elements.
62  void VisitDeclContext(DeclContext *D);
63  void VisitFieldDecl(FieldDecl *D);
64  void VisitFunctionDecl(FunctionDecl *D);
65  void VisitNamedDecl(NamedDecl *D);
66  void VisitNamespaceDecl(NamespaceDecl *D);
67  void VisitObjCClassDecl(ObjCClassDecl *CD);
68  void VisitObjCContainerDecl(ObjCContainerDecl *CD);
69  void VisitObjCForwardProtocolDecl(ObjCForwardProtocolDecl *P);
70  void VisitObjCMethodDecl(ObjCMethodDecl *MD);
71  void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
72  void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
73  void VisitTagDecl(TagDecl *D);
74  void VisitTypedefDecl(TypedefDecl *D);
75  void VisitVarDecl(VarDecl *D);
76  void VisitLinkageSpecDecl(LinkageSpecDecl *D) {
77    IgnoreResults = true;
78    return;
79  }
80
81  /// Generate the string component containing the location of the
82  ///  declaration.
83  bool GenLoc(const Decl *D);
84
85  /// String generation methods used both by the visitation methods
86  /// and from other clients that want to directly generate USRs.  These
87  /// methods do not construct complete USRs (which incorporate the parents
88  /// of an AST element), but only the fragments concerning the AST element
89  /// itself.
90
91  /// Generate a USR for an Objective-C class.
92  void GenObjCClass(llvm::StringRef cls);
93  /// Generate a USR for an Objective-C class category.
94  void GenObjCCategory(llvm::StringRef cls, llvm::StringRef cat);
95  /// Generate a USR fragment for an Objective-C instance variable.  The
96  /// complete USR can be created by concatenating the USR for the
97  /// encompassing class with this USR fragment.
98  void GenObjCIvar(llvm::StringRef ivar);
99  /// Generate a USR fragment for an Objective-C method.
100  void GenObjCMethod(llvm::StringRef sel, bool isInstanceMethod);
101  /// Generate a USR fragment for an Objective-C property.
102  void GenObjCProperty(llvm::StringRef prop);
103  /// Generate a USR for an Objective-C protocol.
104  void GenObjCProtocol(llvm::StringRef prot);
105
106  void VisitType(QualType T);
107
108  /// Emit a Decl's name using NamedDecl::printName() and return true if
109  ///  the decl had no name.
110  bool EmitDeclName(const NamedDecl *D);
111};
112
113} // end anonymous namespace
114
115//===----------------------------------------------------------------------===//
// Generating USRs from ASTs.
117//===----------------------------------------------------------------------===//
118
119bool USRGenerator::EmitDeclName(const NamedDecl *D) {
120  Out.flush();
121  const unsigned startSize = Buf.size();
122  D->printName(Out);
123  Out.flush();
124  const unsigned endSize = Buf.size();
125  return startSize == endSize;
126}
127
128static bool InAnonymousNamespace(const Decl *D) {
129  if (const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(D->getDeclContext()))
130    return ND->isAnonymousNamespace();
131  return false;
132}
133
134static inline bool ShouldGenerateLocation(const NamedDecl *D) {
135  return D->getLinkage() != ExternalLinkage && !InAnonymousNamespace(D);
136}
137
138void USRGenerator::VisitDeclContext(DeclContext *DC) {
139  if (NamedDecl *D = dyn_cast<NamedDecl>(DC))
140    Visit(D);
141}
142
143void USRGenerator::VisitFieldDecl(FieldDecl *D) {
144  VisitDeclContext(D->getDeclContext());
145  Out << (isa<ObjCIvarDecl>(D) ? "@" : "@FI@");
146  if (EmitDeclName(D)) {
147    // Bit fields can be anonymous.
148    IgnoreResults = true;
149    return;
150  }
151}
152
153void USRGenerator::VisitFunctionDecl(FunctionDecl *D) {
154  if (ShouldGenerateLocation(D) && GenLoc(D))
155    return;
156
157  VisitDeclContext(D->getDeclContext());
158  Out << "@F@";
159  D->printName(Out);
160
161  ASTContext &Ctx = AU->getASTContext();
162  if (!Ctx.getLangOptions().CPlusPlus || D->isExternC())
163    return;
164
165  // Mangle in type information for the arguments.
166  for (FunctionDecl::param_iterator I = D->param_begin(), E = D->param_end();
167       I != E; ++I) {
168    Out << '#';
169    if (ParmVarDecl *PD = *I)
170      VisitType(PD->getType());
171  }
172  if (D->isVariadic())
173    Out << '.';
174  Out << '#';
175  if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
176    if (MD->isStatic())
177      Out << 'S';
178    if (unsigned quals = MD->getTypeQualifiers())
179      Out << (char)('0' + quals);
180  }
181}
182
183void USRGenerator::VisitNamedDecl(NamedDecl *D) {
184  VisitDeclContext(D->getDeclContext());
185  Out << "@";
186
187  if (EmitDeclName(D)) {
188    // The string can be empty if the declaration has no name; e.g., it is
189    // the ParmDecl with no name for declaration of a function pointer type,
190    // e.g.: void  (*f)(void *);
191    // In this case, don't generate a USR.
192    IgnoreResults = true;
193  }
194}
195
196void USRGenerator::VisitVarDecl(VarDecl *D) {
197  // VarDecls can be declared 'extern' within a function or method body,
198  // but their enclosing DeclContext is the function, not the TU.  We need
199  // to check the storage class to correctly generate the USR.
200  if (ShouldGenerateLocation(D) && GenLoc(D))
201    return;
202
203  VisitDeclContext(D->getDeclContext());
204
205  // Variables always have simple names.
206  llvm::StringRef s = D->getName();
207
208  // The string can be empty if the declaration has no name; e.g., it is
209  // the ParmDecl with no name for declaration of a function pointer type, e.g.:
210  //    void  (*f)(void *);
211  // In this case, don't generate a USR.
212  if (s.empty())
213    IgnoreResults = true;
214  else
215    Out << '@' << s;
216}
217
218void USRGenerator::VisitNamespaceDecl(NamespaceDecl *D) {
219  if (D->isAnonymousNamespace()) {
220    Out << "@aN";
221    return;
222  }
223
224  VisitDeclContext(D->getDeclContext());
225  if (!IgnoreResults)
226    Out << "@N@" << D->getName();
227}
228
229void USRGenerator::VisitObjCMethodDecl(ObjCMethodDecl *D) {
230  Decl *container = cast<Decl>(D->getDeclContext());
231
232  // The USR for a method declared in a class extension is based on
233  // the ObjCInterfaceDecl, not the ObjCCategoryDecl.
234  do {
235    if (ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(container))
236      if (CD->IsClassExtension()) {
237        Visit(CD->getClassInterface());
238        break;
239      }
240    Visit(cast<Decl>(D->getDeclContext()));
241  }
242  while (false);
243
244  // Ideally we would use 'GenObjCMethod', but this is such a hot path
245  // for Objective-C code that we don't want to use
246  // DeclarationName::getAsString().
247  Out << (D->isInstanceMethod() ? "(im)" : "(cm)");
248  DeclarationName N(D->getSelector());
249  N.printName(Out);
250}
251
252void USRGenerator::VisitObjCClassDecl(ObjCClassDecl *D) {
253  // FIXME: @class declarations can refer to multiple classes.  We need
254  //  to be able to traverse these.
255  IgnoreResults = true;
256}
257
258void USRGenerator::VisitObjCForwardProtocolDecl(ObjCForwardProtocolDecl *D) {
259  // FIXME: @protocol declarations can refer to multiple protocols.  We need
260  //  to be able to traverse these.
261  IgnoreResults = true;
262}
263
264void USRGenerator::VisitObjCContainerDecl(ObjCContainerDecl *D) {
265  switch (D->getKind()) {
266    default:
267      assert(false && "Invalid ObjC container.");
268    case Decl::ObjCInterface:
269    case Decl::ObjCImplementation:
270      GenObjCClass(D->getName());
271      break;
272    case Decl::ObjCCategory: {
273      ObjCCategoryDecl *CD = cast<ObjCCategoryDecl>(D);
274      ObjCInterfaceDecl *ID = CD->getClassInterface();
275      if (!ID) {
276        // Handle invalid code where the @interface might not
277        // have been specified.
278        // FIXME: We should be able to generate this USR even if the
279        // @interface isn't available.
280        IgnoreResults = true;
281        return;
282      }
283      // Specially handle class extensions, which are anonymous categories.
284      // We want to mangle in the location to uniquely distinguish them.
285      if (CD->IsClassExtension()) {
286        Out << "objc(ext)" << ID->getName() << '@';
287        GenLoc(CD);
288      }
289      else
290        GenObjCCategory(ID->getName(), CD->getName());
291
292      break;
293    }
294    case Decl::ObjCCategoryImpl: {
295      ObjCCategoryImplDecl *CD = cast<ObjCCategoryImplDecl>(D);
296      ObjCInterfaceDecl *ID = CD->getClassInterface();
297      if (!ID) {
298        // Handle invalid code where the @interface might not
299        // have been specified.
300        // FIXME: We should be able to generate this USR even if the
301        // @interface isn't available.
302        IgnoreResults = true;
303        return;
304      }
305      GenObjCCategory(ID->getName(), CD->getName());
306      break;
307    }
308    case Decl::ObjCProtocol:
309      GenObjCProtocol(cast<ObjCProtocolDecl>(D)->getName());
310      break;
311  }
312}
313
314void USRGenerator::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
315  Visit(cast<Decl>(D->getDeclContext()));
316  GenObjCProperty(D->getName());
317}
318
319void USRGenerator::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
320  if (ObjCPropertyDecl *PD = D->getPropertyDecl()) {
321    VisitObjCPropertyDecl(PD);
322    return;
323  }
324
325  IgnoreResults = true;
326}
327
328void USRGenerator::VisitTagDecl(TagDecl *D) {
329  // Add the location of the tag decl to handle resolution across
330  // translation units.
331  if (ShouldGenerateLocation(D) && GenLoc(D))
332    return;
333
334  D = D->getCanonicalDecl();
335  VisitDeclContext(D->getDeclContext());
336
337  switch (D->getTagKind()) {
338    case TTK_Struct: Out << "@S"; break;
339    case TTK_Class:  Out << "@C"; break;
340    case TTK_Union:  Out << "@U"; break;
341    case TTK_Enum:   Out << "@E"; break;
342  }
343
344  Out << '@';
345  Out.flush();
346  assert(Buf.size() > 0);
347  const unsigned off = Buf.size() - 1;
348
349  if (EmitDeclName(D)) {
350    if (const TypedefDecl *TD = D->getTypedefForAnonDecl()) {
351      Buf[off] = 'A';
352      Out << '@' << TD;
353    }
354    else
355      Buf[off] = 'a';
356  }
357}
358
359void USRGenerator::VisitTypedefDecl(TypedefDecl *D) {
360  if (ShouldGenerateLocation(D) && GenLoc(D))
361    return;
362  DeclContext *DC = D->getDeclContext();
363  if (NamedDecl *DCN = dyn_cast<NamedDecl>(DC))
364    Visit(DCN);
365  Out << "@T@";
366  Out << D->getName();
367}
368
369bool USRGenerator::GenLoc(const Decl *D) {
370  if (generatedLoc)
371    return IgnoreResults;
372  generatedLoc = true;
373
374  const SourceManager &SM = AU->getSourceManager();
375  SourceLocation L = D->getLocStart();
376  if (L.isInvalid()) {
377    IgnoreResults = true;
378    return true;
379  }
380  L = SM.getInstantiationLoc(L);
381  const std::pair<FileID, unsigned> &Decomposed = SM.getDecomposedLoc(L);
382  const FileEntry *FE = SM.getFileEntryForID(Decomposed.first);
383  if (FE) {
384    llvm::sys::Path P(FE->getName());
385    Out << P.getLast();
386  }
387  else {
388    // This case really isn't interesting.
389    IgnoreResults = true;
390    return true;
391  }
392  // Use the offest into the FileID to represent the location.  Using
393  // a line/column can cause us to look back at the original source file,
394  // which is expensive.
395  Out << '@' << Decomposed.second;
396  return IgnoreResults;
397}
398
399void USRGenerator::VisitType(QualType T) {
400  // This method mangles in USR information for types.  It can possibly
401  // just reuse the naming-mangling logic used by codegen, although the
402  // requirements for USRs might not be the same.
403  ASTContext &Ctx = AU->getASTContext();
404
405  do {
406    T = Ctx.getCanonicalType(T);
407    Qualifiers Q = T.getQualifiers();
408    unsigned qVal = 0;
409    if (Q.hasConst())
410      qVal |= 0x1;
411    if (Q.hasVolatile())
412      qVal |= 0x2;
413    if (Q.hasRestrict())
414      qVal |= 0x4;
415    if(qVal)
416      Out << ((char) ('0' + qVal));
417
418    // Mangle in ObjC GC qualifiers?
419
420    if (const PointerType *PT = T->getAs<PointerType>()) {
421      Out << '*';
422      T = PT->getPointeeType();
423      continue;
424    }
425    if (const ReferenceType *RT = T->getAs<ReferenceType>()) {
426      Out << '&';
427      T = RT->getPointeeType();
428      continue;
429    }
430    if (const FunctionProtoType *FT = T->getAs<FunctionProtoType>()) {
431      Out << 'F';
432      VisitType(FT->getResultType());
433      for (FunctionProtoType::arg_type_iterator
434            I = FT->arg_type_begin(), E = FT->arg_type_end(); I!=E; ++I) {
435        VisitType(*I);
436      }
437      if (FT->isVariadic())
438        Out << '.';
439      return;
440    }
441    if (const BlockPointerType *BT = T->getAs<BlockPointerType>()) {
442      Out << 'B';
443      T = BT->getPointeeType();
444      continue;
445    }
446    if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
447      unsigned char c = '\0';
448      switch (BT->getKind()) {
449        case BuiltinType::Void:
450          c = 'v'; break;
451        case BuiltinType::Bool:
452          c = 'b'; break;
453        case BuiltinType::Char_U:
454        case BuiltinType::UChar:
455          c = 'c'; break;
456        case BuiltinType::Char16:
457          c = 'q'; break;
458        case BuiltinType::Char32:
459          c = 'w'; break;
460        case BuiltinType::UShort:
461          c = 's'; break;
462        case BuiltinType::UInt:
463          c = 'i'; break;
464        case BuiltinType::ULong:
465          c = 'l'; break;
466        case BuiltinType::ULongLong:
467          c = 'k'; break;
468        case BuiltinType::UInt128:
469          c = 'j'; break;
470        case BuiltinType::Char_S:
471        case BuiltinType::SChar:
472          c = 'C'; break;
473        case BuiltinType::WChar:
474          c = 'W'; break;
475        case BuiltinType::Short:
476          c = 'S'; break;
477        case BuiltinType::Int:
478          c = 'I'; break;
479        case BuiltinType::Long:
480          c = 'L'; break;
481        case BuiltinType::LongLong:
482          c = 'K'; break;
483        case BuiltinType::Int128:
484          c = 'J'; break;
485        case BuiltinType::Float:
486          c = 'f'; break;
487        case BuiltinType::Double:
488          c = 'd'; break;
489        case BuiltinType::LongDouble:
490          c = 'D'; break;
491        case BuiltinType::NullPtr:
492          c = 'n'; break;
493        case BuiltinType::Overload:
494        case BuiltinType::Dependent:
495        case BuiltinType::UndeducedAuto:
496          IgnoreResults = true;
497          return;
498        case BuiltinType::ObjCId:
499          c = 'o'; break;
500        case BuiltinType::ObjCClass:
501          c = 'O'; break;
502        case BuiltinType::ObjCSel:
503          c = 'e'; break;
504      }
505      Out << c;
506      return;
507    }
508    if (const ComplexType *CT = T->getAs<ComplexType>()) {
509      Out << '<';
510      T = CT->getElementType();
511      continue;
512    }
513    if (const TagType *TT = T->getAs<TagType>()) {
514      Out << '$';
515      VisitTagDecl(TT->getDecl());
516      return;
517    }
518
519    // Unhandled type.
520    Out << ' ';
521    break;
522  } while (true);
523}
524
525//===----------------------------------------------------------------------===//
526// General purpose USR generation methods.
527//===----------------------------------------------------------------------===//
528
529void USRGenerator::GenObjCClass(llvm::StringRef cls) {
530  Out << "objc(cs)" << cls;
531}
532
533void USRGenerator::GenObjCCategory(llvm::StringRef cls, llvm::StringRef cat) {
534  Out << "objc(cy)" << cls << '@' << cat;
535}
536
537void USRGenerator::GenObjCIvar(llvm::StringRef ivar) {
538  Out << '@' << ivar;
539}
540
541void USRGenerator::GenObjCMethod(llvm::StringRef meth, bool isInstanceMethod) {
542  Out << (isInstanceMethod ? "(im)" : "(cm)") << meth;
543}
544
545void USRGenerator::GenObjCProperty(llvm::StringRef prop) {
546  Out << "(py)" << prop;
547}
548
549void USRGenerator::GenObjCProtocol(llvm::StringRef prot) {
550  Out << "objc(pl)" << prot;
551}
552
553//===----------------------------------------------------------------------===//
554// API hooks.
555//===----------------------------------------------------------------------===//
556
557static inline llvm::StringRef extractUSRSuffix(llvm::StringRef s) {
558  return s.startswith("c:") ? s.substr(2) : "";
559}
560
561static CXString getDeclCursorUSR(const CXCursor &C) {
562  Decl *D = cxcursor::getCursorDecl(C);
563
564  // Don't generate USRs for things with invalid locations.
565  if (!D || D->getLocStart().isInvalid())
566    return createCXString("");
567
568  // Check if the cursor has 'NoLinkage'.
569  if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
570    switch (ND->getLinkage()) {
571      case ExternalLinkage:
572        // Generate USRs for all entities with external linkage.
573        break;
574      case NoLinkage:
575      case UniqueExternalLinkage:
576        // We allow enums, typedefs, and structs that have no linkage to
577        // have USRs that are anchored to the file they were defined in
578        // (e.g., the header).  This is a little gross, but in principal
579        // enums/anonymous structs/etc. defined in a common header file
580        // are referred to across multiple translation units.
581        if (isa<TagDecl>(ND) || isa<TypedefDecl>(ND) ||
582            isa<EnumConstantDecl>(ND) || isa<FieldDecl>(ND) ||
583            isa<VarDecl>(ND) || isa<NamespaceDecl>(ND))
584          break;
585        // Fall-through.
586      case InternalLinkage:
587        if (isa<FunctionDecl>(ND))
588          break;
589    }
590
591  USRGenerator UG(&C);
592  UG->Visit(D);
593
594  if (UG->ignoreResults())
595    return createCXString("");
596
597#if 0
598  // For development testing.
599  assert(UG.str().size() > 2);
600#endif
601
602    // Return a copy of the string that must be disposed by the caller.
603  return createCXString(UG.str(), true);
604}
605
606extern "C" {
607
608CXString clang_getCursorUSR(CXCursor C) {
609  const CXCursorKind &K = clang_getCursorKind(C);
610
611  if (clang_isDeclaration(K))
612      return getDeclCursorUSR(C);
613
614  if (K == CXCursor_MacroDefinition) {
615    USRGenerator UG(&C);
616    UG << "macro@"
617       << cxcursor::getCursorMacroDefinition(C)->getName()->getNameStart();
618    return createCXString(UG.str(), true);
619  }
620
621  return createCXString("");
622}
623
624CXString clang_constructUSR_ObjCIvar(const char *name, CXString classUSR) {
625  USRGenerator UG;
626  UG << extractUSRSuffix(clang_getCString(classUSR));
627  UG->GenObjCIvar(name);
628  return createCXString(UG.str(), true);
629}
630
631CXString clang_constructUSR_ObjCMethod(const char *name,
632                                       unsigned isInstanceMethod,
633                                       CXString classUSR) {
634  USRGenerator UG;
635  UG << extractUSRSuffix(clang_getCString(classUSR));
636  UG->GenObjCMethod(name, isInstanceMethod);
637  return createCXString(UG.str(), true);
638}
639
640CXString clang_constructUSR_ObjCClass(const char *name) {
641  USRGenerator UG;
642  UG->GenObjCClass(name);
643  return createCXString(UG.str(), true);
644}
645
646CXString clang_constructUSR_ObjCProtocol(const char *name) {
647  USRGenerator UG;
648  UG->GenObjCProtocol(name);
649  return createCXString(UG.str(), true);
650}
651
652CXString clang_constructUSR_ObjCCategory(const char *class_name,
653                                         const char *category_name) {
654  USRGenerator UG;
655  UG->GenObjCCategory(class_name, category_name);
656  return createCXString(UG.str(), true);
657}
658
659CXString clang_constructUSR_ObjCProperty(const char *property,
660                                         CXString classUSR) {
661  USRGenerator UG;
662  UG << extractUSRSuffix(clang_getCString(classUSR));
663  UG->GenObjCProperty(property);
664  return createCXString(UG.str(), true);
665}
666
667} // end extern "C"
668