# cindex.py revision 4efd632322731425d83d205f26bddcdfe1ac8937
#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
Clang Indexing Library Bindings
===============================

This module provides an interface to the Clang indexing library. It is a
low-level interface to the indexing library which attempts to match the Clang
API directly while also being "pythonic". Notable differences from the C API
are:

 * string results are returned as Python strings, not CXString objects.

 * null cursors are translated to None.

 * access to child cursors is done via iteration, not visitation.

The major indexing objects are:

  Index

    The top-level object which manages some global library state.

  TranslationUnit

    High-level object encapsulating the AST for a single translation unit. These
    can be loaded from .ast files or parsed on the fly.

  Cursor

    Generic object for representing a node in the AST.

  SourceRange, SourceLocation, and File

    Objects representing information about the input source.

Most object information is exposed using properties, when the underlying API
call is efficient.
"""

# TODO
# ====
#
# o fix memory management issues (currently client must hold on to index and
#   translation unit, or risk crashes).
#
# o expose code completion APIs.
#
# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
#   clearly, and hide from the external interface (i.e., help(cindex)).
#
# o implement additional SourceLocation, SourceRange, and File methods.

from ctypes import *

def get_cindex_library():
    """
    Load the CIndex shared library and return the ctypes handle for it.

    The file name is chosen from platform.system(): 'libCIndex.dylib' on
    Darwin, 'libCIndex.dll' on Windows and 'libCIndex.so' everywhere else.
    The name is handed to cdll.LoadLibrary without a directory, so locating
    the file is left to the platform's loader.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    file_names = {
        'Darwin': 'libCIndex.dylib',
        'Windows': 'libCIndex.dll',
    }
    return cdll.LoadLibrary(file_names.get(platform.system(), 'libCIndex.so'))

## Utility Types and Functions ##
def _as_bytes(s):
    """
    Return s as a byte string, encoding text strings as UTF-8.

    Byte strings are returned unchanged, so on Python 2 a plain str passes
    straight through.
    """
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')

def alloc_string_vector(strs):
    """
    Allocate a string buffer large enough to accommodate the given list of
    python strings.

    The buffer has room for every string plus one terminating NUL per string.
    It is created with create_string_buffer and therefore starts out
    zero-filled. An empty list yields a zero-length buffer.
    """
    size = sum(len(_as_bytes(s)) + 1 for s in strs)
    return create_string_buffer(size)

def copy_string_vector(vec, strs):
    """
    Copy the contents of each string into the vector, preserving null
    terminated elements.

    Parameters:
    vec -- a ctypes char buffer, normally obtained from alloc_string_vector
           for the same strs.
    strs -- the strings to copy, in order.

    Only the string bytes are written; the byte after each string is skipped
    rather than cleared, so the separators are NUL only if vec was zero-filled
    beforehand. Raises ValueError if a string does not fit into what is left
    of vec.
    """
    offset = 0
    for s in strs:
        data = _as_bytes(s)
        # One slice assignment per string instead of one store per character.
        vec[offset:offset + len(data)] = data
        # Step over the terminator slot that follows the string.
        offset += len(data) + 1

def create_string_vector(strs):
    """
    Create a string vector (char *[]) from the given list of strings.

    Returns a ctypes array of c_char_p with one NUL-terminated entry per input
    string, in order. Passing byref() of the result to a C function yields a
    pointer to the first char* element, which is the layout an argv-style
    parameter needs. An empty list yields a zero-length array.

    A single flat buffer of concatenated strings (what alloc_string_vector and
    copy_string_vector build) is not a char *[]: a callee indexing it as
    an array of pointers would interpret character data as addresses.
    """
    items = [_as_bytes(s) for s in strs]
    # The array object keeps references to the byte strings it points at, so
    # they stay alive for as long as the returned vector does.
    return (c_char_p * len(items))(*items)

# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_parameter will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

lib = get_cindex_library()

### Structures and Utility Classes ###

class _CXString(Structure):
    """Helper for transforming CXString results."""

    _fields_ = [("spelling", c_char_p), ("free", c_int)]

    def __del__(self):
        # Hand the structure back to the library when the Python wrapper is
        # collected (_CXString_dispose is bound to clang_disposeString further
        # down in this module).
        _CXString_dispose(self)

    @staticmethod
    def from_result(res, fn, args):
        """
        ctypes errcheck hook: turn a returned _CXString into the value of
        clang_getCString for it.

        res is the raw call result; fn and args are supplied by ctypes and are
        not used.
        """
        assert isinstance(res, _CXString)
        return _CXString_getCString(res)

class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.
    """
    _fields_ = [("ptr_data", c_void_p), ("int_data", c_uint)]

    # Cached (file, line, column) tuple; filled in on first property access.
    _data = None

    def _get_instantiation(self):
        """
        Return the (file, line, column) tuple for this location.

        The tuple is computed once through SourceLocation_loc and then cached
        on the instance. file is a File, or None when the library reports a
        null file handle.
        """
        if self._data is None:
            file_p, line, column = c_object_p(), c_uint(), c_uint()
            SourceLocation_loc(self, byref(file_p), byref(line), byref(column))
            # A null handle is falsy; only wrap real file objects.
            source_file = File(file_p) if file_p else None
            self._data = (source_file, int(line.value), int(column.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        return self._get_instantiation()[0]

    @property
    def line(self):
        """Get the line represented by this source location."""
        return self._get_instantiation()[1]

    @property
    def column(self):
        """Get the column represented by this source location."""
        return self._get_instantiation()[2]

    def __repr__(self):
        return "<SourceLocation file %r, line %r, column %r>" % (
            self.file.name if self.file else None, self.line, self.column)

class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint)]

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        return SourceRange_start(self)

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        return SourceRange_end(self)

    def __repr__(self):
        return "<SourceRange start %r, end %r>" % (self.start, self.end)

### Cursor Kinds ###

class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.

    Every instance registers itself under its numeric id, and the known kinds
    are published as class attributes (CursorKind.STRUCT_DECL, ...), which is
    also where the name property finds their names.
    """

    # The unique kind objects, indexed by id.
    _kinds = []

    # Lazily built map from kind object to attribute name; reset to None
    # whenever a new kind is registered.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind under the numeric id value.

        Raises ValueError if a kind with that id has already been created.
        """
        if value >= len(CursorKind._kinds):
            # Grow the table so that value becomes a valid index.
            CursorKind._kinds += [None] * (value - len(CursorKind._kinds) + 1)
        if CursorKind._kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        CursorKind._kinds[value] = self
        CursorKind._name_map = None

    def from_param(self):
        """ctypes hook: pass a kind to C as its numeric id."""
        return self.value

    @property
    def name(self):
        """
        Get the enumeration name of this cursor kind.

        The name is the CursorKind class attribute the instance is bound to.
        Raises KeyError if the instance is not bound to any class attribute.
        """
        names = CursorKind._name_map
        if names is None or self not in names:
            # Keep the cache on the class so the reset in __init__ actually
            # invalidates it, and rebuild on a miss so a kind that was bound
            # to its attribute after the last build is still found.
            names = dict((kind, key)
                         for key, kind in CursorKind.__dict__.items()
                         if isinstance(kind, CursorKind))
            CursorKind._name_map = names
        return names[self]

    @staticmethod
    def from_id(id):
        """
        Return the registered kind for the numeric id.

        Raises ValueError when no kind is registered under that id. Negative
        ids are rejected explicitly so they cannot wrap around and index the
        table from its end.
        """
        if id < 0 or id >= len(CursorKind._kinds) \
                or CursorKind._kinds[id] is None:
            raise ValueError('Unknown cursor kind')
        return CursorKind._kinds[id]

    @staticmethod
    def get_all_kinds():
        """Return all CursorKind enumeration instances."""
        # A real list on every Python version (filter() is lazy on Python 3).
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)

# FIXME: Is there a nicer way to expose this enumeration? We could potentially
# represent the nested structure, or even build a class hierarchy. The main
# things we want for sure are (a) simple external access to kinds, (b) a place
# to hang a description and name, (c) easy to keep in sync with Index.h.

###
# Declaration Kinds

# A declaration whose specific kind is not exposed via this interface.
#
# Unexposed declarations have the same operations as any other kind of
# declaration; one can extract their location information, spelling, find their
# definitions, etc. However, the specific kind of the declaration is not
# reported.
CursorKind.UNEXPOSED_DECL = CursorKind(1)

# A C or C++ struct.
CursorKind.STRUCT_DECL = CursorKind(2)

# A C or C++ union.
CursorKind.UNION_DECL = CursorKind(3)

# A C++ class.
CursorKind.CLASS_DECL = CursorKind(4)

# An enumeration.
CursorKind.ENUM_DECL = CursorKind(5)

# A field (in C) or non-static data member (in C++) in a struct, union, or C++
# class.
CursorKind.FIELD_DECL = CursorKind(6)

# An enumerator constant.
CursorKind.ENUM_CONSTANT_DECL = CursorKind(7)

# A function.
CursorKind.FUNCTION_DECL = CursorKind(8)

# A variable.
CursorKind.VAR_DECL = CursorKind(9)

# A function or method parameter.
CursorKind.PARM_DECL = CursorKind(10)

# An Objective-C @interface.
CursorKind.OBJC_INTERFACE_DECL = CursorKind(11)

# An Objective-C @interface for a category.
CursorKind.OBJC_CATEGORY_DECL = CursorKind(12)

# An Objective-C @protocol declaration.
CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13)

# An Objective-C @property declaration.
CursorKind.OBJC_PROPERTY_DECL = CursorKind(14)

# An Objective-C instance variable.
CursorKind.OBJC_IVAR_DECL = CursorKind(15)

# An Objective-C instance method.
CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16)

# An Objective-C class method.
CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17)

# An Objective-C @implementation.
CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18)

# An Objective-C @implementation for a category.
CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19)

# A typedef.
CursorKind.TYPEDEF_DECL = CursorKind(20)

###
# Reference Kinds

CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40)
CursorKind.OBJC_PROTOCOL_REF = CursorKind(41)
CursorKind.OBJC_CLASS_REF = CursorKind(42)

# A reference to a type declaration.
#
# A type reference occurs anywhere where a type is named but not
# declared. For example, given:
#   typedef unsigned size_type;
#   size_type size;
#
# The typedef is a declaration of size_type (CXCursor_TypedefDecl),
# while the type of the variable "size" is referenced. The cursor
# referenced by the type of size is the typedef for size_type.
CursorKind.TYPE_REF = CursorKind(43)

###
# Invalid/Error Kinds

CursorKind.INVALID_FILE = CursorKind(70)
CursorKind.NO_DECL_FOUND = CursorKind(71)
CursorKind.NOT_IMPLEMENTED = CursorKind(72)

###
# Expression Kinds

# An expression whose specific kind is not exposed via this interface.
#
# Unexposed expressions have the same operations as any other kind of
# expression; one can extract their location information, spelling, children,
# etc. However, the specific kind of the expression is not reported.
CursorKind.UNEXPOSED_EXPR = CursorKind(100)

# An expression that refers to some value declaration, such as a function,
# variable, or enumerator.
CursorKind.DECL_REF_EXPR = CursorKind(101)

# An expression that refers to a member of a struct, union, class, Objective-C
# class, etc.
CursorKind.MEMBER_REF_EXPR = CursorKind(102)

# An expression that calls a function.
CursorKind.CALL_EXPR = CursorKind(103)

# An expression that sends a message to an Objective-C object or class.
CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)

###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

###
# Other Kinds

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)

### Cursors ###

class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        """
        Compare two cursors with Cursor_eq and return a bool.

        Comparing against anything that is not a Cursor (None, for instance)
        is declined with NotImplemented, so Python falls back to its default
        comparison instead of ctypes rejecting the argument.
        """
        if not isinstance(other, Cursor):
            return NotImplemented
        return bool(Cursor_eq(self, other))

    def __ne__(self, other):
        """Inverse of __eq__, with the same handling of non-Cursor operands."""
        if not isinstance(other, Cursor):
            return NotImplemented
        return not Cursor_eq(self, other)

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.

        Returns None when the library answers with the null cursor.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """
        Return the spelling of the entity pointed at by the cursor.

        Only declaration cursors are queried; for every other kind the result
        is None.
        """
        if not self.kind.is_declaration():
            # FIXME: clang_getCursorSpelling should be fixed to not assert on
            # this, for consistency with clang_getCursorUSR.
            return None
        return Cursor_spelling(self)

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """
        Return an iterator for accessing the children of this cursor.

        All children are collected into a list by one Cursor_visit call before
        the iterator is returned.
        """

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            return 1 # continue
        children = []
        Cursor_visit(self, Callback(visitor), children)
        return iter(children)

    @staticmethod
    def from_result(res, fn, args):
        """
        ctypes errcheck hook: translate the null cursor to None and pass any
        other cursor through unchanged.

        res is the raw call result; fn and args are supplied by ctypes and are
        not used.
        """
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == Cursor_null():
            return None
        return res

## CIndex Objects ##

# CIndex objects (derived from ClangObject) are essentially lightweight
# wrappers attached to some underlying object, which is exposed via CIndex as
# a void*.

class ClangObject(object):
    """
    A helper for Clang objects. This class helps act as an intermediary for
    the ctypes library and the Clang CIndex library.
    """
    def __init__(self, obj):
        """Wrap obj, which must be a non-null c_object_p."""
        assert isinstance(obj, c_object_p) and obj
        # _as_parameter_ is the attribute ctypes reads when the wrapper itself
        # is passed to a foreign function.
        self.obj = self._as_parameter_ = obj

    def from_param(self):
        """ctypes hook: pass the wrapped handle when used as an argument."""
        return self._as_parameter_

class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        return Index(Index_create(excludeDecls, displayDiags))

    def __del__(self):
        Index_dispose(self)

    def read(self, path):
        """Load the translation unit from the given AST file."""
        return TranslationUnit.read(self, path)

    def parse(self, path, args=None):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        Leaving args out (or passing None) means no additional parameters.
        Returns a TranslationUnit, or None if the library returned a null
        handle.
        """
        return TranslationUnit.parse(self, path, args)


class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __del__(self):
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    @staticmethod
    def read(ix, path):
        """
        Create a translation unit from the given AST file.

        Returns None if the library returned a null handle.
        """
        ptr = TranslationUnit_read(ix, path)
        return TranslationUnit(ptr) if ptr else None

    @staticmethod
    def parse(ix, path, args=None):
        """
        Construct a translation unit from the given source file, applying
        the given command line arguments.

        Parameters:
        ix -- the Index the translation unit is created in.
        path -- path of the source file.
        args -- optional sequence of additional clang command line
                arguments; None means no additional arguments.

        Returns a TranslationUnit, or None if the library returned a null
        handle.
        """
        # TODO: Support unsaved files.
        # Materialise the arguments so that len() works for any iterable and
        # no default list object is ever shared between calls.
        args = [] if args is None else list(args)
        argc, argv = len(args), create_string_vector(args)
        ptr = TranslationUnit_parse(ix, path, argc, byref(argv), 0, 0)
        return TranslationUnit(ptr) if ptr else None

class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    @property
    def name(self):
        """Return the complete file and path name of the file."""
        return File_name(self)

    @property
    def time(self):
        """Return the last modification time of the file."""
        return File_time(self)

# Additional Functions and Types

# Wrap calls to TranslationUnit._load and Decl._load.
# C callback type handed to Cursor_visit: it receives two cursors and the
# Python object that was passed along with the callback, and returns a C int.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)

# NOTE: ctypes assumes a C int return value unless restype says otherwise, so
# the functions below that return void in the C API are given restype = None
# explicitly.

# String Functions
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]
_CXString_dispose.restype = None

_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p

# Source Location Functions
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint)]
SourceLocation_loc.restype = None

# Source Range Functions
SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation

# CursorKind Functions
#
# Kinds are marshalled through CursorKind.from_param, and the integer results
# are converted by calling bool on them.
CursorKind_is_decl = lib.clang_isDeclaration
CursorKind_is_decl.argtypes = [CursorKind]
CursorKind_is_decl.restype = bool

CursorKind_is_ref = lib.clang_isReference
CursorKind_is_ref.argtypes = [CursorKind]
CursorKind_is_ref.restype = bool

CursorKind_is_expr = lib.clang_isExpression
CursorKind_is_expr.argtypes = [CursorKind]
CursorKind_is_expr.restype = bool

CursorKind_is_stmt = lib.clang_isStatement
CursorKind_is_stmt.argtypes = [CursorKind]
CursorKind_is_stmt.restype = bool

CursorKind_is_inv = lib.clang_isInvalid
CursorKind_is_inv.argtypes = [CursorKind]
CursorKind_is_inv.restype = bool

# Cursor Functions
# TODO: Implement this function
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

Cursor_usr = lib.clang_getCursorUSR
Cursor_usr.argtypes = [Cursor]
Cursor_usr.restype = _CXString
Cursor_usr.errcheck = _CXString.from_result

Cursor_is_def = lib.clang_isCursorDefinition
Cursor_is_def.argtypes = [Cursor]
Cursor_is_def.restype = bool

Cursor_def = lib.clang_getCursorDefinition
Cursor_def.argtypes = [Cursor]
Cursor_def.restype = Cursor
Cursor_def.errcheck = Cursor.from_result

Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

Cursor_spelling = lib.clang_getCursorSpelling
Cursor_spelling.argtypes = [Cursor]
Cursor_spelling.restype = _CXString
Cursor_spelling.errcheck = _CXString.from_result

Cursor_loc = lib.clang_getCursorLocation
Cursor_loc.argtypes = [Cursor]
Cursor_loc.restype = SourceLocation

Cursor_extent = lib.clang_getCursorExtent
Cursor_extent.argtypes = [Cursor]
Cursor_extent.restype = SourceRange

Cursor_ref = lib.clang_getCursorReferenced
Cursor_ref.argtypes = [Cursor]
Cursor_ref.restype = Cursor
Cursor_ref.errcheck = Cursor.from_result

Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Callback, py_object]
Cursor_visit.restype = c_uint

# Index Functions
Index_create = lib.clang_createIndex
Index_create.argtypes = [c_int, c_int]
Index_create.restype = c_object_p

Index_dispose = lib.clang_disposeIndex
Index_dispose.argtypes = [Index]
Index_dispose.restype = None

# Translation Unit Functions
TranslationUnit_read = lib.clang_createTranslationUnit
TranslationUnit_read.argtypes = [Index, c_char_p]
TranslationUnit_read.restype = c_object_p

# The two c_void_p slots carry the command line argument vector and the
# unsaved-file array; the wrapper in this module always passes 0 for the
# latter.
TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
                                  c_int, c_void_p]
TranslationUnit_parse.restype = c_object_p

TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
TranslationUnit_cursor.argtypes = [TranslationUnit]
TranslationUnit_cursor.restype = Cursor
TranslationUnit_cursor.errcheck = Cursor.from_result

TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
TranslationUnit_spelling.argtypes = [TranslationUnit]
TranslationUnit_spelling.restype = _CXString
TranslationUnit_spelling.errcheck = _CXString.from_result

TranslationUnit_dispose = lib.clang_disposeTranslationUnit
TranslationUnit_dispose.argtypes = [TranslationUnit]
TranslationUnit_dispose.restype = None

# File Functions
File_name = lib.clang_getFileName
File_name.argtypes = [File]
File_name.restype = c_char_p

# NOTE(review): the C function presumably returns a time_t, which can be wider
# than c_uint on some platforms -- confirm against Index.h before relying on
# large values.
File_time = lib.clang_getFileTime
File_time.argtypes = [File]
File_time.restype = c_uint

###

__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind',
           'SourceRange', 'SourceLocation', 'File']