cindex.py revision 5b534f67946eeb2cb29076288bfee9707f055f82
#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
Clang Indexing Library Bindings
===============================

This module provides an interface to the Clang indexing library. It is a
low-level interface to the indexing library which attempts to match the Clang
API directly while also being "pythonic". Notable differences from the C API
are:

 * string results are returned as Python strings, not CXString objects.

 * null cursors are translated to None.

 * access to child cursors is done via iteration, not visitation.

The major indexing objects are:

  Index

    The top-level object which manages some global library state.

  TranslationUnit

    High-level object encapsulating the AST for a single translation unit. These
    can be loaded from .ast files or parsed on the fly.

  Cursor

    Generic object for representing a node in the AST.

  SourceRange, SourceLocation, and File

    Objects representing information about the input source.

Most object information is exposed using properties, when the underlying API
call is efficient.
"""

# TODO
# ====
#
# o fix memory management issues (currently client must hold on to index and
#   translation unit, or risk crashes).
#
# o expose code completion APIs.
#
# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
#   clearly, and hide from the external interface (i.e., help(cindex)).
#
# o implement additional SourceLocation, SourceRange, and File methods.

from ctypes import *

def get_cindex_library():
    """
    Load and return the CIndex shared library for the current platform.

    The library is requested by bare file name ('libCIndex.dylib' on Darwin,
    'libCIndex.dll' on Windows, 'libCIndex.so' everywhere else), so locating
    it is left to ctypes and the platform's dynamic loader.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    name = platform.system()
    if name == 'Darwin':
        return cdll.LoadLibrary('libCIndex.dylib')
    elif name == 'Windows':
        return cdll.LoadLibrary('libCIndex.dll')
    else:
        return cdll.LoadLibrary('libCIndex.so')

# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_parameter will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

lib = get_cindex_library()

### Structures and Utility Classes ###

class _CXString(Structure):
    """Helper for transforming CXString results."""

    _fields_ = [("spelling", c_char_p), ("free", c_int)]

    def __del__(self):
        # Hand the string back to the library once Python drops the wrapper.
        _CXString_dispose(self)

    @staticmethod
    def from_result(res, fn, args):
        """
        ctypes 'errcheck' hook: convert a returned _CXString into the value
        produced by clang_getCString, so callers never see the raw structure.
        """
        assert isinstance(res, _CXString)
        return _CXString_getCString(res)

class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.
    """
    _fields_ = [("ptr_data", c_void_p), ("int_data", c_uint)]

    # Lazily computed (file, line, column) tuple; None until first requested.
    _data = None

    def _get_instantiation(self):
        """Return the cached (file, line, column) tuple, computing it once."""
        if self._data is None:
            f, line, column = c_object_p(), c_uint(), c_uint()
            SourceLocation_loc(self, byref(f), byref(line), byref(column))
            # A null file pointer is exposed as None rather than a File.
            f = File(f) if f else None
            self._data = (f, int(line.value), int(column.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        return self._get_instantiation()[0]

    @property
    def line(self):
        """Get the line represented by this source location."""
        return self._get_instantiation()[1]

    @property
    def column(self):
        """Get the column represented by this source location."""
        return self._get_instantiation()[2]

    def __repr__(self):
        return "<SourceLocation file %r, line %r, column %r>" % (
            self.file.name if self.file else None, self.line, self.column)

class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint)]

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        return SourceRange_start(self)

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        return SourceRange_end(self)

    def __repr__(self):
        return "<SourceRange start %r, end %r>" % (self.start, self.end)

### Cursor Kinds ###

class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.

    Each instance registers itself in a class-level table indexed by its
    numeric id, so every id maps to at most one CursorKind object.
    """

    # The unique kind objects, indexed by id.
    _kinds = []

    # Cache mapping kind object -> attribute name on this class. Reset to None
    # whenever a new kind is created; rebuilt on demand by the 'name' property.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind with numeric id 'value'.

        Raises ValueError if a kind with this id has already been created.
        """
        if value >= len(CursorKind._kinds):
            # Grow the table so that 'value' becomes a valid index.
            CursorKind._kinds += [None] * (value - len(CursorKind._kinds) + 1)
        if CursorKind._kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        CursorKind._kinds[value] = self
        CursorKind._name_map = None

    def from_param(self):
        """Return the numeric id; used by ctypes to marshal a kind argument."""
        return self.value

    @property
    def name(self):
        """Get the enumeration name of this cursor kind."""
        name_map = CursorKind._name_map
        # Keep the cache on the class (not the instance) so it is shared by
        # all kinds and honours the reset done in __init__. Rebuild on a miss
        # as well, in case this kind was bound to its class attribute after
        # the cache had last been built.
        if name_map is None or self not in name_map:
            name_map = {}
            for key, value in CursorKind.__dict__.items():
                if isinstance(value, CursorKind):
                    name_map[value] = key
            CursorKind._name_map = name_map
        return name_map[self]

    @staticmethod
    def from_id(id):
        """
        Return the CursorKind registered for the numeric id 'id'.

        Raises ValueError if no kind is registered for that id.
        """
        # Reject negative ids explicitly: they would otherwise index the
        # table from the end and return an unrelated kind.
        if (id < 0 or id >= len(CursorKind._kinds) or
            CursorKind._kinds[id] is None):
            raise ValueError('Unknown cursor kind')
        return CursorKind._kinds[id]

    @staticmethod
    def get_all_kinds():
        """Return all CursorKind enumeration instances."""
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)

# FIXME: Is there a nicer way to expose this enumeration? We could potentially
# represent the nested structure, or even build a class hierarchy.
# The main things we want for sure are (a) simple external access to kinds,
# (b) a place to hang a description and name, (c) easy to keep in sync with
# Index.h.

###
# Declaration Kinds

# A declaration whose specific kind is not exposed via this interface.
#
# Unexposed declarations have the same operations as any other kind of
# declaration; one can extract their location information, spelling, find their
# definitions, etc. However, the specific kind of the declaration is not
# reported.
CursorKind.UNEXPOSED_DECL = CursorKind(1)

# A C or C++ struct.
CursorKind.STRUCT_DECL = CursorKind(2)

# A C or C++ union.
CursorKind.UNION_DECL = CursorKind(3)

# A C++ class.
CursorKind.CLASS_DECL = CursorKind(4)

# An enumeration.
CursorKind.ENUM_DECL = CursorKind(5)

# A field (in C) or non-static data member (in C++) in a struct, union, or C++
# class.
CursorKind.FIELD_DECL = CursorKind(6)

# An enumerator constant.
CursorKind.ENUM_CONSTANT_DECL = CursorKind(7)

# A function.
CursorKind.FUNCTION_DECL = CursorKind(8)

# A variable.
CursorKind.VAR_DECL = CursorKind(9)

# A function or method parameter.
CursorKind.PARM_DECL = CursorKind(10)

# An Objective-C @interface.
CursorKind.OBJC_INTERFACE_DECL = CursorKind(11)

# An Objective-C @interface for a category.
CursorKind.OBJC_CATEGORY_DECL = CursorKind(12)

# An Objective-C @protocol declaration.
CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13)

# An Objective-C @property declaration.
CursorKind.OBJC_PROPERTY_DECL = CursorKind(14)

# An Objective-C instance variable.
CursorKind.OBJC_IVAR_DECL = CursorKind(15)

# An Objective-C instance method.
CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16)

# An Objective-C class method.
CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17)

# An Objective-C @implementation.
CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18)

# An Objective-C @implementation for a category.
CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19)

# A typedef.
CursorKind.TYPEDEF_DECL = CursorKind(20)

###
# Reference Kinds

CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40)
CursorKind.OBJC_PROTOCOL_REF = CursorKind(41)
CursorKind.OBJC_CLASS_REF = CursorKind(42)

# A reference to a type declaration.
#
# A type reference occurs anywhere where a type is named but not
# declared. For example, given:
#   typedef unsigned size_type;
#   size_type size;
#
# The typedef is a declaration of size_type (CXCursor_TypedefDecl),
# while the type of the variable "size" is referenced. The cursor
# referenced by the type of size is the typedef for size_type.
CursorKind.TYPE_REF = CursorKind(43)

###
# Invalid/Error Kinds

CursorKind.INVALID_FILE = CursorKind(70)
CursorKind.NO_DECL_FOUND = CursorKind(71)
CursorKind.NOT_IMPLEMENTED = CursorKind(72)

###
# Expression Kinds

# An expression whose specific kind is not exposed via this interface.
#
# Unexposed expressions have the same operations as any other kind of
# expression; one can extract their location information, spelling, children,
# etc. However, the specific kind of the expression is not reported.
CursorKind.UNEXPOSED_EXPR = CursorKind(100)

# An expression that refers to some value declaration, such as a function,
# variable, or enumerator.
CursorKind.DECL_REF_EXPR = CursorKind(101)

# An expression that refers to a member of a struct, union, class, Objective-C
# class, etc.
CursorKind.MEMBER_REF_EXPR = CursorKind(102)

# An expression that calls a function.
CursorKind.CALL_EXPR = CursorKind(103)

# An expression that sends a message to an Objective-C object or class.
CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)

###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

###
# Other Kinds

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)

### Cursors ###

class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        # Only cursors can be handed to clang_equalCursors. Anything else
        # (notably None, which this module returns for null cursors) would
        # make ctypes raise ArgumentError, so defer to Python's default
        # comparison instead.
        if not isinstance(other, Cursor):
            return NotImplemented
        return Cursor_eq(self, other)

    def __ne__(self, other):
        if not isinstance(other, Cursor):
            return NotImplemented
        return not Cursor_eq(self, other)

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """Return the spelling of the entity pointed at by the cursor."""
        if not self.kind.is_declaration():
            # FIXME: clang_getCursorSpelling should be fixed to not assert on
            # this, for consistency with clang_getCursorUSR.
            return None
        return Cursor_spelling(self)

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator for accessing the children of this cursor."""

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            return 1 # continue
        children = []
        # The list is passed through the library as the opaque client data
        # argument and handed back to 'visitor' on every call.
        Cursor_visit(self, Callback(visitor), children)
        return iter(children)

    @staticmethod
    def from_result(res, fn, args):
        """
        ctypes 'errcheck' hook: translate a returned null cursor into None and
        pass any other cursor through unchanged.
        """
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == Cursor_null():
            return None
        return res

## CIndex Objects ##

# CIndex objects (derived from ClangObject) are essentially lightweight
# wrappers attached to some underlying object, which is exposed via CIndex as
# a void*.

class ClangObject(object):
    """
    A helper for Clang objects. This class helps act as an intermediary for
    the ctypes library and the Clang CIndex library.
    """
    def __init__(self, obj):
        """Wrap 'obj', which must be a non-null c_object_p."""
        assert isinstance(obj, c_object_p) and obj
        self.obj = self._as_parameter_ = obj

    def from_param(self):
        """Return the wrapped pointer; used by ctypes to marshal arguments."""
        return self._as_parameter_


class _CXUnsavedFile(Structure):
    """Helper for passing unsaved file arguments."""
    _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]

class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        return Index(Index_create(excludeDecls, displayDiags))

    def __del__(self):
        Index_dispose(self)

    def read(self, path):
        """Load the translation unit from the given AST file."""
        return TranslationUnit.read(self, path)

    def parse(self, path, args = None, unsaved_files = None):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        In-memory contents for files can be provided by passing a list of pairs
        as unsaved_files, the first item should be the filenames to be mapped
        and the second should be the contents to be substituted for the
        file. The contents may be passed as strings or file objects.

        Omitting args or unsaved_files (or passing None) is the same as
        passing an empty list.
        """
        return TranslationUnit.parse(self, path, args, unsaved_files)


class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __del__(self):
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    @staticmethod
    def read(ix, path):
        """
        Create a translation unit from the given AST file.

        Returns None if the library hands back a null translation unit.
        """
        ptr = TranslationUnit_read(ix, path)
        return TranslationUnit(ptr) if ptr else None

    @staticmethod
    def parse(ix, path, args = None, unsaved_files = None):
        """
        Construct a translation unit from the given source file, using
        the given command line argument.

        Parameters:
        ix -- The Index to create the translation unit in.
        path -- Path of the source file to parse.
        args -- Optional list of command line argument strings.
        unsaved_files -- Optional list of (name, contents) pairs; contents may
                         be a string or an object with a read() method.

        Returns None if the library hands back a null translation unit.
        """
        # None stands in for "empty" so no mutable default is shared between
        # calls.
        if args is None:
            args = []
        if unsaved_files is None:
            unsaved_files = []

        # A null pointer is passed when there is nothing to marshal.
        arg_array = None
        if len(args):
            arg_array = (c_char_p * len(args))(* args)
        unsaved_files_array = None
        if len(unsaved_files):
            unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))()
            for i,(name,value) in enumerate(unsaved_files):
                # Accept file-like objects as well as strings.
                if hasattr(value, 'read'):
                    value = value.read()
                unsaved_files_array[i].name = name
                unsaved_files_array[i].contents = value
                unsaved_files_array[i].length = len(value)
        ptr = TranslationUnit_parse(ix, path, len(args), arg_array,
                                    len(unsaved_files), unsaved_files_array)
        return TranslationUnit(ptr) if ptr else None

class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    @property
    def name(self):
        """Return the complete file and path name of the file."""
        return File_name(self)

    @property
    def time(self):
        """Return the last modification time of the file."""
        return File_time(self)

# Additional Functions and Types

# Signature of the Python visitor passed to clang_visitChildren (see
# Cursor.get_children): it receives the child cursor, the parent cursor and
# the Python object supplied as client data, and returns an int.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)

# String Functions
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]

_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p

# Source Location Functions
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint)]

# Source Range Functions
SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation

# CursorKind Functions
CursorKind_is_decl = lib.clang_isDeclaration
CursorKind_is_decl.argtypes = [CursorKind]
CursorKind_is_decl.restype = bool

CursorKind_is_ref = lib.clang_isReference
CursorKind_is_ref.argtypes = [CursorKind]
CursorKind_is_ref.restype = bool

CursorKind_is_expr = lib.clang_isExpression
CursorKind_is_expr.argtypes = [CursorKind]
CursorKind_is_expr.restype = bool

CursorKind_is_stmt = lib.clang_isStatement
CursorKind_is_stmt.argtypes = [CursorKind]
CursorKind_is_stmt.restype = bool

CursorKind_is_inv = lib.clang_isInvalid
CursorKind_is_inv.argtypes = [CursorKind]
CursorKind_is_inv.restype = bool

# Cursor Functions
# TODO: Implement this function
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

Cursor_usr = lib.clang_getCursorUSR
Cursor_usr.argtypes = [Cursor]
Cursor_usr.restype = _CXString
Cursor_usr.errcheck = _CXString.from_result

Cursor_is_def = lib.clang_isCursorDefinition
Cursor_is_def.argtypes = [Cursor]
Cursor_is_def.restype = bool

Cursor_def = lib.clang_getCursorDefinition
Cursor_def.argtypes = [Cursor]
Cursor_def.restype = Cursor
Cursor_def.errcheck = Cursor.from_result

Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

Cursor_spelling = lib.clang_getCursorSpelling
Cursor_spelling.argtypes = [Cursor]
Cursor_spelling.restype = _CXString
Cursor_spelling.errcheck = _CXString.from_result

Cursor_loc = lib.clang_getCursorLocation
Cursor_loc.argtypes = [Cursor]
Cursor_loc.restype = SourceLocation

Cursor_extent = lib.clang_getCursorExtent
Cursor_extent.argtypes = [Cursor]
Cursor_extent.restype = SourceRange

Cursor_ref = lib.clang_getCursorReferenced
Cursor_ref.argtypes = [Cursor]
Cursor_ref.restype = Cursor
Cursor_ref.errcheck = Cursor.from_result

Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Callback, py_object]
Cursor_visit.restype = c_uint

# Index Functions
Index_create = lib.clang_createIndex
Index_create.argtypes = [c_int, c_int]
Index_create.restype = c_object_p

Index_dispose = lib.clang_disposeIndex
Index_dispose.argtypes = [Index]

# Translation Unit Functions
TranslationUnit_read = lib.clang_createTranslationUnit
TranslationUnit_read.argtypes = [Index, c_char_p]
TranslationUnit_read.restype = c_object_p

TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
                                  c_int, c_void_p]
TranslationUnit_parse.restype = c_object_p

TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
TranslationUnit_cursor.argtypes = [TranslationUnit]
TranslationUnit_cursor.restype = Cursor
TranslationUnit_cursor.errcheck = Cursor.from_result

TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
TranslationUnit_spelling.argtypes = [TranslationUnit]
TranslationUnit_spelling.restype = _CXString
TranslationUnit_spelling.errcheck = _CXString.from_result

TranslationUnit_dispose = lib.clang_disposeTranslationUnit
TranslationUnit_dispose.argtypes = [TranslationUnit]

# File Functions
File_name = lib.clang_getFileName
File_name.argtypes = [File]
File_name.restype = c_char_p

File_time = lib.clang_getFileTime
File_time.argtypes = [File]
File_time.restype = c_uint

###

__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind',
           'SourceRange', 'SourceLocation', 'File']