# cindex.py revision 3239a67361cc89eba2fe7c7abdb41bd2c9414207
#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
Clang Indexing Library Bindings
===============================

This module provides an interface to the Clang indexing library. It is a
low-level interface to the indexing library which attempts to match the Clang
API directly while also being "pythonic". Notable differences from the C API
are:

 * string results are returned as Python strings, not CXString objects.

 * null cursors are translated to None.

 * access to child cursors is done via iteration, not visitation.

The major indexing objects are:

  Index

    The top-level object which manages some global library state.

  TranslationUnit

    High-level object encapsulating the AST for a single translation unit. These
    can be loaded from .ast files or parsed on the fly.

  Cursor

    Generic object for representing a node in the AST.

  SourceRange, SourceLocation, and File

    Objects representing information about the input source.

Most object information is exposed using properties, when the underlying API
call is efficient.
"""

# TODO
# ====
#
# o fix memory management issues (currently client must hold on to index and
#   translation unit, or risk crashes).
#
# o expose code completion APIs.
#
# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
#   clearly, and hide from the external interface (i.e., help(cindex)).
#
# o implement additional SourceLocation, SourceRange, and File methods.

from ctypes import *

def get_cindex_library():
    """
    Load and return the CIndex shared library as a ctypes CDLL.

    The library file name is chosen from platform.system(): a .dylib on
    Darwin, a .dll on Windows and a .so everywhere else. The name is handed
    to the loader without a directory component.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    name = platform.system()
    if name == 'Darwin':
        return cdll.LoadLibrary('libCIndex.dylib')
    elif name == 'Windows':
        return cdll.LoadLibrary('libCIndex.dll')
    else:
        return cdll.LoadLibrary('libCIndex.so')

# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_parameter will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

lib = get_cindex_library()

### Structures and Utility Classes ###

class _CXString(Structure):
    """Helper for transforming CXString results."""

    _fields_ = [("spelling", c_char_p), ("free", c_int)]

    def __del__(self):
        # Hand the string back to the library when the wrapper is collected.
        _CXString_dispose(self)

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck hook: convert a returned CXString to a C string."""
        assert isinstance(res, _CXString)
        return _CXString_getCString(res)

class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.
    """
    _fields_ = [("ptr_data", c_void_p), ("int_data", c_uint)]

    # Lazily computed (file, line, column, offset) tuple; filled in by
    # _get_instantiation() on first use.
    _data = None

    def _get_instantiation(self):
        """Return the cached (file, line, column, offset) tuple."""
        if self._data is None:
            f, line, column, offset = (c_object_p(), c_uint(), c_uint(),
                                       c_uint())
            SourceLocation_loc(self, byref(f), byref(line), byref(column),
                               byref(offset))
            # A null file pointer is reported as None.
            f = File(f) if f else None
            # The fourth element is the offset out-parameter; it used to be
            # filled from the column by mistake.
            self._data = (f, int(line.value), int(column.value),
                          int(offset.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        return self._get_instantiation()[0]

    @property
    def line(self):
        """Get the line represented by this source location."""
        return self._get_instantiation()[1]

    @property
    def column(self):
        """Get the column represented by this source location."""
        return self._get_instantiation()[2]

    @property
    def offset(self):
        """Get the file offset represented by this source location."""
        return self._get_instantiation()[3]

    def __repr__(self):
        return "<SourceLocation file %r, line %r, column %r>" % (
            self.file.name if self.file else None, self.line, self.column)

class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint)]

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        return SourceRange_start(self)

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        return SourceRange_end(self)

    def __repr__(self):
        return "<SourceRange start %r, end %r>" % (self.start, self.end)

### Cursor Kinds ###

class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.
    """

    # The unique kind objects, indexed by id.
    _kinds = []

    # Class-wide cache mapping kind object -> attribute name; built on demand
    # by the name property and dropped whenever a new kind is registered.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind with numeric id `value`.

        Raises ValueError if a kind with the same id was already created.
        """
        if value >= len(CursorKind._kinds):
            # Grow the table so that `value` is a valid index.
            CursorKind._kinds += [None] * (value - len(CursorKind._kinds) + 1)
        if CursorKind._kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        CursorKind._kinds[value] = self
        CursorKind._name_map = None

    def from_param(self):
        """ctypes argument hook: kinds are passed to C as their integer id."""
        return self.value

    @property
    def name(self):
        """Get the enumeration name of this cursor kind."""
        # Keep the cache on the class (not the instance) so that it is built
        # once for all kinds and so the reset in __init__ takes effect.
        if CursorKind._name_map is None:
            name_map = {}
            for key, value in CursorKind.__dict__.items():
                if isinstance(value, CursorKind):
                    name_map[value] = key
            CursorKind._name_map = name_map
        return CursorKind._name_map[self]

    @staticmethod
    def from_id(id):
        """
        Return the registered kind object for the numeric id `id`.

        Raises ValueError if no kind with that id has been registered.
        """
        # Reject negative ids explicitly; list indexing would otherwise wrap
        # around and return an unrelated kind.
        if (id < 0 or id >= len(CursorKind._kinds)
                or CursorKind._kinds[id] is None):
            raise ValueError('Unknown cursor kind')
        return CursorKind._kinds[id]

    @staticmethod
    def get_all_kinds():
        """Return all CursorKind enumeration instances."""
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)

# FIXME: Is there a nicer way to expose this enumeration? We could potentially
# represent the nested structure, or even build a class hierarchy.
# The main things we want for sure are (a) simple external access to kinds,
# (b) a place to hang a description and name, (c) easy to keep in sync with
# Index.h.

# Each (name, id) pair below is registered as a class attribute
# CursorKind.<name> bound to a freshly created CursorKind(<id>), in the order
# listed.
for _kind_name, _kind_id in (
    ###
    # Declaration Kinds

    # A declaration whose specific kind is not exposed via this interface.
    #
    # Unexposed declarations have the same operations as any other kind of
    # declaration; one can extract their location information, spelling, find
    # their definitions, etc. However, the specific kind of the declaration is
    # not reported.
    ('UNEXPOSED_DECL', 1),

    # A C or C++ struct.
    ('STRUCT_DECL', 2),

    # A C or C++ union.
    ('UNION_DECL', 3),

    # A C++ class.
    ('CLASS_DECL', 4),

    # An enumeration.
    ('ENUM_DECL', 5),

    # A field (in C) or non-static data member (in C++) in a struct, union, or
    # C++ class.
    ('FIELD_DECL', 6),

    # An enumerator constant.
    ('ENUM_CONSTANT_DECL', 7),

    # A function.
    ('FUNCTION_DECL', 8),

    # A variable.
    ('VAR_DECL', 9),

    # A function or method parameter.
    ('PARM_DECL', 10),

    # An Objective-C @interface.
    ('OBJC_INTERFACE_DECL', 11),

    # An Objective-C @interface for a category.
    ('OBJC_CATEGORY_DECL', 12),

    # An Objective-C @protocol declaration.
    ('OBJC_PROTOCOL_DECL', 13),

    # An Objective-C @property declaration.
    ('OBJC_PROPERTY_DECL', 14),

    # An Objective-C instance variable.
    ('OBJC_IVAR_DECL', 15),

    # An Objective-C instance method.
    ('OBJC_INSTANCE_METHOD_DECL', 16),

    # An Objective-C class method.
    ('OBJC_CLASS_METHOD_DECL', 17),

    # An Objective-C @implementation.
    ('OBJC_IMPLEMENTATION_DECL', 18),

    # An Objective-C @implementation for a category.
    ('OBJC_CATEGORY_IMPL_DECL', 19),

    # A typedef.
    ('TYPEDEF_DECL', 20),

    ###
    # Reference Kinds

    ('OBJC_SUPER_CLASS_REF', 40),
    ('OBJC_PROTOCOL_REF', 41),
    ('OBJC_CLASS_REF', 42),

    # A reference to a type declaration.
    #
    # A type reference occurs anywhere where a type is named but not
    # declared. For example, given:
    #   typedef unsigned size_type;
    #   size_type size;
    #
    # The typedef is a declaration of size_type (CXCursor_TypedefDecl),
    # while the type of the variable "size" is referenced. The cursor
    # referenced by the type of size is the typedef for size_type.
    ('TYPE_REF', 43),

    ###
    # Invalid/Error Kinds

    ('INVALID_FILE', 70),
    ('NO_DECL_FOUND', 71),
    ('NOT_IMPLEMENTED', 72),

    ###
    # Expression Kinds

    # An expression whose specific kind is not exposed via this interface.
    #
    # Unexposed expressions have the same operations as any other kind of
    # expression; one can extract their location information, spelling,
    # children, etc. However, the specific kind of the expression is not
    # reported.
    ('UNEXPOSED_EXPR', 100),

    # An expression that refers to some value declaration, such as a function,
    # variable, or enumerator.
    ('DECL_REF_EXPR', 101),

    # An expression that refers to a member of a struct, union, class,
    # Objective-C class, etc.
    ('MEMBER_REF_EXPR', 102),

    # An expression that calls a function.
    ('CALL_EXPR', 103),
):
    setattr(CursorKind, _kind_name, CursorKind(_kind_id))

# Do not leave the loop temporaries behind as module attributes.
del _kind_name, _kind_id

# An expression that sends a message to an Objective-C object or class.
CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)

###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

###
# Other Kinds

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)

### Cursors ###

class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        return Cursor_eq(self, other)

    def __ne__(self, other):
        return not Cursor_eq(self, other)

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """
        Return the spelling of the entity pointed at by the cursor, or None
        when the cursor is not a declaration.
        """
        if not self.kind.is_declaration():
            # FIXME: clang_getCursorSpelling should be fixed to not assert on
            # this, for consistency with clang_getCursorUSR.
            return None
        return Cursor_spelling(self)

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator for accessing the children of this cursor."""

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            return 1 # continue
        children = []
        Cursor_visit(self, Callback(visitor), children)
        return iter(children)

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck hook: translate a null cursor result to None."""
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == Cursor_null():
            return None
        return res

## CIndex Objects ##

# CIndex objects (derived from ClangObject) are essentially lightweight
# wrappers attached to some underlying object, which is exposed via CIndex as
# a void*.

class ClangObject(object):
    """
    A helper for Clang objects. This class helps act as an intermediary for
    the ctypes library and the Clang CIndex library.
    """
    def __init__(self, obj):
        assert isinstance(obj, c_object_p) and obj
        self.obj = self._as_parameter_ = obj

    def from_param(self):
        """ctypes argument hook: pass the wrapped object pointer to C."""
        return self._as_parameter_


class _CXUnsavedFile(Structure):
    """Helper for passing unsaved file arguments."""
    _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]

class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        return Index(Index_create(excludeDecls, displayDiags))

    def __del__(self):
        Index_dispose(self)

    def read(self, path):
        """
        Load the translation unit from the given AST file.

        Returns None if the library hands back a null translation unit.
        """
        ptr = TranslationUnit_read(self, path)
        return TranslationUnit(ptr) if ptr else None

    def parse(self, path, args=None, unsaved_files=None):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        In-memory contents for files can be provided by passing a list of pairs
        as unsaved_files, the first item should be the filenames to be mapped
        and the second should be the contents to be substituted for the
        file. The contents may be passed as strings or file objects.

        Returns a TranslationUnit, or None if the library hands back a null
        translation unit. Raises TypeError if the contents of an unsaved file
        are neither a string nor an object whose read() returns a string.
        """
        # None stands for "no arguments" / "no unsaved files"; this avoids
        # sharing mutable default lists between calls.
        if args is None:
            args = []
        if unsaved_files is None:
            unsaved_files = []

        # A NULL pointer is passed for an empty argument vector.
        arg_array = None
        if len(args):
            arg_array = (c_char_p * len(args))(*args)

        unsaved_files_array = None
        if len(unsaved_files):
            unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))()
            for i, (name, value) in enumerate(unsaved_files):
                if not isinstance(value, str):
                    # FIXME: It would be great to support an efficient version
                    # of this, one day.
                    value = value.read()
                if not isinstance(value, str):
                    raise TypeError('Unexpected unsaved file contents.')
                unsaved_files_array[i].name = name
                unsaved_files_array[i].contents = value
                unsaved_files_array[i].length = len(value)
        ptr = TranslationUnit_parse(self, path, len(args), arg_array,
                                    len(unsaved_files), unsaved_files_array)
        return TranslationUnit(ptr) if ptr else None


class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __del__(self):
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    @property
    def name(self):
        """Return the complete file and path name of the file."""
        return File_name(self)

    @property
    def time(self):
        """Return the last modification time of the file."""
        return File_time(self)

# Additional Functions and Types

# Wrap calls to TranslationUnit._load and Decl._load.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)

def _bind(symbol, argtypes=None, restype=None, errcheck=None):
    """
    Look up `symbol` in the loaded CIndex library and attach its prototype.

    Only the attributes that are actually supplied are assigned; anything left
    as None keeps the ctypes default for that attribute.
    """
    function = getattr(lib, symbol)
    if argtypes is not None:
        function.argtypes = argtypes
    if restype is not None:
        function.restype = restype
    if errcheck is not None:
        function.errcheck = errcheck
    return function

# String Functions
_CXString_dispose = _bind('clang_disposeString', [_CXString])
_CXString_getCString = _bind('clang_getCString', [_CXString], c_char_p)

# Source Location Functions
SourceLocation_loc = _bind(
    'clang_getInstantiationLocation',
    [SourceLocation, POINTER(c_object_p),
     POINTER(c_uint), POINTER(c_uint),
     POINTER(c_uint)])

# Source Range Functions
SourceRange_start = _bind('clang_getRangeStart', [SourceRange],
                          SourceLocation)
SourceRange_end = _bind('clang_getRangeEnd', [SourceRange], SourceLocation)

# CursorKind Functions
CursorKind_is_decl = _bind('clang_isDeclaration', [CursorKind], bool)
CursorKind_is_ref = _bind('clang_isReference', [CursorKind], bool)
CursorKind_is_expr = _bind('clang_isExpression', [CursorKind], bool)
CursorKind_is_stmt = _bind('clang_isStatement', [CursorKind], bool)
CursorKind_is_inv = _bind('clang_isInvalid', [CursorKind], bool)

# Cursor Functions
# TODO: Implement this function
Cursor_get = _bind('clang_getCursor', [TranslationUnit, SourceLocation],
                   Cursor)

# No argtypes are declared for the null-cursor getter, only its result type.
Cursor_null = _bind('clang_getNullCursor', restype=Cursor)

Cursor_usr = _bind('clang_getCursorUSR', [Cursor], _CXString,
                   _CXString.from_result)

Cursor_is_def = _bind('clang_isCursorDefinition', [Cursor], bool)

Cursor_def = _bind('clang_getCursorDefinition', [Cursor], Cursor,
                   Cursor.from_result)

Cursor_eq = _bind('clang_equalCursors', [Cursor, Cursor], c_uint)

Cursor_spelling = _bind('clang_getCursorSpelling', [Cursor], _CXString,
                        _CXString.from_result)

Cursor_loc = _bind('clang_getCursorLocation', [Cursor], SourceLocation)

Cursor_extent = _bind('clang_getCursorExtent', [Cursor], SourceRange)

Cursor_ref = _bind('clang_getCursorReferenced', [Cursor], Cursor,
                   Cursor.from_result)

Cursor_visit = _bind('clang_visitChildren', [Cursor, Callback, py_object],
                     c_uint)

# Index Functions
Index_create = _bind('clang_createIndex', [c_int, c_int], c_object_p)
Index_dispose = _bind('clang_disposeIndex', [Index])

# Translation Unit Functions
TranslationUnit_read = _bind('clang_createTranslationUnit',
                             [Index, c_char_p], c_object_p)

TranslationUnit_parse = _bind(
    'clang_createTranslationUnitFromSourceFile',
    [Index, c_char_p, c_int, c_void_p,
     c_int, c_void_p],
    c_object_p)

TranslationUnit_cursor = _bind('clang_getTranslationUnitCursor',
                               [TranslationUnit], Cursor, Cursor.from_result)

TranslationUnit_spelling = _bind('clang_getTranslationUnitSpelling',
                                 [TranslationUnit], _CXString,
                                 _CXString.from_result)

TranslationUnit_dispose = _bind('clang_disposeTranslationUnit',
                                [TranslationUnit])

# File Functions
File_name = _bind('clang_getFileName', [File], c_char_p)
# NOTE(review): the result is declared as c_uint here; confirm against the C
# prototype of clang_getFileTime that this matches the width of its return
# type.
File_time = _bind('clang_getFileTime', [File], c_uint)

###

__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind',
           'SourceRange', 'SourceLocation', 'File']