cindex.py revision 4efd632322731425d83d205f26bddcdfe1ac8937
1#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10r"""
11Clang Indexing Library Bindings
12===============================
13
14This module provides an interface to the Clang indexing library. It is a
15low-level interface to the indexing library which attempts to match the Clang
16API directly while also being "pythonic". Notable differences from the C API
17are:
18
19 * string results are returned as Python strings, not CXString objects.
20
21 * null cursors are translated to None.
22
23 * access to child cursors is done via iteration, not visitation.
24
25The major indexing objects are:
26
27  Index
28
29    The top-level object which manages some global library state.
30
31  TranslationUnit
32
33    High-level object encapsulating the AST for a single translation unit. These
34    can be loaded from .ast files or parsed on the fly.
35
36  Cursor
37
38    Generic object for representing a node in the AST.
39
40  SourceRange, SourceLocation, and File
41
42    Objects representing information about the input source.
43
44Most object information is exposed using properties, when the underlying API
45call is efficient.
46"""
47
48# TODO
49# ====
50#
51# o fix memory management issues (currently client must hold on to index and
52#   translation unit, or risk crashes).
53#
54# o expose code completion APIs.
55#
56# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
57#   clearly, and hide from the external interface (i.e., help(cindex)).
58#
59# o implement additional SourceLocation, SourceRange, and File methods.
60
61from ctypes import *
62
def get_cindex_library():
    """
    Load the CIndex shared library and return the ctypes handle for it.

    The file name is chosen from the value of platform.system(): 'Darwin' and
    'Windows' have their own names and every other system uses the '.so'
    name. The bare name is handed to cdll.LoadLibrary, so finding the file is
    left entirely to that call.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    special_names = {
        'Darwin': 'libCIndex.dylib',
        'Windows': 'libCIndex.dll',
    }
    library_name = special_names.get(platform.system(), 'libCIndex.so')
    return cdll.LoadLibrary(library_name)
75
76## Utility Types and Functions ##
def alloc_string_vector(strs):
    """
    Return a new ctypes string buffer big enough to hold every string in
    strs back to back, with one extra byte per string for its terminator.
    """
    total = sum(len(text) + 1 for text in strs)
    return create_string_buffer(total)
85
def copy_string_vector(vec, strs):
    """
    Write the characters of each string in strs into vec, one string after
    another, leaving a one byte gap after every string.

    The gap byte itself is never written; it only acts as a null terminator
    because the buffer is expected to be zero there already.
    """
    offset = 0
    for text in strs:
        # This is terribly inefficient, but a character-by-character copy is
        # the only form known to work here. It should be something like:
        # vec[offset:offset + len(text)] = text[:]
        count = 0
        for char in text:
            vec[offset + count] = char
            count += 1
        # Step over the copied characters plus the terminator slot.
        offset += count + 1
100
def create_string_vector(strs):
    """
    Pack the given list of strings into one freshly allocated buffer.

    The result is a single flat character buffer that holds the strings one
    after another, each followed by a null byte. It is not an array of
    character pointers.
    """
    packed = alloc_string_vector(strs)
    copy_string_vector(packed, strs)
    return packed
108
# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_param will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
113c_object_p = POINTER(c_void_p)
114
115lib = get_cindex_library()
116
117### Structures and Utility Classes ###
118
119class _CXString(Structure):
120    """Helper for transforming CXString results."""
121
122    _fields_ = [("spelling", c_char_p), ("free", c_int)]
123
124    def __del__(self):
125        _CXString_dispose(self)
126
127    @staticmethod
128    def from_result(res, fn, args):
129        assert isinstance(res, _CXString)
130        return _CXString_getCString(res)
131
class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.

    The file, line and column are looked up through the library the first
    time any of them is requested and are then remembered on the instance.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("int_data", c_uint),
    ]

    # Cached (file, line, column) tuple; stays None until first looked up.
    _data = None

    def _get_instantiation(self):
        """Return the (file, line, column) tuple, computing it on first use."""
        cached = self._data
        if cached is not None:
            return cached
        file_ref, line_no, column_no = c_object_p(), c_uint(), c_uint()
        SourceLocation_loc(self, byref(file_ref), byref(line_no),
                           byref(column_no))
        # A null file pointer is reported as None instead of being wrapped.
        if file_ref:
            source_file = File(file_ref)
        else:
            source_file = None
        self._data = (source_file, int(line_no.value), int(column_no.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        info = self._get_instantiation()
        return info[0]

    @property
    def line(self):
        """Get the line represented by this source location."""
        info = self._get_instantiation()
        return info[1]

    @property
    def column(self):
        """Get the column represented by this source location."""
        info = self._get_instantiation()
        return info[2]

    def __repr__(self):
        source_file = self.file
        if source_file:
            file_name = source_file.name
        else:
            file_name = None
        return "<SourceLocation file %r, line %r, column %r>" % (
            file_name, self.line, self.column)
165
class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code. Both end points are obtained from the library on every access.
    """
    _fields_ = [("ptr_data", c_void_p),
                ("begin_int_data", c_uint),
                ("end_int_data", c_uint)]

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        first = SourceRange_start(self)
        return first

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        last = SourceRange_end(self)
        return last

    def __repr__(self):
        endpoints = (self.start, self.end)
        return "<SourceRange start %r, end %r>" % endpoints
194
195### Cursor Kinds ###
196
class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.

    Every instance wraps one integer kind id and registers itself in a
    class-level table, so there is at most one instance per id. Kinds are
    published as class attributes (e.g. CursorKind.STRUCT_DECL) and the
    attribute name is what the name property reports.
    """

    # The unique kind objects, indexed by id.
    _kinds = []

    # Map from kind object to its attribute name. Built lazily, shared by all
    # kinds, and reset whenever a new kind is registered.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind under the integer id value.

        Raises ValueError if a kind with that id has already been created.
        """
        if value >= len(CursorKind._kinds):
            CursorKind._kinds += [None] * (value - len(CursorKind._kinds) + 1)
        if CursorKind._kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        CursorKind._kinds[value] = self
        CursorKind._name_map = None

    def from_param(self):
        """Return the integer id, which is what gets passed to the library."""
        return self.value

    @property
    def name(self):
        """
        Get the enumeration name of this cursor kind.

        Raises KeyError if this kind has not been assigned to a CursorKind
        class attribute.
        """
        # The cache lives on the class (not on the instance) so that it is
        # shared and so that the reset done in __init__ really invalidates it.
        # It is also rebuilt on a miss, in case this kind was published as a
        # class attribute after the cache had been built.
        name_map = CursorKind._name_map
        if name_map is None or self not in name_map:
            name_map = {}
            for key, value in CursorKind.__dict__.items():
                if isinstance(value, CursorKind):
                    name_map[value] = key
            CursorKind._name_map = name_map
        return name_map[self]

    @staticmethod
    def from_id(id):
        """
        Return the registered kind for the given integer id.

        Raises ValueError if no kind is registered under that id.
        """
        # Negative ids must be rejected explicitly; a plain list lookup would
        # silently index from the end of the table.
        if (id < 0 or id >= len(CursorKind._kinds)
                or CursorKind._kinds[id] is None):
            raise ValueError('Unknown cursor kind')
        return CursorKind._kinds[id]

    @staticmethod
    def get_all_kinds():
        """Return all CursorKind enumeration instances, ordered by id."""
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)
261
262# FIXME: Is there a nicer way to expose this enumeration? We could potentially
263# represent the nested structure, or even build a class hierarchy. The main
264# things we want for sure are (a) simple external access to kinds, (b) a place
265# to hang a description and name, (c) easy to keep in sync with Index.h.
266
267###
268# Declaration Kinds
269
270# A declaration whose specific kind is not exposed via this interface.
271#
272# Unexposed declarations have the same operations as any other kind of
273# declaration; one can extract their location information, spelling, find their
274# definitions, etc. However, the specific kind of the declaration is not
275# reported.
276CursorKind.UNEXPOSED_DECL = CursorKind(1)
277
278# A C or C++ struct.
279CursorKind.STRUCT_DECL = CursorKind(2)
280
281# A C or C++ union.
282CursorKind.UNION_DECL = CursorKind(3)
283
284# A C++ class.
285CursorKind.CLASS_DECL = CursorKind(4)
286
287# An enumeration.
288CursorKind.ENUM_DECL = CursorKind(5)
289
290# A field (in C) or non-static data member (in C++) in a struct, union, or C++
291# class.
292CursorKind.FIELD_DECL = CursorKind(6)
293
294# An enumerator constant.
295CursorKind.ENUM_CONSTANT_DECL = CursorKind(7)
296
297# A function.
298CursorKind.FUNCTION_DECL = CursorKind(8)
299
300# A variable.
301CursorKind.VAR_DECL = CursorKind(9)
302
303# A function or method parameter.
304CursorKind.PARM_DECL = CursorKind(10)
305
306# An Objective-C @interface.
307CursorKind.OBJC_INTERFACE_DECL = CursorKind(11)
308
309# An Objective-C @interface for a category.
310CursorKind.OBJC_CATEGORY_DECL = CursorKind(12)
311
312# An Objective-C @protocol declaration.
313CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13)
314
315# An Objective-C @property declaration.
316CursorKind.OBJC_PROPERTY_DECL = CursorKind(14)
317
318# An Objective-C instance variable.
319CursorKind.OBJC_IVAR_DECL = CursorKind(15)
320
321# An Objective-C instance method.
322CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16)
323
324# An Objective-C class method.
325CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17)
326
327# An Objective-C @implementation.
328CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18)
329
330# An Objective-C @implementation for a category.
331CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19)
332
333# A typedef.
334CursorKind.TYPEDEF_DECL = CursorKind(20)
335
336###
337# Reference Kinds
338
339CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40)
340CursorKind.OBJC_PROTOCOL_REF = CursorKind(41)
341CursorKind.OBJC_CLASS_REF = CursorKind(42)
342
343# A reference to a type declaration.
344#
345# A type reference occurs anywhere where a type is named but not
346# declared. For example, given:
347#   typedef unsigned size_type;
348#   size_type size;
349#
350# The typedef is a declaration of size_type (CXCursor_TypedefDecl),
351# while the type of the variable "size" is referenced. The cursor
352# referenced by the type of size is the typedef for size_type.
353CursorKind.TYPE_REF = CursorKind(43)
354
355###
356# Invalid/Error Kinds
357
358CursorKind.INVALID_FILE = CursorKind(70)
359CursorKind.NO_DECL_FOUND = CursorKind(71)
360CursorKind.NOT_IMPLEMENTED = CursorKind(72)
361
362###
363# Expression Kinds
364
365# An expression whose specific kind is not exposed via this interface.
366#
367# Unexposed expressions have the same operations as any other kind of
368# expression; one can extract their location information, spelling, children,
369# etc. However, the specific kind of the expression is not reported.
370CursorKind.UNEXPOSED_EXPR = CursorKind(100)
371
# An expression that refers to some value declaration, such as a function,
# variable, or enumerator.
374CursorKind.DECL_REF_EXPR = CursorKind(101)
375
376# An expression that refers to a member of a struct, union, class, Objective-C
377# class, etc.
378CursorKind.MEMBER_REF_EXPR = CursorKind(102)
379
380# An expression that calls a function.
381CursorKind.CALL_EXPR = CursorKind(103)
382
383# An expression that sends a message to an Objective-C object or class.
384CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)
385
###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
387#
388# Unexposed statements have the same operations as any other kind of statement;
389# one can extract their location information, spelling, children, etc. However,
390# the specific kind of the statement is not reported.
391CursorKind.UNEXPOSED_STMT = CursorKind(200)
392
393###
394# Other Kinds
395
396# Cursor that represents the translation unit itself.
397#
398# The translation unit cursor exists primarily to act as the root cursor for
399# traversing the contents of a translation unit.
400CursorKind.TRANSLATION_UNIT = CursorKind(300)
401
402### Cursors ###
403
class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        """
        Return True if other is a Cursor that the library considers equal to
        this one. Anything that is not a Cursor compares unequal.
        """
        # Only cursors can be handed to the library; comparing against None or
        # any other object must answer False instead of failing in ctypes.
        if not isinstance(other, Cursor):
            return False
        return bool(Cursor_eq(self, other))

    def __ne__(self, other):
        """Return the negation of __eq__."""
        return not self.__eq__(other)

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity. A null result from the library is returned as None.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """
        Return the spelling of the entity pointed at by the cursor, or None
        if the cursor is not a declaration.
        """
        if not self.kind.is_declaration():
            # FIXME: clang_getCursorSpelling should be fixed to not assert on
            # this, for consistency with clang_getCursorUSR.
            return None
        return Cursor_spelling(self)

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """
        Return an iterator for accessing the children of this cursor.

        All children are collected into a list by a visitor callback before
        the iterator is returned.
        """

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            return 1 # continue
        children = []
        Cursor_visit(self, Callback(visitor), children)
        return iter(children)

    @staticmethod
    def from_result(res, fn, args):
        """
        Result hook with the ctypes errcheck signature: translate a null
        cursor into None and pass every other cursor through unchanged.
        """
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == Cursor_null():
            return None
        return res
496
497## CIndex Objects ##
498
499# CIndex objects (derived from ClangObject) are essentially lightweight
500# wrappers attached to some underlying object, which is exposed via CIndex as
501# a void*.
502
class ClangObject(object):
    """
    Common base for the lightweight wrappers around objects that the Clang
    CIndex library exposes as opaque pointers. It stores the pointer and
    makes the wrapper usable as an argument in ctypes calls.
    """
    def __init__(self, obj):
        # The wrapped value must be a non-null object pointer.
        assert isinstance(obj, c_object_p) and obj
        self.obj = obj
        self._as_parameter_ = obj

    def from_param(self):
        """Return the stored pointer for use as a ctypes argument."""
        return self._as_parameter_
514
class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units. The underlying index is disposed of when the wrapper is deleted.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        handle = Index_create(excludeDecls, displayDiags)
        return Index(handle)

    def __del__(self):
        Index_dispose(self)

    def read(self, path):
        """Load the translation unit stored in the AST file at path."""
        unit = TranslationUnit.read(self, path)
        return unit

    def parse(self, path, args = []):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.
        """
        unit = TranslationUnit.parse(self, path, args)
        return unit
546
547
class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations. The underlying
    translation unit is disposed of when the wrapper is deleted.
    """

    def __del__(self):
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    @staticmethod
    def read(ix, path):
        """
        Create a translation unit from the given AST file.

        Returns None if the library hands back a null pointer.
        """
        ptr = TranslationUnit_read(ix, path)
        return TranslationUnit(ptr) if ptr else None

    @staticmethod
    def parse(ix, path, args = []):
        """
        Construct a translation unit from the given source file, applying
        the given command line arguments.

        Parameters:
        ix -- The Index the translation unit is created in.
        path -- The source file to parse.
        args -- Sequence of additional command line argument strings.

        Returns None if the library hands back a null pointer.
        """
        # TODO: Support unsaved files.
        argc = len(args)
        # The library expects a real array of C string pointers. A single
        # flat buffer of null-separated characters would be misread as
        # pointers, so build a c_char_p array (NULL when there are no args).
        if argc:
            argv = (c_char_p * argc)(*args)
        else:
            argv = None
        ptr = TranslationUnit_parse(ix, path, argc, argv, 0, 0)
        return TranslationUnit(ptr) if ptr else None
583
class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit. Both properties query the library on every access.
    """

    @property
    def name(self):
        """Return the complete file and path name of the file."""
        file_name = File_name(self)
        return file_name

    @property
    def time(self):
        """Return the last modification time of the file."""
        modified = File_time(self)
        return modified
599
600# Additional Functions and Types
601
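# Callback type for the visitor passed to clang_visitChildren: it receives the
# child cursor, the parent cursor and the client data object, and returns an
# int telling the library how to continue.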
603Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)
604
605# String Functions
606_CXString_dispose = lib.clang_disposeString
607_CXString_dispose.argtypes = [_CXString]
608
609_CXString_getCString = lib.clang_getCString
610_CXString_getCString.argtypes = [_CXString]
611_CXString_getCString.restype = c_char_p
612
613# Source Location Functions
614SourceLocation_loc = lib.clang_getInstantiationLocation
615SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
616                               POINTER(c_uint), POINTER(c_uint)]
617
618# Source Range Functions
619SourceRange_start = lib.clang_getRangeStart
620SourceRange_start.argtypes = [SourceRange]
621SourceRange_start.restype = SourceLocation
622
623SourceRange_end = lib.clang_getRangeEnd
624SourceRange_end.argtypes = [SourceRange]
625SourceRange_end.restype = SourceLocation
626
627# CursorKind Functions
628CursorKind_is_decl = lib.clang_isDeclaration
629CursorKind_is_decl.argtypes = [CursorKind]
630CursorKind_is_decl.restype = bool
631
632CursorKind_is_ref = lib.clang_isReference
633CursorKind_is_ref.argtypes = [CursorKind]
634CursorKind_is_ref.restype = bool
635
636CursorKind_is_expr = lib.clang_isExpression
637CursorKind_is_expr.argtypes = [CursorKind]
638CursorKind_is_expr.restype = bool
639
640CursorKind_is_stmt = lib.clang_isStatement
641CursorKind_is_stmt.argtypes = [CursorKind]
642CursorKind_is_stmt.restype = bool
643
644CursorKind_is_inv = lib.clang_isInvalid
645CursorKind_is_inv.argtypes = [CursorKind]
646CursorKind_is_inv.restype = bool
647
648# Cursor Functions
649# TODO: Implement this function
650Cursor_get = lib.clang_getCursor
651Cursor_get.argtypes = [TranslationUnit, SourceLocation]
652Cursor_get.restype = Cursor
653
654Cursor_null = lib.clang_getNullCursor
655Cursor_null.restype = Cursor
656
657Cursor_usr = lib.clang_getCursorUSR
658Cursor_usr.argtypes = [Cursor]
659Cursor_usr.restype = _CXString
660Cursor_usr.errcheck = _CXString.from_result
661
662Cursor_is_def = lib.clang_isCursorDefinition
663Cursor_is_def.argtypes = [Cursor]
664Cursor_is_def.restype = bool
665
666Cursor_def = lib.clang_getCursorDefinition
667Cursor_def.argtypes = [Cursor]
668Cursor_def.restype = Cursor
669Cursor_def.errcheck = Cursor.from_result
670
671Cursor_eq = lib.clang_equalCursors
672Cursor_eq.argtypes = [Cursor, Cursor]
673Cursor_eq.restype = c_uint
674
675Cursor_spelling = lib.clang_getCursorSpelling
676Cursor_spelling.argtypes = [Cursor]
677Cursor_spelling.restype = _CXString
678Cursor_spelling.errcheck = _CXString.from_result
679
680Cursor_loc = lib.clang_getCursorLocation
681Cursor_loc.argtypes = [Cursor]
682Cursor_loc.restype = SourceLocation
683
684Cursor_extent = lib.clang_getCursorExtent
685Cursor_extent.argtypes = [Cursor]
686Cursor_extent.restype = SourceRange
687
688Cursor_ref = lib.clang_getCursorReferenced
689Cursor_ref.argtypes = [Cursor]
690Cursor_ref.restype = Cursor
691Cursor_ref.errcheck = Cursor.from_result
692
693Cursor_visit = lib.clang_visitChildren
694Cursor_visit.argtypes = [Cursor, Callback, py_object]
695Cursor_visit.restype = c_uint
696
697# Index Functions
698Index_create = lib.clang_createIndex
699Index_create.argtypes = [c_int, c_int]
700Index_create.restype = c_object_p
701
702Index_dispose = lib.clang_disposeIndex
703Index_dispose.argtypes = [Index]
704
705# Translation Unit Functions
706TranslationUnit_read = lib.clang_createTranslationUnit
707TranslationUnit_read.argtypes = [Index, c_char_p]
708TranslationUnit_read.restype = c_object_p
709
710TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
711TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
712                                  c_int, c_void_p]
713TranslationUnit_parse.restype = c_object_p
714
715TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
716TranslationUnit_cursor.argtypes = [TranslationUnit]
717TranslationUnit_cursor.restype = Cursor
718TranslationUnit_cursor.errcheck = Cursor.from_result
719
720TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
721TranslationUnit_spelling.argtypes = [TranslationUnit]
722TranslationUnit_spelling.restype = _CXString
723TranslationUnit_spelling.errcheck = _CXString.from_result
724
725TranslationUnit_dispose = lib.clang_disposeTranslationUnit
726TranslationUnit_dispose.argtypes = [TranslationUnit]
727
728# File Functions
729File_name = lib.clang_getFileName
730File_name.argtypes = [File]
731File_name.restype = c_char_p
732
733File_time = lib.clang_getFileTime
734File_time.argtypes = [File]
735File_time.restype = c_uint
736
737###
738
739__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind',
740           'SourceRange', 'SourceLocation', 'File']
741