cindex.py revision 3d855f8d48b235eb2beb45216cced24efd3c08fa
1# -*- coding: utf-8 -*-
2
3from ctypes import *
4
def get_cindex_library():
    """
    Load the CIndex shared library and return the ctypes handle for it.

    The file name is chosen from the value of platform.system():
    'libCIndex.dylib' on Darwin, 'libCIndex.dll' on Windows and
    'libCIndex.so' for every other value.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform

    # Platforms with a dedicated file name; anything else falls back to the
    # '.so' name.
    library_names = {
        'Darwin': 'libCIndex.dylib',
        'Windows': 'libCIndex.dll',
    }
    filename = library_names.get(platform.system(), 'libCIndex.so')
    return cdll.LoadLibrary(filename)
17
18## Utility Types and Functions ##
def alloc_string_vector(strs):
    """
    Return a ctypes character buffer sized to hold every string in strs,
    with one extra byte per string reserved for its terminator.
    """
    total = sum(len(s) + 1 for s in strs)
    return create_string_buffer(total)
27
def copy_string_vector(vec, strs):
    """
    Copy the contents of each string into the vector, preserving null
    terminated elements.

    Parameters:
    vec -- Writable ctypes character array with room for len(s) + 1 bytes
           per string, e.g. the result of alloc_string_vector(strs).
    strs -- The strings to copy, in order.

    The byte following each string is skipped rather than written, so the
    elements are only null terminated if vec was zeroed beforehand (as a
    freshly created string buffer is). Nothing is returned; vec is modified
    in place. A vector that is too small raises ValueError.
    """
    n = 0
    for s in strs:
        end = n + len(s)
        # Slice assignment copies the whole string in a single ctypes call
        # instead of storing one character at a time from Python.
        vec[n:end] = s
        # Step over the terminator slot reserved for this string.
        n = end + 1
42
def create_string_vector(strs):
    """
    Pack the given list of strings into one newly allocated ctypes character
    buffer and return it.

    The buffer is sized by alloc_string_vector and filled by
    copy_string_vector, so the strings sit back to back with one byte
    reserved after each of them.
    """
    packed = alloc_string_vector(strs)
    copy_string_vector(packed, strs)
    return packed
50
# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_param will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

# Handle to the CIndex shared library. It is loaded once, when this module is
# imported, and every clang_* function bound further down is looked up on it.
lib = get_cindex_library()

## Typedefs ##
# Cursor kinds cross the library boundary as plain C ints.
CursorKind = c_int
61
62### Structures and Utility Classes ###
63
64class _CXString(Structure):
65    """Helper for transforming CXString results."""
66
67    _fields_ = [("spelling", c_char_p), ("free", c_int)]
68
69    def __del__(self):
70        _CXString_dispose(self)
71
72    @staticmethod
73    def from_result(res, fn, args):
74        assert isinstance(res, _CXString)
75        return _CXString_getCString(res)
76
class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.

    The file, line and column attributes are only present once init() has
    been called on the instance.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("int_data", c_uint)]

    def init(self):
        """
        Query the library for this location's file, line and column, store
        them on the instance and return the instance itself.
        """
        file_ptr = c_object_p()
        line_no = c_uint()
        column_no = c_uint()
        SourceLocation_loc(self, byref(file_ptr), byref(line_no),
                           byref(column_no))
        # A null file pointer is exposed as None rather than a File wrapper.
        if file_ptr:
            source_file = File(file_ptr)
        else:
            source_file = None
        self.file = source_file
        self.line = int(line_no.value)
        self.column = int(column_no.value)
        return self

    def __repr__(self):
        file_name = self.file.name if self.file else None
        return "<SourceLocation file %r, line %r, column %r>" % (
            file_name, self.line, self.column)
96
class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """
    _fields_ = [("ptr_data", c_void_p),
                ("begin_int_data", c_uint),
                ("end_int_data", c_uint)]

    @property
    def start(self):
        """
        The initialized SourceLocation of the first character within this
        source range.
        """
        first = SourceRange_start(self)
        return first.init()

    @property
    def end(self):
        """
        The initialized SourceLocation of the last character within this
        source range.
        """
        last = SourceRange_end(self)
        return last.init()
122
class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [
        ("kind", c_int),
        ("data", c_void_p * 3)]

    def __eq__(self, other):
        # Equality is decided by the library rather than by comparing fields.
        return Cursor_eq(self, other)

    def __ne__(self, other):
        return not Cursor_eq(self, other)

    def is_declaration(self):
        """Tell whether this cursor's kind is a declaration kind."""
        return Cursor_is_decl(self.kind)

    def is_reference(self):
        """Tell whether this cursor's kind is a reference kind."""
        return Cursor_is_ref(self.kind)

    def is_expression(self):
        """Tell whether this cursor's kind is an expression kind."""
        return Cursor_is_expr(self.kind)

    def is_statement(self):
        """Tell whether this cursor's kind is a statement kind."""
        return Cursor_is_stmt(self.kind)

    def is_translation_unit(self):
        """Tell whether this cursor's kind is the translation unit kind."""
        return Cursor_is_tu(self.kind)

    def is_invalid(self):
        """Tell whether this cursor's kind marks an invalid entity."""
        return Cursor_is_inv(self.kind)

    def is_definition(self):
        """
        Tell whether the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        For a cursor that is a declaration of some entity, or a reference to
        one, return a cursor pointing at the definition of that entity. A
        null result from the library is returned as None.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """
        Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A USR is a string that identifies a particular entity (function,
        class, variable, etc.) within a program. USRs can be compared across
        translation units to determine, e.g., when references in one
        translation unit refer to an entity defined in another one.
        """
        return Cursor_usr(self)

    @property
    def spelling(self):
        """
        The spelling of the entity pointed at by the cursor, or None when
        the cursor is not a declaration.
        """
        if self.is_declaration():
            return Cursor_spelling(self)
        # FIXME: clang_getCursorSpelling should be fixed to not assert on
        # this, for consistency with clang_getCursorUSR.
        return None

    @property
    def location(self):
        """
        The initialized source location (the starting character) of the
        entity pointed at by the cursor.
        """
        where = Cursor_loc(self)
        return where.init()

    @property
    def extent(self):
        """
        The source range (the range of text) occupied by the entity pointed
        at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator over the children visited from this cursor."""
        collected = []

        # FIXME: Expose iteration from CIndex, PR6125.
        def collect(child, parent, sink):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            sink.append(child)
            return 1 # continue

        # The list travels through the library as the visitor's client data
        # and comes back as the third callback argument.
        Cursor_visit(self, Callback(collect), collected)
        return iter(collected)

    @staticmethod
    def from_result(res, fn, args):
        """
        errcheck-style hook: map a null cursor result to None and pass any
        other cursor through unchanged. fn and args are not used.
        """
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        return None if res == Cursor_null() else res
234
235## CIndex Objects ##
236
237# CIndex objects (derived from ClangObject) are essentially lightweight
238# wrappers attached to some underlying object, which is exposed via CIndex as
239# a void*.
240
class ClangObject(object):
    """
    Base class for the lightweight wrappers around objects that the CIndex
    library exposes as opaque pointers. It keeps the pointer and makes the
    wrapper usable as a ctypes argument.
    """
    def __init__(self, obj):
        # Only non-null object pointers may be wrapped.
        assert isinstance(obj, c_object_p)
        assert obj
        self.obj = obj
        self._as_parameter_ = obj

    def from_param(self):
        """Return the wrapped pointer as the value to pass to the library."""
        return self._as_parameter_
252
class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        handle = Index_create(excludeDecls, displayDiags)
        return Index(handle)

    def __del__(self):
        # Give the underlying index back to the library.
        Index_dispose(self)

    def read(self, path):
        """Load a translation unit into this index from the AST file at path."""
        unit = TranslationUnit.read(self, path)
        return unit

    def parse(self, path, args = []):
        """
        Load a translation unit into this index from the source file at path
        by running clang and generating the AST before loading. Extra command
        line parameters for clang can be supplied through args.
        """
        unit = TranslationUnit.parse(self, path, args)
        return unit
284
285
class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __del__(self):
        # Give the underlying translation unit back to the library.
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    @staticmethod
    def read(ix, path):
        """
        Create a translation unit from the given AST file.

        Parameters:
        ix -- The Index the translation unit is created in.
        path -- Path of the AST file to load.

        Returns the new TranslationUnit, or None if the library returned a
        null pointer.
        """
        ptr = TranslationUnit_read(ix, path)
        return TranslationUnit(ptr) if ptr else None

    @staticmethod
    def parse(ix, path, args = []):
        """
        Construct a translation unit from the given source file, applying
        the given command line arguments.

        Parameters:
        ix -- The Index the translation unit is created in.
        path -- Path of the source file to parse.
        args -- Sequence of additional clang command line arguments. It is
                only read, never modified.

        Returns the new TranslationUnit, or None if the library returned a
        null pointer.
        """
        # TODO: Support unsaved files.
        argc = len(args)
        # The library reads the command line as an array of char pointers
        # (const char *const *). A flat buffer of back-to-back strings would
        # make it interpret character data as pointers, so build a real
        # pointer array; an empty command line is passed as NULL.
        if argc:
            argv = (c_char_p * argc)(*args)
        else:
            argv = None
        ptr = TranslationUnit_parse(ix, path, argc, argv, 0, 0)
        return TranslationUnit(ptr) if ptr else None
321
class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    @property
    def name(self):
        """The complete file and path name of the file, if valid."""
        file_name = File_name(self)
        return file_name

    @property
    def time(self):
        """The last modification time of the file, if valid."""
        modified = File_time(self)
        return modified
337
# Additional Functions and Types

# C callback type for visiting cursors: it is called with two Cursor values
# and an arbitrary Python object, and returns a C int. Cursor.get_children
# wraps its visitor (child, parent, children) in this type before passing it
# to Cursor_visit.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)
342
# String Functions
# Called from _CXString.__del__. No restype is set, so the ctypes default
# applies.
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]

# Used by _CXString.from_result to turn a _CXString into a C string value.
_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p
350
# Source Location Functions
# Takes a SourceLocation plus three out-parameters; SourceLocation.init passes
# them as (file pointer, line, column). No restype is set.
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint)]
355
# Source Range Functions
# Both take a SourceRange by value and return a SourceLocation structure; the
# SourceRange.start / SourceRange.end properties call init() on the result.
SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation
364
# Cursor Functions
# Bindings that return a _CXString or a Cursor and install an errcheck hook
# hand callers the hook's return value instead of the raw structure:
# _CXString.from_result yields a C string value, Cursor.from_result yields
# None for a null cursor.
# TODO: Implement this function
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

# Returns the raw null cursor; no errcheck here, because Cursor.from_result
# itself compares against this value.
Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

Cursor_kind = lib.clang_getCursorKind
Cursor_kind.argtypes = [Cursor]
Cursor_kind.restype = c_int

Cursor_usr = lib.clang_getCursorUSR
Cursor_usr.argtypes = [Cursor]
Cursor_usr.restype = _CXString
Cursor_usr.errcheck = _CXString.from_result

# The kind predicates below take a CursorKind (a C int) rather than a Cursor.
# Their restype is the Python callable bool, so the library's integer result
# is converted to True/False.
Cursor_is_decl = lib.clang_isDeclaration
Cursor_is_decl.argtypes = [CursorKind]
Cursor_is_decl.restype = bool

Cursor_is_ref = lib.clang_isReference
Cursor_is_ref.argtypes = [CursorKind]
Cursor_is_ref.restype = bool

Cursor_is_expr = lib.clang_isExpression
Cursor_is_expr.argtypes = [CursorKind]
Cursor_is_expr.restype = bool

Cursor_is_stmt = lib.clang_isStatement
Cursor_is_stmt.argtypes = [CursorKind]
Cursor_is_stmt.restype = bool

Cursor_is_inv = lib.clang_isInvalid
Cursor_is_inv.argtypes = [CursorKind]
Cursor_is_inv.restype = bool

Cursor_is_tu = lib.clang_isTranslationUnit
Cursor_is_tu.argtypes = [CursorKind]
Cursor_is_tu.restype = bool

# Unlike the kind predicates above, this one takes the whole Cursor.
Cursor_is_def = lib.clang_isCursorDefinition
Cursor_is_def.argtypes = [Cursor]
Cursor_is_def.restype = bool

Cursor_def = lib.clang_getCursorDefinition
Cursor_def.argtypes = [Cursor]
Cursor_def.restype = Cursor
Cursor_def.errcheck = Cursor.from_result

# Backs Cursor.__eq__ and Cursor.__ne__; the result is an unsigned integer,
# not a bool.
Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

Cursor_spelling = lib.clang_getCursorSpelling
Cursor_spelling.argtypes = [Cursor]
Cursor_spelling.restype = _CXString
Cursor_spelling.errcheck = _CXString.from_result

# Returns a raw SourceLocation; Cursor.location calls init() on it.
Cursor_loc = lib.clang_getCursorLocation
Cursor_loc.argtypes = [Cursor]
Cursor_loc.restype = SourceLocation

Cursor_extent = lib.clang_getCursorExtent
Cursor_extent.argtypes = [Cursor]
Cursor_extent.restype = SourceRange

Cursor_ref = lib.clang_getCursorReferenced
Cursor_ref.argtypes = [Cursor]
Cursor_ref.restype = Cursor
Cursor_ref.errcheck = Cursor.from_result

# Takes the cursor to visit, a Callback and a Python object that is passed
# through to the callback as its third argument.
Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Callback, py_object]
Cursor_visit.restype = c_uint
441
# Index Functions
# Index.create passes (excludeDecls, displayDiags) as the two ints and wraps
# the returned object pointer in an Index.
Index_create = lib.clang_createIndex
Index_create.argtypes = [c_int, c_int]
Index_create.restype = c_object_p

# Called from Index.__del__. No restype is set.
Index_dispose = lib.clang_disposeIndex
Index_dispose.argtypes = [Index]
449
# Translation Unit Functions
# The two creation functions return an object pointer; TranslationUnit.read
# and TranslationUnit.parse map a null pointer to None.
TranslationUnit_read = lib.clang_createTranslationUnit
TranslationUnit_read.argtypes = [Index, c_char_p]
TranslationUnit_read.restype = c_object_p

# Arguments as passed by TranslationUnit.parse: index, source path, argument
# count, argument vector, then two values that are always given as 0.
# NOTE(review): the last two presumably describe unsaved files (see the TODO
# in TranslationUnit.parse) -- confirm against the CIndex header.
TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
                                  c_int, c_void_p]
TranslationUnit_parse.restype = c_object_p

TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
TranslationUnit_cursor.argtypes = [TranslationUnit]
TranslationUnit_cursor.restype = Cursor
TranslationUnit_cursor.errcheck = Cursor.from_result

TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
TranslationUnit_spelling.argtypes = [TranslationUnit]
TranslationUnit_spelling.restype = _CXString
TranslationUnit_spelling.errcheck = _CXString.from_result

# Called from TranslationUnit.__del__. No restype is set.
TranslationUnit_dispose = lib.clang_disposeTranslationUnit
TranslationUnit_dispose.argtypes = [TranslationUnit]
472
# File Functions
# Backs the File.name property; the result is a C string.
File_name = lib.clang_getFileName
File_name.argtypes = [File]
File_name.restype = c_char_p

# Backs the File.time property.
# NOTE(review): the result is declared as c_uint; if the C function returns a
# wider time type on some platform the value would be truncated -- confirm
# against the CIndex header.
File_time = lib.clang_getFileTime
File_time.argtypes = [File]
File_time.restype = c_uint
481