cindex.py revision 12bf15c48a007bc6cc36f3d2e8a0d2e67ccf9886
1# -*- coding: utf-8 -*-
2
3from ctypes import *
4
def get_cindex_library():
    """
    Load the CIndex shared library and return the ctypes handle for it.

    The file name is picked from the value of platform.system(); anything
    other than 'Darwin' or 'Windows' falls back to the '.so' name.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    names_by_system = {
        'Darwin': 'libCIndex.dylib',
        'Windows': 'libCIndex.dll',
    }
    library_name = names_by_system.get(platform.system(), 'libCIndex.so')
    return cdll.LoadLibrary(library_name)
17
18## Utility Types and Functions ##
def alloc_string_vector(strs):
    """
    Return a ctypes character buffer with room for every string in strs laid
    out back to back, each one followed by a single terminating byte.
    """
    total = sum(len(text) + 1 for text in strs)
    return create_string_buffer(total)
27
def copy_string_vector(vec, strs):
    """
    Write the strings into vec one after another, skipping one slot after each
    string so that the buffer's existing terminator byte is left in place.
    """
    offset = 0
    for text in strs:
        # Element-by-element copying is slow, but it is the approach this
        # module has always used. It should be something like a single slice
        # assignment: vec[offset:offset + len(text)] = text
        for position, char in enumerate(text):
            vec[offset + position] = char
        # Step over the characters just written plus the terminator slot.
        offset += len(text) + 1
42
def create_string_vector(strs):
    """
    Pack the given list of strings into one newly allocated character buffer
    and return that buffer.

    The buffer is sized by alloc_string_vector and filled by
    copy_string_vector, so the result is a single contiguous block of
    terminated strings rather than an array of separate pointers.
    """
    packed = alloc_string_vector(strs)
    copy_string_vector(packed, strs)
    return packed
50
# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_param will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

# Load the CIndex shared library once, at import time. All of the function
# bindings at the bottom of this file are looked up on this handle.
lib = get_cindex_library()
58
59### Structures and Utility Classes ###
60
class _CXString(Structure):
    """Internal structure used to turn CXString results into plain strings."""

    _fields_ = [
        ("spelling", c_char_p),
        ("free", c_int),
    ]

    def __del__(self):
        # Give the structure back to the library once Python drops it.
        _CXString_dispose(self)

    @staticmethod
    def from_result(res, fn, args):
        """
        errcheck hook: take the _CXString a library call returned and hand
        back the value of clang_getCString for it.
        """
        assert isinstance(res, _CXString)
        c_string = _CXString_getCString(res)
        return c_string
73
class SourceLocation(Structure):
    """
    A SourceLocation identifies one position inside a source file, exposed as
    a file, a line number and a column number.
    """
    _fields_ = [("ptr_data", c_void_p), ("int_data", c_uint)]
    # Cached (file, line, column) triple; None until it is first requested.
    _data = None

    def _get_instantiation(self):
        """Return the (file, line, column) triple, querying the library once."""
        if self._data is not None:
            return self._data
        file_ref = c_object_p()
        line_no = c_uint()
        column_no = c_uint()
        SourceLocation_loc(self, byref(file_ref), byref(line_no),
                           byref(column_no))
        # A null file pointer is reported as None instead of a File wrapper.
        if file_ref:
            source_file = File(file_ref)
        else:
            source_file = None
        self._data = (source_file, int(line_no.value), int(column_no.value))
        return self._data

    @property
    def file(self):
        """The File this location lies in, or None when there is none."""
        source_file, _, _ = self._get_instantiation()
        return source_file

    @property
    def line(self):
        """The line number of this location."""
        _, line_no, _ = self._get_instantiation()
        return line_no

    @property
    def column(self):
        """The column number of this location."""
        _, _, column_no = self._get_instantiation()
        return column_no

    def __repr__(self):
        source_file = self.file
        if source_file:
            shown_name = source_file.name
        else:
            shown_name = None
        return "<SourceLocation file %r, line %r, column %r>" % (
            shown_name, self.line, self.column)
107
class SourceRange(Structure):
    """
    A SourceRange covers a span of source code, bounded by a start and an end
    SourceLocation.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint),
    ]

    @property
    def start(self):
        """
        The SourceLocation of the first character within this source range.
        """
        first = SourceRange_start(self)
        return first

    @property
    def end(self):
        """
        The SourceLocation of the last character within this source range.
        """
        last = SourceRange_end(self)
        return last

    def __repr__(self):
        bounds = (self.start, self.end)
        return "<SourceRange start %r, end %r>" % bounds
136
137### Cursor Kinds ###
138
class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.

    Each kind is a unique object registered under its numeric id. Kinds are
    published as class attributes (e.g. CursorKind.STRUCT_DECL), and the
    attribute name doubles as the kind's name.
    """

    # The unique kind objects, indexed by id.
    _kinds = []
    # Reverse map from kind object to attribute name, shared by all kinds. It
    # is built on first use and reset to None whenever a new kind is created.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind under the numeric id `value`.

        Raises ValueError if the id is negative or already registered.
        """
        # A negative id would index the registry from the end and alias an
        # existing slot, so reject it up front.
        if value < 0:
            raise ValueError('CursorKind id must not be negative')
        kinds = CursorKind._kinds
        if value >= len(kinds):
            kinds.extend([None] * (value - len(kinds) + 1))
        if kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        kinds[value] = self
        # Invalidate the shared name cache; a name for this kind may follow.
        CursorKind._name_map = None

    def from_param(self):
        """Return the numeric id, used when passing the kind to the library."""
        return self.value

    @property
    def name(self):
        """Get the enumeration name of this cursor kind."""
        name_map = CursorKind._name_map
        if name_map is None:
            # Keep the cache on the class, not the instance, so that the reset
            # in __init__ actually invalidates it and it is built only once.
            name_map = {}
            for key, value in CursorKind.__dict__.items():
                if isinstance(value, CursorKind):
                    name_map[value] = key
            CursorKind._name_map = name_map
        return name_map[self]

    @staticmethod
    def from_id(id):
        """
        Return the kind registered under the numeric id `id`.

        Raises ValueError for ids that are negative, out of range or unused.
        """
        kinds = CursorKind._kinds
        if id < 0 or id >= len(kinds) or kinds[id] is None:
            raise ValueError('Unknown cursor kind')
        return kinds[id]

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)
178
# FIXME: Is there a nicer way to expose this enumeration? We could potentially
# represent the nested structure, or even build a class hierarchy. The main
# things we want for sure are (a) simple external access to kinds, (b) a place
# to hang a description and name, (c) easy to keep in sync with Index.h.

# Each assignment below constructs a CursorKind, which registers itself under
# its numeric id, and publishes it as a class attribute. CursorKind.name later
# recovers the attribute name from the class dictionary.

# Declarations.

# A declaration whose specific kind is not exposed via this interface.
#
# Unexposed declarations have the same operations as any other kind of
# declaration; one can extract their location information, spelling, find their
# definitions, etc. However, the specific kind of the declaration is not
# reported.
CursorKind.UNEXPOSED_DECL = CursorKind(1)

# A C or C++ struct.
CursorKind.STRUCT_DECL = CursorKind(2)

# A C or C++ union.
CursorKind.UNION_DECL = CursorKind(3)

# A C++ class.
CursorKind.CLASS_DECL = CursorKind(4)

# An enumeration.
CursorKind.ENUM_DECL = CursorKind(5)

# A field (in C) or non-static data member (in C++) in a struct, union, or C++
# class.
CursorKind.FIELD_DECL = CursorKind(6)

# An enumerator constant.
CursorKind.ENUM_CONSTANT_DECL = CursorKind(7)

# A function.
CursorKind.FUNCTION_DECL = CursorKind(8)

# A variable.
CursorKind.VAR_DECL = CursorKind(9)

# A function or method parameter.
CursorKind.PARM_DECL = CursorKind(10)

# An Objective-C @interface.
CursorKind.OBJC_INTERFACE_DECL = CursorKind(11)

# An Objective-C @interface for a category.
CursorKind.OBJC_CATEGORY_DECL = CursorKind(12)

# An Objective-C @protocol declaration.
CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13)

# An Objective-C @property declaration.
CursorKind.OBJC_PROPERTY_DECL = CursorKind(14)

# An Objective-C instance variable.
CursorKind.OBJC_IVAR_DECL = CursorKind(15)

# An Objective-C instance method.
CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16)

# An Objective-C class method.
CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17)

# An Objective-C @implementation.
CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18)

# An Objective-C @implementation for a category.
CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19)

# A typedef.
CursorKind.TYPEDEF_DECL = CursorKind(20)

# References.

CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40)
CursorKind.OBJC_PROTOCOL_REF = CursorKind(41)
CursorKind.OBJC_CLASS_REF = CursorKind(42)

# A reference to a type declaration.
#
# A type reference occurs anywhere where a type is named but not
# declared. For example, given:
#   typedef unsigned size_type;
#   size_type size;
#
# The typedef is a declaration of size_type (CXCursor_TypedefDecl),
# while the type of the variable "size" is referenced. The cursor
# referenced by the type of size is the typedef for size_type.
CursorKind.TYPE_REF = CursorKind(43)


# Error conditions.
CursorKind.INVALID_FILE = CursorKind(70)
CursorKind.NO_DECL_FOUND = CursorKind(71)
CursorKind.NOT_IMPLEMENTED = CursorKind(72)

# Expressions.

# An expression whose specific kind is not exposed via this interface.
#
# Unexposed expressions have the same operations as any other kind of
# expression; one can extract their location information, spelling, children,
# etc. However, the specific kind of the expression is not reported.
CursorKind.UNEXPOSED_EXPR = CursorKind(100)

# An expression that refers to some value declaration, such as a function,
# variable, or enumerator.
CursorKind.DECL_REF_EXPR = CursorKind(101)

# An expression that refers to a member of a struct, union, class, Objective-C
# class, etc.
CursorKind.MEMBER_REF_EXPR = CursorKind(102)

# An expression that calls a function.
CursorKind.CALL_EXPR = CursorKind(103)

# An expression that sends a message to an Objective-C object or class.
CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)

# Statements.

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)
307
308### Cursors ###
309
class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    # _kind_id is the numeric cursor kind (see the kind property). data is
    # three pointer-sized slots that this module never inspects itself.
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        """Return True if both cursors refer to the same entity."""
        # Only cursors can be handed to the library; for anything else (for
        # example None) let Python fall back to its default comparison
        # instead of failing inside ctypes argument conversion.
        if not isinstance(other, Cursor):
            return NotImplemented
        return bool(Cursor_eq(self, other))

    def __ne__(self, other):
        """Return True if the cursors do not refer to the same entity."""
        if not isinstance(other, Cursor):
            return NotImplemented
        return not Cursor_eq(self, other)

    def is_declaration(self):
        """Return True if the cursor points to a declaration."""
        return Cursor_is_decl(self.kind)

    def is_reference(self):
        """Return True if the cursor points to a reference."""
        return Cursor_is_ref(self.kind)

    def is_expression(self):
        """Return True if the cursor points to an expression."""
        return Cursor_is_expr(self.kind)

    def is_statement(self):
        """Return True if the cursor points to a statement."""
        return Cursor_is_stmt(self.kind)

    def is_translation_unit(self):
        """Return True if the cursor points to a translation unit."""
        return Cursor_is_tu(self.kind)

    def is_invalid(self):
        """Return True if the cursor points to an invalid entity."""
        return Cursor_is_inv(self.kind)

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.

        Returns None when the library hands back the null cursor.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """
        Return the spelling of the entity pointed at by the cursor, or None
        if the cursor is not a declaration.
        """
        if not self.is_declaration():
            # FIXME: clang_getCursorSpelling should be fixed to not assert on
            # this, for consistency with clang_getCursorUSR.
            return None
        return Cursor_spelling(self)

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator for accessing the children of this cursor."""

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            return 1 # continue
        # The visitor collects every child into this list, which is handed
        # through the library call as the callback's third argument.
        children = []
        Cursor_visit(self, Callback(visitor), children)
        return iter(children)

    @staticmethod
    def from_result(res, fn, args):
        """
        errcheck hook for functions returning a Cursor: map the null cursor
        to None and pass any other cursor through unchanged.
        """
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == Cursor_null():
            return None
        return res
426
427## CIndex Objects ##
428
429# CIndex objects (derived from ClangObject) are essentially lightweight
430# wrappers attached to some underlying object, which is exposed via CIndex as
431# a void*.
432
class ClangObject(object):
    """
    Base class for wrappers around Clang objects. It holds the underlying
    pointer and makes the wrapper usable as a ctypes argument.
    """
    def __init__(self, obj):
        # Only a non-null c_object_p may be wrapped.
        assert isinstance(obj, c_object_p) and obj
        self.obj = obj
        self._as_parameter_ = obj

    def from_param(self):
        """Return the value ctypes should pass to the library for this object."""
        return self._as_parameter_
444
class Index(ClangObject):
    """
    The Index is the entry point to the Clang CIndex library: translation
    units are read or parsed through it.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        handle = Index_create(excludeDecls, displayDiags)
        return Index(handle)

    def __del__(self):
        # Release the underlying index when the wrapper is collected.
        Index_dispose(self)

    def read(self, path):
        """Return the translation unit stored in the AST file at path."""
        unit = TranslationUnit.read(self, path)
        return unit

    def parse(self, path, args = []):
        """
        Return the translation unit for the source file at path, produced by
        running clang to generate the AST and then loading it. Extra command
        line parameters for clang can be supplied through args.
        """
        unit = TranslationUnit.parse(self, path, args)
        return unit
476
477
class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __del__(self):
        # Release the underlying translation unit with the wrapper.
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    @staticmethod
    def read(ix, path):
        """
        Create a translation unit from the given AST file.

        Returns None if the library returns a null pointer.
        """
        ptr = TranslationUnit_read(ix, path)
        return TranslationUnit(ptr) if ptr else None

    @staticmethod
    def parse(ix, path, args = []):
        """
        Construct a translation unit from the given source file, applying
        the given command line arguments.

        Returns None if the library returns a null pointer.
        """
        # TODO: Support unsaved files.
        # The library expects the arguments as an array of C string pointers
        # (const char *const *). A single buffer of concatenated strings would
        # have its characters read as pointers, so build a real c_char_p
        # array, one element per argument.
        argc = len(args)
        argv = (c_char_p * argc)(*args)
        ptr = TranslationUnit_parse(ix, path, argc, argv, 0, 0)
        return TranslationUnit(ptr) if ptr else None
513
class File(ClangObject):
    """
    A File wraps one particular source file belonging to a translation unit.
    """

    @property
    def name(self):
        """The complete file and path name of the file, if valid."""
        file_name = File_name(self)
        return file_name

    @property
    def time(self):
        """The last modification time of the file, if valid."""
        modified = File_time(self)
        return modified
529
# Additional Functions and Types

# Everything below binds a function from the loaded library to a module-level
# name and declares its ctypes argument and result types. The classes above
# call these names at run time, so they only need to exist once the module
# has finished importing.

# Signature of the visitor handed to clang_visitChildren (see Cursor_visit
# below): it receives the child cursor, the parent cursor and an arbitrary
# Python object, and returns an int.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)

# String Functions
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]

_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p

# Source Location Functions
# The file, line and column are written through the three pointer arguments.
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint)]

# Source Range Functions
SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation

# Cursor Functions
# TODO: Implement this function
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

# Functions returning a _CXString use _CXString.from_result as errcheck, so
# callers receive the result of clang_getCString instead of the structure.
Cursor_usr = lib.clang_getCursorUSR
Cursor_usr.argtypes = [Cursor]
Cursor_usr.restype = _CXString
Cursor_usr.errcheck = _CXString.from_result

# The kind predicates take a CursorKind, which is marshalled through
# CursorKind.from_param. Their restype is the Python callable bool, so the
# integer returned by the library is converted to True or False.
Cursor_is_decl = lib.clang_isDeclaration
Cursor_is_decl.argtypes = [CursorKind]
Cursor_is_decl.restype = bool

Cursor_is_ref = lib.clang_isReference
Cursor_is_ref.argtypes = [CursorKind]
Cursor_is_ref.restype = bool

Cursor_is_expr = lib.clang_isExpression
Cursor_is_expr.argtypes = [CursorKind]
Cursor_is_expr.restype = bool

Cursor_is_stmt = lib.clang_isStatement
Cursor_is_stmt.argtypes = [CursorKind]
Cursor_is_stmt.restype = bool

Cursor_is_inv = lib.clang_isInvalid
Cursor_is_inv.argtypes = [CursorKind]
Cursor_is_inv.restype = bool

Cursor_is_tu = lib.clang_isTranslationUnit
Cursor_is_tu.argtypes = [CursorKind]
Cursor_is_tu.restype = bool

Cursor_is_def = lib.clang_isCursorDefinition
Cursor_is_def.argtypes = [Cursor]
Cursor_is_def.restype = bool

# Functions returning a Cursor use Cursor.from_result as errcheck where a
# null cursor should surface to callers as None.
Cursor_def = lib.clang_getCursorDefinition
Cursor_def.argtypes = [Cursor]
Cursor_def.restype = Cursor
Cursor_def.errcheck = Cursor.from_result

Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

Cursor_spelling = lib.clang_getCursorSpelling
Cursor_spelling.argtypes = [Cursor]
Cursor_spelling.restype = _CXString
Cursor_spelling.errcheck = _CXString.from_result

Cursor_loc = lib.clang_getCursorLocation
Cursor_loc.argtypes = [Cursor]
Cursor_loc.restype = SourceLocation

Cursor_extent = lib.clang_getCursorExtent
Cursor_extent.argtypes = [Cursor]
Cursor_extent.restype = SourceRange

Cursor_ref = lib.clang_getCursorReferenced
Cursor_ref.argtypes = [Cursor]
Cursor_ref.restype = Cursor
Cursor_ref.errcheck = Cursor.from_result

# The py_object argument is passed through to the Callback unchanged;
# Cursor.get_children uses it to carry the list that collects the children.
Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Callback, py_object]
Cursor_visit.restype = c_uint

# Index Functions
Index_create = lib.clang_createIndex
Index_create.argtypes = [c_int, c_int]
Index_create.restype = c_object_p

Index_dispose = lib.clang_disposeIndex
Index_dispose.argtypes = [Index]

# Translation Unit Functions
TranslationUnit_read = lib.clang_createTranslationUnit
TranslationUnit_read.argtypes = [Index, c_char_p]
TranslationUnit_read.restype = c_object_p

# Arguments: index, source path, argument count, argument vector, and two
# further values that TranslationUnit.parse always passes as 0.
TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
                                  c_int, c_void_p]
TranslationUnit_parse.restype = c_object_p

TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
TranslationUnit_cursor.argtypes = [TranslationUnit]
TranslationUnit_cursor.restype = Cursor
TranslationUnit_cursor.errcheck = Cursor.from_result

TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
TranslationUnit_spelling.argtypes = [TranslationUnit]
TranslationUnit_spelling.restype = _CXString
TranslationUnit_spelling.errcheck = _CXString.from_result

TranslationUnit_dispose = lib.clang_disposeTranslationUnit
TranslationUnit_dispose.argtypes = [TranslationUnit]

# File Functions
File_name = lib.clang_getFileName
File_name.argtypes = [File]
File_name.restype = c_char_p

# NOTE(review): presumably the C function returns a time_t, which may be wider
# than c_uint on some platforms -- confirm against Index.h.
File_time = lib.clang_getFileTime
File_time.argtypes = [File]
File_time.restype = c_uint
669