cindex.py revision 3239a67361cc89eba2fe7c7abdb41bd2c9414207
1#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10r"""
11Clang Indexing Library Bindings
12===============================
13
14This module provides an interface to the Clang indexing library. It is a
15low-level interface to the indexing library which attempts to match the Clang
16API directly while also being "pythonic". Notable differences from the C API
17are:
18
19 * string results are returned as Python strings, not CXString objects.
20
21 * null cursors are translated to None.
22
23 * access to child cursors is done via iteration, not visitation.
24
25The major indexing objects are:
26
27  Index
28
29    The top-level object which manages some global library state.
30
31  TranslationUnit
32
33    High-level object encapsulating the AST for a single translation unit. These
34    can be loaded from .ast files or parsed on the fly.
35
36  Cursor
37
38    Generic object for representing a node in the AST.
39
40  SourceRange, SourceLocation, and File
41
42    Objects representing information about the input source.
43
44Most object information is exposed using properties, when the underlying API
45call is efficient.
46"""
47
48# TODO
49# ====
50#
51# o fix memory management issues (currently client must hold on to index and
52#   translation unit, or risk crashes).
53#
54# o expose code completion APIs.
55#
56# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
57#   clearly, and hide from the external interface (i.e., help(cindex)).
58#
59# o implement additional SourceLocation, SourceRange, and File methods.
60
61from ctypes import *
62
def get_cindex_library():
    """Load and return the CIndex shared library for the current platform.

    The file name is chosen from platform.system() and handed to
    cdll.LoadLibrary as a bare name (no directory component).
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    library_names = {
        'Darwin': 'libCIndex.dylib',
        'Windows': 'libCIndex.dll',
    }
    # Any platform not listed above falls back to the ".so" name.
    filename = library_names.get(platform.system(), 'libCIndex.so')
    return cdll.LoadLibrary(filename)
75
# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_param will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

# Handle to the loaded CIndex library. Loading happens at import time, so
# importing this module fails if the library cannot be loaded.
lib = get_cindex_library()
83
84### Structures and Utility Classes ###
85
class _CXString(Structure):
    """Internal mirror of the CXString structure returned by CIndex calls."""

    _fields_ = [
        ("spelling", c_char_p),
        ("free", c_int),
    ]

    @staticmethod
    def from_result(res, fn, args):
        """Convert a _CXString call result into its C string value.

        The signature matches what ctypes passes to an errcheck hook; only
        res is inspected, fn and args are ignored.
        """
        assert isinstance(res, _CXString)
        text = _CXString_getCString(res)
        return text

    def __del__(self):
        # Hand the string back to the library when this wrapper is collected.
        _CXString_dispose(self)
98
class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.

    The file, line, column and offset are resolved lazily through a single
    library call on first access and cached on the instance afterwards.
    """
    _fields_ = [("ptr_data", c_void_p), ("int_data", c_uint)]
    # Cached (file, line, column, offset) tuple; None until first requested.
    _data = None

    def _get_instantiation(self):
        """Return the cached (file, line, column, offset) tuple.

        The file element is a File wrapper, or None when the library reports
        a null file pointer.
        """
        if self._data is None:
            f, l, c, o = c_object_p(), c_uint(), c_uint(), c_uint()
            SourceLocation_loc(self, byref(f), byref(l), byref(c), byref(o))
            f = File(f) if f else None
            # The fourth slot is the file offset (o); it used to be filled
            # with the column value by mistake.
            self._data = (f, int(l.value), int(c.value), int(o.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        return self._get_instantiation()[0]

    @property
    def line(self):
        """Get the line represented by this source location."""
        return self._get_instantiation()[1]

    @property
    def column(self):
        """Get the column represented by this source location."""
        return self._get_instantiation()[2]

    @property
    def offset(self):
        """Get the file offset represented by this source location."""
        return self._get_instantiation()[3]

    def __repr__(self):
        source_file = self.file
        return "<SourceLocation file %r, line %r, column %r>" % (
            source_file.name if source_file else None, self.line, self.column)
137
class SourceRange(Structure):
    """
    A SourceRange covers a span of source code delimited by two source
    locations.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint),
    ]

    def __repr__(self):
        first, last = self.start, self.end
        return "<SourceRange start %r, end %r>" % (first, last)

    @property
    def start(self):
        """
        The SourceLocation of the first character within this source range.
        """
        begin = SourceRange_start(self)
        return begin

    @property
    def end(self):
        """
        The SourceLocation of the last character within this source range.
        """
        finish = SourceRange_end(self)
        return finish
166
167### Cursor Kinds ###
168
class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.

    Every kind is a singleton registered under its integer id. Instances are
    expected to be published as class attributes (CursorKind.SOME_NAME); the
    attribute name is what the name property reports.
    """

    # The unique kind objects, indexed by id.
    _kinds = []
    # Lazily built {kind: attribute name} map shared by all instances; reset
    # to None whenever a new kind is registered.
    _name_map = None

    def __init__(self, value):
        """Register a new kind under the integer id value.

        Raises ValueError if a kind with the same id already exists.
        """
        kinds = CursorKind._kinds
        if value >= len(kinds):
            # Grow the table in place so that kinds[value] is a valid slot.
            kinds.extend([None] * (value - len(kinds) + 1))
        if kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        kinds[value] = self
        # Invalidate the shared name cache; it is rebuilt on the next lookup.
        CursorKind._name_map = None

    def from_param(self):
        """Return the integer id used when passing this kind to the library."""
        return self.value

    @property
    def name(self):
        """Get the enumeration name of this cursor kind."""
        if CursorKind._name_map is None:
            # Cache on the class, not the instance, so the map is built once
            # and the reset in __init__ actually takes effect.
            CursorKind._name_map = dict(
                (value, key)
                for key, value in CursorKind.__dict__.items()
                if isinstance(value, CursorKind))
        return CursorKind._name_map[self]

    @staticmethod
    def from_id(id):
        """Return the kind registered under id.

        Raises ValueError for ids that are negative, out of range, or not
        registered. Negative ids are rejected explicitly because list indexing
        would otherwise wrap around to the end of the table.
        """
        kinds = CursorKind._kinds
        if id < 0 or id >= len(kinds) or kinds[id] is None:
            raise ValueError('Unknown cursor kind')
        return kinds[id]

    @staticmethod
    def get_all_kinds():
        """Return a list of all CursorKind enumeration instances."""
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)
233
234# FIXME: Is there a nicer way to expose this enumeration? We could potentially
235# represent the nested structure, or even build a class hierarchy. The main
236# things we want for sure are (a) simple external access to kinds, (b) a place
237# to hang a description and name, (c) easy to keep in sync with Index.h.
238
###
# Declaration Kinds
#
# Each assignment below registers one CursorKind singleton under the given
# integer id and publishes it as a class attribute; the attribute name is what
# CursorKind.name reports for that kind.

# A declaration whose specific kind is not exposed via this interface.
#
# Unexposed declarations have the same operations as any other kind of
# declaration; one can extract their location information, spelling, find their
# definitions, etc. However, the specific kind of the declaration is not
# reported.
CursorKind.UNEXPOSED_DECL = CursorKind(1)

# A C or C++ struct.
CursorKind.STRUCT_DECL = CursorKind(2)

# A C or C++ union.
CursorKind.UNION_DECL = CursorKind(3)

# A C++ class.
CursorKind.CLASS_DECL = CursorKind(4)

# An enumeration.
CursorKind.ENUM_DECL = CursorKind(5)

# A field (in C) or non-static data member (in C++) in a struct, union, or C++
# class.
CursorKind.FIELD_DECL = CursorKind(6)

# An enumerator constant.
CursorKind.ENUM_CONSTANT_DECL = CursorKind(7)

# A function.
CursorKind.FUNCTION_DECL = CursorKind(8)

# A variable.
CursorKind.VAR_DECL = CursorKind(9)

# A function or method parameter.
CursorKind.PARM_DECL = CursorKind(10)

# An Objective-C @interface.
CursorKind.OBJC_INTERFACE_DECL = CursorKind(11)

# An Objective-C @interface for a category.
CursorKind.OBJC_CATEGORY_DECL = CursorKind(12)

# An Objective-C @protocol declaration.
CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13)

# An Objective-C @property declaration.
CursorKind.OBJC_PROPERTY_DECL = CursorKind(14)

# An Objective-C instance variable.
CursorKind.OBJC_IVAR_DECL = CursorKind(15)

# An Objective-C instance method.
CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16)

# An Objective-C class method.
CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17)

# An Objective-C @implementation.
CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18)

# An Objective-C @implementation for a category.
CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19)

# A typedef.
CursorKind.TYPEDEF_DECL = CursorKind(20)
307
###
# Reference Kinds

# NOTE(review): judging by the names, these are references to an Objective-C
# superclass, protocol and class respectively -- confirm against Index.h.
CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40)
CursorKind.OBJC_PROTOCOL_REF = CursorKind(41)
CursorKind.OBJC_CLASS_REF = CursorKind(42)

# A reference to a type declaration.
#
# A type reference occurs anywhere where a type is named but not
# declared. For example, given:
#   typedef unsigned size_type;
#   size_type size;
#
# The typedef is a declaration of size_type (CXCursor_TypedefDecl),
# while the type of the variable "size" is referenced. The cursor
# referenced by the type of size is the typedef for size_type.
CursorKind.TYPE_REF = CursorKind(43)

###
# Invalid/Error Kinds

# NOTE(review): judging by the names, these mark error conditions rather than
# real AST entities -- confirm the exact meaning of each against Index.h.
CursorKind.INVALID_FILE = CursorKind(70)
CursorKind.NO_DECL_FOUND = CursorKind(71)
CursorKind.NOT_IMPLEMENTED = CursorKind(72)
333
###
# Expression Kinds

# An expression whose specific kind is not exposed via this interface.
#
# Unexposed expressions have the same operations as any other kind of
# expression; one can extract their location information, spelling, children,
# etc. However, the specific kind of the expression is not reported.
CursorKind.UNEXPOSED_EXPR = CursorKind(100)

# An expression that refers to some value declaration, such as a function,
# variable, or enumerator.
CursorKind.DECL_REF_EXPR = CursorKind(101)

# An expression that refers to a member of a struct, union, class, Objective-C
# class, etc.
CursorKind.MEMBER_REF_EXPR = CursorKind(102)

# An expression that calls a function.
CursorKind.CALL_EXPR = CursorKind(103)

# An expression that sends a message to an Objective-C object or class.
CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)

###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

###
# Other Kinds

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)
373
374### Cursors ###
375
class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        """Compare two cursors using the library's notion of equality.

        Comparing against anything that is not a Cursor yields
        NotImplemented, so expressions such as ``cursor == None`` evaluate to
        False instead of failing inside the ctypes argument conversion.
        """
        if not isinstance(other, Cursor):
            return NotImplemented
        return bool(Cursor_eq(self, other))

    def __ne__(self, other):
        """Inverse of __eq__, with the same handling of non-Cursor operands."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """Return the spelling of the entity pointed at by the cursor.

        Returns None when the cursor's kind is not a declaration kind.
        """
        if not self.kind.is_declaration():
            # FIXME: clang_getCursorSpelling should be fixed to not assert on
            # this, for consistency with clang_getCursorUSR.
            return None
        return Cursor_spelling(self)

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator for accessing the children of this cursor.

        All children are collected eagerly through the library's visitor
        callback; the returned iterator walks that collected list.
        """

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            return 1 # continue
        children = []
        Cursor_visit(self, Callback(visitor), children)
        return iter(children)

    @staticmethod
    def from_result(res, fn, args):
        """Translate a Cursor call result, mapping the null cursor to None.

        The signature matches what ctypes passes to an errcheck hook; fn and
        args are ignored.
        """
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == Cursor_null():
            return None
        return res
467        return res
468
469## CIndex Objects ##
470
471# CIndex objects (derived from ClangObject) are essentially lightweight
472# wrappers attached to some underlying object, which is exposed via CIndex as
473# a void*.
474
class ClangObject(object):
    """
    Base class for wrappers around objects that the Clang CIndex library
    exposes as opaque pointers. It stores the pointer and makes the wrapper
    usable as a ctypes argument.
    """
    def __init__(self, obj):
        # Only non-null object pointers may be wrapped.
        assert isinstance(obj, c_object_p) and obj
        self._as_parameter_ = obj
        self.obj = obj

    def from_param(self):
        """Return the wrapped pointer for ctypes argument conversion."""
        return self._as_parameter_
486
487
488class _CXUnsavedFile(Structure):
489    """Helper for passing unsaved file arguments."""
490    _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]
491
class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        return Index(Index_create(excludeDecls, displayDiags))

    def __del__(self):
        # Release the underlying library object together with the wrapper.
        Index_dispose(self)

    def read(self, path):
        """Load the translation unit from the given AST file.

        Returns a TranslationUnit, or None if the library returns a null
        pointer.
        """
        ptr = TranslationUnit_read(self, path)
        return TranslationUnit(ptr) if ptr else None

    def parse(self, path, args=None, unsaved_files=None):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        In-memory contents for files can be provided by passing a list of
        pairs as unsaved_files; the first item of each pair is the file name
        to be mapped and the second is the contents to be substituted for the
        file. The contents may be passed as strings or file objects.

        Omitting args or unsaved_files (or passing None) is the same as
        passing an empty list. Returns a TranslationUnit, or None if the
        library returns a null pointer. Raises TypeError when an unsaved file's
        contents are neither a string nor an object whose read() returns a
        string.
        """
        args = [] if args is None else list(args)
        unsaved_files = [] if unsaved_files is None else list(unsaved_files)

        # A plain 0 is passed for an absent array; the prototypes declare
        # these parameters as c_void_p.
        arg_array = 0
        if args:
            arg_array = (c_char_p * len(args))(*args)
        unsaved_files_array = 0
        if unsaved_files:
            unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))()
            for i, (name, value) in enumerate(unsaved_files):
                if not isinstance(value, str):
                    if not hasattr(value, 'read'):
                        raise TypeError('Unexpected unsaved file contents.')
                    # FIXME: It would be great to support an efficient version
                    # of this, one day.
                    value = value.read()
                if not isinstance(value, str):
                    raise TypeError('Unexpected unsaved file contents.')
                unsaved_files_array[i].name = name
                unsaved_files_array[i].contents = value
                unsaved_files_array[i].length = len(value)
        ptr = TranslationUnit_parse(self, path, len(args), arg_array,
                                    len(unsaved_files), unsaved_files_array)
        return TranslationUnit(ptr) if ptr else None
547        return TranslationUnit(ptr) if ptr else None
548
549
class TranslationUnit(ClangObject):
    """
    Wrapper for a single source code translation unit, giving read-only
    access to its top-level declarations.
    """

    @property
    def spelling(self):
        """The original source file name of this translation unit."""
        source_name = TranslationUnit_spelling(self)
        return source_name

    @property
    def cursor(self):
        """The cursor that represents this translation unit."""
        root = TranslationUnit_cursor(self)
        return root

    def __del__(self):
        # Release the underlying library object together with the wrapper.
        TranslationUnit_dispose(self)
567        return TranslationUnit_spelling(self)
568
class File(ClangObject):
    """
    Wrapper for one particular source file belonging to a translation unit.
    """

    @property
    def time(self):
        """The last modification time of the file."""
        modified = File_time(self)
        return modified

    @property
    def name(self):
        """The complete file and path name of the file."""
        full_name = File_name(self)
        return full_name
583        return File_time(self)
584
585# Additional Functions and Types
586
# C callback type for the child visitor passed to clang_visitChildren (see
# Cursor.get_children): it receives the child cursor, the parent cursor and an
# arbitrary Python object, and returns an int.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)
589
590# String Functions
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]
# clang_disposeString returns void; without this ctypes would interpret
# whatever is left in the return register as a C int.
_CXString_dispose.restype = None

_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p
597
598# Source Location Functions
# Fills the four out-parameters (file, line, column, offset) for a location.
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint),
                               POINTER(c_uint)]
# The function returns void; results come back only via the out-parameters.
SourceLocation_loc.restype = None
603
604# Source Range Functions
# Both accessors take a SourceRange by value and return a SourceLocation; they
# back the SourceRange.start and SourceRange.end properties.
SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation
612
613# CursorKind Functions
def _bind_kind_predicate(symbol):
    """Bind the library function named symbol as a CursorKind -> bool test."""
    predicate = getattr(lib, symbol)
    predicate.argtypes = [CursorKind]
    predicate.restype = bool
    return predicate

# One predicate per kind category; each backs the CursorKind.is_* method of
# the corresponding name.
CursorKind_is_decl = _bind_kind_predicate('clang_isDeclaration')
CursorKind_is_ref = _bind_kind_predicate('clang_isReference')
CursorKind_is_expr = _bind_kind_predicate('clang_isExpression')
CursorKind_is_stmt = _bind_kind_predicate('clang_isStatement')
CursorKind_is_inv = _bind_kind_predicate('clang_isInvalid')
633
# Cursor Functions
# TODO: Expose this through the Python API; it is bound here, but nothing
# visible in this module calls it yet.
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

# Returns the null cursor; used as the sentinel that Cursor.from_result and
# Cursor.get_children compare against.
Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

# Results declared as _CXString are converted to Python strings by the
# _CXString.from_result errcheck hook.
Cursor_usr = lib.clang_getCursorUSR
Cursor_usr.argtypes = [Cursor]
Cursor_usr.restype = _CXString
Cursor_usr.errcheck = _CXString.from_result

Cursor_is_def = lib.clang_isCursorDefinition
Cursor_is_def.argtypes = [Cursor]
Cursor_is_def.restype = bool

# Results passed through Cursor.from_result come back as None when the
# library returns the null cursor.
Cursor_def = lib.clang_getCursorDefinition
Cursor_def.argtypes = [Cursor]
Cursor_def.restype = Cursor
Cursor_def.errcheck = Cursor.from_result

Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

Cursor_spelling = lib.clang_getCursorSpelling
Cursor_spelling.argtypes = [Cursor]
Cursor_spelling.restype = _CXString
Cursor_spelling.errcheck = _CXString.from_result

Cursor_loc = lib.clang_getCursorLocation
Cursor_loc.argtypes = [Cursor]
Cursor_loc.restype = SourceLocation

Cursor_extent = lib.clang_getCursorExtent
Cursor_extent.argtypes = [Cursor]
Cursor_extent.restype = SourceRange

# NOTE(review): bound but not used by any wrapper visible in this module.
Cursor_ref = lib.clang_getCursorReferenced
Cursor_ref.argtypes = [Cursor]
Cursor_ref.restype = Cursor
Cursor_ref.errcheck = Cursor.from_result

# Visits the children of a cursor, invoking the Callback for each one; the
# py_object argument is handed through to the callback unchanged.
Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Callback, py_object]
Cursor_visit.restype = c_uint
682
683# Index Functions
Index_create = lib.clang_createIndex
Index_create.argtypes = [c_int, c_int]
Index_create.restype = c_object_p

Index_dispose = lib.clang_disposeIndex
Index_dispose.argtypes = [Index]
# clang_disposeIndex returns void; do not let ctypes read a bogus int result.
Index_dispose.restype = None
690
691# Translation Unit Functions
TranslationUnit_read = lib.clang_createTranslationUnit
TranslationUnit_read.argtypes = [Index, c_char_p]
TranslationUnit_read.restype = c_object_p

# The two array parameters (command line arguments and unsaved files) are
# declared as c_void_p so that Index.parse can pass either a ctypes array or a
# plain 0 when there is nothing to pass.
TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
                                  c_int, c_void_p]
TranslationUnit_parse.restype = c_object_p

TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
TranslationUnit_cursor.argtypes = [TranslationUnit]
TranslationUnit_cursor.restype = Cursor
TranslationUnit_cursor.errcheck = Cursor.from_result

TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
TranslationUnit_spelling.argtypes = [TranslationUnit]
TranslationUnit_spelling.restype = _CXString
TranslationUnit_spelling.errcheck = _CXString.from_result

TranslationUnit_dispose = lib.clang_disposeTranslationUnit
TranslationUnit_dispose.argtypes = [TranslationUnit]
# clang_disposeTranslationUnit returns void; do not let ctypes read a bogus
# int result.
TranslationUnit_dispose.restype = None
713
714# File Functions
# Accessors backing the File.name and File.time properties.
File_name = lib.clang_getFileName
File_name.argtypes = [File]
File_name.restype = c_char_p

# NOTE(review): the result is read as an unsigned int; if the C function
# returns a wider time type this would truncate -- confirm against Index.h.
File_time = lib.clang_getFileTime
File_time.argtypes = [File]
File_time.restype = c_uint
722
723###
724
# Public API of the module; the ctypes function bindings and helper structures
# defined above are deliberately left out of star-imports.
__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind',
           'SourceRange', 'SourceLocation', 'File']
727