cindex.py revision 5b534f67946eeb2cb29076288bfee9707f055f82
1#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10r"""
11Clang Indexing Library Bindings
12===============================
13
14This module provides an interface to the Clang indexing library. It is a
15low-level interface to the indexing library which attempts to match the Clang
16API directly while also being "pythonic". Notable differences from the C API
17are:
18
19 * string results are returned as Python strings, not CXString objects.
20
21 * null cursors are translated to None.
22
23 * access to child cursors is done via iteration, not visitation.
24
25The major indexing objects are:
26
27  Index
28
29    The top-level object which manages some global library state.
30
31  TranslationUnit
32
33    High-level object encapsulating the AST for a single translation unit. These
34    can be loaded from .ast files or parsed on the fly.
35
36  Cursor
37
38    Generic object for representing a node in the AST.
39
40  SourceRange, SourceLocation, and File
41
42    Objects representing information about the input source.
43
44Most object information is exposed using properties, when the underlying API
45call is efficient.
46"""
47
48# TODO
49# ====
50#
51# o fix memory management issues (currently client must hold on to index and
52#   translation unit, or risk crashes).
53#
54# o expose code completion APIs.
55#
56# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
57#   clearly, and hide from the external interface (i.e., help(cindex)).
58#
59# o implement additional SourceLocation, SourceRange, and File methods.
60
61from ctypes import *
62
def get_cindex_library():
    """
    Load the CIndex shared library and return the ctypes handle for it.

    The library file name is chosen from the value of platform.system() and
    the bare name is handed to cdll.LoadLibrary unchanged.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform

    # Platforms with a dedicated file name; every other platform falls back
    # to the '.so' name.
    library_names = {
        'Darwin': 'libCIndex.dylib',
        'Windows': 'libCIndex.dll',
    }
    library_name = library_names.get(platform.system(), 'libCIndex.so')
    return cdll.LoadLibrary(library_name)
75
# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_param will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

# Handle to the CIndex shared library, loaded once when this module is
# imported. The ctypes function bindings in this module are looked up on it.
lib = get_cindex_library()
83
84### Structures and Utility Classes ###
85
class _CXString(Structure):
    """
    ctypes structure used to receive CXString results from the library and
    turn them into Python strings.
    """

    _fields_ = [
        ("spelling", c_char_p),
        ("free", c_int),
    ]

    def __del__(self):
        # Hand the string back to the library when the wrapper is collected.
        _CXString_dispose(self)

    @staticmethod
    def from_result(res, fn, args):
        """
        ctypes errcheck hook: replace the raw _CXString result with the value
        returned by _CXString_getCString for it.
        """
        assert isinstance(res, _CXString)
        spelling = _CXString_getCString(res)
        return spelling
98
class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.

    The file, line and column are looked up on first use and then cached on
    the instance.
    """
    _fields_ = [("ptr_data", c_void_p), ("int_data", c_uint)]

    # Cached (file, line, column) tuple; None until the first lookup.
    _data = None

    def _get_instantiation(self):
        """Return the cached (file, line, column) tuple, computing it once."""
        if self._data is not None:
            return self._data

        # Out-parameters filled in by the library call.
        file_ptr = c_object_p()
        line_no = c_uint()
        column_no = c_uint()
        SourceLocation_loc(self, byref(file_ptr), byref(line_no),
                           byref(column_no))

        # A null file pointer is exposed as None rather than as a File.
        if file_ptr:
            source_file = File(file_ptr)
        else:
            source_file = None
        self._data = (source_file, int(line_no.value), int(column_no.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        source_file, _, _ = self._get_instantiation()
        return source_file

    @property
    def line(self):
        """Get the line represented by this source location."""
        _, line_no, _ = self._get_instantiation()
        return line_no

    @property
    def column(self):
        """Get the column represented by this source location."""
        _, _, column_no = self._get_instantiation()
        return column_no

    def __repr__(self):
        source_file = self.file
        file_name = source_file.name if source_file else None
        return "<SourceLocation file %r, line %r, column %r>" % (
            file_name, self.line, self.column)
132
class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """
    _fields_ = [
        ("ptr_data", c_void_p),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint),
    ]

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        first = SourceRange_start(self)
        return first

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        last = SourceRange_end(self)
        return last

    def __repr__(self):
        # Both endpoints are fetched through the properties, start first.
        endpoints = (self.start, self.end)
        return "<SourceRange start %r, end %r>" % endpoints
161
162### Cursor Kinds ###
163
class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.

    Every numeric kind id is represented by exactly one CursorKind instance.
    Instances register themselves in a class-level table when constructed, and
    are given a name by being assigned as an attribute of this class, e.g.
    ``CursorKind.STRUCT_DECL = CursorKind(2)``.
    """

    # The unique kind objects, indexed by id.
    _kinds = []

    # Lazily built map from kind instance to the attribute name it is bound to
    # on this class. Reset to None whenever a new kind is registered.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind for the numeric id `value`.

        Raises ValueError if `value` is negative or a kind with this id has
        already been created.
        """
        # A negative id would index the table from the end and alias the slot
        # of another kind.
        if value < 0:
            raise ValueError('Invalid cursor kind id')
        # Grow the table with empty slots so that `value` is a valid index.
        missing = value - len(CursorKind._kinds) + 1
        if missing > 0:
            CursorKind._kinds.extend([None] * missing)
        if CursorKind._kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        CursorKind._kinds[value] = self
        CursorKind._name_map = None

    def from_param(self):
        """Return the numeric id; used by ctypes to marshal a kind argument."""
        return self.value

    @property
    def name(self):
        """
        Get the enumeration name of this cursor kind.

        Raises KeyError if this kind is not bound to any attribute of
        CursorKind.
        """
        name_map = CursorKind._name_map
        # Rebuild when there is no cache yet, or when this kind was bound to
        # its attribute only after the cache was built.
        if name_map is None or self not in name_map:
            name_map = {}
            for key, value in list(CursorKind.__dict__.items()):
                if isinstance(value, CursorKind):
                    name_map[value] = key
            # The cache lives on the class so that the reset in __init__
            # invalidates it for every kind.
            CursorKind._name_map = name_map
        return name_map[self]

    @staticmethod
    def from_id(id):
        """
        Return the unique CursorKind registered for the numeric id `id`.

        Raises ValueError if no kind is registered under that id.
        """
        # Reject negative ids explicitly: list indexing would otherwise wrap
        # around and return an unrelated kind.
        if id < 0 or id >= len(CursorKind._kinds):
            raise ValueError('Unknown cursor kind')
        kind = CursorKind._kinds[id]
        if kind is None:
            raise ValueError('Unknown cursor kind')
        return kind

    @staticmethod
    def get_all_kinds():
        """Return all CursorKind enumeration instances as a list, by id."""
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)
228
# FIXME: Is there a nicer way to expose this enumeration? We could potentially
# represent the nested structure, or even build a class hierarchy. The main
# things we want for sure are (a) simple external access to kinds, (b) a place
# to hang a description and name, (c) easy to keep in sync with Index.h.
#
# Each kind below is constructed exactly once; constructing it registers the
# instance under its numeric id, and the attribute it is assigned to becomes
# the name reported by CursorKind.name.

###
# Declaration Kinds

# A declaration whose specific kind is not exposed via this interface.
#
# Unexposed declarations have the same operations as any other kind of
# declaration; one can extract their location information, spelling, find their
# definitions, etc. However, the specific kind of the declaration is not
# reported.
CursorKind.UNEXPOSED_DECL = CursorKind(1)

# A C or C++ struct.
CursorKind.STRUCT_DECL = CursorKind(2)

# A C or C++ union.
CursorKind.UNION_DECL = CursorKind(3)

# A C++ class.
CursorKind.CLASS_DECL = CursorKind(4)

# An enumeration.
CursorKind.ENUM_DECL = CursorKind(5)

# A field (in C) or non-static data member (in C++) in a struct, union, or C++
# class.
CursorKind.FIELD_DECL = CursorKind(6)

# An enumerator constant.
CursorKind.ENUM_CONSTANT_DECL = CursorKind(7)

# A function.
CursorKind.FUNCTION_DECL = CursorKind(8)

# A variable.
CursorKind.VAR_DECL = CursorKind(9)

# A function or method parameter.
CursorKind.PARM_DECL = CursorKind(10)

# An Objective-C @interface.
CursorKind.OBJC_INTERFACE_DECL = CursorKind(11)

# An Objective-C @interface for a category.
CursorKind.OBJC_CATEGORY_DECL = CursorKind(12)

# An Objective-C @protocol declaration.
CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13)

# An Objective-C @property declaration.
CursorKind.OBJC_PROPERTY_DECL = CursorKind(14)

# An Objective-C instance variable.
CursorKind.OBJC_IVAR_DECL = CursorKind(15)

# An Objective-C instance method.
CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16)

# An Objective-C class method.
CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17)

# An Objective-C @implementation.
CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18)

# An Objective-C @implementation for a category.
CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19)

# A typedef.
CursorKind.TYPEDEF_DECL = CursorKind(20)
302
###
# Reference Kinds

# NOTE(review): the three Objective-C reference kinds below carry no
# description in this file; judging by their names they are references to an
# Objective-C superclass, protocol and class respectively -- confirm against
# Index.h.
CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40)
CursorKind.OBJC_PROTOCOL_REF = CursorKind(41)
CursorKind.OBJC_CLASS_REF = CursorKind(42)

# A reference to a type declaration.
#
# A type reference occurs anywhere where a type is named but not
# declared. For example, given:
#   typedef unsigned size_type;
#   size_type size;
#
# The typedef is a declaration of size_type (CXCursor_TypedefDecl),
# while the type of the variable "size" is referenced. The cursor
# referenced by the type of size is the typedef for size_type.
CursorKind.TYPE_REF = CursorKind(43)

###
# Invalid/Error Kinds

# NOTE(review): these kinds are not described in this file; the names suggest
# error results (invalid file, no declaration found, not implemented) --
# confirm against Index.h.
CursorKind.INVALID_FILE = CursorKind(70)
CursorKind.NO_DECL_FOUND = CursorKind(71)
CursorKind.NOT_IMPLEMENTED = CursorKind(72)

###
# Expression Kinds

# An expression whose specific kind is not exposed via this interface.
#
# Unexposed expressions have the same operations as any other kind of
# expression; one can extract their location information, spelling, children,
# etc. However, the specific kind of the expression is not reported.
CursorKind.UNEXPOSED_EXPR = CursorKind(100)

# An expression that refers to some value declaration, such as a function,
# variable, or enumerator.
CursorKind.DECL_REF_EXPR = CursorKind(101)

# An expression that refers to a member of a struct, union, class, Objective-C
# class, etc.
CursorKind.MEMBER_REF_EXPR = CursorKind(102)

# An expression that calls a function.
CursorKind.CALL_EXPR = CursorKind(103)

# An expression that sends a message to an Objective-C object or class.
CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104)

###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

###
# Other Kinds

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)
368
369### Cursors ###
370
class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        # Equality is decided by the library, not by comparing fields here.
        return Cursor_eq(self, other)

    def __ne__(self, other):
        same = Cursor_eq(self, other)
        return not same

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        definition = Cursor_def(self)
        return definition

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity
        referenced by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        kind_id = self._kind_id
        return CursorKind.from_id(kind_id)

    @property
    def spelling(self):
        """
        Return the spelling of the entity pointed at by the cursor, or None
        when the cursor's kind is not a declaration kind.
        """
        if self.kind.is_declaration():
            return Cursor_spelling(self)
        # FIXME: clang_getCursorSpelling should be fixed to not assert on
        # this, for consistency with clang_getCursorUSR.
        return None

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator for accessing the children of this cursor."""

        # FIXME: Expose iteration from CIndex, PR6125.
        def collect(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != Cursor_null()
            children.append(child)
            # A return value of 1 tells the visitation to continue.
            return 1

        # The list is handed to the visitor as its third argument and filled
        # in while the library walks the children.
        collected = []
        Cursor_visit(self, Callback(collect), collected)
        return iter(collected)

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck hook: translate a null cursor result to None."""
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        return None if res == Cursor_null() else res
463
464## CIndex Objects ##
465
466# CIndex objects (derived from ClangObject) are essentially lightweight
467# wrappers attached to some underlying object, which is exposed via CIndex as
468# a void*.
469
class ClangObject(object):
    """
    Base class for wrappers around objects owned by the Clang CIndex library.
    It stores the underlying pointer and exposes it to ctypes so instances
    can be passed directly as call arguments.
    """
    def __init__(self, obj):
        # Only typed, non-null object pointers may be wrapped.
        assert isinstance(obj, c_object_p)
        assert obj
        self.obj = obj
        self._as_parameter_ = obj

    def from_param(self):
        """Return the wrapped pointer for use as a ctypes argument."""
        return self._as_parameter_
481
482
class _CXUnsavedFile(Structure):
    """Helper for passing unsaved file arguments."""
    # One entry per in-memory file handed to TranslationUnit.parse:
    #   name     -- the file name being mapped
    #   contents -- the text substituted for that file
    #   length   -- set by the caller to len() of the contents
    _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]
486
class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False, displayDiags=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        displayDiags -- Display diagnostics during translation unit creation.
        """
        handle = Index_create(excludeDecls, displayDiags)
        return Index(handle)

    def __del__(self):
        # Release the underlying index when the wrapper is collected.
        Index_dispose(self)

    def read(self, path):
        """Load the translation unit from the given AST file."""
        unit = TranslationUnit.read(self, path)
        return unit

    def parse(self, path, args = [], unsaved_files = []):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        In-memory contents for files can be provided by passing a list of
        (filename, contents) pairs as unsaved_files: the first item of each
        pair is the file name to be mapped and the second is the contents to
        be substituted for that file. The work is delegated to
        TranslationUnit.parse; see it for the accepted forms of the contents.
        """
        unit = TranslationUnit.parse(self, path, args, unsaved_files)
        return unit
523
524
class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __del__(self):
        # Release the underlying translation unit when the wrapper is
        # collected.
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    @staticmethod
    def read(ix, path):
        """
        Create a translation unit from the given AST file.

        Returns None if the library returns a null translation unit.
        """
        ptr = TranslationUnit_read(ix, path)
        return TranslationUnit(ptr) if ptr else None

    @staticmethod
    def parse(ix, path, args = [], unsaved_files = []):
        """
        Construct a translation unit from the given source file, using
        the given command line arguments.

        Parameters:
        ix -- The Index to create the translation unit in.
        path -- The source file to parse.
        args -- Sequence of additional command line argument strings.
        unsaved_files -- Sequence of (name, contents) pairs giving in-memory
                         contents for files. The contents may be a string or
                         a file-like object; a file-like object is read in
                         full via its read() method.

        Returns None if the library returns a null translation unit.
        """
        # The default lists are never mutated here, so sharing them between
        # calls is harmless.

        # A missing array is passed to the library as a NULL pointer.
        arg_array = None
        if len(args):
            arg_array = (c_char_p * len(args))(* args)

        unsaved_files_array = None
        if len(unsaved_files):
            unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))()
            for i,(name,value) in enumerate(unsaved_files):
                # Accept file objects as well as strings.
                if hasattr(value, 'read'):
                    value = value.read()
                unsaved_files_array[i].name = name
                unsaved_files_array[i].contents = value
                unsaved_files_array[i].length = len(value)

        ptr = TranslationUnit_parse(ix, path, len(args), arg_array,
                                    len(unsaved_files), unsaved_files_array)
        return TranslationUnit(ptr) if ptr else None
571
class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    @property
    def name(self):
        """Return the complete file and path name of the file."""
        file_name = File_name(self)
        return file_name

    @property
    def time(self):
        """Return the last modification time of the file."""
        modified = File_time(self)
        return modified
587
# Additional Functions and Types

# Visitor callback type passed to Cursor_visit. The callback receives two
# cursors and a Python object and returns an int; Cursor.get_children uses it
# with (child, parent, list-to-fill) and returns 1 to continue.
Callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)

# String Functions
# Releases a CXString; called from _CXString.__del__.
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]

# Extracts the C string from a CXString; c_char_p results are returned to
# Python as string objects.
_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p
600
# Source Location Functions
# Fills in the file, line and column out-parameters for a location; used by
# SourceLocation._get_instantiation.
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint)]

# Source Range Functions
SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation

# CursorKind Functions
# Each predicate takes a CursorKind (marshalled through CursorKind.from_param)
# and has its raw result passed through bool().
CursorKind_is_decl = lib.clang_isDeclaration
CursorKind_is_decl.argtypes = [CursorKind]
CursorKind_is_decl.restype = bool

CursorKind_is_ref = lib.clang_isReference
CursorKind_is_ref.argtypes = [CursorKind]
CursorKind_is_ref.restype = bool

CursorKind_is_expr = lib.clang_isExpression
CursorKind_is_expr.argtypes = [CursorKind]
CursorKind_is_expr.restype = bool

CursorKind_is_stmt = lib.clang_isStatement
CursorKind_is_stmt.argtypes = [CursorKind]
CursorKind_is_stmt.restype = bool

CursorKind_is_inv = lib.clang_isInvalid
CursorKind_is_inv.argtypes = [CursorKind]
CursorKind_is_inv.restype = bool
635
# Cursor Functions
# TODO: Implement this function
# (Cursor_get is bound here but not called anywhere in this module.)
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

# Returns the null cursor that Cursor.from_result and Cursor.get_children
# compare against.
Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

# errcheck converts the returned CXString into a Python string.
Cursor_usr = lib.clang_getCursorUSR
Cursor_usr.argtypes = [Cursor]
Cursor_usr.restype = _CXString
Cursor_usr.errcheck = _CXString.from_result

Cursor_is_def = lib.clang_isCursorDefinition
Cursor_is_def.argtypes = [Cursor]
Cursor_is_def.restype = bool

# errcheck translates a null cursor result to None.
Cursor_def = lib.clang_getCursorDefinition
Cursor_def.argtypes = [Cursor]
Cursor_def.restype = Cursor
Cursor_def.errcheck = Cursor.from_result

# Returns an unsigned int rather than a bool; used by Cursor.__eq__ and
# Cursor.__ne__.
Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

Cursor_spelling = lib.clang_getCursorSpelling
Cursor_spelling.argtypes = [Cursor]
Cursor_spelling.restype = _CXString
Cursor_spelling.errcheck = _CXString.from_result

Cursor_loc = lib.clang_getCursorLocation
Cursor_loc.argtypes = [Cursor]
Cursor_loc.restype = SourceLocation

Cursor_extent = lib.clang_getCursorExtent
Cursor_extent.argtypes = [Cursor]
Cursor_extent.restype = SourceRange

# (Cursor_ref is bound here but not called anywhere in this module.)
Cursor_ref = lib.clang_getCursorReferenced
Cursor_ref.argtypes = [Cursor]
Cursor_ref.restype = Cursor
Cursor_ref.errcheck = Cursor.from_result

# Visits the children of a cursor, invoking the Callback with the py_object
# passed as the third argument.
Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Callback, py_object]
Cursor_visit.restype = c_uint
684
# Index Functions
# The two int arguments are the excludeDecls and displayDiags flags passed by
# Index.create.
Index_create = lib.clang_createIndex
Index_create.argtypes = [c_int, c_int]
Index_create.restype = c_object_p

Index_dispose = lib.clang_disposeIndex
Index_dispose.argtypes = [Index]

# Translation Unit Functions
TranslationUnit_read = lib.clang_createTranslationUnit
TranslationUnit_read.argtypes = [Index, c_char_p]
TranslationUnit_read.restype = c_object_p

# Arguments as passed by TranslationUnit.parse: index, source path, argument
# count, argument array, unsaved file count, unsaved file array. The two
# arrays are declared as c_void_p so that a null value can be passed when
# they are empty.
TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile
TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p,
                                  c_int, c_void_p]
TranslationUnit_parse.restype = c_object_p

TranslationUnit_cursor = lib.clang_getTranslationUnitCursor
TranslationUnit_cursor.argtypes = [TranslationUnit]
TranslationUnit_cursor.restype = Cursor
TranslationUnit_cursor.errcheck = Cursor.from_result

TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling
TranslationUnit_spelling.argtypes = [TranslationUnit]
TranslationUnit_spelling.restype = _CXString
TranslationUnit_spelling.errcheck = _CXString.from_result

TranslationUnit_dispose = lib.clang_disposeTranslationUnit
TranslationUnit_dispose.argtypes = [TranslationUnit]

# File Functions
File_name = lib.clang_getFileName
File_name.argtypes = [File]
File_name.restype = c_char_p

# NOTE(review): the result is read as a C unsigned int; if the C function
# returns a wider time type, the value is truncated here -- confirm against
# Index.h.
File_time = lib.clang_getFileTime
File_time.argtypes = [File]
File_time.restype = c_uint

###

# Public names exported by `from cindex import *`.
__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind',
           'SourceRange', 'SourceLocation', 'File']
729