1c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu#!/usr/bin/python
2c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
# NOTE: this file is taken from the Python source distribution.
# It can be found under Tools/gdb/libpython.py. It is shipped with Cython
# because it's not installed as a python module, and because changes are only
# merged into new python versions (v3.2+).
7c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
8c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu'''
9c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing XuFrom gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
10c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xuto be extended with Python code e.g. for library-specific data visualizations,
11c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xusuch as for the C++ STL types.  Documentation on this API can be seen at:
12c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xuhttp://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
13c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
14a0decc9a2481f938e1675b4f7bbd58761a882a36Argyrios Kyrtzidis
1555fc873017f10f6f566b182b70f6fc22aefa3464Chandler CarruthThis python module deals with the case when the process being debugged (the
16ec8605f1d7ec846dbf51047bfd5c56d32d1ff91cArgyrios Kyrtzidis"inferior process" in gdb parlance) is itself python, or more specifically,
17695fb502825a53ccd178ec1c85c77929d88acb71Argyrios Kyrtzidislinked against libpython.  In this situation, almost every item of data is a
18699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis(PyObject*), and having the debugger merely print their addresses is not very
1918c66fdc3c4008d335885695fe36fb5353c5f672Ted Kremenekenlightening.
2018c66fdc3c4008d335885695fe36fb5353c5f672Ted Kremenek
This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables.
24c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
259ef6537a894c33003359b1f9b9676e9178e028b7Ted KremenekIn particular, given a gdb.Value corresponding to a PyObject* in the inferior
26c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xuprocess, we can generate a "proxy value" within the gdb process.  For example,
27c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xugiven a PyObject* in the inferior process that is in fact a PyListObject*
28c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xuholding three PyObject* that turn out to be PyStringObject* instances, we can
299843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xugenerate a proxy value within the gdb process that is a list of strings:
30766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu  ["foo", "bar", "baz"]
319843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu
329843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing XuDoing so can be expensive for complicated graphs of objects, and could take
339843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xusome time, so we also have a "write_repr" method that writes a representation
349843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuof the data to a file-like object.  This allows us to stop the traversal by
359843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuhaving the file-like object raise an exception if it gets too much data.
369843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu
37fae962200efef618d1c24c14b9c3fed25876f059Chris LattnerWith both "proxyval" and "write_repr" we keep track of the set of all addresses
38fae962200efef618d1c24c14b9c3fed25876f059Chris Lattnervisited so far in the traversal, to avoid infinite recursion due to cycles in
399843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuthe graph of object references.
409843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu
We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger.
459843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu
4647dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing XuThe module also extends gdb with some python-specific commands.
4747dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu'''
4847dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xufrom __future__ import with_statement
49766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu
50766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xuimport os
51766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xuimport re
529843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport sys
539843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport struct
549843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport locale
559843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport atexit
569843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport warnings
579843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport tempfile
589843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xuimport textwrap
59ec8605f1d7ec846dbf51047bfd5c56d32d1ff91cArgyrios Kyrtzidisimport itertools
60f34a5791c5c9df0348714e275adb09b8cf858460Jordan Rose
61699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidisimport gdb
62699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
if sys.version_info[0] < 3:
    # Python 2 only: switch the interpreter's default encoding to UTF-8.
    # This is a workaround for
    # http://sourceware.org/bugzilla/show_bug.cgi?id=12285
    #
    # The standard streams are captured before reload(sys) and put back
    # afterwards (presumably because reloading sys rebinds them).
    out, err = sys.stdout, sys.stderr
    reload(sys).setdefaultencoding('UTF-8')
    sys.stdout, sys.stderr = out, err
706bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines
# gdb.Type objects for a few basic pointer types, looked up once at import:
_type_void_ptr = gdb.lookup_type('void').pointer()  # void*
_type_char_ptr = gdb.lookup_type('char').pointer()  # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer()  # unsigned char*

# Size of a void* as reported by gdb
SIZEOF_VOID_P = _type_void_ptr.sizeof
77699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
# Bit masks that are tested against a type object's "tp_flags" field.
# (Presumably these mirror the Py_TPFLAGS_* values of the libpython being
# debugged -- verify against that Python's object.h.)
#
# Plain integer literals are used rather than Python 2 long literals ("1L"),
# which are a syntax error on Python 3; the resulting values are identical.
Py_TPFLAGS_HEAPTYPE = (1 << 9)

Py_TPFLAGS_INT_SUBCLASS      = (1 << 23)
Py_TPFLAGS_LONG_SUBCLASS     = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1 << 26)
# STRING and BYTES deliberately share the same bit
Py_TPFLAGS_STRING_SUBCLASS   = (1 << 27)
Py_TPFLAGS_BYTES_SUBCLASS    = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1 << 31)

# Length limit for generated output
MAX_OUTPUT_LEN = 1024

hexdigits = "0123456789abcdef"

# Encoding used by write_unicode() when it has to encode unicode text
ENCODING = locale.getpreferredencoding()
96c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
class NullPyObjectPtr(RuntimeError):
    '''Raised by PyObjectPtr.field() when the wrapped pointer is NULL'''
998bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
100699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
def safety_limit(val, limit=1000):
    '''
    Clamp an integer value read from the process being debugged.

    Values scraped from the inferior (e.g. sizes of iterations, sizes of
    lists) may be garbage, so cap them to keep arbitrary breakage within said
    process from breaking the gdb process too much.

    val: the untrusted value
    limit: the upper bound to apply (defaults to 1000)

    Returns the smaller of "val" and "limit"; values already at or below the
    limit (including negative ones) are returned unchanged.
    '''
    return min(val, limit)
106466224fd068a0a0084968a7f521a690a51c3b226Jordan Rose
1079843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu
def safe_range(val):
    '''
    As per range, but don't trust the value too much: cap it to a safety
    threshold (see safety_limit) in case the data was corrupted.

    Returns an xrange on Python 2 and a range on Python 3.
    '''
    try:
        make_range = xrange  # Python 2: lazy range
    except NameError:
        make_range = range   # Python 3: xrange no longer exists
    # Convert to a plain integer first so the range constructor never has to
    # deal with a debugger value object directly
    return make_range(safety_limit(int(val)))
112c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
def write_unicode(file, text):
    '''
    Write a byte or unicode string to "file".

    On Python 2, unicode strings are first encoded to ENCODING with the
    'backslashreplace' error handler, to avoid UnicodeEncodeError.  On
    Python 3 the "unicode" builtin does not exist, so the text is passed to
    file.write() as-is.
    '''
    # The version test must come first: it stops the Python 2-only name
    # "unicode" from being evaluated on Python 3
    if sys.version_info[0] < 3 and isinstance(text, unicode):
        text = text.encode(ENCODING, 'backslashreplace')
    file.write(text)
120c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
def os_fsencode(filename):
    '''
    Encode "filename" to a byte string using the filesystem encoding.

    Where the interpreter provides os.fsencode (Python 3.2+), that is used
    directly.  Otherwise (Python 2) byte strings are returned unchanged and
    unicode strings are encoded character by character, emulating the
    'surrogateescape' error handler: code points U+DC80..U+DCFF are mapped
    back to the raw bytes 0x80..0xFF.
    '''
    fsencode = getattr(os, 'fsencode', None)
    if fsencode is not None:
        return fsencode(filename)

    # Python 2 fallback
    if not isinstance(filename, unicode):
        return filename
    encoding = sys.getfilesystemencoding()
    if encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(encoding)
    encoded = []
    for char in filename:
        # surrogateescape error handler
        if 0xDC80 <= ord(char) <= 0xDCFF:
            byte = chr(ord(char) - 0xDC00)
        else:
            byte = char.encode(encoding)
        encoded.append(byte)
    return ''.join(encoded)
137c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
class StringTruncated(RuntimeError):
    '''Raised by TruncatedStringIO.write() when the output has been cut off'''
140c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer, similar to cStringIO, with an optional
    cap on its length.

    When "maxlen" is set (truthy) and a write would take the buffer past it,
    only as much of the data as still fits is stored and a StringTruncated
    exception is raised.
    '''
    def __init__(self, maxlen=None):
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''Append "data", raising StringTruncated if it does not all fit'''
        limit = self.maxlen
        if limit:
            room = limit - len(self._val)
            if len(data) > room:
                # Truncation: keep the part that fits, then bail out
                self._val += data[0:room]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything stored so far'''
        return self._val
159c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
160c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
# pretty printer lookup: the _typename of every class created through
# PrettyPrinterTrackerMeta ends up in this set
all_pretty_typenames = set()

class PrettyPrinterTrackerMeta(type):
    '''Metaclass recording each new class's _typename in all_pretty_typenames'''

    def __init__(cls, name, bases, dict):
        super(PrettyPrinterTrackerMeta, cls).__init__(name, bases, dict)
        # _typename may be inherited from a base class
        typename = cls._typename
        all_pretty_typenames.add(typename)
16923d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu
17023d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu
17123d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xuclass PyObjectPtr(object):
17223d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu    """
17323d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu    Class wrapping a gdb.Value that's a either a (PyObject*) within the
17423d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu    inferior process, or some subclass pointer e.g. (PyStringObject*)
175c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
176c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    There will be a subclass for every refined PyObject type that we care
177c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    about.
178c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
179c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    Note that at every stage the underlying pointer could be NULL, point
180c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    to corrupt data, etc; this is the debugger, after all.
181c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    """
182c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
183c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    __metaclass__ = PrettyPrinterTrackerMeta
184c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
185c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    _typename = 'PyObject'
186c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
187c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    def __init__(self, gdbval, cast_to=None):
188c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        if cast_to:
189c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu            self._gdbval = gdbval.cast(cast_to)
190c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        else:
191c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu            self._gdbval = gdbval
192c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
193c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    def field(self, name):
194c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        '''
195c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        Get the gdb.Value for the given field within the PyObject, coping with
196c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        some python 2 versus python 3 differences.
197c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
198c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        Various libpython types are defined using the "PyObject_HEAD" and
19923d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        "PyObject_VAR_HEAD" macros.
200c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
201c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu        In Python 2, this these are defined so that "ob_type" and (for a var
202c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu        object) "ob_size" are fields of the type in question.
203699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
20447dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu        In Python 3, this is defined as an embedded PyVarObject type thus:
20547dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu           PyVarObject ob_base;
20647dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu        so that the "ob_size" field is located insize the "ob_base" field, and
207699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        the "ob_type" is most easily accessed by casting back to a (PyObject*).
20847dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu        '''
20947dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu        if self.is_null():
21047dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu            raise NullPyObjectPtr(self)
211699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
2128bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        if name == 'ob_type':
213c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
2143133f79cf451e6302dd05262b4bb53a3e4fd6300Ted Kremenek            return pyo_ptr.dereference()[name]
2156bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines
2166bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines        if name == 'ob_size':
2175251abea41b446c26e3239c8dd6c7edea6fc335dDavid Blaikie            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
2185eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            return pyo_ptr.dereference()[name]
21947dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu
220c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu        # General case: look it up inside the object:
221c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu        return self._gdbval.dereference()[name]
222c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
2238bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    def pyop_field(self, name):
224651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        '''
22547dc37f1efa6942366dd61c4acb0c874049dd1e0Zhongxing Xu        Get a PyObjectPtr for the given PyObject* field within this PyObject,
2268046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek        coping with some python 2 versus python 3 differences.
2278046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek        '''
2288046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek        return PyObjectPtr.from_pyobject_ptr(self.field(name))
229466224fd068a0a0084968a7f521a690a51c3b226Jordan Rose
2308046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek    def write_field_repr(self, name, out, visited):
231466224fd068a0a0084968a7f521a690a51c3b226Jordan Rose        '''
2328046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek        Extract the PyObject* field named "name", and write its representation
2330bd6b110e908892d4b5c8671a9f435a1d72ad16aAnna Zaks        to file-like object "out"
2340bd6b110e908892d4b5c8671a9f435a1d72ad16aAnna Zaks        '''
2358046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek        field_obj = self.pyop_field(name)
236c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu        field_obj.write_repr(out, visited)
237c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
238699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis    def get_truncated_repr(self, maxlen):
2398bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        '''
2409843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        Get a repr-like string for the data, but truncate it at "maxlen" bytes
2410bd6b110e908892d4b5c8671a9f435a1d72ad16aAnna Zaks        (ending the object graph traversal as soon as you do)
2429843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        '''
2439843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        out = TruncatedStringIO(maxlen)
244699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        try:
2458bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek            self.write_repr(out, set())
2465eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        except StringTruncated:
2475eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            # Truncation occurred:
24823d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu            return out.getvalue() + '...(truncated)'
24923d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu
25023d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        # No truncation occurred:
251699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        return out.getvalue()
2528bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
2535eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    def type(self):
2545eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        return PyTypeObjectPtr(self.field('ob_type'))
255c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
256c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    def is_null(self):
257c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        return 0 == long(self._gdbval)
258699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
2598bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    def is_optimized_out(self):
2605eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        '''
2615eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        Is the value of the underlying PyObject* visible to the debugger?
2620c2e8c87f18e861cb48965784e20b9292fb70b60Zhongxing Xu
2630c2e8c87f18e861cb48965784e20b9292fb70b60Zhongxing Xu        This can vary with the precise version of the compiler used to build
2645eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        Python, and the precise version of gdb.
265dc84cd5efdd3430efb22546b4ac656aa0540b210David Blaikie
26602b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
2670c2e8c87f18e861cb48965784e20b9292fb70b60Zhongxing Xu        PyEval_EvalFrameEx's "f"
26802b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek        '''
2690c2e8c87f18e861cb48965784e20b9292fb70b60Zhongxing Xu        return self._gdbval.is_optimized_out
2700c2e8c87f18e861cb48965784e20b9292fb70b60Zhongxing Xu
27102b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek    def safe_tp_name(self):
27223d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        try:
2730c2e8c87f18e861cb48965784e20b9292fb70b60Zhongxing Xu            return self.type().field('tp_name').string()
2740bd6b110e908892d4b5c8671a9f435a1d72ad16aAnna Zaks        except NullPyObjectPtr:
27502b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek            # NULL tp_name?
276651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines            return 'unknown'
277651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        except RuntimeError:
278651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines            # Can't even read the object at all?
279651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines            return 'unknown'
28002b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek
28102b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek    def proxyval(self, visited):
282785950e59424dca7ce0081bebf13c0acd2c4fff6Jordan Rose        '''
28302b49bb23273f3488a47f8abadf0ec7a98429d1fTed Kremenek        Scrape a value from the inferior process, and try to represent it
28423d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        within the gdb process, whilst (hopefully) avoiding crashes when
28523d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        the remote data is corrupt.
286699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
2878bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        Derived classes will override this.
2885eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek
2895eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        For example, a PyIntObject* with ob_ival 42 in the inferior process
29023d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        should result in an int(42) in this process.
29123d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu
29223d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        visited: a set of all gdb.Value pyobject pointers already visited
293699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        whilst generating this value (to guard against infinite recursion when
2948bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        visiting object graphs with loops).  Analogous to Py_ReprEnter and
2955eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        Py_ReprLeave
2965eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        '''
29723d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu
29823d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        class FakeRepr(object):
299c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu            """
300699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis            Class representing a non-descript PyObject* value in the inferior
3018bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek            process for when we don't have a custom scraper, intended to have
3025eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            a sane repr().
3035eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            """
304c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
305c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu            def __init__(self, tp_name, address):
306c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu                self.tp_name = tp_name
307699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis                self.address = address
3088bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
3095eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            def __repr__(self):
3105eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek                # For the NULL pointer, we have no way of knowing a type, so
311c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu                # special-case it as per
312c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu                # http://bugs.python.org/issue8032#msg100882
313c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu                if self.address == 0:
314699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis                    return '0x0'
3158bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek                return '<%s at remote 0x%x>' % (self.tp_name, self.address)
3165eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek
3175eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        return FakeRepr(self.safe_tp_name(),
318c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu                        long(self._gdbval))
319c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu
320c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu    def write_repr(self, out, visited):
321699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        '''
3228bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        Write a string representation of the value scraped from the inferior
3235eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        process to "out", a file-like object.
3245eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        '''
325c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        # Default implementation: generate a proxy value and write its repr
326c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        # However, this could involve a lot of work for complicated objects,
327c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        # so for derived classes we specialize this
328699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        return out.write(repr(self.proxyval(visited)))
3298bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
3305eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    @classmethod
3315eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    def subclass_from_type(cls, t):
332c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        '''
333c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
334c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
335699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis        to use
3368bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
3375eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        Ideally, we would look up the symbols for the global types, but that
3385eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        isn't working yet:
339c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
340c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu          Traceback (most recent call last):
341c7de88b6e777ad71e730efc152413ebca160a117Zhongxing Xu            File "<string>", line 1, in <module>
3428bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek          NotImplementedError: Symbol type not yet supported in Python scripts.
343699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis          Error while executing Python code.
344dc84cd5efdd3430efb22546b4ac656aa0540b210David Blaikie
34523d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        For now, we use tp_flags, after doing some string comparisons on the
3466bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines        tp_name for some special-cases that don't seem to be visible through
347b3f403112d40bc0a6ec240ae1feb536cc6892d47Zhongxing Xu        flags
34823d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        '''
3498bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        try:
350651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines            tp_name = t.field('tp_name').string()
351b3f403112d40bc0a6ec240ae1feb536cc6892d47Zhongxing Xu            tp_flags = int(t.field('tp_flags'))
35223d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        except RuntimeError:
353d048c6ef5b6cfaa0cecb8cc1d4bdace32ed21d07Ted Kremenek            # Handle any kind of error e.g. NULL ptrs by simply using the base
35423d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu            # class
355651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines            return cls
356651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines
35723d90f90413ff1efd7e4410d28ae2cab99af1fdbZhongxing Xu        #print 'tp_flags = 0x%08x' % tp_flags
358785950e59424dca7ce0081bebf13c0acd2c4fff6Jordan Rose        #print 'tp_name = %r' % tp_name
359c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu
3606bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines        name_map = {'bool': PyBoolObjectPtr,
361c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu                    'classobj': PyClassObjectPtr,
362ab42130a229ea5ee80d96cfc33799d558d79be17Zhongxing Xu                    'instance': PyInstanceObjectPtr,
363c196095fa0b83d10b2c20fccfcb8198ee66451aaZhongxing Xu                    'NoneType': PyNoneStructPtr,
3649843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu                    'frame': PyFrameObjectPtr,
3658bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek                    'set' : PySetObjectPtr,
3668bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek                    'frozenset' : PySetObjectPtr,
367699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis                    'builtin_function_or_method' : PyCFunctionObjectPtr,
3685eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek                    }
3695eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        if tp_name in name_map:
3708046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek            return name_map[tp_name]
3718046037b8342f46197bbee79df83a54b873ae6e6Ted Kremenek
3729843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        if tp_flags & (Py_TPFLAGS_HEAPTYPE|Py_TPFLAGS_TYPE_SUBCLASS):
373466224fd068a0a0084968a7f521a690a51c3b226Jordan Rose            return PyTypeObjectPtr
3741c7370f933e4f413d8cc8964ff946d4261da2e78Zhongxing Xu
3751c7370f933e4f413d8cc8964ff946d4261da2e78Zhongxing Xu        if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
3761c7370f933e4f413d8cc8964ff946d4261da2e78Zhongxing Xu            return PyIntObjectPtr
3771c7370f933e4f413d8cc8964ff946d4261da2e78Zhongxing Xu        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
3789843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu            return PyLongObjectPtr
3799843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
3809843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu            return PyListObjectPtr
3819843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
382d048c6ef5b6cfaa0cecb8cc1d4bdace32ed21d07Ted Kremenek            return PyTupleObjectPtr
3839843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
3849843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu            try:
385651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                gdb.lookup_type('PyBytesObject')
386651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                return PyBytesObjectPtr
387651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines            except RuntimeError:
3889843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu                return PyStringObjectPtr
389a7e6845660f91ec611427e1db842780e1ec12bdbEli Friedman        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
390785950e59424dca7ce0081bebf13c0acd2c4fff6Jordan Rose            return PyUnicodeObjectPtr
3919843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
3926bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines            return PyDictObjectPtr
3939843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
3949843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu            return PyBaseExceptionObjectPtr
3959843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu
396466224fd068a0a0084968a7f521a690a51c3b226Jordan Rose        # Use the base class:
3979843ba9be3560f7b283a6b5a927e4620cfce897dZhongxing Xu        return cls
398766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu
399699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis    @classmethod
400699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis    def from_pyobject_ptr(cls, gdbval):
4015ac1df3e15f91ed663826faec7efe2462c18d98cAnna Zaks        '''
402766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu        Try to locate the appropriate derived class dynamically, and cast
403a7e6845660f91ec611427e1db842780e1ec12bdbEli Friedman        the pointer accordingly.
404766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu        '''
4058bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        try:
406466224fd068a0a0084968a7f521a690a51c3b226Jordan Rose            p = PyObjectPtr(gdbval)
407766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu            cls = cls.subclass_from_type(p.type())
4080621c45dcd4c5f43df0de5a2febae525d3287b74Anna Zaks            return cls(gdbval, cast_to=cls.get_gdb_type())
409766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu        except RuntimeError, exc:
410766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu            # Handle any kind of error e.g. NULL ptrs by simply using the base
411d048c6ef5b6cfaa0cecb8cc1d4bdace32ed21d07Ted Kremenek            # class
412766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu            pass
413a7e6845660f91ec611427e1db842780e1ec12bdbEli Friedman        return cls(gdbval)
414651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines
415651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    @classmethod
416651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    def get_gdb_type(cls):
417a7e6845660f91ec611427e1db842780e1ec12bdbEli Friedman        return gdb.lookup_type(cls._typename).pointer()
418a7e6845660f91ec611427e1db842780e1ec12bdbEli Friedman
419785950e59424dca7ce0081bebf13c0acd2c4fff6Jordan Rose    def as_address(self):
420766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu        return long(self._gdbval)
421766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu
422766c20130dc6b960be420483a168c82a66b5bb7bZhongxing Xu
class PyVarObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyVarObject'
    '''
    _typename = 'PyVarObject'
425699bbf9f3fa67ededdd762d2637d72d2a4a88b7aArgyrios Kyrtzidis
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy value used in place of an object that has already been
    visited, so that loops in the object graph do not recurse forever.

    Its repr is the fixed marker text supplied at construction time (for
    example '[...]' or '{...}'), analogous to the values emitted by the users
    of Py_ReprEnter and Py_ReprLeave.
    '''
    def __init__(self, rep):
        # "rep" is the literal text that __repr__ will return
        self._rep = rep

    def __repr__(self):
        marker = self._rep
        return marker
438
439
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Write "<NAME(ATTR=VALUE, ...) at remote 0xADDRESS>" to the file-like
    object "out".  Shared by old-style and new-style classes.

    The parenthesised attribute list is only written when pyop_attrdict is a
    PyDictObjectPtr; otherwise the output is "<NAME at remote 0xADDRESS>".'''
    out.write('<')
    out.write(name)

    if isinstance(pyop_attrdict, PyDictObjectPtr):
        # Instance attributes, as comma-separated name=repr pairs:
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
459
460
class InstanceProxy(object):
    '''
    Proxy value standing in for a class instance: it records the class name,
    the (proxied) attribute dictionary and the address, and renders them in
    its repr.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes only the name and address are shown
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        kwargs = ', '.join(pairs)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
477
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''Return tp_basicsize + nitems * tp_itemsize for "typeobj", adjusted with
    the (x + (a - 1)) & ~(a - 1) alignment arithmetic for a = SIZEOF_VOID_P,
    and cast to size_t'''
    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    aligned = unaligned & ~align_mask
    return aligned.cast(gdb.lookup_type('size_t'))
484
class PyTypeObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyTypeObject'.

    Besides printing itself as "<type NAME at remote 0xADDR>", it can locate
    an attribute dictionary through the tp_dictoffset of its type.
    '''
    _typename = 'PyTypeObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary.

        Returns None when tp_dictoffset is zero or when a RuntimeError is
        raised while reading the data.
        '''
        try:
            typeobj = self.type()
            offset = int_from_int(typeobj.field('tp_dictoffset'))
            if offset == 0:
                # No dictionary slot to look at
                return None

            if offset < 0:
                # A negative offset is adjusted by the size computed from
                # the object's ob_size (as in _PyObject_GetDictPtr)
                varobj_ptr_type = gdb.lookup_type('PyVarObject').pointer()
                as_varobj = self._gdbval.cast(varobj_ptr_type)
                nitems = abs(int_from_int(as_varobj['ob_size']))
                offset += _PyObject_VAR_SIZE(typeobj, nitems)
                assert offset > 0
                assert offset % SIZEOF_VOID_P == 0

            # Offset the object's address (cast via _type_char_ptr), then
            # read the (PyObject*) stored at that location
            slot_addr = self._gdbval.cast(_type_char_ptr) + offset
            slot_type = PyObjectPtr.get_gdb_type().pointer()
            slot = slot_addr.cast(slot_type)
            return PyObjectPtr.from_pyobject_ptr(slot.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            return None

    def proxyval(self, visited):
        '''
        Support for new-style classes.

        Returns an InstanceProxy built from the type name, the proxied
        attribute dictionary (empty if none was found) and the address.  The
        dictionary is located using a transliteration to python of
        _PyObject_GetDictPtr, ignoring descriptors.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        pyop_attr_dict = self.get_attr_dict()
        attr_dict = pyop_attr_dict.proxyval(visited) if pyop_attr_dict else {}
        tp_name = self.safe_tp_name()

        # New-style class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''Write "<type NAME at remote 0xADDR>" to "out", or "<...>" if this
        object has already been visited'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        try:
            tp_name = self.field('tp_name').string()
        except RuntimeError:
            tp_name = 'unknown'

        out.write('<type %s at remote 0x%x>' % (tp_name,
                                                self.as_address()))
557
class ProxyException(Exception):
    '''
    Proxy value standing in for an exception: it records the name of the
    exception type and its arguments, and its repr is the name immediately
    followed by the repr of the arguments.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        args_repr = repr(self.args)
        return '%s%s' % (self.tp_name, args_repr)
565
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''Return a ProxyException holding the type name and the proxied
        "args" field, or a ProxyAlreadyVisited marker on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        arg_proxy = self.pyop_field('args').proxyval(visited)
        tp_name = self.safe_tp_name()
        return ProxyException(tp_name, arg_proxy)

    def write_repr(self, out, visited):
        '''Write the type name followed by the repr of the "args" field to
        "out", or "(...)" on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
591
592
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
599
600
class BuiltInFunctionProxy(object):
    '''
    Proxy value for a built-in function; its repr is
    "<built-in function NAME>".
    '''
    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
607
class BuiltInMethodProxy(object):
    '''
    Proxy value for a built-in method bound to an object; its repr is
    "<built-in method NAME of TYPE object at remote 0xADDR>", where TYPE and
    ADDR come from the bound object's safe_tp_name() and as_address().
    '''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        fields = (self.ml_name, owner.safe_tp_name(), owner.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % fields
619
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''Return a BuiltInFunctionProxy when m_self is NULL, otherwise a
        BuiltInMethodProxy bound to the m_self object'''
        # m_ml is a (PyMethodDef*)
        ml_name = self.field('m_ml')['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
636
637
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Walks co_lnotab as (address increment, line increment) pairs,
        starting from co_firstlineno, and stops at the first entry whose
        accumulated address exceeds "addrq".

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        addr = 0
        # Only complete pairs are consumed; a trailing odd entry is ignored
        for pos in range(0, len(lnotab) - 1, 2):
            addr += ord(lnotab[pos])
            if addr > addrq:
                break
            lineno += ord(lnotab[pos + 1])
        return lineno
665
666
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Every slot of ma_table (ma_mask + 1 of them, subject to safe_range)
        is examined; slots whose me_value is NULL are skipped.
        '''
        slot_count = self.field('ma_mask') + 1
        for index in safe_range(slot_count):
            entry = self.field('ma_table') + index
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a dict mapping proxied keys to proxied values, or a
        ProxyAlreadyVisited marker on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            # Proxy the key before the value, as both share "visited"
            key = pyop_key.proxyval(visited)
            proxies[key] = pyop_value.proxyval(visited)
        return proxies

    def write_repr(self, out, visited):
        '''Write "{KEY: VALUE, ...}" to "out", or "{...}" on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
716
class PyInstanceObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as
    'PyInstanceObject' (an instance of an old-style class); the class name
    is read from in_class->cl_name and the attributes from in_dict.
    '''
    _typename = 'PyInstanceObject'

    def proxyval(self, visited):
        '''Return an InstanceProxy for this instance, or a
        ProxyAlreadyVisited marker on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        # Name of the class, then the dictionary of instance attributes:
        pyop_class = self.pyop_field('in_class')
        cl_name = pyop_class.pyop_field('cl_name').proxyval(visited)
        in_dict = self.pyop_field('in_dict').proxyval(visited)

        # Old-style class:
        return InstanceProxy(cl_name, in_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''Write "<NAME(ATTR=VALUE, ...) at remote 0xADDR>" to "out" via
        _write_instance_repr, or "<...>" on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        # Old-style class: name of the class first...
        pyop_class = self.pyop_field('in_class')
        cl_name = pyop_class.pyop_field('cl_name').proxyval(visited)

        # ...then the (unproxied) dictionary of instance attributes:
        pyop_in_dict = self.pyop_field('in_dict')

        _write_instance_repr(out, visited,
                             cl_name, pyop_in_dict, self.as_address())
754
class PyIntObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyIntObject'
    '''
    _typename = 'PyIntObject'

    def proxyval(self, visited):
        '''Return the ob_ival field converted with int_from_int()'''
        return int_from_int(self.field('ob_ival'))
761
class PyListObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyListObject';
    elements are read from ob_item, ob_size of them (subject to safe_range).
    '''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a list of the proxied elements, or a ProxyAlreadyVisited
        marker on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(length):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(pyop_item.proxyval(visited))
        return items

    def write_repr(self, out, visited):
        '''Write "[ITEM, ITEM, ...]" to "out", or "[...]" on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
794
class PyLongObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyLongObject'
    '''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Return the value as a Python long, rebuilt from the digit array.

        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return long(0)

        ob_digit = self.field('ob_digit')

        # The shift is chosen from the size of the "digit" type:
        # 15 when sizeof(digit) is 2, otherwise 30
        shift = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = sum(long(ob_digit[i]) << (shift * i)
                        for i in safe_range(abs(ob_size)))
        return -magnitude if ob_size < 0 else magnitude

    def write_repr(self, out, visited):
        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
        out.write("%s" % self.proxyval(visited))
838
839
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    _typename = 'PyBoolObject'

    def proxyval(self, visited):
        '''Return the value as a Python bool, by reading it as a
        PyLongObject.

        Note that this rebinds self._gdbval to the pointer re-cast as a
        (PyLongObject*).'''
        long_ptr_type = gdb.lookup_type('PyLongObject').pointer()
        self._gdbval = self._gdbval.cast(long_ptr_type)
        as_long = PyLongObjectPtr(self._gdbval)
        return bool(as_long.proxyval(visited))
851
852
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyObject* pointing to the singleton
    (we hope) _Py_NoneStruct, whose ob_type is PyNone_Type.
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        # The proxy for None is simply None; "visited" is not consulted
        return None
862
863
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyFrameObject'.

    Unless the value is optimized out, the constructor caches the code
    object (co), its co_name / co_filename / co_varnames wrappers, and the
    f_lineno, f_lasti and co_nlocals integers as attributes.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Locals whose value pointer is NULL are skipped; nothing is yielded
        when the frame is optimized out.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterator over (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame

        An empty iterator is returned when the frame is optimized out.
        '''
        if self.is_optimized_out():
            # This is not a generator, so a bare "return" would hand None to
            # callers that iterate over the result (e.g. get_var_by_name)
            return iter(())

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterator over (name,value) pairs of PyObjectPtr instances,
        for the builtin variables

        An empty iterator is returned when the frame is optimized out.
        '''
        if self.is_optimized_out():
            # See iter_globals: callers must always get something iterable
            return iter(())

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        The scopes are searched in that order and the first match wins.

        If not found, return (None, None)
        '''
        scopes = (('local', self.iter_locals),
                  ('global', self.iter_globals),
                  ('builtin', self.iter_builtins))
        for scope, iter_vars in scopes:
            # Each scope is only iterated once the previous ones have failed
            for pyop_name, pyop_value in iter_vars():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Returns None when the frame is optimized out.

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        The source file is opened and read on every call; errors from
        open() or from an out-of-range line number propagate to the caller.
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        with open(os_fsencode(filename), 'r') as f:
            all_lines = f.readlines()
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num()-1]

    def write_repr(self, out, visited):
        '''Write "Frame 0xADDR, for file FILE, line N, in NAME (LOCAL=VALUE,
        ...)" to "out", or a fixed message when the frame is optimized out'''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        for index, (pyop_name, pyop_value) in enumerate(self.iter_locals()):
            if index:
                out.write(', ')

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')
991
class PySetObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PySetObject';
    used for both set and frozenset.

    Members are found by scanning the "table" array (mask + 1 slots, subject
    to safe_range), skipping NULL keys and keys whose proxy value is the
    string '<dummy key>'.
    '''
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''Return a set (or a frozenset, when the type name is 'frozenset')
        of the proxied members, or a ProxyAlreadyVisited marker on a
        revisit'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''Write the set to "out" in the style of Python 3's set_repr:
        "{A, B}" for a set, "NAME({A, B})" for other type names, "NAME()"
        when empty, and "(...)" on a revisit'''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # The proxy is only needed to recognise the dummy key.
                # Compute it against a copy of "visited": proxyval() records
                # container keys (e.g. tuples) as visited, which would make
                # the write_repr() call below print them as "(...)".
                key_proxy = pyop_key.proxyval(set(visited))
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
1054
1055
class PyBytesObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as 'PyBytesObject';
    the data is read from ob_sval, ob_size entries (subject to safe_range).
    '''
    _typename = 'PyBytesObject'

    def __str__(self):
        length = self.field('ob_size')
        data = self.field('ob_sval')
        # Each entry is packed as a signed char and the pieces are joined
        pieces = [struct.pack('b', data[i]) for i in safe_range(length)]
        return ''.join(pieces)

    def proxyval(self, visited):
        '''Return the contents as a str in the gdb process'''
        return str(self)

    def write_repr(self, out, visited, py3=True):
        '''Write a quoted, escaped literal for the contents to "out"; with
        py3 true (the default) it gets a "b" prefix, i.e. it is written as a
        Python 3 bytes literal'''
        # Get a PyStringObject* within the Python 2 gdb process:
        text = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code:
        # double quotes are used only when that avoids escaping
        if "'" in text and '"' not in text:
            quote = '"'
        else:
            quote = "'"

        if py3:
            out.write('b')

        simple_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write(quote)
        for byte in text:
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in simple_escapes:
                out.write(simple_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside the printable range becomes \xHH
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code & 0xf0) >> 4])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1101
class PyStringObjectPtr(PyBytesObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as
    'PyStringObject'; it reuses the PyBytesObjectPtr behaviour but writes its
    repr without the "b" prefix.
    '''
    _typename = 'PyStringObject'

    def write_repr(self, out, visited):
        parent = super(PyStringObjectPtr, self)
        return parent.write_repr(out, visited, py3=False)
1107
class PyTupleObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value whose gdb type is looked up as
    'PyTupleObject'; elements are read from ob_item, ob_size of them
    (subject to safe_range).
    '''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a tuple of the proxied elements, or a ProxyAlreadyVisited
        marker on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(length):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(pyop_item.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''Write "(ITEM, ITEM, ...)" to "out", with the trailing comma for a
        single-element tuple, or "(...)" on a revisit'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        # A one-element tuple needs the trailing comma: "(x,)"
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1143
1144
1145def _unichr_is_printable(char):
1146    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1147    if char == u" ":
1148        return True
1149    import unicodedata
1150    return unicodedata.category(char) not in ("C", "Z")
1151
# Pick an implementation of _unichr (code point -> unicode string) that
# suits the width of unicode characters in gdb's own Python.
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        """Return the unicode string for code point x, as a surrogate pair
        when x lies outside the Basic Multilingual Plane."""
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    _unichr = unichr
1163
class PyUnicodeObjectPtr(PyObjectPtr):
    """
    Pointer wrapper whose gdb type name is 'PyUnicodeObject'.

    It can copy the string out of the inferior into a unicode object local
    to gdb (proxyval) and write an escaped repr of it (write_repr).
    """
    _typename = 'PyUnicodeObject'

    def char_width(self):
        """Return the size of the 'Py_UNICODE' type as looked up via gdb."""
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        """
        Return a unicode object, local to gdb, built from the 'length' and
        'str' fields of the wrapped object.

        'visited' is not used by this method.
        """
        # From unicodeobject.h:
        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
        #     Py_UNICODE *str;    /* Raw Unicode buffer */
        field_length = long(self.field('length'))
        field_str = self.field('str')

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-2 or UCS-4 code points:
        if self.char_width() > 2:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Lone high surrogate: keep it as-is instead of silently
                    # dropping it; the following unit is handled by the next
                    # iteration.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        """
        Write an escaped, quoted repr of the string to 'out'.

        A 'u' prefix is written only when the symbol 'PyString_Type' can be
        evaluated in the inferior (taken to mean a Python 2 debuggee).
        """
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        try:
            gdb.parse_and_eval('PyString_Type')
        except RuntimeError:
            # Python 3, don't write 'u' as prefix
            pass
        else:
            # Python 2, write the 'u'
            out.write('u')

        # Prefer single quotes; switch to double quotes only when that
        # avoids escaping.
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  ch is a character, so
            # its ordinal (not ch itself) must be compared against 0x7F.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # Characters that cannot be encoded with ENCODING are
                    # escaped as well.
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        out.write(hexdigits[(code >> 28) & 0x0000000F])
                        out.write(hexdigits[(code >> 24) & 0x0000000F])
                        out.write(hexdigits[(code >> 20) & 0x0000000F])
                        out.write(hexdigits[(code >> 16) & 0x0000000F])
                        out.write(hexdigits[(code >> 12) & 0x0000000F])
                        out.write(hexdigits[(code >> 8) & 0x0000000F])
                        out.write(hexdigits[(code >> 4) & 0x0000000F])
                        out.write(hexdigits[code & 0x0000000F])
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        out.write(hexdigits[(code >> 12) & 0x000F])
                        out.write(hexdigits[(code >> 8) & 0x000F])
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)

    def __unicode__(self):
        """Return the string as a unicode object local to gdb."""
        return self.proxyval(set())

    def __str__(self):
        """Return the string encoded as UTF-8 bytes."""
        # In Python 3, everything is unicode (including attributes of e.g.
        # code objects, such as function names). The Python 2 debugger code
        # uses PyUnicodePtr objects to format strings etc, whereas with a
        # Python 2 debuggee we'd get PyStringObjectPtr instances with __str__.
        # Be compatible with that.
        return unicode(self).encode('UTF-8')
1335
def int_from_int(gdbval):
    """Convert 'gdbval' to an int by parsing its string form."""
    text = str(gdbval)
    return int(text)
1338
1339
def stringify(val):
    """Return the string form of 'val' used for display (currently repr)."""
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_repr = True
    if not use_repr:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1348
1349
class PyObjectPtrPrinter:
    "Pretty-printer object that prints a (PyObject*)"

    def __init__ (self, gdbval):
        # The gdb value to be printed
        self.gdbval = gdbval

    def to_string (self):
        """Return the repr of the wrapped value, truncated to
        MAX_OUTPUT_LEN."""
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        use_truncated_repr = True
        if not use_truncated_repr:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            return stringify(pyop.proxyval(set()))
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1365
def pretty_printer_lookup(gdbval):
    """
    Return a PyObjectPtrPrinter for 'gdbval' if it is a pointer whose
    (unqualified) target type name is in all_pretty_typenames, else None.
    """
    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_type = value_type.target().unqualified()
    if str(target_type) in all_pretty_typenames:
        return PyObjectPtrPrinter(gdbval)
    return None
1372
1373"""
1374During development, I've been manually invoking the code in this way:
1375(gdb) python
1376
1377import sys
1378sys.path.append('/home/david/coding/python-gdb')
1379import libpython
1380end
1381
1382then reloading it after each edit like this:
1383(gdb) python reload(libpython)
1384
The following code should ensure that the pretty-printer is registered
if the code is autoloaded by gdb when visiting libpython.so, provided
that this python file is installed to the same path as the library (or its
.debug file) plus a "-gdb.py" suffix, e.g.:
1389  /usr/lib/libpython2.6.so.1.0-gdb.py
1390  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1391"""
def register (obj):
    """
    Append pretty_printer_lookup to obj.pretty_printers.

    When 'obj' is None the gdb module itself is used, so the lookup
    function is added to gdb.pretty_printers.
    """
    # Identity test: avoids invoking any custom equality on 'obj'
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1400
1401# Unfortunately, the exact API exposed by the gdb module varies somewhat
1402# from build to build
1403# See http://bugs.python.org/issue8279?#msg102276
1404
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        # The wrapped gdb.Frame
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame wrapping the next older gdb frame, or None.'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame wrapping the next newer gdb frame, or None.'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking for each newer
        # frame only once:
        iter_frame = self.newer()
        while iter_frame:
            index += 1
            iter_frame = iter_frame.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() == 'PyEval_EvalFrameEx':
            '''
            I believe we also need to filter on the inline
            struct frame_id.inline_depth, only regarding frames with
            an inline depth of 0 as actually being this function

            So we reject those with type gdb.INLINE_FRAME
            '''
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a PyEval_EvalFrameEx frame:
                return True

        return False

    def read_var(self, varname):
        """
        read_var with respect to code blocks (gdbframe.read_var works with
        respect to the most recent block)

        Apparently this function doesn't work, though, as it seems to read
        variables in other frames also sometimes.
        """
        block = self._gdbframe.block()
        var = None

        # Walk outwards through the enclosing blocks until the name is found
        while block and var is None:
            try:
                var = self._gdbframe.read_var(varname, block)
            except ValueError:
                pass

            block = block.superblock

        return var

    def get_pyop(self):
        '''Return a PyFrameObjectPtr for the variable 'f' evaluated in this
        frame, or None if a RuntimeError is raised while doing so.'''
        try:
            # self.read_var does not always work properly, so select our frame
            # and restore the previously selected frame
            selected_frame = gdb.selected_frame()
            self._gdbframe.select()
            try:
                f = gdb.parse_and_eval('f')
            finally:
                # Restore the user's frame even when evaluating 'f' fails
                selected_frame.select()
        except RuntimeError:
            return None
        else:
            return PyFrameObjectPtr.from_pyobject_ptr(f)

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None.'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        frame = cls.get_selected_frame()

        while frame:
            if frame.is_evalframeex():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a one-frame summary, prefixed by the frame index, to
        sys.stdout.'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                sys.stdout.write(pyop.current_line())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            sys.stdout.write('#%i\n' % self.get_index())
1531
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # Parse the optional "START" or "START, END" argument, locate the
        # selected python frame and print the requested source lines,
        # marking the current line with '>'.
        import re

        start = None
        end = None

        m = re.match(r'\s*(\d+)\s*', args)
        if m:
            # group(1) is the number itself, without surrounding whitespace
            start = int(m.group(1))
            end = start + 10

        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if m:
            start, end = map(int, m.groups())

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        if start is None:
            # No range given: show a window around the current line
            start = lineno - 5
            end = lineno + 5

        if start<1:
            start = 1

        # Report an unreadable source file instead of raising into gdb
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n' % (filename, err))
            return
        with f:
            all_lines = f.readlines()

        # start and end are 1-based, all_lines is 0-based;
        # so [start-1:end] as a python slice gives us [start, end] as a
        # closed interval
        for i, line in enumerate(all_lines[start-1:end]):
            linestr = str(i+start)
            # Highlight current line:
            if i + start == lineno:
                linestr = '>' + linestr
            sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1601
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)

    Starting from the selected python frame, walk towards older frames
    (move_up true) or newer frames (move_up false) until another
    PyEval_EvalFrameEx frame is found; select it and print its summary.
    Prints a message when no such frame exists.'''
    current = Frame.get_selected_python_frame()
    while current:
        candidate = current.older() if move_up else current.newer()
        if not candidate:
            break

        if candidate.is_evalframeex():
            # Result: only print the summary if the frame could be selected
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    if not move_up:
        print('Unable to find a newer python frame')
    else:
        print('Unable to find an older python frame')
1626
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'
    def __init__(self):
        # Registers the command under the name "py-up"
        command_name = "py-up"
        gdb.Command.__init__(self, command_name,
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Arguments are ignored; always moves towards older frames
        move_in_stack(True)
1638
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'
    def __init__(self):
        # Registers the command under the name "py-down"
        command_name = "py-down"
        gdb.Command.__init__(self, command_name,
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Arguments are ignored; always moves towards newer frames
        move_in_stack(False)
1650
# Not all builds of gdb have gdb.Frame.select
# py-up and py-down are therefore only instantiated (and thereby registered)
# when gdb.Frame has a 'select' attribute.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1655
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        # Registers the command under the name "py-bt"
        command_name = "py-bt"
        gdb.Command.__init__(self, command_name,
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk from the selected python frame towards older frames, printing
        # a summary for every PyEval_EvalFrameEx frame on the way.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()

PyBacktrace()
1673
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'
    def __init__(self):
        # Registers the command under the name "py-print"
        command_name = "py-print"
        gdb.Command.__init__(self, command_name,
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_repr))

PyPrint()
1707
class PyLocals(gdb.Command):
    'List all the locals in the currently selected Python frame'

    def invoke(self, args, from_tty):
        # Print every (name, value) pair yielded by get_namespace() for the
        # selected python frame, with the names padded to a common width.
        # 'args' is ignored.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        namespace = self.get_namespace(pyop_frame)
        # Materialise the names as local strings so that they can be measured
        namespace = [(name.proxyval(set()), val) for name, val in namespace]

        if namespace:
            # Width of the longest name, used to align the '=' signs
            max_name_length = max(len(name) for name, val in namespace)

            for name, pyop_value in namespace:
                value = pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)
                print ('%-*s = %s' % (max_name_length, name, value))

    def get_namespace(self, pyop_frame):
        # Subclasses override this to list a different namespace
        return pyop_frame.iter_locals()
1737
1738
class PyGlobals(PyLocals):
    'List all the globals in the currently selected Python frame'

    def get_namespace(self, pyop_frame):
        # Same listing as the parent class, but over the frame's globals
        return pyop_frame.iter_globals()
1744
1745
# Register the py-locals and py-globals commands. Neither class defines its
# own __init__, so the command name, command class and completer class are
# passed to the inherited constructor.
PyLocals("py-locals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
PyGlobals("py-globals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
1748
1749
# gdb convenience function, registered below as "pyname_equals": compares
# the 'co_name' attribute of the selected python frame with its argument.
class PyNameEquals(gdb.Function):

    def _get_pycurframe_attr(self, attr):
        # Return the proxy value of attribute 'attr' of the selected frame's
        # python frame object, or None if the selected frame is not a
        # PyEval_EvalFrameEx frame or its frame object cannot be read.
        frame = Frame(gdb.selected_frame())
        if not frame.is_evalframeex():
            return None

        pyframe = frame.get_pyop()
        if pyframe is None:
            warnings.warn("Use a Python debug build, Python breakpoints "
                          "won't work otherwise.")
            return None

        return getattr(pyframe, attr).proxyval(set())

    def invoke(self, funcname):
        current_name = self._get_pycurframe_attr('co_name')
        if current_name is None:
            return False
        return current_name == funcname.string()

PyNameEquals("pyname_equals")
1770
1771
# gdb convenience function, registered below as "pymod_equals": compares the
# base name (without extension) of the selected python frame's 'co_filename'
# with its argument.
class PyModEquals(PyNameEquals):

    def invoke(self, modname):
        path = self._get_pycurframe_attr('co_filename')
        if path is None:
            return False

        basename = os.path.basename(path)
        stem = os.path.splitext(basename)[0]
        return stem == modname.string()

PyModEquals("pymod_equals")
1782
1783
class PyBreak(gdb.Command):
    """
    Set a Python breakpoint. Examples:

    Break on any function or method named 'func' in module 'modname'

        py-break modname.func

    Break on any function or method named 'func'

        py-break func
    """

    def invoke(self, funcname, from_tty):
        # Split on the last dot; 'dot' is empty when there is no module part,
        # in which case 'name' is the whole argument.
        modname, dot, name = funcname.rpartition('.')
        if dot:
            cond = '$pyname_equals("%s") && $pymod_equals("%s")' % (name,
                                                                    modname)
        else:
            cond = '$pyname_equals("%s")' % name

        # Conditional breakpoint on the interpreter's frame evaluation function
        gdb.execute('break PyEval_EvalFrameEx if ' + cond)

PyBreak("py-break", gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
1808
1809
class _LoggingState(object):
    """
    Context manager backing the reentrant gdb.execute() replacement.

    gdb's logging is pointed at a temporary file; every nested 'with' level
    records the file size on entry so that getoutput() can return only the
    text written since then.
    """

    def __init__(self):
        handle, path = tempfile.mkstemp()
        self.fd = handle
        self.filename = path
        self.file = os.fdopen(handle, 'r+')
        _execute("set logging file %s" % path)
        # One start offset per active (nested) 'with' block
        self.file_position_stack = []

        atexit.register(os.close, handle)
        atexit.register(os.remove, path)

    def __enter__(self):
        outermost = not self.file_position_stack
        if outermost:
            # Only the outermost level switches logging on
            for command in ("set logging redirect on",
                            "set logging on",
                            "set pagination off"):
                _execute(command)

        current_size = os.fstat(self.fd).st_size
        self.file_position_stack.append(current_size)
        return self

    def getoutput(self):
        """Return everything logged since the innermost __enter__."""
        gdb.flush()
        start_offset = self.file_position_stack[-1]
        self.file.seek(start_offset)
        return self.file.read()

    def __exit__(self, exc_type, exc_val, tb):
        # Discard what this level logged
        start_offset = self.file_position_stack.pop()
        self.file.seek(start_offset)
        self.file.truncate()
        if self.file_position_stack:
            return
        # Leaving the outermost level: switch logging back off
        for command in ("set logging off",
                        "set logging redirect off",
                        "set pagination on"):
            _execute(command)
1847
1848
def execute(command, from_tty=False, to_string=False):
    """
    Drop-in replacement for gdb.execute() that accepts a 'to_string'
    argument (new in 7.2), captures stderr as well, and is reentrant.

    Returns the captured output when to_string is true, otherwise None.
    """
    if not to_string:
        _execute(command, from_tty)
        return None

    with _logging_state as state:
        _execute(command, from_tty)
        return state.getoutput()
1861
1862
# Keep a reference to the original gdb.execute, then install the wrapper
# defined above in its place.
_execute = gdb.execute
gdb.execute = execute
# _LoggingState.__init__ calls _execute, so the instance can only be created
# once _execute has been bound.
_logging_state = _LoggingState()
1866
1867
def get_selected_inferior():
    """
    Return the selected inferior in gdb.

    This currently always returns the first entry of gdb.inferiors().
    """
    # Workaround for http://sourceware.org/bugzilla/show_bug.cgi?id=12212:
    # instead of matching gdb.selected_thread() against the threads of each
    # inferior, simply return the first inferior.
    return gdb.inferiors()[0]
1882
def source_gdb_script(script_contents, to_string=False):
    """
    Source a gdb script with script_contents passed as a string. This is useful
    to provide defines for py-step and py-next to make them repeatable (this is
    not possible with gdb.execute()). See
    http://sourceware.org/bugzilla/show_bug.cgi?id=12216

    Returns whatever gdb.execute() returns for the 'source' command, so the
    output is available to the caller when to_string is true.
    """
    fd, filename = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(script_contents)
        return gdb.execute("source %s" % filename, to_string=to_string)
    finally:
        # Remove the temporary script even if sourcing it raised
        os.remove(filename)
1896
def register_defines():
    """
    Source a gdb script that defines py-step and py-next in terms of
    -py-step and -py-next, documented with the docstrings of PyStep and
    PyNext.
    """
    template = textwrap.dedent("""\
        define py-step
        -py-step
        end

        define py-next
        -py-next
        end

        document py-step
        %s
        end

        document py-next
        %s
        end
    """)
    docs = (PyStep.__doc__, PyNext.__doc__)
    source_gdb_script(template % docs)
1915
1916
def stackdepth(frame):
    "Count 'frame' plus every frame reachable from it through older()."
    depth = 0
    current = frame
    while current:
        depth += 1
        current = current.older()
    return depth
1925
1926class ExecutionControlCommandBase(gdb.Command):
1927    """
1928    Superclass for language specific execution control. Language specific
1929    features should be implemented by lang_info using the LanguageInfo
1930    interface. 'name' is the name of the command.
1931    """
1932
1933    def __init__(self, name, lang_info):
1934        super(ExecutionControlCommandBase, self).__init__(
1935                                name, gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
1936        self.lang_info = lang_info
1937
1938    def install_breakpoints(self):
1939        all_locations = itertools.chain(
1940            self.lang_info.static_break_functions(),
1941            self.lang_info.runtime_break_functions())
1942
1943        for location in all_locations:
1944            result = gdb.execute('break %s' % location, to_string=True)
1945            yield re.search(r'Breakpoint (\d+)', result).group(1)
1946
1947    def delete_breakpoints(self, breakpoint_list):
1948        for bp in breakpoint_list:
1949            gdb.execute("delete %s" % bp)
1950
1951    def filter_output(self, result):
1952        reflags = re.MULTILINE
1953
1954        output_on_halt = [
1955            (r'^Program received signal .*', reflags|re.DOTALL),
1956            (r'.*[Ww]arning.*', 0),
1957            (r'^Program exited .*', reflags),
1958        ]
1959
1960        output_always = [
1961            # output when halting on a watchpoint
1962            (r'^(Old|New) value = .*', reflags),
1963            # output from the 'display' command
1964            (r'^\d+: \w+ = .*', reflags),
1965        ]
1966
1967        def filter_output(regexes):
1968            output = []
1969            for regex, flags in regexes:
1970                for match in re.finditer(regex, result, flags):
1971                    output.append(match.group(0))
1972
1973            return '\n'.join(output)
1974
1975        # Filter the return value output of the 'finish' command
1976        match_finish = re.search(r'^Value returned is \$\d+ = (.*)', result,
1977                                 re.MULTILINE)
1978        if match_finish:
1979            finish_output = 'Value returned: %s\n' % match_finish.group(1)
1980        else:
1981            finish_output = ''
1982
1983        return (filter_output(output_on_halt),
1984                finish_output + filter_output(output_always))
1985
1986
1987    def stopped(self):
1988        return get_selected_inferior().pid == 0
1989
1990    def finish_executing(self, result):
1991        """
1992        After doing some kind of code running in the inferior, print the line
1993        of source code or the result of the last executed gdb command (passed
1994        in as the `result` argument).
1995        """
1996        output_on_halt, output_always = self.filter_output(result)
1997
1998        if self.stopped():
1999            print output_always
2000            print output_on_halt
2001        else:
2002            frame = gdb.selected_frame()
2003            source_line = self.lang_info.get_source_line(frame)
2004            if self.lang_info.is_relevant_function(frame):
2005                raised_exception = self.lang_info.exc_info(frame)
2006                if raised_exception:
2007                    print raised_exception
2008
2009            if source_line:
2010                if output_always.rstrip():
2011                    print output_always.rstrip()
2012                print source_line
2013            else:
2014                print result
2015
2016    def _finish(self):
2017        """
2018        Execute until the function returns (or until something else makes it
2019        stop)
2020        """
2021        if gdb.selected_frame().older() is not None:
2022            return gdb.execute('finish', to_string=True)
2023        else:
2024            # outermost frame, continue
2025            return gdb.execute('cont', to_string=True)
2026
2027    def _finish_frame(self):
2028        """
2029        Execute until the function returns to a relevant caller.
2030        """
2031        while True:
2032            result = self._finish()
2033
2034            try:
2035                frame = gdb.selected_frame()
2036            except RuntimeError:
2037                break
2038
2039            hitbp = re.search(r'Breakpoint (\d+)', result)
2040            is_relevant = self.lang_info.is_relevant_function(frame)
2041            if hitbp or is_relevant or self.stopped():
2042                break
2043
2044        return result
2045
2046    def finish(self, *args):
2047        "Implements the finish command."
2048        result = self._finish_frame()
2049        self.finish_executing(result)
2050
2051    def step(self, stepinto, stepover_command='next'):
2052        """
2053        Do a single step or step-over. Returns the result of the last gdb
2054        command that made execution stop.
2055
2056        This implementation, for stepping, sets (conditional) breakpoints for
2057        all functions that are deemed relevant. It then does a step over until
2058        either something halts execution, or until the next line is reached.
2059
2060        If, however, stepover_command is given, it should be a string gdb
2061        command that continues execution in some way. The idea is that the
2062        caller has set a (conditional) breakpoint or watchpoint that can work
2063        more efficiently than the step-over loop. For Python this means setting
2064        a watchpoint for f->f_lasti, which means we can then subsequently
2065        "finish" frames.
2066        We want f->f_lasti instead of f->f_lineno, because the latter only
2067        works properly with local trace functions, see
2068        PyFrameObjectPtr.current_line_num and PyFrameObjectPtr.addr2line.
2069        """
2070        if stepinto:
2071            breakpoint_list = list(self.install_breakpoints())
2072
2073        beginframe = gdb.selected_frame()
2074
2075        if self.lang_info.is_relevant_function(beginframe):
2076            # If we start in a relevant frame, initialize stuff properly. If
2077            # we don't start in a relevant frame, the loop will halt
2078            # immediately. So don't call self.lang_info.lineno() as it may
2079            # raise for irrelevant frames.
2080            beginline = self.lang_info.lineno(beginframe)
2081
2082            if not stepinto:
2083                depth = stackdepth(beginframe)
2084
2085        newframe = beginframe
2086
2087        while True:
2088            if self.lang_info.is_relevant_function(newframe):
2089                result = gdb.execute(stepover_command, to_string=True)
2090            else:
2091                result = self._finish_frame()
2092
2093            if self.stopped():
2094                break
2095
2096            newframe = gdb.selected_frame()
2097            is_relevant_function = self.lang_info.is_relevant_function(newframe)
2098            try:
2099                framename = newframe.name()
2100            except RuntimeError:
2101                framename = None
2102
2103            m = re.search(r'Breakpoint (\d+)', result)
2104            if m:
2105                if is_relevant_function and m.group(1) in breakpoint_list:
2106                    # although we hit a breakpoint, we still need to check
2107                    # that the function, in case hit by a runtime breakpoint,
2108                    # is in the right context
2109                    break
2110
2111            if newframe != beginframe:
2112                # new function
2113
2114                if not stepinto:
2115                    # see if we returned to the caller
2116                    newdepth = stackdepth(newframe)
2117                    is_relevant_function = (newdepth < depth and
2118                                            is_relevant_function)
2119
2120                if is_relevant_function:
2121                    break
2122            else:
2123                # newframe equals beginframe, check for a difference in the
2124                # line number
2125                lineno = self.lang_info.lineno(newframe)
2126                if lineno and lineno != beginline:
2127                    break
2128
2129        if stepinto:
2130            self.delete_breakpoints(breakpoint_list)
2131
2132        self.finish_executing(result)
2133
2134    def run(self, args, from_tty):
2135        self.finish_executing(gdb.execute('run ' + args, to_string=True))
2136
2137    def cont(self, *args):
2138        self.finish_executing(gdb.execute('cont', to_string=True))
2139
2140
class LanguageInfo(object):
    """
    Interface through which ExecutionControlCommandBase obtains the
    language-specific knowledge it needs for execution control.

    Implementations are expected to provide:

        lineno(frame)
            The current line number. Only called for a relevant frame. A
            false value is never compared against another line number.

        is_relevant_function(frame)
            Whether the given frame is one we care about.

        get_source_line(frame)
            The line of source code for the current line (only called for a
            relevant frame), or None if the source cannot be retrieved.

        exc_info(frame) -- optional
            A string representation of the exception value if an exception
            was raised, None otherwise.

        static_break_functions()
            An iterable of function names that are considered relevant and
            should halt step-into execution. This is what allows step-into
            to perform well.

        runtime_break_functions() -- optional
            The functions that should be broken into depending on the
            context.
    """

    def exc_info(self, frame):
        "Default implementation: no exception is ever reported."
        return None

    def runtime_break_functions(self):
        """
        Override this when the set of step-into functions depends on the
        context. By default there are none.
        """
        return tuple()
2183
class PythonInfo(LanguageInfo):
    """
    LanguageInfo implementation for Python code: the relevant frames are
    the ones for which Frame.is_evalframeex() holds.
    """

    def pyframe(self, frame):
        """
        Return the Python frame object that belongs to the gdb frame, or
        raise when it cannot be found.
        """
        pyop = Frame(frame).get_pyop()
        if not pyop:
            raise gdb.RuntimeError(
                "Unable to find the Python frame, run your code with a debug "
                "build (configure with --with-pydebug or compile with -g).")
        return pyop

    def lineno(self, frame):
        "Return the Python line number the frame is currently executing."
        pyop = self.pyframe(frame)
        return pyop.current_line_num()

    def is_relevant_function(self, frame):
        "A frame is relevant when it is a bytecode evaluation frame."
        return Frame(frame).is_evalframeex()

    def get_source_line(self, frame):
        """
        Return the current line as 'lineno    source', or None when reading
        the source raises an IOError.
        """
        try:
            pyop = self.pyframe(frame)
            number = pyop.current_line_num()
            text = pyop.current_line().rstrip()
            return '%4d    %s' % (number, text)
        except IOError:
            return None

    def exc_info(self, frame):
        """
        Return a message describing the exception recorded in the thread
        state, or None when there is none or it cannot be determined.
        """
        try:
            tstate = frame.read_var('tstate').dereference()
            if not gdb.parse_and_eval('tstate->frame == f'):
                return None

            # tstate local variable initialized, check for an exception
            exc_type = tstate['curexc_type']
            exc_value = tstate['curexc_value']

            if exc_type:
                return 'An exception was raised: %s' % (exc_value,)
        except (ValueError, RuntimeError):
            # Could not read the variable tstate or its memory, that's ok
            pass

        return None

    def static_break_functions(self):
        "Yield the names of the functions that step-into should halt in."
        yield 'PyEval_EvalFrameEx'
2225
2226
class PythonStepperMixin(object):
    """
    Make this a mixin so CyStep can also inherit from this and use a
    CythonCodeStepper at the same time.
    """

    def python_step(self, stepinto):
        """
        Set a watchpoint on the Python bytecode instruction pointer and try
        to finish the frame.

        stepinto is passed on to self.step(). The watchpoint is removed
        again when stepping is done, also when stepping raised an error.
        """
        output = gdb.execute('watch f->f_lasti', to_string=True)
        # gdb announces the new watchpoint as e.g. "Hardware watchpoint 2: ..."
        watchpoint = int(re.search(r'[Ww]atchpoint (\d+):', output).group(1))
        try:
            self.step(stepinto=stepinto, stepover_command='finish')
        finally:
            # Don't leave the watchpoint behind if stepping failed, it would
            # keep halting the inferior on every bytecode instruction.
            gdb.execute('delete %s' % watchpoint)
2242
2243
class PyStep(ExecutionControlCommandBase, PythonStepperMixin):
    "Step through Python code."

    # Whether calls are stepped into. Subclasses that should step over
    # calls instead override this with False.
    stepinto = True

    def invoke(self, args, from_tty):
        # Entry point called by gdb. The command line arguments are unused.
        step_into_calls = self.stepinto
        self.python_step(stepinto=step_into_calls)
2251
class PyNext(PyStep):
    "Step-over Python code."

    # Same machinery as PyStep, but calls are stepped over, not entered.
    stepinto = False
2256
class PyFinish(ExecutionControlCommandBase):
    "Execute until function returns to a caller."

    # gdb calls invoke(); the shared base class implementation does the work.
    invoke = ExecutionControlCommandBase.finish
2261
class PyRun(ExecutionControlCommandBase):
    "Run the program."

    # gdb calls invoke(); the shared base class implementation does the work.
    invoke = ExecutionControlCommandBase.run
2266
class PyCont(ExecutionControlCommandBase):
    "Continue the program."

    # The class docstring doubles as the help text gdb shows for the
    # command; without one gdb reports the command as undocumented, unlike
    # the sibling py-run and py-finish commands.
    invoke = ExecutionControlCommandBase.cont
2270
2271
def _pointervalue(gdbval):
    """
    Return the value of the pointer as a Python int.

    gdbval.type must be a pointer type
    """
    # don't convert with int() as it will raise a RuntimeError
    address = gdbval.address
    if address is None:
        # the address attribute is None sometimes, in which case we can
        # still convert the pointer to an int
        return long(gdbval)

    # NOTE(review): this converts the `address` attribute rather than the
    # value itself -- confirm that is intended for addressable values.
    return long(address)
2285
def pointervalue(gdbval):
    """
    Return the pointer held by `gdbval` as a Python integer.

    Raises gdb.GdbError when the value turns out to be negative, which is
    presumed to be a bug in gdb.
    """
    address = _pointervalue(gdbval)
    try:
        is_negative = address < 0
        if is_negative:
            raise gdb.GdbError("Negative pointer value, presumably a bug "
                               "in gdb, aborting.")
    except RuntimeError:
        # work around yet another bug in gdb where you get random behaviour
        # and tracebacks
        pass

    return address
2298
def get_inferior_unicode_postfix():
    """
    Return the infix the inferior uses in the names of its unicode
    functions: '' when PyUnicode_FromEncodedObject can be evaluated as is,
    'UCS2' when only the UCS2 flavour can, and 'UCS4' otherwise.
    """
    def can_evaluate(symbol):
        # gdb raises a RuntimeError for expressions it cannot evaluate
        try:
            gdb.parse_and_eval(symbol)
        except RuntimeError:
            return False
        return True

    if can_evaluate('PyUnicode_FromEncodedObject'):
        return ''
    if can_evaluate('PyUnicodeUCS2_FromEncodedObject'):
        return 'UCS2'
    return 'UCS4'
2311
class PythonCodeExecutor(object):
    """
    Helper for running Python code inside the inferior by having gdb call
    functions such as malloc(), free() and PyRun_String() in it.
    """

    # Start symbols passed to PyRun_String() as its `start` argument.
    Py_single_input = 256
    Py_file_input = 257
    Py_eval_input = 258

    def malloc(self, size):
        """
        Allocate `size` bytes in the inferior and return the address as an
        integer. Raises gdb.GdbError when the allocation fails.
        """
        chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))

        pointer = pointervalue(chunk)
        if pointer == 0:
            raise gdb.GdbError("No memory could be allocated in the inferior.")

        return pointer

    def alloc_string(self, string):
        """
        Copy `string` into freshly allocated inferior memory and return its
        address. No terminating NUL byte is added; the caller owns the
        memory and should release it with free().
        """
        pointer = self.malloc(len(string))
        get_selected_inferior().write_memory(pointer, string)

        return pointer

    def alloc_pystring(self, string):
        """
        Create a Python string object holding `string` in the inferior and
        return its address. Raises gdb.GdbError when that fails.
        """
        stringp = self.alloc_string(string)
        PyString_FromStringAndSize = 'PyString_FromStringAndSize'

        try:
            gdb.parse_and_eval(PyString_FromStringAndSize)
        except RuntimeError:
            # Python 3
            PyString_FromStringAndSize = ('PyUnicode%s_FromStringAndSize' %
                                               (get_inferior_unicode_postfix(),))

        try:
            result = gdb.parse_and_eval(
                '(PyObject *) %s((char *) %d, (size_t) %d)' % (
                            PyString_FromStringAndSize, stringp, len(string)))
        finally:
            # The temporary C buffer is no longer needed either way.
            self.free(stringp)

        pointer = pointervalue(result)
        if pointer == 0:
            raise gdb.GdbError("Unable to allocate Python string in "
                               "the inferior.")

        return pointer

    def free(self, pointer):
        "Release inferior memory obtained through malloc()."
        gdb.parse_and_eval("free((void *) %d)" % pointer)

    def incref(self, pointer):
        "Increment the reference count of a Python object in the inferior."
        gdb.parse_and_eval('Py_IncRef((PyObject *) %d)' % pointer)

    def xdecref(self, pointer):
        "Decrement the reference count of a Python object in the inferior."
        # Py_DecRef is like Py_XDECREF, but a function. So we don't have
        # to check for NULL. This should also decref all our allocated
        # Python strings.
        gdb.parse_and_eval('Py_DecRef((PyObject *) %d)' % pointer)

    def evalcode(self, code, input_type, global_dict=None, local_dict=None):
        """
        Evaluate python code `code` given as a string in the inferior and
        return the result as a gdb.Value. Returns a new reference in the
        inferior.

        `input_type` is one of the Py_*_input constants of this class.
        `global_dict` and `local_dict` are gdb values pointing to the
        namespaces to evaluate in. Both are required; gdb.GdbError is raised
        when either is missing or NULL, or when `code` contains a NUL byte.

        Of course, executing any code in the inferior may be dangerous and may
        leave the debuggee in an unsafe state or terminate it altogether.
        """
        if '\0' in code:
            raise gdb.GdbError("String contains NUL byte.")

        # Validate the namespaces before allocating anything in the
        # inferior, so that a failure here cannot leak the code buffer.
        if global_dict is None or local_dict is None:
            raise gdb.GdbError("Unable to obtain or create locals or globals.")

        globalsp = pointervalue(global_dict)
        localsp = pointervalue(local_dict)

        if globalsp == 0 or localsp == 0:
            raise gdb.GdbError("Unable to obtain or create locals or globals.")

        # PyRun_String expects a NUL-terminated C string.
        pointer = self.alloc_string(code + '\0')

        try:
            call = """
                PyRun_String(
                    (char *) %(code)d,
                    (int) %(start)d,
                    (PyObject *) %(globals)d,
                    (PyObject *) %(locals)d)
            """ % dict(code=pointer, start=input_type,
                       globals=globalsp, locals=localsp)

            with FetchAndRestoreError():
                pyobject_return_value = gdb.parse_and_eval(call)
        finally:
            # Also reached when setting up the error-state guard fails.
            self.free(pointer)

        return pyobject_return_value
2410
class FetchAndRestoreError(PythonCodeExecutor):
    """
    Context manager that saves the inferior's error indicator on entry and
    puts it back on exit, after printing any error raised in between.
    """

    def __init__(self):
        self.sizeof_PyObjectPtr = gdb.lookup_type('PyObject').pointer().sizeof
        self.pointer = self.malloc(self.sizeof_PyObjectPtr * 3)

        # Three consecutive pointer-sized slots in the inferior, used for
        # the exception type, value and traceback respectively.
        slot_size = self.sizeof_PyObjectPtr
        base = self.pointer
        self.errstate = (base, base + slot_size, base + slot_size * 2)

    def __enter__(self):
        gdb.parse_and_eval("PyErr_Fetch(%d, %d, %d)" % self.errstate)

    def __exit__(self, *args):
        error_pending = gdb.parse_and_eval("(int) PyErr_Occurred()")
        if error_pending:
            gdb.parse_and_eval("PyErr_Print()")

        restore_template = ("PyErr_Restore("
                            "(PyObject *) *%d,"
                            "(PyObject *) *%d,"
                            "(PyObject *) *%d)")

        try:
            gdb.parse_and_eval(restore_template % self.errstate)
        finally:
            # The scratch slots are released even if restoring failed.
            self.free(self.pointer)
2443
2444
class FixGdbCommand(gdb.Command):
    # Wrapper command: registers `command` with gdb and forwards it to
    # `actual_command`, running the fix_gdb() workaround before and after.
    # (Deliberately documented with comments: a class docstring would be
    # picked up by gdb as the help text of every wrapped command.)

    def __init__(self, command, actual_command):
        super(FixGdbCommand, self).__init__(command, gdb.COMMAND_DATA,
                                            gdb.COMPLETE_NONE)
        self.actual_command = actual_command

    def fix_gdb(self):
        """
        It seems that invoking either 'cy exec' and 'py-exec' work perfectly
        fine, but after this gdb's python API is entirely broken.
        Maybe some uncleared exception value is still set?
        sys.exc_clear() didn't help. A demonstration:

        (gdb) cy exec 'hello'
        'hello'
        (gdb) python gdb.execute('cont')
        RuntimeError: Cannot convert value to int.
        Error while executing Python code.
        (gdb) python gdb.execute('cont')
        [15148 refs]

        Program exited normally.
        """
        warnings.filterwarnings('ignore', r'.*', RuntimeWarning,
                                re.escape(__name__))
        try:
            # The result is irrelevant; performing the conversion once is
            # what absorbs the spurious error shown above.
            long(gdb.parse_and_eval("(void *) 0")) == 0
        except RuntimeError:
            pass
        # Note: the warning filter is left installed on purpose;
        # warnings.resetwarnings() is not called here.

    def invoke(self, args, from_tty):
        """
        Run the wrapped command with `args`. A RuntimeError is reported to
        the user as a gdb.GdbError.
        """
        self.fix_gdb()
        try:
            gdb.execute('%s %s' % (self.actual_command, args))
        except RuntimeError as e:
            raise gdb.GdbError(str(e))
        finally:
            # Apply the workaround on the failure path as well, so a failed
            # command does not leave gdb's Python API in the broken state.
            self.fix_gdb()
2484
2485
def _evalcode_python(executor, code, input_type):
    """
    Execute Python code in the most recent stack frame, using the globals
    and locals the inferior reports through PyEval_GetGlobals() and
    PyEval_GetLocals(). Returns whatever executor.evalcode() returns.
    """
    global_dict = gdb.parse_and_eval('PyEval_GetGlobals()')
    local_dict = gdb.parse_and_eval('PyEval_GetLocals()')

    # Checked lazily and in order: the locals are not looked at when the
    # globals are already NULL.
    namespaces = (global_dict, local_dict)
    if any(pointervalue(namespace) == 0 for namespace in namespaces):
        raise gdb.GdbError("Unable to find the locals or globals of the "
                           "most recent Python function (relative to the "
                           "selected frame).")

    return executor.evalcode(code, input_type, global_dict, local_dict)
2499
class PyExec(gdb.Command):
    # gdb command that executes Python code in the inferior, within the
    # most recent Python frame. (Documented with comments so that the help
    # text gdb shows for the command stays as it was.)

    def readcode(self, expr):
        """
        Return a (code, input_type) tuple.

        A non-empty `expr` is used as is and treated as a single
        interactive statement. Otherwise lines are read from the user until
        a line reading 'end' or end-of-file, and the result is treated as
        file input.
        """
        if expr:
            return expr, PythonCodeExecutor.Py_single_input

        lines = []
        while True:
            try:
                line = raw_input('>')
            except EOFError:
                break

            if line.rstrip() == 'end':
                break

            lines.append(line)

        return '\n'.join(lines), PythonCodeExecutor.Py_file_input

    def invoke(self, expr, from_tty):
        """
        Entry point called by gdb: read the code, evaluate it in the
        inferior and release the reference to the result.
        """
        expr, input_type = self.readcode(expr)
        executor = PythonCodeExecutor()
        # _evalcode_python looks up the globals and locals itself; it only
        # needs the code and its input type.
        executor.xdecref(_evalcode_python(executor, expr, input_type))
2525
2526
# Let gdb create pending breakpoints for locations it cannot resolve yet.
gdb.execute('set breakpoint pending on')

if not hasattr(gdb, 'GdbError'):
    warnings.warn("Use gdb 7.2 or higher to use the py-exec command.")
else:
    # Wrap py-step and py-next in gdb defines to make them repeatable.
    py_step = PyStep('-py-step', PythonInfo())
    py_next = PyNext('-py-next', PythonInfo())
    register_defines()
    py_finish = PyFinish('py-finish', PythonInfo())
    py_run = PyRun('py-run', PythonInfo())
    py_cont = PyCont('py-cont', PythonInfo())

    # The user-facing py-exec command forwards to the internal -py-exec one.
    py_exec = FixGdbCommand('py-exec', '-py-exec')
    _py_exec = PyExec("-py-exec", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
2542