1#
2# modify-python-lldb.py
3#
4# This script modifies the lldb module (which was automatically generated via
5# running swig) to support iteration and/or equality operations for certain lldb
6# objects, implements truth value testing for certain lldb objects, and adds a
7# global variable 'debugger_unique_id' which is initialized to 0.
8#
9# As a cleanup step, it also removes the 'residues' from the autodoc features of
10# swig.  For an example, take a look at SBTarget.h header file, where we take
11# advantage of the already existing doxygen C++-docblock and make it the Python
12# docstring for the same method.  The 'residues' in this context include the
13# '#endif', the '#ifdef SWIG', the c comment marker, the trailing blank (SPC's)
14# line, and the doxygen comment start marker.
15#
16# In addition to the 'residues' removal during the cleanup step, it also
17# transforms the 'char' data type (which was actually 'char *' but the 'autodoc'
18# feature of swig removes ' *' from it) into 'str' (as a Python str type).
19#
20# It also calls SBDebugger.Initialize() to initialize the lldb debugger
21# subsystem.
22#
23
24import sys, re, StringIO
25
# With exactly one command-line argument, it names the directory holding the
# SWIG-generated lldb.py; with any other argument count the current directory
# is used instead.
output_name = sys.argv[1] + "/lldb.py" if len(sys.argv) == 2 else "./lldb.py"
30
31# print "output_name is '" + output_name + "'"
32
33#
34# Residues to be removed.
35#
# Substrings that mark a docstring line as autodoc residue; any line inside a
# docstring that contains one of them is dropped from the output.
c_endif_swig = "#endif"
c_ifdef_swig = "#ifdef SWIG"
c_comment_marker = "//------------"
# The pattern for recognizing the doxygen comment block line: optional leading
# whitespace followed by '///' and at most one space.  Group 1 captures the
# '///' marker (and that space, if present) so it can be stripped from the line.
# A raw string is used so that '\s' reaches the regex engine untouched instead
# of relying on Python passing an unknown string escape through.
doxygen_comment_start = re.compile(r"^\s*(/// ?)")
# The demarcation point for turning on/off residue removal state.
# When bracketed by the lines, the CLEANUP_DOCSTRING state (see below) is ON.
toggle_docstring_cleanup_line = '        """'
44
# Matches the whole word 'char' where it is either preceded or followed by a
# space.  The word boundaries keep words that merely contain 'char' (for
# example 'character') from being rewritten.
_char_type_pattern = re.compile(r'(?<= )char\b|\bchar(?= )')

def char_to_str_xform(line):
    """Transform the 'char' type name, i.e., 'char *', to 'str' (Python string).

    line -- one line of text from the generated module.

    Returns the line with every stand-alone 'char' that borders a space
    replaced by 'str'.  The resulting 'str argv' and 'str envp' are then
    turned into 'list argv' and 'list envp'.
    """
    line = _char_type_pattern.sub('str', line)
    # Special case handling of 'char **argv' and 'char **envp'.
    line = line.replace('str argv', 'list argv')
    line = line.replace('str envp', 'list envp')
    return line
53
54#
55# The one-liner docstring also needs char_to_str transformation, btw.
56#
# A one-liner docstring opens and closes its triple quotes on a single line and
# is indented by either two or eight spaces.
TWO_SPACES = 2 * ' '
EIGHT_SPACES = 8 * ' '
_docstring_indents = (TWO_SPACES, EIGHT_SPACES)
one_liner_docstring_pattern = re.compile('^(%s|%s)""".*"""$' % _docstring_indents)
60
#
# lldb_helpers and lldb_iter() should appear before our first SB* class definition.
#
# lldb_helpers is source text, not code run by this script: the main loop
# writes it into the output just ahead of the first SB class it encounters.
# The in_range() function it defines is called by the symbol_in_section_iter()
# template further below.
#
lldb_helpers = '''
# ==================================
# Helper function for SBModule class
# ==================================
def in_range(symbol, section):
    """Test whether a symbol is within the range of a section."""
    symSA = symbol.GetStartAddress().GetFileAddress()
    symEA = symbol.GetEndAddress().GetFileAddress()
    secSA = section.GetFileAddress()
    secEA = secSA + section.GetByteSize()

    if symEA != LLDB_INVALID_ADDRESS:
        if secSA <= symSA and symEA <= secEA:
            return True
        else:
            return False
    else:
        if secSA <= symSA and symSA < secEA:
            return True
        else:
            return False
'''
86
# Source text for the generic lldb_iter() generator plus a banner comment.  It
# is written into the output once, right after lldb_helpers.  The generated
# lldb_iter() looks up the size and element getters by name on the object and
# yields elem(i) for every i in range(size()).
lldb_iter_def = '''
# ===================================
# Iterator for lldb container objects
# ===================================
def lldb_iter(obj, getsize, getelem):
    """A generator adaptor to support iteration for lldb container objects."""
    size = getattr(obj, getsize)
    elem = getattr(obj, getelem)
    for i in range(size()):
        yield elem(i)

# ==============================================================================
# The modify-python-lldb.py script is responsible for post-processing this SWIG-
# generated lldb.py module.  It is responsible for adding the above lldb_iter()
# function definition as well as the supports, in the following, for iteration
# protocol: __iter__, rich comparison methods: __eq__ and __ne__, truth value
# testing (and built-in operation bool()): __nonzero__, and built-in function
# len(): __len__.
# ==============================================================================
'''
107
108#
109# linked_list_iter() is a special purpose iterator to treat the SBValue as the
110# head of a list data structure, where you specify the child member name which
111# points to the next item on the list and you specify the end-of-list function
112# which takes an SBValue and returns True if EOL is reached and False if not.
113#
# Source text of two methods that the main loop inserts into the SBValue class.
# The text is indented by four spaces so that it lands inside the class body.
#
# The generated generator stops quietly when walking the list raises an
# ordinary error.  The handler is 'except Exception' rather than a bare
# 'except' so that KeyboardInterrupt, SystemExit and GeneratorExit are not
# swallowed along with it.
linked_list_iter_def = '''
    def __eol_test__(val):
        """Default function for end of list test takes an SBValue object.

        Return True if val is invalid or it corresponds to a null pointer.
        Otherwise, return False.
        """
        if not val or val.GetValueAsUnsigned() == 0:
            return True
        else:
            return False

    # ==================================================
    # Iterator for lldb.SBValue treated as a linked list
    # ==================================================
    def linked_list_iter(self, next_item_name, end_of_list_test=__eol_test__):
        """Generator adaptor to support iteration for SBValue as a linked list.

        linked_list_iter() is a special purpose iterator to treat the SBValue as
        the head of a list data structure, where you specify the child member
        name which points to the next item on the list and you specify the
        end-of-list test function which takes an SBValue for an item and returns
        True if EOL is reached and False if not.

        linked_list_iter() also detects infinite loop and bails out early.

        The end_of_list_test arg, if omitted, defaults to the __eol_test__
        function above.

        For example,

        # Get Frame #0.
        ...

        # Get variable 'task_head'.
        task_head = frame0.FindVariable('task_head')
        ...

        for t in task_head.linked_list_iter('next'):
            print t
        """
        if end_of_list_test(self):
            return
        item = self
        visited = set()
        try:
            while not end_of_list_test(item) and not item.GetValueAsUnsigned() in visited:
                visited.add(item.GetValueAsUnsigned())
                yield item
                # Prepare for the next iteration.
                item = item.GetChildMemberWithName(next_item_name)
        except Exception:
            # Exception occurred.  Stop the generator.
            pass

        return
'''
171
# Method templates written into the generated SB classes.  All of them are
# indented by four spaces so that they land inside a class body.
#
# This supports the iteration protocol.
# The two '%s' slots receive the names of the size getter and of the indexed
# element getter, in that order (see the tuples in dictionary 'd' below).
# iter_def is the generic __iter__; the named variants are used for SBTarget
# (module/breakpoint/watchpoint) and as extras for SBModule
# (section/compile_unit).
iter_def = "    def __iter__(self): return lldb_iter(self, '%s', '%s')"
module_iter = "    def module_iter(self): return lldb_iter(self, '%s', '%s')"
breakpoint_iter = "    def breakpoint_iter(self): return lldb_iter(self, '%s', '%s')"
watchpoint_iter = "    def watchpoint_iter(self): return lldb_iter(self, '%s', '%s')"
section_iter = "    def section_iter(self): return lldb_iter(self, '%s', '%s')"
compile_unit_iter = "    def compile_unit_iter(self): return lldb_iter(self, '%s', '%s')"

# Called to implement the built-in function len().
# Eligible objects are those containers with unambiguous iteration support.
# The single '%s' slot receives the name of the size getter.
len_def = "    def __len__(self): return self.%s()"

# This supports the rich comparison methods of __eq__ and __ne__.
# eq_def takes the class name and the comparison fragment produced by
# list_to_frag(); ne_def has no slots and simply negates __eq__.
eq_def = "    def __eq__(self, other): return isinstance(other, %s) and %s"
ne_def = "    def __ne__(self, other): return not self.__eq__(other)"

# Called to implement truth value testing and the built-in operation bool();
# should return False or True, or their integer equivalents 0 or 1.
# Delegate to self.IsValid() if it is defined for the current lldb object.
nonzero_def = "    def __nonzero__(self): return self.IsValid()"

# A convenience iterator for SBSymbol!
# It relies on the in_range() helper defined in the lldb_helpers text and on
# the module object itself being iterable ('for sym in self').
symbol_in_section_iter_def = '''
    def symbol_in_section_iter(self, section):
        """Given a module and its contained section, returns an iterator on the
        symbols within the section."""
        for sym in self:
            if in_range(sym, section):
                yield sym
'''
202
#
# This dictionary defines a mapping from classname to (getsize, getelem) tuple.
#
# A class whose name is a key here gets iteration support when the main loop
# reaches its __init__ method.  For the plain entries the tuple is fed to both
# iter_def and len_def.  Three kinds of entries deviate from the plain shape:
#   - 'SBTarget' maps to a dictionary of three tuples, one per named iterator.
#   - 'SBModule-section' and 'SBModule-compile-unit' are not class names; the
#     main loop looks them up as cls + '-section' / cls + '-compile-unit'.
#   - 'SBModule-symbol-in-section' maps to a source-text template rather than
#     a tuple.
#
d = { 'SBBreakpoint':  ('GetNumLocations',   'GetLocationAtIndex'),
      'SBCompileUnit': ('GetNumLineEntries', 'GetLineEntryAtIndex'),
      'SBDebugger':    ('GetNumTargets',     'GetTargetAtIndex'),
      'SBModule':      ('GetNumSymbols',     'GetSymbolAtIndex'),
      'SBProcess':     ('GetNumThreads',     'GetThreadAtIndex'),
      'SBSection':     ('GetNumSubSections', 'GetSubSectionAtIndex'),
      'SBThread':      ('GetNumFrames',      'GetFrameAtIndex'),

      'SBInstructionList':   ('GetSize', 'GetInstructionAtIndex'),
      'SBStringList':        ('GetSize', 'GetStringAtIndex',),
      'SBSymbolContextList': ('GetSize', 'GetContextAtIndex'),
      'SBTypeList':          ('GetSize', 'GetTypeAtIndex'),
      'SBValueList':         ('GetSize', 'GetValueAtIndex'),

      'SBType':  ('GetNumberChildren', 'GetChildAtIndex'),
      'SBValue': ('GetNumChildren',    'GetChildAtIndex'),

      # SBTarget needs special processing, see below.
      'SBTarget': {'module':     ('GetNumModules', 'GetModuleAtIndex'),
                   'breakpoint': ('GetNumBreakpoints', 'GetBreakpointAtIndex'),
                   'watchpoint': ('GetNumWatchpoints', 'GetWatchpointAtIndex')
                   },

      # SBModule has an additional section_iter(), see below.
      'SBModule-section': ('GetNumSections', 'GetSectionAtIndex'),
      # And compile_unit_iter().
      'SBModule-compile-unit': ('GetNumCompileUnits', 'GetCompileUnitAtIndex'),
      # As well as symbol_in_section_iter().
      'SBModule-symbol-in-section': symbol_in_section_iter_def
      }
236
#
# This dictionary defines a mapping from classname to equality method name(s).
#
# A class whose name is a key here gets __eq__ and __ne__ when the main loop
# reaches its __init__ method.  Each value is a list of method names;
# list_to_frag() turns it into a chain of 'self.M() == other.M()' comparisons
# joined by 'and', so two objects compare equal only if every listed method
# returns equal results.
#
e = { 'SBAddress':            ['GetFileAddress', 'GetModule'],
      'SBBreakpoint':         ['GetID'],
      'SBWatchpoint':         ['GetID'],
      'SBFileSpec':           ['GetFilename', 'GetDirectory'],
      'SBModule':             ['GetFileSpec', 'GetUUIDString'],
      'SBType':               ['GetByteSize', 'GetName']
      }
247
def list_to_frag(list):
    """Transform a list of method names to an equality program fragment.

    For example, ['GetID'] is transformed to 'self.GetID() == other.GetID()',
    and ['GetFilename', 'GetDirectory'] to 'self.GetFilename() == other.GetFilename()
    and self.GetDirectory() == other.GetDirectory()'.

    list -- non-empty sequence of method names.  (The parameter name shadows
            the builtin; it is kept so that keyword callers keep working.)

    Returns the comparisons joined by ' and ' as a single string.
    Raises Exception if the list is empty.
    """
    if not list:
        raise Exception("list should be non-empty")
    # One comparison per method name, glued together with ' and '.
    return " and ".join(
        "self.{0}() == other.{0}()".format(name) for name in list)
263
class NewContent(StringIO.StringIO):
    """Simple facade to keep track of the previous line to be committed.

    Lines are buffered one deep: a line handed to add_line() is only written
    to the underlying string buffer when the next line arrives or finish() is
    called.  Until then it can still be dropped with del_line() or
    del_blank_line().
    """
    def __init__(self):
        StringIO.StringIO.__init__(self)
        # The line waiting to be committed, or None if there is none.
        self.prev_line = None

    def _commit_prev_line(self):
        """Write the pending line, if any, followed by a newline."""
        if self.prev_line is not None:
            # write() is used instead of the 'print >>' statement; it emits
            # the same text and does not depend on Python 2 only syntax.
            self.write(self.prev_line + "\n")

    def add_line(self, a_line):
        """Add a line to the content, if there is a previous line, commit it."""
        self._commit_prev_line()
        self.prev_line = a_line

    def del_line(self):
        """Forget about the previous line, do not commit it."""
        self.prev_line = None

    def del_blank_line(self):
        """Forget about the previous line if it is a blank line."""
        if self.prev_line is not None and not self.prev_line.strip():
            self.prev_line = None

    def finish(self):
        """Call this when you're finished with populating content."""
        self._commit_prev_line()
        self.prev_line = None
286
# Read the whole SWIG-generated module up front; the same path is overwritten
# with the processed text at the end of the script.
with open(output_name, 'r') as f_in:
    content = f_in.read()

# The new content will have the iteration protocol defined for our lldb objects.
new_content = NewContent()
292
# The regular expressions below are raw strings so that escapes such as '\('
# reach the regex engine untouched instead of relying on Python passing
# unknown string escapes through.

# The pattern for recognizing the beginning of an SB class definition.
# Group 1 captures the class name.
class_pattern = re.compile(r"^class (SB.*)\(_object\):$")

# The pattern for recognizing the beginning of the __init__ method definition.
init_pattern = re.compile(r"^    def __init__\(self.*\):")

# The pattern for recognizing the beginning of the IsValid method definition.
isvalid_pattern = re.compile(r"^    def IsValid\(")

# These define the states of our finite state machine.
# The non-zero states are distinct bits because they are combined and tested
# with the bitwise operators |, & and ^.
NORMAL = 0
DEFINING_ITERATOR = 1
DEFINING_EQUALITY = 2
CLEANUP_DOCSTRING = 4

# The lldb_iter_def only needs to be inserted once.
lldb_iter_defined = False
310
311# Our FSM begins its life in the NORMAL state, and transitions to the
312# DEFINING_ITERATOR and/or DEFINING_EQUALITY state whenever it encounters the
313# beginning of certain class definitions, see dictionaries 'd' and 'e' above.
314#
315# Note that the two states DEFINING_ITERATOR and DEFINING_EQUALITY are
316# orthogonal in that our FSM can be in one, the other, or both states at the
317# same time.  During such time, the FSM is eagerly searching for the __init__
318# method definition in order to insert the appropriate method(s) into the lldb
319# module.
320#
321# The state CLEANUP_DOCSTRING can be entered from either the NORMAL or the
322# DEFINING_ITERATOR/EQUALITY states.  While in this state, the FSM is fixing/
323# cleaning the Python docstrings generated by the swig docstring features.
324#
325# The FSM, in all possible states, also checks the current input for IsValid()
326# definition, and inserts a __nonzero__() method definition to implement truth
327# value testing and the built-in operation bool().
# Walk the generated module line by line.  Each iteration may first queue
# extra lines (helpers, iterator/equality methods, __nonzero__) and then queues
# the possibly rewritten input line itself, unless the line is dropped as
# docstring residue.
state = NORMAL
for line in content.splitlines():
    # Handle the state transition into CLEANUP_DOCSTRING state as it is possible
    # to enter this state from either NORMAL or DEFINING_ITERATOR/EQUALITY.
    #
    # If '        """' is the sole line, prepare to transition to the
    # CLEANUP_DOCSTRING state or out of it.
    if line == toggle_docstring_cleanup_line:
        if state & CLEANUP_DOCSTRING:
            # Special handling of the trailing blank line right before the '"""'
            # end docstring marker.
            new_content.del_blank_line()
            state ^= CLEANUP_DOCSTRING
        else:
            state |= CLEANUP_DOCSTRING

    # Class headers are only looked for when no other state bit is set.
    if state == NORMAL:
        match = class_pattern.search(line)
        # Inserts lldb_helpers and the lldb_iter() definition before the first
        # class definition.
        if not lldb_iter_defined and match:
            new_content.add_line(lldb_helpers)
            new_content.add_line(lldb_iter_def)
            lldb_iter_defined = True

        # If we are at the beginning of the class definitions, prepare to
        # transition to the DEFINING_ITERATOR/DEFINING_EQUALITY state for the
        # right class names.
        if match:
            # 'cls' is kept across iterations; it is read again below once the
            # __init__ line of this class is reached.
            cls = match.group(1)
            if cls in d:
                # Adding support for iteration for the matched SB class.
                state |= DEFINING_ITERATOR
            if cls in e:
                # Adding support for eq and ne for the matched SB class.
                state |= DEFINING_EQUALITY

    if (state & DEFINING_ITERATOR) or (state & DEFINING_EQUALITY):
        match = init_pattern.search(line)
        if match:
            # We found the beginning of the __init__ method definition.
            # This is a good spot to insert the iter and/or eq-ne support.
            # The lines queued here are emitted ahead of the __init__ line,
            # which is only queued at the bottom of the loop body.
            #
            # But note that SBTarget has three types of iterations.
            # It gets the three named iterators only; no __iter__, __len__,
            # __eq__ or __ne__ is generated in this branch.
            if cls == "SBTarget":
                new_content.add_line(module_iter % (d[cls]['module']))
                new_content.add_line(breakpoint_iter % (d[cls]['breakpoint']))
                new_content.add_line(watchpoint_iter % (d[cls]['watchpoint']))
            else:
                if (state & DEFINING_ITERATOR):
                    new_content.add_line(iter_def % d[cls])
                    # __len__ delegates to the size getter, the first item of
                    # the (getsize, getelem) tuple.
                    new_content.add_line(len_def % d[cls][0])
                if (state & DEFINING_EQUALITY):
                    new_content.add_line(eq_def % (cls, list_to_frag(e[cls])))
                    new_content.add_line(ne_def)

            # SBModule has extra SBSection, SBCompileUnit iterators and symbol_in_section_iter()!
            if cls == "SBModule":
                new_content.add_line(section_iter % d[cls+'-section'])
                new_content.add_line(compile_unit_iter % d[cls+'-compile-unit'])
                new_content.add_line(d[cls+'-symbol-in-section'])

            # This special purpose iterator is for SBValue only!!!
            if cls == "SBValue":
                new_content.add_line(linked_list_iter_def)

            # Next state will be NORMAL.
            # This plain assignment clears every state bit at once.
            state = NORMAL

    if (state & CLEANUP_DOCSTRING):
        # Cleanse the lldb.py of the autodoc'ed residues.
        # 'continue' skips the add_line() call at the bottom, so the residue
        # line never reaches the output.
        if c_ifdef_swig in line or c_endif_swig in line:
            continue
        # As well as the comment marker line.
        if c_comment_marker in line:
            continue

        # Also remove the '\a ' and '\b ' substrings.
        # In these non-raw literals '\a' and '\b' are the BEL (0x07) and
        # backspace (0x08) control characters, not a backslash followed by a
        # letter.  NOTE(review): presumably this is the form in which the
        # doxygen '\a'/'\b' commands show up in the generated docstrings --
        # confirm against an actual lldb.py.
        line = line.replace('\a ', '')
        line = line.replace('\b ', '')
        # And the leading '///' substring.
        doxygen_comment_match = doxygen_comment_start.match(line)
        if doxygen_comment_match:
            # Only the first occurrence of the matched marker is removed.
            line = line.replace(doxygen_comment_match.group(1), '', 1)

        line = char_to_str_xform(line)

        # Note that the transition out of CLEANUP_DOCSTRING is handled at the
        # beginning of this loop body already.

    # This deals with one-liner docstring, for example, SBThread.GetName:
    # """GetName(self) -> char""".
    if one_liner_docstring_pattern.match(line):
        line = char_to_str_xform(line)

    # Look for 'def IsValid(*args):', and once located, add implementation
    # of truth value testing for this object by delegation.
    # __nonzero__ is queued first, so it ends up just before IsValid.
    if isvalid_pattern.search(line):
        new_content.add_line(nonzero_def)

    # Pass the original line of content to new_content.
    # (By now the line may have been rewritten by the docstring cleanup above.)
    new_content.add_line(line)
430
# Commit the last pending line; nothing more will be added.
new_content.finish()

# Module-level statements appended verbatim after the processed content.  They
# define the global 'debugger_unique_id', call SBDebugger.Initialize(), and
# create the 'debugger', 'target', 'process', 'thread' and 'frame' globals.
_lldb_epilogue = '''debugger_unique_id = 0
SBDebugger.Initialize()
debugger = None
target = SBTarget()
process = SBProcess()
thread = SBThread()
frame = SBFrame()'''

# Overwrite the input file with the processed content followed by the epilogue.
with open(output_name, 'w') as f_out:
    f_out.writelines([new_content.getvalue(), _lldb_epilogue])
443
444