#
# modify-python-lldb.py
#
# This script modifies the lldb module (which was automatically generated via
# running swig) to support iteration and/or equality operations for certain lldb
# objects, implements truth value testing for certain lldb objects, and adds a
# global variable 'debugger_unique_id' which is initialized to 0.
#
# As a cleanup step, it also removes the 'residues' from the autodoc features of
# swig.  For an example, take a look at SBTarget.h header file, where we take
# advantage of the already existing doxygen C++-docblock and make it the Python
# docstring for the same method.  The 'residues' in this context include the
# '#endif', the '#ifdef SWIG', the c comment marker, the trailing blank (SPC's)
# line, and the doxygen comment start marker.
#
# In addition to the 'residues' removal during the cleanup step, it also
# transforms the 'char' data type (which was actually 'char *' but the 'autodoc'
# feature of swig removes ' *' from it) into 'str' (as a Python str type).
#
# It also calls SBDebugger.Initialize() to initialize the lldb debugger
# subsystem.
#

import sys
import re
try:
    import StringIO
except ImportError:
    # Python 3 has no StringIO module; io exposes a class of the same name, so
    # binding it under the old module name keeps 'StringIO.StringIO' valid.
    import io as StringIO

# The lldb.py to rewrite lives in the directory given as the sole command line
# argument; with any other argument count we fall back to the current directory.
if len(sys.argv) != 2:
    output_name = "./lldb.py"
else:
    output_name = sys.argv[1] + "/lldb.py"

# print "output_name is '" + output_name + "'"

#
# Residues to be removed.
#
c_endif_swig = "#endif"
c_ifdef_swig = "#ifdef SWIG"
c_comment_marker = "//------------"
# The pattern for recognizing the doxygen comment block line.
# Group 1 captures the '///' marker plus at most one following space, so that
# exactly that text can be stripped from the line.
doxygen_comment_start = re.compile(r"^\s*(/// ?)")
# The demarcation point for turning on/off residue removal state.
# When bracketed by the lines, the CLEANUP_DOCSTRING state (see below) is ON.
# NOTE(review): the leading whitespace of this literal (and of the method-level
# templates further down) is assumed to be SWIG's layout of 4 spaces for a
# method and 8 spaces for its docstring lines -- confirm against a generated
# lldb.py.
toggle_docstring_cleanup_line = '        """'

# Matches 'char' only when it stands alone as a token, i.e. it is not embedded
# in a longer identifier or word such as 'character' or 'wchar_t'.
_char_token = re.compile(r'(?<![A-Za-z0-9_])char(?![A-Za-z0-9_])')


def char_to_str_xform(line):
    """This transforms the 'char', i.e, 'char *' to 'str', Python string.

    Only a free-standing 'char' token is rewritten; words that merely contain
    the letters 'char' are left untouched.  The 'argv' and 'envp' parameters
    are special-cased to 'list'.

    line -- one line of docstring text.

    Returns the transformed line.
    """
    line = _char_token.sub('str', line)
    # Special case handling of 'char **argv' and 'char **envp'.
    line = line.replace('str argv', 'list argv')
    line = line.replace('str envp', 'list envp')
    return line

#
# The one-liner docstring also needs char_to_str transformation, btw.
#
TWO_SPACES = ' ' * 2
EIGHT_SPACES = ' ' * 8
one_liner_docstring_pattern = re.compile('^(%s|%s)""".*"""$' % (TWO_SPACES, EIGHT_SPACES))

#
# lldb_helpers and lldb_iter() should appear before our first SB* class definition.
#
lldb_helpers = '''
# ==================================
# Helper function for SBModule class
# ==================================
def in_range(symbol, section):
    """Test whether a symbol is within the range of a section."""
    symSA = symbol.GetStartAddress().GetFileAddress()
    symEA = symbol.GetEndAddress().GetFileAddress()
    secSA = section.GetFileAddress()
    secEA = secSA + section.GetByteSize()

    if symEA != LLDB_INVALID_ADDRESS:
        if secSA <= symSA and symEA <= secEA:
            return True
        else:
            return False
    else:
        if secSA <= symSA and symSA < secEA:
            return True
        else:
            return False
'''

lldb_iter_def = '''
# ===================================
# Iterator for lldb container objects
# ===================================
def lldb_iter(obj, getsize, getelem):
    """A generator adaptor to support iteration for lldb container objects."""
    size = getattr(obj, getsize)
    elem = getattr(obj, getelem)
    for i in range(size()):
        yield elem(i)

# ==============================================================================
# The modify-python-lldb.py script is responsible for post-processing this SWIG-
# generated lldb.py module.  It is responsible for adding the above lldb_iter()
# function definition as well as the supports, in the following, for iteration
# protocol: __iter__, rich comparison methods: __eq__ and __ne__, truth value
# testing (and built-in operation bool()): __nonzero__, and built-in function
# len(): __len__.
# ==============================================================================
'''

#
# linked_list_iter() is a special purpose iterator to treat the SBValue as the
# head of a list data structure, where you specify the child member name which
# points to the next item on the list and you specify the end-of-list function
# which takes an SBValue and returns True if EOL is reached and False if not.
#
linked_list_iter_def = '''
    def __eol_test__(val):
        """Default function for end of list test takes an SBValue object.

        Return True if val is invalid or it corresponds to a null pointer.
        Otherwise, return False.
        """
        if not val or val.GetValueAsUnsigned() == 0:
            return True
        else:
            return False

    # ==================================================
    # Iterator for lldb.SBValue treated as a linked list
    # ==================================================
    def linked_list_iter(self, next_item_name, end_of_list_test=__eol_test__):
        """Generator adaptor to support iteration for SBValue as a linked list.

        linked_list_iter() is a special purpose iterator to treat the SBValue as
        the head of a list data structure, where you specify the child member
        name which points to the next item on the list and you specify the
        end-of-list test function which takes an SBValue for an item and returns
        True if EOL is reached and False if not.

        linked_list_iter() also detects infinite loop and bails out early.

        The end_of_list_test arg, if omitted, defaults to the __eol_test__
        function above.

        For example,

        # Get Frame #0.
        ...

        # Get variable 'task_head'.
        task_head = frame0.FindVariable('task_head')
        ...

        for t in task_head.linked_list_iter('next'):
            print t
        """
        if end_of_list_test(self):
            return
        item = self
        visited = set()
        try:
            while not end_of_list_test(item) and not item.GetValueAsUnsigned() in visited:
                visited.add(item.GetValueAsUnsigned())
                yield item
                # Prepare for the next iteration.
                item = item.GetChildMemberWithName(next_item_name)
        except:
            # Exception occurred.  Stop the generator.
            pass

        return
'''

# This supports the iteration protocol.
# Each template takes a (getsize, getelem) pair of method names.
iter_def = "    def __iter__(self): return lldb_iter(self, '%s', '%s')"
module_iter = "    def module_iter(self): return lldb_iter(self, '%s', '%s')"
breakpoint_iter = "    def breakpoint_iter(self): return lldb_iter(self, '%s', '%s')"
watchpoint_iter = "    def watchpoint_iter(self): return lldb_iter(self, '%s', '%s')"
section_iter = "    def section_iter(self): return lldb_iter(self, '%s', '%s')"
compile_unit_iter = "    def compile_unit_iter(self): return lldb_iter(self, '%s', '%s')"

# Called to implement the built-in function len().
# Eligible objects are those containers with unambiguous iteration support.
len_def = "    def __len__(self): return self.%s()"

# This supports the rich comparison methods of __eq__ and __ne__.
# eq_def takes the class name and the comparison expression fragment.
eq_def = "    def __eq__(self, other): return isinstance(other, %s) and %s"
ne_def = "    def __ne__(self, other): return not self.__eq__(other)"

# Called to implement truth value testing and the built-in operation bool();
# should return False or True, or their integer equivalents 0 or 1.
# Delegate to self.IsValid() if it is defined for the current lldb object.
nonzero_def = "    def __nonzero__(self): return self.IsValid()"

# A convenience iterator for SBSymbol!
# Method template added to SBModule; it relies on the module being iterable
# over its symbols and on the in_range() helper.
# NOTE(review): the 4-space method indentation is assumed to match the
# SWIG-generated class bodies -- confirm against a generated lldb.py.
symbol_in_section_iter_def = '''
    def symbol_in_section_iter(self, section):
        """Given a module and its contained section, returns an iterator on the
        symbols within the section."""
        for sym in self:
            if in_range(sym, section):
                yield sym
'''

#
# This dictionary defines a mapping from classname to (getsize, getelem) tuple.
#
# Exceptions to the plain tuple shape:
#   - 'SBTarget' maps to a nested dict of three (getsize, getelem) tuples,
#   - 'SBModule-symbol-in-section' maps to a ready-made method template string.
#
d = {
    'SBBreakpoint': ('GetNumLocations', 'GetLocationAtIndex'),
    'SBCompileUnit': ('GetNumLineEntries', 'GetLineEntryAtIndex'),
    'SBDebugger': ('GetNumTargets', 'GetTargetAtIndex'),
    'SBModule': ('GetNumSymbols', 'GetSymbolAtIndex'),
    'SBProcess': ('GetNumThreads', 'GetThreadAtIndex'),
    'SBSection': ('GetNumSubSections', 'GetSubSectionAtIndex'),
    'SBThread': ('GetNumFrames', 'GetFrameAtIndex'),

    'SBInstructionList': ('GetSize', 'GetInstructionAtIndex'),
    'SBStringList': ('GetSize', 'GetStringAtIndex'),
    'SBSymbolContextList': ('GetSize', 'GetContextAtIndex'),
    'SBTypeList': ('GetSize', 'GetTypeAtIndex'),
    'SBValueList': ('GetSize', 'GetValueAtIndex'),

    'SBType': ('GetNumberChildren', 'GetChildAtIndex'),
    'SBValue': ('GetNumChildren', 'GetChildAtIndex'),

    # SBTarget needs special processing, see below.
    'SBTarget': {
        'module': ('GetNumModules', 'GetModuleAtIndex'),
        'breakpoint': ('GetNumBreakpoints', 'GetBreakpointAtIndex'),
        'watchpoint': ('GetNumWatchpoints', 'GetWatchpointAtIndex'),
    },

    # SBModule has an additional section_iter(), see below.
    'SBModule-section': ('GetNumSections', 'GetSectionAtIndex'),
    # And compile_unit_iter().
    'SBModule-compile-unit': ('GetNumCompileUnits', 'GetCompileUnitAtIndex'),
    # As well as symbol_in_section_iter().
    'SBModule-symbol-in-section': symbol_in_section_iter_def,
}

#
# This dictionary defines a mapping from classname to equality method name(s).
#
# Two objects of a listed class compare equal when every listed accessor
# returns equal values on both of them.
e = {
    'SBAddress': ['GetFileAddress', 'GetModule'],
    'SBBreakpoint': ['GetID'],
    'SBWatchpoint': ['GetID'],
    'SBFileSpec': ['GetFilename', 'GetDirectory'],
    'SBModule': ['GetFileSpec', 'GetUUIDString'],
    'SBType': ['GetByteSize', 'GetName'],
}


def list_to_frag(list):
    """Transform a list to equality program fragment.

    For example, ['GetID'] is transformed to 'self.GetID() == other.GetID()',
    and ['GetFilename', 'GetDirectory'] to 'self.GetFilename() == other.GetFilename()
    and self.GetDirectory() == other.GetDirectory()'.

    list -- non-empty sequence of zero-argument method names.

    Returns the fragment as a string.  Raises Exception if list is empty.
    """
    if not list:
        raise Exception("list should be non-empty")
    return " and ".join("self.{0}() == other.{0}()".format(name)
                        for name in list)


class NewContent(StringIO.StringIO):
    """Simple facade to keep track of the previous line to be committed.

    Lines are buffered one deep: a line is written to the underlying buffer
    only when the next line is added (or finish() is called), which gives the
    caller a chance to drop the most recent line first.
    """

    def __init__(self):
        StringIO.StringIO.__init__(self)
        # The most recently added line, not yet written; None when empty.
        self.prev_line = None

    def _commit(self):
        """Write the pending line, if any, followed by a newline."""
        if self.prev_line is not None:
            # Explicit write instead of 'print >> self' so that the same code
            # behaves identically on Python 2 and Python 3.
            self.write(self.prev_line + "\n")

    def add_line(self, a_line):
        """Add a line to the content, if there is a previous line, commit it."""
        self._commit()
        self.prev_line = a_line

    def del_line(self):
        """Forget about the previous line, do not commit it."""
        self.prev_line = None

    def del_blank_line(self):
        """Forget about the previous line if it is a blank line."""
        if self.prev_line is not None and not self.prev_line.strip():
            self.prev_line = None

    def finish(self):
        """Call this when you're finished with populating content."""
        self._commit()
        self.prev_line = None

# The new content will have the iteration protocol defined for our lldb objects.
new_content = NewContent()

with open(output_name, 'r') as f_in:
    content = f_in.read()

# The pattern for recognizing the beginning of an SB class definition.
class_pattern = re.compile(r"^class (SB.*)\(_object\):$")

# The pattern for recognizing the beginning of the __init__ method definition.
# NOTE(review): the 4-space method indentation in this and the next pattern is
# assumed to match the SWIG-generated class bodies -- confirm against a
# generated lldb.py.
init_pattern = re.compile(r"^    def __init__\(self.*\):")

# The pattern for recognizing the beginning of the IsValid method definition.
isvalid_pattern = re.compile(r"^    def IsValid\(")

# These define the states of our finite state machine.
# They are bit flags (apart from NORMAL == 0) so that they can be combined.
NORMAL = 0
DEFINING_ITERATOR = 1
DEFINING_EQUALITY = 2
CLEANUP_DOCSTRING = 4

# The lldb_iter_def only needs to be inserted once.
lldb_iter_defined = False

# Name of the SB class whose definition we are currently inside; it is set when
# a class header is matched and only read while a DEFINING_* flag is on.
cls = None

# Our FSM begins its life in the NORMAL state, and transitions to the
# DEFINING_ITERATOR and/or DEFINING_EQUALITY state whenever it encounters the
# beginning of certain class definitions, see dictionaries 'd' and 'e' above.
#
# Note that the two states DEFINING_ITERATOR and DEFINING_EQUALITY are
# orthogonal in that our FSM can be in one, the other, or both states at the
# same time.  During such time, the FSM is eagerly searching for the __init__
# method definition in order to insert the appropriate method(s) into the lldb
# module.
#
# The state CLEANUP_DOCSTRING can be entered from either the NORMAL or the
# DEFINING_ITERATOR/EQUALITY states.  While in this state, the FSM is fixing/
# cleaning the Python docstrings generated by the swig docstring features.
#
# The FSM, in all possible states, also checks the current input for IsValid()
# definition, and inserts a __nonzero__() method definition to implement truth
# value testing and the built-in operation bool().
state = NORMAL
for line in content.splitlines():
    # Handle the state transition into CLEANUP_DOCSTRING state as it is possible
    # to enter this state from either NORMAL or DEFINING_ITERATOR/EQUALITY.
    #
    # If '        """' is the sole line, prepare to transition to the
    # CLEANUP_DOCSTRING state or out of it.
    if line == toggle_docstring_cleanup_line:
        if state & CLEANUP_DOCSTRING:
            # Special handling of the trailing blank line right before the '"""'
            # end docstring marker.
            new_content.del_blank_line()
            state ^= CLEANUP_DOCSTRING
        else:
            state |= CLEANUP_DOCSTRING

    if state == NORMAL:
        match = class_pattern.search(line)
        # Inserts lldb_helpers and the lldb_iter() definition before the first
        # class definition.
        if not lldb_iter_defined and match:
            new_content.add_line(lldb_helpers)
            new_content.add_line(lldb_iter_def)
            lldb_iter_defined = True

        # If we are at the beginning of the class definitions, prepare to
        # transition to the DEFINING_ITERATOR/DEFINING_EQUALITY state for the
        # right class names.
        if match:
            cls = match.group(1)
            if cls in d:
                # Adding support for iteration for the matched SB class.
                state |= DEFINING_ITERATOR
            if cls in e:
                # Adding support for eq and ne for the matched SB class.
                state |= DEFINING_EQUALITY

    if (state & DEFINING_ITERATOR) or (state & DEFINING_EQUALITY):
        match = init_pattern.search(line)
        if match:
            # We found the beginning of the __init__ method definition.
            # This is a good spot to insert the iter and/or eq-ne support.
            #
            # But note that SBTarget has three types of iterations.
            if cls == "SBTarget":
                new_content.add_line(module_iter % (d[cls]['module']))
                new_content.add_line(breakpoint_iter % (d[cls]['breakpoint']))
                new_content.add_line(watchpoint_iter % (d[cls]['watchpoint']))
            else:
                if (state & DEFINING_ITERATOR):
                    new_content.add_line(iter_def % d[cls])
                    new_content.add_line(len_def % d[cls][0])
                if (state & DEFINING_EQUALITY):
                    new_content.add_line(eq_def % (cls, list_to_frag(e[cls])))
                    new_content.add_line(ne_def)

            # SBModule has extra SBSection, SBCompileUnit iterators and symbol_in_section_iter()!
            if cls == "SBModule":
                new_content.add_line(section_iter % d[cls+'-section'])
                new_content.add_line(compile_unit_iter % d[cls+'-compile-unit'])
                new_content.add_line(d[cls+'-symbol-in-section'])

            # This special purpose iterator is for SBValue only!!!
            if cls == "SBValue":
                new_content.add_line(linked_list_iter_def)

            # Next state will be NORMAL.
            state = NORMAL

    if (state & CLEANUP_DOCSTRING):
        # Cleanse the lldb.py of the autodoc'ed residues.
        if c_ifdef_swig in line or c_endif_swig in line:
            continue
        # As well as the comment marker line.
        if c_comment_marker in line:
            continue

        # Also remove the '\a ' and '\b 'substrings.
        # NOTE(review): in these non-raw literals '\a' and '\b' are the BEL and
        # backspace control characters, presumably what doxygen's \a and \b
        # markers have become by the time they reach lldb.py -- confirm before
        # turning them into raw strings.
        line = line.replace('\a ', '')
        line = line.replace('\b ', '')
        # And the leading '///' substring.
        doxygen_comment_match = doxygen_comment_start.match(line)
        if doxygen_comment_match:
            line = line.replace(doxygen_comment_match.group(1), '', 1)

        line = char_to_str_xform(line)

        # Note that the transition out of CLEANUP_DOCSTRING is handled at the
        # beginning of this loop body already.

    # This deals with one-liner docstring, for example, SBThread.GetName:
    # """GetName(self) -> char""".
    if one_liner_docstring_pattern.match(line):
        line = char_to_str_xform(line)

    # Look for 'def IsValid(*args):', and once located, add implementation
    # of truth value testing for this object by delegation.
    if isvalid_pattern.search(line):
        new_content.add_line(nonzero_def)

    # Pass the original line of content to new_content.
    new_content.add_line(line)

# We are finished with recording new content.
new_content.finish()

# Overwrite the input file with the processed content, then append the
# module-level initialization and convenience variables.
with open(output_name, 'w') as f_out:
    f_out.write(new_content.getvalue())
    f_out.write('''debugger_unique_id = 0
SBDebugger.Initialize()
debugger = None
target = SBTarget()
process = SBProcess()
thread = SBThread()
frame = SBFrame()''')