"""Simple code to extract class & function docstrings from a module.

This code is used as an example in the library reference manual in the
section on using the parser module.  Refer to the manual for a thorough
discussion of the operation of this code.
"""

import os
import parser
import symbol
import token
import types

from types import ListType, TupleType


def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the parse tree and
    named after the file's base name with its extension removed.
    """
    source_file = open(fileName)
    try:
        source = source_file.read()
    finally:
        # Release the handle promptly instead of waiting for collection.
        source_file.close()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    return ModuleInfo(ast.totuple(), basename)


class SuiteInfoBase:
    """Common storage and accessors for information gathered from a tree.

    Instances keep a docstring, a name, and two dictionaries mapping the
    names of directly nested classes and functions to their info objects.
    """

    # Class-level defaults, used when nothing is extracted or assigned.
    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        """Create empty tables and scan `tree' when one is supplied."""
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Record the docstring and nested definitions found in `tree'."""
        # extract docstring
        if len(tree) == 2:
            # Two-element tree: its only child is compared with the
            # pattern stripped of the outer `stmt' wrapper.
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Longer tree: the candidate statement is taken from index 3.
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have an element at index 3; no docstring.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() turns the string-literal token text
            # into its value; it executes text taken from the parsed
            # source, so only use this on trusted input.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if found:
                cstmt = bindings['compound']
                if cstmt[0] == symbol.funcdef:
                    name = cstmt[2][1]
                    self._function_info[name] = FunctionInfo(cstmt)
                elif cstmt[0] == symbol.classdef:
                    name = cstmt[2][1]
                    self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the stored docstring ('' when none was recorded)."""
        return self._docstring

    def get_name(self):
        """Return the stored name."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes recorded for this object."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name'; KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions."""
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]


class SuiteFuncInfo:
    #  Mixin class providing access to function names and info.
    #  It relies on the host class supplying a `_function_info' mapping.

    def get_function_names(self):
        """Return the names of the recorded functions."""
        return self._function_info.keys()

    def get_function_info(self, name):
        """Return the info object for function `name'; KeyError if unknown."""
        return self._function_info[name]


class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition."""

    def __init__(self, tree = None):
        """Take the name from `tree[2][1]' and scan the last child.

        With no tree (or an empty one) the name keeps its class default
        and nothing is scanned.
        """
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            body = None
        SuiteInfoBase.__init__(self, body)


class ClassInfo(SuiteInfoBase):
    """Information about one class definition."""

    def __init__(self, tree = None):
        """Take the name from `tree[2][1]' and scan the last child.

        With no tree (or an empty one) the name keeps its class default
        and nothing is scanned.
        """
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            body = None
        SuiteInfoBase.__init__(self, body)

    def get_method_names(self):
        """Return the names of the functions recorded for this class."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object for method `name'; KeyError if unknown."""
        return self._function_info[name]


class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module."""

    def __init__(self, tree = None, name = "<string>"):
        """Store `name', scan `tree', then settle the module docstring.

        Only the first child of the tree (`tree[1]') is accepted as the
        module docstring.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # NOTE(review): eval() converts the string-literal token
                # text into its value, matching what the base class does
                # for nested definitions; it executes text from the
                # parsed source, so only use this on trusted input.
                self._docstring = eval(bindings["docstring"])
            else:
                # The base initializer probes a different position in
                # the tree; discard anything it picked up there.
                self._docstring = ''


def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.
    """
    if vars is None:
        vars = {}
    if type(pattern) is ListType:       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if type(pattern) is not TupleType:
        return (pattern == data), vars
    try:
        data_len = len(data)
    except TypeError:
        # Unsized data (such as an integer) cannot match a tuple pattern.
        return 0, vars
    if data_len != len(pattern):
        return 0, vars
    # Start out matched so that an empty pattern against empty data
    # yields a result instead of leaving `same' unbound.
    same = 1
    for subpattern, subdata in map(None, pattern, data):
        same, vars = match(subpattern, subdata, vars)
        if not same:
            break
    return same, vars


#
#  This pattern identifies compound statements, allowing them to be readily
#  differentiated from simple statements.
#
COMPOUND_STMT_PATTERN = (
    symbol.stmt,
    (symbol.compound_stmt, ['compound'])
    )


#  This pattern will match a 'stmt' node which *might* represent a docstring;
#  docstrings require that the statement which provides the docstring be the
#  first statement in the class or function, which this pattern does not check.
#
#  NOTE(review): the chain of nonterminals below presumably mirrors the
#  grammar of the interpreter this was written for -- confirm against the
#  `symbol' module in use.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))