"""Simple code to extract class & function docstrings from a module.

This code is used as an example in the library reference manual in the
section on using the parser module.  Refer to the manual for a thorough
discussion of the operation of this code.
"""
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport os
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport parser
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport symbol
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport token
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport types
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom types import ListType, TupleType
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree.  The module name is the file's base name with the extension
    removed.
    """
    # Close the file explicitly instead of leaving the handle to be
    # reclaimed whenever the file object happens to be collected.
    fp = open(fileName)
    try:
        source = fp.read()
    finally:
        fp.close()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    return ModuleInfo(ast.totuple(), basename)
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class SuiteInfoBase:
    """Docstring and nested class/function information for one suite.

    Instances are built from the tuple form of a parse tree node.  The
    classes and functions found directly inside that node are stored by
    name and can be looked up through the accessor methods or by
    indexing the instance with a name.
    """
    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        """Create the (initially empty) tables and scan `tree' if given.

        tree
            Parse tree node in tuple form.  A false value (None or an
            empty tuple) leaves the instance empty.
        """
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Record the docstring and the inner definitions found in `tree'."""
        # extract docstring
        if len(tree) == 2:
            # Only one child: compare it with the simple_stmt part of the
            # docstring pattern (presumably a one-line suite -- confirm
            # against the grammar in use).
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Otherwise the candidate statement is taken from index 3
            # (presumably following NEWLINE and INDENT -- confirm against
            # the grammar in use).
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have anything at index 3; treat it as "no
            # docstring" instead of failing with an IndexError.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() turns the string literal captured from
            # the parse tree into its value.  Confirm that only trusted
            # source files are ever analysed.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if found:
                cstmt = bindings['compound']
                if cstmt[0] == symbol.funcdef:
                    name = cstmt[2][1]
                    self._function_info[name] = FunctionInfo(cstmt)
                elif cstmt[0] == symbol.classdef:
                    name = cstmt[2][1]
                    self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the docstring, or '' when none was found."""
        return self._docstring

    def get_name(self):
        """Return the name recorded for this object ('' by default)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes found directly in the suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the ClassInfo stored under `name'; KeyError if absent."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        Raises KeyError when neither table contains `name'.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class SuiteFuncInfo:
    """Mixin giving access to function names and their info objects.

    The mixin only reads ``self._function_info``; the class it is mixed
    into is responsible for creating that mapping.
    """

    def get_function_names(self):
        """Return the keys of the function table."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the entry stored under `name' in the function table.

        A lookup failure from the underlying mapping is not caught here.
        """
        functions = self._function_info
        return functions[name]
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function, built from its funcdef node."""

    def __init__(self, tree = None):
        """Record the function name and scan the function body.

        tree
            The funcdef node in tuple form.  The name is read from
            tree[2][1] and the last child is handed to SuiteInfoBase as
            the body.  When omitted (or empty) the instance is left
            empty with the default name ''.
        """
        # The signature allows tree=None, so only index into the tree
        # when there is one.
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            body = None
        SuiteInfoBase.__init__(self, body)
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class ClassInfo(SuiteInfoBase):
    """Information about one class, built from its classdef node.

    Functions found directly in the class body are exposed as methods.
    """

    def __init__(self, tree = None):
        """Record the class name and scan the class body.

        tree
            The classdef node in tuple form.  The name is read from
            tree[2][1] and the last child is handed to SuiteInfoBase as
            the body.  When omitted (or empty) the instance is left
            empty with the default name ''.
        """
        # The signature allows tree=None, so only index into the tree
        # when there is one.
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            body = None
        SuiteInfoBase.__init__(self, body)

    def get_method_names(self):
        """Return the names of the functions found in the class body."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the FunctionInfo stored under `name'; KeyError if absent."""
        return self._function_info[name]
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module, built from its parse tree."""

    def __init__(self, tree = None, name = "<string>"):
        """Record the module name and scan the module's parse tree.

        tree
            Parse tree of the module in tuple form; may be omitted.

        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree and len(tree) > 1:
            # For a module the only docstring candidate is the first
            # child, so redo the lookup there and let its result replace
            # whatever the generic scan in SuiteInfoBase recorded.
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Store the value of the string literal, as SuiteInfoBase
                # does for classes and functions, not the literal text
                # with its quotes.
                # NOTE(review): eval() of text taken from the parsed
                # file -- confirm that only trusted sources are analysed.
                self._docstring = eval(bindings["docstring"])
            else:
                # No docstring in first position: drop anything the base
                # class may have picked up from a later statement.
                self._docstring = ''
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.  A dictionary that is
        passed in is updated in place, even when the overall match
        fails part-way through.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    A tuple pattern matches data of the same length whose items match the
    pattern's items pairwise; an empty tuple matches empty data.  Data that
    has no length never matches a tuple pattern.  Any other pattern value
    matches when it compares equal to the data.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return True, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    # A tuple pattern needs sized data of equal length; data without a
    # length (a number, None, ...) is a mismatch rather than an error.
    try:
        same_length = len(data) == len(pattern)
    except TypeError:
        return False, vars
    if not same_length:
        return False, vars
    # Lengths are equal, so zip() pairs every item.  Separate loop names
    # keep the `pattern' and `data' arguments intact.
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            return False, vars
    # Reached for a fully matched tuple, including the empty one.
    return True, vars
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
#  Pattern for a 'stmt' node whose single child is a 'compound_stmt' node.
#  The child of that 'compound_stmt' is captured under the name 'compound',
#  which lets compound statements be told apart from simple statements.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
#  Pattern for a 'stmt' node which *might* be a docstring: a simple statement
#  made of nothing but one string literal, whose text is captured under the
#  name 'docstring'.  A real docstring must also be the first statement of
#  its class or function; this pattern does not check the position.
#
#  The pattern is assembled from the inside out: the STRING leaf is wrapped
#  in each single-child node in turn, finishing with 'small_stmt'.  The
#  result then goes into 'simple_stmt' next to the closing NEWLINE, and that
#  into the outer 'stmt'.
#
DOCSTRING_STMT_PATTERN = (token.STRING, ['docstring'])
for _wrapper in (symbol.atom,
                 symbol.power,
                 symbol.factor,
                 symbol.term,
                 symbol.arith_expr,
                 symbol.shift_expr,
                 symbol.and_expr,
                 symbol.xor_expr,
                 symbol.expr,
                 symbol.comparison,
                 symbol.not_test,
                 symbol.and_test,
                 symbol.test,
                 symbol.testlist,
                 symbol.expr_stmt,
                 symbol.small_stmt):
    DOCSTRING_STMT_PATTERN = (_wrapper, DOCSTRING_STMT_PATTERN)
del _wrapper                            # keep the loop name out of the module
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt, DOCSTRING_STMT_PATTERN, (token.NEWLINE, '')),
    )
191