1"""Simple code to extract class & function docstrings from a module.
2
3This code is used as an example in the library reference manual in the
4section on using the parser module.  Refer to the manual for a thorough
5discussion of the operation of this code.
6"""
7
8import os
9import parser
10import symbol
11import token
12import types
13
14from types import ListType, TupleType
15
16
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree.  The module name is the file's base name with its extension
    removed.
    """
    # Use a context manager so the file handle is closed promptly, even if
    # read() raises, rather than relying on garbage collection.
    with open(fileName) as source_file:
        source = source_file.read()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    st = parser.suite(source)
    return ModuleInfo(st.totuple(), basename)
27
28
class SuiteInfoBase:
    """Common base for objects describing a suite of statements.

    An instance records the suite's docstring plus the classes and
    functions defined directly inside it, keyed by name.

    tree
        Parse tree (nested tuples) of the suite to inspect.  If omitted or
        empty, no information is extracted and the docstring stays ''.
    """
    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Fill in the docstring and the inner class/function tables."""
        # extract docstring
        if len(tree) == 2:
            # Two-element tree: the body is a single simple statement, so
            # only the simple_stmt part of the pattern applies.
            found, captured = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Longer tree: the candidate docstring statement is taken from
            # index 3 (presumably just past the NEWLINE and INDENT tokens
            # of an indented suite -- see the grammar's `suite` rule).
            found, captured = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to hold a statement at index 3; treat it as having
            # no docstring instead of raising IndexError.
            found, captured = False, {}
        if found:
            # NOTE(review): eval() turns the string literal's source text
            # into its value.  The text comes straight from the parsed
            # file, so only run this on trusted source code.
            self._docstring = eval(captured['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, captured = match(COMPOUND_STMT_PATTERN, node)
            if not found:
                continue
            cstmt = captured['compound']
            kind = cstmt[0]
            if kind == symbol.funcdef:
                # cstmt[2][1] is used as the definition's name.
                self._function_info[cstmt[2][1]] = FunctionInfo(cstmt)
            elif kind == symbol.classdef:
                self._class_info[cstmt[2][1]] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the extracted docstring, or '' if none was found."""
        return self._docstring

    def get_name(self):
        """Return the name recorded for this suite ('' if none was set)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes defined directly in the suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name`; KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name` up among the classes first, then the functions.

        Raises KeyError if `name` is neither.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
76
77
class SuiteFuncInfo:
    """Mixin class providing access to function names and info.

    The host class must provide the `_function_info` dictionary that
    these accessors read.
    """

    def get_function_names(self):
        """Return the names of the recorded functions."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the info recorded for function `name` (KeyError if absent)."""
        functions = self._function_info
        return functions[name]
86
87
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Docstring and nested-definition information for one function.

    tree
        Parse-tree node of the function definition.  The name is read
        from tree[2][1] and the function body is taken from the node's
        last element.  If omitted or empty, an empty info object is
        created whose name and docstring are both ''.
    """
    def __init__(self, tree = None):
        body = None
        if tree:
            # Only index into the node when one was supplied; the default
            # tree=None used to fail here with a TypeError.
            self._name = tree[2][1]
            body = tree[-1]
        SuiteInfoBase.__init__(self, body)
92
93
class ClassInfo(SuiteInfoBase):
    """Docstring and method information for one class definition.

    tree
        Parse-tree node of the class definition.  The name is read from
        tree[2][1] and the class body is taken from the node's last
        element.  If omitted or empty, an empty info object is created
        whose name and docstring are both ''.
    """
    def __init__(self, tree = None):
        body = None
        if tree:
            # Only index into the node when one was supplied; the default
            # tree=None used to fail here with a TypeError.
            self._name = tree[2][1]
            body = tree[-1]
        SuiteInfoBase.__init__(self, body)

    def get_method_names(self):
        """Return the names of the functions defined in the class body."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object for method `name`; KeyError if unknown."""
        return self._function_info[name]
104
105
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Docstring, class and function information for a whole module.

    tree
        Parse tree (nested tuples) of the module.  If omitted or empty,
        an empty info object is created.

    name
        Name to report for the module; defaults to "<string>".
    """
    def __init__(self, tree = None, name = "<string>"):
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            # For a module the docstring statement is looked for at
            # tree[1], not at the index the base class probes, so the
            # base-class result is always replaced here.
            found, captured = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the literal, as SuiteInfoBase does, so callers
                # get the string's value rather than its quoted source
                # text.  NOTE(review): eval() runs on text taken from the
                # parsed file; use only on trusted source code.
                self._docstring = eval(captured["docstring"])
            else:
                # Discard anything the base class picked up from a later
                # statement; that is not the module docstring.
                self._docstring = ''
114
115
def match(pattern, data, vars=None):
    """Match `data` against `pattern`, with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and from which variable values are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.  A dictionary passed in
        is updated in place.

    The `pattern` value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    A tuple pattern matches data of the same length whose items match the
    pattern's items pairwise; any other pattern matches by equality.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  When the match fails, the dictionary may still hold variables
    captured before the mismatch was reached.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return True, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    try:
        data_len = len(data)
    except TypeError:
        # Unsized data (e.g. a bare number) can never match a tuple pattern.
        return False, vars
    if data_len != len(pattern):
        return False, vars
    # Lengths are equal here, so zip() pairs every item; it replaces the
    # Python 2-only map(None, ...) idiom.  An empty pattern matches empty
    # data because the loop simply does not run.
    for sub_pattern, sub_data in zip(pattern, data):
        matched, vars = match(sub_pattern, sub_data, vars)
        if not matched:
            return False, vars
    return True, vars
153
154
#  This pattern identifies compound statements, allowing them to be readily
#  differentiated from simple statements.
#
#  It matches a 'stmt' node whose only child is a 'compound_stmt' node that
#  itself has exactly one child; that child is captured under the variable
#  name 'compound'.  SuiteInfoBase._extract_info() then inspects the
#  captured node to see whether it is a function or class definition.
#
COMPOUND_STMT_PATTERN = (
    symbol.stmt,
    (symbol.compound_stmt, ['compound'])
    )
162
163
#  This pattern will match a 'stmt' node which *might* represent a docstring;
#  docstrings require that the statement which provides the docstring be the
#  first statement in the class or function, which this pattern does not check.
#
#  The statement must consist of a chain of single-child nodes leading from
#  'simple_stmt' down to an 'atom' holding exactly one STRING token, followed
#  by a NEWLINE token.  The source text of the STRING token (quotes included)
#  is captured under the variable name 'docstring'.
#
#  DOCSTRING_STMT_PATTERN[1] is the 'simple_stmt' sub-pattern on its own;
#  SuiteInfoBase._extract_info() uses it when the tree being examined has
#  only two elements.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))
191