1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Pattern compiler.
5
6The grammar is taken from PatternGrammar.txt.
7
8The compiler compiles a pattern to a pytree.*Pattern instance.
9"""
10
11__author__ = "Guido van Rossum <guido@python.org>"
12
13# Python imports
14import io
15import os
16
17# Fairly local imports
18from .pgen2 import driver, literals, token, tokenize, parse, grammar
19
20# Really local imports
21from . import pytree
22from . import pygram
23
24# The pattern grammar file
25_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
26                                     "PatternGrammar.txt")
27
28
29class PatternSyntaxError(Exception):
30    pass
31
32
33def tokenize_wrapper(input):
34    """Tokenizes a string suppressing significant whitespace."""
35    skip = {token.NEWLINE, token.INDENT, token.DEDENT}
36    tokens = tokenize.generate_tokens(io.StringIO(input).readline)
37    for quintuple in tokens:
38        type, value, start, end, line_text = quintuple
39        if type not in skip:
40            yield quintuple
41
42
43class PatternCompiler(object):
44
45    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
46        """Initializer.
47
48        Takes an optional alternative filename for the pattern grammar.
49        """
50        self.grammar = driver.load_grammar(grammar_file)
51        self.syms = pygram.Symbols(self.grammar)
52        self.pygrammar = pygram.python_grammar
53        self.pysyms = pygram.python_symbols
54        self.driver = driver.Driver(self.grammar, convert=pattern_convert)
55
56    def compile_pattern(self, input, debug=False, with_tree=False):
57        """Compiles a pattern string to a nested pytree.*Pattern object."""
58        tokens = tokenize_wrapper(input)
59        try:
60            root = self.driver.parse_tokens(tokens, debug=debug)
61        except parse.ParseError as e:
62            raise PatternSyntaxError(str(e))
63        if with_tree:
64            return self.compile_node(root), root
65        else:
66            return self.compile_node(root)
67
68    def compile_node(self, node):
69        """Compiles a node, recursively.
70
71        This is one big switch on the node type.
72        """
73        # XXX Optimize certain Wildcard-containing-Wildcard patterns
74        # that can be merged
75        if node.type == self.syms.Matcher:
76            node = node.children[0] # Avoid unneeded recursion
77
78        if node.type == self.syms.Alternatives:
79            # Skip the odd children since they are just '|' tokens
80            alts = [self.compile_node(ch) for ch in node.children[::2]]
81            if len(alts) == 1:
82                return alts[0]
83            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
84            return p.optimize()
85
86        if node.type == self.syms.Alternative:
87            units = [self.compile_node(ch) for ch in node.children]
88            if len(units) == 1:
89                return units[0]
90            p = pytree.WildcardPattern([units], min=1, max=1)
91            return p.optimize()
92
93        if node.type == self.syms.NegatedUnit:
94            pattern = self.compile_basic(node.children[1:])
95            p = pytree.NegatedPattern(pattern)
96            return p.optimize()
97
98        assert node.type == self.syms.Unit
99
100        name = None
101        nodes = node.children
102        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
103            name = nodes[0].value
104            nodes = nodes[2:]
105        repeat = None
106        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
107            repeat = nodes[-1]
108            nodes = nodes[:-1]
109
110        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
111        pattern = self.compile_basic(nodes, repeat)
112
113        if repeat is not None:
114            assert repeat.type == self.syms.Repeater
115            children = repeat.children
116            child = children[0]
117            if child.type == token.STAR:
118                min = 0
119                max = pytree.HUGE
120            elif child.type == token.PLUS:
121                min = 1
122                max = pytree.HUGE
123            elif child.type == token.LBRACE:
124                assert children[-1].type == token.RBRACE
125                assert  len(children) in (3, 5)
126                min = max = self.get_int(children[1])
127                if len(children) == 5:
128                    max = self.get_int(children[3])
129            else:
130                assert False
131            if min != 1 or max != 1:
132                pattern = pattern.optimize()
133                pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
134
135        if name is not None:
136            pattern.name = name
137        return pattern.optimize()
138
139    def compile_basic(self, nodes, repeat=None):
140        # Compile STRING | NAME [Details] | (...) | [...]
141        assert len(nodes) >= 1
142        node = nodes[0]
143        if node.type == token.STRING:
144            value = str(literals.evalString(node.value))
145            return pytree.LeafPattern(_type_of_literal(value), value)
146        elif node.type == token.NAME:
147            value = node.value
148            if value.isupper():
149                if value not in TOKEN_MAP:
150                    raise PatternSyntaxError("Invalid token: %r" % value)
151                if nodes[1:]:
152                    raise PatternSyntaxError("Can't have details for token")
153                return pytree.LeafPattern(TOKEN_MAP[value])
154            else:
155                if value == "any":
156                    type = None
157                elif not value.startswith("_"):
158                    type = getattr(self.pysyms, value, None)
159                    if type is None:
160                        raise PatternSyntaxError("Invalid symbol: %r" % value)
161                if nodes[1:]: # Details present
162                    content = [self.compile_node(nodes[1].children[1])]
163                else:
164                    content = None
165                return pytree.NodePattern(type, content)
166        elif node.value == "(":
167            return self.compile_node(nodes[1])
168        elif node.value == "[":
169            assert repeat is None
170            subpattern = self.compile_node(nodes[1])
171            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
172        assert False, node
173
174    def get_int(self, node):
175        assert node.type == token.NUMBER
176        return int(node.value)
177
178
179# Map named tokens to the type value for a LeafPattern
180TOKEN_MAP = {"NAME": token.NAME,
181             "STRING": token.STRING,
182             "NUMBER": token.NUMBER,
183             "TOKEN": None}
184
185
186def _type_of_literal(value):
187    if value[0].isalpha():
188        return token.NAME
189    elif value in grammar.opmap:
190        return grammar.opmap[value]
191    else:
192        return None
193
194
195def pattern_convert(grammar, raw_node_info):
196    """Converts raw node information to a Node or Leaf instance."""
197    type, value, context, children = raw_node_info
198    if children or type in grammar.number2symbol:
199        return pytree.Node(type, children, context=context)
200    else:
201        return pytree.Leaf(type, value, context=context)
202
203
204def compile_pattern(pattern):
205    return PatternCompiler().compile_pattern(pattern)
206