1ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh# Copyright 2006 Google, Inc. All Rights Reserved.
2ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh# Licensed to PSF under a Contributor Agreement.
3ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
4ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh"""
5ffab958fd8d42ed7227d83007350e61555a1fa36Andrew HsiehPython parse tree definitions.
6ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
7ffab958fd8d42ed7227d83007350e61555a1fa36Andrew HsiehThis is a very concrete parse tree; we need to keep every token and
8ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieheven the comments and whitespace between tokens.
9ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
10ffab958fd8d42ed7227d83007350e61555a1fa36Andrew HsiehThere's also a pattern matching implementation here.
11ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh"""
12ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
13ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh__author__ = "Guido van Rossum <guido@python.org>"
14ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
15ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport sys
16ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport warnings
17ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom StringIO import StringIO
18ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
19ffab958fd8d42ed7227d83007350e61555a1fa36Andrew HsiehHUGE = 0x7FFFFFFF  # maximum repeat count, default max
20ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
21ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh_type_reprs = {}
22ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef type_repr(type_num):
23ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    global _type_reprs
24ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if not _type_reprs:
25ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        from .pygram import python_symbols
26ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # printing tokens is possible but not as useful
27ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # from .pgen2 import token // token.__dict__.items():
28ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for name, val in python_symbols.__dict__.items():
29ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if type(val) == int: _type_reprs[val] = name
30ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    return _type_reprs.setdefault(type_num, type_num)
31ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
32ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass Base(object):
33ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
34ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
35ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    Abstract base class for Node and Leaf.
36ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
37ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    This provides some default functionality and boilerplate using the
38ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    template pattern.
39ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
40ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    A node may be a subnode of at most one parent.
41ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
42ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
43ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # Default values for instance variables
44ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    type = None    # int: token number (< 256) or symbol number (>= 256)
45ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    parent = None  # Parent node pointer, or None
46ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    children = ()  # Tuple of subnodes
47ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    was_changed = False
48ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    was_checked = False
49ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
50ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __new__(cls, *args, **kwds):
51ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Constructor that prevents Base from being instantiated."""
52ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert cls is not Base, "Cannot instantiate Base"
53ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return object.__new__(cls)
54ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
55ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __eq__(self, other):
56ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
57ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Compare two nodes for equality.
58ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
59ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This calls the method _eq().
60ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
61ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.__class__ is not other.__class__:
62ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return NotImplemented
63ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self._eq(other)
64ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
65ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    __hash__ = None # For Py3 compatibility.
66ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
67ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __ne__(self, other):
68ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
69ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Compare two nodes for inequality.
70ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
71ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This calls the method _eq().
72ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
73ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.__class__ is not other.__class__:
74ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return NotImplemented
75ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return not self._eq(other)
76ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
77ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _eq(self, other):
78ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
79ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Compare two nodes for equality.
80ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
81ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This is called by __eq__ and __ne__.  It is only called if the two nodes
82ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        have the same type.  This must be implemented by the concrete subclass.
83ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Nodes should be considered equal if they have the same structure,
84ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        ignoring the prefix string and other context information.
85ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
86ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        raise NotImplementedError
87ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
88ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def clone(self):
89ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
90ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return a cloned (deep) copy of self.
91ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
92ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This must be implemented by the concrete subclass.
93ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
94ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        raise NotImplementedError
95ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
96ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def post_order(self):
97ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
98ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return a post-order iterator for the tree.
99ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
100ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This must be implemented by the concrete subclass.
101ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
102ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        raise NotImplementedError
103ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
104ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def pre_order(self):
105ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
106ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return a pre-order iterator for the tree.
107ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
108ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This must be implemented by the concrete subclass.
109ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
110ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        raise NotImplementedError
111ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
112ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def set_prefix(self, prefix):
113ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
114ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Set the prefix for the node (see Leaf class).
115ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
116ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        DEPRECATED; use the prefix property directly.
117ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
118ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        warnings.warn("set_prefix() is deprecated; use the prefix property",
119ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                      DeprecationWarning, stacklevel=2)
120ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.prefix = prefix
121ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
122ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def get_prefix(self):
123ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
124ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return the prefix for the node (see Leaf class).
125ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
126ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        DEPRECATED; use the prefix property directly.
127ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
128ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        warnings.warn("get_prefix() is deprecated; use the prefix property",
129ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                      DeprecationWarning, stacklevel=2)
130ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self.prefix
131ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
132ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def replace(self, new):
133ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Replace this node with a new one in the parent."""
134ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert self.parent is not None, str(self)
135ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert new is not None
136ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if not isinstance(new, list):
137ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            new = [new]
138ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        l_children = []
139ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        found = False
140ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for ch in self.parent.children:
141ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if ch is self:
142ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                assert not found, (self.parent.children, self, new)
143ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if new is not None:
144ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    l_children.extend(new)
145ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                found = True
146ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            else:
147ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                l_children.append(ch)
148ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert found, (self.children, self, new)
149ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.parent.changed()
150ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.parent.children = l_children
151ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for x in new:
152ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            x.parent = self.parent
153ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.parent = None
154ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
155ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def get_lineno(self):
156ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return the line number which generated the invocant node."""
157ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        node = self
158ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        while not isinstance(node, Leaf):
159ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if not node.children:
160ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return
161ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            node = node.children[0]
162ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return node.lineno
163ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
164ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def changed(self):
165ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.parent:
166ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.parent.changed()
167ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.was_changed = True
168ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
169ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def remove(self):
170ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
171ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Remove the node from the tree. Returns the position of the node in its
172ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        parent's children before it was removed.
173ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
174ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.parent:
175ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for i, node in enumerate(self.parent.children):
176ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if node is self:
177ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    self.parent.changed()
178ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    del self.parent.children[i]
179ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    self.parent = None
180ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    return i
181ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
182ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    @property
183ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def next_sibling(self):
184ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
185ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The node immediately following the invocant in their parent's children
186ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        list. If the invocant does not have a next sibling, it is None
187ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
188ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.parent is None:
189ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return None
190ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
191ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Can't use index(); we need to test by identity
192ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for i, child in enumerate(self.parent.children):
193ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if child is self:
194ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                try:
195ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    return self.parent.children[i+1]
196ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                except IndexError:
197ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    return None
198ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
199ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    @property
200ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def prev_sibling(self):
201ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
202ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The node immediately preceding the invocant in their parent's children
203ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        list. If the invocant does not have a previous sibling, it is None.
204ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
205ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.parent is None:
206ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return None
207ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
208ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Can't use index(); we need to test by identity
209ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for i, child in enumerate(self.parent.children):
210ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if child is self:
211ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if i == 0:
212ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    return None
213ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return self.parent.children[i-1]
214ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
215ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def leaves(self):
216ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for child in self.children:
217ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for x in child.leaves():
218ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield x
219ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
220ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def depth(self):
221ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.parent is None:
222ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return 0
223ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return 1 + self.parent.depth()
224ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
225ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def get_suffix(self):
226ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
227ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return the string immediately following the invocant node. This is
228ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        effectively equivalent to node.next_sibling.prefix
229ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
230ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        next_sib = self.next_sibling
231ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if next_sib is None:
232ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return u""
233ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return next_sib.prefix
234ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
235ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if sys.version_info < (3, 0):
236ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        def __str__(self):
237ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return unicode(self).encode("ascii")
238ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
239ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass Node(Base):
240ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
241ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """Concrete implementation for interior nodes."""
242ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
243ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self,type, children,
244ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                 context=None,
245ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                 prefix=None,
246ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                 fixers_applied=None):
247ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
248ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Initializer.
249ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
250ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Takes a type constant (a symbol number >= 256), a sequence of
251ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        child nodes, and an optional context keyword argument.
252ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
253ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        As a side effect, the parent pointers of the children are updated.
254ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
255ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert type >= 256, type
256ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.type = type
257ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.children = list(children)
258ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for ch in self.children:
259ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert ch.parent is None, repr(ch)
260ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            ch.parent = self
261ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if prefix is not None:
262ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.prefix = prefix
263ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if fixers_applied:
264ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.fixers_applied = fixers_applied[:]
265ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
266ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.fixers_applied = None
267ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
268ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __repr__(self):
269ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a canonical string representation."""
270ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return "%s(%s, %r)" % (self.__class__.__name__,
271ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                               type_repr(self.type),
272ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                               self.children)
273ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
274ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __unicode__(self):
275ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
276ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return a pretty string representation.
277ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
278ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This reproduces the input source exactly.
279ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
280ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return u"".join(map(unicode, self.children))
281ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
282ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if sys.version_info > (3, 0):
283ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        __str__ = __unicode__
284ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
285ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _eq(self, other):
286ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Compare two nodes for equality."""
287ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return (self.type, self.children) == (other.type, other.children)
288ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
289ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def clone(self):
290ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a cloned (deep) copy of self."""
291ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return Node(self.type, [ch.clone() for ch in self.children],
292ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    fixers_applied=self.fixers_applied)
293ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
294ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def post_order(self):
295ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a post-order iterator for the tree."""
296ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for child in self.children:
297ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for node in child.post_order():
298ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield node
299ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        yield self
300ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
301ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def pre_order(self):
302ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a pre-order iterator for the tree."""
303ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        yield self
304ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for child in self.children:
305ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for node in child.pre_order():
306ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield node
307ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
308ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _prefix_getter(self):
309ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
310ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The whitespace and comments preceding this node in the input.
311ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
312ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if not self.children:
313ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return ""
314ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self.children[0].prefix
315ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
316ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _prefix_setter(self, prefix):
317ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.children:
318ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.children[0].prefix = prefix
319ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
320ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    prefix = property(_prefix_getter, _prefix_setter)
321ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
322ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def set_child(self, i, child):
323ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
324ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Equivalent to 'node.children[i] = child'. This method also sets the
325ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        child's parent attribute appropriately.
326ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
327ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        child.parent = self
328ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.children[i].parent = None
329ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.children[i] = child
330ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.changed()
331ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
332ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def insert_child(self, i, child):
333ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
334ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Equivalent to 'node.children.insert(i, child)'. This method also sets
335ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        the child's parent attribute appropriately.
336ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
337ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        child.parent = self
338ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.children.insert(i, child)
339ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.changed()
340ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
341ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def append_child(self, child):
342ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
343ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Equivalent to 'node.children.append(child)'. This method also sets the
344ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        child's parent attribute appropriately.
345ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
346ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        child.parent = self
347ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.children.append(child)
348ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.changed()
349ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
350ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
351ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass Leaf(Base):
352ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
353ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """Concrete implementation for leaf nodes."""
354ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
355ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # Default values for instance variables
356ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    _prefix = ""  # Whitespace and comments preceding this token in the input
357ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    lineno = 0    # Line where this token starts in the input
358ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    column = 0    # Column where this token tarts in the input
359ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
360ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self, type, value,
361ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                 context=None,
362ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                 prefix=None,
363ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                 fixers_applied=[]):
364ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
365ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Initializer.
366ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
367ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Takes a type constant (a token number < 256), a string value, and an
368ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        optional context keyword argument.
369ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
370ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert 0 <= type < 256, type
371ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if context is not None:
372ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self._prefix, (self.lineno, self.column) = context
373ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.type = type
374ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.value = value
375ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if prefix is not None:
376ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self._prefix = prefix
377ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.fixers_applied = fixers_applied[:]
378ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
379ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __repr__(self):
380ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a canonical string representation."""
381ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return "%s(%r, %r)" % (self.__class__.__name__,
382ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                               self.type,
383ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                               self.value)
384ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
385ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __unicode__(self):
386ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
387ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Return a pretty string representation.
388ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
389ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This reproduces the input source exactly.
390ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
391ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self.prefix + unicode(self.value)
392ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
393ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if sys.version_info > (3, 0):
394ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        __str__ = __unicode__
395ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
396ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _eq(self, other):
397ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Compare two nodes for equality."""
398ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return (self.type, self.value) == (other.type, other.value)
399ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
400ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def clone(self):
401ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a cloned (deep) copy of self."""
402ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return Leaf(self.type, self.value,
403ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    (self.prefix, (self.lineno, self.column)),
404ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    fixers_applied=self.fixers_applied)
405ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
406ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def leaves(self):
407ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        yield self
408ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
409ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def post_order(self):
410ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a post-order iterator for the tree."""
411ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        yield self
412ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
413ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def pre_order(self):
414ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Return a pre-order iterator for the tree."""
415ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        yield self
416ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
417ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _prefix_getter(self):
418ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
419ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The whitespace and comments preceding this token in the input.
420ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
421ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self._prefix
422ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
423ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _prefix_setter(self, prefix):
424ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.changed()
425ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self._prefix = prefix
426ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
427ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    prefix = property(_prefix_getter, _prefix_setter)
428ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
429ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef convert(gr, raw_node):
430ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
431ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    Convert raw node information to a Node or Leaf instance.
432ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
433ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    This is passed to the parser driver which calls it whenever a reduction of a
434ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    grammar rule produces a new complete node, so that the tree is build
435ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    strictly bottom-up.
436ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
437ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    type, value, context, children = raw_node
438ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if children or type in gr.number2symbol:
439ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # If there's exactly one child, return that child instead of
440ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # creating a new node.
441ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if len(children) == 1:
442ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return children[0]
443ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return Node(type, children, context=context)
444ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    else:
445ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return Leaf(type, value, context=context)
446ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
447ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
448ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass BasePattern(object):
449ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
450ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
451ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    A pattern is a tree matching pattern.
452ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
453ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    It looks for a specific node type (token or symbol), and
454ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    optionally for a specific content.
455ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
456ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    This is an abstract base class.  There are three concrete
457ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    subclasses:
458ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
459ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    - LeafPattern matches a single leaf node;
460ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    - NodePattern matches a single node (usually non-leaf);
461ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    - WildcardPattern matches a sequence of nodes of variable length.
462ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
463ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
464ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    # Defaults for instance variables
465ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    type = None     # Node type (token if < 256, symbol if >= 256)
466ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    content = None  # Optional content matching pattern
467ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    name = None     # Optional name used to store match in results dict
468ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
469ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __new__(cls, *args, **kwds):
470ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Constructor that prevents BasePattern from being instantiated."""
471ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert cls is not BasePattern, "Cannot instantiate BasePattern"
472ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return object.__new__(cls)
473ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
474ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __repr__(self):
475ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        args = [type_repr(self.type), self.content, self.name]
476ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        while args and args[-1] is None:
477ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            del args[-1]
478ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
479ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
480ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def optimize(self):
481ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
482ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        A subclass can define this as a hook for optimizations.
483ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
484ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Returns either self or another node with the same effect.
485ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
486ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self
487ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
488ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match(self, node, results=None):
489ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
490ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Does this pattern exactly match a node?
491ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
492ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Returns True if it matches, False if not.
493ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
494ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        If results is not None, it must be a dict which will be
495ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        updated with the nodes matching named subpatterns.
496ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
497ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Default implementation for non-wildcard patterns.
498ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
499ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.type is not None and node.type != self.type:
500ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return False
501ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.content is not None:
502ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            r = None
503ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if results is not None:
504ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                r = {}
505ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if not self._submatch(node, r):
506ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return False
507ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if r:
508ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                results.update(r)
509ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if results is not None and self.name:
510ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            results[self.name] = node
511ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return True
512ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
513ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match_seq(self, nodes, results=None):
514ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
515ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Does this pattern exactly match a sequence of nodes?
516ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
517ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Default implementation for non-wildcard patterns.
518ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
519ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if len(nodes) != 1:
520ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return False
521ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self.match(nodes[0], results)
522ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
523ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def generate_matches(self, nodes):
524ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
525ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Generator yielding all matches for this pattern.
526ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
527ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Default implementation for non-wildcard patterns.
528ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
529ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        r = {}
530ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if nodes and self.match(nodes[0], r):
531ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            yield 1, r
532ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
533ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
534ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass LeafPattern(BasePattern):
535ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
536ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self, type=None, content=None, name=None):
537ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
538ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Initializer.  Takes optional type, content, and name.
539ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
540ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The type, if given must be a token type (< 256).  If not given,
541ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        this matches any *leaf* node; the content may still be required.
542ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
543ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The content, if given, must be a string.
544ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
545ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        If a name is given, the matching node is stored in the results
546ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        dict under that key.
547ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
548ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if type is not None:
549ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert 0 <= type < 256, type
550ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if content is not None:
551ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert isinstance(content, basestring), repr(content)
552ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.type = type
553ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.content = content
554ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.name = name
555ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
556ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match(self, node, results=None):
557ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Override match() to insist on a leaf node."""
558ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if not isinstance(node, Leaf):
559ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return False
560ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return BasePattern.match(self, node, results)
561ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
562ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _submatch(self, node, results=None):
563ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
564ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Match the pattern's content to the node's children.
565ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
566ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This assumes the node type matches and self.content is not None.
567ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
568ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Returns True if it matches, False if not.
569ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
570ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        If results is not None, it must be a dict which will be
571ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        updated with the nodes matching named subpatterns.
572ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
573ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        When returning False, the results dict may still be updated.
574ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
575ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self.content == node.value
576ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
577ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
578ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass NodePattern(BasePattern):
579ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
580ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    wildcards = False
581ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
582ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self, type=None, content=None, name=None):
583ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
584ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Initializer.  Takes optional type, content, and name.
585ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
586ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The type, if given, must be a symbol type (>= 256).  If the
587ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        type is None this matches *any* single node (leaf or not),
588ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except if content is not None, in which it only matches
589ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        non-leaf nodes that also match the content pattern.
590ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
591ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The content, if not None, must be a sequence of Patterns that
592ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        must match the node's children exactly.  If the content is
593ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        given, the type must not be None.
594ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
595ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        If a name is given, the matching node is stored in the results
596ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        dict under that key.
597ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
598ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if type is not None:
599ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert type >= 256, type
600ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if content is not None:
601ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert not isinstance(content, basestring), repr(content)
602ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            content = list(content)
603ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for i, item in enumerate(content):
604ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                assert isinstance(item, BasePattern), (i, item)
605ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if isinstance(item, WildcardPattern):
606ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    self.wildcards = True
607ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.type = type
608ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.content = content
609ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.name = name
610ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
611ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _submatch(self, node, results=None):
612ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
613ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Match the pattern's content to the node's children.
614ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
615ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        This assumes the node type matches and self.content is not None.
616ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
617ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Returns True if it matches, False if not.
618ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
619ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        If results is not None, it must be a dict which will be
620ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        updated with the nodes matching named subpatterns.
621ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
622ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        When returning False, the results dict may still be updated.
623ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
624ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.wildcards:
625ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for c, r in generate_matches(self.content, node.children):
626ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if c == len(node.children):
627ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if results is not None:
628ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        results.update(r)
629ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    return True
630ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return False
631ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if len(self.content) != len(node.children):
632ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return False
633ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for subpattern, child in zip(self.content, node.children):
634ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if not subpattern.match(child, results):
635ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return False
636ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return True
637ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
638ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
639ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass WildcardPattern(BasePattern):
640ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
641ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
642ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    A wildcard pattern can match zero or more nodes.
643ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
644ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    This has all the flexibility needed to implement patterns like:
645ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
646ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    .*      .+      .?      .{m,n}
647ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    (a b c | d e | f)
648ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    (...)*  (...)+  (...)?  (...){m,n}
649ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
650ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    except it always uses non-greedy matching.
651ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
652ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
653ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self, content=None, min=0, max=HUGE, name=None):
654ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
655ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Initializer.
656ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
657ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Args:
658ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            content: optional sequence of subsequences of patterns;
659ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                     if absent, matches one node;
660ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                     if present, each subsequence is an alternative [*]
661ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            min: optional minimum number of times to match, default 0
662ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            max: optional maximum number of times to match, default HUGE
663ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            name: optional name assigned to this match
664ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
665ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
666ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            equivalent to (a b c | d e | f g h); if content is None,
667ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            this is equivalent to '.' in regular expression terms.
668ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            The min and max parameters work as follows:
669ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                min=0, max=maxint: .*
670ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                min=1, max=maxint: .+
671ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                min=0, max=1: .?
672ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                min=1, max=1: .
673ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            If content is not None, replace the dot with the parenthesized
674ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            list of alternatives, e.g. (a b c | d e | f g h)*
675ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
676ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert 0 <= min <= max <= HUGE, (min, max)
677ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if content is not None:
678ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            content = tuple(map(tuple, content))  # Protect against alterations
679ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # Check sanity of alternatives
680ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert len(content), repr(content)  # Can't have zero alternatives
681ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for alt in content:
682ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                assert len(alt), repr(alt) # Can have empty alternatives
683ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.content = content
684ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.min = min
685ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.max = max
686ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.name = name
687ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
688ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def optimize(self):
689ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Optimize certain stacked wildcard patterns."""
690ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        subpattern = None
691ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if (self.content is not None and
692ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            len(self.content) == 1 and len(self.content[0]) == 1):
693ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            subpattern = self.content[0][0]
694ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.min == 1 and self.max == 1:
695ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if self.content is None:
696ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return NodePattern(name=self.name)
697ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if subpattern is not None and  self.name == subpattern.name:
698ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return subpattern.optimize()
699ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
700ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            subpattern.min <= 1 and self.name == subpattern.name):
701ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return WildcardPattern(subpattern.content,
702ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                   self.min*subpattern.min,
703ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                   self.max*subpattern.max,
704ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                   subpattern.name)
705ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self
706ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
707ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match(self, node, results=None):
708ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Does this pattern exactly match a node?"""
709ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return self.match_seq([node], results)
710ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
711ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match_seq(self, nodes, results=None):
712ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Does this pattern exactly match a sequence of nodes?"""
713ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for c, r in self.generate_matches(nodes):
714ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if c == len(nodes):
715ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if results is not None:
716ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    results.update(r)
717ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if self.name:
718ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        results[self.name] = list(nodes)
719ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return True
720ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return False
721ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
722ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def generate_matches(self, nodes):
723ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
724ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Generator yielding matches for a sequence of nodes.
725ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
726ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Args:
727ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            nodes: sequence of nodes
728ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
729ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Yields:
730ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            (count, results) tuples where:
731ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            count: the match comprises nodes[:count];
732ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            results: dict containing named submatches.
733ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
734ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.content is None:
735ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # Shortcut for special case (see __init__.__doc__)
736ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for count in xrange(self.min, 1 + min(len(nodes), self.max)):
737ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                r = {}
738ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if self.name:
739ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    r[self.name] = nodes[:count]
740ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield count, r
741ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        elif self.name == "bare_name":
742ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            yield self._bare_name_matches(nodes)
743ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
744ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # The reason for this is that hitting the recursion limit usually
745ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # results in some ugly messages about how RuntimeErrors are being
746ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # ignored. We don't do this on non-CPython implementation because
747ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # they don't have this problem.
748ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if hasattr(sys, "getrefcount"):
749ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                save_stderr = sys.stderr
750ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                sys.stderr = StringIO()
751ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            try:
752ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                for count, r in self._recursive_matches(nodes, 0):
753ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if self.name:
754ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        r[self.name] = nodes[:count]
755ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    yield count, r
756ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            except RuntimeError:
757ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # We fall back to the iterative pattern matching scheme if the recursive
758ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # scheme hits the recursion limit.
759ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                for count, r in self._iterative_matches(nodes):
760ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if self.name:
761ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        r[self.name] = nodes[:count]
762ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    yield count, r
763ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            finally:
764ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if hasattr(sys, "getrefcount"):
765ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    sys.stderr = save_stderr
766ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
767ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _iterative_matches(self, nodes):
768ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Helper to iteratively yield the matches."""
769ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        nodelen = len(nodes)
770ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if 0 >= self.min:
771ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            yield 0, {}
772ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
773ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        results = []
774ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # generate matches that use just one alt from self.content
775ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for alt in self.content:
776ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for c, r in generate_matches(alt, nodes):
777ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield c, r
778ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                results.append((c, r))
779ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
780ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # for each match, iterate down the nodes
781ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        while results:
782ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            new_results = []
783ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for c0, r0 in results:
784ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # stop if the entire set of nodes has been matched
785ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if c0 < nodelen and c0 <= self.max:
786ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    for alt in self.content:
787ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        for c1, r1 in generate_matches(alt, nodes[c0:]):
788ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            if c1 > 0:
789ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                r = {}
790ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                r.update(r0)
791ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                r.update(r1)
792ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                yield c0 + c1, r
793ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                new_results.append((c0 + c1, r))
794ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            results = new_results
795ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
796ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _bare_name_matches(self, nodes):
797ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Special optimized matcher for bare_name."""
798ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        count = 0
799ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        r = {}
800ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        done = False
801ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        max = len(nodes)
802ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        while not done and count < max:
803ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            done = True
804ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for leaf in self.content:
805ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if leaf[0].match(nodes[count], r):
806ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    count += 1
807ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    done = False
808ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    break
809ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        r[self.name] = nodes[:count]
810ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return count, r
811ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
812ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def _recursive_matches(self, nodes, count):
813ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """Helper to recursively yield the matches."""
814ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        assert self.content is not None
815ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if count >= self.min:
816ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            yield 0, {}
817ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if count < self.max:
818ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for alt in self.content:
819ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                for c0, r0 in generate_matches(alt, nodes):
820ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
821ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        r = {}
822ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        r.update(r0)
823ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        r.update(r1)
824ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        yield c0 + c1, r
825ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
826ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
827ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass NegatedPattern(BasePattern):
828ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
829ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def __init__(self, content=None):
830ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
831ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        Initializer.
832ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
833ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        The argument is either a pattern or None.  If it is None, this
834ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        only matches an empty sequence (effectively '$' in regex
835ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        lingo).  If it is not None, this matches whenever the argument
836ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pattern doesn't have any matches.
837ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
838ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if content is not None:
839ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            assert isinstance(content, BasePattern), repr(content)
840ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.content = content
841ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
842ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match(self, node):
843ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # We never match a node in its entirety
844ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return False
845ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
846ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def match_seq(self, nodes):
847ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # We only match an empty sequence of nodes in its entirety
848ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return len(nodes) == 0
849ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
850ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def generate_matches(self, nodes):
851ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if self.content is None:
852ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # Return a match if there is an empty sequence
853ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if len(nodes) == 0:
854ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield 0, {}
855ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
856ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            # Return a match if the argument pattern has no matches
857ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for c, r in self.content.generate_matches(nodes):
858ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                return
859ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            yield 0, {}
860ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
861ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
862ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef generate_matches(patterns, nodes):
863ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    """
864ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    Generator yielding matches for a sequence of patterns and nodes.
865ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
866ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    Args:
867ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        patterns: a sequence of patterns
868ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        nodes: a sequence of nodes
869ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
870ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    Yields:
871ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        (count, results) tuples where:
872ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        count: the entire sequence of patterns matches nodes[:count];
873ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        results: dict containing named submatches.
874ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        """
875ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if not patterns:
876ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        yield 0, {}
877ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    else:
878ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p, rest = patterns[0], patterns[1:]
879ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for c0, r0 in p.generate_matches(nodes):
880ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if not rest:
881ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                yield c0, r0
882ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            else:
883ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                for c1, r1 in generate_matches(rest, nodes[c0:]):
884ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    r = {}
885ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    r.update(r0)
886ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    r.update(r1)
887ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    yield c0 + c1, r
888