14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Copyright 2006 Google, Inc. All Rights Reserved.
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Licensed to PSF under a Contributor Agreement.
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPython parse tree definitions.
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThis is a very concrete parse tree; we need to keep every token and
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmeven the comments and whitespace between tokens.
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThere's also a pattern matching implementation here.
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__author__ = "Guido van Rossum <guido@python.org>"
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport warnings
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom StringIO import StringIO
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHUGE = 0x7FFFFFFF  # maximum repeat count, default max
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm_type_reprs = {}
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef type_repr(type_num):
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    global _type_reprs
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not _type_reprs:
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        from .pygram import python_symbols
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # printing tokens is possible but not as useful
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # from .pgen2 import token // token.__dict__.items():
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for name, val in python_symbols.__dict__.items():
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if type(val) == int: _type_reprs[val] = name
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return _type_reprs.setdefault(type_num, type_num)
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Base(object):
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Abstract base class for Node and Leaf.
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This provides some default functionality and boilerplate using the
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    template pattern.
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A node may be a subnode of at most one parent.
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Default values for instance variables
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    type = None    # int: token number (< 256) or symbol number (>= 256)
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    parent = None  # Parent node pointer, or None
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    children = ()  # Tuple of subnodes
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    was_changed = False
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    was_checked = False
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __new__(cls, *args, **kwds):
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Constructor that prevents Base from being instantiated."""
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert cls is not Base, "Cannot instantiate Base"
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return object.__new__(cls)
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __eq__(self, other):
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Compare two nodes for equality.
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This calls the method _eq().
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.__class__ is not other.__class__:
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return NotImplemented
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._eq(other)
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    __hash__ = None # For Py3 compatibility.
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __ne__(self, other):
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Compare two nodes for inequality.
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This calls the method _eq().
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.__class__ is not other.__class__:
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return NotImplemented
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return not self._eq(other)
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _eq(self, other):
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Compare two nodes for equality.
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This is called by __eq__ and __ne__.  It is only called if the two nodes
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        have the same type.  This must be implemented by the concrete subclass.
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Nodes should be considered equal if they have the same structure,
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ignoring the prefix string and other context information.
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clone(self):
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return a cloned (deep) copy of self.
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This must be implemented by the concrete subclass.
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def post_order(self):
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return a post-order iterator for the tree.
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This must be implemented by the concrete subclass.
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def pre_order(self):
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return a pre-order iterator for the tree.
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This must be implemented by the concrete subclass.
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_prefix(self, prefix):
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Set the prefix for the node (see Leaf class).
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        DEPRECATED; use the prefix property directly.
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn("set_prefix() is deprecated; use the prefix property",
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      DeprecationWarning, stacklevel=2)
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.prefix = prefix
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_prefix(self):
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return the prefix for the node (see Leaf class).
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        DEPRECATED; use the prefix property directly.
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn("get_prefix() is deprecated; use the prefix property",
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      DeprecationWarning, stacklevel=2)
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.prefix
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def replace(self, new):
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Replace this node with a new one in the parent."""
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert self.parent is not None, str(self)
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert new is not None
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not isinstance(new, list):
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            new = [new]
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        l_children = []
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        found = False
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ch in self.parent.children:
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ch is self:
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                assert not found, (self.parent.children, self, new)
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if new is not None:
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    l_children.extend(new)
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                found = True
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                l_children.append(ch)
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert found, (self.children, self, new)
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.parent.changed()
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.parent.children = l_children
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for x in new:
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            x.parent = self.parent
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.parent = None
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_lineno(self):
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return the line number which generated the invocant node."""
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        node = self
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while not isinstance(node, Leaf):
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not node.children:
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            node = node.children[0]
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return node.lineno
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def changed(self):
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.parent:
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.parent.changed()
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.was_changed = True
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def remove(self):
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Remove the node from the tree. Returns the position of the node in its
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        parent's children before it was removed.
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.parent:
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for i, node in enumerate(self.parent.children):
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if node is self:
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.parent.changed()
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    del self.parent.children[i]
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.parent = None
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return i
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    @property
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def next_sibling(self):
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The node immediately following the invocant in their parent's children
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        list. If the invocant does not have a next sibling, it is None
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.parent is None:
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Can't use index(); we need to test by identity
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for i, child in enumerate(self.parent.children):
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if child is self:
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                try:
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return self.parent.children[i+1]
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                except IndexError:
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return None
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    @property
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def prev_sibling(self):
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The node immediately preceding the invocant in their parent's children
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        list. If the invocant does not have a previous sibling, it is None.
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.parent is None:
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Can't use index(); we need to test by identity
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for i, child in enumerate(self.parent.children):
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if child is self:
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if i == 0:
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return None
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return self.parent.children[i-1]
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def leaves(self):
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for child in self.children:
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for x in child.leaves():
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield x
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def depth(self):
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.parent is None:
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return 0
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return 1 + self.parent.depth()
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_suffix(self):
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return the string immediately following the invocant node. This is
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        effectively equivalent to node.next_sibling.prefix
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        next_sib = self.next_sibling
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if next_sib is None:
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return u""
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return next_sib.prefix
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if sys.version_info < (3, 0):
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        def __str__(self):
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return unicode(self).encode("ascii")
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Node(Base):
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Concrete implementation for interior nodes."""
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self,type, children,
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 context=None,
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 prefix=None,
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 fixers_applied=None):
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Initializer.
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Takes a type constant (a symbol number >= 256), a sequence of
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        child nodes, and an optional context keyword argument.
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        As a side effect, the parent pointers of the children are updated.
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert type >= 256, type
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.type = type
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.children = list(children)
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ch in self.children:
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert ch.parent is None, repr(ch)
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ch.parent = self
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if prefix is not None:
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.prefix = prefix
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if fixers_applied:
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fixers_applied = fixers_applied[:]
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fixers_applied = None
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a canonical string representation."""
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "%s(%s, %r)" % (self.__class__.__name__,
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                               type_repr(self.type),
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                               self.children)
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __unicode__(self):
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return a pretty string representation.
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This reproduces the input source exactly.
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return u"".join(map(unicode, self.children))
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if sys.version_info > (3, 0):
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        __str__ = __unicode__
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _eq(self, other):
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Compare two nodes for equality."""
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return (self.type, self.children) == (other.type, other.children)
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clone(self):
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a cloned (deep) copy of self."""
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return Node(self.type, [ch.clone() for ch in self.children],
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    fixers_applied=self.fixers_applied)
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def post_order(self):
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a post-order iterator for the tree."""
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for child in self.children:
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for node in child.post_order():
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield node
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yield self
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def pre_order(self):
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a pre-order iterator for the tree."""
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yield self
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for child in self.children:
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for node in child.pre_order():
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield node
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _prefix_getter(self):
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The whitespace and comments preceding this node in the input.
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not self.children:
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return ""
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.children[0].prefix
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _prefix_setter(self, prefix):
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.children:
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.children[0].prefix = prefix
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    prefix = property(_prefix_getter, _prefix_setter)
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_child(self, i, child):
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Equivalent to 'node.children[i] = child'. This method also sets the
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        child's parent attribute appropriately.
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        child.parent = self
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.children[i].parent = None
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.children[i] = child
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.changed()
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def insert_child(self, i, child):
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Equivalent to 'node.children.insert(i, child)'. This method also sets
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        the child's parent attribute appropriately.
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        child.parent = self
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.children.insert(i, child)
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.changed()
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def append_child(self, child):
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Equivalent to 'node.children.append(child)'. This method also sets the
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        child's parent attribute appropriately.
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        child.parent = self
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.children.append(child)
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.changed()
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Leaf(Base):
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Concrete implementation for leaf nodes."""
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Default values for instance variables
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    _prefix = ""  # Whitespace and comments preceding this token in the input
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    lineno = 0    # Line where this token starts in the input
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    column = 0    # Column where this token tarts in the input
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, type, value,
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 context=None,
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 prefix=None,
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 fixers_applied=[]):
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Initializer.
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Takes a type constant (a token number < 256), a string value, and an
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        optional context keyword argument.
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert 0 <= type < 256, type
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if context is not None:
3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._prefix, (self.lineno, self.column) = context
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.type = type
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.value = value
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if prefix is not None:
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._prefix = prefix
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fixers_applied = fixers_applied[:]
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a canonical string representation."""
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "%s(%r, %r)" % (self.__class__.__name__,
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                               self.type,
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                               self.value)
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __unicode__(self):
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Return a pretty string representation.
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This reproduces the input source exactly.
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.prefix + unicode(self.value)
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if sys.version_info > (3, 0):
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        __str__ = __unicode__
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _eq(self, other):
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Compare two nodes for equality."""
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return (self.type, self.value) == (other.type, other.value)
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clone(self):
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a cloned (deep) copy of self."""
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return Leaf(self.type, self.value,
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    (self.prefix, (self.lineno, self.column)),
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    fixers_applied=self.fixers_applied)
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def leaves(self):
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yield self
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def post_order(self):
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a post-order iterator for the tree."""
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yield self
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def pre_order(self):
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a pre-order iterator for the tree."""
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yield self
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _prefix_getter(self):
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The whitespace and comments preceding this token in the input.
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._prefix
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _prefix_setter(self, prefix):
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.changed()
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._prefix = prefix
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    prefix = property(_prefix_getter, _prefix_setter)
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef convert(gr, raw_node):
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Convert raw node information to a Node or Leaf instance.
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This is passed to the parser driver which calls it whenever a reduction of a
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    grammar rule produces a new complete node, so that the tree is build
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    strictly bottom-up.
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    type, value, context, children = raw_node
4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if children or type in gr.number2symbol:
4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # If there's exactly one child, return that child instead of
4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # creating a new node.
4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if len(children) == 1:
4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return children[0]
4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return Node(type, children, context=context)
4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return Leaf(type, value, context=context)
4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass BasePattern(object):
4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A pattern is a tree matching pattern.
4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    It looks for a specific node type (token or symbol), and
4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    optionally for a specific content.
4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This is an abstract base class.  There are three concrete
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    subclasses:
4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    - LeafPattern matches a single leaf node;
4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    - NodePattern matches a single node (usually non-leaf);
4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    - WildcardPattern matches a sequence of nodes of variable length.
4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Defaults for instance variables
4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    type = None     # Node type (token if < 256, symbol if >= 256)
4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    content = None  # Optional content matching pattern
4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    name = None     # Optional name used to store match in results dict
4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __new__(cls, *args, **kwds):
4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Constructor that prevents BasePattern from being instantiated."""
4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert cls is not BasePattern, "Cannot instantiate BasePattern"
4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return object.__new__(cls)
4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        args = [type_repr(self.type), self.content, self.name]
4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while args and args[-1] is None:
4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            del args[-1]
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def optimize(self):
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        A subclass can define this as a hook for optimizations.
4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Returns either self or another node with the same effect.
4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self
4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match(self, node, results=None):
4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Does this pattern exactly match a node?
4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Returns True if it matches, False if not.
4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If results is not None, it must be a dict which will be
4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        updated with the nodes matching named subpatterns.
4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Default implementation for non-wildcard patterns.
4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.type is not None and node.type != self.type:
5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.content is not None:
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            r = None
5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if results is not None:
5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                r = {}
5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not self._submatch(node, r):
5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if r:
5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                results.update(r)
5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if results is not None and self.name:
5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            results[self.name] = node
5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match_seq(self, nodes, results=None):
5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Does this pattern exactly match a sequence of nodes?
5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Default implementation for non-wildcard patterns.
5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if len(nodes) != 1:
5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.match(nodes[0], results)
5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def generate_matches(self, nodes):
5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Generator yielding all matches for this pattern.
5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Default implementation for non-wildcard patterns.
5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        r = {}
5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if nodes and self.match(nodes[0], r):
5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield 1, r
5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass LeafPattern(BasePattern):
5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, type=None, content=None, name=None):
5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Initializer.  Takes optional type, content, and name.
5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The type, if given must be a token type (< 256).  If not given,
5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        this matches any *leaf* node; the content may still be required.
5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The content, if given, must be a string.
5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If a name is given, the matching node is stored in the results
5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        dict under that key.
5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if type is not None:
5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert 0 <= type < 256, type
5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if content is not None:
5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert isinstance(content, basestring), repr(content)
5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.type = type
5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.content = content
5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.name = name
5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match(self, node, results=None):
5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Override match() to insist on a leaf node."""
5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not isinstance(node, Leaf):
5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return BasePattern.match(self, node, results)
5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _submatch(self, node, results=None):
5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Match the pattern's content to the node's children.
5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This assumes the node type matches and self.content is not None.
5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Returns True if it matches, False if not.
5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If results is not None, it must be a dict which will be
5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        updated with the nodes matching named subpatterns.
5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        When returning False, the results dict may still be updated.
5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.content == node.value
5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass NodePattern(BasePattern):
5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    wildcards = False
5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, type=None, content=None, name=None):
5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Initializer.  Takes optional type, content, and name.
5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The type, if given, must be a symbol type (>= 256).  If the
5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        type is None this matches *any* single node (leaf or not),
5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except if content is not None, in which it only matches
5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        non-leaf nodes that also match the content pattern.
5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The content, if not None, must be a sequence of Patterns that
5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        must match the node's children exactly.  If the content is
5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        given, the type must not be None.
5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If a name is given, the matching node is stored in the results
5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        dict under that key.
5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if type is not None:
5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert type >= 256, type
6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if content is not None:
6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert not isinstance(content, basestring), repr(content)
6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            content = list(content)
6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for i, item in enumerate(content):
6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                assert isinstance(item, BasePattern), (i, item)
6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if isinstance(item, WildcardPattern):
6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.wildcards = True
6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.type = type
6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.content = content
6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.name = name
6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _submatch(self, node, results=None):
6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Match the pattern's content to the node's children.
6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This assumes the node type matches and self.content is not None.
6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Returns True if it matches, False if not.
6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If results is not None, it must be a dict which will be
6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        updated with the nodes matching named subpatterns.
6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        When returning False, the results dict may still be updated.
6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.wildcards:
6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for c, r in generate_matches(self.content, node.children):
6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if c == len(node.children):
6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if results is not None:
6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        results.update(r)
6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return True
6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if len(self.content) != len(node.children):
6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for subpattern, child in zip(self.content, node.children):
6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not subpattern.match(child, results):
6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass WildcardPattern(BasePattern):
6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A wildcard pattern can match zero or more nodes.
6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This has all the flexibility needed to implement patterns like:
6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    .*      .+      .?      .{m,n}
6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (a b c | d e | f)
6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (...)*  (...)+  (...)?  (...){m,n}
6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except it always uses non-greedy matching.
6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, content=None, min=0, max=HUGE, name=None):
6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Initializer.
6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Args:
6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            content: optional sequence of subsequences of patterns;
6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     if absent, matches one node;
6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     if present, each subsequence is an alternative [*]
6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            min: optional minimum number of times to match, default 0
6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            max: optional maximum number of times to match, default HUGE
6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            name: optional name assigned to this match
6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            equivalent to (a b c | d e | f g h); if content is None,
6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            this is equivalent to '.' in regular expression terms.
6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            The min and max parameters work as follows:
6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                min=0, max=maxint: .*
6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                min=1, max=maxint: .+
6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                min=0, max=1: .?
6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                min=1, max=1: .
6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            If content is not None, replace the dot with the parenthesized
6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            list of alternatives, e.g. (a b c | d e | f g h)*
6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert 0 <= min <= max <= HUGE, (min, max)
6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if content is not None:
6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            content = tuple(map(tuple, content))  # Protect against alterations
6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Check sanity of alternatives
6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert len(content), repr(content)  # Can't have zero alternatives
6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for alt in content:
6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                assert len(alt), repr(alt) # Can have empty alternatives
6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.content = content
6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.min = min
6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.max = max
6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.name = name
6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def optimize(self):
6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Optimize certain stacked wildcard patterns."""
6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        subpattern = None
6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (self.content is not None and
6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            len(self.content) == 1 and len(self.content[0]) == 1):
6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            subpattern = self.content[0][0]
6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.min == 1 and self.max == 1:
6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.content is None:
6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return NodePattern(name=self.name)
6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if subpattern is not None and  self.name == subpattern.name:
6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return subpattern.optimize()
6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            subpattern.min <= 1 and self.name == subpattern.name):
7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return WildcardPattern(subpattern.content,
7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                   self.min*subpattern.min,
7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                   self.max*subpattern.max,
7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                   subpattern.name)
7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self
7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match(self, node, results=None):
7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Does this pattern exactly match a node?"""
7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.match_seq([node], results)
7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match_seq(self, nodes, results=None):
7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Does this pattern exactly match a sequence of nodes?"""
7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c, r in self.generate_matches(nodes):
7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if c == len(nodes):
7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if results is not None:
7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    results.update(r)
7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if self.name:
7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        results[self.name] = list(nodes)
7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return True
7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def generate_matches(self, nodes):
7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Generator yielding matches for a sequence of nodes.
7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Args:
7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            nodes: sequence of nodes
7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Yields:
7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (count, results) tuples where:
7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            count: the match comprises nodes[:count];
7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            results: dict containing named submatches.
7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.content is None:
7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Shortcut for special case (see __init__.__doc__)
7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for count in xrange(self.min, 1 + min(len(nodes), self.max)):
7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                r = {}
7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if self.name:
7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    r[self.name] = nodes[:count]
7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield count, r
7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif self.name == "bare_name":
7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield self._bare_name_matches(nodes)
7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # The reason for this is that hitting the recursion limit usually
7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # results in some ugly messages about how RuntimeErrors are being
7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # ignored. We don't do this on non-CPython implementation because
7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # they don't have this problem.
7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if hasattr(sys, "getrefcount"):
7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                save_stderr = sys.stderr
7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                sys.stderr = StringIO()
7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for count, r in self._recursive_matches(nodes, 0):
7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if self.name:
7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        r[self.name] = nodes[:count]
7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    yield count, r
7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except RuntimeError:
7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # We fall back to the iterative pattern matching scheme if the recursive
7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # scheme hits the recursion limit.
7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for count, r in self._iterative_matches(nodes):
7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if self.name:
7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        r[self.name] = nodes[:count]
7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    yield count, r
7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            finally:
7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if hasattr(sys, "getrefcount"):
7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    sys.stderr = save_stderr
7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _iterative_matches(self, nodes):
7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Helper to iteratively yield the matches."""
7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nodelen = len(nodes)
7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if 0 >= self.min:
7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield 0, {}
7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        results = []
7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # generate matches that use just one alt from self.content
7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for alt in self.content:
7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for c, r in generate_matches(alt, nodes):
7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield c, r
7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                results.append((c, r))
7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # for each match, iterate down the nodes
7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while results:
7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            new_results = []
7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for c0, r0 in results:
7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # stop if the entire set of nodes has been matched
7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if c0 < nodelen and c0 <= self.max:
7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    for alt in self.content:
7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        for c1, r1 in generate_matches(alt, nodes[c0:]):
7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                            if c1 > 0:
7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                r = {}
7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                r.update(r0)
7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                r.update(r1)
7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                yield c0 + c1, r
7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                new_results.append((c0 + c1, r))
7944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            results = new_results
7954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _bare_name_matches(self, nodes):
7974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Special optimized matcher for bare_name."""
7984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        count = 0
7994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        r = {}
8004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        done = False
8014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        max = len(nodes)
8024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while not done and count < max:
8034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            done = True
8044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for leaf in self.content:
8054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if leaf[0].match(nodes[count], r):
8064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    count += 1
8074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    done = False
8084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    break
8094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        r[self.name] = nodes[:count]
8104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return count, r
8114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _recursive_matches(self, nodes, count):
8134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Helper to recursively yield the matches."""
8144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert self.content is not None
8154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if count >= self.min:
8164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield 0, {}
8174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if count < self.max:
8184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for alt in self.content:
8194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for c0, r0 in generate_matches(alt, nodes):
8204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
8214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        r = {}
8224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        r.update(r0)
8234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        r.update(r1)
8244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        yield c0 + c1, r
8254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass NegatedPattern(BasePattern):
8284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, content=None):
8304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
8314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Initializer.
8324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The argument is either a pattern or None.  If it is None, this
8344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        only matches an empty sequence (effectively '$' in regex
8354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        lingo).  If it is not None, this matches whenever the argument
8364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        pattern doesn't have any matches.
8374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
8384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if content is not None:
8394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            assert isinstance(content, BasePattern), repr(content)
8404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.content = content
8414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match(self, node):
8434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # We never match a node in its entirety
8444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
8454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def match_seq(self, nodes):
8474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # We only match an empty sequence of nodes in its entirety
8484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return len(nodes) == 0
8494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def generate_matches(self, nodes):
8514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.content is None:
8524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Return a match if there is an empty sequence
8534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if len(nodes) == 0:
8544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield 0, {}
8554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
8564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Return a match if the argument pattern has no matches
8574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for c, r in self.content.generate_matches(nodes):
8584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return
8594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield 0, {}
8604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef generate_matches(patterns, nodes):
8634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
8644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Generator yielding matches for a sequence of patterns and nodes.
8654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Args:
8674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        patterns: a sequence of patterns
8684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nodes: a sequence of nodes
8694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Yields:
8714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        (count, results) tuples where:
8724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        count: the entire sequence of patterns matches nodes[:count];
8734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        results: dict containing named submatches.
8744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
8754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not patterns:
8764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yield 0, {}
8774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
8784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        p, rest = patterns[0], patterns[1:]
8794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c0, r0 in p.generate_matches(nodes):
8804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not rest:
8814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield c0, r0
8824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
8834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for c1, r1 in generate_matches(rest, nodes[c0:]):
8844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    r = {}
8854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    r.update(r0)
8864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    r.update(r1)
8874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    yield c0 + c1, r
888