14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Copyright 2006 Google, Inc. All Rights Reserved. 24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Licensed to PSF under a Contributor Agreement. 34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm""" 54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPython parse tree definitions. 64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThis is a very concrete parse tree; we need to keep every token and 84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmeven the comments and whitespace between tokens. 94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThere's also a pattern matching implementation here. 114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm""" 124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__author__ = "Guido van Rossum <guido@python.org>" 144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys 164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport warnings 174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom StringIO import StringIO 184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHUGE = 0x7FFFFFFF # maximum repeat count, default max 204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm_type_reprs = {} 224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef type_repr(type_num): 234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm global _type_reprs 244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not _type_reprs: 254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm from .pygram import python_symbols 264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # printing tokens is possible but not as useful 274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # from .pgen2 import token // token.__dict__.items(): 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for name, val in python_symbols.__dict__.items(): 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if type(val) == int: _type_reprs[val] = name 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return _type_reprs.setdefault(type_num, type_num) 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Base(object): 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Abstract base class for Node and Leaf. 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This provides some default functionality and boilerplate using the 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm template pattern. 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm A node may be a subnode of at most one parent. 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Default values for instance variables 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type = None # int: token number (< 256) or symbol number (>= 256) 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parent = None # Parent node pointer, or None 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm children = () # Tuple of subnodes 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm was_changed = False 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm was_checked = False 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __new__(cls, *args, **kwds): 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Constructor that prevents Base from being instantiated.""" 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert cls is not Base, "Cannot instantiate Base" 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return object.__new__(cls) 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __eq__(self, other): 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Compare two nodes for equality. 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This calls the method _eq(). 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.__class__ is not other.__class__: 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return NotImplemented 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._eq(other) 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm __hash__ = None # For Py3 compatibility. 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __ne__(self, other): 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Compare two nodes for inequality. 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This calls the method _eq(). 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.__class__ is not other.__class__: 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return NotImplemented 754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return not self._eq(other) 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _eq(self, other): 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Compare two nodes for equality. 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is called by __eq__ and __ne__. It is only called if the two nodes 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm have the same type. This must be implemented by the concrete subclass. 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Nodes should be considered equal if they have the same structure, 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ignoring the prefix string and other context information. 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NotImplementedError 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def clone(self): 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return a cloned (deep) copy of self. 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This must be implemented by the concrete subclass. 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NotImplementedError 954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def post_order(self): 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return a post-order iterator for the tree. 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This must be implemented by the concrete subclass. 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NotImplementedError 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def pre_order(self): 1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return a pre-order iterator for the tree. 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This must be implemented by the concrete subclass. 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NotImplementedError 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_prefix(self, prefix): 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Set the prefix for the node (see Leaf class). 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm DEPRECATED; use the prefix property directly. 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm warnings.warn("set_prefix() is deprecated; use the prefix property", 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm DeprecationWarning, stacklevel=2) 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.prefix = prefix 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_prefix(self): 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return the prefix for the node (see Leaf class). 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm DEPRECATED; use the prefix property directly. 1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm warnings.warn("get_prefix() is deprecated; use the prefix property", 1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm DeprecationWarning, stacklevel=2) 1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.prefix 1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def replace(self, new): 1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Replace this node with a new one in the parent.""" 1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert self.parent is not None, str(self) 1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert new is not None 1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not isinstance(new, list): 1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm new = [new] 1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm l_children = [] 1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm found = False 1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ch in self.parent.children: 1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ch is self: 1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert not found, (self.parent.children, self, new) 1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if new is not None: 1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm l_children.extend(new) 1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm found = True 1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm l_children.append(ch) 1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert found, (self.children, self, new) 1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.parent.changed() 1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.parent.children = l_children 1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for x in new: 1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm x.parent = self.parent 1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.parent = None 1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_lineno(self): 1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the line number which generated the invocant node.""" 1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm node = self 1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while not isinstance(node, Leaf): 1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not node.children: 1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm node = node.children[0] 1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return node.lineno 1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def changed(self): 1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.parent: 1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.parent.changed() 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.was_changed = True 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def remove(self): 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Remove the node from the tree. Returns the position of the node in its 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parent's children before it was removed. 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.parent: 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for i, node in enumerate(self.parent.children): 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if node is self: 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.parent.changed() 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del self.parent.children[i] 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.parent = None 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return i 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm @property 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def next_sibling(self): 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The node immediately following the invocant in their parent's children 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm list. If the invocant does not have a next sibling, it is None 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.parent is None: 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return None 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Can't use index(); we need to test by identity 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for i, child in enumerate(self.parent.children): 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if child is self: 1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.parent.children[i+1] 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except IndexError: 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return None 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm @property 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def prev_sibling(self): 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The node immediately preceding the invocant in their parent's children 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm list. If the invocant does not have a previous sibling, it is None. 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.parent is None: 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return None 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Can't use index(); we need to test by identity 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for i, child in enumerate(self.parent.children): 2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if child is self: 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if i == 0: 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return None 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.parent.children[i-1] 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def leaves(self): 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for child in self.children: 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for x in child.leaves(): 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield x 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def depth(self): 2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.parent is None: 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 0 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1 + self.parent.depth() 2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_suffix(self): 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return the string immediately following the invocant node. This is 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm effectively equivalent to node.next_sibling.prefix 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm next_sib = self.next_sibling 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if next_sib is None: 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u"" 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return next_sib.prefix 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if sys.version_info < (3, 0): 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return unicode(self).encode("ascii") 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Node(Base): 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Concrete implementation for interior nodes.""" 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self,type, children, 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm context=None, 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm prefix=None, 2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fixers_applied=None): 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Initializer. 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Takes a type constant (a symbol number >= 256), a sequence of 2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm child nodes, and an optional context keyword argument. 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm As a side effect, the parent pointers of the children are updated. 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert type >= 256, type 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.type = type 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children = list(children) 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ch in self.children: 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert ch.parent is None, repr(ch) 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ch.parent = self 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if prefix is not None: 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.prefix = prefix 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if fixers_applied: 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.fixers_applied = fixers_applied[:] 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.fixers_applied = None 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a canonical string representation.""" 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "%s(%s, %r)" % (self.__class__.__name__, 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type_repr(self.type), 2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children) 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __unicode__(self): 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return a pretty string representation. 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This reproduces the input source exactly. 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u"".join(map(unicode, self.children)) 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if sys.version_info > (3, 0): 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm __str__ = __unicode__ 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _eq(self, other): 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Compare two nodes for equality.""" 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return (self.type, self.children) == (other.type, other.children) 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def clone(self): 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a cloned (deep) copy of self.""" 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return Node(self.type, [ch.clone() for ch in self.children], 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fixers_applied=self.fixers_applied) 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def post_order(self): 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a post-order iterator for the tree.""" 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for child in self.children: 2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for node in child.post_order(): 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield node 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield self 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def pre_order(self): 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a pre-order iterator for the tree.""" 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield self 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for child in self.children: 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for node in child.pre_order(): 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield node 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _prefix_getter(self): 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The whitespace and comments preceding this node in the input. 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not self.children: 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "" 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.children[0].prefix 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _prefix_setter(self, prefix): 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.children: 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children[0].prefix = prefix 3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm prefix = property(_prefix_getter, _prefix_setter) 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_child(self, i, child): 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Equivalent to 'node.children[i] = child'. This method also sets the 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm child's parent attribute appropriately. 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm child.parent = self 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children[i].parent = None 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children[i] = child 3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.changed() 3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def insert_child(self, i, child): 3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Equivalent to 'node.children.insert(i, child)'. This method also sets 3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the child's parent attribute appropriately. 3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm child.parent = self 3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children.insert(i, child) 3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.changed() 3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def append_child(self, child): 3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Equivalent to 'node.children.append(child)'. This method also sets the 3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm child's parent attribute appropriately. 3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm child.parent = self 3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.children.append(child) 3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.changed() 3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Leaf(Base): 3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Concrete implementation for leaf nodes.""" 3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Default values for instance variables 3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _prefix = "" # Whitespace and comments preceding this token in the input 3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm lineno = 0 # Line where this token starts in the input 3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm column = 0 # Column where this token tarts in the input 3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, type, value, 3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm context=None, 3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm prefix=None, 3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fixers_applied=[]): 3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Initializer. 3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Takes a type constant (a token number < 256), a string value, and an 3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm optional context keyword argument. 3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert 0 <= type < 256, type 3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if context is not None: 3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._prefix, (self.lineno, self.column) = context 3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.type = type 3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.value = value 3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if prefix is not None: 3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._prefix = prefix 3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.fixers_applied = fixers_applied[:] 3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a canonical string representation.""" 3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "%s(%r, %r)" % (self.__class__.__name__, 3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.type, 3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.value) 3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __unicode__(self): 3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return a pretty string representation. 3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This reproduces the input source exactly. 3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.prefix + unicode(self.value) 3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if sys.version_info > (3, 0): 3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm __str__ = __unicode__ 3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _eq(self, other): 3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Compare two nodes for equality.""" 3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return (self.type, self.value) == (other.type, other.value) 3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def clone(self): 4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a cloned (deep) copy of self.""" 4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return Leaf(self.type, self.value, 4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (self.prefix, (self.lineno, self.column)), 4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fixers_applied=self.fixers_applied) 4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def leaves(self): 4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield self 4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def post_order(self): 4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a post-order iterator for the tree.""" 4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield self 4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def pre_order(self): 4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a pre-order iterator for the tree.""" 4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield self 4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _prefix_getter(self): 4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The whitespace and comments preceding this token in the input. 4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._prefix 4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _prefix_setter(self, prefix): 4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.changed() 4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._prefix = prefix 4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm prefix = property(_prefix_getter, _prefix_setter) 4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef convert(gr, raw_node): 4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Convert raw node information to a Node or Leaf instance. 4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is passed to the parser driver which calls it whenever a reduction of a 4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm grammar rule produces a new complete node, so that the tree is build 4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strictly bottom-up. 4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type, value, context, children = raw_node 4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if children or type in gr.number2symbol: 4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # If there's exactly one child, return that child instead of 4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # creating a new node. 4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if len(children) == 1: 4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return children[0] 4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return Node(type, children, context=context) 4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return Leaf(type, value, context=context) 4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass BasePattern(object): 4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm A pattern is a tree matching pattern. 4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm It looks for a specific node type (token or symbol), and 4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm optionally for a specific content. 4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is an abstract base class. There are three concrete 4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm subclasses: 4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm - LeafPattern matches a single leaf node; 4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm - NodePattern matches a single node (usually non-leaf); 4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm - WildcardPattern matches a sequence of nodes of variable length. 4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Defaults for instance variables 4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type = None # Node type (token if < 256, symbol if >= 256) 4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm content = None # Optional content matching pattern 4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name = None # Optional name used to store match in results dict 4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __new__(cls, *args, **kwds): 4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Constructor that prevents BasePattern from being instantiated.""" 4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert cls is not BasePattern, "Cannot instantiate BasePattern" 4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return object.__new__(cls) 4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm args = [type_repr(self.type), self.content, self.name] 4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while args and args[-1] is None: 4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del args[-1] 4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) 4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def optimize(self): 4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm A subclass can define this as a hook for optimizations. 4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Returns either self or another node with the same effect. 4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self 4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match(self, node, results=None): 4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Does this pattern exactly match a node? 4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Returns True if it matches, False if not. 4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If results is not None, it must be a dict which will be 4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm updated with the nodes matching named subpatterns. 4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Default implementation for non-wildcard patterns. 4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.type is not None and node.type != self.type: 5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.content is not None: 5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = None 5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if results is not None: 5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not self._submatch(node, r): 5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if r: 5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results.update(r) 5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if results is not None and self.name: 5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results[self.name] = node 5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match_seq(self, nodes, results=None): 5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Does this pattern exactly match a sequence of nodes? 5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Default implementation for non-wildcard patterns. 5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if len(nodes) != 1: 5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.match(nodes[0], results) 5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def generate_matches(self, nodes): 5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Generator yielding all matches for this pattern. 5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Default implementation for non-wildcard patterns. 5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if nodes and self.match(nodes[0], r): 5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield 1, r 5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass LeafPattern(BasePattern): 5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, type=None, content=None, name=None): 5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Initializer. Takes optional type, content, and name. 5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The type, if given must be a token type (< 256). If not given, 5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm this matches any *leaf* node; the content may still be required. 5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The content, if given, must be a string. 5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If a name is given, the matching node is stored in the results 5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dict under that key. 5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if type is not None: 5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert 0 <= type < 256, type 5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if content is not None: 5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert isinstance(content, basestring), repr(content) 5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.type = type 5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.content = content 5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.name = name 5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match(self, node, results=None): 5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Override match() to insist on a leaf node.""" 5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not isinstance(node, Leaf): 5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return BasePattern.match(self, node, results) 5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _submatch(self, node, results=None): 5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Match the pattern's content to the node's children. 5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This assumes the node type matches and self.content is not None. 5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Returns True if it matches, False if not. 5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If results is not None, it must be a dict which will be 5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm updated with the nodes matching named subpatterns. 5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm When returning False, the results dict may still be updated. 5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.content == node.value 5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass NodePattern(BasePattern): 5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm wildcards = False 5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, type=None, content=None, name=None): 5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Initializer. Takes optional type, content, and name. 5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The type, if given, must be a symbol type (>= 256). If the 5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type is None this matches *any* single node (leaf or not), 5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except if content is not None, in which it only matches 5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm non-leaf nodes that also match the content pattern. 5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The content, if not None, must be a sequence of Patterns that 5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm must match the node's children exactly. If the content is 5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm given, the type must not be None. 5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If a name is given, the matching node is stored in the results 5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dict under that key. 5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if type is not None: 5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert type >= 256, type 6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if content is not None: 6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert not isinstance(content, basestring), repr(content) 6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm content = list(content) 6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for i, item in enumerate(content): 6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert isinstance(item, BasePattern), (i, item) 6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(item, WildcardPattern): 6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.wildcards = True 6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.type = type 6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.content = content 6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.name = name 6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _submatch(self, node, results=None): 6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Match the pattern's content to the node's children. 6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This assumes the node type matches and self.content is not None. 6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Returns True if it matches, False if not. 6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If results is not None, it must be a dict which will be 6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm updated with the nodes matching named subpatterns. 6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm When returning False, the results dict may still be updated. 6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.wildcards: 6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c, r in generate_matches(self.content, node.children): 6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if c == len(node.children): 6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if results is not None: 6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results.update(r) 6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if len(self.content) != len(node.children): 6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for subpattern, child in zip(self.content, node.children): 6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not subpattern.match(child, results): 6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass WildcardPattern(BasePattern): 6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm A wildcard pattern can match zero or more nodes. 6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This has all the flexibility needed to implement patterns like: 6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm .* .+ .? .{m,n} 6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (a b c | d e | f) 6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (...)* (...)+ (...)? (...){m,n} 6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except it always uses non-greedy matching. 6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, content=None, min=0, max=HUGE, name=None): 6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Initializer. 6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Args: 6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm content: optional sequence of subsequences of patterns; 6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if absent, matches one node; 6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if present, each subsequence is an alternative [*] 6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm min: optional minimum number of times to match, default 0 6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm max: optional maximum number of times to match, default HUGE 6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name: optional name assigned to this match 6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is 6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm equivalent to (a b c | d e | f g h); if content is None, 6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm this is equivalent to '.' in regular expression terms. 6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The min and max parameters work as follows: 6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm min=0, max=maxint: .* 6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm min=1, max=maxint: .+ 6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm min=0, max=1: .? 6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm min=1, max=1: . 6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If content is not None, replace the dot with the parenthesized 6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm list of alternatives, e.g. (a b c | d e | f g h)* 6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert 0 <= min <= max <= HUGE, (min, max) 6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if content is not None: 6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm content = tuple(map(tuple, content)) # Protect against alterations 6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Check sanity of alternatives 6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert len(content), repr(content) # Can't have zero alternatives 6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for alt in content: 6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert len(alt), repr(alt) # Can have empty alternatives 6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.content = content 6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.min = min 6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.max = max 6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.name = name 6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def optimize(self): 6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Optimize certain stacked wildcard patterns.""" 6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm subpattern = None 6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (self.content is not None and 6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm len(self.content) == 1 and len(self.content[0]) == 1): 6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm subpattern = self.content[0][0] 6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.min == 1 and self.max == 1: 6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.content is None: 6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return NodePattern(name=self.name) 6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if subpattern is not None and self.name == subpattern.name: 6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return subpattern.optimize() 6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and 7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm subpattern.min <= 1 and self.name == subpattern.name): 7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return WildcardPattern(subpattern.content, 7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.min*subpattern.min, 7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.max*subpattern.max, 7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm subpattern.name) 7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self 7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match(self, node, results=None): 7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Does this pattern exactly match a node?""" 7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.match_seq([node], results) 7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match_seq(self, nodes, results=None): 7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Does this pattern exactly match a sequence of nodes?""" 7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c, r in self.generate_matches(nodes): 7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if c == len(nodes): 7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if results is not None: 7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results.update(r) 7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.name: 7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results[self.name] = list(nodes) 7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def generate_matches(self, nodes): 7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Generator yielding matches for a sequence of nodes. 7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Args: 7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nodes: sequence of nodes 7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Yields: 7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (count, results) tuples where: 7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count: the match comprises nodes[:count]; 7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results: dict containing named submatches. 7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.content is None: 7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Shortcut for special case (see __init__.__doc__) 7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for count in xrange(self.min, 1 + min(len(nodes), self.max)): 7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.name: 7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r[self.name] = nodes[:count] 7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield count, r 7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif self.name == "bare_name": 7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield self._bare_name_matches(nodes) 7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # The reason for this is that hitting the recursion limit usually 7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # results in some ugly messages about how RuntimeErrors are being 7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # ignored. We don't do this on non-CPython implementation because 7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # they don't have this problem. 7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if hasattr(sys, "getrefcount"): 7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm save_stderr = sys.stderr 7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sys.stderr = StringIO() 7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for count, r in self._recursive_matches(nodes, 0): 7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.name: 7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r[self.name] = nodes[:count] 7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield count, r 7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except RuntimeError: 7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # We fall back to the iterative pattern matching scheme if the recursive 7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # scheme hits the recursion limit. 7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for count, r in self._iterative_matches(nodes): 7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.name: 7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r[self.name] = nodes[:count] 7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield count, r 7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm finally: 7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if hasattr(sys, "getrefcount"): 7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sys.stderr = save_stderr 7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _iterative_matches(self, nodes): 7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Helper to iteratively yield the matches.""" 7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nodelen = len(nodes) 7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if 0 >= self.min: 7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield 0, {} 7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results = [] 7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # generate matches that use just one alt from self.content 7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for alt in self.content: 7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c, r in generate_matches(alt, nodes): 7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield c, r 7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results.append((c, r)) 7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for each match, iterate down the nodes 7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while results: 7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm new_results = [] 7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c0, r0 in results: 7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # stop if the entire set of nodes has been matched 7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if c0 < nodelen and c0 <= self.max: 7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for alt in self.content: 7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c1, r1 in generate_matches(alt, nodes[c0:]): 7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if c1 > 0: 7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r.update(r0) 7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r.update(r1) 7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield c0 + c1, r 7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm new_results.append((c0 + c1, r)) 7944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results = new_results 7954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _bare_name_matches(self, nodes): 7974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Special optimized matcher for bare_name.""" 7984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count = 0 7994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 8004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm done = False 8014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm max = len(nodes) 8024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while not done and count < max: 8034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm done = True 8044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for leaf in self.content: 8054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if leaf[0].match(nodes[count], r): 8064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count += 1 8074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm done = False 8084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break 8094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r[self.name] = nodes[:count] 8104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return count, r 8114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _recursive_matches(self, nodes, count): 8134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Helper to recursively yield the matches.""" 8144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert self.content is not None 8154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if count >= self.min: 8164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield 0, {} 8174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if count < self.max: 8184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for alt in self.content: 8194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c0, r0 in generate_matches(alt, nodes): 8204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c1, r1 in self._recursive_matches(nodes[c0:], count+1): 8214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 8224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r.update(r0) 8234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r.update(r1) 8244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield c0 + c1, r 8254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass NegatedPattern(BasePattern): 8284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, content=None): 8304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 8314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Initializer. 8324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The argument is either a pattern or None. If it is None, this 8344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm only matches an empty sequence (effectively '$' in regex 8354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm lingo). If it is not None, this matches whenever the argument 8364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pattern doesn't have any matches. 8374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 8384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if content is not None: 8394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert isinstance(content, BasePattern), repr(content) 8404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.content = content 8414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match(self, node): 8434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # We never match a node in its entirety 8444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 8454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def match_seq(self, nodes): 8474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # We only match an empty sequence of nodes in its entirety 8484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return len(nodes) == 0 8494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def generate_matches(self, nodes): 8514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.content is None: 8524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Return a match if there is an empty sequence 8534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if len(nodes) == 0: 8544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield 0, {} 8554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 8564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Return a match if the argument pattern has no matches 8574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c, r in self.content.generate_matches(nodes): 8584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 8594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield 0, {} 8604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef generate_matches(patterns, nodes): 8634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 8644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Generator yielding matches for a sequence of patterns and nodes. 8654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Args: 8674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm patterns: a sequence of patterns 8684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nodes: a sequence of nodes 8694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Yields: 8714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (count, results) tuples where: 8724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count: the entire sequence of patterns matches nodes[:count]; 8734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm results: dict containing named submatches. 8744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 8754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not patterns: 8764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield 0, {} 8774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 8784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm p, rest = patterns[0], patterns[1:] 8794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c0, r0 in p.generate_matches(nodes): 8804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not rest: 8814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield c0, r0 8824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 8834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c1, r1 in generate_matches(rest, nodes[c0:]): 8844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = {} 8854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r.update(r0) 8864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r.update(r1) 8874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield c0 + c1, r 888