1ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom test.test_support import verbose, run_unittest, import_module
2ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom test.test_support import precisionbigmemtest, _2G, cpython_only
3ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport re
4ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom re import Scanner
5ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport sre_constants
6ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport sys
7ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport string
8ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport traceback
9ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehfrom weakref import proxy
10ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
11ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
12ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh# Misc tests from Tim Peters' re.doc
13ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
14ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh# WARNING: Don't change details in these tests if you don't know
15ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh# what you're doing. Some of these tests were carefully modeled to
16ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh# cover most of the code.
17ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
18ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehimport unittest
19ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
20ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehclass ReTests(unittest.TestCase):
21ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
22ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_weakref(self):
23ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        s = 'QabbbcR'
24ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        x = re.compile('ab+c')
25ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        y = proxy(x)
26ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
27ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
28ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_search_star_plus(self):
29ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
30ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
31ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
32ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
33ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search('x', 'aaa'), None)
34ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
35ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
36ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
37ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
38ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('a+', 'xxx'), None)
39ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
40ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def bump_num(self, matchobj):
41ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        int_value = int(matchobj.group(0))
42ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        return str(int_value + 1)
43ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
44ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_basic_re_sub(self):
45ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
46ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
47ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         '9.3 -3 24x100y')
48ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
49ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         '9.3 -3 23x99y')
50ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
51ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
52ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
53ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
54ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        s = r"\1\1"
55ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
56ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
57ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
58ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
59ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
60ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
61ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
62ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
63ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
64ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
65ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
66ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
67ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
68ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
69ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
70ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
71ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
72ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_449964(self):
73ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # fails for group followed by other escape
74ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
75ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'xx\bxx\b')
76ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
77ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_449000(self):
78ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Test for sub() on escaped characters
79ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
80ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'abc\ndef\n')
81ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
82ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'abc\ndef\n')
83ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
84ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'abc\ndef\n')
85ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'abc\ndef\n')
87ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
88ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_1140(self):
89ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # re.sub(x, y, u'') should return u'', not '', and
90ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # re.sub(x, y, '') should return '', not u''.
91ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Also:
92ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # re.sub(x, y, unicode(x)) should return unicode(y), and
93ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # re.sub(x, y, str(x)) should return
94ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        #     str(y) if isinstance(y, str) else unicode(y).
95ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for x in 'x', u'x':
96ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            for y in 'y', u'y':
97ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                z = re.sub(x, y, u'')
98ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(z, u'')
99ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(type(z), unicode)
100ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                #
101ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                z = re.sub(x, y, '')
102ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(z, '')
103ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(type(z), str)
104ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                #
105ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                z = re.sub(x, y, unicode(x))
106ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(z, y)
107ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(type(z), unicode)
108ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                #
109ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                z = re.sub(x, y, str(x))
110ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(z, y)
111ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                self.assertEqual(type(z), type(y))
112ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
113ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_1661(self):
114ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Verify that flags do not get silently ignored with compiled patterns
115ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pattern = re.compile('.')
116ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
117ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
118ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
119ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(ValueError, re.compile, pattern, re.I)
120ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
121ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_3629(self):
122ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # A regex that triggered a bug in the sre-code validator
123ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        re.compile("(?P<quote>)(?(quote))")
124ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
125ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_sub_template_numeric_escape(self):
126ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # bug 776311 and friends
127ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
128ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
129ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
130ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
131ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
132ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
133ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
134ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
135ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
136ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
137ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
138ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
139ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
140ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
141ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
142ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
143ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
144ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
145ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
146ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
147ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
148ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
149ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
150ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
151ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
152ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
153ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
154ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
155ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
156ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
157ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
158ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
159ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
160ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # in python2.3 (etc), these loop endlessly in sre_parser.py
161ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
162ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
163ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'xz8')
164ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
165ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'xza')
166ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
167ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_qualified_re_sub(self):
168ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
169ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
170ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
171ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_114660(self):
172ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
173ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         'hello there')
174ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
175ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_462270(self):
176ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Test for empty sub() behaviour, see SF bug #462270
177ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
178ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
179ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
180ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_symbolic_groups(self):
181ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        re.compile('(?P<a>x)(?P=a)(?(a)y)')
182ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
183ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
184ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?Px)')
185ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P=)')
186ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P=1)')
187ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P=a)')
188ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P=a1)')
189ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P=a.)')
190ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P<)')
191ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P<>)')
192ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P<1>)')
193ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?P<a.>)')
194ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?())')
195ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?(a))')
196ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?(1a))')
197ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, '(?(a.))')
198ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
199ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_symbolic_refs(self):
200ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
201ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
202ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
203ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
204ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
205ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
206ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
207ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
208ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
209ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
210ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
211ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_re_subn(self):
212ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
213ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
214ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
215ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
216ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
217ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
218ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_re_split(self):
219ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
220ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
221ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(:*)", ":a:b::c"),
222ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', ':', 'a', ':', 'b', '::', 'c'])
223ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
224ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(:)*", ":a:b::c"),
225ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', ':', 'a', ':', 'b', ':', 'c'])
226ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
227ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', ':', 'a', ':b::', 'c'])
228ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
229ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
230ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                          None, '::', 'c'])
231ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
232ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', 'a', '', '', 'c'])
233ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
234ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_qualified_re_split(self):
235ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
236ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
237ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(:)", ":a:b::c", 2),
238ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', ':', 'a', ':', 'b::c'])
239ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
240ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['', ':', 'a', ':', 'b::c'])
241ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
242ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_re_findall(self):
243ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.findall(":+", "abc"), [])
244ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
245ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
246ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
247ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                                               (":", ":"),
248ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                                               (":", "::")])
249ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
250ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_117612(self):
251ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.findall(r"(a|(b))", "aba"),
252ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         [("a", ""),("b", "b"),("a", "")])
253ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
254ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_re_match(self):
255ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('a', 'a').groups(), ())
256ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
257ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
258ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
259ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
260ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
261ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pat = re.compile('((a)|(b))(c)?')
262ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
263ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
264ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
265ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
266ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
267ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
268ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # A single group
269ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        m = re.match('(a)', 'a')
270ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(m.group(0), 'a')
271ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(m.group(0), 'a')
272ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(m.group(1), 'a')
273ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(m.group(1, 1), ('a', 'a'))
274ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
275ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
276ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
277ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
278ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (None, 'b', None))
279ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
280ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
281ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_re_groupref_exists(self):
282ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
283ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('(', 'a'))
284ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
285ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (None, 'a'))
286ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
287ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
288ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
289ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', 'b'))
290ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
291ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (None, 'd'))
292ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
293ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (None, 'd'))
294ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
295ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', ''))
296ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
297ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Tests for bug #1177831: exercise groups other than the first group
298ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
299ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(p.match('abc').groups(),
300ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', 'b', 'c'))
301ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(p.match('ad').groups(),
302ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', None, 'd'))
303ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(p.match('abd'), None)
304ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(p.match('ac'), None)
305ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
306ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
307ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_re_groupref(self):
308ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
309ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('|', 'a'))
310ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
311ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (None, 'a'))
312ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
313ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
314ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
315ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', 'a'))
316ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
317ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (None, None))
318ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
319ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_groupdict(self):
320ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
321ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                  'first second').groupdict(),
322ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         {'first':'first', 'second':'second'})
323ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
324ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_expand(self):
325ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
326ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                  "first second")
327ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                  .expand(r"\2 \1 \g<second> \g<first>"),
328ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         "second first second first")
329ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
330ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_repeat_minmax(self):
331ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1}$", "abc"), None)
332ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
333ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
334ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
335ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
336ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
337ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
338ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
339ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
340ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
341ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
342ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
343ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
344ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
345ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^x{1}$", "xxx"), None)
346ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
347ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
348ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
349ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
350ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
351ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
352ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
353ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
354ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
355ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
356ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
357ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
358ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
359ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("^x{}$", "xxx"), None)
360ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("^x{}$", "x{}"), None)
361ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
362ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_getattr(self):
363ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a)", "a").pos, 0)
364ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a)", "a").endpos, 1)
365ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a)", "a").string, "a")
366ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
367ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(re.match("(a)", "a").re, None)
368ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_special_escapes(self):
    at_edges = r"\b(b.)\b"
    off_edges = r"\B(b.)\B"
    anchored = r"^\Aabc\Z$"
    categories = r"\d\D\w\W\s\S"
    modes = (0, re.LOCALE, re.UNICODE)

    # \b and \B with no flags, then LOCALE, then UNICODE.
    for flags in modes:
        found = re.search(at_edges, "abcd abc bcd bx", flags)
        self.assertEqual(found.group(1), "bx")
        found = re.search(off_edges, "abc bcd bc abxd", flags)
        self.assertEqual(found.group(1), "bx")

    # MULTILINE line anchors versus the \A / \Z string anchors.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(anchored, "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(anchored, "\nabc\n", re.M), None)

    # The same checks against unicode subjects.
    found = re.search(at_edges, u"abcd abc bcd bx")
    self.assertEqual(found.group(1), "bx")
    found = re.search(off_edges, u"abc bcd bc abxd")
    self.assertEqual(found.group(1), "bx")
    self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(anchored, u"abc", re.M).group(0), "abc")
    self.assertEqual(re.search(anchored, u"\nabc\n", re.M), None)

    # Each category escape followed by its complement, in every mode.
    for flags in modes:
        found = re.search(categories, "1aa! a", flags)
        self.assertEqual(found.group(0), "1aa! a")
398ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
399ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_string_boundaries(self):
400ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # See http://bugs.python.org/issue10713
401ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
402ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         "abc")
403ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # There's a word boundary at the start of a string.
404ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertTrue(re.match(r"\b", "abc"))
405ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # A non-empty string includes a non-boundary zero-length match.
406ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertTrue(re.search(r"\B", "abc"))
407ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # There is no non-boundary match at the start of a string.
408ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertFalse(re.match(r"\B", "abc"))
409ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # However, an empty string contains no word boundaries, and also no
410ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # non-boundaries.
411ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search(r"\B", ""), None)
412ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # This one is questionable and different from the perlre behaviour,
413ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # but describes current behavior.
414ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search(r"\b", ""), None)
415ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # A single word-character string has two boundaries, but no
416ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # non-boundary gaps.
417ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(len(re.findall(r"\b", "a")), 2)
418ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(len(re.findall(r"\B", "a")), 0)
419ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # If there are no words, there are no boundaries
420ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(len(re.findall(r"\b", " ")), 0)
421ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
422ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Can match around the whitespace.
423ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(len(re.findall(r"\B", " ")), 2)
424ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
425ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bigcharset(self):
426ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(u"([\u2222\u2223])",
427ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                  u"\u2222").group(1), u"\u2222")
428ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(u"([\u2222\u2223])",
429ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
430ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
431ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_big_codesize(self):
432ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Issue #1160
433ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        r = re.compile('|'.join(('%d'%x for x in range(10000))))
434ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNotNone(r.match('1000'))
435ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNotNone(r.match('9999'))
436ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
437ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_anyall(self):
438ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
439ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         "a\nb")
440ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
441ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         "a\n\nb")
442ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
443ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_non_consuming(self):
444ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
445ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
446ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
447ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
448ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
449ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
450ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
451ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
452ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
453ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
454ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
455ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
456ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
457ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_ignore_case(self):
458ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
459ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
460ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
461ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
462ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
463ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
464ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
465ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
466ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
467ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
468ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
469ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_category(self):
470ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
471ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_getlower(self):
    import _sre
    upper_a, lower_a = ord('A'), ord('a')
    # getlower() is checked with no flags, LOCALE and UNICODE in turn.
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(_sre.getlower(upper_a, flags), lower_a)

    # Case-insensitive literal match on byte-string and unicode subjects.
    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
480ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
481ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_not_literal(self):
482ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
483ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
484ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
485ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_search_coverage(self):
486ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
487ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
488ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    """Assert that matcher(pattern, text) yields the expected match.

    With neither *match* nor *span* given, the pattern is expected to
    match the whole of *text*.  Otherwise both must be supplied: *match*
    is the expected matched text and *span* its (start, end) pair.

    Raises ValueError when exactly one of *match* / *span* is given.
    """
    omitted = (match is None, span is None)
    if all(omitted):
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    elif any(omitted):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
502ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_re_escape(self):
    # For each of the first 256 code points: ASCII alphanumerics are
    # expected back unchanged, NUL as \000, anything else with a
    # backslash in front.  The escaped form must also match the original.
    untouched = string.ascii_letters + string.digits
    for code in range(256):
        char = unichr(code)
        if char in untouched:
            expected = char
        elif code == 0:
            expected = u'\\000'
        else:
            expected = u'\\' + char
        self.assertEqual(re.escape(char), expected)
        self.assertMatch(re.escape(char), char)
    # And all 256 characters escaped as one string.
    everything = u''.join(unichr(code) for code in range(256))
    self.assertMatch(re.escape(everything), everything)
515ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_re_escape_byte(self):
    # Byte-string counterpart of test_re_escape, over all 256 values.
    untouched = (string.ascii_letters + string.digits).encode('ascii')
    for code in range(256):
        byte = chr(code)
        if byte in untouched:
            expected = byte
        elif byte == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + byte
        self.assertEqual(re.escape(byte), expected)
        self.assertMatch(re.escape(byte), byte)
    # And all 256 values escaped as one string.
    everything = ''.join(chr(code) for code in range(256))
    self.assertMatch(re.escape(everything), everything)
528ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_re_escape_non_ascii(self):
    # Each non-ASCII character is expected to gain a backslash, and the
    # escaped text must still match the original.
    text = u'xxx\u2620\u2620\u2620xxx'
    escaped = re.escape(text)
    self.assertEqual(escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped, text)
    # An escaped character used as a fragment of a larger pattern.
    run_pattern = u'.%s+.' % re.escape(u'\u2620')
    self.assertMatch(run_pattern, text,
                     match=u'x\u2620\u2620\u2620x', span=(2, 7),
                     matcher=re.search)
536ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_re_escape_non_ascii_bytes(self):
    # In UTF-8 encoded input every non-ASCII byte is expected to be
    # escaped on its own.
    encoded = u'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(encoded)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, encoded)
    # The escaped three-byte sequence occurs twice in the input.
    needle = re.escape(u'\u2620'.encode('utf-8'))
    occurrences = re.findall(needle, encoded)
    self.assertEqual(len(occurrences), 2)
544ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_pickling(self):
    # Run the round-trip check with the pure-Python pickler first and
    # the C one second; each module is imported just before it is used.
    for module_name in ('pickle', 'cPickle'):
        self.pickle_test(__import__(module_name))
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
553ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def pickle_test(self, pickle):
    """Check that a compiled pattern survives dumps()/loads().

    *pickle* is the module (or any object offering dumps and loads) to
    exercise; the restored pattern must compare equal to the original.
    """
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
559ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
560ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_constants(self):
561ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.I, re.IGNORECASE)
562ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.L, re.LOCALE)
563ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.M, re.MULTILINE)
564ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.S, re.DOTALL)
565ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.X, re.VERBOSE)
566ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_flags(self):
    # Compiling with each individual flag must produce a pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertNotEqual(compiled, None)
570ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
571ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_sre_character_literals(self):
572ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
573ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
574ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
575ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
576ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
577ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
578ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
579ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.match, "\911", "")
580ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
581ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_sre_character_class_literals(self):
582ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
583ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
584ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
585ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
586ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
587ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
588ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
589ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.match, "[\911]", "")
590ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
591ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_113254(self):
592ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
593ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
594ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
595ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
596ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_527371(self):
597ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # bug described in patches 527371/672491
598ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
599ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
600ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
601ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
602ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match("((a))", "a").lastindex, 1)
603ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
604ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_545855(self):
605ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # bug 545855 -- This pattern failed to cause a compile error as it
606ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # should, instead provoking a TypeError.
607ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(re.error, re.compile, 'foo[a-')
608ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
609ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_418626(self):
610ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
611ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
612ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # pattern '*?' on a long string.
613ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
614ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
615ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         20003)
616ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
617ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # non-simple '*?' still used to hit the recursion limit, before the
618ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # non-recursive scheme was implemented.
619ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
620ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
621ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_612074(self):
622ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pat=u"["+re.escape(u"\u2039")+u"]"
623ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.compile(pat) and 1, 1)
624ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
625ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_stack_overflow(self):
626ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # nasty cases that used to overflow the straightforward recursive
627ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # implementation of repeated groups.
628ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
629ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
630ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
631ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
632ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_unlimited_zero_width_repeat(self):
633ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Issue #9669
634ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
635ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
636ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
637ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
638ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
639ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
640ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
641ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_scanner(self):
642ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        def s_ident(scanner, token): return token
643ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        def s_operator(scanner, token): return "op%s" % token
644ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        def s_float(scanner, token): return float(token)
645ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        def s_int(scanner, token): return int(token)
646ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
647ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        scanner = Scanner([
648ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            (r"[a-zA-Z_]\w*", s_ident),
649ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            (r"\d+\.\d*", s_float),
650ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            (r"\d+", s_int),
651ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            (r"=|\+|-|\*|/", s_operator),
652ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            (r"\s+", None),
653ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            ])
654ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
655ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
656ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
657ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
658ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
659ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                           'op+', 'bar'], ''))
660ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
661ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_448951(self):
662ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # bug 448951 (similar to 429357, but with single char match)
663ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # (Also test greedy matches.)
664ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for op in '','?','*':
665ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
666ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                             (None, None))
667ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
668ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                             ('a:', 'a'))
669ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
670ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_725106(self):
671ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # capturing groups in alternatives in repeats
672ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
673ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('b', 'a'))
674ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
675ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('c', 'b'))
676ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
677ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('b', None))
678ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
679ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('b', None))
680ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
681ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('b', 'a'))
682ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
683ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('c', 'b'))
684ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
685ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('b', None))
686ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
687ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('b', None))
688ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
689ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_725149(self):
690ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # mark_stack_base restoring before restoring marks
691ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
692ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', None))
693ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
694ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ('a', None, None))
695ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
696ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_764548(self):
697ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # bug 764548, re.compile() barfs on str/unicode subclasses
698ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        try:
699ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            unicode
700ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except NameError:
701ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return  # no problem if we have no unicode
702ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        class my_unicode(unicode): pass
703ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pat = re.compile(my_unicode("abc"))
704ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pat.match("xyz"), None)
705ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
706ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_finditer(self):
707ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        iter = re.finditer(r":+", "a:b::c:::d")
708ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual([item.group(0) for item in iter],
709ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         [":", "::", ":::"])
710ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
711ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_926075(self):
712ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        try:
713ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            unicode
714ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except NameError:
715ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            return # no problem if we have no unicode
716ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertTrue(re.compile('bug_926075') is not
717ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                     re.compile(eval("u'bug_926075'")))
718ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
719ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_931848(self):
720ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        try:
721ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            unicode
722ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except NameError:
723ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            pass
724ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
725ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.compile(pattern).split("a.b.c"),
726ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                         ['a','b','c'])
727ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
728ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_581080(self):
729ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        iter = re.finditer(r"\s", "a b")
730ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(iter.next().span(), (1,2))
731ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(StopIteration, iter.next)
732ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
733ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        scanner = re.compile(r"\s").scanner("a b")
734ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(scanner.search().span(), (1, 2))
735ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(scanner.search(), None)
736ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
737ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_817234(self):
738ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        iter = re.finditer(r".*", "asdf")
739ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(iter.next().span(), (0, 4))
740ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(iter.next().span(), (4, 4))
741ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(StopIteration, iter.next)
742ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
743ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_bug_6561(self):
744ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # '\d' should match characters in Unicode category 'Nd'
745ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # (Number, Decimal Digit), but not those in 'Nl' (Number,
746ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Letter) or 'No' (Number, Other).
747ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        decimal_digits = [
748ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
749ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
750ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
751ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            ]
752ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for x in decimal_digits:
753ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
754ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
755ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        not_decimal_digits = [
756ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
757ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
758ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
759ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
760ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            ]
761ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        for x in not_decimal_digits:
762ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
763ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_empty_array(self):
    # SF bug 1647541: against an empty array of any typecode, a
    # non-empty pattern must not match and the empty pattern must match
    # without any groups.
    import array
    non_empty_pattern = re.compile("bla")
    empty_pattern = re.compile("")
    for typecode in 'cbBuhHiIlLfd':
        empty = array.array(typecode)
        self.assertEqual(non_empty_pattern.match(empty), None)
        self.assertEqual(empty_pattern.match(empty).groups(), ())
771ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
772ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_inline_flags(self):
773ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Bug #1700
774ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
775ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
776ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
777ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile(upper_char, re.I | re.U)
778ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        q = p.match(lower_char)
779ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(q, None)
780ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
781ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile(lower_char, re.I | re.U)
782ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        q = p.match(upper_char)
783ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(q, None)
784ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
785ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile('(?i)' + upper_char, re.U)
786ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        q = p.match(lower_char)
787ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(q, None)
788ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
789ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile('(?i)' + lower_char, re.U)
790ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        q = p.match(upper_char)
791ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(q, None)
792ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
793ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile('(?iu)' + upper_char)
794ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        q = p.match(lower_char)
795ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(q, None)
796ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
797ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        p = re.compile('(?iu)' + lower_char)
798ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        q = p.match(upper_char)
799ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertNotEqual(q, None)
800ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
801ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_dollar_matches_twice(self):
802ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        "$ matches the end of string, and just before the terminating \n"
803ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pattern = re.compile('$')
804ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
805ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
806ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
807ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
808ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pattern = re.compile('$', re.MULTILINE)
809ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
810ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
811ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
812ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_dealloc(self):
    # Issue 3299: check for segfault in debug build.
    import _sre
    with self.assertRaises(TypeError):
        re.finditer("a", {})
    # The overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h). 2**128 should
    # be big enough to overflow on both. For smaller values a
    # RuntimeError is raised instead of OverflowError.
    oversized_code = [2**128]
    with self.assertRaises(OverflowError):
        _sre.compile("abc", 0, oversized_code)
823ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_compile(self):
    # Given a string, re.compile() returns a pattern object; given an
    # already compiled pattern, it returns that very object.
    compiled = re.compile('random pattern')
    self.assertIsInstance(compiled, re._pattern_type)
    recompiled = re.compile(compiled)
    self.assertIsInstance(recompiled, re._pattern_type)
    self.assertIs(recompiled, compiled)
    # Anything that is neither a string nor a pattern is rejected.
    with self.assertRaises(TypeError):
        re.compile(0)
833ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_bug_13899(self):
    # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    # nothing. Ditto B and Z.
    char_class = r'[\A\B\b\C\Z]'
    found = re.findall(char_class, 'AB\bCZ')
    self.assertEqual(found, ['A', 'B', '\b', 'C', 'Z'])
839ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Searching for '$' in a string of `size` characters must report a
    # match located exactly at offset `size`.
    haystack = 'a' * size
    found = re.search('$', haystack)
    self.assertIsNotNone(found)
    for boundary in (found.start, found.end):
        self.assertEqual(boundary(), size)
848ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
# re.sub() builds the replacement result with a list and a join(),
# which is why the memuse figure is so large.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Replacing the empty pattern with the empty string leaves the
    # subject unchanged and counts size + 1 substitutions.
    subject = 'a' * size
    outcome = re.subn('', '', subject)
    replaced, count = outcome
    self.assertEqual(replaced, subject)
    self.assertEqual(count, size + 1)
858ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
859ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
860ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    def test_repeat_minmax_overflow(self):
861ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # Issue #13169
862ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        string = "x" * 100000
863ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
864ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
865ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
866ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
867ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
868ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
869ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
870ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
871ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
872ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
873ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
874ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Repeat counts of MAXREPEAT - 1 must compile and match (or fail
    # to match) normally; MAXREPEAT itself must raise OverflowError.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    below_limit = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % below_limit, subject))
    at_most = re.match(r".{,%d}" % below_limit, subject)
    self.assertEqual(at_most.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % below_limit, subject))
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
889ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex.
    # The error for a bad named backreference has to mention the
    # offending group name.
    bad_backref = '(?P=<foo>)'
    self.assertRaisesRegexp(sre_constants.error, '<foo>',
                            re.compile, bad_backref)
894ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex.
    # The error for a bad group name has to mention the offending name.
    # The expected-message regex is a raw literal so that the escaped
    # '?' does not depend on Python passing an unknown string escape
    # through unchanged.
    with self.assertRaisesRegexp(sre_constants.error, r'\?foo'):
        re.compile('(?P<?foo>)')
899ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
900ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
901ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef run_re_tests():
902ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
903ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    if verbose:
904ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        print 'Running re_tests test suite'
905ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    else:
906ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        # To save time, only run the first and last 10 tests
907ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        #tests = tests[:10] + tests[-10:]
908ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pass
909ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
910ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh    for t in tests:
911ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        sys.stdout.flush()
912ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        pattern = s = outcome = repl = expected = None
913ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        if len(t) == 5:
914ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            pattern, s, outcome, repl, expected = t
915ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        elif len(t) == 3:
916ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            pattern, s, outcome = t
917ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
918ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            raise ValueError, ('Test tuples should have 3 or 5 fields', t)
919ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
920ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        try:
921ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            obj = re.compile(pattern)
922ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except re.error:
923ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
924ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            else:
925ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                print '=== Syntax error:', t
926ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except KeyboardInterrupt: raise KeyboardInterrupt
927ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        except:
928ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            print '*** Unexpected error ***', t
929ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if verbose:
930ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                traceback.print_exc(file=sys.stdout)
931ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh        else:
932ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            try:
933ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                result = obj.search(s)
934ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            except re.error, msg:
935ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                print '=== Unexpected exception', t, repr(msg)
936ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            if outcome == SYNTAX_ERROR:
937ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # This should have been a syntax error; forget it.
938ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                pass
939ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            elif outcome == FAIL:
940ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if result is None: pass   # No match, as expected
941ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                else: print '=== Succeeded incorrectly', t
942ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh            elif outcome == SUCCEED:
943ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if result is not None:
944ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    # Matched, as expected, so now we compute the
945ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    # result string and compare it to our expected result.
946ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    start, end = result.span(0)
947ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    vardict={'found': result.group(0),
948ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                             'groups': result.group(),
949ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                             'flags': result.re.flags}
950ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    for i in range(1, 100):
951ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        try:
952ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            gi = result.group(i)
953ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            # Special hack because else the string concat fails:
954ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            if gi is None:
955ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                gi = "None"
956ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        except IndexError:
957ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            gi = "Error"
958ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        vardict['g%d' % i] = gi
959ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    for i in result.re.groupindex.keys():
960ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        try:
961ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            gi = result.group(i)
962ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            if gi is None:
963ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                                gi = "None"
964ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        except IndexError:
965ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                            gi = "Error"
966ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        vardict[i] = gi
967ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    repl = eval(repl, vardict)
968ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if repl != expected:
969ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        print '=== grouping error', t,
970ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        print repr(repl) + ' should be ' + repr(expected)
971ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                else:
972ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    print '=== Failed incorrectly', t
973ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
974ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # Try the match on a unicode string, and check that it
975ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # still succeeds.
976ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                try:
977ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    result = obj.search(unicode(s, "latin-1"))
978ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if result is None:
979ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        print '=== Fails on unicode match', t
980ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                except NameError:
981ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    continue # 1.5.2
982ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                except TypeError:
983ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    continue # unicode test case
984ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
985ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # Try the match on a unicode pattern, and check that it
986ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # still succeeds.
987ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                obj=re.compile(unicode(pattern, "latin-1"))
988ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                result = obj.search(s)
989ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if result is None:
990ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    print '=== Fails on unicode pattern match', t
991ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
992ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # Try the match with the search area limited to the extent
993ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # of the match and see if it still succeeds.  \B will
994ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # break (because it won't match at the end or start of a
995ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # string), so we'll ignore patterns that feature it.
996ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
997ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
998ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                               and result is not None:
999ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    obj = re.compile(pattern)
1000ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    result = obj.search(s, result.start(0), result.end(0) + 1)
1001ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    if result is None:
1002ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                        print '=== Failed on range-limited match', t
1003ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
1004ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # Try the match with IGNORECASE enabled, and check that it
1005ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # still succeeds.
1006ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                obj = re.compile(pattern, re.IGNORECASE)
1007ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                result = obj.search(s)
1008ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if result is None:
1009ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    print '=== Fails on case-insensitive match', t
1010ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
1011ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # Try the match with LOCALE enabled, and check that it
1012ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # still succeeds.
1013ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                obj = re.compile(pattern, re.LOCALE)
1014ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                result = obj.search(s)
1015ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if result is None:
1016ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    print '=== Fails on locale-sensitive match', t
1017ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
1018ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # Try the match with UNICODE locale enabled, and check
1019ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                # that it still succeeds.
1020ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                obj = re.compile(pattern, re.UNICODE)
1021ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                result = obj.search(s)
1022ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                if result is None:
1023ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh                    print '=== Fails on unicode-sensitive match', t
1024ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
def test_main():
    """Entry point for this test module.

    Runs the ``ReTests`` unittest case first, then the module-level
    ``run_re_tests()`` checks.
    """
    # Order matters: the unittest-based cases run before run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
1028ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh
# When executed directly as a script (rather than imported), run the
# module's test entry point.
if __name__ == "__main__":
    test_main()
1031