1from test.test_support import verbose, run_unittest, import_module
2from test.test_support import precisionbigmemtest, _2G, cpython_only
3import re
4from re import Scanner
5import sre_constants
6import sys
7import string
8import traceback
9from weakref import proxy
10
11
12# Misc tests from Tim Peters' re.doc
13
14# WARNING: Don't change details in these tests if you don't know
15# what you're doing. Some of these tests were carefully modeled to
16# cover most of the code.
17
18import unittest
19
20class ReTests(unittest.TestCase):
21
22    def test_weakref(self):
23        s = 'QabbbcR'
24        x = re.compile('ab+c')
25        y = proxy(x)
26        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
27
28    def test_search_star_plus(self):
29        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
30        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
31        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
32        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
33        self.assertEqual(re.search('x', 'aaa'), None)
34        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
35        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
36        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
37        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
38        self.assertEqual(re.match('a+', 'xxx'), None)
39
40    def bump_num(self, matchobj):
41        int_value = int(matchobj.group(0))
42        return str(int_value + 1)
43
44    def test_basic_re_sub(self):
45        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
46        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
47                         '9.3 -3 24x100y')
48        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
49                         '9.3 -3 23x99y')
50
51        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
52        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
53
54        s = r"\1\1"
55        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
56        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
57        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
58
59        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
60        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
61        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
62        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
63
64        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
65                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
66        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
67        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
68                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
69
70        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
71
72    def test_bug_449964(self):
73        # fails for group followed by other escape
74        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
75                         'xx\bxx\b')
76
77    def test_bug_449000(self):
78        # Test for sub() on escaped characters
79        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
80                         'abc\ndef\n')
81        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
82                         'abc\ndef\n')
83        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
84                         'abc\ndef\n')
85        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86                         'abc\ndef\n')
87
88    def test_bug_1140(self):
89        # re.sub(x, y, u'') should return u'', not '', and
90        # re.sub(x, y, '') should return '', not u''.
91        # Also:
92        # re.sub(x, y, unicode(x)) should return unicode(y), and
93        # re.sub(x, y, str(x)) should return
94        #     str(y) if isinstance(y, str) else unicode(y).
95        for x in 'x', u'x':
96            for y in 'y', u'y':
97                z = re.sub(x, y, u'')
98                self.assertEqual(z, u'')
99                self.assertEqual(type(z), unicode)
100                #
101                z = re.sub(x, y, '')
102                self.assertEqual(z, '')
103                self.assertEqual(type(z), str)
104                #
105                z = re.sub(x, y, unicode(x))
106                self.assertEqual(z, y)
107                self.assertEqual(type(z), unicode)
108                #
109                z = re.sub(x, y, str(x))
110                self.assertEqual(z, y)
111                self.assertEqual(type(z), type(y))
112
113    def test_bug_1661(self):
114        # Verify that flags do not get silently ignored with compiled patterns
115        pattern = re.compile('.')
116        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
117        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
118        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
119        self.assertRaises(ValueError, re.compile, pattern, re.I)
120
121    def test_bug_3629(self):
122        # A regex that triggered a bug in the sre-code validator
123        re.compile("(?P<quote>)(?(quote))")
124
125    def test_sub_template_numeric_escape(self):
126        # bug 776311 and friends
127        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
128        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
129        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
130        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
131        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
132        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
133        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
134
135        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
136        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
137
138        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
139        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
140        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
141        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
142        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
143
144        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
145        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
146
147        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
148        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
149        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
150        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
151        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
152        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
153        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
154        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
155        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
156        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
157        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
158        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
159
160        # in python2.3 (etc), these loop endlessly in sre_parser.py
161        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
162        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
163                         'xz8')
164        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
165                         'xza')
166
167    def test_qualified_re_sub(self):
168        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
169        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
170
171    def test_bug_114660(self):
172        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
173                         'hello there')
174
175    def test_bug_462270(self):
176        # Test for empty sub() behaviour, see SF bug #462270
177        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
178        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
179
180    def test_symbolic_groups(self):
181        re.compile('(?P<a>x)(?P=a)(?(a)y)')
182        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
183        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
184        self.assertRaises(re.error, re.compile, '(?Px)')
185        self.assertRaises(re.error, re.compile, '(?P=)')
186        self.assertRaises(re.error, re.compile, '(?P=1)')
187        self.assertRaises(re.error, re.compile, '(?P=a)')
188        self.assertRaises(re.error, re.compile, '(?P=a1)')
189        self.assertRaises(re.error, re.compile, '(?P=a.)')
190        self.assertRaises(re.error, re.compile, '(?P<)')
191        self.assertRaises(re.error, re.compile, '(?P<>)')
192        self.assertRaises(re.error, re.compile, '(?P<1>)')
193        self.assertRaises(re.error, re.compile, '(?P<a.>)')
194        self.assertRaises(re.error, re.compile, '(?())')
195        self.assertRaises(re.error, re.compile, '(?(a))')
196        self.assertRaises(re.error, re.compile, '(?(1a))')
197        self.assertRaises(re.error, re.compile, '(?(a.))')
198
199    def test_symbolic_refs(self):
200        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
201        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
202        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
203        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
204        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
205        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
206        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
207        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
208        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
209        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
210
211    def test_re_subn(self):
212        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
213        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
214        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
215        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
216        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
217
218    def test_re_split(self):
219        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
220        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
221        self.assertEqual(re.split("(:*)", ":a:b::c"),
222                         ['', ':', 'a', ':', 'b', '::', 'c'])
223        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
224        self.assertEqual(re.split("(:)*", ":a:b::c"),
225                         ['', ':', 'a', ':', 'b', ':', 'c'])
226        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
227                         ['', ':', 'a', ':b::', 'c'])
228        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
229                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
230                          None, '::', 'c'])
231        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
232                         ['', 'a', '', '', 'c'])
233
234    def test_qualified_re_split(self):
235        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
236        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
237        self.assertEqual(re.split("(:)", ":a:b::c", 2),
238                         ['', ':', 'a', ':', 'b::c'])
239        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
240                         ['', ':', 'a', ':', 'b::c'])
241
242    def test_re_findall(self):
243        self.assertEqual(re.findall(":+", "abc"), [])
244        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
245        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
246        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
247                                                               (":", ":"),
248                                                               (":", "::")])
249
250    def test_bug_117612(self):
251        self.assertEqual(re.findall(r"(a|(b))", "aba"),
252                         [("a", ""),("b", "b"),("a", "")])
253
254    def test_re_match(self):
255        self.assertEqual(re.match('a', 'a').groups(), ())
256        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
257        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
258        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
259        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
260
261        pat = re.compile('((a)|(b))(c)?')
262        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
263        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
264        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
265        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
266        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
267
268        # A single group
269        m = re.match('(a)', 'a')
270        self.assertEqual(m.group(0), 'a')
271        self.assertEqual(m.group(0), 'a')
272        self.assertEqual(m.group(1), 'a')
273        self.assertEqual(m.group(1, 1), ('a', 'a'))
274
275        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
276        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
277        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
278                         (None, 'b', None))
279        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
280
281    def test_re_groupref_exists(self):
282        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
283                         ('(', 'a'))
284        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
285                         (None, 'a'))
286        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
287        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
288        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
289                         ('a', 'b'))
290        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
291                         (None, 'd'))
292        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
293                         (None, 'd'))
294        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
295                         ('a', ''))
296
297        # Tests for bug #1177831: exercise groups other than the first group
298        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
299        self.assertEqual(p.match('abc').groups(),
300                         ('a', 'b', 'c'))
301        self.assertEqual(p.match('ad').groups(),
302                         ('a', None, 'd'))
303        self.assertEqual(p.match('abd'), None)
304        self.assertEqual(p.match('ac'), None)
305
306
307    def test_re_groupref(self):
308        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
309                         ('|', 'a'))
310        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
311                         (None, 'a'))
312        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
313        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
314        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
315                         ('a', 'a'))
316        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
317                         (None, None))
318
319    def test_groupdict(self):
320        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
321                                  'first second').groupdict(),
322                         {'first':'first', 'second':'second'})
323
324    def test_expand(self):
325        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
326                                  "first second")
327                                  .expand(r"\2 \1 \g<second> \g<first>"),
328                         "second first second first")
329
330    def test_repeat_minmax(self):
331        self.assertEqual(re.match("^(\w){1}$", "abc"), None)
332        self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
333        self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
334        self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
335
336        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
337        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
338        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
339        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
340        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
341        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
342        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
343        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
344
345        self.assertEqual(re.match("^x{1}$", "xxx"), None)
346        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
347        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
348        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
349
350        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
351        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
352        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
353        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
354        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
355        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
356        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
357        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
358
359        self.assertEqual(re.match("^x{}$", "xxx"), None)
360        self.assertNotEqual(re.match("^x{}$", "x{}"), None)
361
362    def test_getattr(self):
363        self.assertEqual(re.match("(a)", "a").pos, 0)
364        self.assertEqual(re.match("(a)", "a").endpos, 1)
365        self.assertEqual(re.match("(a)", "a").string, "a")
366        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
367        self.assertNotEqual(re.match("(a)", "a").re, None)
368
369    def test_special_escapes(self):
370        self.assertEqual(re.search(r"\b(b.)\b",
371                                   "abcd abc bcd bx").group(1), "bx")
372        self.assertEqual(re.search(r"\B(b.)\B",
373                                   "abc bcd bc abxd").group(1), "bx")
374        self.assertEqual(re.search(r"\b(b.)\b",
375                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
376        self.assertEqual(re.search(r"\B(b.)\B",
377                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
378        self.assertEqual(re.search(r"\b(b.)\b",
379                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
380        self.assertEqual(re.search(r"\B(b.)\B",
381                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
382        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
383        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
384        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
385        self.assertEqual(re.search(r"\b(b.)\b",
386                                   u"abcd abc bcd bx").group(1), "bx")
387        self.assertEqual(re.search(r"\B(b.)\B",
388                                   u"abc bcd bc abxd").group(1), "bx")
389        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
390        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
391        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
392        self.assertEqual(re.search(r"\d\D\w\W\s\S",
393                                   "1aa! a").group(0), "1aa! a")
394        self.assertEqual(re.search(r"\d\D\w\W\s\S",
395                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
396        self.assertEqual(re.search(r"\d\D\w\W\s\S",
397                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
398
399    def test_string_boundaries(self):
400        # See http://bugs.python.org/issue10713
401        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
402                         "abc")
403        # There's a word boundary at the start of a string.
404        self.assertTrue(re.match(r"\b", "abc"))
405        # A non-empty string includes a non-boundary zero-length match.
406        self.assertTrue(re.search(r"\B", "abc"))
407        # There is no non-boundary match at the start of a string.
408        self.assertFalse(re.match(r"\B", "abc"))
409        # However, an empty string contains no word boundaries, and also no
410        # non-boundaries.
411        self.assertEqual(re.search(r"\B", ""), None)
412        # This one is questionable and different from the perlre behaviour,
413        # but describes current behavior.
414        self.assertEqual(re.search(r"\b", ""), None)
415        # A single word-character string has two boundaries, but no
416        # non-boundary gaps.
417        self.assertEqual(len(re.findall(r"\b", "a")), 2)
418        self.assertEqual(len(re.findall(r"\B", "a")), 0)
419        # If there are no words, there are no boundaries
420        self.assertEqual(len(re.findall(r"\b", " ")), 0)
421        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
422        # Can match around the whitespace.
423        self.assertEqual(len(re.findall(r"\B", " ")), 2)
424
425    def test_bigcharset(self):
426        self.assertEqual(re.match(u"([\u2222\u2223])",
427                                  u"\u2222").group(1), u"\u2222")
428        self.assertEqual(re.match(u"([\u2222\u2223])",
429                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
430
431    def test_big_codesize(self):
432        # Issue #1160
433        r = re.compile('|'.join(('%d'%x for x in range(10000))))
434        self.assertIsNotNone(r.match('1000'))
435        self.assertIsNotNone(r.match('9999'))
436
437    def test_anyall(self):
438        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
439                         "a\nb")
440        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
441                         "a\n\nb")
442
443    def test_non_consuming(self):
444        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
445        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
446        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
447        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
448        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
449        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
450        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
451
452        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
453        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
454        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
455        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
456
457    def test_ignore_case(self):
458        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
459        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
460        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
461        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
462        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
463        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
464        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
465        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
466        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
467        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
468
469    def test_category(self):
470        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
471
472    def test_getlower(self):
473        import _sre
474        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
475        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
476        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
477
478        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
479        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
480
481    def test_not_literal(self):
482        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
483        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
484
485    def test_search_coverage(self):
486        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
487        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
488
489    def assertMatch(self, pattern, text, match=None, span=None,
490                    matcher=re.match):
491        if match is None and span is None:
492            # the pattern matches the whole text
493            match = text
494            span = (0, len(text))
495        elif match is None or span is None:
496            raise ValueError('If match is not None, span should be specified '
497                             '(and vice versa).')
498        m = matcher(pattern, text)
499        self.assertTrue(m)
500        self.assertEqual(m.group(), match)
501        self.assertEqual(m.span(), span)
502
503    def test_re_escape(self):
504        alnum_chars = string.ascii_letters + string.digits
505        p = u''.join(unichr(i) for i in range(256))
506        for c in p:
507            if c in alnum_chars:
508                self.assertEqual(re.escape(c), c)
509            elif c == u'\x00':
510                self.assertEqual(re.escape(c), u'\\000')
511            else:
512                self.assertEqual(re.escape(c), u'\\' + c)
513            self.assertMatch(re.escape(c), c)
514        self.assertMatch(re.escape(p), p)
515
516    def test_re_escape_byte(self):
517        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
518        p = ''.join(chr(i) for i in range(256))
519        for b in p:
520            if b in alnum_chars:
521                self.assertEqual(re.escape(b), b)
522            elif b == b'\x00':
523                self.assertEqual(re.escape(b), b'\\000')
524            else:
525                self.assertEqual(re.escape(b), b'\\' + b)
526            self.assertMatch(re.escape(b), b)
527        self.assertMatch(re.escape(p), p)
528
529    def test_re_escape_non_ascii(self):
530        s = u'xxx\u2620\u2620\u2620xxx'
531        s_escaped = re.escape(s)
532        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
533        self.assertMatch(s_escaped, s)
534        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
535                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
536
537    def test_re_escape_non_ascii_bytes(self):
538        b = u'y\u2620y\u2620y'.encode('utf-8')
539        b_escaped = re.escape(b)
540        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
541        self.assertMatch(b_escaped, b)
542        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
543        self.assertEqual(len(res), 2)
544
    def test_pickling(self):
        # Run the pickle round-trip check (pickle_test) once with the
        # pickle module and once with cPickle.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)
        # old pickles expect the _compile() reconstructor in sre module
        # NOTE(review): import_module(..., deprecated=True) presumably loads
        # the deprecated "sre" module with its warning suppressed, which is
        # why it precedes the plain import below -- confirm against
        # test.test_support before reordering.
        import_module("sre", deprecated=True)
        # The import itself is the check: it fails if sre._compile is gone.
        from sre import _compile
553
554    def pickle_test(self, pickle):
555        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
556        s = pickle.dumps(oldpat)
557        newpat = pickle.loads(s)
558        self.assertEqual(oldpat, newpat)
559
560    def test_constants(self):
561        self.assertEqual(re.I, re.IGNORECASE)
562        self.assertEqual(re.L, re.LOCALE)
563        self.assertEqual(re.M, re.MULTILINE)
564        self.assertEqual(re.S, re.DOTALL)
565        self.assertEqual(re.X, re.VERBOSE)
566
567    def test_flags(self):
568        for flag in [re.I, re.M, re.X, re.S, re.L]:
569            self.assertNotEqual(re.compile('^pattern$', flag), None)
570
571    def test_sre_character_literals(self):
572        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
573            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
574            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
575            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
576            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
577            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
578            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
579        self.assertRaises(re.error, re.match, "\911", "")
580
581    def test_sre_character_class_literals(self):
582        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
583            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
584            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
585            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
586            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
587            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
588            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
589        self.assertRaises(re.error, re.match, "[\911]", "")
590
591    def test_bug_113254(self):
592        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
593        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
594        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
595
596    def test_bug_527371(self):
597        # bug described in patches 527371/672491
598        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
599        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
600        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
601        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
602        self.assertEqual(re.match("((a))", "a").lastindex, 1)
603
604    def test_bug_545855(self):
605        # bug 545855 -- This pattern failed to cause a compile error as it
606        # should, instead provoking a TypeError.
607        self.assertRaises(re.error, re.compile, 'foo[a-')
608
609    def test_bug_418626(self):
610        # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
611        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
612        # pattern '*?' on a long string.
613        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
614        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
615                         20003)
616        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
617        # non-simple '*?' still used to hit the recursion limit, before the
618        # non-recursive scheme was implemented.
619        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
620
621    def test_bug_612074(self):
622        pat=u"["+re.escape(u"\u2039")+u"]"
623        self.assertEqual(re.compile(pat) and 1, 1)
624
625    def test_stack_overflow(self):
626        # nasty cases that used to overflow the straightforward recursive
627        # implementation of repeated groups.
628        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
629        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
630        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
631
632    def test_unlimited_zero_width_repeat(self):
633        # Issue #9669
634        self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
635        self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
636        self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
637        self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
638        self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
639        self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
640
641    def test_scanner(self):
642        def s_ident(scanner, token): return token
643        def s_operator(scanner, token): return "op%s" % token
644        def s_float(scanner, token): return float(token)
645        def s_int(scanner, token): return int(token)
646
647        scanner = Scanner([
648            (r"[a-zA-Z_]\w*", s_ident),
649            (r"\d+\.\d*", s_float),
650            (r"\d+", s_int),
651            (r"=|\+|-|\*|/", s_operator),
652            (r"\s+", None),
653            ])
654
655        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
656
657        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
658                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
659                           'op+', 'bar'], ''))
660
661    def test_bug_448951(self):
662        # bug 448951 (similar to 429357, but with single char match)
663        # (Also test greedy matches.)
664        for op in '','?','*':
665            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
666                             (None, None))
667            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
668                             ('a:', 'a'))
669
670    def test_bug_725106(self):
671        # capturing groups in alternatives in repeats
672        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
673                         ('b', 'a'))
674        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
675                         ('c', 'b'))
676        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
677                         ('b', None))
678        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
679                         ('b', None))
680        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
681                         ('b', 'a'))
682        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
683                         ('c', 'b'))
684        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
685                         ('b', None))
686        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
687                         ('b', None))
688
689    def test_bug_725149(self):
690        # mark_stack_base restoring before restoring marks
691        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
692                         ('a', None))
693        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
694                         ('a', None, None))
695
696    def test_bug_764548(self):
697        # bug 764548, re.compile() barfs on str/unicode subclasses
698        try:
699            unicode
700        except NameError:
701            return  # no problem if we have no unicode
702        class my_unicode(unicode): pass
703        pat = re.compile(my_unicode("abc"))
704        self.assertEqual(pat.match("xyz"), None)
705
706    def test_finditer(self):
707        iter = re.finditer(r":+", "a:b::c:::d")
708        self.assertEqual([item.group(0) for item in iter],
709                         [":", "::", ":::"])
710
711    def test_bug_926075(self):
712        try:
713            unicode
714        except NameError:
715            return # no problem if we have no unicode
716        self.assertTrue(re.compile('bug_926075') is not
717                     re.compile(eval("u'bug_926075'")))
718
719    def test_bug_931848(self):
720        try:
721            unicode
722        except NameError:
723            pass
724        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
725        self.assertEqual(re.compile(pattern).split("a.b.c"),
726                         ['a','b','c'])
727
728    def test_bug_581080(self):
729        iter = re.finditer(r"\s", "a b")
730        self.assertEqual(iter.next().span(), (1,2))
731        self.assertRaises(StopIteration, iter.next)
732
733        scanner = re.compile(r"\s").scanner("a b")
734        self.assertEqual(scanner.search().span(), (1, 2))
735        self.assertEqual(scanner.search(), None)
736
737    def test_bug_817234(self):
738        iter = re.finditer(r".*", "asdf")
739        self.assertEqual(iter.next().span(), (0, 4))
740        self.assertEqual(iter.next().span(), (4, 4))
741        self.assertRaises(StopIteration, iter.next)
742
743    def test_bug_6561(self):
744        # '\d' should match characters in Unicode category 'Nd'
745        # (Number, Decimal Digit), but not those in 'Nl' (Number,
746        # Letter) or 'No' (Number, Other).
747        decimal_digits = [
748            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
749            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
750            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
751            ]
752        for x in decimal_digits:
753            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
754
755        not_decimal_digits = [
756            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
757            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
758            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
759            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
760            ]
761        for x in not_decimal_digits:
762            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
763
764    def test_empty_array(self):
765        # SF buf 1647541
766        import array
767        for typecode in 'cbBuhHiIlLfd':
768            a = array.array(typecode)
769            self.assertEqual(re.compile("bla").match(a), None)
770            self.assertEqual(re.compile("").match(a).groups(), ())
771
772    def test_inline_flags(self):
773        # Bug #1700
774        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
775        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
776
777        p = re.compile(upper_char, re.I | re.U)
778        q = p.match(lower_char)
779        self.assertNotEqual(q, None)
780
781        p = re.compile(lower_char, re.I | re.U)
782        q = p.match(upper_char)
783        self.assertNotEqual(q, None)
784
785        p = re.compile('(?i)' + upper_char, re.U)
786        q = p.match(lower_char)
787        self.assertNotEqual(q, None)
788
789        p = re.compile('(?i)' + lower_char, re.U)
790        q = p.match(upper_char)
791        self.assertNotEqual(q, None)
792
793        p = re.compile('(?iu)' + upper_char)
794        q = p.match(lower_char)
795        self.assertNotEqual(q, None)
796
797        p = re.compile('(?iu)' + lower_char)
798        q = p.match(upper_char)
799        self.assertNotEqual(q, None)
800
801    def test_dollar_matches_twice(self):
802        "$ matches the end of string, and just before the terminating \n"
803        pattern = re.compile('$')
804        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
805        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
806        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
807
808        pattern = re.compile('$', re.MULTILINE)
809        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
810        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
811        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
812
813    def test_dealloc(self):
814        # issue 3299: check for segfault in debug build
815        import _sre
816        # the overflow limit is different on wide and narrow builds and it
817        # depends on the definition of SRE_CODE (see sre.h).
818        # 2**128 should be big enough to overflow on both. For smaller values
819        # a RuntimeError is raised instead of OverflowError.
820        long_overflow = 2**128
821        self.assertRaises(TypeError, re.finditer, "a", {})
822        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
823
824    def test_compile(self):
825        # Test return value when given string and pattern as parameter
826        pattern = re.compile('random pattern')
827        self.assertIsInstance(pattern, re._pattern_type)
828        same_pattern = re.compile(pattern)
829        self.assertIsInstance(same_pattern, re._pattern_type)
830        self.assertIs(same_pattern, pattern)
831        # Test behaviour when not given a string or pattern as parameter
832        self.assertRaises(TypeError, re.compile, 0)
833
834    def test_bug_13899(self):
835        # Issue #13899: re pattern r"[\A]" should work like "A" but matches
836        # nothing. Ditto B and Z.
837        self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
838                         ['A', 'B', '\b', 'C', 'Z'])
839
840    @precisionbigmemtest(size=_2G, memuse=1)
841    def test_large_search(self, size):
842        # Issue #10182: indices were 32-bit-truncated.
843        s = 'a' * size
844        m = re.search('$', s)
845        self.assertIsNotNone(m)
846        self.assertEqual(m.start(), size)
847        self.assertEqual(m.end(), size)
848
849    # The huge memuse is because of re.sub() using a list and a join()
850    # to create the replacement result.
851    @precisionbigmemtest(size=_2G, memuse=16 + 2)
852    def test_large_subn(self, size):
853        # Issue #10182: indices were 32-bit-truncated.
854        s = 'a' * size
855        r, n = re.subn('', '', s)
856        self.assertEqual(r, s)
857        self.assertEqual(n, size + 1)
858
859
860    def test_repeat_minmax_overflow(self):
861        # Issue #13169
862        string = "x" * 100000
863        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
864        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
865        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
866        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
867        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
868        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
869        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
870        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
871        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
872        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
873        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
874
875    @cpython_only
876    def test_repeat_minmax_overflow_maxrepeat(self):
877        try:
878            from _sre import MAXREPEAT
879        except ImportError:
880            self.skipTest('requires _sre.MAXREPEAT constant')
881        string = "x" * 100000
882        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
883        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
884                         (0, 100000))
885        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
886        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
887        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
888        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
889
890    def test_backref_group_name_in_exception(self):
891        # Issue 17341: Poor error message when compiling invalid regex
892        with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
893            re.compile('(?P=<foo>)')
894
895    def test_group_name_in_exception(self):
896        # Issue 17341: Poor error message when compiling invalid regex
897        with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
898            re.compile('(?P<?foo>)')
899
900
901def run_re_tests():
902    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
903    if verbose:
904        print 'Running re_tests test suite'
905    else:
906        # To save time, only run the first and last 10 tests
907        #tests = tests[:10] + tests[-10:]
908        pass
909
910    for t in tests:
911        sys.stdout.flush()
912        pattern = s = outcome = repl = expected = None
913        if len(t) == 5:
914            pattern, s, outcome, repl, expected = t
915        elif len(t) == 3:
916            pattern, s, outcome = t
917        else:
918            raise ValueError, ('Test tuples should have 3 or 5 fields', t)
919
920        try:
921            obj = re.compile(pattern)
922        except re.error:
923            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
924            else:
925                print '=== Syntax error:', t
926        except KeyboardInterrupt: raise KeyboardInterrupt
927        except:
928            print '*** Unexpected error ***', t
929            if verbose:
930                traceback.print_exc(file=sys.stdout)
931        else:
932            try:
933                result = obj.search(s)
934            except re.error, msg:
935                print '=== Unexpected exception', t, repr(msg)
936            if outcome == SYNTAX_ERROR:
937                # This should have been a syntax error; forget it.
938                pass
939            elif outcome == FAIL:
940                if result is None: pass   # No match, as expected
941                else: print '=== Succeeded incorrectly', t
942            elif outcome == SUCCEED:
943                if result is not None:
944                    # Matched, as expected, so now we compute the
945                    # result string and compare it to our expected result.
946                    start, end = result.span(0)
947                    vardict={'found': result.group(0),
948                             'groups': result.group(),
949                             'flags': result.re.flags}
950                    for i in range(1, 100):
951                        try:
952                            gi = result.group(i)
953                            # Special hack because else the string concat fails:
954                            if gi is None:
955                                gi = "None"
956                        except IndexError:
957                            gi = "Error"
958                        vardict['g%d' % i] = gi
959                    for i in result.re.groupindex.keys():
960                        try:
961                            gi = result.group(i)
962                            if gi is None:
963                                gi = "None"
964                        except IndexError:
965                            gi = "Error"
966                        vardict[i] = gi
967                    repl = eval(repl, vardict)
968                    if repl != expected:
969                        print '=== grouping error', t,
970                        print repr(repl) + ' should be ' + repr(expected)
971                else:
972                    print '=== Failed incorrectly', t
973
974                # Try the match on a unicode string, and check that it
975                # still succeeds.
976                try:
977                    result = obj.search(unicode(s, "latin-1"))
978                    if result is None:
979                        print '=== Fails on unicode match', t
980                except NameError:
981                    continue # 1.5.2
982                except TypeError:
983                    continue # unicode test case
984
985                # Try the match on a unicode pattern, and check that it
986                # still succeeds.
987                obj=re.compile(unicode(pattern, "latin-1"))
988                result = obj.search(s)
989                if result is None:
990                    print '=== Fails on unicode pattern match', t
991
992                # Try the match with the search area limited to the extent
993                # of the match and see if it still succeeds.  \B will
994                # break (because it won't match at the end or start of a
995                # string), so we'll ignore patterns that feature it.
996
997                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
998                               and result is not None:
999                    obj = re.compile(pattern)
1000                    result = obj.search(s, result.start(0), result.end(0) + 1)
1001                    if result is None:
1002                        print '=== Failed on range-limited match', t
1003
1004                # Try the match with IGNORECASE enabled, and check that it
1005                # still succeeds.
1006                obj = re.compile(pattern, re.IGNORECASE)
1007                result = obj.search(s)
1008                if result is None:
1009                    print '=== Fails on case-insensitive match', t
1010
1011                # Try the match with LOCALE enabled, and check that it
1012                # still succeeds.
1013                obj = re.compile(pattern, re.LOCALE)
1014                result = obj.search(s)
1015                if result is None:
1016                    print '=== Fails on locale-sensitive match', t
1017
1018                # Try the match with UNICODE locale enabled, and check
1019                # that it still succeeds.
1020                obj = re.compile(pattern, re.UNICODE)
1021                result = obj.search(s)
1022                if result is None:
1023                    print '=== Fails on unicode-sensitive match', t
1024
def test_main():
    """Run the ReTests unittest class, then the table-driven re_tests."""
    run_unittest(ReTests)
    run_re_tests()
1028
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()
1031