1from test.test_support import verbose, run_unittest, import_module 2from test.test_support import precisionbigmemtest, _2G, cpython_only 3import re 4from re import Scanner 5import sre_constants 6import sys 7import string 8import traceback 9from weakref import proxy 10 11 12# Misc tests from Tim Peters' re.doc 13 14# WARNING: Don't change details in these tests if you don't know 15# what you're doing. Some of these tests were carefully modeled to 16# cover most of the code. 17 18import unittest 19 20class ReTests(unittest.TestCase): 21 22 def test_weakref(self): 23 s = 'QabbbcR' 24 x = re.compile('ab+c') 25 y = proxy(x) 26 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR')) 27 28 def test_search_star_plus(self): 29 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0)) 30 self.assertEqual(re.search('x*', 'axx').span(), (0, 0)) 31 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3)) 32 self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) 33 self.assertEqual(re.search('x', 'aaa'), None) 34 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) 35 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) 36 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) 37 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) 38 self.assertEqual(re.match('a+', 'xxx'), None) 39 40 def bump_num(self, matchobj): 41 int_value = int(matchobj.group(0)) 42 return str(int_value + 1) 43 44 def test_basic_re_sub(self): 45 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') 46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), 47 '9.3 -3 24x100y') 48 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), 49 '9.3 -3 23x99y') 50 51 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n') 52 self.assertEqual(re.sub('.', r"\n", 'x'), '\n') 53 54 s = r"\1\1" 55 self.assertEqual(re.sub('(.)', s, 'x'), 'xx') 56 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s) 57 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s) 58 59 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx') 60 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx') 61 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx') 62 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx') 63 64 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'), 65 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') 66 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a') 67 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), 68 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) 69 70 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest') 71 72 def test_bug_449964(self): 73 # fails for group followed by other escape 74 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 75 'xx\bxx\b') 76 77 def test_bug_449000(self): 78 # Test for sub() on escaped characters 79 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), 80 'abc\ndef\n') 81 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 82 'abc\ndef\n') 83 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 84 'abc\ndef\n') 85 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 86 'abc\ndef\n') 87 88 def test_bug_1140(self): 89 # re.sub(x, y, u'') should return u'', not '', and 90 # re.sub(x, y, '') should return '', not u''. 91 # Also: 92 # re.sub(x, y, unicode(x)) should return unicode(y), and 93 # re.sub(x, y, str(x)) should return 94 # str(y) if isinstance(y, str) else unicode(y). 95 for x in 'x', u'x': 96 for y in 'y', u'y': 97 z = re.sub(x, y, u'') 98 self.assertEqual(z, u'') 99 self.assertEqual(type(z), unicode) 100 # 101 z = re.sub(x, y, '') 102 self.assertEqual(z, '') 103 self.assertEqual(type(z), str) 104 # 105 z = re.sub(x, y, unicode(x)) 106 self.assertEqual(z, y) 107 self.assertEqual(type(z), unicode) 108 # 109 z = re.sub(x, y, str(x)) 110 self.assertEqual(z, y) 111 self.assertEqual(type(z), type(y)) 112 113 def test_bug_1661(self): 114 # Verify that flags do not get silently ignored with compiled patterns 115 pattern = re.compile('.') 116 self.assertRaises(ValueError, re.match, pattern, 'A', re.I) 117 self.assertRaises(ValueError, re.search, pattern, 'A', re.I) 118 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I) 119 self.assertRaises(ValueError, re.compile, pattern, re.I) 120 121 def test_bug_3629(self): 122 # A regex that triggered a bug in the sre-code validator 123 re.compile("(?P<quote>)(?(quote))") 124 125 def test_sub_template_numeric_escape(self): 126 # bug 776311 and friends 127 self.assertEqual(re.sub('x', r'\0', 'x'), '\0') 128 self.assertEqual(re.sub('x', r'\000', 'x'), '\000') 129 self.assertEqual(re.sub('x', r'\001', 'x'), '\001') 130 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') 131 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') 132 self.assertEqual(re.sub('x', r'\111', 'x'), '\111') 133 self.assertEqual(re.sub('x', r'\117', 'x'), '\117') 134 135 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') 136 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') 137 138 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') 139 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') 140 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') 141 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') 142 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') 143 144 self.assertEqual(re.sub('x', r'\400', 'x'), '\0') 145 self.assertEqual(re.sub('x', r'\777', 'x'), '\377') 146 147 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x') 148 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') 149 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x') 150 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x') 151 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x') 152 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x') 153 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x') 154 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x') 155 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' 156 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x') 157 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' 158 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' 159 160 # in python2.3 (etc), these loop endlessly in sre_parser.py 161 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') 162 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), 163 'xz8') 164 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), 165 'xza') 166 167 def test_qualified_re_sub(self): 168 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') 169 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') 170 171 def test_bug_114660(self): 172 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), 173 'hello there') 174 175 def test_bug_462270(self): 176 # Test for empty sub() behaviour, see SF bug #462270 177 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-') 178 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d') 179 180 def test_symbolic_groups(self): 181 re.compile('(?P<a>x)(?P=a)(?(a)y)') 182 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)') 183 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)') 184 self.assertRaises(re.error, re.compile, '(?Px)') 185 self.assertRaises(re.error, re.compile, '(?P=)') 186 self.assertRaises(re.error, re.compile, '(?P=1)') 187 self.assertRaises(re.error, re.compile, '(?P=a)') 188 self.assertRaises(re.error, re.compile, '(?P=a1)') 189 self.assertRaises(re.error, re.compile, '(?P=a.)') 190 self.assertRaises(re.error, re.compile, '(?P<)') 191 self.assertRaises(re.error, re.compile, '(?P<>)') 192 self.assertRaises(re.error, re.compile, '(?P<1>)') 193 self.assertRaises(re.error, re.compile, '(?P<a.>)') 194 self.assertRaises(re.error, re.compile, '(?())') 195 self.assertRaises(re.error, re.compile, '(?(a))') 196 self.assertRaises(re.error, re.compile, '(?(1a))') 197 self.assertRaises(re.error, re.compile, '(?(a.))') 198 199 def test_symbolic_refs(self): 200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') 201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx') 202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx') 203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') 204 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx') 205 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') 206 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') 207 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') 208 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') 209 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') 210 211 def test_re_subn(self): 212 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) 213 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) 214 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) 215 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) 216 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) 217 218 def test_re_split(self): 219 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) 220 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) 221 self.assertEqual(re.split("(:*)", ":a:b::c"), 222 ['', ':', 'a', ':', 'b', '::', 'c']) 223 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) 224 self.assertEqual(re.split("(:)*", ":a:b::c"), 225 ['', ':', 'a', ':', 'b', ':', 'c']) 226 self.assertEqual(re.split("([b:]+)", ":a:b::c"), 227 ['', ':', 'a', ':b::', 'c']) 228 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), 229 ['', None, ':', 'a', None, ':', '', 'b', None, '', 230 None, '::', 'c']) 231 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), 232 ['', 'a', '', '', 'c']) 233 234 def test_qualified_re_split(self): 235 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) 236 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) 237 self.assertEqual(re.split("(:)", ":a:b::c", 2), 238 ['', ':', 'a', ':', 'b::c']) 239 self.assertEqual(re.split("(:*)", ":a:b::c", 2), 240 ['', ':', 'a', ':', 'b::c']) 241 242 def test_re_findall(self): 243 self.assertEqual(re.findall(":+", "abc"), []) 244 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"]) 245 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"]) 246 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""), 247 (":", ":"), 248 (":", "::")]) 249 250 def test_bug_117612(self): 251 self.assertEqual(re.findall(r"(a|(b))", "aba"), 252 [("a", ""),("b", "b"),("a", "")]) 253 254 def test_re_match(self): 255 self.assertEqual(re.match('a', 'a').groups(), ()) 256 self.assertEqual(re.match('(a)', 'a').groups(), ('a',)) 257 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a') 258 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a') 259 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a')) 260 261 pat = re.compile('((a)|(b))(c)?') 262 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) 263 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) 264 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) 265 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) 266 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) 267 268 # A single group 269 m = re.match('(a)', 'a') 270 self.assertEqual(m.group(0), 'a') 271 self.assertEqual(m.group(0), 'a') 272 self.assertEqual(m.group(1), 'a') 273 self.assertEqual(m.group(1, 1), ('a', 'a')) 274 275 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 276 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) 277 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), 278 (None, 'b', None)) 279 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) 280 281 def test_re_groupref_exists(self): 282 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), 283 ('(', 'a')) 284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), 285 (None, 'a')) 286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) 287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) 288 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), 289 ('a', 'b')) 290 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), 291 (None, 'd')) 292 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), 293 (None, 'd')) 294 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), 295 ('a', '')) 296 297 # Tests for bug #1177831: exercise groups other than the first group 298 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))') 299 self.assertEqual(p.match('abc').groups(), 300 ('a', 'b', 'c')) 301 self.assertEqual(p.match('ad').groups(), 302 ('a', None, 'd')) 303 self.assertEqual(p.match('abd'), None) 304 self.assertEqual(p.match('ac'), None) 305 306 307 def test_re_groupref(self): 308 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), 309 ('|', 'a')) 310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(), 311 (None, 'a')) 312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None) 313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None) 314 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(), 315 ('a', 'a')) 316 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(), 317 (None, None)) 318 319 def test_groupdict(self): 320 self.assertEqual(re.match('(?P<first>first) (?P<second>second)', 321 'first second').groupdict(), 322 {'first':'first', 'second':'second'}) 323 324 def test_expand(self): 325 self.assertEqual(re.match("(?P<first>first) (?P<second>second)", 326 "first second") 327 .expand(r"\2 \1 \g<second> \g<first>"), 328 "second first second first") 329 330 def test_repeat_minmax(self): 331 self.assertEqual(re.match("^(\w){1}$", "abc"), None) 332 self.assertEqual(re.match("^(\w){1}?$", "abc"), None) 333 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None) 334 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None) 335 336 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c") 337 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c") 338 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c") 339 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") 340 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c") 341 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c") 342 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c") 343 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") 344 345 self.assertEqual(re.match("^x{1}$", "xxx"), None) 346 self.assertEqual(re.match("^x{1}?$", "xxx"), None) 347 self.assertEqual(re.match("^x{1,2}$", "xxx"), None) 348 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None) 349 350 self.assertNotEqual(re.match("^x{3}$", "xxx"), None) 351 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None) 352 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None) 353 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) 354 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None) 355 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None) 356 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None) 357 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) 358 359 self.assertEqual(re.match("^x{}$", "xxx"), None) 360 self.assertNotEqual(re.match("^x{}$", "x{}"), None) 361 362 def test_getattr(self): 363 self.assertEqual(re.match("(a)", "a").pos, 0) 364 self.assertEqual(re.match("(a)", "a").endpos, 1) 365 self.assertEqual(re.match("(a)", "a").string, "a") 366 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1))) 367 self.assertNotEqual(re.match("(a)", "a").re, None) 368 369 def test_special_escapes(self): 370 self.assertEqual(re.search(r"\b(b.)\b", 371 "abcd abc bcd bx").group(1), "bx") 372 self.assertEqual(re.search(r"\B(b.)\B", 373 "abc bcd bc abxd").group(1), "bx") 374 self.assertEqual(re.search(r"\b(b.)\b", 375 "abcd abc bcd bx", re.LOCALE).group(1), "bx") 376 self.assertEqual(re.search(r"\B(b.)\B", 377 "abc bcd bc abxd", re.LOCALE).group(1), "bx") 378 self.assertEqual(re.search(r"\b(b.)\b", 379 "abcd abc bcd bx", re.UNICODE).group(1), "bx") 380 self.assertEqual(re.search(r"\B(b.)\B", 381 "abc bcd bc abxd", re.UNICODE).group(1), "bx") 382 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") 383 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") 384 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None) 385 self.assertEqual(re.search(r"\b(b.)\b", 386 u"abcd abc bcd bx").group(1), "bx") 387 self.assertEqual(re.search(r"\B(b.)\B", 388 u"abc bcd bc abxd").group(1), "bx") 389 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc") 390 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc") 391 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None) 392 self.assertEqual(re.search(r"\d\D\w\W\s\S", 393 "1aa! a").group(0), "1aa! a") 394 self.assertEqual(re.search(r"\d\D\w\W\s\S", 395 "1aa! a", re.LOCALE).group(0), "1aa! a") 396 self.assertEqual(re.search(r"\d\D\w\W\s\S", 397 "1aa! a", re.UNICODE).group(0), "1aa! a") 398 399 def test_string_boundaries(self): 400 # See http://bugs.python.org/issue10713 401 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), 402 "abc") 403 # There's a word boundary at the start of a string. 404 self.assertTrue(re.match(r"\b", "abc")) 405 # A non-empty string includes a non-boundary zero-length match. 406 self.assertTrue(re.search(r"\B", "abc")) 407 # There is no non-boundary match at the start of a string. 408 self.assertFalse(re.match(r"\B", "abc")) 409 # However, an empty string contains no word boundaries, and also no 410 # non-boundaries. 411 self.assertEqual(re.search(r"\B", ""), None) 412 # This one is questionable and different from the perlre behaviour, 413 # but describes current behavior. 414 self.assertEqual(re.search(r"\b", ""), None) 415 # A single word-character string has two boundaries, but no 416 # non-boundary gaps. 417 self.assertEqual(len(re.findall(r"\b", "a")), 2) 418 self.assertEqual(len(re.findall(r"\B", "a")), 0) 419 # If there are no words, there are no boundaries 420 self.assertEqual(len(re.findall(r"\b", " ")), 0) 421 self.assertEqual(len(re.findall(r"\b", " ")), 0) 422 # Can match around the whitespace. 423 self.assertEqual(len(re.findall(r"\B", " ")), 2) 424 425 def test_bigcharset(self): 426 self.assertEqual(re.match(u"([\u2222\u2223])", 427 u"\u2222").group(1), u"\u2222") 428 self.assertEqual(re.match(u"([\u2222\u2223])", 429 u"\u2222", re.UNICODE).group(1), u"\u2222") 430 431 def test_big_codesize(self): 432 # Issue #1160 433 r = re.compile('|'.join(('%d'%x for x in range(10000)))) 434 self.assertIsNotNone(r.match('1000')) 435 self.assertIsNotNone(r.match('9999')) 436 437 def test_anyall(self): 438 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0), 439 "a\nb") 440 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), 441 "a\n\nb") 442 443 def test_non_consuming(self): 444 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") 445 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") 446 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a") 447 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a") 448 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a") 449 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a") 450 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a") 451 452 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a") 453 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a") 454 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a") 455 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a") 456 457 def test_ignore_case(self): 458 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 459 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") 460 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b") 461 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb") 462 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b") 463 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb") 464 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a") 465 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa") 466 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a") 467 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa") 468 469 def test_category(self): 470 self.assertEqual(re.match(r"(\s)", " ").group(1), " ") 471 472 def test_getlower(self): 473 import _sre 474 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) 475 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) 476 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) 477 478 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 479 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") 480 481 def test_not_literal(self): 482 self.assertEqual(re.search("\s([^a])", " b").group(1), "b") 483 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb") 484 485 def test_search_coverage(self): 486 self.assertEqual(re.search("\s(b)", " b").group(1), "b") 487 self.assertEqual(re.search("a\s", "a ").group(0), "a ") 488 489 def assertMatch(self, pattern, text, match=None, span=None, 490 matcher=re.match): 491 if match is None and span is None: 492 # the pattern matches the whole text 493 match = text 494 span = (0, len(text)) 495 elif match is None or span is None: 496 raise ValueError('If match is not None, span should be specified ' 497 '(and vice versa).') 498 m = matcher(pattern, text) 499 self.assertTrue(m) 500 self.assertEqual(m.group(), match) 501 self.assertEqual(m.span(), span) 502 503 def test_re_escape(self): 504 alnum_chars = string.ascii_letters + string.digits 505 p = u''.join(unichr(i) for i in range(256)) 506 for c in p: 507 if c in alnum_chars: 508 self.assertEqual(re.escape(c), c) 509 elif c == u'\x00': 510 self.assertEqual(re.escape(c), u'\\000') 511 else: 512 self.assertEqual(re.escape(c), u'\\' + c) 513 self.assertMatch(re.escape(c), c) 514 self.assertMatch(re.escape(p), p) 515 516 def test_re_escape_byte(self): 517 alnum_chars = (string.ascii_letters + string.digits).encode('ascii') 518 p = ''.join(chr(i) for i in range(256)) 519 for b in p: 520 if b in alnum_chars: 521 self.assertEqual(re.escape(b), b) 522 elif b == b'\x00': 523 self.assertEqual(re.escape(b), b'\\000') 524 else: 525 self.assertEqual(re.escape(b), b'\\' + b) 526 self.assertMatch(re.escape(b), b) 527 self.assertMatch(re.escape(p), p) 528 529 def test_re_escape_non_ascii(self): 530 s = u'xxx\u2620\u2620\u2620xxx' 531 s_escaped = re.escape(s) 532 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx') 533 self.assertMatch(s_escaped, s) 534 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s, 535 u'x\u2620\u2620\u2620x', (2, 7), re.search) 536 537 def test_re_escape_non_ascii_bytes(self): 538 b = u'y\u2620y\u2620y'.encode('utf-8') 539 b_escaped = re.escape(b) 540 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y') 541 self.assertMatch(b_escaped, b) 542 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b) 543 self.assertEqual(len(res), 2) 544 545 def test_pickling(self): 546 import pickle 547 self.pickle_test(pickle) 548 import cPickle 549 self.pickle_test(cPickle) 550 # old pickles expect the _compile() reconstructor in sre module 551 import_module("sre", deprecated=True) 552 from sre import _compile 553 554 def pickle_test(self, pickle): 555 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') 556 s = pickle.dumps(oldpat) 557 newpat = pickle.loads(s) 558 self.assertEqual(oldpat, newpat) 559 560 def test_constants(self): 561 self.assertEqual(re.I, re.IGNORECASE) 562 self.assertEqual(re.L, re.LOCALE) 563 self.assertEqual(re.M, re.MULTILINE) 564 self.assertEqual(re.S, re.DOTALL) 565 self.assertEqual(re.X, re.VERBOSE) 566 567 def test_flags(self): 568 for flag in [re.I, re.M, re.X, re.S, re.L]: 569 self.assertNotEqual(re.compile('^pattern$', flag), None) 570 571 def test_sre_character_literals(self): 572 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 573 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None) 574 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None) 575 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None) 576 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None) 577 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None) 578 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None) 579 self.assertRaises(re.error, re.match, "\911", "") 580 581 def test_sre_character_class_literals(self): 582 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 583 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None) 584 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None) 585 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None) 586 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) 587 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) 588 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) 589 self.assertRaises(re.error, re.match, "[\911]", "") 590 591 def test_bug_113254(self): 592 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) 593 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1) 594 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1)) 595 596 def test_bug_527371(self): 597 # bug described in patches 527371/672491 598 self.assertEqual(re.match(r'(a)?a','a').lastindex, None) 599 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1) 600 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a') 601 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a') 602 self.assertEqual(re.match("((a))", "a").lastindex, 1) 603 604 def test_bug_545855(self): 605 # bug 545855 -- This pattern failed to cause a compile error as it 606 # should, instead provoking a TypeError. 607 self.assertRaises(re.error, re.compile, 'foo[a-') 608 609 def test_bug_418626(self): 610 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code 611 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of 612 # pattern '*?' on a long string. 613 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001) 614 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), 615 20003) 616 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) 617 # non-simple '*?' still used to hit the recursion limit, before the 618 # non-recursive scheme was implemented. 619 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) 620 621 def test_bug_612074(self): 622 pat=u"["+re.escape(u"\u2039")+u"]" 623 self.assertEqual(re.compile(pat) and 1, 1) 624 625 def test_stack_overflow(self): 626 # nasty cases that used to overflow the straightforward recursive 627 # implementation of repeated groups. 628 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x') 629 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') 630 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') 631 632 def test_unlimited_zero_width_repeat(self): 633 # Issue #9669 634 self.assertIsNone(re.match(r'(?:a?)*y', 'z')) 635 self.assertIsNone(re.match(r'(?:a?)+y', 'z')) 636 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z')) 637 self.assertIsNone(re.match(r'(?:a?)*?y', 'z')) 638 self.assertIsNone(re.match(r'(?:a?)+?y', 'z')) 639 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z')) 640 641 def test_scanner(self): 642 def s_ident(scanner, token): return token 643 def s_operator(scanner, token): return "op%s" % token 644 def s_float(scanner, token): return float(token) 645 def s_int(scanner, token): return int(token) 646 647 scanner = Scanner([ 648 (r"[a-zA-Z_]\w*", s_ident), 649 (r"\d+\.\d*", s_float), 650 (r"\d+", s_int), 651 (r"=|\+|-|\*|/", s_operator), 652 (r"\s+", None), 653 ]) 654 655 self.assertNotEqual(scanner.scanner.scanner("").pattern, None) 656 657 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), 658 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 659 'op+', 'bar'], '')) 660 661 def test_bug_448951(self): 662 # bug 448951 (similar to 429357, but with single char match) 663 # (Also test greedy matches.) 664 for op in '','?','*': 665 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(), 666 (None, None)) 667 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(), 668 ('a:', 'a')) 669 670 def test_bug_725106(self): 671 # capturing groups in alternatives in repeats 672 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(), 673 ('b', 'a')) 674 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(), 675 ('c', 'b')) 676 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(), 677 ('b', None)) 678 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(), 679 ('b', None)) 680 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(), 681 ('b', 'a')) 682 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(), 683 ('c', 'b')) 684 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(), 685 ('b', None)) 686 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(), 687 ('b', None)) 688 689 def test_bug_725149(self): 690 # mark_stack_base restoring before restoring marks 691 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(), 692 ('a', None)) 693 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(), 694 ('a', None, None)) 695 696 def test_bug_764548(self): 697 # bug 764548, re.compile() barfs on str/unicode subclasses 698 try: 699 unicode 700 except NameError: 701 return # no problem if we have no unicode 702 class my_unicode(unicode): pass 703 pat = re.compile(my_unicode("abc")) 704 self.assertEqual(pat.match("xyz"), None) 705 706 def test_finditer(self): 707 iter = re.finditer(r":+", "a:b::c:::d") 708 self.assertEqual([item.group(0) for item in iter], 709 [":", "::", ":::"]) 710 711 def test_bug_926075(self): 712 try: 713 unicode 714 except NameError: 715 return # no problem if we have no unicode 716 self.assertTrue(re.compile('bug_926075') is not 717 re.compile(eval("u'bug_926075'"))) 718 719 def test_bug_931848(self): 720 try: 721 unicode 722 except NameError: 723 pass 724 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"') 725 self.assertEqual(re.compile(pattern).split("a.b.c"), 726 ['a','b','c']) 727 728 def test_bug_581080(self): 729 iter = re.finditer(r"\s", "a b") 730 self.assertEqual(iter.next().span(), (1,2)) 731 self.assertRaises(StopIteration, iter.next) 732 733 scanner = re.compile(r"\s").scanner("a b") 734 self.assertEqual(scanner.search().span(), (1, 2)) 735 self.assertEqual(scanner.search(), None) 736 737 def test_bug_817234(self): 738 iter = re.finditer(r".*", "asdf") 739 self.assertEqual(iter.next().span(), (0, 4)) 740 self.assertEqual(iter.next().span(), (4, 4)) 741 self.assertRaises(StopIteration, iter.next) 742 743 def test_bug_6561(self): 744 # '\d' should match characters in Unicode category 'Nd' 745 # (Number, Decimal Digit), but not those in 'Nl' (Number, 746 # Letter) or 'No' (Number, Other). 747 decimal_digits = [ 748 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd' 749 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' 750 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' 751 ] 752 for x in decimal_digits: 753 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x) 754 755 not_decimal_digits = [ 756 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' 757 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' 758 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No' 759 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' 760 ] 761 for x in not_decimal_digits: 762 self.assertIsNone(re.match('^\d$', x, re.UNICODE)) 763 764 def test_empty_array(self): 765 # SF buf 1647541 766 import array 767 for typecode in 'cbBuhHiIlLfd': 768 a = array.array(typecode) 769 self.assertEqual(re.compile("bla").match(a), None) 770 self.assertEqual(re.compile("").match(a).groups(), ()) 771 772 def test_inline_flags(self): 773 # Bug #1700 774 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow 775 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow 776 777 p = re.compile(upper_char, re.I | re.U) 778 q = p.match(lower_char) 779 self.assertNotEqual(q, None) 780 781 p = re.compile(lower_char, re.I | re.U) 782 q = p.match(upper_char) 783 self.assertNotEqual(q, None) 784 785 p = re.compile('(?i)' + upper_char, re.U) 786 q = p.match(lower_char) 787 self.assertNotEqual(q, None) 788 789 p = re.compile('(?i)' + lower_char, re.U) 790 q = p.match(upper_char) 791 self.assertNotEqual(q, None) 792 793 p = re.compile('(?iu)' + upper_char) 794 q = p.match(lower_char) 795 self.assertNotEqual(q, None) 796 797 p = re.compile('(?iu)' + lower_char) 798 q = p.match(upper_char) 799 self.assertNotEqual(q, None) 800 801 def test_dollar_matches_twice(self): 802 "$ matches the end of string, and just before the terminating \n" 803 pattern = re.compile('$') 804 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') 805 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') 806 self.assertEqual(pattern.sub('#', '\n'), '#\n#') 807 808 pattern = re.compile('$', re.MULTILINE) 809 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) 810 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') 811 self.assertEqual(pattern.sub('#', '\n'), '#\n#') 812 813 def test_dealloc(self): 814 # issue 3299: check for segfault in debug build 815 import _sre 816 # the overflow limit is different on wide and narrow builds and it 817 # depends on the definition of SRE_CODE (see sre.h). 818 # 2**128 should be big enough to overflow on both. For smaller values 819 # a RuntimeError is raised instead of OverflowError. 820 long_overflow = 2**128 821 self.assertRaises(TypeError, re.finditer, "a", {}) 822 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow]) 823 824 def test_compile(self): 825 # Test return value when given string and pattern as parameter 826 pattern = re.compile('random pattern') 827 self.assertIsInstance(pattern, re._pattern_type) 828 same_pattern = re.compile(pattern) 829 self.assertIsInstance(same_pattern, re._pattern_type) 830 self.assertIs(same_pattern, pattern) 831 # Test behaviour when not given a string or pattern as parameter 832 self.assertRaises(TypeError, re.compile, 0) 833 834 def test_bug_13899(self): 835 # Issue #13899: re pattern r"[\A]" should work like "A" but matches 836 # nothing. Ditto B and Z. 837 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'), 838 ['A', 'B', '\b', 'C', 'Z']) 839 840 @precisionbigmemtest(size=_2G, memuse=1) 841 def test_large_search(self, size): 842 # Issue #10182: indices were 32-bit-truncated. 843 s = 'a' * size 844 m = re.search('$', s) 845 self.assertIsNotNone(m) 846 self.assertEqual(m.start(), size) 847 self.assertEqual(m.end(), size) 848 849 # The huge memuse is because of re.sub() using a list and a join() 850 # to create the replacement result. 851 @precisionbigmemtest(size=_2G, memuse=16 + 2) 852 def test_large_subn(self, size): 853 # Issue #10182: indices were 32-bit-truncated. 854 s = 'a' * size 855 r, n = re.subn('', '', s) 856 self.assertEqual(r, s) 857 self.assertEqual(n, size + 1) 858 859 860 def test_repeat_minmax_overflow(self): 861 # Issue #13169 862 string = "x" * 100000 863 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535)) 864 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535)) 865 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535)) 866 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536)) 867 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536)) 868 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536)) 869 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t. 870 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128) 871 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128) 872 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128) 873 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) 874 875 @cpython_only 876 def test_repeat_minmax_overflow_maxrepeat(self): 877 try: 878 from _sre import MAXREPEAT 879 except ImportError: 880 self.skipTest('requires _sre.MAXREPEAT constant') 881 string = "x" * 100000 882 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string)) 883 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(), 884 (0, 100000)) 885 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string)) 886 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT) 887 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT) 888 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT) 889 890 def test_backref_group_name_in_exception(self): 891 # Issue 17341: Poor error message when compiling invalid regex 892 with self.assertRaisesRegexp(sre_constants.error, '<foo>'): 893 re.compile('(?P=<foo>)') 894 895 def test_group_name_in_exception(self): 896 # Issue 17341: Poor error message when compiling invalid regex 897 with self.assertRaisesRegexp(sre_constants.error, '\?foo'): 898 re.compile('(?P<?foo>)') 899 900 901def run_re_tests(): 902 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR 903 if verbose: 904 print 'Running re_tests test suite' 905 else: 906 # To save time, only run the first and last 10 tests 907 #tests = tests[:10] + tests[-10:] 908 pass 909 910 for t in tests: 911 sys.stdout.flush() 912 pattern = s = outcome = repl = expected = None 913 if len(t) == 5: 914 pattern, s, outcome, repl, expected = t 915 elif len(t) == 3: 916 pattern, s, outcome = t 917 else: 918 raise ValueError, ('Test tuples should have 3 or 5 fields', t) 919 920 try: 921 obj = re.compile(pattern) 922 except re.error: 923 if outcome == SYNTAX_ERROR: pass # Expected a syntax error 924 else: 925 print '=== Syntax error:', t 926 except KeyboardInterrupt: raise KeyboardInterrupt 927 except: 928 print '*** Unexpected error ***', t 929 if verbose: 930 traceback.print_exc(file=sys.stdout) 931 else: 932 try: 933 result = obj.search(s) 934 except re.error, msg: 935 print '=== Unexpected exception', t, repr(msg) 936 if outcome == SYNTAX_ERROR: 937 # This should have been a syntax error; forget it. 938 pass 939 elif outcome == FAIL: 940 if result is None: pass # No match, as expected 941 else: print '=== Succeeded incorrectly', t 942 elif outcome == SUCCEED: 943 if result is not None: 944 # Matched, as expected, so now we compute the 945 # result string and compare it to our expected result. 946 start, end = result.span(0) 947 vardict={'found': result.group(0), 948 'groups': result.group(), 949 'flags': result.re.flags} 950 for i in range(1, 100): 951 try: 952 gi = result.group(i) 953 # Special hack because else the string concat fails: 954 if gi is None: 955 gi = "None" 956 except IndexError: 957 gi = "Error" 958 vardict['g%d' % i] = gi 959 for i in result.re.groupindex.keys(): 960 try: 961 gi = result.group(i) 962 if gi is None: 963 gi = "None" 964 except IndexError: 965 gi = "Error" 966 vardict[i] = gi 967 repl = eval(repl, vardict) 968 if repl != expected: 969 print '=== grouping error', t, 970 print repr(repl) + ' should be ' + repr(expected) 971 else: 972 print '=== Failed incorrectly', t 973 974 # Try the match on a unicode string, and check that it 975 # still succeeds. 976 try: 977 result = obj.search(unicode(s, "latin-1")) 978 if result is None: 979 print '=== Fails on unicode match', t 980 except NameError: 981 continue # 1.5.2 982 except TypeError: 983 continue # unicode test case 984 985 # Try the match on a unicode pattern, and check that it 986 # still succeeds. 987 obj=re.compile(unicode(pattern, "latin-1")) 988 result = obj.search(s) 989 if result is None: 990 print '=== Fails on unicode pattern match', t 991 992 # Try the match with the search area limited to the extent 993 # of the match and see if it still succeeds. \B will 994 # break (because it won't match at the end or start of a 995 # string), so we'll ignore patterns that feature it. 996 997 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \ 998 and result is not None: 999 obj = re.compile(pattern) 1000 result = obj.search(s, result.start(0), result.end(0) + 1) 1001 if result is None: 1002 print '=== Failed on range-limited match', t 1003 1004 # Try the match with IGNORECASE enabled, and check that it 1005 # still succeeds. 1006 obj = re.compile(pattern, re.IGNORECASE) 1007 result = obj.search(s) 1008 if result is None: 1009 print '=== Fails on case-insensitive match', t 1010 1011 # Try the match with LOCALE enabled, and check that it 1012 # still succeeds. 1013 obj = re.compile(pattern, re.LOCALE) 1014 result = obj.search(s) 1015 if result is None: 1016 print '=== Fails on locale-sensitive match', t 1017 1018 # Try the match with UNICODE locale enabled, and check 1019 # that it still succeeds. 1020 obj = re.compile(pattern, re.UNICODE) 1021 result = obj.search(s) 1022 if result is None: 1023 print '=== Fails on unicode-sensitive match', t 1024 1025def test_main(): 1026 run_unittest(ReTests) 1027 run_re_tests() 1028 1029if __name__ == "__main__": 1030 test_main() 1031