from test.test_support import verbose, run_unittest, import_module
from test.test_support import precisionbigmemtest, _2G, cpython_only
import re
from re import Scanner
import sre_constants
import sys
import string
import traceback
from weakref import proxy


# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.

import unittest


class ReTests(unittest.TestCase):
    # Regression tests for the ``re`` module.  Comments are used instead of
    # docstrings on the test methods so that unittest's verbose output keeps
    # showing the method names.

    def test_weakref(self):
        # A weakref proxy of a compiled pattern must give the same
        # findall() result as the pattern object itself.
        s = 'QabbbcR'
        x = re.compile('ab+c')
        y = proxy(x)
        self.assertEqual(x.findall(s), y.findall(s))

    def test_search_star_plus(self):
        # search(): 'x*' is satisfied by the empty string at position 0,
        # while 'x+' has to move forward to the first run of 'x'.
        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
        self.assertIsNone(re.search('x', 'aaa'))
        # match(): only the start of the string is considered.
        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
        self.assertIsNone(re.match('a+', 'xxx'))
39ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 40ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def bump_num(self, matchobj): 41ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh int_value = int(matchobj.group(0)) 42ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh return str(int_value + 1) 43ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 44ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_basic_re_sub(self): 45ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') 46ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), 47ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh '9.3 -3 24x100y') 48ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), 49ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh '9.3 -3 23x99y') 50ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 51ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n') 52ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('.', r"\n", 'x'), '\n') 53ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 54ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh s = r"\1\1" 55ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(.)', s, 'x'), 'xx') 56ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s) 57ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s) 58ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 59ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx') 60ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx') 
61ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx') 62ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx') 63ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 64ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'), 65ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') 66ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a') 67ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), 68ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) 69ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 70ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest') 71ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 72ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_449964(self): 73ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # fails for group followed by other escape 74ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 75ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'xx\bxx\b') 76ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 77ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_449000(self): 78ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Test for sub() on escaped characters 79ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), 80ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'abc\ndef\n') 81ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 
82ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'abc\ndef\n') 83ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 84ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'abc\ndef\n') 85ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 86ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'abc\ndef\n') 87ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 88ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_1140(self): 89ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # re.sub(x, y, u'') should return u'', not '', and 90ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # re.sub(x, y, '') should return '', not u''. 91ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Also: 92ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # re.sub(x, y, unicode(x)) should return unicode(y), and 93ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # re.sub(x, y, str(x)) should return 94ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # str(y) if isinstance(y, str) else unicode(y). 
95ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for x in 'x', u'x': 96ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for y in 'y', u'y': 97ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh z = re.sub(x, y, u'') 98ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(z, u'') 99ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(type(z), unicode) 100ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # 101ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh z = re.sub(x, y, '') 102ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(z, '') 103ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(type(z), str) 104ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # 105ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh z = re.sub(x, y, unicode(x)) 106ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(z, y) 107ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(type(z), unicode) 108ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # 109ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh z = re.sub(x, y, str(x)) 110ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(z, y) 111ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(type(z), type(y)) 112ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 113ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_1661(self): 114ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Verify that flags do not get silently ignored with compiled patterns 115ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern = re.compile('.') 116ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(ValueError, re.match, pattern, 'A', re.I) 117ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(ValueError, re.search, pattern, 'A', re.I) 118ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(ValueError, re.findall, 
pattern, 'A', re.I) 119ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(ValueError, re.compile, pattern, re.I) 120ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 121ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_3629(self): 122ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # A regex that triggered a bug in the sre-code validator 123ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh re.compile("(?P<quote>)(?(quote))") 124ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 125ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_sub_template_numeric_escape(self): 126ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # bug 776311 and friends 127ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\0', 'x'), '\0') 128ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\000', 'x'), '\000') 129ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\001', 'x'), '\001') 130ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') 131ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') 132ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\111', 'x'), '\111') 133ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\117', 'x'), '\117') 134ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 135ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') 136ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') 137ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 138ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') 139ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') 140ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') 141ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') 142ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') 143ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 144ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\400', 'x'), '\0') 145ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x', r'\777', 'x'), '\377') 146ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 147ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\1', 'x') 148ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') 149ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\9', 'x') 150ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\11', 'x') 151ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\18', 'x') 152ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x') 153ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\90', 'x') 154ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\99', 'x') 155ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' 156ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x') 157ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' 158ffab958fd8d42ed7227d83007350e61555a1fa36Andrew 
Hsieh self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' 159ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 160ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # in python2.3 (etc), these loop endlessly in sre_parser.py 161ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') 162ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), 163ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'xz8') 164ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), 165ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'xza') 166ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 167ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_qualified_re_sub(self): 168ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') 169ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') 170ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 171ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_114660(self): 172ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), 173ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'hello there') 174ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 175ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_462270(self): 176ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Test for empty sub() behaviour, see SF bug #462270 177ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-') 178ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d') 179ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
180ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_symbolic_groups(self): 181ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh re.compile('(?P<a>x)(?P=a)(?(a)y)') 182ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh re.compile('(?P<a1>x)(?P=a1)(?(a1)y)') 183ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)') 184ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?Px)') 185ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P=)') 186ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P=1)') 187ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P=a)') 188ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P=a1)') 189ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P=a.)') 190ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P<)') 191ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P<>)') 192ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P<1>)') 193ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?P<a.>)') 194ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?())') 195ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?(a))') 196ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?(1a))') 197ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, '(?(a.))') 198ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 199ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def 
test_symbolic_refs(self): 200ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') 201ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx') 202ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx') 203ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') 204ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx') 205ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') 206ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') 207ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') 208ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') 209ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') 210ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 211ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_subn(self): 212ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) 213ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) 214ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) 215ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) 216ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) 
217ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 218ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_split(self): 219ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) 220ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) 221ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(:*)", ":a:b::c"), 222ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', ':', 'a', ':', 'b', '::', 'c']) 223ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) 224ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(:)*", ":a:b::c"), 225ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', ':', 'a', ':', 'b', ':', 'c']) 226ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("([b:]+)", ":a:b::c"), 227ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', ':', 'a', ':b::', 'c']) 228ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), 229ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', None, ':', 'a', None, ':', '', 'b', None, '', 230ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh None, '::', 'c']) 231ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), 232ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', 'a', '', '', 'c']) 233ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 234ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_qualified_re_split(self): 235ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) 236ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) 
237ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(:)", ":a:b::c", 2), 238ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', ':', 'a', ':', 'b::c']) 239ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.split("(:*)", ":a:b::c", 2), 240ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['', ':', 'a', ':', 'b::c']) 241ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 242ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_findall(self): 243ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.findall(":+", "abc"), []) 244ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"]) 245ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"]) 246ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""), 247ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (":", ":"), 248ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (":", "::")]) 249ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 250ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_117612(self): 251ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.findall(r"(a|(b))", "aba"), 252ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh [("a", ""),("b", "b"),("a", "")]) 253ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 254ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_match(self): 255ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('a', 'a').groups(), ()) 256ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(a)', 'a').groups(), ('a',)) 257ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(a)', 'a').group(0), 'a') 258ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
self.assertEqual(re.match(r'(a)', 'a').group(1), 'a') 259ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a')) 260ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 261ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pat = re.compile('((a)|(b))(c)?') 262ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) 263ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) 264ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) 265ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) 266ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) 267ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 268ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # A single group 269ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh m = re.match('(a)', 'a') 270ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.group(0), 'a') 271ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.group(0), 'a') 272ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.group(1), 'a') 273ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.group(1, 1), ('a', 'a')) 274ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 275ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 276ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) 277ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), 278ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, 'b', None)) 
279ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) 280ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 281ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_groupref_exists(self): 282ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), 283ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('(', 'a')) 284ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), 285ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, 'a')) 286ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) 287ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) 288ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), 289ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', 'b')) 290ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), 291ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, 'd')) 292ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), 293ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, 'd')) 294ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), 295ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', '')) 296ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 297ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Tests for bug #1177831: exercise groups other than the first group 298ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))') 
299ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(p.match('abc').groups(), 300ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', 'b', 'c')) 301ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(p.match('ad').groups(), 302ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', None, 'd')) 303ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(p.match('abd'), None) 304ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(p.match('ac'), None) 305ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 306ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 307ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_groupref(self): 308ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), 309ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('|', 'a')) 310ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(), 311ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, 'a')) 312ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None) 313ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None) 314ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(), 315ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', 'a')) 316ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(), 317ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, None)) 318ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 319ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_groupdict(self): 320ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(?P<first>first) (?P<second>second)', 
321ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'first second').groupdict(), 322ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh {'first':'first', 'second':'second'}) 323ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 324ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_expand(self): 325ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(?P<first>first) (?P<second>second)", 326ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "first second") 327ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh .expand(r"\2 \1 \g<second> \g<first>"), 328ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "second first second first") 329ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 330ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_repeat_minmax(self): 331ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1}$", "abc"), None) 332ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1}?$", "abc"), None) 333ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1,2}$", "abc"), None) 334ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None) 335ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 336ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c") 337ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c") 338ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c") 339ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") 340ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c") 341ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c") 342ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c") 343ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") 344ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 345ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^x{1}$", "xxx"), None) 346ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^x{1}?$", "xxx"), None) 347ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^x{1,2}$", "xxx"), None) 348ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^x{1,2}?$", "xxx"), None) 349ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 350ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{3}$", "xxx"), None) 351ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None) 352ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None) 353ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) 354ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{3}?$", "xxx"), None) 355ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None) 356ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None) 357ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) 358ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 359ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("^x{}$", "xxx"), None) 360ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("^x{}$", "x{}"), 
None) 361ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 362ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_getattr(self): 363ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a)", "a").pos, 0) 364ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a)", "a").endpos, 1) 365ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a)", "a").string, "a") 366ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1))) 367ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match("(a)", "a").re, None) 368ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 369ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_special_escapes(self): 370ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\b(b.)\b", 371ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abcd abc bcd bx").group(1), "bx") 372ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\B(b.)\B", 373ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abc bcd bc abxd").group(1), "bx") 374ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\b(b.)\b", 375ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abcd abc bcd bx", re.LOCALE).group(1), "bx") 376ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\B(b.)\B", 377ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abc bcd bc abxd", re.LOCALE).group(1), "bx") 378ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\b(b.)\b", 379ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abcd abc bcd bx", re.UNICODE).group(1), "bx") 380ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\B(b.)\B", 381ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abc bcd bc abxd", re.UNICODE).group(1), "bx") 
382ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") 383ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") 384ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None) 385ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\b(b.)\b", 386ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u"abcd abc bcd bx").group(1), "bx") 387ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\B(b.)\B", 388ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u"abc bcd bc abxd").group(1), "bx") 389ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc") 390ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc") 391ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None) 392ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\d\D\w\W\s\S", 393ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "1aa! a").group(0), "1aa! a") 394ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\d\D\w\W\s\S", 395ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "1aa! a", re.LOCALE).group(0), "1aa! a") 396ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\d\D\w\W\s\S", 397ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "1aa! a", re.UNICODE).group(0), "1aa! 
a") 398ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 399ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_string_boundaries(self): 400ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # See http://bugs.python.org/issue10713 401ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), 402ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "abc") 403ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # There's a word boundary at the start of a string. 404ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertTrue(re.match(r"\b", "abc")) 405ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # A non-empty string includes a non-boundary zero-length match. 406ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertTrue(re.search(r"\B", "abc")) 407ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # There is no non-boundary match at the start of a string. 408ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertFalse(re.match(r"\B", "abc")) 409ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # However, an empty string contains no word boundaries, and also no 410ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # non-boundaries. 411ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\B", ""), None) 412ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # This one is questionable and different from the perlre behaviour, 413ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # but describes current behavior. 414ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search(r"\b", ""), None) 415ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # A single word-character string has two boundaries, but no 416ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # non-boundary gaps. 
417ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(len(re.findall(r"\b", "a")), 2) 418ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(len(re.findall(r"\B", "a")), 0) 419ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # If there are no words, there are no boundaries 420ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(len(re.findall(r"\b", " ")), 0) 421ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(len(re.findall(r"\b", " ")), 0) 422ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Can match around the whitespace. 423ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(len(re.findall(r"\B", " ")), 2) 424ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 425ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bigcharset(self): 426ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(u"([\u2222\u2223])", 427ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u"\u2222").group(1), u"\u2222") 428ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(u"([\u2222\u2223])", 429ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u"\u2222", re.UNICODE).group(1), u"\u2222") 430ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 431ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_big_codesize(self): 432ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue #1160 433ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh r = re.compile('|'.join(('%d'%x for x in range(10000)))) 434ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNotNone(r.match('1000')) 435ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNotNone(r.match('9999')) 436ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 437ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_anyall(self): 438ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("a.b", "a\nb", 
re.DOTALL).group(0), 439ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "a\nb") 440ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), 441ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "a\n\nb") 442ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 443ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_non_consuming(self): 444ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") 445ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") 446ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a") 447ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a") 448ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a") 449ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a") 450ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a") 451ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 452ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a") 453ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a") 454ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a") 455ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a") 456ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 457ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_ignore_case(self): 
458ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 459ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") 460ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b") 461ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb") 462ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b") 463ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb") 464ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a") 465ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa") 466ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a") 467ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa") 468ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 469ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_category(self): 470ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r"(\s)", " ").group(1), " ") 471ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 472ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_getlower(self): 473ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh import _sre 474ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) 475ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) 
476ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) 477ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 478ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 479ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") 480ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 481ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_not_literal(self): 482ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search("\s([^a])", " b").group(1), "b") 483ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb") 484ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 485ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_search_coverage(self): 486ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search("\s(b)", " b").group(1), "b") 487ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search("a\s", "a ").group(0), "a ") 488ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 489ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def assertMatch(self, pattern, text, match=None, span=None, 490ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh matcher=re.match): 491ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if match is None and span is None: 492ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # the pattern matches the whole text 493ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh match = text 494ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh span = (0, len(text)) 495ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh elif match is None or span is None: 496ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh raise ValueError('If match is not None, span should be specified ' 
497ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh '(and vice versa).') 498ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh m = matcher(pattern, text) 499ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertTrue(m) 500ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.group(), match) 501ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.span(), span) 502ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 503ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_escape(self): 504ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh alnum_chars = string.ascii_letters + string.digits 505ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = u''.join(unichr(i) for i in range(256)) 506ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for c in p: 507ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if c in alnum_chars: 508ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.escape(c), c) 509ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh elif c == u'\x00': 510ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.escape(c), u'\\000') 511ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 512ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.escape(c), u'\\' + c) 513ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(re.escape(c), c) 514ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(re.escape(p), p) 515ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 516ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_escape_byte(self): 517ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh alnum_chars = (string.ascii_letters + string.digits).encode('ascii') 518ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = ''.join(chr(i) for i in range(256)) 519ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for b in p: 520ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if b 
in alnum_chars: 521ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.escape(b), b) 522ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh elif b == b'\x00': 523ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.escape(b), b'\\000') 524ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 525ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.escape(b), b'\\' + b) 526ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(re.escape(b), b) 527ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(re.escape(p), p) 528ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 529ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_escape_non_ascii(self): 530ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh s = u'xxx\u2620\u2620\u2620xxx' 531ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh s_escaped = re.escape(s) 532ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx') 533ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(s_escaped, s) 534ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(u'.%s+.' 
% re.escape(u'\u2620'), s, 535ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'x\u2620\u2620\u2620x', (2, 7), re.search) 536ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 537ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_re_escape_non_ascii_bytes(self): 538ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh b = u'y\u2620y\u2620y'.encode('utf-8') 539ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh b_escaped = re.escape(b) 540ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y') 541ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertMatch(b_escaped, b) 542ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b) 543ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(len(res), 2) 544ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 545ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_pickling(self): 546ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh import pickle 547ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.pickle_test(pickle) 548ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh import cPickle 549ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.pickle_test(cPickle) 550ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # old pickles expect the _compile() reconstructor in sre module 551ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh import_module("sre", deprecated=True) 552ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh from sre import _compile 553ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 554ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def pickle_test(self, pickle): 555ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') 556ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh s = pickle.dumps(oldpat) 557ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh newpat 
= pickle.loads(s) 558ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(oldpat, newpat) 559ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 560ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_constants(self): 561ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.I, re.IGNORECASE) 562ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.L, re.LOCALE) 563ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.M, re.MULTILINE) 564ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.S, re.DOTALL) 565ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.X, re.VERBOSE) 566ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 567ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_flags(self): 568ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for flag in [re.I, re.M, re.X, re.S, re.L]: 569ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.compile('^pattern$', flag), None) 570ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 571ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_sre_character_literals(self): 572ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for i in [0, 8, 16, 32, 64, 127, 128, 255]: 573ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None) 574ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None) 575ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None) 576ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None) 577ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None) 578ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"\x%02xz" % i, 
chr(i)+"z"), None) 579ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.match, "\911", "") 580ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 581ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_sre_character_class_literals(self): 582ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for i in [0, 8, 16, 32, 64, 127, 128, 255]: 583ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None) 584ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None) 585ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None) 586ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) 587ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) 588ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) 589ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.match, "[\911]", "") 590ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 591ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_113254(self): 592ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) 593ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1) 594ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1)) 595ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 596ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_527371(self): 597ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # bug described in patches 527371/672491 598ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
self.assertEqual(re.match(r'(a)?a','a').lastindex, None) 599ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1) 600ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a') 601ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a') 602ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match("((a))", "a").lastindex, 1) 603ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 604ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_545855(self): 605ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # bug 545855 -- This pattern failed to cause a compile error as it 606ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # should, instead provoking a TypeError. 607ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(re.error, re.compile, 'foo[a-') 608ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 609ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_418626(self): 610ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # bugs 418626 at al. -- Testing Greg Chapman's addition of op code 611ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of 612ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # pattern '*?' on a long string. 613ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001) 614ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), 615ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 20003) 616ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) 617ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # non-simple '*?' 
still used to hit the recursion limit, before the 618ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # non-recursive scheme was implemented. 619ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) 620ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 621ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_612074(self): 622ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pat=u"["+re.escape(u"\u2039")+u"]" 623ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.compile(pat) and 1, 1) 624ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 625ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_stack_overflow(self): 626ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # nasty cases that used to overflow the straightforward recursive 627ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # implementation of repeated groups. 628ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x') 629ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') 630ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') 631ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 632ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_unlimited_zero_width_repeat(self): 633ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue #9669 634ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r'(?:a?)*y', 'z')) 635ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r'(?:a?)+y', 'z')) 636ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r'(?:a?){2,}y', 'z')) 637ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r'(?:a?)*?y', 'z')) 
638ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r'(?:a?)+?y', 'z')) 639ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z')) 640ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 641ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_scanner(self): 642ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def s_ident(scanner, token): return token 643ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def s_operator(scanner, token): return "op%s" % token 644ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def s_float(scanner, token): return float(token) 645ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def s_int(scanner, token): return int(token) 646ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 647ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh scanner = Scanner([ 648ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (r"[a-zA-Z_]\w*", s_ident), 649ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (r"\d+\.\d*", s_float), 650ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (r"\d+", s_int), 651ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (r"=|\+|-|\*|/", s_operator), 652ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (r"\s+", None), 653ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ]) 654ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 655ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(scanner.scanner.scanner("").pattern, None) 656ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 657ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), 658ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 659ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'op+', 'bar'], '')) 660ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 661ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def 
test_bug_448951(self): 662ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # bug 448951 (similar to 429357, but with single char match) 663ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # (Also test greedy matches.) 664ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for op in '','?','*': 665ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(), 666ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (None, None)) 667ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(), 668ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a:', 'a')) 669ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 670ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_725106(self): 671ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # capturing groups in alternatives in repeats 672ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^((a)|b)*', 'abc').groups(), 673ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('b', 'a')) 674ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(), 675ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('c', 'b')) 676ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(), 677ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('b', None)) 678ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(), 679ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('b', None)) 680ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(), 681ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('b', 'a')) 682ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(), 683ffab958fd8d42ed7227d83007350e61555a1fa36Andrew 
Hsieh ('c', 'b')) 684ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(), 685ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('b', None)) 686ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(), 687ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('b', None)) 688ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 689ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_725149(self): 690ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # mark_stack_base restoring before restoring marks 691ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(), 692ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', None)) 693ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(), 694ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ('a', None, None)) 695ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 696ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_764548(self): 697ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # bug 764548, re.compile() barfs on str/unicode subclasses 698ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 699ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh unicode 700ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except NameError: 701ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh return # no problem if we have no unicode 702ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh class my_unicode(unicode): pass 703ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pat = re.compile(my_unicode("abc")) 704ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pat.match("xyz"), None) 705ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 706ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_finditer(self): 
707ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh iter = re.finditer(r":+", "a:b::c:::d") 708ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual([item.group(0) for item in iter], 709ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh [":", "::", ":::"]) 710ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 711ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_926075(self): 712ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 713ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh unicode 714ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except NameError: 715ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh return # no problem if we have no unicode 716ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertTrue(re.compile('bug_926075') is not 717ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh re.compile(eval("u'bug_926075'"))) 718ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 719ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_931848(self): 720ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 721ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh unicode 722ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except NameError: 723ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pass 724ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"') 725ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.compile(pattern).split("a.b.c"), 726ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['a','b','c']) 727ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 728ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_581080(self): 729ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh iter = re.finditer(r"\s", "a b") 730ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(iter.next().span(), (1,2)) 731ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
self.assertRaises(StopIteration, iter.next) 732ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 733ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh scanner = re.compile(r"\s").scanner("a b") 734ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(scanner.search().span(), (1, 2)) 735ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(scanner.search(), None) 736ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 737ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_817234(self): 738ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh iter = re.finditer(r".*", "asdf") 739ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(iter.next().span(), (0, 4)) 740ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(iter.next().span(), (4, 4)) 741ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(StopIteration, iter.next) 742ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 743ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_6561(self): 744ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # '\d' should match characters in Unicode category 'Nd' 745ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # (Number, Decimal Digit), but not those in 'Nl' (Number, 746ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Letter) or 'No' (Number, Other). 
747ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh decimal_digits = [ 748ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd' 749ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' 750ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' 751ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ] 752ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for x in decimal_digits: 753ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x) 754ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 755ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh not_decimal_digits = [ 756ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' 757ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' 758ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No' 759ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' 760ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ] 761ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for x in not_decimal_digits: 762ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match('^\d$', x, re.UNICODE)) 763ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 764ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_empty_array(self): 765ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # SF buf 1647541 766ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh import array 767ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for typecode in 'cbBuhHiIlLfd': 768ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh a = array.array(typecode) 769ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 
self.assertEqual(re.compile("bla").match(a), None) 770ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.compile("").match(a).groups(), ()) 771ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 772ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_inline_flags(self): 773ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Bug #1700 774ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow 775ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow 776ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 777ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = re.compile(upper_char, re.I | re.U) 778ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh q = p.match(lower_char) 779ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(q, None) 780ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 781ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = re.compile(lower_char, re.I | re.U) 782ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh q = p.match(upper_char) 783ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(q, None) 784ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 785ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = re.compile('(?i)' + upper_char, re.U) 786ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh q = p.match(lower_char) 787ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(q, None) 788ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 789ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = re.compile('(?i)' + lower_char, re.U) 790ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh q = p.match(upper_char) 791ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(q, None) 792ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 793ffab958fd8d42ed7227d83007350e61555a1fa36Andrew 
Hsieh p = re.compile('(?iu)' + upper_char) 794ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh q = p.match(lower_char) 795ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(q, None) 796ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 797ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh p = re.compile('(?iu)' + lower_char) 798ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh q = p.match(upper_char) 799ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertNotEqual(q, None) 800ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 801ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_dollar_matches_twice(self): 802ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh "$ matches the end of string, and just before the terminating \n" 803ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern = re.compile('$') 804ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') 805ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') 806ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pattern.sub('#', '\n'), '#\n#') 807ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 808ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern = re.compile('$', re.MULTILINE) 809ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) 810ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') 811ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(pattern.sub('#', '\n'), '#\n#') 812ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 813ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_dealloc(self): 814ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # issue 3299: check for segfault in debug build 815ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh import 
_sre 816ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # the overflow limit is different on wide and narrow builds and it 817ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # depends on the definition of SRE_CODE (see sre.h). 818ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # 2**128 should be big enough to overflow on both. For smaller values 819ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # a RuntimeError is raised instead of OverflowError. 820ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh long_overflow = 2**128 821ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(TypeError, re.finditer, "a", {}) 822ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow]) 823ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 824ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_compile(self): 825ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Test return value when given string and pattern as parameter 826ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern = re.compile('random pattern') 827ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsInstance(pattern, re._pattern_type) 828ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh same_pattern = re.compile(pattern) 829ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsInstance(same_pattern, re._pattern_type) 830ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIs(same_pattern, pattern) 831ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Test behaviour when not given a string or pattern as parameter 832ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(TypeError, re.compile, 0) 833ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 834ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_bug_13899(self): 835ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue #13899: re pattern r"[\A]" should work 
like "A" but matches 836ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # nothing. Ditto B and Z. 837ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'), 838ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh ['A', 'B', '\b', 'C', 'Z']) 839ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 840ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh @precisionbigmemtest(size=_2G, memuse=1) 841ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_large_search(self, size): 842ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue #10182: indices were 32-bit-truncated. 843ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh s = 'a' * size 844ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh m = re.search('$', s) 845ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNotNone(m) 846ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.start(), size) 847ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(m.end(), size) 848ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 849ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # The huge memuse is because of re.sub() using a list and a join() 850ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # to create the replacement result. 851ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh @precisionbigmemtest(size=_2G, memuse=16 + 2) 852ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_large_subn(self, size): 853ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue #10182: indices were 32-bit-truncated. 
854ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh s = 'a' * size 855ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh r, n = re.subn('', '', s) 856ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(r, s) 857ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(n, size + 1) 858ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 859ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 860ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_repeat_minmax_overflow(self): 861ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue #13169 862ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh string = "x" * 100000 863ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535)) 864ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535)) 865ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535)) 866ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536)) 867ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536)) 868ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536)) 869ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t. 870ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128) 871ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128) 872ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{%d,}?" 
% 2**128) 873ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) 874ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 875ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh @cpython_only 876ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_repeat_minmax_overflow_maxrepeat(self): 877ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 878ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh from _sre import MAXREPEAT 879ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except ImportError: 880ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.skipTest('requires _sre.MAXREPEAT constant') 881ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh string = "x" * 100000 882ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string)) 883ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(), 884ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh (0, 100000)) 885ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string)) 886ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT) 887ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT) 888ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh self.assertRaises(OverflowError, re.compile, r".{%d,}?" 
% MAXREPEAT) 889ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 890ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_backref_group_name_in_exception(self): 891ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue 17341: Poor error message when compiling invalid regex 892ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh with self.assertRaisesRegexp(sre_constants.error, '<foo>'): 893ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh re.compile('(?P=<foo>)') 894ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 895ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh def test_group_name_in_exception(self): 896ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Issue 17341: Poor error message when compiling invalid regex 897ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh with self.assertRaisesRegexp(sre_constants.error, '\?foo'): 898ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh re.compile('(?P<?foo>)') 899ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 900ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 901ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef run_re_tests(): 902ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR 903ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if verbose: 904ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print 'Running re_tests test suite' 905ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 906ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # To save time, only run the first and last 10 tests 907ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh #tests = tests[:10] + tests[-10:] 908ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pass 909ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 910ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for t in tests: 911ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh sys.stdout.flush() 912ffab958fd8d42ed7227d83007350e61555a1fa36Andrew 
Hsieh pattern = s = outcome = repl = expected = None 913ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if len(t) == 5: 914ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern, s, outcome, repl, expected = t 915ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh elif len(t) == 3: 916ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pattern, s, outcome = t 917ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 918ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh raise ValueError, ('Test tuples should have 3 or 5 fields', t) 919ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 920ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 921ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh obj = re.compile(pattern) 922ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except re.error: 923ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if outcome == SYNTAX_ERROR: pass # Expected a syntax error 924ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 925ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Syntax error:', t 926ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except KeyboardInterrupt: raise KeyboardInterrupt 927ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except: 928ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '*** Unexpected error ***', t 929ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if verbose: 930ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh traceback.print_exc(file=sys.stdout) 931ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 932ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 933ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(s) 934ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except re.error, msg: 935ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Unexpected exception', t, repr(msg) 936ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if outcome == SYNTAX_ERROR: 
937ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # This should have been a syntax error; forget it. 938ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh pass 939ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh elif outcome == FAIL: 940ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: pass # No match, as expected 941ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: print '=== Succeeded incorrectly', t 942ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh elif outcome == SUCCEED: 943ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is not None: 944ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Matched, as expected, so now we compute the 945ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # result string and compare it to our expected result. 946ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh start, end = result.span(0) 947ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh vardict={'found': result.group(0), 948ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'groups': result.group(), 949ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 'flags': result.re.flags} 950ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for i in range(1, 100): 951ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 952ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh gi = result.group(i) 953ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Special hack because else the string concat fails: 954ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if gi is None: 955ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh gi = "None" 956ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except IndexError: 957ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh gi = "Error" 958ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh vardict['g%d' % i] = gi 959ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh for i in result.re.groupindex.keys(): 960ffab958fd8d42ed7227d83007350e61555a1fa36Andrew 
Hsieh try: 961ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh gi = result.group(i) 962ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if gi is None: 963ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh gi = "None" 964ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except IndexError: 965ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh gi = "Error" 966ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh vardict[i] = gi 967ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh repl = eval(repl, vardict) 968ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if repl != expected: 969ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== grouping error', t, 970ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print repr(repl) + ' should be ' + repr(expected) 971ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh else: 972ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Failed incorrectly', t 973ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 974ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Try the match on a unicode string, and check that it 975ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # still succeeds. 
976ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh try: 977ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(unicode(s, "latin-1")) 978ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: 979ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Fails on unicode match', t 980ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except NameError: 981ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh continue # 1.5.2 982ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh except TypeError: 983ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh continue # unicode test case 984ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 985ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Try the match on a unicode pattern, and check that it 986ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # still succeeds. 987ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh obj=re.compile(unicode(pattern, "latin-1")) 988ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(s) 989ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: 990ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Fails on unicode pattern match', t 991ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 992ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Try the match with the search area limited to the extent 993ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # of the match and see if it still succeeds. \B will 994ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # break (because it won't match at the end or start of a 995ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # string), so we'll ignore patterns that feature it. 
996ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 997ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \ 998ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh and result is not None: 999ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh obj = re.compile(pattern) 1000ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(s, result.start(0), result.end(0) + 1) 1001ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: 1002ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Failed on range-limited match', t 1003ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 1004ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Try the match with IGNORECASE enabled, and check that it 1005ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # still succeeds. 1006ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh obj = re.compile(pattern, re.IGNORECASE) 1007ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(s) 1008ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: 1009ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Fails on case-insensitive match', t 1010ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 1011ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Try the match with LOCALE enabled, and check that it 1012ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # still succeeds. 
1013ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh obj = re.compile(pattern, re.LOCALE) 1014ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(s) 1015ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: 1016ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Fails on locale-sensitive match', t 1017ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 1018ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # Try the match with UNICODE locale enabled, and check 1019ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh # that it still succeeds. 1020ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh obj = re.compile(pattern, re.UNICODE) 1021ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh result = obj.search(s) 1022ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh if result is None: 1023ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh print '=== Fails on unicode-sensitive match', t 1024ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 1025ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehdef test_main(): 1026ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh run_unittest(ReTests) 1027ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh run_re_tests() 1028ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh 1029ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsiehif __name__ == "__main__": 1030ffab958fd8d42ed7227d83007350e61555a1fa36Andrew Hsieh test_main() 1031