1r"""Test correct treatment of various string literals by the parser. 2 3There are four types of string literals: 4 5 'abc' -- normal str 6 r'abc' -- raw str 7 b'xyz' -- normal bytes 8 br'xyz' | rb'xyz' -- raw bytes 9 10The difference between normal and raw strings is of course that in a 11raw string, \ escapes (while still used to determine the end of the 12literal) are not interpreted, so that r'\x00' contains four 13characters: a backslash, an x, and two zeros; while '\x00' contains a 14single character (code point zero). 15 16The tricky thing is what should happen when non-ASCII bytes are used 17inside literals. For bytes literals, this is considered illegal. But 18for str literals, those bytes are supposed to be decoded using the 19encoding declared for the file (UTF-8 by default). 20 21We have to test this with various file encodings. We also test it with 22exec()/eval(), which uses a different code path. 23 24This file is really about correct treatment of encodings and 25backslashes. It doesn't concern itself with issues like single 26vs. double quotes or singly- vs. triply-quoted strings: that's dealt 27with elsewhere (I assume). 28""" 29 30import os 31import sys 32import shutil 33import tempfile 34import warnings 35import unittest 36 37 38TEMPLATE = r"""# coding: %s 39a = 'x' 40assert ord(a) == 120 41b = '\x01' 42assert ord(b) == 1 43c = r'\x01' 44assert list(map(ord, c)) == [92, 120, 48, 49] 45d = '\x81' 46assert ord(d) == 0x81 47e = r'\x81' 48assert list(map(ord, e)) == [92, 120, 56, 49] 49f = '\u1881' 50assert ord(f) == 0x1881 51g = r'\u1881' 52assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49] 53h = '\U0001d120' 54assert ord(h) == 0x1d120 55i = r'\U0001d120' 56assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48] 57""" 58 59 60def byte(i): 61 return bytes([i]) 62 63 64class TestLiterals(unittest.TestCase): 65 66 def setUp(self): 67 self.save_path = sys.path[:] 68 self.tmpdir = tempfile.mkdtemp() 69 sys.path.insert(0, self.tmpdir) 70 71 def tearDown(self): 72 sys.path[:] = self.save_path 73 shutil.rmtree(self.tmpdir, ignore_errors=True) 74 75 def test_template(self): 76 # Check that the template doesn't contain any non-printables 77 # except for \n. 78 for c in TEMPLATE: 79 assert c == '\n' or ' ' <= c <= '~', repr(c) 80 81 def test_eval_str_normal(self): 82 self.assertEqual(eval(""" 'x' """), 'x') 83 self.assertEqual(eval(r""" '\x01' """), chr(1)) 84 self.assertEqual(eval(""" '\x01' """), chr(1)) 85 self.assertEqual(eval(r""" '\x81' """), chr(0x81)) 86 self.assertEqual(eval(""" '\x81' """), chr(0x81)) 87 self.assertEqual(eval(r""" '\u1881' """), chr(0x1881)) 88 self.assertEqual(eval(""" '\u1881' """), chr(0x1881)) 89 self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120)) 90 self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120)) 91 92 def test_eval_str_incomplete(self): 93 self.assertRaises(SyntaxError, eval, r""" '\x' """) 94 self.assertRaises(SyntaxError, eval, r""" '\x0' """) 95 self.assertRaises(SyntaxError, eval, r""" '\u' """) 96 self.assertRaises(SyntaxError, eval, r""" '\u0' """) 97 self.assertRaises(SyntaxError, eval, r""" '\u00' """) 98 self.assertRaises(SyntaxError, eval, r""" '\u000' """) 99 self.assertRaises(SyntaxError, eval, r""" '\U' """) 100 self.assertRaises(SyntaxError, eval, r""" '\U0' """) 101 self.assertRaises(SyntaxError, eval, r""" '\U00' """) 102 self.assertRaises(SyntaxError, eval, r""" '\U000' """) 103 self.assertRaises(SyntaxError, eval, r""" '\U0000' """) 104 self.assertRaises(SyntaxError, eval, r""" '\U00000' """) 105 self.assertRaises(SyntaxError, eval, r""" '\U000000' """) 106 self.assertRaises(SyntaxError, eval, r""" '\U0000000' """) 107 108 def test_eval_str_invalid_escape(self): 109 for b in range(1, 128): 110 if b in b"""\n\r"'01234567NU\\abfnrtuvx""": 111 continue 112 with self.assertWarns(DeprecationWarning): 113 self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b)) 114 115 with warnings.catch_warnings(record=True) as w: 116 warnings.simplefilter('always', category=DeprecationWarning) 117 eval("'''\n\\z'''") 118 self.assertEqual(len(w), 1) 119 self.assertEqual(w[0].filename, '<string>') 120 self.assertEqual(w[0].lineno, 2) 121 122 with warnings.catch_warnings(record=True) as w: 123 warnings.simplefilter('error', category=DeprecationWarning) 124 with self.assertRaises(SyntaxError) as cm: 125 eval("'''\n\\z'''") 126 exc = cm.exception 127 self.assertEqual(w, []) 128 self.assertEqual(exc.filename, '<string>') 129 self.assertEqual(exc.lineno, 2) 130 131 def test_eval_str_raw(self): 132 self.assertEqual(eval(""" r'x' """), 'x') 133 self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') 134 self.assertEqual(eval(""" r'\x01' """), chr(1)) 135 self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81') 136 self.assertEqual(eval(""" r'\x81' """), chr(0x81)) 137 self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881') 138 self.assertEqual(eval(""" r'\u1881' """), chr(0x1881)) 139 self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120') 140 self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120)) 141 142 def test_eval_bytes_normal(self): 143 self.assertEqual(eval(""" b'x' """), b'x') 144 self.assertEqual(eval(r""" b'\x01' """), byte(1)) 145 self.assertEqual(eval(""" b'\x01' """), byte(1)) 146 self.assertEqual(eval(r""" b'\x81' """), byte(0x81)) 147 self.assertRaises(SyntaxError, eval, """ b'\x81' """) 148 self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881') 149 self.assertRaises(SyntaxError, eval, """ b'\u1881' """) 150 self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120') 151 self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """) 152 153 def test_eval_bytes_incomplete(self): 154 self.assertRaises(SyntaxError, eval, r""" b'\x' """) 155 self.assertRaises(SyntaxError, eval, r""" b'\x0' """) 156 157 def test_eval_bytes_invalid_escape(self): 158 for b in range(1, 128): 159 if b in b"""\n\r"'01234567\\abfnrtvx""": 160 continue 161 with self.assertWarns(DeprecationWarning): 162 self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b])) 163 164 with warnings.catch_warnings(record=True) as w: 165 warnings.simplefilter('always', category=DeprecationWarning) 166 eval("b'''\n\\z'''") 167 self.assertEqual(len(w), 1) 168 self.assertEqual(w[0].filename, '<string>') 169 self.assertEqual(w[0].lineno, 2) 170 171 with warnings.catch_warnings(record=True) as w: 172 warnings.simplefilter('error', category=DeprecationWarning) 173 with self.assertRaises(SyntaxError) as cm: 174 eval("b'''\n\\z'''") 175 exc = cm.exception 176 self.assertEqual(w, []) 177 self.assertEqual(exc.filename, '<string>') 178 self.assertEqual(exc.lineno, 2) 179 180 def test_eval_bytes_raw(self): 181 self.assertEqual(eval(""" br'x' """), b'x') 182 self.assertEqual(eval(""" rb'x' """), b'x') 183 self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01') 184 self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01') 185 self.assertEqual(eval(""" br'\x01' """), byte(1)) 186 self.assertEqual(eval(""" rb'\x01' """), byte(1)) 187 self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81") 188 self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81") 189 self.assertRaises(SyntaxError, eval, """ br'\x81' """) 190 self.assertRaises(SyntaxError, eval, """ rb'\x81' """) 191 self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881") 192 self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881") 193 self.assertRaises(SyntaxError, eval, """ br'\u1881' """) 194 self.assertRaises(SyntaxError, eval, """ rb'\u1881' """) 195 self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120") 196 self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120") 197 self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """) 198 self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """) 199 self.assertRaises(SyntaxError, eval, """ bb'' """) 200 self.assertRaises(SyntaxError, eval, """ rr'' """) 201 self.assertRaises(SyntaxError, eval, """ brr'' """) 202 self.assertRaises(SyntaxError, eval, """ bbr'' """) 203 self.assertRaises(SyntaxError, eval, """ rrb'' """) 204 self.assertRaises(SyntaxError, eval, """ rbb'' """) 205 206 def test_eval_str_u(self): 207 self.assertEqual(eval(""" u'x' """), 'x') 208 self.assertEqual(eval(""" U'\u00e4' """), 'ä') 209 self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä') 210 self.assertRaises(SyntaxError, eval, """ ur'' """) 211 self.assertRaises(SyntaxError, eval, """ ru'' """) 212 self.assertRaises(SyntaxError, eval, """ bu'' """) 213 self.assertRaises(SyntaxError, eval, """ ub'' """) 214 215 def check_encoding(self, encoding, extra=""): 216 modname = "xx_" + encoding.replace("-", "_") 217 fn = os.path.join(self.tmpdir, modname + ".py") 218 f = open(fn, "w", encoding=encoding) 219 try: 220 f.write(TEMPLATE % encoding) 221 f.write(extra) 222 finally: 223 f.close() 224 __import__(modname) 225 del sys.modules[modname] 226 227 def test_file_utf_8(self): 228 extra = "z = '\u1234'; assert ord(z) == 0x1234\n" 229 self.check_encoding("utf-8", extra) 230 231 def test_file_utf_8_error(self): 232 extra = "b'\x80'\n" 233 self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra) 234 235 def test_file_utf8(self): 236 self.check_encoding("utf-8") 237 238 def test_file_iso_8859_1(self): 239 self.check_encoding("iso-8859-1") 240 241 def test_file_latin_1(self): 242 self.check_encoding("latin-1") 243 244 def test_file_latin9(self): 245 self.check_encoding("latin9") 246 247 248if __name__ == "__main__": 249 unittest.main() 250