1r"""Test correct treatment of various string literals by the parser.
2
3There are four types of string literals:
4
5    'abc'             -- normal str
6    r'abc'            -- raw str
7    b'xyz'            -- normal bytes
8    br'xyz' | rb'xyz' -- raw bytes
9
10The difference between normal and raw strings is of course that in a
11raw string, \ escapes (while still used to determine the end of the
12literal) are not interpreted, so that r'\x00' contains four
13characters: a backslash, an x, and two zeros; while '\x00' contains a
14single character (code point zero).
15
16The tricky thing is what should happen when non-ASCII bytes are used
17inside literals.  For bytes literals, this is considered illegal.  But
18for str literals, those bytes are supposed to be decoded using the
19encoding declared for the file (UTF-8 by default).
20
21We have to test this with various file encodings.  We also test it with
22exec()/eval(), which uses a different code path.
23
24This file is really about correct treatment of encodings and
25backslashes.  It doesn't concern itself with issues like single
26vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27with elsewhere (I assume).
28"""
29
30import os
31import sys
32import shutil
33import tempfile
34import warnings
35import unittest
36
37
# Source text of the module that check_encoding() writes to disk and
# imports.  The "%s" in the coding declaration on the first line is filled
# in with the encoding under test.  Each literal is followed by an assert
# that pins the code points the parser must produce for it.  Every line is
# a raw string so the backslashes reach the generated module untouched.
TEMPLATE = "\n".join([
    r"# coding: %s",
    r"a = 'x'",
    r"assert ord(a) == 120",
    r"b = '\x01'",
    r"assert ord(b) == 1",
    r"c = r'\x01'",
    r"assert list(map(ord, c)) == [92, 120, 48, 49]",
    r"d = '\x81'",
    r"assert ord(d) == 0x81",
    r"e = r'\x81'",
    r"assert list(map(ord, e)) == [92, 120, 56, 49]",
    r"f = '\u1881'",
    r"assert ord(f) == 0x1881",
    r"g = r'\u1881'",
    r"assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]",
    r"h = '\U0001d120'",
    r"assert ord(h) == 0x1d120",
    r"i = r'\U0001d120'",
    r"assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]",
    "",  # the generated source ends with a newline
])
58
59
def byte(i):
    """Return a bytes object of length one whose only item is the int *i*."""
    return bytes((i,))
62
63
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are parsed by eval() and by import.

    The ``test_eval_*`` methods pass literal source text to eval().  The
    ``test_file_*`` methods write TEMPLATE to a module file in a scratch
    directory, with a coding declaration for the encoding under test, and
    import that module.

    Test methods are documented with comments rather than docstrings so
    that unittest's verbose output keeps showing the method names.
    """

    def setUp(self):
        # Put a fresh scratch directory at the front of sys.path so that
        # the modules written by check_encoding() can be imported by name.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and remove the scratch directory.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # assertTrue instead of a bare assert statement: assert is
            # compiled away under -O, which would turn this into a no-op.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice: in a raw source string, so that
        # eval() has to decode the escape itself, and in a normal source
        # string, so that eval() sees the already-decoded character.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # \x, \u and \U escapes with too few hex digits are syntax errors.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # A backslash followed by any ASCII character outside the skipped
        # set must warn and leave both the backslash and the character in
        # the resulting string.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # Exactly one warning is recorded, attributed to '<string>' and to
        # line 2 of the evaluated source, where the escape is.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        # With the warning turned into an error, eval() raises SyntaxError
        # carrying the same location and nothing is recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_str_raw(self):
        # With an r prefix the evaluated literal keeps backslash escapes
        # verbatim; a character already decoded by the outer (non-raw)
        # source string passes through unchanged.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # \x escapes are decoded in bytes literals, but a non-ASCII
        # character appearing directly in one is a syntax error.  The \u
        # and \U spellings are checked with a br prefix, where they stay
        # literal text.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # \x escapes with too few hex digits are syntax errors.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same checks as for str literals.  The skipped set differs: N, U
        # and u are not skipped here, so they are expected to warn too.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        # Exactly one warning is recorded, attributed to '<string>' and to
        # line 2 of the evaluated source, where the escape is.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        # With the warning turned into an error, eval() raises SyntaxError
        # carrying the same location and nothing is recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br and rb) are checked for every case.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are syntax errors.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on str literals, but combining it
        # with r or b is a syntax error.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE plus *extra* to a module file and import it.

        The module is named "xx_" followed by *encoding* with dashes
        replaced by underscores.  It is written into self.tmpdir using
        *encoding*, with *encoding* substituted into the template's coding
        declaration, then imported and removed from sys.modules again.
        Exceptions raised while writing or importing propagate to the
        caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        # The with block closes the file even if a write fails.
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        del sys.modules[modname]

    def test_file_utf_8(self):
        # The extra line puts a literal U+1234 character into the file.
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must be rejected.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # Dash-less spelling of the codec name, matching the test name
        # (like every other test_file_* method mirrors its encoding).
        self.check_encoding("utf8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
246
247
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main(module="__main__")
250