""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
import sys, struct, codecs
from test import test_support, string_tests

# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search function that supplies deliberately broken codecs.

    Two encoding names are recognised:

    * ``"test.unicode1"`` -- the encoder and decoder both return the bare
      integer ``42`` instead of a tuple.
    * ``"test.unicode2"`` -- the encoder and decoder both return the tuple
      ``(42, 42)``, i.e. a tuple whose first item is not a string.

    For a recognised name the return value is the 4-tuple
    ``(encode, decode, None, None)``; for any other name it is ``None``.
    """
    def decode1(input, errors="strict"):
        return 42 # not a tuple
    def encode1(input, errors="strict"):
        return 42 # not a tuple
    def encode2(input, errors="strict"):
        return (42, 42) # no unicode
    def decode2(input, errors="strict"):
        return (42, 42) # no unicode
    if encoding=="test.unicode1":
        return (encode1, decode1, None, None)
    elif encoding=="test.unicode2":
        return (encode2, decode2, None, None)
    else:
        return None
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcodecs.register(search_function) 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass UnicodeTest( 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest, 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest, 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeTest, 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ): 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type2test = unicode 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def assertEqual(self, first, second, msg=None): 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # strict assertEqual method: reject implicit bytes/unicode equality 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm super(UnicodeTest, self).assertEqual(first, second, msg) 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(first, unicode) or isinstance(second, unicode): 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIsInstance(first, unicode) 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIsInstance(second, unicode) 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif isinstance(first, str) or isinstance(second, str): 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIsInstance(first, str) 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIsInstance(second, str) 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def checkequalnofix(self, result, object, methodname, *args): 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm method = getattr(object, methodname) 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm realresult = method(*args) 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(realresult, result) 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(type(realresult) is 
type(result)) 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # if the original is returned make sure that 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # this doesn't happen with subclasses 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if realresult is object: 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class usub(unicode): 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 'usub(%r)' % unicode.__repr__(self) 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm object = usub(object) 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm method = getattr(object, methodname) 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm realresult = method(*args) 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(realresult, result) 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(object is not realresult) 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_literals(self): 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\xff', u'\u00ff') 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\uffff', u'\U0000ffff') 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(SyntaxError, eval, 'u\'\\Ufffffffe\'') 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(SyntaxError, eval, 'u\'\\Uffffffff\'') 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(SyntaxError, eval, 'u\'\\U%08x\'' % 0x110000) 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_repr(self): 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not sys.platform.startswith('java'): 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Test basic sanity of repr() 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'abc'), "u'abc'") 
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'") 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'ab\\'), "u'ab\\\\'") 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'\\c'), "u'\\\\c'") 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'\\'), "u'\\\\'") 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'\n'), "u'\\n'") 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'\r'), "u'\\r'") 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'\t'), "u'\\t'") 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'\b'), "u'\\x08'") 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u"'\""), """u'\\'"'""") 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u"'\""), """u'\\'"'""") 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u"'"), '''u"'"''') 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u'"'), """u'"'""") 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm latin1repr = ( 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r" 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a" 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI" 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f" 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d" 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b" 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9" 
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7" 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5" 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3" 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1" 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef" 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd" 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "\\xfe\\xff'") 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm testrepr = repr(u''.join(map(unichr, xrange(256)))) 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(testrepr, latin1repr) 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Test repr works on wide unicode escapes without overflow. 
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096), 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm repr(u"\U00010000" * 39 + u"\uffff" * 4096)) 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_count(self): 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_count(self) 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # check mixed argument types 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(3, 'aaa', 'count', u'a') 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, 'aaa', 'count', u'b') 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(3, u'aaa', 'count', 'a') 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, u'aaa', 'count', 'b') 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, u'aaa', 'count', 'b') 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(1, u'aaa', 'count', 'a', -1) 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(3, u'aaa', 'count', 'a', -10) 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(2, u'aaa', 'count', 'a', 0, -1) 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, u'aaa', 'count', 'a', 0, -10) 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_find(self): 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, u'abcdefghiabc', 'find', u'abc') 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(9, u'abcdefghiabc', 'find', u'abc', 1) 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4) 1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
self.assertRaises(TypeError, u'hello'.find) 1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, u'hello'.find, 42) 1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_rfind(self): 1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_rfind(self) 1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # check mixed argument types 1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(9, 'abcdefghiabc', 'rfind', u'abc') 1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(12, 'abcdefghiabc', 'rfind', u'') 1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(12, u'abcdefghiabc', 'rfind', '') 1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_index(self): 1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_index(self) 1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # check mixed argument types 1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (t1, t2) in ((str, unicode), (unicode, str)): 1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, t1('abcdefghiabc'), 'index', t2('')) 1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(3, t1('abcdefghiabc'), 'index', t2('def')) 1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, t1('abcdefghiabc'), 'index', t2('abc')) 1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(9, t1('abcdefghiabc'), 'index', t2('abc'), 1) 1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghiabc').index, t2('hib')) 1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghiab').index, t2('abc'), 1) 1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghi').index, t2('ghi'), 8) 
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghi').index, t2('ghi'), -1) 1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_rindex(self): 1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_rindex(self) 1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # check mixed argument types 1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (t1, t2) in ((str, unicode), (unicode, str)): 1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(12, t1('abcdefghiabc'), 'rindex', t2('')) 1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(3, t1('abcdefghiabc'), 'rindex', t2('def')) 1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(9, t1('abcdefghiabc'), 'rindex', t2('abc')) 1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(0, t1('abcdefghiabc'), 'rindex', t2('abc'), 0, -1) 1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghiabc').rindex, t2('hib')) 1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('defghiabc').rindex, t2('def'), 1) 1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('defghiabc').rindex, t2('abc'), 0, -1) 1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghi').rindex, t2('ghi'), 0, 8) 1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, t1('abcdefghi').rindex, t2('ghi'), 0, -1) 1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_translate(self): 1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'bbbc', u'abababc', 'translate', {ord('a'):None}) 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'iiic', u'abababc', 'translate', {ord('a'):None, 
ord('b'):ord('i')}) 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'iiix', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}) 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'<i><i><i>c', u'abababc', 'translate', {ord('a'):None, ord('b'):u'<i>'}) 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'c', u'abababc', 'translate', {ord('a'):None, ord('b'):u''}) 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'xyyx', u'xzx', 'translate', {ord('z'):u'yy'}) 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, u'hello'.translate) 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''}) 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_split(self): 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_split(self) 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Mixed arguments 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix([u'a', u'b', u'c', u'd'], u'a//b//c//d', 'split', '//') 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix([u'a', u'b', u'c', u'd'], 'a//b//c//d', 'split', u'//') 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test') 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_join(self): 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_join(self) 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # mixed arguments 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'a b c d', u' ', 'join', ['a', 
'b', u'c', u'd']) 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'abcd', u'', 'join', (u'a', u'b', u'c', u'd')) 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'w x y z', u' ', 'join', string_tests.Sequence('wxyz')) 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'a b c d', ' ', 'join', [u'a', u'b', u'c', u'd']) 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'a b c d', ' ', 'join', ['a', 'b', u'c', u'd']) 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'abcd', '', 'join', (u'a', u'b', u'c', u'd')) 1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'w x y z', ' ', 'join', string_tests.Sequence(u'wxyz')) 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_strip(self): 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_strip(self) 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeError, u"hello".strip, "\xff") 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_replace(self): 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.CommonTest.test_replace(self) 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # method call forwarded from str implementation because of unicode argument 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(u'one@two!three!', 'one!two!three!', 'replace', u'!', u'@', 1) 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, 'replace'.replace, u"r", 42) 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_comparison(self): 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Comparisons: 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'abc' == 'abc') 
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue('abc' == u'abc') 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'abc' == u'abc') 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'abcd' > 'abc') 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue('abcd' > u'abc') 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'abcd' > u'abc') 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'abc' < 'abcd') 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue('abc' < u'abcd') 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'abc' < u'abcd') 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if 0: 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Move these tests to a Unicode collation module test... 2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Testing UTF-16 code point order comparisons... 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # No surrogates, no fixup required. 
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'\u0061' < u'\u20ac') 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Non surrogate below surrogate value, no fixup required 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'\u0061' < u'\ud800\udc02') 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Non surrogate above surrogate value, fixup required 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_lecmp(s, s2): 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(s < s2) 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_fixup(s): 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud800\udc01' 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud900\udc01' 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\uda00\udc01' 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\udb00\udc01' 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud800\udd01' 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud900\udd01' 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\uda00\udd01' 2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\udb00\udd01' 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud800\ude01' 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud900\ude01' 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\uda00\ude01' 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\udb00\ude01' 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud800\udfff' 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\ud900\udfff' 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\uda00\udfff' 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s2 = u'\udb00\udfff' 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_lecmp(s, s2) 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_fixup(u'\ue000') 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_fixup(u'\uff61') 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Surrogates on both sides, no fixup required 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(u'\ud800\udc02' < u'\ud84d\udc56') 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_islower(self): 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_islower(self) 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'\u1FFc', 'islower') 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_isupper(self): 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_isupper(self) 
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not sys.platform.startswith('java'): 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'\u1FFc', 'isupper') 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_istitle(self): 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_title(self) 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u1FFc', 'istitle') 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle') 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_isspace(self): 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_isspace(self) 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u2000', 'isspace') 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u200a', 'isspace') 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'\u2014', 'isspace') 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_isalpha(self): 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self) 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u1FFc', 'isalpha') 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_isdecimal(self): 2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'', 'isdecimal') 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'a', 'isdecimal') 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'0', 'isdecimal') 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
self.checkequalnofix(False, u'\u2460', 'isdecimal') # CIRCLED DIGIT ONE 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'0123456789', 'isdecimal') 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'0123456789a', 'isdecimal') 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkraises(TypeError, 'abc', 'isdecimal', 42) 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_isdigit(self): 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self) 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u2460', 'isdigit') 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'\xbc', 'isdigit') 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u0660', 'isdigit') 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_isnumeric(self): 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'', 'isnumeric') 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'a', 'isnumeric') 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'0', 'isnumeric') 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u2460', 'isnumeric') 3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\xbc', 'isnumeric') 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(True, u'\u0660', 'isnumeric') 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
self.checkequalnofix(True, u'0123456789', 'isnumeric') 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.checkequalnofix(False, u'0123456789a', 'isnumeric') 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, u"abc".isnumeric, 42) 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_contains(self): 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Testing Unicode contains method 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', u'abdb') 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', u'bdab') 3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', u'bdaba') 3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', u'bdba') 3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', u'bdba') 3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', u'bdba') 3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'a', u'bdb') 3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'a', 'bdb') 3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', 'bdba') 3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', ('a',1,None)) 3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', (1,None,'a')) 3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', (1,None,u'a')) 3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', ('a',1,None)) 3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', (1,None,'a')) 3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', (1,None,u'a')) 3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn('a', ('x',1,u'y')) 3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn('a', ('x',1,None)) 3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'abcd', u'abcxxxx') 
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'ab', u'abcd') 3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('ab', u'abc') 3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'ab', 'abc') 3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'ab', (1,None,u'ab')) 3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'', u'abc') 3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('', u'abc') 3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # If the following fails either 3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the contains operator does not propagate UnicodeErrors or 3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # someone has changed the default encoding 3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 'g\xe2teau'.__contains__, u'\xe2') 3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, u'g\xe2teau'.__contains__, '\xe2') 3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'', '') 3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('', u'') 3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'', u'') 3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'', 'abc') 3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('', u'abc') 3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'', u'abc') 3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'\0', 'abc') 3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn('\0', u'abc') 3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'\0', u'abc') 3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'\0', '\0abc') 3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('\0', u'\0abc') 
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'\0', u'\0abc') 3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'\0', 'abc\0') 3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('\0', u'abc\0') 3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'\0', u'abc\0') 3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', '\0abc') 3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('a', u'\0abc') 3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'a', u'\0abc') 3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'asdf', 'asdf') 3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('asdf', u'asdf') 3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn(u'asdf', u'asdf') 3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'asdf', 'asd') 3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn('asdf', u'asd') 3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'asdf', u'asd') 3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'asdf', '') 3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn('asdf', u'') 3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertNotIn(u'asdf', u'') 3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, u"abc".__contains__) 3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, u"abc".__contains__, object()) 3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_formatting(self): 3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) 3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Testing Unicode formatting strings... 
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc') 3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000, 3.00') 3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000, 3.00') 3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000, 3.50') 3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000, 3.57') 3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57') 3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not sys.platform.startswith('java'): 4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'") 4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def') 4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def') 4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'%c' % 0x1234, u'\u1234') 4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,)) 4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"%.1\u1032f".__mod__, (1.0/3)) 4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for num in range(0x00,0x80): 4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm char = chr(num) 4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%c" % 
char, unicode(char)) 4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"%c" % num, unicode(char)) 4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(char == u"%c" % char) 4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(char == u"%c" % num) 4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Issue 7649 4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for num in range(0x80,0x100): 4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm uchar = unichr(num) 4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(uchar, u"%c" % num) # works only with ints 4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(uchar, u"%c" % uchar) # and unicode chars 4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the implicit decoding should fail for non-ascii chars 4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num)) 4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num)) 4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # formatting jobs delegated from the string implementation: 4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...') 4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...') 4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...') 4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...') 4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...') 4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%(foo)s...' 
% {u'foo':u"abc",u'def':123}, u'...abc...') 4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...') 4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...') 4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('...%s...' % u"abc", u'...abc...') 4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%*s' % (5,u'abc',), u' abc') 4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%*s' % (-5,u'abc',), u'abc ') 4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%*.*s' % (5,2,u'abc',), u' ab') 4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%*.*s' % (5,3,u'abc',), u' abc') 4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10 abc') 4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103 abc') 4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%c' % u'a', u'a') 4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class Wrapper: 4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u'\u1234' 4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual('%s' % Wrapper(), u'\u1234') 4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_startswith_endswith_errors(self): 4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for meth in (u'foo'.startswith, u'foo'.endswith): 4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm with self.assertRaises(UnicodeDecodeError): 4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm meth('\xff') 4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm with self.assertRaises(TypeError) as cm: 4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm meth(['f']) 
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm exc = str(cm.exception) 4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('unicode', exc) 4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('str', exc) 4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertIn('tuple', exc) 4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR') 4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_format_float(self): 4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # should not format with a comma, but always with C locale 4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'1.0', u'%.1f' % 1.0) 4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_constructor(self): 4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # unicode(obj) tests (this maps to PyObject_Unicode() at C level) 4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode(u'unicode remains unicode'), 4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'unicode remains unicode' 4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class UnicodeSubclass(unicode): 4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pass 4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode(UnicodeSubclass('unicode subclass becomes unicode')), 4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'unicode subclass becomes unicode' 4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode('strings are converted to unicode'), 4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'strings are converted to unicode' 4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class UnicodeCompat: 4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x): 4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.x = x 4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __unicode__(self): 4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.x 4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode(UnicodeCompat('__unicode__ compatible objects are recognized')), 4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'__unicode__ compatible objects are recognized') 4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class StringCompat: 4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x): 4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.x = x 4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.x 4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode(StringCompat('__str__ compatible objects are recognized')), 5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'__str__ compatible objects are recognized' 5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # unicode(obj) is compatible to str(): 5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm o = 
StringCompat('unicode(obj) is compatible to str()') 5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()') 5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(str(o), 'unicode(obj) is compatible to str()') 5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # %-formatting and .__unicode__() 5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'%s' % 5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"), 5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u"u'%s' % obj uses obj.__unicode__()") 5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'%s' % 5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"), 5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u"u'%s' % obj falls back to obj.__str__()") 5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for obj in (123, 123.45, 123L): 5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode(obj), unicode(str(obj))) 5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # unicode(obj, encoding, error) tests (this maps to 5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # PyUnicode_FromEncodedObject() at C level) 5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not sys.platform.startswith('java'): 5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises( 5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm TypeError, 5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode, 5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'decoding unicode is not supported', 5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 'utf-8', 5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 'strict' 
5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode('strings are decoded to unicode', 'utf-8', 'strict'), 5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'strings are decoded to unicode' 5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not sys.platform.startswith('java'): 5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm with test_support.check_py3k_warnings(): 5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm buf = buffer('character buffers are decoded to unicode') 5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unicode( 5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm buf, 5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 'utf-8', 5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 'strict' 5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ), 5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'character buffers are decoded to unicode' 5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, unicode, 42, 42, 42) 5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_codecs_utf7(self): 5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm utfTests = [ 5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example 5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example 5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example 5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'Item 3 is \u00a31.', 'Item 3 is 
+AKM-1.'), # RFC2152 example 5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'+', '+-'), 5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'+-', '+--'), 5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'+?', '+-?'), 5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'\?', '+AFw?'), 5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'+?', '+-?'), 5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (ur'\\?', '+AFwAXA?'), 5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (ur'\\\?', '+AFwAXABc?'), 5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (ur'++--', '+-+---'), 5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'\U000abcde', '+2m/c3g-'), # surrogate pairs 5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'/', '/'), 5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ] 5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (x, y) in utfTests: 5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(x.encode('utf-7'), y) 5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Unpaired surrogates not supported 5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7') 5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd') 5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Direct encoded characters 5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?" 
5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Optional direct characters 5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm set_o = '!"#$%&*;<=>@[]^_`{|}' 5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c in set_d: 5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(c.encode('utf7'), c.encode('ascii')) 5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c)) 5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(c == c.encode('ascii').decode('utf7')) 5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for c in set_o: 5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c)) 5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertTrue(c == c.encode('ascii').decode('utf7')) 5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_codecs_utf8(self): 5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u''.encode('utf-8'), '') 5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac') 5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82') 5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96') 5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80') 5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80') 5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (u'\ud800\udc02'*1000).encode('utf-8'), 5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xf0\x90\x80\x82'*1000 5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual( 
6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das' 6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u' Nunstuck git und'.encode('utf-8'), 6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81' 6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3' 6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe' 6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8' 6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81' 6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81' 6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3' 6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf' 6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm '\xe3\x80\x8cWenn ist das Nunstuck git und' 6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # UTF-8 specific decoding tests 6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), 
u'\U00023456') 6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002') 6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac') 6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Other possible utf-8 test cases: 6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # * strict decoding testing for all of the 6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # UTF8_ERROR cases in PyUnicode_DecodeUTF8 6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_utf8_decode_valid_sequences(self): 6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sequences = [ 6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # single byte 6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\x00', u'\x00'), ('a', u'a'), ('\x7f', u'\x7f'), 6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2 bytes 6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc2\x80', u'\x80'), ('\xdf\xbf', u'\u07ff'), 6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3 bytes 6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe0\xa0\x80', u'\u0800'), ('\xed\x9f\xbf', u'\ud7ff'), 6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xee\x80\x80', u'\uE000'), ('\xef\xbf\xbf', u'\uffff'), 6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 4 bytes 6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xF0\x90\x80\x80', u'\U00010000'), 6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf4\x8f\xbf\xbf', u'\U0010FFFF') 6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ] 6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for seq, res in sequences: 6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(seq.decode('utf-8'), res) 6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ch in map(unichr, range(0, sys.maxunicode)): 
6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(ch, ch.encode('utf-8').decode('utf-8')) 6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_utf8_decode_invalid_sequences(self): 6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # continuation bytes in a sequence of 2, 3, or 4 bytes 6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continuation_bytes = map(chr, range(0x80, 0xC0)) 6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # start bytes of a 2-byte sequence equivalent to codepoints < 0x7F 6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm invalid_2B_seq_start_bytes = map(chr, range(0xC0, 0xC2)) 6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF 6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm invalid_4B_seq_start_bytes = map(chr, range(0xF5, 0xF8)) 6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm invalid_start_bytes = ( 6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continuation_bytes + invalid_2B_seq_start_bytes + 6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm invalid_4B_seq_start_bytes + map(chr, range(0xF7, 0x100)) 6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ) 6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for byte in invalid_start_bytes: 6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, byte.decode, 'utf-8') 6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for sb in invalid_2B_seq_start_bytes: 6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb in continuation_bytes: 6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, (sb+cb).decode, 'utf-8') 6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for sb in invalid_4B_seq_start_bytes: 
6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb1 in continuation_bytes[:3]: 6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb3 in continuation_bytes[:3]: 6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (sb+cb1+'\x80'+cb3).decode, 'utf-8') 6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb in map(chr, range(0x80, 0xA0)): 6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xE0'+cb+'\x80').decode, 'utf-8') 6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xE0'+cb+'\xBF').decode, 'utf-8') 6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # XXX: surrogates shouldn't be valid UTF-8! 6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf 6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt 6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #for cb in map(chr, range(0xA0, 0xC0)): 6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #self.assertRaises(UnicodeDecodeError, 6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #('\xED'+cb+'\x80').decode, 'utf-8') 6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #self.assertRaises(UnicodeDecodeError, 6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #('\xED'+cb+'\xBF').decode, 'utf-8') 6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # but since they are valid on Python 2 add a test for that: 6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb, surrogate in zip(map(chr, range(0xA0, 0xC0)), 6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm map(unichr, range(0xd800, 0xe000, 64))): 6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm encoded = '\xED'+cb+'\x80' 
6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(encoded.decode('utf-8'), surrogate) 6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(surrogate.encode('utf-8'), encoded) 6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb in map(chr, range(0x80, 0x90)): 6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xF0'+cb+'\x80\x80').decode, 'utf-8') 6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xF0'+cb+'\xBF\xBF').decode, 'utf-8') 6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cb in map(chr, range(0x90, 0xC0)): 6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xF4'+cb+'\x80\x80').decode, 'utf-8') 7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, 7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xF4'+cb+'\xBF\xBF').decode, 'utf-8') 7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_issue8271(self): 7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, 7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # only the start byte and the continuation byte(s) are now considered 7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # invalid, instead of the number of bytes specified by the start byte. 7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95, 7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # table 3-8, Row 2) for more information about the algorithm used. 
7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm FFFD = u'\ufffd' 7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sequences = [ 7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # invalid start bytes 7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\x80', FFFD), # continuation byte 7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\x80\x80', FFFD*2), # 2 continuation bytes 7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc0', FFFD), 7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc0\xc0', FFFD*2), 7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc1', FFFD), 7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc1\xc0', FFFD*2), 7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc0\xc1', FFFD*2), 7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # with start byte of a 2-byte sequence 7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc2', FFFD), # only the start byte 7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc2\xc2', FFFD*2), # 2 start bytes 7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc2\xc2\xc2', FFFD*3), # 2 start bytes 7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xc2\x41', FFFD+'A'), # invalid continuation byte 7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # with start byte of a 3-byte sequence 7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1', FFFD), # only the start byte 7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\xe1', FFFD*2), # 2 start bytes 7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\xe1\xe1', FFFD*3), # 3 start bytes 7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes 7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x80', FFFD), # only 1 continuation byte 7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x41', FFFD+'A'), # invalid continuation byte 7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb 
7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes 7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte 7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid 7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid 7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # with start byte of a 4-byte sequence 7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1', FFFD), # only the start byte 7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\xf1', FFFD*2), # 2 start bytes 7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\xf1\xf1', FFFD*3), # 3 start bytes 7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes 7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes 7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80', FFFD), # only 1 continuation bytes 7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\x80', FFFD), # only 2 continuation bytes 7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid 7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 1 invalid 7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid 7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cv and 1 valid 7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 invalid 7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # 2 invalid cb and 1 invalid 7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 1 valid cb and 1 invalid 
7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\xf1\x80', FFFD+'A'+FFFD), 7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2), 7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\xf1\x80\x41', FFFD*2+'A'), 7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2), 7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # with invalid start byte of a 4-byte sequence (rfc2279) 7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5', FFFD), # only the start byte 7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\xf5', FFFD*2), # 2 start bytes 7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\x80', FFFD*2), # only 1 continuation byte 7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\x80\x80', FFFD*3), # only 2 continuation byte 7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes 7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\x80\x41', FFFD*2+'A'), # 1 valid cb and 1 invalid 7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD), 7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'), 7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # with invalid start byte of a 5-byte sequence (rfc2279) 7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf8', FFFD), # only the start byte 7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf8\xf8', FFFD*2), # 2 start bytes 7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf8\x80', FFFD*2), # only one continuation byte 7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid 7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes 7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # with invalid start byte of a 6-byte sequence (rfc2279) 7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
('\xfc', FFFD), # only the start byte 7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xfc\xfc', FFFD*2), # 2 start bytes 7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xfc\x80\x80', FFFD*3), # only 2 continuation bytes 7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes 7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # invalid start byte 7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xfe', FFFD), 7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xfe\x80\x80', FFFD*3), 7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # other sequences 7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\x41\x42\x43', u'\ufffd\x41\x42\x43'), 7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\xff\x42\x43', u'\ufffd\ufffd\x42\x43'), 7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\xf1\x80\xc2\x81\x43', u'\ufffd\x81\x43'), 7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ('\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64', 7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'), 7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ] 7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for n, (seq, res) in enumerate(sequences): 7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict') 7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(seq.decode('utf-8', 'replace'), res) 7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual((seq+'b').decode('utf-8', 'replace'), res+'b') 7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(seq.decode('utf-8', 'ignore'), 7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm res.replace(u'\uFFFD', '')) 7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_codecs_idna(self): 7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Test whether trailing dot is 
def test_codecs_errors(self):
    # Exercises the error paths of the encode/decode machinery: error
    # handlers, broken codecs and bad arguments.
    import imp

    text = u'Andr\202 x'   # one non-ASCII character in the middle
    raw = 'Andr\202 x'     # same content as a byte string
    plain = u'abcde'

    # Error handling (encoding)
    encode = text.encode
    self.assertRaises(UnicodeError, encode, 'ascii')
    self.assertRaises(UnicodeError, encode, 'ascii', 'strict')
    self.assertEqual(encode('ascii', 'ignore'), "Andr x")
    self.assertEqual(encode('ascii', 'replace'), "Andr? x")
    # positional and keyword spellings must agree
    self.assertEqual(encode('ascii', 'replace'),
                     encode('ascii', errors='replace'))
    self.assertEqual(encode('ascii', 'ignore'),
                     encode(encoding='ascii', errors='ignore'))

    # Error handling (decoding)
    self.assertRaises(UnicodeError, unicode, raw, 'ascii')
    self.assertRaises(UnicodeError, unicode, raw, 'ascii', 'strict')
    self.assertEqual(unicode(raw, 'ascii', 'ignore'), u"Andr x")
    self.assertEqual(unicode(raw, 'ascii', 'replace'), u'Andr\uFFFD x')
    # positional and keyword spellings must agree
    self.assertEqual(plain.decode('ascii', 'ignore'),
                     plain.decode('ascii', errors='ignore'))
    self.assertEqual(plain.decode('ascii', 'replace'),
                     plain.decode(encoding='ascii', errors='replace'))

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Error handling (bad codec return values): the "test.unicode1" and
    # "test.unicode2" codecs come from the search function registered at
    # the top of this file; they return a non-tuple and a tuple without a
    # string, respectively.
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    for broken_codec in ("test.unicode1", "test.unicode2"):
        self.assertRaises(TypeError, u"hello".encode, broken_codec)

    # executes PyUnicode_Encode()
    missing_path = [u"non-existing dir"]
    self.assertRaises(ImportError, imp.find_module,
                      "non-existing module", missing_path)

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
def test_codecs(self):
    # Encoding: known byte output for a plain ASCII word.
    hello = u'hello'
    known_encodings = (
        ('ascii', 'hello'),
        ('utf-7', 'hello'),
        ('utf-8', 'hello'),
        ('utf8', 'hello'),
        ('utf-16-le', 'h\000e\000l\000l\000o\000'),
        ('utf-16-be', '\000h\000e\000l\000l\000o'),
        ('latin-1', 'hello'),
    )
    for codec_name, encoded in known_encodings:
        self.assertEqual(hello.encode(codec_name), encoded)

    def roundtrip(text, codec_names):
        # Encoding and decoding again must give back the original text.
        for codec_name in codec_names:
            self.assertEqual(unicode(text.encode(codec_name), codec_name),
                             text)

    # Roundtrip safety for BMP: the first 1024 chars for the Unicode
    # codecs, the first 256 for latin-1, the first 128 for ascii.
    unicode_codecs = ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                      'utf-16-be', 'raw_unicode_escape',
                      'unicode_escape', 'unicode_internal')
    bmp_plan = (
        (1024, unicode_codecs),
        (256, ('latin-1',)),
        (128, ('ascii',)),
    )
    for limit, codec_names in bmp_plan:
        for ordinal in xrange(limit):
            roundtrip(unichr(ordinal), codec_names)

    # Roundtrip safety for non-BMP (just a few chars)
    astral = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
    roundtrip(astral, ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                       #'raw_unicode_escape',
                       'unicode_escape', 'unicode_internal'))

    # UTF-8 must be roundtrip safe for all UCS-2 code points.
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    ucs2_ordinals = range(0, 0xd800) + range(0xe000, 0x10000)
    roundtrip(u''.join(map(unichr, ucs2_ordinals)), ('utf-8',))
def test_codecs_charmap(self):
    # Every listed charmap codec must round-trip (decode, then encode)
    # the byte values of its half of the 8-bit range unchanged.

    # Codecs checked against bytes 0-127.
    low_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',

        ### These have undefined mappings:
        #'cp424',

        ### These fail the round-trip:
        #'cp875'
    )

    # Codecs checked against bytes 128-255.
    high_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        ### These have undefined mappings:
        #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        #'cp1256', 'cp1257', 'cp1258',
        #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
        #'iso8859_3', 'iso8859_6', 'iso8859_7',
        #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

        ### These fail the round-trip:
        #'cp1006', 'cp875', 'iso8859_8',
    )

    plan = (
        (xrange(128), low_half_codecs),        # 0-127
        (xrange(128, 256), high_half_codecs),  # 128-255
    )
    for byte_values, codec_names in plan:
        payload = ''.join(map(chr, byte_values))
        for codec_name in codec_names:
            decoded = unicode(payload, codec_name)
            self.assertEqual(decoded.encode(codec_name), payload)
'cp869', 'cp874', 9324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #'iso8859_3', 'iso8859_6', 'iso8859_7', 9334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', 9344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ### These fail the round-trip: 9364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #'cp1006', 'cp875', 'iso8859_8', 9374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ): 9394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode(s, encoding).encode(encoding), s) 9404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_concatenation(self): 9424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual((u"abc" u"def"), u"abcdef") 9434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(("abc" u"def"), u"abcdef") 9444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual((u"abc" "def"), u"abcdef") 9454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi") 9464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi") 9474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_printing(self): 9494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class BitBucket: 9504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def write(self, text): 9514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pass 9524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm out = BitBucket() 9544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print >>out, u'abc' 9554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print >>out, u'abc', u'def' 9564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print >>out, u'abc', 'def' 9574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print >>out, 'abc', u'def' 
def test_ucs4(self):
    # raw-unicode-escape must round-trip code points beyond the BMP in
    # both directions, and must reject an escape above the Unicode range.
    x = u'\U00100000'
    y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
    self.assertEqual(x, y)

    for escaped in (r'\U00100000', r'\U00010000'):
        recoded = escaped.decode("raw-unicode-escape").encode("raw-unicode-escape")
        self.assertEqual(recoded, escaped)

    # \U11111111 is not a valid code point.  The literal is raw so the
    # ten characters reach the codec untouched (a byte string literal
    # does not interpret \U anyway; raw just makes that explicit and
    # matches the literals above).
    with self.assertRaises(UnicodeDecodeError) as caught:
        r'\U11111111'.decode("raw-unicode-escape")
    # The error must span the whole 10-character escape.
    self.assertEqual(caught.exception.start, 0)
    self.assertEqual(caught.exception.end, 10)
def test_conversion(self):
    # Make sure __unicode__() works properly: unicode() is applied to
    # old-style and new-style classes and to str/unicode subclasses
    # that define __str__ and/or __unicode__.
    class Foo0:
        # old-style, __str__ only
        def __str__(self):
            return "foo"

    class Foo1:
        # old-style, __unicode__ returning unicode
        def __unicode__(self):
            return u"foo"

    class Foo2(object):
        # new-style, __unicode__ returning unicode
        def __unicode__(self):
            return u"foo"

    class Foo3(object):
        # new-style, __unicode__ returning a byte string
        def __unicode__(self):
            return "foo"

    class Foo4(str):
        # str subclass, __unicode__ returning a byte string
        def __unicode__(self):
            return "foo"

    class Foo5(unicode):
        # unicode subclass, __unicode__ returning a byte string
        def __unicode__(self):
            return "foo"

    class Foo6(str):
        # str subclass with both hooks
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    class Foo7(unicode):
        # unicode subclass with both hooks
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    class Foo8(unicode):
        # unicode subclass whose constructor doubles its content and
        # whose __unicode__ returns the instance itself
        def __new__(cls, content=""):
            return unicode.__new__(cls, 2*content)

        def __unicode__(self):
            return self

    class Foo9(unicode):
        # unicode subclass whose hooks both return byte strings
        def __str__(self):
            return "string"

        def __unicode__(self):
            return "not unicode"

    conversions = (
        (Foo0(), u"foo"),
        (Foo1(), u"foo"),
        (Foo2(), u"foo"),
        (Foo3(), u"foo"),
        (Foo4("bar"), u"foo"),
        (Foo5("bar"), u"foo"),
        (Foo6("bar"), u"foou"),
        (Foo7("bar"), u"foou"),
        (Foo8("foo"), u"foofoo"),
    )
    for instance, expected in conversions:
        self.assertEqual(unicode(instance), expected)

    # str() and unicode() must each use their own hook.
    self.assertEqual(str(Foo9("foo")), "string")
    self.assertEqual(unicode(Foo9("foo")), u"not unicode")
self.assertEqual(unicode(Foo6("bar")), u"foou") 10424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode(Foo7("bar")), u"foou") 10434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode(Foo8("foo")), u"foofoo") 10444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(str(Foo9("foo")), "string") 10454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(unicode(Foo9("foo")), u"not unicode") 10464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_unicode_repr(self): 10484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class s1: 10494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 10504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return '\\n' 10514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class s2: 10534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 10544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u'\\n' 10554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(s1()), '\\n') 10574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(repr(s2()), '\\n') 10584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_expandtabs_overflows_gracefully(self): 10604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This test only affects 32-bit platforms because expandtabs can only take 10614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # an int as the max value, not a 64-bit C long. If expandtabs is changed 10624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # to take a 64-bit long, this test should apply to all platforms. 
10634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if sys.maxint > (1 << 32) or struct.calcsize('P') != 4: 10644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 10654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint) 10664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test__format__(self): 10684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test(value, format, expected): 10694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # test both with and without the trailing 's' 10704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(value.__format__(format), expected) 10714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(value.__format__(format + u's'), expected) 10724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'', u'', u'') 10744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abc', u'', u'abc') 10754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abc', u'.3', u'abc') 10764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'ab', u'.3', u'ab') 10774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abcdef', u'.3', u'abc') 10784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abcdef', u'.0', u'') 10794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abc', u'3.3', u'abc') 10804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abc', u'2.3', u'abc') 10814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abc', u'2.2', u'ab') 10824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'abc', u'3.2', u'ab ') 10834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'x<0', u'result') 10844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'x<5', u'result') 10854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'x<6', u'result') 10864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'x<7', u'resultx') 
10874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'x<8', u'resultxx') 10884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u' <7', u'result ') 10894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'<7', u'result ') 10904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'>7', u' result') 10914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'>8', u' result') 10924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'^8', u' result ') 10934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'^9', u' result ') 10944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'result', u'^10', u' result ') 10954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'a', u'10000', u'a' + u' ' * 9999) 10964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'', u'10000', u' ' * 10000) 10974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test(u'', u'10000000', u' ' * 10000000) 10984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # test mixing unicode and str 11004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'abc'.__format__('s'), u'abc') 11014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'abc'.__format__('->10s'), u'-------abc') 11024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_format(self): 11044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u''.format(), u'') 11054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a'.format(), u'a') 11064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'ab'.format(), u'ab') 11074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a{{'.format(), u'a{') 11084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a}}'.format(), u'a}') 11094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{{b'.format(), u'{b') 
11104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'}}b'.format(), u'}b') 11114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a{{b'.format(), u'a{b') 11124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # examples from the PEP: 11144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm import datetime 11154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred") 11164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')), 11174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u"My name is Fred") 11184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'), 11194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u"My name is Fred :-{}") 11204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # datetime.__format__ doesn't work with unicode 11224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #d = datetime.date(2007, 8, 18) 11234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #self.assertEqual("The year is {0.year}".format(d), 11244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # "The year is 2007") 11254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # classes we'll use for testing 11274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class C: 11284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x=100): 11294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._x = x 11304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, spec): 11314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return spec 11324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class D: 11344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x): 
11354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.x = x 11364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, spec): 11374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return str(self.x) 11384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # class with __str__, but no __format__ 11404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class E: 11414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x): 11424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.x = x 11434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 11444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u'E(' + self.x + u')' 11454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # class with __repr__, but no __format__ or __str__ 11474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class F: 11484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x): 11494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.x = x 11504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 11514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u'F(' + self.x + u')' 11524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # class with __format__ that forwards to string, for some format_spec's 11544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class G: 11554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x): 11564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.x = x 11574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 11584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u"string is " + self.x 11594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, format_spec): 11604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if format_spec == 'd': 11614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u'G(' + self.x + u')' 
11624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return object.__format__(self, format_spec) 11634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # class that returns a bad type from __format__ 11654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class H: 11664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, format_spec): 11674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1.0 11684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class I(datetime.date): 11704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, format_spec): 11714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.strftime(format_spec) 11724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class J(int): 11744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, format_spec): 11754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return int.__format__(self * 2, format_spec) 11764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u''.format(), u'') 11794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'abc'.format(), u'abc') 11804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}'.format(u'abc'), u'abc') 11814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:}'.format(u'abc'), u'abc') 11824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc') 11834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}X'.format(u'abc'), u'abcX') 11844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY') 11854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{1}'.format(1, u'abc'), u'abc') 11864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc') 11874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX') 11884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY') 11894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}'.format(-15), u'-15') 11904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc') 11914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc') 11924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{{'.format(), u'{') 11934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'}}'.format(), u'}') 11944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{{}}'.format(), u'{}') 11954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{{x}}'.format(), u'{x}') 11964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{{{0}}}'.format(123), u'{123}') 11974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}') 11984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'}}{{'.format(), u'}{') 11994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'}}x{{'.format(), u'}x{') 12004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # weird field names 12024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz') 12034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz') 12044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3') 12054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20') 
12074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010') 12084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc') 12094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc') 12104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def') 12114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def') 12124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def') 12134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # strings 12154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc') 12164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab') 12174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc') 12184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'') 12194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc') 12204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc') 12214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab') 12224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ') 12234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result') 12244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result') 12254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result') 
12264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx') 12274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx') 12284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ') 12294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ') 12304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:>7s}'.format(u'result'), u' result') 12314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:>8s}'.format(u'result'), u' result') 12324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ') 12334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:^9s}'.format(u'result'), u' result ') 12344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:^10s}'.format(u'result'), u' result ') 12354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999) 12364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000) 12374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000) 12384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # format specifiers for user defined type 12404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:abc}'.format(C()), u'abc') 12414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # !r and !s coercions 12434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello') 12444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello') 12454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello ') 12464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello ') 12474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'") 12484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'") 12494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)') 12504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # test fallback to object.__format__ 12524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}'.format({}), u'{}') 12534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}'.format([]), u'[]') 12544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}'.format([1]), u'[1]') 12554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)') 12564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)') 12574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data') 12584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm msg = 'object.__format__ with a non-empty format string is deprecated' 12604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm with test_support.check_warnings((msg, PendingDeprecationWarning)): 12614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data) ') 12624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data) ') 12634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data') 12644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007, 12664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm month=8, 12674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm day=27)), 12684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u"date: 2007-08-27") 12694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # test deriving from a builtin type and overriding __format__ 12714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0}".format(J(10)), u"20") 12724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # string format specifiers 12754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{0:}'.format('a'), u'a') 12764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # computed format specifiers 12784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0:.{1}}".format(u'hello world', 5), u'hello') 12794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0:.{1}s}".format(u'hello world', 5), u'hello') 12804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), u'hello') 12814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), u'hello ') 12824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), u'hello ') 12834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # test various errors 12854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'{'.format) 12864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'}'.format) 12874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, 
u'a{'.format) 12884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'a}'.format) 12894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'{a'.format) 12904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'}a'.format) 12914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u'{0}'.format) 12924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u'{1}'.format, u'abc') 12934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(KeyError, u'{x}'.format) 12944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"}{".format) 12954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{".format) 12964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"}".format) 12974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"abc{0:{}".format) 12984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0".format) 12994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u"{0.}".format) 13004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0.}".format, 0) 13014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u"{0[}".format) 13024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0[}".format, []) 13034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(KeyError, u"{0]}".format) 13044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0.[]}".format, 0) 13054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0..foo}".format, 0) 13064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0[0}".format, 0) 13074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0[0:foo}".format, 0) 
13084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(KeyError, u"{c]}".format) 13094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{{ {{{0}}".format, 0) 13104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0}}".format, 0) 13114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(KeyError, u"{foo}".format, bar=3) 13124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0!x}".format, 3) 13134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0!}".format, 0) 13144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0!rs}".format, 0) 13154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{!}".format) 13164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u"{:}".format) 13174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u"{:s}".format) 13184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(IndexError, u"{}".format) 13194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm big = u"23098475029384702983476098230754973209482573" 13204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, (u"{" + big + u"}").format) 13214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, (u"{[" + big + u"]}").format, [0]) 13224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # issue 6089 13244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0[0]x}".format, [None]) 13254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0[0](10)}".format, [None]) 13264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # can't have a replacement on the field name portion 13284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(TypeError, 
u'{0[{1}]}'.format, u'abcdefg', 4) 13294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # exceed maximum recursion depth 13314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0:{1:{2}}}".format, u'abc', u's', u'') 13324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format, 13334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 0, 1, 2, 3, 4, 5, 6, 7) 13344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # string format spec errors 13364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0:-s}".format, u'') 13374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, format, u"", u"-") 13384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u"{0:=s}".format, u'') 13394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # test combining string and unicode 13414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u"foo{0}".format('bar'), u'foobar') 13424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This will try to convert the argument from unicode to str, which 13434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # will succeed 13444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual("foo{0}".format(u'bar'), 'foobar') 13454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This will try to convert the argument from unicode to str, which 13464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # will fail 13474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar') 13484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_format_auto_numbering(self): 13504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class C: 
13514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, x=100): 13524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._x = x 13534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __format__(self, spec): 13544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return spec 13554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{}'.format(10), u'10') 13574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{:5}'.format('s'), u's ') 13584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{!r}'.format('s'), u"'s'") 13594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{._x}'.format(C(10)), u'10') 13604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{[1]}'.format([1, 2]), u'2') 13614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4') 13624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c') 13634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a x b') 13654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b') 13664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # can't mix and match numbering and auto-numbering 13684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'{}{1}'.format, 1, 2) 13694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'{1}{}'.format, 1, 2) 13704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2) 13714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2) 13724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # can mix and match 
auto-numbering and named 13744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4') 13754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test') 13764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3') 13774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g') 13784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_raiseMemError(self): 13804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Ensure that the freelist contains a consistent object, even 13814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # when a string allocation fails with a MemoryError. 13824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This used to crash the interpreter, 13834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # or leak references when the number was smaller. 13844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charwidth = 4 if sys.maxunicode >= 0x10000 else 2 13854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Note: sys.maxsize is half of the actual max allocation because of 13864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the signedness of Py_ssize_t. 
13874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm alloc = lambda: u"a" * (sys.maxsize // charwidth * 2) 13884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(MemoryError, alloc) 13894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertRaises(MemoryError, alloc) 13904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def test_format_subclass(self): 13924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm class U(unicode): 13934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __unicode__(self): 13944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return u'__unicode__ overridden' 13954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm u = U(u'xxx') 13964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual("%s" % u, u'__unicode__ overridden') 13974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.assertEqual("{}".format(u), '__unicode__ overridden') 13984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 13994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 14004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef test_main(): 14014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_support.run_unittest(__name__) 14024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 14034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif __name__ == "__main__": 14044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm test_main() 1405