14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm""" Test script for the Unicode implementation.
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmWritten by Marc-Andre Lemburg (mal@lemburg.com).
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""#"
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys, struct, codecs
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom test import test_support, string_tests
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search hook that hands out deliberately broken test codecs.

    "test.unicode1" resolves to an encoder/decoder pair that returns a bare
    integer rather than a tuple; "test.unicode2" resolves to a pair that
    returns a tuple whose items are integers.  Every other name yields None.
    """
    def encode1(input, errors="strict"):
        return 42 # not a tuple
    def decode1(input, errors="strict"):
        return 42 # not a tuple
    def encode2(input, errors="strict"):
        return (42, 42) # no unicode
    def decode2(input, errors="strict"):
        return (42, 42) # no unicode

    broken_codecs = {
        "test.unicode1": (encode1, decode1, None, None),
        "test.unicode2": (encode2, decode2, None, None),
    }
    # dict.get() gives None for names this hook does not recognise.
    return broken_codecs.get(encoding)
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcodecs.register(search_function)
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass UnicodeTest(
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    string_tests.CommonTest,
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    string_tests.MixinStrUnicodeUserStringTest,
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    string_tests.MixinStrUnicodeTest,
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ):
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    type2test = unicode
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def assertEqual(self, first, second, msg=None):
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # strict assertEqual method: reject implicit bytes/unicode equality
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        super(UnicodeTest, self).assertEqual(first, second, msg)
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if isinstance(first, unicode) or isinstance(second, unicode):
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIsInstance(first, unicode)
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIsInstance(second, unicode)
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif isinstance(first, str) or isinstance(second, str):
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIsInstance(first, str)
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIsInstance(second, str)
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def checkequalnofix(self, result, object, methodname, *args):
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        method = getattr(object, methodname)
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        realresult = method(*args)
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(realresult, result)
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(type(realresult) is type(result))
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # if the original is returned make sure that
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # this doesn't happen with subclasses
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if realresult is object:
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            class usub(unicode):
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                def __repr__(self):
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return 'usub(%r)' % unicode.__repr__(self)
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            object = usub(object)
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            method = getattr(object, methodname)
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            realresult = method(*args)
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(realresult, result)
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(object is not realresult)
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_literals(self):
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\xff', u'\u00ff')
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\uffff', u'\U0000ffff')
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(SyntaxError, eval, 'u\'\\Ufffffffe\'')
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(SyntaxError, eval, 'u\'\\Uffffffff\'')
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(SyntaxError, eval, 'u\'\\U%08x\'' % 0x110000)
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_repr(self):
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not sys.platform.startswith('java'):
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Test basic sanity of repr()
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'abc'), "u'abc'")
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'\\c'), "u'\\\\c'")
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'\\'), "u'\\\\'")
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'\n'), "u'\\n'")
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'\r'), "u'\\r'")
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'\t'), "u'\\t'")
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'\b'), "u'\\x08'")
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u"'\""), """u'\\'"'""")
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u"'\""), """u'\\'"'""")
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u"'"), '''u"'"''')
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u'"'), """u'"'""")
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            latin1repr = (
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                "\\xfe\\xff'")
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            testrepr = repr(u''.join(map(unichr, xrange(256))))
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(testrepr, latin1repr)
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Test repr works on wide unicode escapes without overflow.
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             repr(u"\U00010000" * 39 + u"\uffff" * 4096))
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_count(self):
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_count(self)
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # check mixed argument types
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(3,  'aaa', 'count', u'a')
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(0,  'aaa', 'count', u'b')
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(3, u'aaa', 'count',  'a')
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(0, u'aaa', 'count',  'b')
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(0, u'aaa', 'count',  'b')
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(1, u'aaa', 'count',  'a', -1)
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(3, u'aaa', 'count',  'a', -10)
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(2, u'aaa', 'count',  'a', 0, -1)
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(0, u'aaa', 'count',  'a', 0, -10)
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_find(self):
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(0,  u'abcdefghiabc', 'find', u'abc')
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(9,  u'abcdefghiabc', 'find', u'abc', 1)
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u'hello'.find)
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u'hello'.find, 42)
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_rfind(self):
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_rfind(self)
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # check mixed argument types
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(9,   'abcdefghiabc', 'rfind', u'abc')
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(12,  'abcdefghiabc', 'rfind', u'')
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(12, u'abcdefghiabc', 'rfind',  '')
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_index(self):
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_index(self)
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # check mixed argument types
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (t1, t2) in ((str, unicode), (unicode, str)):
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2(''))
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(3, t1('abcdefghiabc'), 'index',  t2('def'))
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2('abc'))
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(9, t1('abcdefghiabc'), 'index',  t2('abc'), 1)
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghiabc').index, t2('hib'))
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghiab').index,  t2('abc'), 1)
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghi').index,  t2('ghi'), 8)
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghi').index,  t2('ghi'), -1)
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_rindex(self):
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_rindex(self)
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # check mixed argument types
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (t1, t2) in ((str, unicode), (unicode, str)):
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(12, t1('abcdefghiabc'), 'rindex',  t2(''))
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(3,  t1('abcdefghiabc'), 'rindex',  t2('def'))
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(9,  t1('abcdefghiabc'), 'rindex',  t2('abc'))
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(0,  t1('abcdefghiabc'), 'rindex',  t2('abc'), 0, -1)
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghiabc').rindex,  t2('hib'))
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('defghiabc').rindex,  t2('def'), 1)
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('defghiabc').rindex,  t2('abc'), 0, -1)
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, 8)
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, -1)
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_translate(self):
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'bbbc', u'abababc', 'translate', {ord('a'):None})
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'iiic', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'iiix', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'<i><i><i>c', u'abababc', 'translate', {ord('a'):None, ord('b'):u'<i>'})
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'c', u'abababc', 'translate', {ord('a'):None, ord('b'):u''})
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'xyyx', u'xzx', 'translate', {ord('z'):u'yy'})
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u'hello'.translate)
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_split(self):
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_split(self)
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Mixed arguments
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix([u'a', u'b', u'c', u'd'], u'a//b//c//d', 'split', '//')
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix([u'a', u'b', u'c', u'd'], 'a//b//c//d', 'split', u'//')
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_join(self):
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_join(self)
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # mixed arguments
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'a b c d', u' ', 'join', ['a', 'b', u'c', u'd'])
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'abcd', u'', 'join', (u'a', u'b', u'c', u'd'))
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'w x y z', u' ', 'join', string_tests.Sequence('wxyz'))
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'a b c d', ' ', 'join', [u'a', u'b', u'c', u'd'])
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'a b c d', ' ', 'join', ['a', 'b', u'c', u'd'])
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'abcd', '', 'join', (u'a', u'b', u'c', u'd'))
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'w x y z', ' ', 'join', string_tests.Sequence(u'wxyz'))
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_strip(self):
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_strip(self)
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, u"hello".strip, "\xff")
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_replace(self):
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.CommonTest.test_replace(self)
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # method call forwarded from str implementation because of unicode argument
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(u'one@two!three!', 'one!two!three!', 'replace', u'!', u'@', 1)
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_comparison(self):
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Comparisons:
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'abc' == 'abc')
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue('abc' == u'abc')
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'abc' == u'abc')
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'abcd' > 'abc')
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue('abcd' > u'abc')
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'abcd' > u'abc')
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'abc' < 'abcd')
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue('abc' < u'abcd')
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'abc' < u'abcd')
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if 0:
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Move these tests to a Unicode collation module test...
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Testing UTF-16 code point order comparisons...
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # No surrogates, no fixup required.
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(u'\u0061' < u'\u20ac')
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Non surrogate below surrogate value, no fixup required
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(u'\u0061' < u'\ud800\udc02')
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Non surrogate above surrogate value, fixup required
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def test_lecmp(s, s2):
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.assertTrue(s < s2)
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def test_fixup(s):
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud800\udc01'
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud900\udc01'
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\uda00\udc01'
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\udb00\udc01'
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud800\udd01'
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud900\udd01'
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\uda00\udd01'
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\udb00\udd01'
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud800\ude01'
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud900\ude01'
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\uda00\ude01'
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\udb00\ude01'
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud800\udfff'
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\ud900\udfff'
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\uda00\udfff'
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s2 = u'\udb00\udfff'
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_lecmp(s, s2)
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_fixup(u'\ue000')
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                test_fixup(u'\uff61')
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Surrogates on both sides, no fixup required
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertTrue(u'\ud800\udc02' < u'\ud84d\udc56')
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_islower(self):
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'\u1FFc', 'islower')
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_isupper(self):
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not sys.platform.startswith('java'):
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.checkequalnofix(False, u'\u1FFc', 'isupper')
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_istitle(self):
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_title(self)
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u1FFc', 'istitle')
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_isspace(self):
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u2000', 'isspace')
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u200a', 'isspace')
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'\u2014', 'isspace')
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_isalpha(self):
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u1FFc', 'isalpha')
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_isdecimal(self):
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'', 'isdecimal')
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'a', 'isdecimal')
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'0', 'isdecimal')
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'\u2460', 'isdecimal') # CIRCLED DIGIT ONE
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'0123456789', 'isdecimal')
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'0123456789a', 'isdecimal')
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkraises(TypeError, 'abc', 'isdecimal', 42)
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_isdigit(self):
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u2460', 'isdigit')
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'\xbc', 'isdigit')
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u0660', 'isdigit')
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_isnumeric(self):
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'', 'isnumeric')
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'a', 'isnumeric')
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'0', 'isnumeric')
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u2460', 'isnumeric')
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\xbc', 'isnumeric')
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'\u0660', 'isnumeric')
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(True, u'0123456789', 'isnumeric')
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.checkequalnofix(False, u'0123456789a', 'isnumeric')
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u"abc".isnumeric, 42)
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_contains(self):
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Testing Unicode contains method
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', u'abdb')
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', u'bdab')
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', u'bdaba')
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', u'bdba')
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', u'bdba')
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', u'bdba')
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'a', u'bdb')
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'a', 'bdb')
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', 'bdba')
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', ('a',1,None))
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', (1,None,'a'))
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', (1,None,u'a'))
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', ('a',1,None))
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', (1,None,'a'))
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', (1,None,u'a'))
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn('a', ('x',1,u'y'))
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn('a', ('x',1,None))
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'abcd', u'abcxxxx')
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'ab', u'abcd')
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('ab', u'abc')
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'ab', 'abc')
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'ab', (1,None,u'ab'))
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'', u'abc')
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('', u'abc')
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # If the following fails either
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # the contains operator does not propagate UnicodeErrors or
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # someone has changed the default encoding
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeDecodeError, 'g\xe2teau'.__contains__, u'\xe2')
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeDecodeError, u'g\xe2teau'.__contains__, '\xe2')
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'', '')
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('', u'')
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'', u'')
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'', 'abc')
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('', u'abc')
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'', u'abc')
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'\0', 'abc')
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn('\0', u'abc')
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'\0', u'abc')
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'\0', '\0abc')
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('\0', u'\0abc')
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'\0', u'\0abc')
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'\0', 'abc\0')
3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('\0', u'abc\0')
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'\0', u'abc\0')
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', '\0abc')
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('a', u'\0abc')
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'a', u'\0abc')
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'asdf', 'asdf')
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn('asdf', u'asdf')
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertIn(u'asdf', u'asdf')
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'asdf', 'asd')
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn('asdf', u'asd')
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'asdf', u'asd')
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'asdf', '')
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn('asdf', u'')
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertNotIn(u'asdf', u'')
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u"abc".__contains__)
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u"abc".__contains__, object())
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_formatting(self):
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Testing Unicode formatting strings...
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not sys.platform.startswith('java'):
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'%c' % 0x1234, u'\u1234')
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"%.1\u1032f".__mod__, (1.0/3))
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for num in range(0x00,0x80):
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            char = chr(num)
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(u"%c" % char, unicode(char))
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(u"%c" % num, unicode(char))
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(char == u"%c" % char)
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(char == u"%c" % num)
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Issue 7649
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for num in range(0x80,0x100):
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            uchar = unichr(num)
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(uchar, u"%c" % num)   # works only with ints
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # the implicit decoding should fail for non-ascii chars
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # formatting jobs delegated from the string implementation:
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123},  u'...abc...')
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('...%s...' % u"abc", u'...abc...')
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%*s' % (5,u'abc',), u'  abc')
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%c' % u'a', u'a')
4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Wrapper:
4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u'\u1234'
4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual('%s' % Wrapper(), u'\u1234')
4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_startswith_endswith_errors(self):
4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for meth in (u'foo'.startswith, u'foo'.endswith):
4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            with self.assertRaises(UnicodeDecodeError):
4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                meth('\xff')
4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            with self.assertRaises(TypeError) as cm:
4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                meth(['f'])
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            exc = str(cm.exception)
4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIn('unicode', exc)
4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIn('str', exc)
4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertIn('tuple', exc)
4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_format_float(self):
4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # should not format with a comma, but always with C locale
4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'1.0', u'%.1f' % 1.0)
4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_constructor(self):
4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            unicode(u'unicode remains unicode'),
4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'unicode remains unicode'
4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class UnicodeSubclass(unicode):
4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pass
4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            unicode(UnicodeSubclass('unicode subclass becomes unicode')),
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'unicode subclass becomes unicode'
4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            unicode('strings are converted to unicode'),
4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'strings are converted to unicode'
4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class UnicodeCompat:
4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x):
4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.x = x
4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return self.x
4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'__unicode__ compatible objects are recognized')
4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class StringCompat:
4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x):
4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.x = x
4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return self.x
4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            unicode(StringCompat('__str__ compatible objects are recognized')),
5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'__str__ compatible objects are recognized'
5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # unicode(obj) is compatible to str():
5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        o = StringCompat('unicode(obj) is compatible to str()')
5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # %-formatting and .__unicode__()
5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'%s' %
5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u"u'%s' % obj uses obj.__unicode__()")
5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'%s' %
5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u"u'%s' % obj falls back to obj.__str__()")
5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for obj in (123, 123.45, 123L):
5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(unicode(obj), unicode(str(obj)))
5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # unicode(obj, encoding, error) tests (this maps to
5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # PyUnicode_FromEncodedObject() at C level)
5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not sys.platform.startswith('java'):
5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(
5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                TypeError,
5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                unicode,
5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                u'decoding unicode is not supported',
5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                'utf-8',
5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                'strict'
5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            )
5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            unicode('strings are decoded to unicode', 'utf-8', 'strict'),
5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'strings are decoded to unicode'
5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not sys.platform.startswith('java'):
5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            with test_support.check_py3k_warnings():
5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                buf = buffer('character buffers are decoded to unicode')
5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(
5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                unicode(
5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    buf,
5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    'utf-8',
5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    'strict'
5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ),
5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                u'character buffers are decoded to unicode'
5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            )
5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, unicode, 42, 42, 42)
5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_codecs_utf7(self):
5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        utfTests = [
5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'+', '+-'),
5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'+-', '+--'),
5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'+?', '+-?'),
5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'\?', '+AFw?'),
5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'+?', '+-?'),
5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (ur'\\?', '+AFwAXA?'),
5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (ur'\\\?', '+AFwAXABc?'),
5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (ur'++--', '+-+---'),
5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'\U000abcde', '+2m/c3g-'),                  # surrogate pairs
5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'/', '/'),
5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ]
5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (x, y) in utfTests:
5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(x.encode('utf-7'), y)
5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Unpaired surrogates not supported
5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')
5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Direct encoded characters
5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Optional direct characters
5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        set_o = '!"#$%&*;<=>@[]^_`{|}'
5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c in set_d:
5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(c.encode('utf7'), c.encode('ascii'))
5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c))
5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(c == c.encode('ascii').decode('utf7'))
5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c in set_o:
5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c))
5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(c == c.encode('ascii').decode('utf7'))
5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_codecs_utf8(self):
5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u''.encode('utf-8'), '')
5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac')
5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80')
5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80')
5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (u'\ud800\udc02'*1000).encode('utf-8'),
5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xf0\x90\x80\x82'*1000
5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(
6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u' Nunstuck git und'.encode('utf-8'),
6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\xe3\x80\x8cWenn ist das Nunstuck git und'
6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # UTF-8 specific decoding tests
6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456')
6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002')
6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac')
6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Other possible utf-8 test cases:
6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # * strict decoding testing for all of the
6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_utf8_decode_valid_sequences(self):
6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        sequences = [
6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # single byte
6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\x00', u'\x00'), ('a', u'a'), ('\x7f', u'\x7f'),
6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # 2 bytes
6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc2\x80', u'\x80'), ('\xdf\xbf', u'\u07ff'),
6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # 3 bytes
6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe0\xa0\x80', u'\u0800'), ('\xed\x9f\xbf', u'\ud7ff'),
6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xee\x80\x80', u'\uE000'), ('\xef\xbf\xbf', u'\uffff'),
6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # 4 bytes
6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xF0\x90\x80\x80', u'\U00010000'),
6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf4\x8f\xbf\xbf', u'\U0010FFFF')
6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ]
6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for seq, res in sequences:
6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(seq.decode('utf-8'), res)
6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ch in map(unichr, range(0, sys.maxunicode)):
6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(ch, ch.encode('utf-8').decode('utf-8'))
6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_utf8_decode_invalid_sequences(self):
6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # continuation bytes in a sequence of 2, 3, or 4 bytes
6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        continuation_bytes = map(chr, range(0x80, 0xC0))
6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # start bytes of a 2-byte sequence equivalent to codepoints < 0x7F
6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        invalid_2B_seq_start_bytes = map(chr, range(0xC0, 0xC2))
6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        invalid_4B_seq_start_bytes = map(chr, range(0xF5, 0xF8))
6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        invalid_start_bytes = (
6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continuation_bytes + invalid_2B_seq_start_bytes +
6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            invalid_4B_seq_start_bytes + map(chr, range(0xF7, 0x100))
6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for byte in invalid_start_bytes:
6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError, byte.decode, 'utf-8')
6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for sb in invalid_2B_seq_start_bytes:
6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for cb in continuation_bytes:
6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.assertRaises(UnicodeDecodeError, (sb+cb).decode, 'utf-8')
6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for sb in invalid_4B_seq_start_bytes:
6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for cb1 in continuation_bytes[:3]:
6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for cb3 in continuation_bytes[:3]:
6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.assertRaises(UnicodeDecodeError,
6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                      (sb+cb1+'\x80'+cb3).decode, 'utf-8')
6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cb in map(chr, range(0x80, 0xA0)):
6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError,
6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              ('\xE0'+cb+'\x80').decode, 'utf-8')
6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError,
6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              ('\xE0'+cb+'\xBF').decode, 'utf-8')
6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # XXX: surrogates shouldn't be valid UTF-8!
6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #for cb in map(chr, range(0xA0, 0xC0)):
6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #self.assertRaises(UnicodeDecodeError,
6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              #('\xED'+cb+'\x80').decode, 'utf-8')
6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #self.assertRaises(UnicodeDecodeError,
6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              #('\xED'+cb+'\xBF').decode, 'utf-8')
6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # but since they are valid on Python 2 add a test for that:
6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cb, surrogate in zip(map(chr, range(0xA0, 0xC0)),
6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                 map(unichr, range(0xd800, 0xe000, 64))):
6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            encoded = '\xED'+cb+'\x80'
6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(encoded.decode('utf-8'), surrogate)
6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(surrogate.encode('utf-8'), encoded)
6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cb in map(chr, range(0x80, 0x90)):
6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError,
6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              ('\xF0'+cb+'\x80\x80').decode, 'utf-8')
6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError,
6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              ('\xF0'+cb+'\xBF\xBF').decode, 'utf-8')
6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cb in map(chr, range(0x90, 0xC0)):
6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError,
6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              ('\xF4'+cb+'\x80\x80').decode, 'utf-8')
7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError,
7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              ('\xF4'+cb+'\xBF\xBF').decode, 'utf-8')
7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_issue8271(self):
7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # only the start byte and the continuation byte(s) are now considered
7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # invalid, instead of the number of bytes specified by the start byte.
7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # table 3-8, Row 2) for more information about the algorithm used.
7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        FFFD = u'\ufffd'
7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        sequences = [
7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # invalid start bytes
7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\x80', FFFD), # continuation byte
7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\x80\x80', FFFD*2), # 2 continuation bytes
7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc0', FFFD),
7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc0\xc0', FFFD*2),
7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc1', FFFD),
7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc1\xc0', FFFD*2),
7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc0\xc1', FFFD*2),
7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # with start byte of a 2-byte sequence
7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc2', FFFD), # only the start byte
7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc2\xc2', FFFD*2), # 2 start bytes
7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc2\xc2\xc2', FFFD*3), # 2 start bytes
7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xc2\x41', FFFD+'A'), # invalid continuation byte
7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # with start byte of a 3-byte sequence
7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1', FFFD), # only the start byte
7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\xe1', FFFD*2), # 2 start bytes
7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\xe1\xe1', FFFD*3), # 3 start bytes
7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes
7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x80', FFFD), # only 1 continuation byte
7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x41', FFFD+'A'), # invalid continuation byte
7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb
7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes
7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte
7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid
7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid
7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # with start byte of a 4-byte sequence
7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1', FFFD), # only the start byte
7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\xf1', FFFD*2), # 2 start bytes
7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\xf1\xf1', FFFD*3), # 3 start bytes
7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes
7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes
7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80', FFFD), # only 1 continuation bytes
7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\x80', FFFD), # only 2 continuation bytes
7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid
7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 1 invalid
7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid
7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cv and 1 valid
7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 invalid
7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # 2 invalid cb and 1 invalid
7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 1 valid cb and 1 invalid
7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\xf1\x80', FFFD+'A'+FFFD),
7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2),
7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\xf1\x80\x41', FFFD*2+'A'),
7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2),
7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # with invalid start byte of a 4-byte sequence (rfc2279)
7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5', FFFD), # only the start byte
7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\xf5', FFFD*2), # 2 start bytes
7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\x80', FFFD*2), # only 1 continuation byte
7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\x80\x80', FFFD*3), # only 2 continuation byte
7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes
7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\x80\x41', FFFD*2+'A'), #  1 valid cb and 1 invalid
7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD),
7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'),
7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # with invalid start byte of a 5-byte sequence (rfc2279)
7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf8', FFFD), # only the start byte
7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf8\xf8', FFFD*2), # 2 start bytes
7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf8\x80', FFFD*2), # only one continuation byte
7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid
7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes
7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # with invalid start byte of a 6-byte sequence (rfc2279)
7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xfc', FFFD), # only the start byte
7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xfc\xfc', FFFD*2), # 2 start bytes
7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xfc\x80\x80', FFFD*3), # only 2 continuation bytes
7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes
7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # invalid start byte
7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xfe', FFFD),
7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xfe\x80\x80', FFFD*3),
7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # other sequences
7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\x41\x42\x43', u'\ufffd\x41\x42\x43'),
7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\xff\x42\x43', u'\ufffd\ufffd\x42\x43'),
7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\xf1\x80\xc2\x81\x43', u'\ufffd\x81\x43'),
7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ('\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64',
7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'),
7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ]
7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for n, (seq, res) in enumerate(sequences):
7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertRaises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict')
7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(seq.decode('utf-8', 'replace'), res)
7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual((seq+'b').decode('utf-8', 'replace'), res+'b')
7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(seq.decode('utf-8', 'ignore'),
7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             res.replace(u'\uFFFD', ''))
7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_codecs_idna(self):
7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Test whether trailing dot is preserved
7944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"www.python.org.".encode("idna"), "www.python.org.")
7954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_codecs_errors(self):
7974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Error handling (encoding)
7984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
7994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
8004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
8014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
8024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'),
8034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u'Andr\202 x'.encode('ascii', errors='replace'))
8044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'),
8054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u'Andr\202 x'.encode(encoding='ascii', errors='ignore'))
8064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Error handling (decoding)
8084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
8094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
8104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
8114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
8124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'abcde'.decode('ascii', 'ignore'),
8134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u'abcde'.decode('ascii', errors='ignore'))
8144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'abcde'.decode('ascii', 'replace'),
8154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u'abcde'.decode(encoding='ascii', errors='replace'))
8164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Error handling (unknown character names)
8184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
8194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Error handling (truncated escape sequence)
8214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")
8224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, "hello".decode, "test.unicode1")
8244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
8254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
8264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
8274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # executes PyUnicode_Encode()
8284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        import imp
8294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(
8304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ImportError,
8314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            imp.find_module,
8324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            "non-existing module",
8334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            [u"non-existing dir"]
8344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        )
8354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Error handling (wrong arguments)
8374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)
8384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Error handling (PyUnicode_EncodeDecimal())
8404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeError, int, u"\u0200")
8414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_codecs(self):
8434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Encoding
8444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('ascii'), 'hello')
8454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('utf-7'), 'hello')
8464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('utf-8'), 'hello')
8474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('utf8'), 'hello')
8484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
8494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
8504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'hello'.encode('latin-1'), 'hello')
8514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Roundtrip safety for BMP (just the first 1024 chars)
8534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c in xrange(1024):
8544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u = unichr(c)
8554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
8564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             'utf-16-be', 'raw_unicode_escape',
8574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             'unicode_escape', 'unicode_internal'):
8584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.assertEqual(unicode(u.encode(encoding),encoding), u)
8594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Roundtrip safety for BMP (just the first 256 chars)
8614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c in xrange(256):
8624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u = unichr(c)
8634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for encoding in ('latin-1',):
8644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.assertEqual(unicode(u.encode(encoding),encoding), u)
8654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Roundtrip safety for BMP (just the first 128 chars)
8674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for c in xrange(128):
8684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            u = unichr(c)
8694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for encoding in ('ascii',):
8704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.assertEqual(unicode(u.encode(encoding),encoding), u)
8714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Roundtrip safety for non-BMP (just a few chars)
8734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
8744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
8754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         #'raw_unicode_escape',
8764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         'unicode_escape', 'unicode_internal'):
8774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(unicode(u.encode(encoding),encoding), u)
8784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # UTF-8 must be roundtrip safe for all UCS-2 code points
8804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # This excludes surrogates: in the full range, there would be
8814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # a surrogate pair (\udbff\udc00), which gets converted back
8824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # to a non-BMP character (\U0010fc00)
8834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
8844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for encoding in ('utf-8',):
8854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(unicode(u.encode(encoding),encoding), u)
8864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_codecs_charmap(self):
8884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # 0-127
8894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s = ''.join(map(chr, xrange(128)))
8904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for encoding in (
8914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp037', 'cp1026',
8924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
8934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
8944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp863', 'cp865', 'cp866',
8954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
8964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
8974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
8984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'mac_cyrillic', 'mac_latin2',
8994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
9014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp1256', 'cp1257', 'cp1258',
9024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
9034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
9054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp1006', 'iso8859_8',
9064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ### These have undefined mappings:
9084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'cp424',
9094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ### These fail the round-trip:
9114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'cp875'
9124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ):
9144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(unicode(s, encoding).encode(encoding), s)
9154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # 128-255
9174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s = ''.join(map(chr, xrange(128, 256)))
9184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for encoding in (
9194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp037', 'cp1026',
9204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
9214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
9224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'cp863', 'cp865', 'cp866',
9234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
9244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'iso8859_2', 'iso8859_4', 'iso8859_5',
9254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'iso8859_9', 'koi8_r', 'latin_1',
9264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            'mac_cyrillic', 'mac_latin2',
9274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ### These have undefined mappings:
9294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
9304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'cp1256', 'cp1257', 'cp1258',
9314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
9324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'iso8859_3', 'iso8859_6', 'iso8859_7',
9334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
9344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ### These fail the round-trip:
9364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #'cp1006', 'cp875', 'iso8859_8',
9374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ):
9394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(unicode(s, encoding).encode(encoding), s)
9404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_concatenation(self):
9424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual((u"abc" u"def"), u"abcdef")
9434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(("abc" u"def"), u"abcdef")
9444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual((u"abc" "def"), u"abcdef")
9454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
9464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
9474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_printing(self):
9494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class BitBucket:
9504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def write(self, text):
9514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                pass
9524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        out = BitBucket()
9544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'abc'
9554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'abc', u'def'
9564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'abc', 'def'
9574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, 'abc', u'def'
9584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'abc\n'
9594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'abc\n',
9604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'abc\n',
9614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'def\n'
9624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print >>out, u'def\n'
9634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_ucs4(self):
9654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        x = u'\U00100000'
9664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
9674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(x, y)
9684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        y = r'\U00100000'
9704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
9714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(x, y)
9724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        y = r'\U00010000'
9734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
9744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(x, y)
9754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
9774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            '\U11111111'.decode("raw-unicode-escape")
9784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except UnicodeDecodeError as e:
9794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(e.start, 0)
9804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(e.end, 10)
9814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
9824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fail("Should have raised UnicodeDecodeError")
9834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_conversion(self):
9854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Make sure __unicode__() works properly
9864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo0:
9874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
9884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "foo"
9894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo1:
9914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
9924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u"foo"
9934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo2(object):
9954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
9964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u"foo"
9974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo3(object):
9994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "foo"
10014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo4(str):
10034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "foo"
10054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo5(unicode):
10074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "foo"
10094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo6(str):
10114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
10124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "foos"
10134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u"foou"
10164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo7(unicode):
10184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
10194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "foos"
10204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u"foou"
10224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo8(unicode):
10244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __new__(cls, content=""):
10254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return unicode.__new__(cls, 2*content)
10264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return self
10284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class Foo9(unicode):
10304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
10314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "string"
10324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
10334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return "not unicode"
10344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo0()), u"foo")
10364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo1()), u"foo")
10374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo2()), u"foo")
10384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo3()), u"foo")
10394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo4("bar")), u"foo")
10404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo5("bar")), u"foo")
10414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo6("bar")), u"foou")
10424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo7("bar")), u"foou")
10434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo8("foo")), u"foofoo")
10444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(str(Foo9("foo")), "string")
10454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(unicode(Foo9("foo")), u"not unicode")
10464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_unicode_repr(self):
10484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class s1:
10494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __repr__(self):
10504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return '\\n'
10514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class s2:
10534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __repr__(self):
10544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u'\\n'
10554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(repr(s1()), '\\n')
10574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(repr(s2()), '\\n')
10584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_expandtabs_overflows_gracefully(self):
10604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # This test only affects 32-bit platforms because expandtabs can only take
10614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # an int as the max value, not a 64-bit C long.  If expandtabs is changed
10624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # to take a 64-bit long, this test should apply to all platforms.
10634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if sys.maxint > (1 << 32) or struct.calcsize('P') != 4:
10644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return
10654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
10664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test__format__(self):
10684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        def test(value, format, expected):
10694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # test both with and without the trailing 's'
10704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(value.__format__(format), expected)
10714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(value.__format__(format + u's'), expected)
10724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'', u'', u'')
10744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abc', u'', u'abc')
10754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abc', u'.3', u'abc')
10764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'ab', u'.3', u'ab')
10774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abcdef', u'.3', u'abc')
10784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abcdef', u'.0', u'')
10794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abc', u'3.3', u'abc')
10804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abc', u'2.3', u'abc')
10814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abc', u'2.2', u'ab')
10824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'abc', u'3.2', u'ab ')
10834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'x<0', u'result')
10844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'x<5', u'result')
10854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'x<6', u'result')
10864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'x<7', u'resultx')
10874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'x<8', u'resultxx')
10884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u' <7', u'result ')
10894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'<7', u'result ')
10904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'>7', u' result')
10914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'>8', u'  result')
10924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'^8', u' result ')
10934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'^9', u' result  ')
10944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'result', u'^10', u'  result  ')
10954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'a', u'10000', u'a' + u' ' * 9999)
10964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'', u'10000', u' ' * 10000)
10974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        test(u'', u'10000000', u' ' * 10000000)
10984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # test mixing unicode and str
11004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'abc'.__format__('s'), u'abc')
11014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'abc'.__format__('->10s'), u'-------abc')
11024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_format(self):
11044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u''.format(), u'')
11054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a'.format(), u'a')
11064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'ab'.format(), u'ab')
11074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a{{'.format(), u'a{')
11084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a}}'.format(), u'a}')
11094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{{b'.format(), u'{b')
11104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'}}b'.format(), u'}b')
11114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a{{b'.format(), u'a{b')
11124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # examples from the PEP:
11144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        import datetime
11154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred")
11164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')),
11174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u"My name is Fred")
11184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'),
11194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u"My name is Fred :-{}")
11204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # datetime.__format__ doesn't work with unicode
11224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #d = datetime.date(2007, 8, 18)
11234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #self.assertEqual("The year is {0.year}".format(d),
11244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #                 "The year is 2007")
11254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # classes we'll use for testing
11274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class C:
11284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x=100):
11294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._x = x
11304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, spec):
11314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return spec
11324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class D:
11344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x):
11354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.x = x
11364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, spec):
11374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return str(self.x)
11384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # class with __str__, but no __format__
11404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class E:
11414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x):
11424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.x = x
11434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
11444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u'E(' + self.x + u')'
11454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # class with __repr__, but no __format__ or __str__
11474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class F:
11484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x):
11494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.x = x
11504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __repr__(self):
11514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u'F(' + self.x + u')'
11524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # class with __format__ that forwards to string, for some format_spec's
11544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class G:
11554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x):
11564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.x = x
11574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __str__(self):
11584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u"string is " + self.x
11594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, format_spec):
11604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if format_spec == 'd':
11614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return u'G(' + self.x + u')'
11624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return object.__format__(self, format_spec)
11634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # class that returns a bad type from __format__
11654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class H:
11664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, format_spec):
11674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return 1.0
11684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class I(datetime.date):
11704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, format_spec):
11714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return self.strftime(format_spec)
11724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class J(int):
11744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, format_spec):
11754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return int.__format__(self * 2, format_spec)
11764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u''.format(), u'')
11794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'abc'.format(), u'abc')
11804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}'.format(u'abc'), u'abc')
11814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:}'.format(u'abc'), u'abc')
11824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc')
11834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}X'.format(u'abc'), u'abcX')
11844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY')
11854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{1}'.format(1, u'abc'), u'abc')
11864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc')
11874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX')
11884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY')
11894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}'.format(-15), u'-15')
11904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc')
11914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc')
11924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{{'.format(), u'{')
11934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'}}'.format(), u'}')
11944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{{}}'.format(), u'{}')
11954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{{x}}'.format(), u'{x}')
11964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{{{0}}}'.format(123), u'{123}')
11974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}')
11984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'}}{{'.format(), u'}{')
11994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'}}x{{'.format(), u'}x{')
12004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # weird field names
12024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz')
12034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz')
12044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3')
12054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20')
12074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010')
12084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc')
12094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc')
12104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def')
12114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def')
12124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def')
12134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # strings
12154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc')
12164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab')
12174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc')
12184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'')
12194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc')
12204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc')
12214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab')
12224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ')
12234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result')
12244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result')
12254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result')
12264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx')
12274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx')
12284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ')
12294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ')
12304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:>7s}'.format(u'result'), u' result')
12314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:>8s}'.format(u'result'), u'  result')
12324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ')
12334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:^9s}'.format(u'result'), u' result  ')
12344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:^10s}'.format(u'result'), u'  result  ')
12354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999)
12364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000)
12374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000)
12384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # format specifiers for user defined type
12404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:abc}'.format(C()), u'abc')
12414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # !r and !s coercions
12434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello')
12444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello')
12454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello          ')
12464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello          ')
12474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'")
12484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'")
12494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)')
12504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # test fallback to object.__format__
12524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}'.format({}), u'{}')
12534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}'.format([]), u'[]')
12544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}'.format([1]), u'[1]')
12554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)')
12564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)')
12574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data')
12584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        msg = 'object.__format__ with a non-empty format string is deprecated'
12604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        with test_support.check_warnings((msg, PendingDeprecationWarning)):
12614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data)  ')
12624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data)  ')
12634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
12644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
12664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                                        month=8,
12674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                                        day=27)),
12684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         u"date: 2007-08-27")
12694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # test deriving from a builtin type and overriding __format__
12714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0}".format(J(10)), u"20")
12724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # string format specifiers
12754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{0:}'.format('a'), u'a')
12764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # computed format specifiers
12784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0:.{1}}".format(u'hello world', 5), u'hello')
12794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0:.{1}s}".format(u'hello world', 5), u'hello')
12804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), u'hello')
12814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), u'hello     ')
12824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), u'hello     ')
12834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # test various errors
12854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'{'.format)
12864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'}'.format)
12874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'a{'.format)
12884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'a}'.format)
12894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'{a'.format)
12904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'}a'.format)
12914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u'{0}'.format)
12924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u'{1}'.format, u'abc')
12934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(KeyError,   u'{x}'.format)
12944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"}{".format)
12954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{".format)
12964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"}".format)
12974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"abc{0:{}".format)
12984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0".format)
12994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u"{0.}".format)
13004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0.}".format, 0)
13014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u"{0[}".format)
13024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0[}".format, [])
13034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(KeyError,   u"{0]}".format)
13044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0.[]}".format, 0)
13054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0..foo}".format, 0)
13064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0[0}".format, 0)
13074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0[0:foo}".format, 0)
13084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(KeyError,   u"{c]}".format)
13094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{{ {{{0}}".format, 0)
13104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0}}".format, 0)
13114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(KeyError,   u"{foo}".format, bar=3)
13124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0!x}".format, 3)
13134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0!}".format, 0)
13144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0!rs}".format, 0)
13154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{!}".format)
13164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u"{:}".format)
13174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u"{:s}".format)
13184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(IndexError, u"{}".format)
13194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        big = u"23098475029384702983476098230754973209482573"
13204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, (u"{" + big + u"}").format)
13214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, (u"{[" + big + u"]}").format, [0])
13224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # issue 6089
13244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0[0]x}".format, [None])
13254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0[0](10)}".format, [None])
13264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # can't have a replacement on the field name portion
13284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(TypeError, u'{0[{1}]}'.format, u'abcdefg', 4)
13294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # exceed maximum recursion depth
13314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0:{1:{2}}}".format, u'abc', u's', u'')
13324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
13334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                          0, 1, 2, 3, 4, 5, 6, 7)
13344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # string format spec errors
13364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0:-s}".format, u'')
13374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, format, u"", u"-")
13384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u"{0:=s}".format, u'')
13394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # test combining string and unicode
13414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u"foo{0}".format('bar'), u'foobar')
13424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # This will try to convert the argument from unicode to str, which
13434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #  will succeed
13444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual("foo{0}".format(u'bar'), 'foobar')
13454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # This will try to convert the argument from unicode to str, which
13464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #  will fail
13474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
13484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_format_auto_numbering(self):
13504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class C:
13514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __init__(self, x=100):
13524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._x = x
13534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __format__(self, spec):
13544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return spec
13554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{}'.format(10), u'10')
13574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{:5}'.format('s'), u's    ')
13584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{!r}'.format('s'), u"'s'")
13594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{._x}'.format(C(10)), u'10')
13604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{[1]}'.format([1, 2]), u'2')
13614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4')
13624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c')
13634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a    x     b')
13654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b')
13664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # can't mix and match numbering and auto-numbering
13684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
13694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
13704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
13714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
13724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # can mix and match auto-numbering and named
13744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4')
13754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test')
13764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3')
13774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g')
13784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_raiseMemError(self):
13804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Ensure that the freelist contains a consistent object, even
13814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # when a string allocation fails with a MemoryError.
13824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # This used to crash the interpreter,
13834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # or leak references when the number was smaller.
13844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        charwidth = 4 if sys.maxunicode >= 0x10000 else 2
13854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Note: sys.maxsize is half of the actual max allocation because of
13864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # the signedness of Py_ssize_t.
13874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        alloc = lambda: u"a" * (sys.maxsize // charwidth * 2)
13884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(MemoryError, alloc)
13894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertRaises(MemoryError, alloc)
13904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def test_format_subclass(self):
13924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class U(unicode):
13934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            def __unicode__(self):
13944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return u'__unicode__ overridden'
13954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        u = U(u'xxx')
13964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual("%s" % u, u'__unicode__ overridden')
13974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.assertEqual("{}".format(u), '__unicode__ overridden')
13984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def test_main():
    """Pass this module's name to test_support.run_unittest()."""
    module_name = __name__
    test_support.run_unittest(module_name)

# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
1405