import test.test_support, unittest
import sys, codecs, htmlentitydefs, unicodedata

class PosReturn:
    """Callback object whose returned restart position is configurable.

    Set ``pos`` to the position that ``handle`` should report back; this can
    be used for configurable callbacks.
    """

    def __init__(self):
        self.pos = 0

    def handle(self, exc):
        """Return the replacement ``u"<?>"`` together with the current ``pos``.

        ``exc`` must provide ``object`` and ``start``.  A negative ``pos`` is
        measured from the end of ``exc.object`` when checking for progress.
        """
        oldpos = self.pos
        realpos = oldpos
        if realpos<0:
            realpos = len(exc.object) + realpos
        # if we don't advance this time, terminate on the next call
        # otherwise we'd get an endless loop
        if realpos <= exc.start:
            self.pos = len(exc.object)
        return (u"<?>", oldpos)

# A UnicodeEncodeError object with a bad start attribute
class BadStartUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
        # deliberately store a non-integer start position
        self.start = []

# A UnicodeEncodeError object with a bad object attribute
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
        # deliberately replace the unicode object with a list
        self.object = []

# A UnicodeDecodeError object without an end attribute
class NoEndUnicodeDecodeError(UnicodeDecodeError):
    def __init__(self):
        UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
        del self.end

# A UnicodeDecodeError object with a bad object attribute
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
    def __init__(self):
        UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
        # deliberately replace the byte string with a list
        self.object = []

# A UnicodeTranslateError object without a start attribute
class NoStartUnicodeTranslateError(UnicodeTranslateError):
    def __init__(self):
        UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
        del self.start

510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeTranslateError object without an end attribute 520a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass NoEndUnicodeTranslateError(UnicodeTranslateError): 530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self): 540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.end 560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeTranslateError object without an object attribute 580a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass NoObjectUnicodeTranslateError(UnicodeTranslateError): 590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self): 600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao del self.object 620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 630a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass CodecCallbackTest(unittest.TestCase): 640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_xmlcharrefreplace(self): 660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # replace unencodable characters which numeric character entities. 670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # For ascii, latin-1 and charmaps this is completely implemented 680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # in C and should be reasonably fast. 
690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = u"\u30b9\u30d1\u30e2 \xe4nd eggs" 700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s.encode("ascii", "xmlcharrefreplace"), 720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "スパモ änd eggs" 730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s.encode("latin-1", "xmlcharrefreplace"), 760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "スパモ \xe4nd eggs" 770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_xmlcharnamereplace(self): 800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This time use a named character entity for unencodable 810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # characters, if one is available. 820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def xmlcharnamereplace(exc): 840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(exc, UnicodeEncodeError): 850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = [] 870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for c in exc.object[exc.start:exc.end]: 880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)]) 900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except KeyError: 910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(u"&#%d;" % ord(c)) 920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"".join(l), exc.end) 930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error( 950a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao "test.xmlcharnamereplace", xmlcharnamereplace) 960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" 980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "«ℜ» = ⟨ሴ€⟩" 990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) 1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "\xabℜ\xbb = ⟨ሴ€⟩" 1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) 1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩" 1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) 1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_uninamereplace(self): 1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # We're using the names from the unicode database this time, 1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and we're doing "syntax highlighting" here, i.e. we include 1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # the replaced text in ANSI escape sequences. For this it is 1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # useful that the error handler is not called for every single 1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # unencodable character, but for a complete sequence of 1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # unencodable characters, otherwise we would output many 1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # unnecessary escape sequences. 
1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def uninamereplace(exc): 1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(exc, UnicodeEncodeError): 1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = [] 1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for c in exc.object[exc.start:exc.end]: 1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(unicodedata.name(c, u"0x%x" % ord(c))) 1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end) 1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error( 1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "test.uninamereplace", uninamereplace) 1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = u"\xac\u1234\u20ac\u8000" 1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) 1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) 1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" 1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) 1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_backslashescape(self): 1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Does the same as the "unicode-escape" encoding, but with different 1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # base encodings. 1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = u"a\xac\u1234\u20ac\u8000" 1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin += unichr(sys.maxunicode) 1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "a\\xac\\u1234\\u20ac\\u8000" 1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout += "\\U%08x" % sys.maxunicode 1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) 1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "a\xac\\u1234\\u20ac\\u8000" 1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout += "\\U%08x" % sys.maxunicode 1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) 1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "a\xac\\u1234\xa4\\u8000" 1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout += "\\U%08x" % sys.maxunicode 1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) 1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_decoding_callbacks(self): 1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This is a test for a decoding callback handler 
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # that allows the decoding of the invalid sequence 1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "\xc0\x80" and returns "\x00" instead of raising an error. 1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # All other illegal sequences will be handled strictly. 1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def relaxedutf8(exc): 1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(exc, UnicodeDecodeError): 1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if exc.object[exc.start:exc.start+2] == "\xc0\x80": 1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"\x00", exc.start+2) # retry after two bytes 1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise exc 1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.relaxedutf8", relaxedutf8) 1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # all the "\xc0\x80" will be decoded to "\x00" 1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" 1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = u"a\x00b\x00c\xfc\x00\x00" 1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) 1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised 1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = "\xc0\x80\xc0\x81" 1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(UnicodeDecodeError, sin.decode, 1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "utf-8", "test.relaxedutf8") 1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_charmapencode(self): 1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # For charmap encodings the replacement string will be 1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # mapped through the encoding again. This means, that 1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # to be able to use e.g. the "replace" handler, the 1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # charmap has to have a mapping for "?". 1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"]) 1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = u"abc" 1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "AABBCC" 1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) 1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = u"abcA" 1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) 1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao charmap[ord("?")] = "XYZ" 1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sin = u"abcDEF" 1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sout = "AABBCCXYZXYZXYZ" 1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) 1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao charmap[ord("?")] = u"XYZ" 2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) 2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao charmap[ord("?")] = u"XYZ" 2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.charmap_encode, sin, 
"replace", charmap) 2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_decodeunicodeinternal(self): 2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeDecodeError, 2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "\x00\x00\x00\x00\x00".decode, 2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "unicode-internal", 2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def handler_unicodeinternal(exc): 2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(exc, UnicodeDecodeError): 2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"\x01", 1) 2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), 2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u0000" 2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), 2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u0000\ufffd" 2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.hui", handler_unicodeinternal) 2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), 2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u0000\u0001\u0000" 2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_callbacks(self): 2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def handler1(exc): 2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(exc, UnicodeEncodeError) \ 2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao and not isinstance(exc, UnicodeDecodeError): 2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)] 2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"[%s]" % u"".join(l), exc.end) 2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.handler1", handler1) 2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def handler2(exc): 2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(exc, UnicodeDecodeError): 2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)] 2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"[%s]" % u"".join(l), exc.end+1) # skip one character 2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.handler2", handler2) 2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = "\x00\x81\x7f\x80\xff" 2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
self.assertEqual( 2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s.decode("ascii", "test.handler1"), 2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\x00[<129>]\x7f[<128>][<255>]" 2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s.decode("ascii", "test.handler2"), 2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\x00[<129>][<128>]" 2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), 2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u3042[<92><117><51>]xxx" 2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "\\u3042\u3xx".decode("unicode-escape", "test.handler1"), 2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u3042[<92><117><51>]xx" 2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0], 2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"z[<98>][<99>]" 2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"g\xfc\xdfrk".encode("ascii", "test.handler1"), 2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"g[<252><223>]rk" 2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"g\xfc\xdf".encode("ascii", "test.handler1"), 2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"g[<252><223>]" 2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_longstrings(self): 2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # test long strings to check for memory overflow problems 2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", 2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "backslashreplace"] 2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # register the handlers under different names, 2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # to prevent the codec from recognizing the name 2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for err in errors: 2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test." + err, codecs.lookup_error(err)) 2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = 1000 2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errors += [ "test." 
+ err for err in errors ] 2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: 2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", 3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "utf-8", "utf-7", "utf-16", "utf-32"): 3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for err in errors: 3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao uni.encode(enc, err) 3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except UnicodeError: 3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def check_exceptionobjectargs(self, exctype, args, msg): 3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion 3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check with one missing argument 3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, exctype, *args[:-1]) 3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check with one argument too much 3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, exctype, *(args + ["too much"])) 3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check with one argument of the wrong type 3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao wrongargs = [ "spam", u"eggs", 42, 1.0, None ] 3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for i in xrange(len(args)): 3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for wrongarg in wrongargs: 3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if type(wrongarg) is type(args[i]): 3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # build argument array 3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao callargs = [] 
3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for j in xrange(len(args)): 3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if i==j: 3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao callargs.append(wrongarg) 3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao callargs.append(args[i]) 3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, exctype, *callargs) 3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check with the correct number and type of arguments 3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao exc = exctype(*args) 3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(str(exc), msg) 3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_unicodeencodeerror(self): 3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", u"g\xfcrk", 1, 2, "ouch"], 3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't encode character u'\\xfc' in position 1: ouch" 3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", u"g\xfcrk", 1, 4, "ouch"], 3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't encode characters in position 1-3: ouch" 3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", u"\xfcx", 0, 1, "ouch"], 
3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't encode character u'\\xfc' in position 0: ouch" 3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", u"\u0100x", 0, 1, "ouch"], 3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't encode character u'\\u0100' in position 0: ouch" 3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", u"\uffffx", 0, 1, "ouch"], 3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't encode character u'\\uffff' in position 0: ouch" 3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", u"\U00010000x", 0, 1, "ouch"], 3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch" 3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_unicodedecodeerror(self): 3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeDecodeError, 3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", "g\xfcrk", 1, 2, "ouch"], 3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't decode byte 0xfc in position 1: ouch" 
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeDecodeError, 3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ["ascii", "g\xfcrk", 1, 3, "ouch"], 3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "'ascii' codec can't decode bytes in position 1-2: ouch" 3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_unicodetranslateerror(self): 3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError, 3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [u"g\xfcrk", 1, 2, "ouch"], 3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "can't translate character u'\\xfc' in position 1: ouch" 3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError, 3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [u"g\u0100rk", 1, 2, "ouch"], 3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "can't translate character u'\\u0100' in position 1: ouch" 3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError, 3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [u"g\uffffrk", 1, 2, "ouch"], 3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "can't translate character u'\\uffff' in position 1: ouch" 3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode > 0xffff: 3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 
3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError, 3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [u"g\U00010000rk", 1, 2, "ouch"], 3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "can't translate character u'\\U00010000' in position 1: ouch" 3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.check_exceptionobjectargs( 4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError, 4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao [u"g\xfcrk", 1, 3, "ouch"], 4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "can't translate characters in position 1-2: ouch" 4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badandgoodstrictexceptions(self): 4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "strict" complains about a non-exception passed in 4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.strict_errors, 4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 42 4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "strict" complains about the wrong exception type 4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Exception, 4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.strict_errors, 4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Exception("ouch") 4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If the correct exception is passed in, "strict" raises it 4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
UnicodeEncodeError, 4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.strict_errors, 4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch") 4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badandgoodignoreexceptions(self): 4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "ignore" complains about a non-exception passed in 4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.ignore_errors, 4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 42 4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "ignore" complains about the wrong exception type 4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.ignore_errors, 4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeError("ouch") 4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If the correct exception is passed in, "ignore" returns an empty replacement 4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"", 1) 4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"", 1) 4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 
4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"", 1) 4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badandgoodreplaceexceptions(self): 4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "replace" complains about a non-exception passed in 4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors, 4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 42 4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "replace" complains about the wrong exception type 4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors, 4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeError("ouch") 4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors, 4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao BadObjectUnicodeEncodeError() 4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors, 4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao BadObjectUnicodeDecodeError() 4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 
4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement 4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"?", 1) 4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\ufffd", 1) 4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\ufffd", 1) 4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badandgoodxmlcharrefreplaceexceptions(self): 4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "xmlcharrefreplace" complains about a non-exception passed in 4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.xmlcharrefreplace_errors, 4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 42 4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "xmlcharrefreplace" complains about the wrong exception types 4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.xmlcharrefreplace_errors, 
5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeError("ouch") 5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "xmlcharrefreplace" can only be used for encoding 5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.xmlcharrefreplace_errors, 5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") 5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.xmlcharrefreplace_errors, 5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError(u"\u3042", 0, 1, "ouch") 5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Use the correct exception 5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) 5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = "".join(unichr(c) for c in cs) 5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.xmlcharrefreplace_errors( 5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError("ascii", s, 0, len(s), "ouch") 5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ), 5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"".join(u"&#%d;" % ord(c) for c in s), len(s)) 5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badandgoodbackslashreplaceexceptions(self): 5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "backslashreplace" complains about a non-exception passed in 
5260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors, 5290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 42 5300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "backslashreplace" complains about the wrong exception types 5320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors, 5350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeError("ouch") 5360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "backslashreplace" can only be used for encoding 5380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors, 5410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") 5420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors, 5460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeTranslateError(u"\u3042", 0, 1, "ouch") 5470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Use the correct exception 5490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 5510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\u3042", 1) 
5520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), 5550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\x00", 1) 5560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), 5590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\xff", 1) 5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")), 5630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\u0100", 1) 5640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), 5670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\uffff", 1) 5680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode>0xffff: 5700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")), 5720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\U00010000", 1) 5730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 5750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")), 
5760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (u"\\U0010ffff", 1) 5770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badhandlerresults(self): 5800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) 5810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") 5820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for res in results: 5840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.badhandler", lambda x: res) 5850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for enc in encs: 5860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 5880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u3042".encode, 5890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao enc, 5900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "test.badhandler" 5910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 5920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for (enc, bytes) in ( 5930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ("ascii", "\xff"), 5940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ("utf-8", "\xff"), 5950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ("utf-7", "+x-"), 5960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ("unicode-internal", "\x00"), 5970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ): 5980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TypeError, 6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao bytes.decode, 6010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao enc, 6020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "test.badhandler" 6030a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao ) 6040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_lookup(self): 6060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 6070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) 6080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 6090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 6100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.xmlcharrefreplace_errors, 6110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.lookup_error("xmlcharrefreplace") 6120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 6130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual( 6140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.backslashreplace_errors, 6150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.lookup_error("backslashreplace") 6160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 6170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_unencodablereplacement(self): 6190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def unencrepl(exc): 6200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(exc, UnicodeEncodeError): 6210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"\u4242", exc.end) 6220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 6230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TypeError("don't know how to handle %r" % exc) 6240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.unencreplhandler", unencrepl) 6250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for enc in ("ascii", "iso-8859-1", "iso-8859-15"): 6260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises( 6270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao UnicodeEncodeError, 
6280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u"\u4242".encode, 6290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao enc, 6300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "test.unencreplhandler" 6310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 6320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badregistercall(self): 6340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 6350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Modules/_codecsmodule.c::register_error() 6360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Python/codecs.c::PyCodec_RegisterError() 6370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.register_error, 42) 6380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) 6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_badlookupcall(self): 6410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 6420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Modules/_codecsmodule.c::lookup_error() 6430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.lookup_error) 6440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_unknownhandler(self): 6460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 6470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Modules/_codecsmodule.c::lookup_error() 6480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") 6490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_xmlcharrefvalues(self): 6510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 6520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 
Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() 6530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and inline implementations 6540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) 6550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.maxunicode>=100000: 6560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao v += (100000, 500000, 1000000) 6570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s = u"".join([unichr(x) for x in v]) 6580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) 6590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for enc in ("ascii", "iso-8859-15"): 6600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): 6610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao s.encode(enc, err) 6620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_decodehelper(self): 6640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 6650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Objects/unicodeobject.c::unicode_decode_call_errorhandler() 6660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and callers 6670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown") 6680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def baddecodereturn1(exc): 6700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 42 6710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.baddecodereturn1", baddecodereturn1) 6720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1") 6730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1") 
6740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1") 6750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1") 6760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") 6770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") 6780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def baddecodereturn2(exc): 6800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"?", None) 6810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.baddecodereturn2", baddecodereturn2) 6820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2") 6830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler = PosReturn() 6850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.posreturn", handler.handle) 6860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Valid negative position 6880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = -1 6890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 6900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Valid negative position 6920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = -2 6930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?><?>") 6940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Negative position 
out of bounds 6960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = -3 6970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn") 6980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Valid positive position 7000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 1 7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Largest valid positive position (one beyond end of input) 7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 2 7050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>") 7060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Invalid positive position 7080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 3 7090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn") 7100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Restart at the "0" 7120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 6 7130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0") 7140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao class D(dict): 7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __getitem__(self, key): 7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError 7180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None}) 7190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(ValueError, 
codecs.charmap_decode, "\xff", "strict", D()) 7200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: 0x110000}) 7210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_encodehelper(self): 7230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 7240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 7250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and callers 7260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown") 7270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def badencodereturn1(exc): 7290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 42 7300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.badencodereturn1", badencodereturn1) 7310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1") 7320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def badencodereturn2(exc): 7340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (u"?", None) 7350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.badencodereturn2", badencodereturn2) 7360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2") 7370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler = PosReturn() 7390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao codecs.register_error("test.posreturn", handler.handle) 7400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Valid negative position 7420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = -1 
7430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 7440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Valid negative position 7460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = -2 7470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>") 7480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Negative position out of bounds 7500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = -3 7510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn") 7520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Valid positive position 7540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 1 7550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 7560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Largest valid positive position (one beyond end of input 7580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 2 7590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>") 7600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Invalid positive position 7620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 3 7630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn") 7640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao handler.pos = 0 7660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao class D(dict): 
7680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __getitem__(self, key): 7690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError 7700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): 7710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None}) 7720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D()) 7730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300}) 7740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_translatehelper(self): 7760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # enhance coverage of: 7770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 7780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and callers 7790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # (Unfortunately the errors argument is not directly accessible 7800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # from Python, so we can't test that much) 7810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao class D(dict): 7820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __getitem__(self, key): 7830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError 7840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(ValueError, u"\xff".translate, D()) 7850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1}) 7860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.assertRaises(TypeError, u"\xff".translate, {0xff: ()}) 7870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def test_bug828737(self): 
7890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao charmap = { 7900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ord("&"): u"&", 7910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ord("<"): u"<", 7920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ord(">"): u">", 7930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ord('"'): u""", 7940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao } 7950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for n in (1, 10, 100, 1000): 7970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao text = u'abc<def>ghi'*n 7980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao text.translate(charmap) 7990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8000a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef test_main(): 8010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao test.test_support.run_unittest(CodecCallbackTest) 8020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8030a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoif __name__ == "__main__": 8040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao test_main() 805