1edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport test.test_support, unittest 2edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport sys, codecs, htmlentitydefs, unicodedata 3edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 4edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass PosReturn: 5edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # this can be used for configurable callbacks 6edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 7edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 8edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.pos = 0 9edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 10edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def handle(self, exc): 11edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep oldpos = self.pos 12edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep realpos = oldpos 13edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if realpos<0: 14edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep realpos = len(exc.object) + realpos 15edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # if we don't advance this time, terminate on the next call 16edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # otherwise we'd get an endless loop 17edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if realpos <= exc.start: 18edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.pos = len(exc.object) 19edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"<?>", oldpos) 20edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 21edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeEncodeError object with a bad start attribute 22edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass BadStartUnicodeEncodeError(UnicodeEncodeError): 23edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 24edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad") 25edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.start = [] 26edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 27edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeEncodeError object with a bad object attribute 28edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass BadObjectUnicodeEncodeError(UnicodeEncodeError): 29edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 30edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad") 31edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.object = [] 32edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 33edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeDecodeError object without an end attribute 34edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass NoEndUnicodeDecodeError(UnicodeDecodeError): 35edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 36edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad") 37edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del self.end 38edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 39edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeDecodeError object with a bad object attribute 40edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass BadObjectUnicodeDecodeError(UnicodeDecodeError): 41edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 42edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad") 43edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.object = [] 44edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 45edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeTranslateError object without a start attribute 46edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass NoStartUnicodeTranslateError(UnicodeTranslateError): 47edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 48edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 49edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del self.start 50edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 51edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeTranslateError object without an end attribute 52edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass NoEndUnicodeTranslateError(UnicodeTranslateError): 53edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 54edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 55edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del self.end 56edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 57edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# A UnicodeTranslateError object without an object attribute 58edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass NoObjectUnicodeTranslateError(UnicodeTranslateError): 59edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self): 60edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") 61edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del self.object 62edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 63edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass CodecCallbackTest(unittest.TestCase): 64edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 65edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_xmlcharrefreplace(self): 66edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # replace unencodable characters which numeric character entities. 67edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # For ascii, latin-1 and charmaps this is completely implemented 68edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # in C and should be reasonably fast. 69edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s = u"\u30b9\u30d1\u30e2 \xe4nd eggs" 70edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 71edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s.encode("ascii", "xmlcharrefreplace"), 72edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "スパモ änd eggs" 73edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 74edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 75edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s.encode("latin-1", "xmlcharrefreplace"), 76edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "スパモ \xe4nd eggs" 77edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 78edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 79edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_xmlcharnamereplace(self): 80edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # This time use a named character entity for unencodable 81edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # characters, if one is available. 82edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 83edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def xmlcharnamereplace(exc): 84edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not isinstance(exc, UnicodeEncodeError): 85edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 86edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l = [] 87edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for c in exc.object[exc.start:exc.end]: 88edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep try: 89edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)]) 90edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep except KeyError: 91edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l.append(u"&#%d;" % ord(c)) 92edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"".join(l), exc.end) 93edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 94edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error( 95edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "test.xmlcharnamereplace", xmlcharnamereplace) 96edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 97edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" 98edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "«ℜ» = ⟨ሴ€⟩" 99edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) 100edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "\xabℜ\xbb = ⟨ሴ€⟩" 101edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) 102edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩" 103edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) 104edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 105edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_uninamereplace(self): 106edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # We're using the names from the unicode database this time, 107edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # and we're doing "syntax highlighting" here, i.e. we include 108edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # the replaced text in ANSI escape sequences. For this it is 109edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # useful that the error handler is not called for every single 110edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # unencodable character, but for a complete sequence of 111edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # unencodable characters, otherwise we would output many 112edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # unnecessary escape sequences. 113edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 114edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def uninamereplace(exc): 115edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not isinstance(exc, UnicodeEncodeError): 116edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 117edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l = [] 118edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for c in exc.object[exc.start:exc.end]: 119edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l.append(unicodedata.name(c, u"0x%x" % ord(c))) 120edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end) 121edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 122edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error( 123edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "test.uninamereplace", uninamereplace) 124edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 125edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = u"\xac\u1234\u20ac\u8000" 126edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 127edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) 128edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 129edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 130edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) 131edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 132edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" 133edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) 134edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 135edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_backslashescape(self): 136edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Does the same as the "unicode-escape" encoding, but with different 137edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # base encodings. 138edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = u"a\xac\u1234\u20ac\u8000" 139edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 140edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin += unichr(sys.maxunicode) 141edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "a\\xac\\u1234\\u20ac\\u8000" 142edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 143edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout += "\\U%08x" % sys.maxunicode 144edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) 145edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 146edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "a\xac\\u1234\\u20ac\\u8000" 147edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 148edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout += "\\U%08x" % sys.maxunicode 149edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) 150edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 151edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "a\xac\\u1234\xa4\\u8000" 152edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 153edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout += "\\U%08x" % sys.maxunicode 154edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) 155edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 156edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_decoding_callbacks(self): 157edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # This is a test for a decoding callback handler 158edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # that allows the decoding of the invalid sequence 159edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "\xc0\x80" and returns "\x00" instead of raising an error. 160edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # All other illegal sequences will be handled strictly. 161edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def relaxedutf8(exc): 162edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not isinstance(exc, UnicodeDecodeError): 163edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 164edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if exc.object[exc.start:exc.start+2] == "\xc0\x80": 165edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"\x00", exc.start+2) # retry after two bytes 166edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 167edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise exc 168edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 169edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.relaxedutf8", relaxedutf8) 170edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 171edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # all the "\xc0\x80" will be decoded to "\x00" 172edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" 173edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = u"a\x00b\x00c\xfc\x00\x00" 174edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) 175edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 176edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised 177edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = "\xc0\x80\xc0\x81" 178edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(UnicodeDecodeError, sin.decode, 179edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "utf-8", "test.relaxedutf8") 180edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 181edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_charmapencode(self): 182edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # For charmap encodings the replacement string will be 183edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # mapped through the encoding again. This means, that 184edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # to be able to use e.g. the "replace" handler, the 185edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # charmap has to have a mapping for "?". 186edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"]) 187edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = u"abc" 188edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "AABBCC" 189edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) 190edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 191edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = u"abcA" 192edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) 193edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 194edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep charmap[ord("?")] = "XYZ" 195edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sin = u"abcDEF" 196edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sout = "AABBCCXYZXYZXYZ" 197edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) 198edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 199edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep charmap[ord("?")] = u"XYZ" 200edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) 201edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 202edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep charmap[ord("?")] = u"XYZ" 203edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) 204edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 205edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_decodeunicodeinternal(self): 206edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 207edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError, 208edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "\x00\x00\x00\x00\x00".decode, 209edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "unicode-internal", 210edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 211edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 212edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def handler_unicodeinternal(exc): 213edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not isinstance(exc, UnicodeDecodeError): 214edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 215edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"\x01", 1) 216edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 217edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 218edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), 219edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u0000" 220edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 221edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 222edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 223edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), 224edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u0000\ufffd" 225edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 226edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 227edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.hui", handler_unicodeinternal) 228edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 229edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 230edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), 231edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u0000\u0001\u0000" 232edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 233edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 234edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_callbacks(self): 235edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def handler1(exc): 236edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not isinstance(exc, UnicodeEncodeError) \ 237edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep and not isinstance(exc, UnicodeDecodeError): 238edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 239edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)] 240edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"[%s]" % u"".join(l), exc.end) 241edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 242edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.handler1", handler1) 243edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 244edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def handler2(exc): 245edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not isinstance(exc, UnicodeDecodeError): 246edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 247edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)] 248edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"[%s]" % u"".join(l), exc.end+1) # skip one character 249edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 250edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.handler2", handler2) 251edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 252edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s = "\x00\x81\x7f\x80\xff" 253edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 254edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 255edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s.decode("ascii", "test.handler1"), 256edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\x00[<129>]\x7f[<128>][<255>]" 257edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 258edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 259edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s.decode("ascii", "test.handler2"), 260edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\x00[<129>][<128>]" 261edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 262edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 263edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 264edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), 265edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u3042[<92><117><51>]xxx" 266edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 267edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 268edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 269edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "\\u3042\u3xx".decode("unicode-escape", "test.handler1"), 270edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u3042[<92><117><51>]xx" 271edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 272edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 273edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 274edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0], 275edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"z[<98>][<99>]" 276edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 277edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 278edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 279edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"g\xfc\xdfrk".encode("ascii", "test.handler1"), 280edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"g[<252><223>]rk" 281edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 282edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 283edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 284edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"g\xfc\xdf".encode("ascii", "test.handler1"), 285edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"g[<252><223>]" 286edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 287edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 288edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_longstrings(self): 289edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # test long strings to check for memory overflow problems 290edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", 291edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "backslashreplace"] 292edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # register the handlers under different names, 293edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # to prevent the codec from recognizing the name 294edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for err in errors: 295edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test." + err, codecs.lookup_error(err)) 296edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep l = 1000 297edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep errors += [ "test." + err for err in errors ] 298edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: 299edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", 300edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "utf-8", "utf-7", "utf-16", "utf-32"): 301edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for err in errors: 302edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep try: 303edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep uni.encode(enc, err) 304edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep except UnicodeError: 305edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep pass 306edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 307edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def check_exceptionobjectargs(self, exctype, args, msg): 308edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion 309edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # check with one missing argument 310edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, exctype, *args[:-1]) 311edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # check with one argument too much 312edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, exctype, *(args + ["too much"])) 313edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # check with one argument of the wrong type 314edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep wrongargs = [ "spam", u"eggs", 42, 1.0, None ] 315edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for i in xrange(len(args)): 316edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for wrongarg in wrongargs: 317edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if type(wrongarg) is type(args[i]): 318edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep continue 319edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # build argument array 320edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep callargs = [] 321edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for j in xrange(len(args)): 322edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if i==j: 323edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep callargs.append(wrongarg) 324edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 325edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep callargs.append(args[i]) 326edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, exctype, *callargs) 327edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 328edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # check with the correct number and type of arguments 329edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep exc = exctype(*args) 330edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(str(exc), msg) 331edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 332edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_unicodeencodeerror(self): 333edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 334edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 335edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", u"g\xfcrk", 1, 2, "ouch"], 336edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't encode character u'\\xfc' in position 1: ouch" 337edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 338edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 339edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 340edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", u"g\xfcrk", 1, 4, "ouch"], 341edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't encode characters in position 1-3: ouch" 342edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 343edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 344edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 345edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", u"\xfcx", 0, 1, "ouch"], 346edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't encode character u'\\xfc' in position 0: ouch" 347edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 348edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 349edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 350edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", u"\u0100x", 0, 1, "ouch"], 351edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't encode character u'\\u0100' in position 0: ouch" 352edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 353edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 354edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 355edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", u"\uffffx", 0, 1, "ouch"], 356edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't encode character u'\\uffff' in position 0: ouch" 357edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 358edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 359edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 360edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 361edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", u"\U00010000x", 0, 1, "ouch"], 362edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch" 363edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 364edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 365edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_unicodedecodeerror(self): 366edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 367edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError, 368edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", "g\xfcrk", 1, 2, "ouch"], 369edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't decode byte 0xfc in position 1: ouch" 370edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 371edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 372edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError, 373edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ["ascii", "g\xfcrk", 1, 3, "ouch"], 374edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "'ascii' codec can't decode bytes in position 1-2: ouch" 375edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 376edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 377edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_unicodetranslateerror(self): 378edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 379edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError, 380edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep [u"g\xfcrk", 1, 2, "ouch"], 381edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "can't translate character u'\\xfc' in position 1: ouch" 382edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 383edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 384edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError, 385edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep [u"g\u0100rk", 1, 2, "ouch"], 386edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "can't translate character u'\\u0100' in position 1: ouch" 387edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 388edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 389edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError, 390edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep [u"g\uffffrk", 1, 2, "ouch"], 391edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "can't translate character u'\\uffff' in position 1: ouch" 392edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 393edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode > 0xffff: 394edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 395edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError, 396edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep [u"g\U00010000rk", 1, 2, "ouch"], 397edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "can't translate character u'\\U00010000' in position 1: ouch" 398edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 399edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.check_exceptionobjectargs( 400edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError, 401edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep [u"g\xfcrk", 1, 3, "ouch"], 402edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "can't translate characters in position 1-2: ouch" 403edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 404edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 405edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badandgoodstrictexceptions(self): 406edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "strict" complains about a non-exception passed in 407edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 408edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 409edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.strict_errors, 410edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 42 411edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 412edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "strict" complains about the wrong exception type 413edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 414edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Exception, 415edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.strict_errors, 416edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Exception("ouch") 417edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 418edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 419edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # If the correct exception is passed in, "strict" raises it 420edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 421edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 422edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.strict_errors, 423edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch") 424edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 425edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 426edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badandgoodignoreexceptions(self): 427edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "ignore" complains about a non-exception passed in 428edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 429edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 430edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.ignore_errors, 431edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 42 432edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 433edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "ignore" complains about the wrong exception type 434edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 435edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 436edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.ignore_errors, 437edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeError("ouch") 438edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 439edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # If the correct exception is passed in, "ignore" returns an empty replacement 440edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 441edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 442edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"", 1) 443edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 444edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 445edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 446edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"", 1) 447edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 448edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 449edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 450edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"", 1) 451edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 452edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 453edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badandgoodreplaceexceptions(self): 454edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "replace" complains about a non-exception passed in 455edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 456edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 457edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors, 458edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 42 459edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 460edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "replace" complains about the wrong exception type 461edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 462edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 463edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors, 464edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeError("ouch") 465edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 466edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 467edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 468edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors, 469edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep BadObjectUnicodeEncodeError() 470edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 471edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 472edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 473edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors, 474edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep BadObjectUnicodeDecodeError() 475edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 476edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement 477edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 478edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 479edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"?", 1) 480edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 481edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 482edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 483edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\ufffd", 1) 484edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 485edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 486edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 487edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\ufffd", 1) 488edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 489edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 490edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badandgoodxmlcharrefreplaceexceptions(self): 491edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "xmlcharrefreplace" complains about a non-exception passed in 492edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 493edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 494edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.xmlcharrefreplace_errors, 495edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 42 496edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 497edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "xmlcharrefreplace" complains about the wrong exception types 498edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 499edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 500edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.xmlcharrefreplace_errors, 501edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeError("ouch") 502edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 503edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "xmlcharrefreplace" can only be used for encoding 504edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 505edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 506edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.xmlcharrefreplace_errors, 507edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") 508edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 509edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 510edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 511edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.xmlcharrefreplace_errors, 512edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError(u"\u3042", 0, 1, "ouch") 513edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 514edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Use the correct exception 515edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) 516edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s = "".join(unichr(c) for c in cs) 517edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 518edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.xmlcharrefreplace_errors( 519edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError("ascii", s, 0, len(s), "ouch") 520edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ), 521edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"".join(u"&#%d;" % ord(c) for c in s), len(s)) 522edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 523edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 524edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badandgoodbackslashreplaceexceptions(self): 525edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "backslashreplace" complains about a non-exception passed in 526edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 527edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 528edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors, 529edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 42 530edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 531edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "backslashreplace" complains about the wrong exception types 532edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 533edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 534edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors, 535edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeError("ouch") 536edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 537edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # "backslashreplace" can only be used for encoding 538edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 539edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 540edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors, 541edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") 542edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 543edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 544edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 545edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors, 546edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeTranslateError(u"\u3042", 0, 1, "ouch") 547edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 548edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Use the correct exception 549edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 550edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 551edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\u3042", 1) 552edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 553edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 554edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), 555edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\x00", 1) 556edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 557edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 558edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), 559edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\xff", 1) 560edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 561edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 562edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")), 563edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\u0100", 1) 564edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 565edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 566edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), 567edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\uffff", 1) 568edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 569edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode>0xffff: 570edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 571edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")), 572edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\U00010000", 1) 573edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 574edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 575edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")), 576edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (u"\\U0010ffff", 1) 577edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 578edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 579edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badhandlerresults(self): 580edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) 581edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") 582edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 583edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for res in results: 584edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.badhandler", lambda x: res) 585edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for enc in encs: 586edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 587edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 588edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u3042".encode, 589edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep enc, 590edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "test.badhandler" 591edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 592edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for (enc, bytes) in ( 593edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ("ascii", "\xff"), 594edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ("utf-8", "\xff"), 595edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ("utf-7", "+x-"), 596edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ("unicode-internal", "\x00"), 597edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ): 598edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 599edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep TypeError, 600edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep bytes.decode, 601edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep enc, 602edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "test.badhandler" 603edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 604edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 605edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_lookup(self): 606edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 607edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) 608edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 609edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 610edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.xmlcharrefreplace_errors, 611edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.lookup_error("xmlcharrefreplace") 612edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 613edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual( 614edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.backslashreplace_errors, 615edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.lookup_error("backslashreplace") 616edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 617edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 618edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_unencodablereplacement(self): 619edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def unencrepl(exc): 620edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if isinstance(exc, UnicodeEncodeError): 621edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"\u4242", exc.end) 622edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 623edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise TypeError("don't know how to handle %r" % exc) 624edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.unencreplhandler", unencrepl) 625edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for enc in ("ascii", "iso-8859-1", "iso-8859-15"): 626edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises( 627edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep UnicodeEncodeError, 628edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep u"\u4242".encode, 629edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep enc, 630edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep "test.unencreplhandler" 631edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ) 632edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 633edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badregistercall(self): 634edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 635edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Modules/_codecsmodule.c::register_error() 636edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Python/codecs.c::PyCodec_RegisterError() 637edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.register_error, 42) 638edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) 639edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 640edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_badlookupcall(self): 641edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 642edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Modules/_codecsmodule.c::lookup_error() 643edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.lookup_error) 644edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 645edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_unknownhandler(self): 646edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 647edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Modules/_codecsmodule.c::lookup_error() 648edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") 649edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 650edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_xmlcharrefvalues(self): 651edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 652edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() 653edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # and inline implementations 654edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) 655edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if sys.maxunicode>=100000: 656edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep v += (100000, 500000, 1000000) 657edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s = u"".join([unichr(x) for x in v]) 658edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) 659edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for enc in ("ascii", "iso-8859-15"): 660edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): 661edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep s.encode(enc, err) 662edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 663edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_decodehelper(self): 664edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 665edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Objects/unicodeobject.c::unicode_decode_call_errorhandler() 666edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # and callers 667edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown") 668edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 669edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def baddecodereturn1(exc): 670edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return 42 671edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.baddecodereturn1", baddecodereturn1) 672edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1") 673edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1") 674edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1") 675edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1") 676edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") 677edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") 678edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 679edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def baddecodereturn2(exc): 680edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"?", None) 681edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.baddecodereturn2", baddecodereturn2) 682edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2") 683edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 684edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler = PosReturn() 685edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.posreturn", handler.handle) 686edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 687edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Valid negative position 688edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = -1 689edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 690edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 691edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Valid negative position 692edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = -2 693edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?><?>") 694edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 695edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Negative position out of bounds 696edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = -3 697edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn") 698edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 699edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Valid positive position 700edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 1 701edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 702edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 703edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Largest valid positive position (one beyond end of input) 704edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 2 705edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>") 706edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 707edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Invalid positive position 708edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 3 709edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn") 710edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 711edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Restart at the "0" 712edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 6 713edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0") 714edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 715edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep class D(dict): 716edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __getitem__(self, key): 717edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise ValueError 718edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None}) 719edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D()) 720edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: 0x110000}) 721edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 722edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_encodehelper(self): 723edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 724edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 725edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # and callers 726edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown") 727edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 728edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def badencodereturn1(exc): 729edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return 42 730edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.badencodereturn1", badencodereturn1) 731edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1") 732edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 733edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def badencodereturn2(exc): 734edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return (u"?", None) 735edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.badencodereturn2", badencodereturn2) 736edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2") 737edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 738edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler = PosReturn() 739edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.register_error("test.posreturn", handler.handle) 740edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 741edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Valid negative position 742edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = -1 743edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 744edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 745edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Valid negative position 746edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = -2 747edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>") 748edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 749edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Negative position out of bounds 750edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = -3 751edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn") 752edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 753edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Valid positive position 754edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 1 755edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 756edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 757edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Largest valid positive position (one beyond end of input 758edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 2 759edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>") 760edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 761edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Invalid positive position 762edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 3 763edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn") 764edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 765edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep handler.pos = 0 766edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 767edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep class D(dict): 768edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __getitem__(self, key): 769edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise ValueError 770edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): 771edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None}) 772edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D()) 773edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300}) 774edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 775edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_translatehelper(self): 776edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # enhance coverage of: 777edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 778edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # and callers 779edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # (Unfortunately the errors argument is not directly accessible 780edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # from Python, so we can't test that much) 781edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep class D(dict): 782edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __getitem__(self, key): 783edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise ValueError 784edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(ValueError, u"\xff".translate, D()) 785edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1}) 786edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.assertRaises(TypeError, u"\xff".translate, {0xff: ()}) 787edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 788edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def test_bug828737(self): 789edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep charmap = { 790edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ord("&"): u"&", 791edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ord("<"): u"<", 792edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ord(">"): u">", 793edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep ord('"'): u""", 794edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep } 795edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 796edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep for n in (1, 10, 100, 1000): 797edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep text = u'abc<def>ghi'*n 798edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep text.translate(charmap) 799edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 800edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepdef test_main(): 801edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep test.test_support.run_unittest(CodecCallbackTest) 802edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 803edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepif __name__ == "__main__": 804edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep test_main() 805