10a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport test.test_support, unittest
20a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport sys, codecs, htmlentitydefs, unicodedata
30a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class PosReturn:
    """Configurable error callback: ``handle`` reports ``self.pos`` as the
    position returned to the codec, together with the replacement u"<?>".
    """

    def __init__(self):
        # Position reported by the next handle() call.
        self.pos = 0

    def handle(self, exc):
        """Return ``(u"<?>", pos)`` where pos is ``self.pos`` on entry.

        A negative ``self.pos`` is interpreted relative to the end of
        ``exc.object`` when deciding whether the position advances.
        """
        reported = self.pos
        if reported < 0:
            absolute = len(exc.object) + reported
        else:
            absolute = reported
        # When the reported position does not move past the start of the
        # error, arrange for the next call to report the end of the input;
        # otherwise the same error would be handed back forever.
        if exc.start >= absolute:
            self.pos = len(exc.object)
        return (u"<?>", reported)
200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeEncodeError object with a bad start attribute
class BadStartUnicodeEncodeError(UnicodeEncodeError):
    """UnicodeEncodeError whose ``start`` attribute is overwritten with a list."""

    def __init__(self):
        super(BadStartUnicodeEncodeError, self).__init__(
            "ascii", u"", 0, 1, "bad")
        # Replace the valid start index with an object of the wrong type.
        self.start = []
260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeEncodeError object with a bad object attribute
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
    """UnicodeEncodeError whose ``object`` attribute is overwritten with a list."""

    def __init__(self):
        super(BadObjectUnicodeEncodeError, self).__init__(
            "ascii", u"", 0, 1, "bad")
        # Replace the unicode input with an object of the wrong type.
        self.object = []
320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeDecodeError object without an end attribute
class NoEndUnicodeDecodeError(UnicodeDecodeError):
    """UnicodeDecodeError whose ``end`` attribute is deleted after construction."""

    def __init__(self):
        super(NoEndUnicodeDecodeError, self).__init__(
            "ascii", "", 0, 1, "bad")
        # Remove the end index so the instance is incomplete.
        del self.end
380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeDecodeError object with a bad object attribute
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
    """UnicodeDecodeError whose ``object`` attribute is overwritten with a list."""

    def __init__(self):
        super(BadObjectUnicodeDecodeError, self).__init__(
            "ascii", "", 0, 1, "bad")
        # Replace the string input with an object of the wrong type.
        self.object = []
440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeTranslateError object without a start attribute
class NoStartUnicodeTranslateError(UnicodeTranslateError):
    """UnicodeTranslateError whose ``start`` attribute is deleted after construction."""

    def __init__(self):
        super(NoStartUnicodeTranslateError, self).__init__(
            u"", 0, 1, "bad")
        # Remove the start index so the instance is incomplete.
        del self.start
500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeTranslateError object without an end attribute
class NoEndUnicodeTranslateError(UnicodeTranslateError):
    """UnicodeTranslateError whose ``end`` attribute is deleted after construction."""

    def __init__(self):
        super(NoEndUnicodeTranslateError, self).__init__(
            u"", 0, 1, "bad")
        # Remove the end index so the instance is incomplete.
        del self.end
560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# A UnicodeTranslateError object without an object attribute
class NoObjectUnicodeTranslateError(UnicodeTranslateError):
    """UnicodeTranslateError whose ``object`` attribute is deleted after construction."""

    def __init__(self):
        super(NoObjectUnicodeTranslateError, self).__init__(
            u"", 0, 1, "bad")
        # Remove the input object so the instance is incomplete.
        del self.object
620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
630a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass CodecCallbackTest(unittest.TestCase):
640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_xmlcharrefreplace(self):
660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # replace unencodable characters which numeric character entities.
670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # For ascii, latin-1 and charmaps this is completely implemented
680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # in C and should be reasonably fast.
690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            s.encode("ascii", "xmlcharrefreplace"),
720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "&#12473;&#12497;&#12514; &#228;nd eggs"
730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            s.encode("latin-1", "xmlcharrefreplace"),
760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "&#12473;&#12497;&#12514; \xe4nd eggs"
770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_xmlcharnamereplace(self):
800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # This time use a named character entity for unencodable
810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # characters, if one is available.
820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def xmlcharnamereplace(exc):
840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not isinstance(exc, UnicodeEncodeError):
850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError("don't know how to handle %r" % exc)
860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            l = []
870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for c in exc.object[exc.start:exc.end]:
880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except KeyError:
910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    l.append(u"&#%d;" % ord(c))
920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (u"".join(l), exc.end)
930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error(
950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "test.xmlcharnamereplace", xmlcharnamereplace)
960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_uninamereplace(self):
1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # We're using the names from the unicode database this time,
1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and we're doing "syntax highlighting" here, i.e. we include
1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # the replaced text in ANSI escape sequences. For this it is
1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # useful that the error handler is not called for every single
1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # unencodable character, but for a complete sequence of
1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # unencodable characters, otherwise we would output many
1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # unnecessary escape sequences.
1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def uninamereplace(exc):
1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not isinstance(exc, UnicodeEncodeError):
1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError("don't know how to handle %r" % exc)
1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            l = []
1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for c in exc.object[exc.start:exc.end]:
1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error(
1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "test.uninamereplace", uninamereplace)
1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = u"\xac\u1234\u20ac\u8000"
1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_backslashescape(self):
1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Does the same as the "unicode-escape" encoding, but with different
1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # base encodings.
1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = u"a\xac\u1234\u20ac\u8000"
1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            sin += unichr(sys.maxunicode)
1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "a\\xac\\u1234\\u20ac\\u8000"
1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            sout += "\\U%08x" % sys.maxunicode
1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "a\xac\\u1234\\u20ac\\u8000"
1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            sout += "\\U%08x" % sys.maxunicode
1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "a\xac\\u1234\xa4\\u8000"
1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            sout += "\\U%08x" % sys.maxunicode
1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_decoding_callbacks(self):
1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # This is a test for a decoding callback handler
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # that allows the decoding of the invalid sequence
1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "\xc0\x80" and returns "\x00" instead of raising an error.
1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # All other illegal sequences will be handled strictly.
1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def relaxedutf8(exc):
1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not isinstance(exc, UnicodeDecodeError):
1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError("don't know how to handle %r" % exc)
1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if exc.object[exc.start:exc.start+2] == "\xc0\x80":
1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return (u"\x00", exc.start+2) # retry after two bytes
1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise exc
1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.relaxedutf8", relaxedutf8)
1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # all the "\xc0\x80" will be decoded to "\x00"
1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = u"a\x00b\x00c\xfc\x00\x00"
1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = "\xc0\x80\xc0\x81"
1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(UnicodeDecodeError, sin.decode,
1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                          "utf-8", "test.relaxedutf8")
1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_charmapencode(self):
1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # For charmap encodings the replacement string will be
1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # mapped through the encoding again. This means, that
1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # to be able to use e.g. the "replace" handler, the
1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # charmap has to have a mapping for "?".
1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = u"abc"
1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "AABBCC"
1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = u"abcA"
1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        charmap[ord("?")] = "XYZ"
1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sin = u"abcDEF"
1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sout = "AABBCCXYZXYZXYZ"
1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        charmap[ord("?")] = u"XYZ"
2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        charmap[ord("?")] = u"XYZ"
2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_decodeunicodeinternal(self):
2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeDecodeError,
2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "\x00\x00\x00\x00\x00".decode,
2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "unicode-internal",
2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            def handler_unicodeinternal(exc):
2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if not isinstance(exc, UnicodeDecodeError):
2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise TypeError("don't know how to handle %r" % exc)
2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return (u"\x01", 1)
2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertEqual(
2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                u"\u0000"
2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertEqual(
2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                u"\u0000\ufffd"
2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.register_error("test.hui", handler_unicodeinternal)
2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertEqual(
2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                u"\u0000\u0001\u0000"
2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_callbacks(self):
2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def handler1(exc):
2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not isinstance(exc, UnicodeEncodeError) \
2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao               and not isinstance(exc, UnicodeDecodeError):
2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError("don't know how to handle %r" % exc)
2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (u"[%s]" % u"".join(l), exc.end)
2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.handler1", handler1)
2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def handler2(exc):
2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not isinstance(exc, UnicodeDecodeError):
2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError("don't know how to handle %r" % exc)
2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.handler2", handler2)
2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        s = "\x00\x81\x7f\x80\xff"
2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            s.decode("ascii", "test.handler1"),
2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"\x00[<129>]\x7f[<128>][<255>]"
2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            s.decode("ascii", "test.handler2"),
2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"\x00[<129>][<128>]"
2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"\u3042[<92><117><51>]xxx"
2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"\u3042[<92><117><51>]xx"
2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"z[<98>][<99>]"
2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"g[<252><223>]rk"
2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"g\xfc\xdf".encode("ascii", "test.handler1"),
2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            u"g[<252><223>]"
2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_longstrings(self):
2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # test long strings to check for memory overflow problems
2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                   "backslashreplace"]
2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # register the handlers under different names,
2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # to prevent the codec from recognizing the name
2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for err in errors:
2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.register_error("test." + err, codecs.lookup_error(err))
2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        l = 1000
2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        errors += [ "test." + err for err in errors ]
2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        "utf-8", "utf-7", "utf-16", "utf-32"):
3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                for err in errors:
3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    try:
3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        uni.encode(enc, err)
3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    except UnicodeError:
3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        pass
3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def check_exceptionobjectargs(self, exctype, args, msg):
3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # check with one missing argument
3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, exctype, *args[:-1])
3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # check with one argument too much
3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, exctype, *(args + ["too much"]))
3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # check with one argument of the wrong type
3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for i in xrange(len(args)):
3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for wrongarg in wrongargs:
3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if type(wrongarg) is type(args[i]):
3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    continue
3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # build argument array
3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                callargs = []
3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                for j in xrange(len(args)):
3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if i==j:
3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        callargs.append(wrongarg)
3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    else:
3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        callargs.append(args[i])
3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.assertRaises(TypeError, exctype, *callargs)
3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # check with the correct number and type of arguments
3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        exc = exctype(*args)
3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(str(exc), msg)
3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_unicodeencodeerror(self):
3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError,
3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", u"g\xfcrk", 1, 2, "ouch"],
3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError,
3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", u"g\xfcrk", 1, 4, "ouch"],
3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't encode characters in position 1-3: ouch"
3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError,
3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", u"\xfcx", 0, 1, "ouch"],
3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError,
3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", u"\u0100x", 0, 1, "ouch"],
3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError,
3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", u"\uffffx", 0, 1, "ouch"],
3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.check_exceptionobjectargs(
3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                UnicodeEncodeError,
3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ["ascii", u"\U00010000x", 0, 1, "ouch"],
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_unicodedecodeerror(self):
3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeDecodeError,
3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", "g\xfcrk", 1, 2, "ouch"],
3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't decode byte 0xfc in position 1: ouch"
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeDecodeError,
3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ["ascii", "g\xfcrk", 1, 3, "ouch"],
3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "'ascii' codec can't decode bytes in position 1-2: ouch"
3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_unicodetranslateerror(self):
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeTranslateError,
3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            [u"g\xfcrk", 1, 2, "ouch"],
3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "can't translate character u'\\xfc' in position 1: ouch"
3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeTranslateError,
3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            [u"g\u0100rk", 1, 2, "ouch"],
3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "can't translate character u'\\u0100' in position 1: ouch"
3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeTranslateError,
3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            [u"g\uffffrk", 1, 2, "ouch"],
3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "can't translate character u'\\uffff' in position 1: ouch"
3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode > 0xffff:
3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.check_exceptionobjectargs(
3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                UnicodeTranslateError,
3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                [u"g\U00010000rk", 1, 2, "ouch"],
3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "can't translate character u'\\U00010000' in position 1: ouch"
3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.check_exceptionobjectargs(
4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeTranslateError,
4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            [u"g\xfcrk", 1, 3, "ouch"],
4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "can't translate characters in position 1-2: ouch"
4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badandgoodstrictexceptions(self):
4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "strict" complains about a non-exception passed in
4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.strict_errors,
4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            42
4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "strict" complains about the wrong exception type
4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            Exception,
4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.strict_errors,
4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            Exception("ouch")
4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # If the correct exception is passed in, "strict" raises it
4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError,
4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.strict_errors,
4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badandgoodignoreexceptions(self):
4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "ignore" complains about a non-exception passed in
4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.ignore_errors,
4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           42
4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "ignore" complains about the wrong exception type
4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.ignore_errors,
4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           UnicodeError("ouch")
4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # If the correct exception is passed in, "ignore" returns an empty replacement
4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"", 1)
4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"", 1)
4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"", 1)
4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badandgoodreplaceexceptions(self):
4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "replace" complains about a non-exception passed in
4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.replace_errors,
4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           42
4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "replace" complains about the wrong exception type
4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.replace_errors,
4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           UnicodeError("ouch")
4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.replace_errors,
4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            BadObjectUnicodeEncodeError()
4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.replace_errors,
4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            BadObjectUnicodeDecodeError()
4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"?", 1)
4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\ufffd", 1)
4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\ufffd", 1)
4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badandgoodxmlcharrefreplaceexceptions(self):
4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "xmlcharrefreplace" complains about a non-exception passed in
4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.xmlcharrefreplace_errors,
4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           42
4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "xmlcharrefreplace" complains about the wrong exception types
4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.xmlcharrefreplace_errors,
5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           UnicodeError("ouch")
5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "xmlcharrefreplace" can only be used for encoding
5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.xmlcharrefreplace_errors,
5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.xmlcharrefreplace_errors,
5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Use the correct exception
5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        s = "".join(unichr(c) for c in cs)
5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.xmlcharrefreplace_errors(
5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ),
5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badandgoodbackslashreplaceexceptions(self):
5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "backslashreplace" complains about a non-exception passed in
5260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
5270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
5280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.backslashreplace_errors,
5290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           42
5300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "backslashreplace" complains about the wrong exception types
5320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
5330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TypeError,
5340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           codecs.backslashreplace_errors,
5350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           UnicodeError("ouch")
5360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "backslashreplace" can only be used for encoding
5380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
5390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
5400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors,
5410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
5420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(
5440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            TypeError,
5450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors,
5460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
5470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Use the correct exception
5490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
5500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
5510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\\u3042", 1)
5520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
5540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
5550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\\x00", 1)
5560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
5580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
5590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\\xff", 1)
5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
5620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
5630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\\u0100", 1)
5640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
5660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
5670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            (u"\\uffff", 1)
5680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
5690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode>0xffff:
5700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertEqual(
5710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
5720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                (u"\\U00010000", 1)
5730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
5740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertEqual(
5750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
5760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                (u"\\U0010ffff", 1)
5770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
5780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badhandlerresults(self):
5800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
5810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
5820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for res in results:
5840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.register_error("test.badhandler", lambda x: res)
5850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for enc in encs:
5860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.assertRaises(
5870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    TypeError,
5880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    u"\u3042".encode,
5890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    enc,
5900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    "test.badhandler"
5910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
5920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for (enc, bytes) in (
5930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ("ascii", "\xff"),
5940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ("utf-8", "\xff"),
5950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ("utf-7", "+x-"),
5960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ("unicode-internal", "\x00"),
5970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ):
5980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.assertRaises(
5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    TypeError,
6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    bytes.decode,
6010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    enc,
6020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    "test.badhandler"
6030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                )
6040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_lookup(self):
6060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
6070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
6080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
6090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
6100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.xmlcharrefreplace_errors,
6110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.lookup_error("xmlcharrefreplace")
6120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
6130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(
6140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.backslashreplace_errors,
6150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            codecs.lookup_error("backslashreplace")
6160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
6170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_unencodablereplacement(self):
6190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def unencrepl(exc):
6200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if isinstance(exc, UnicodeEncodeError):
6210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return (u"\u4242", exc.end)
6220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
6230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise TypeError("don't know how to handle %r" % exc)
6240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.unencreplhandler", unencrepl)
6250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
6260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertRaises(
6270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                UnicodeEncodeError,
6280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                u"\u4242".encode,
6290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                enc,
6300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                "test.unencreplhandler"
6310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
6320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badregistercall(self):
6340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
6350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Modules/_codecsmodule.c::register_error()
6360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Python/codecs.c::PyCodec_RegisterError()
6370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, codecs.register_error, 42)
6380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_badlookupcall(self):
6410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
6420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Modules/_codecsmodule.c::lookup_error()
6430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, codecs.lookup_error)
6440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_unknownhandler(self):
6460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
6470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Modules/_codecsmodule.c::lookup_error()
6480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
6490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_xmlcharrefvalues(self):
6510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
6520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
6530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and inline implementations
6540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
6550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode>=100000:
6560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            v += (100000, 500000, 1000000)
6570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        s = u"".join([unichr(x) for x in v])
6580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
6590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for enc in ("ascii", "iso-8859-15"):
6600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
6610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                s.encode(enc, err)
6620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_decodehelper(self):
6640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
6650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
6660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and callers
6670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
6680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def baddecodereturn1(exc):
6700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 42
6710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
6720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
6730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
6740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
6750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
6760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
6770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
6780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def baddecodereturn2(exc):
6800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (u"?", None)
6810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
6820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
6830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler = PosReturn()
6850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.posreturn", handler.handle)
6860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Valid negative position
6880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = -1
6890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
6900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Valid negative position
6920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = -2
6930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
6940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Negative position out of bounds
6960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = -3
6970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
6980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
6990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Valid positive position
7000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 1
7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Largest valid positive position (one beyond end of input)
7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 2
7050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>")
7060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Invalid positive position
7080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 3
7090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
7100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Restart at the "0"
7120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 6
7130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
7140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        class D(dict):
7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            def __getitem__(self, key):
7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ValueError
7180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
7190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
7200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: 0x110000})
7210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_encodehelper(self):
7230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
7240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
7250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and callers
7260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
7270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def badencodereturn1(exc):
7290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 42
7300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.badencodereturn1", badencodereturn1)
7310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
7320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        def badencodereturn2(exc):
7340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return (u"?", None)
7350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.badencodereturn2", badencodereturn2)
7360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
7370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler = PosReturn()
7390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        codecs.register_error("test.posreturn", handler.handle)
7400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Valid negative position
7420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = -1
7430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
7440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Valid negative position
7460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = -2
7470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
7480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Negative position out of bounds
7500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = -3
7510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
7520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Valid positive position
7540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 1
7550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
7560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Largest valid positive position (one beyond end of input
7580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 2
7590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
7600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Invalid positive position
7620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 3
7630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
7640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        handler.pos = 0
7660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        class D(dict):
7680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            def __getitem__(self, key):
7690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ValueError
7700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
7710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
7720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
7730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
7740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_translatehelper(self):
7760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # enhance coverage of:
7770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
7780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and callers
7790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # (Unfortunately the errors argument is not directly accessible
7800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # from Python, so we can't test that much)
7810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        class D(dict):
7820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            def __getitem__(self, key):
7830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ValueError
7840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(ValueError, u"\xff".translate, D())
7850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
7860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
7870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def test_bug828737(self):
7890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        charmap = {
7900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ord("&"): u"&amp;",
7910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ord("<"): u"&lt;",
7920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ord(">"): u"&gt;",
7930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            ord('"'): u"&quot;",
7940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        }
7950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for n in (1, 10, 100, 1000):
7970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            text = u'abc<def>ghi'*n
7980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            text.translate(charmap)
7990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def test_main():
    """Run the CodecCallbackTest cases via test.test_support.run_unittest."""
    run_unittest = test.test_support.run_unittest
    run_unittest(CodecCallbackTest)


if __name__ == "__main__":
    # Executed as a script rather than imported: run the suite directly.
    test_main()
805