1edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport test.test_support, unittest
2edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport sys, codecs, htmlentitydefs, unicodedata
3edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class PosReturn:
    # Helper for configurable callbacks: ``handle`` answers every error
    # with the replacement u"<?>" and whatever position is currently
    # stored in ``pos``.

    def __init__(self):
        # Position that the next handle() call will report.
        self.pos = 0

    def handle(self, exc):
        # Returns the tuple (u"<?>", position configured before the call).
        reported = self.pos
        # A negative position is counted from the end of the input.
        absolute = len(exc.object) + reported if reported < 0 else reported
        # When this call does not advance past the start of the error,
        # arrange for the next call to report the end of the input;
        # otherwise the same error would be reported forever.
        if absolute <= exc.start:
            self.pos = len(exc.object)
        return (u"<?>", reported)
20edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeEncodeError variant whose ``start`` attribute is overwritten
# with a list right after the regular construction.
class BadStartUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        super(BadStartUnicodeEncodeError, self).__init__(
            "ascii", u"", 0, 1, "bad")
        self.start = []
26edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeEncodeError variant whose ``object`` attribute is overwritten
# with a list right after the regular construction.
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        super(BadObjectUnicodeEncodeError, self).__init__(
            "ascii", u"", 0, 1, "bad")
        self.object = []
32edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeDecodeError variant whose ``end`` attribute is deleted
# right after the regular construction.
class NoEndUnicodeDecodeError(UnicodeDecodeError):
    def __init__(self):
        super(NoEndUnicodeDecodeError, self).__init__(
            "ascii", "", 0, 1, "bad")
        del self.end
38edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeDecodeError variant whose ``object`` attribute is overwritten
# with a list right after the regular construction.
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
    def __init__(self):
        super(BadObjectUnicodeDecodeError, self).__init__(
            "ascii", "", 0, 1, "bad")
        self.object = []
44edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeTranslateError variant whose ``start`` attribute is deleted
# right after the regular construction.
class NoStartUnicodeTranslateError(UnicodeTranslateError):
    def __init__(self):
        super(NoStartUnicodeTranslateError, self).__init__(
            u"", 0, 1, "bad")
        del self.start
50edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeTranslateError variant whose ``end`` attribute is deleted
# right after the regular construction.
class NoEndUnicodeTranslateError(UnicodeTranslateError):
    def __init__(self):
        super(NoEndUnicodeTranslateError, self).__init__(
            u"", 0, 1, "bad")
        del self.end
56edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# UnicodeTranslateError variant whose ``object`` attribute is deleted
# right after the regular construction.
class NoObjectUnicodeTranslateError(UnicodeTranslateError):
    def __init__(self):
        super(NoObjectUnicodeTranslateError, self).__init__(
            u"", 0, 1, "bad")
        del self.object
62edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
63edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass CodecCallbackTest(unittest.TestCase):
64edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
65edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_xmlcharrefreplace(self):
66edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # replace unencodable characters which numeric character entities.
67edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # For ascii, latin-1 and charmaps this is completely implemented
68edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # in C and should be reasonably fast.
69edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
70edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
71edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            s.encode("ascii", "xmlcharrefreplace"),
72edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "&#12473;&#12497;&#12514; &#228;nd eggs"
73edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
74edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
75edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            s.encode("latin-1", "xmlcharrefreplace"),
76edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "&#12473;&#12497;&#12514; \xe4nd eggs"
77edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
78edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
79edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_xmlcharnamereplace(self):
80edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # This time use a named character entity for unencodable
81edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # characters, if one is available.
82edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
83edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def xmlcharnamereplace(exc):
84edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if not isinstance(exc, UnicodeEncodeError):
85edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise TypeError("don't know how to handle %r" % exc)
86edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            l = []
87edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            for c in exc.object[exc.start:exc.end]:
88edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                try:
89edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
90edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                except KeyError:
91edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    l.append(u"&#%d;" % ord(c))
92edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return (u"".join(l), exc.end)
93edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
94edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error(
95edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "test.xmlcharnamereplace", xmlcharnamereplace)
96edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
97edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
98edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
99edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
100edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
101edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
102edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
103edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
104edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
105edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_uninamereplace(self):
106edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # We're using the names from the unicode database this time,
107edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # and we're doing "syntax highlighting" here, i.e. we include
108edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # the replaced text in ANSI escape sequences. For this it is
109edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # useful that the error handler is not called for every single
110edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # unencodable character, but for a complete sequence of
111edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # unencodable characters, otherwise we would output many
112edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # unnecessary escape sequences.
113edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
114edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def uninamereplace(exc):
115edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if not isinstance(exc, UnicodeEncodeError):
116edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise TypeError("don't know how to handle %r" % exc)
117edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            l = []
118edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            for c in exc.object[exc.start:exc.end]:
119edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
120edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
121edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
122edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error(
123edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "test.uninamereplace", uninamereplace)
124edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
125edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = u"\xac\u1234\u20ac\u8000"
126edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
127edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
128edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
129edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
130edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
131edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
132edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
133edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
134edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
135edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_backslashescape(self):
136edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Does the same as the "unicode-escape" encoding, but with different
137edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # base encodings.
138edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = u"a\xac\u1234\u20ac\u8000"
139edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
140edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            sin += unichr(sys.maxunicode)
141edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "a\\xac\\u1234\\u20ac\\u8000"
142edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
143edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            sout += "\\U%08x" % sys.maxunicode
144edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
145edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
146edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "a\xac\\u1234\\u20ac\\u8000"
147edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
148edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            sout += "\\U%08x" % sys.maxunicode
149edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
150edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
151edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "a\xac\\u1234\xa4\\u8000"
152edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
153edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            sout += "\\U%08x" % sys.maxunicode
154edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
155edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
156edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_decoding_callbacks(self):
157edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # This is a test for a decoding callback handler
158edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # that allows the decoding of the invalid sequence
159edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "\xc0\x80" and returns "\x00" instead of raising an error.
160edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # All other illegal sequences will be handled strictly.
161edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def relaxedutf8(exc):
162edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if not isinstance(exc, UnicodeDecodeError):
163edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise TypeError("don't know how to handle %r" % exc)
164edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if exc.object[exc.start:exc.start+2] == "\xc0\x80":
165edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                return (u"\x00", exc.start+2) # retry after two bytes
166edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            else:
167edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise exc
168edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
169edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.relaxedutf8", relaxedutf8)
170edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
171edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # all the "\xc0\x80" will be decoded to "\x00"
172edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
173edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = u"a\x00b\x00c\xfc\x00\x00"
174edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
175edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
176edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
177edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = "\xc0\x80\xc0\x81"
178edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(UnicodeDecodeError, sin.decode,
179edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                          "utf-8", "test.relaxedutf8")
180edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
181edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_charmapencode(self):
182edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # For charmap encodings the replacement string will be
183edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # mapped through the encoding again. This means, that
184edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # to be able to use e.g. the "replace" handler, the
185edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # charmap has to have a mapping for "?".
186edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
187edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = u"abc"
188edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "AABBCC"
189edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
190edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
191edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = u"abcA"
192edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
193edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
194edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        charmap[ord("?")] = "XYZ"
195edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sin = u"abcDEF"
196edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        sout = "AABBCCXYZXYZXYZ"
197edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
198edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
199edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        charmap[ord("?")] = u"XYZ"
200edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
201edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
202edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        charmap[ord("?")] = u"XYZ"
203edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
204edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
205edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_decodeunicodeinternal(self):
206edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
207edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeDecodeError,
208edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "\x00\x00\x00\x00\x00".decode,
209edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "unicode-internal",
210edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
211edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
212edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            def handler_unicodeinternal(exc):
213edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                if not isinstance(exc, UnicodeDecodeError):
214edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    raise TypeError("don't know how to handle %r" % exc)
215edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                return (u"\x01", 1)
216edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
217edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertEqual(
218edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
219edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                u"\u0000"
220edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
221edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
222edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertEqual(
223edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
224edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                u"\u0000\ufffd"
225edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
226edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
227edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.register_error("test.hui", handler_unicodeinternal)
228edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
229edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertEqual(
230edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
231edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                u"\u0000\u0001\u0000"
232edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
233edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
234edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_callbacks(self):
235edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def handler1(exc):
236edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if not isinstance(exc, UnicodeEncodeError) \
237edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep               and not isinstance(exc, UnicodeDecodeError):
238edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise TypeError("don't know how to handle %r" % exc)
239edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
240edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return (u"[%s]" % u"".join(l), exc.end)
241edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
242edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.handler1", handler1)
243edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
244edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def handler2(exc):
245edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if not isinstance(exc, UnicodeDecodeError):
246edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise TypeError("don't know how to handle %r" % exc)
247edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
248edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
249edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
250edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.handler2", handler2)
251edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
252edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        s = "\x00\x81\x7f\x80\xff"
253edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
254edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
255edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            s.decode("ascii", "test.handler1"),
256edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"\x00[<129>]\x7f[<128>][<255>]"
257edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
258edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
259edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            s.decode("ascii", "test.handler2"),
260edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"\x00[<129>][<128>]"
261edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
262edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
263edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
264edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
265edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"\u3042[<92><117><51>]xxx"
266edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
267edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
268edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
269edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
270edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"\u3042[<92><117><51>]xx"
271edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
272edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
273edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
274edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
275edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"z[<98>][<99>]"
276edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
277edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
278edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
279edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
280edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"g[<252><223>]rk"
281edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
282edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
283edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
284edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"g\xfc\xdf".encode("ascii", "test.handler1"),
285edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            u"g[<252><223>]"
286edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
287edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
288edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_longstrings(self):
289edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # test long strings to check for memory overflow problems
290edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
291edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                   "backslashreplace"]
292edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # register the handlers under different names,
293edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # to prevent the codec from recognizing the name
294edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for err in errors:
295edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.register_error("test." + err, codecs.lookup_error(err))
296edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        l = 1000
297edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        errors += [ "test." + err for err in errors ]
298edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
299edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
300edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                        "utf-8", "utf-7", "utf-16", "utf-32"):
301edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                for err in errors:
302edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    try:
303edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                        uni.encode(enc, err)
304edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    except UnicodeError:
305edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                        pass
306edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def check_exceptionobjectargs(self, exctype, args, msg):
    # Helper for the UnicodeError subclass tests: checks construction with
    # bad and good argument lists and the __str__ conversion.
    #
    #   exctype -- the exception class under test
    #   args    -- list of valid constructor arguments for exctype
    #   msg     -- the expected result of str(exctype(*args))

    # check with one missing argument
    self.assertRaises(TypeError, exctype, *args[:-1])
    # check with one argument too much
    self.assertRaises(TypeError, exctype, *(args + ["too much"]))
    # check with one argument of the wrong type
    wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
    for i, goodarg in enumerate(args):
        for wrongarg in wrongargs:
            if type(wrongarg) is type(goodarg):
                continue
            # Substitute only position i and keep every other argument
            # valid, so the TypeError can only be caused by the one value
            # that was swapped in.
            callargs = list(args)
            callargs[i] = wrongarg
            self.assertRaises(TypeError, exctype, *callargs)

    # check with the correct number and type of arguments
    exc = exctype(*args)
    self.assertEqual(str(exc), msg)
331edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
332edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_unicodeencodeerror(self):
333edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
334edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError,
335edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", u"g\xfcrk", 1, 2, "ouch"],
336edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
337edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
338edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
339edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError,
340edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", u"g\xfcrk", 1, 4, "ouch"],
341edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't encode characters in position 1-3: ouch"
342edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
343edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
344edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError,
345edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", u"\xfcx", 0, 1, "ouch"],
346edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
347edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
348edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
349edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError,
350edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", u"\u0100x", 0, 1, "ouch"],
351edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
352edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
353edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
354edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError,
355edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", u"\uffffx", 0, 1, "ouch"],
356edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
357edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
358edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
359edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.check_exceptionobjectargs(
360edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                UnicodeEncodeError,
361edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                ["ascii", u"\U00010000x", 0, 1, "ouch"],
362edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
363edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
364edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
365edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_unicodedecodeerror(self):
366edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
367edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeDecodeError,
368edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", "g\xfcrk", 1, 2, "ouch"],
369edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't decode byte 0xfc in position 1: ouch"
370edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
371edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
372edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeDecodeError,
373edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ["ascii", "g\xfcrk", 1, 3, "ouch"],
374edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "'ascii' codec can't decode bytes in position 1-2: ouch"
375edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
376edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
377edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_unicodetranslateerror(self):
378edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
379edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeTranslateError,
380edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            [u"g\xfcrk", 1, 2, "ouch"],
381edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "can't translate character u'\\xfc' in position 1: ouch"
382edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
383edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
384edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeTranslateError,
385edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            [u"g\u0100rk", 1, 2, "ouch"],
386edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "can't translate character u'\\u0100' in position 1: ouch"
387edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
388edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
389edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeTranslateError,
390edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            [u"g\uffffrk", 1, 2, "ouch"],
391edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "can't translate character u'\\uffff' in position 1: ouch"
392edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
393edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode > 0xffff:
394edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.check_exceptionobjectargs(
395edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                UnicodeTranslateError,
396edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                [u"g\U00010000rk", 1, 2, "ouch"],
397edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                "can't translate character u'\\U00010000' in position 1: ouch"
398edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
399edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.check_exceptionobjectargs(
400edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeTranslateError,
401edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            [u"g\xfcrk", 1, 3, "ouch"],
402edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            "can't translate characters in position 1-2: ouch"
403edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
404edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
405edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badandgoodstrictexceptions(self):
406edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "strict" complains about a non-exception passed in
407edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
408edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            TypeError,
409edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.strict_errors,
410edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            42
411edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
412edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "strict" complains about the wrong exception type
413edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
414edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            Exception,
415edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.strict_errors,
416edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            Exception("ouch")
417edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
418edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
419edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # If the correct exception is passed in, "strict" raises it
420edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
421edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError,
422edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.strict_errors,
423edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
424edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
425edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
426edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badandgoodignoreexceptions(self):
427edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "ignore" complains about a non-exception passed in
428edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
429edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           TypeError,
430edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           codecs.ignore_errors,
431edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           42
432edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
433edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "ignore" complains about the wrong exception type
434edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
435edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           TypeError,
436edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           codecs.ignore_errors,
437edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           UnicodeError("ouch")
438edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
439edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # If the correct exception is passed in, "ignore" returns an empty replacement
440edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
441edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
442edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"", 1)
443edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
444edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
445edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
446edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"", 1)
447edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
448edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
449edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
450edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"", 1)
451edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
452edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def test_badandgoodreplaceexceptions(self):
    # Feeds codecs.replace_errors unusable arguments and then one error of
    # each supported kind.
    replace = codecs.replace_errors

    # "replace" complains about a non-exception passed in
    self.assertRaises(TypeError, replace, 42)
    # "replace" complains about the wrong exception type
    self.assertRaises(TypeError, replace, UnicodeError("ouch"))
    # The BadObject* helpers are defined elsewhere in this file; presumably
    # they carry an unusable "object" attribute -- verify against their
    # definitions.  Both must be rejected with TypeError.
    self.assertRaises(TypeError, replace, BadObjectUnicodeEncodeError())
    self.assertRaises(TypeError, replace, BadObjectUnicodeDecodeError())

    # With the correct exception, "replace" returns an "?" or u"\ufffd"
    # replacement: (exception type, constructor args, expected replacement)
    supported = (
        (UnicodeEncodeError, ("ascii", u"\u3042", 0, 1, "ouch"), u"?"),
        (UnicodeDecodeError, ("ascii", "\xff", 0, 1, "ouch"), u"\ufffd"),
        (UnicodeTranslateError, (u"\u3042", 0, 1, "ouch"), u"\ufffd"),
    )
    for exctype, excargs, replacement in supported:
        self.assertEqual(replace(exctype(*excargs)), (replacement, 1))
489edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
490edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badandgoodxmlcharrefreplaceexceptions(self):
491edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "xmlcharrefreplace" complains about a non-exception passed in
492edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
493edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           TypeError,
494edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           codecs.xmlcharrefreplace_errors,
495edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           42
496edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
497edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "xmlcharrefreplace" complains about the wrong exception types
498edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
499edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           TypeError,
500edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           codecs.xmlcharrefreplace_errors,
501edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           UnicodeError("ouch")
502edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
503edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "xmlcharrefreplace" can only be used for encoding
504edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
505edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            TypeError,
506edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.xmlcharrefreplace_errors,
507edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
508edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
509edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
510edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            TypeError,
511edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.xmlcharrefreplace_errors,
512edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
513edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
514edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Use the correct exception
515edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
516edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        s = "".join(unichr(c) for c in cs)
517edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
518edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.xmlcharrefreplace_errors(
519edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
520edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ),
521edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
522edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
523edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
524edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badandgoodbackslashreplaceexceptions(self):
525edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "backslashreplace" complains about a non-exception passed in
526edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
527edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           TypeError,
528edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           codecs.backslashreplace_errors,
529edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           42
530edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
531edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "backslashreplace" complains about the wrong exception types
532edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
533edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           TypeError,
534edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           codecs.backslashreplace_errors,
535edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep           UnicodeError("ouch")
536edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
537edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # "backslashreplace" can only be used for encoding
538edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
539edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            TypeError,
540edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors,
541edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
542edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
543edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(
544edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            TypeError,
545edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors,
546edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
547edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
548edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Use the correct exception
549edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
550edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
551edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"\\u3042", 1)
552edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
553edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
554edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
555edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"\\x00", 1)
556edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
557edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
558edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
559edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"\\xff", 1)
560edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
561edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
562edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
563edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"\\u0100", 1)
564edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
565edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
566edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
567edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            (u"\\uffff", 1)
568edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
569edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode>0xffff:
570edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertEqual(
571edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
572edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                (u"\\U00010000", 1)
573edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
574edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertEqual(
575edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
576edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                (u"\\U0010ffff", 1)
577edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
578edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
579edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badhandlerresults(self):
580edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
581edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
582edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
583edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for res in results:
584edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.register_error("test.badhandler", lambda x: res)
585edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            for enc in encs:
586edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                self.assertRaises(
587edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    TypeError,
588edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    u"\u3042".encode,
589edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    enc,
590edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    "test.badhandler"
591edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                )
592edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            for (enc, bytes) in (
593edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                ("ascii", "\xff"),
594edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                ("utf-8", "\xff"),
595edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                ("utf-7", "+x-"),
596edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                ("unicode-internal", "\x00"),
597edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ):
598edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                self.assertRaises(
599edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    TypeError,
600edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    bytes.decode,
601edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    enc,
602edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    "test.badhandler"
603edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                )
604edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
605edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_lookup(self):
606edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
607edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
608edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
609edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
610edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.xmlcharrefreplace_errors,
611edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.lookup_error("xmlcharrefreplace")
612edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
613edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(
614edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.backslashreplace_errors,
615edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            codecs.lookup_error("backslashreplace")
616edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        )
617edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
618edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_unencodablereplacement(self):
619edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def unencrepl(exc):
620edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            if isinstance(exc, UnicodeEncodeError):
621edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                return (u"\u4242", exc.end)
622edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            else:
623edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise TypeError("don't know how to handle %r" % exc)
624edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.unencreplhandler", unencrepl)
625edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
626edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertRaises(
627edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                UnicodeEncodeError,
628edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                u"\u4242".encode,
629edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                enc,
630edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                "test.unencreplhandler"
631edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            )
632edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
633edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badregistercall(self):
634edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
635edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Modules/_codecsmodule.c::register_error()
636edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Python/codecs.c::PyCodec_RegisterError()
637edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, codecs.register_error, 42)
638edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
639edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
640edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_badlookupcall(self):
641edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
642edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Modules/_codecsmodule.c::lookup_error()
643edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, codecs.lookup_error)
644edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
645edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_unknownhandler(self):
646edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
647edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Modules/_codecsmodule.c::lookup_error()
648edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
649edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
650edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_xmlcharrefvalues(self):
651edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
652edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
653edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # and inline implementations
654edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
655edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if sys.maxunicode>=100000:
656edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            v += (100000, 500000, 1000000)
657edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        s = u"".join([unichr(x) for x in v])
658edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
659edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for enc in ("ascii", "iso-8859-15"):
660edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
661edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                s.encode(enc, err)
662edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
663edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_decodehelper(self):
664edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
665edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
666edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # and callers
667edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
668edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
669edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def baddecodereturn1(exc):
670edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return 42
671edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
672edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
673edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
674edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
675edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
676edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
677edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
678edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
679edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def baddecodereturn2(exc):
680edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return (u"?", None)
681edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
682edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
683edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
684edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler = PosReturn()
685edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.posreturn", handler.handle)
686edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
687edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Valid negative position
688edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = -1
689edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
690edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
691edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Valid negative position
692edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = -2
693edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
694edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
695edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Negative position out of bounds
696edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = -3
697edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
698edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
699edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Valid positive position
700edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 1
701edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
702edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
703edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Largest valid positive position (one beyond end of input)
704edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 2
705edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>")
706edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
707edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Invalid positive position
708edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 3
709edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
710edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
711edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Restart at the "0"
712edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 6
713edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
714edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
715edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        class D(dict):
716edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            def __getitem__(self, key):
717edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise ValueError
718edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
719edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
720edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: 0x110000})
721edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
722edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_encodehelper(self):
723edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
724edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
725edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # and callers
726edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
727edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
728edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def badencodereturn1(exc):
729edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return 42
730edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.badencodereturn1", badencodereturn1)
731edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
732edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
733edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        def badencodereturn2(exc):
734edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            return (u"?", None)
735edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.badencodereturn2", badencodereturn2)
736edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
737edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
738edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler = PosReturn()
739edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        codecs.register_error("test.posreturn", handler.handle)
740edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
741edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Valid negative position
742edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = -1
743edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
744edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
745edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Valid negative position
746edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = -2
747edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
748edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
749edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Negative position out of bounds
750edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = -3
751edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
752edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
753edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Valid positive position
754edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 1
755edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
756edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
757edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Largest valid positive position (one beyond end of input
758edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 2
759edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
760edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
761edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Invalid positive position
762edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 3
763edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
764edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
765edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        handler.pos = 0
766edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
767edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        class D(dict):
768edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            def __getitem__(self, key):
769edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise ValueError
770edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
771edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
772edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
773edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
774edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
775edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_translatehelper(self):
776edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # enhance coverage of:
777edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
778edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # and callers
779edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # (Unfortunately the errors argument is not directly accessible
780edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        # from Python, so we can't test that much)
781edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        class D(dict):
782edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            def __getitem__(self, key):
783edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                raise ValueError
784edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(ValueError, u"\xff".translate, D())
785edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
786edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
787edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
788edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def test_bug828737(self):
789edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        charmap = {
790edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ord("&"): u"&amp;",
791edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ord("<"): u"&lt;",
792edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ord(">"): u"&gt;",
793edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            ord('"'): u"&quot;",
794edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        }
795edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
796edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        for n in (1, 10, 100, 1000):
797edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            text = u'abc<def>ghi'*n
798edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            text.translate(charmap)
799edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def test_main():
    # Hand the CodecCallbackTest test case to
    # test.test_support.run_unittest().
    test.test_support.run_unittest(CodecCallbackTest)
802edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()
805