#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
5
from test import test_support
from test import test_multibytecodec_support
import unittest
9
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Data-only test case for the 'gb2312' codec: the class supplies the
    # attributes below and inherits everything else from TestBase.
    encoding = 'gb2312'
    # NOTE(review): presumably the sample text for this codec -- see
    # load_teststring in test_multibytecodec_support.
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    # Each entry is (input, error-handler name, expected result); b"" marks
    # byte strings and u"" marks unicode strings.
    # NOTE(review): None as the expected result presumably means the codec
    # must raise -- the convention is defined by TestBase, confirm there.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore",  u"abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )
22
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Data-only test case for the 'gbk' codec: the class supplies the
    # attributes below and inherits everything else from TestBase.
    encoding = 'gbk'
    # NOTE(review): presumably the sample text for this codec -- see
    # load_teststring in test_multibytecodec_support.
    tstring = test_multibytecodec_support.load_teststring('gbk')
    # Each entry is (input, error-handler name, expected result); b"" marks
    # byte strings and u"" marks unicode strings.
    # NOTE(review): None as the expected result presumably means the codec
    # must raise -- the convention is defined by TestBase, confirm there.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        # unicode input, so this entry starts from text rather than bytes
        (u"\u30fb", "strict", None),
    )
36
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Data-only test case for the 'gb18030' codec: the class supplies the
    # attributes below and inherits everything else from TestBase.
    encoding = 'gb18030'
    # NOTE(review): presumably the sample text for this codec -- see
    # load_teststring in test_multibytecodec_support.
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    # Each entry is (input, error-handler name, expected result); b"" marks
    # byte strings and u"" marks unicode strings.
    # NOTE(review): None as the expected result presumably means the codec
    # must raise -- the convention is defined by TestBase, confirm there.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # unicode input paired with the four bytes 81 39 A7 39
        (u"\u30fb", "strict", b"\x819\xa79"),
    )
    # NOTE(review): flag consumed by TestBase (not visible in this file);
    # presumably switches on ISO 10646 related checks -- confirm there.
    has_iso10646 = True
51
class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    # Data-only test case for the 'hz' codec: the class supplies the
    # attributes below and inherits everything else from TestBase.
    encoding = 'hz'
    # NOTE(review): presumably the sample text for this codec -- see
    # load_teststring in test_multibytecodec_support.
    tstring = test_multibytecodec_support.load_teststring('hz')
    # Each entry is (input, error-handler name, expected result); b'' marks
    # byte strings and u'' marks unicode strings.
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )
80
def test_main():
    """Pass this module's name to test_support.run_unittest."""
    test_support.run_unittest(__name__)
83
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
86