1#!/usr/bin/env python
2#
3# test_codecencodings_cn.py
4#   Codec encoding tests for PRC encodings.
5#
6
7from test import test_support
8from test import test_multibytecodec_support
9import unittest
10
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb2312' encoding.

    ``codectests`` holds ``(input, error handler, expected)`` triples for
    ``test_multibytecodec_support.TestBase``.  Every input in this class
    is a byte string.  An expected value of ``None`` presumably tells
    TestBase that the conversion must fail -- confirm against
    test_multibytecodec_support.
    """
    encoding = 'gb2312'
    # Derive the sample-text name from the attribute above so the two
    # cannot drift apart.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    # Byte-string inputs carry an explicit b prefix, as in Test_HZ; in
    # Python 2 this is the same str type as an unprefixed literal.
    codectests = (
        # invalid bytes
        # \x81\x81 in front of the pair \xc1\xc4, strict handler.
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        # Input ending in the single byte \xc8.
        (b"abc\xc8", "strict", None),
        # With "replace" the expected text has one U+FFFD, then U+804A.
        (b"abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Same, plus a second U+FFFD for the trailing \xc8.
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        # With "ignore" only U+804A remains after "abc".
        (b"abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        # \xc1 followed by the ASCII byte \x64.
        (b"\xc1\x64", "strict", None),
    )
23
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gbk' encoding.

    ``codectests`` holds ``(input, error handler, expected)`` triples for
    ``test_multibytecodec_support.TestBase``.  Byte-string inputs and one
    unicode input are present.  An expected value of ``None`` presumably
    tells TestBase that the conversion must fail -- confirm against
    test_multibytecodec_support.
    """
    encoding = 'gbk'
    # Derive the sample-text name from the attribute above so the two
    # cannot drift apart.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    # Byte-string inputs carry an explicit b prefix, as in Test_HZ; in
    # Python 2 this is the same str type as an unprefixed literal.
    codectests = (
        # invalid bytes
        # \x80\x80 in front of the pair \xc1\xc4, strict handler.
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        # Input ending in the single byte \xc8.
        (b"abc\xc8", "strict", None),
        # With "replace" the expected text has one U+FFFD, then U+804A.
        (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Same, plus a second U+FFFD for the trailing \xc8.
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        # With "ignore" only U+804A remains after "abc".
        (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # Four bytes; presumably a GB18030-style four-byte sequence that
        # GBK does not accept -- confirm.
        (b"\x83\x34\x83\x31", "strict", None),
        # Unicode input.  Test_GB18030 pairs this same character with a
        # byte-string result; here the expected value is None.
        (u"\u30fb", "strict", None),
    )
37
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb18030' encoding.

    ``codectests`` holds ``(input, error handler, expected)`` triples for
    ``test_multibytecodec_support.TestBase``.  Byte-string inputs and one
    unicode input are present.  An expected value of ``None`` presumably
    tells TestBase that the conversion must fail -- confirm against
    test_multibytecodec_support.
    """
    encoding = 'gb18030'
    # Derive the sample-text name from the attribute above so the two
    # cannot drift apart.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    # Byte strings carry an explicit b prefix, as in Test_HZ; in
    # Python 2 this is the same str type as an unprefixed literal.
    codectests = (
        # invalid bytes
        # \x80\x80 in front of the pair \xc1\xc4, strict handler.
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        # Input ending in the single byte \xc8.
        (b"abc\xc8", "strict", None),
        # With "replace" the expected text has one U+FFFD, then U+804A.
        (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Same, plus a second U+FFFD for the trailing \xc8.
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        # With "ignore" only U+804A remains after "abc".
        (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # The four bytes \x84\x39\x84\x39 yield a single U+FFFD in the
        # expected text.
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Unicode input paired with a four-byte result.  Every byte is
        # written as a hex escape (\x39 is ASCII "9"), matching the
        # spelling used in the row above.
        (u"\u30fb", "strict", b"\x81\x39\xa7\x39"),
    )
    # Presumably consulted by TestBase (not visible in this file) --
    # confirm what it enables there.
    has_iso10646 = True
52
class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'hz' encoding.

    ``codectests`` holds ``(input, error handler, expected)`` triples for
    ``test_multibytecodec_support.TestBase``.  All inputs here are byte
    strings and all expected values are unicode strings.
    """
    encoding = "hz"
    tstring = test_multibytecodec_support.load_teststring(encoding)
    codectests = (
        # '~\n' handling, input spread over 3 lines.  The expected text
        # has no newline at the position where the input has '~\n'.
        (
            b"This sentence is in ASCII.\n"
            b"The next sentence is in GB.~{<:Ky2;S{#,~}~\n"
            b"~{NpJ)l6HK!#~}Bye.\n",
            "strict",
            u"This sentence is in ASCII.\n"
            u"The next sentence is in GB."
            u"\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002"
            u"Bye.\n",
        ),
        # '~\n' handling, input spread over 4 lines.  The expected text
        # is the same as in the 3-line case.
        (
            b"This sentence is in ASCII.\n"
            b"The next sentence is in GB.~\n"
            b"~{<:Ky2;S{#,NpJ)l6HK!#~}~\n"
            b"Bye.\n",
            "strict",
            u"This sentence is in ASCII.\n"
            u"The next sentence is in GB."
            u"\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002"
            u"Bye.\n",
        ),
        # invalid bytes
        # '~c' in the input: the expected text has one U+FFFD, then 'd'.
        (b"ab~cd", "replace", u"ab\ufffdd"),
        # \xff in the input: one U+FFFD, with 'cd' kept.
        (b"ab\xffcd", "replace", u"ab\ufffdcd"),
        # \x81\x81 between '~{' and '~}': two U+FFFD, then U+804A.
        (b"ab~{\x81\x81\x41\x44~}cd", "replace", u"ab\ufffd\ufffd\u804acd"),
    )
81
def test_main():
    """Entry point for running this module's tests.

    Hands this module's name to ``test_support.run_unittest``, which is
    expected to collect and run the test cases defined here.
    """
    this_module = __name__
    test_support.run_unittest(this_module)
84
# Script entry point: run the tests only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    test_main()
87