#!/usr/bin/env python
#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
# Each class below configures one codec for the shared TestBase mixin from
# test_multibytecodec_support.  TestBase is not defined in this file; the
# notes on the data tables describe only what the tables themselves show.
#
# NOTE(review): every ``codectests`` entry is a 3-tuple that reads as
# (input, error handler, expected result).  A byte-string input paired with
# a unicode result looks like a decoding case, a unicode input paired with a
# byte string looks like an encoding case, and ``None`` presumably marks a
# case that must raise -- confirm against TestBase.
#

from test import test_support
from test import test_multibytecodec_support
import unittest


class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb2312'
    # The sample-text fixture is looked up under the same name as the codec.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )


class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gbk'
    # The sample-text fixture is looked up under the same name as the codec.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
        # U+30FB has no expected byte string here, unlike the GB18030 table.
        (u"\u30fb", "strict", None),
    )


class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb18030'
    # Flag read by TestBase (not defined here); only this codec sets it.
    has_iso10646 = True
    # The sample-text fixture is looked up under the same name as the codec.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # The expected value is the four bytes 0x81 0x39 0xA7 0x39: each
        # literal "9" in the string below is the byte 0x39.
        (u"\u30fb", "strict", "\x819\xa79"),
    )


class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'hz'
    # The sample-text fixture is looked up under the same name as the codec.
    tstring = test_multibytecodec_support.load_teststring(encoding)
    codectests = (
        # test '~\n' (3 lines)
        # The '~\n' pair and the '~{' / '~}' markers do not appear in the
        # expected text; both layouts of the input yield the same result.
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )


def test_main():
    # Hand this module's name to the regression-test helper, which runs the
    # test cases defined above.
    test_support.run_unittest(__name__)


if __name__ == "__main__":
    test_main()