10c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi""" Test script for the Unicode implementation. 20c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 30c5958b1636c47ed7c284f859c8e805fd06a0e6Bill YiWritten by Bill Tutt. 40c5958b1636c47ed7c284f859c8e805fd06a0e6Bill YiModified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) 50c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 60c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 70c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 80c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi"""#" 90c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 100c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport unittest 110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport sys 120c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport _testcapi 130c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 140c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yifrom test import test_support 150c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 160c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiclass UnicodeNamesTest(unittest.TestCase): 170c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 180c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def checkletter(self, name, code): 190c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # Helper that put all \N escapes inside eval'd raw strings, 200c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # to make sure this script runs even if the compiler 210c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # chokes on \N escapes 220c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi res = eval(ur'u"\N{%s}"' % name) 230c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertEqual(res, code) 240c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi return res 250c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 260c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_general(self): 270c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # General and case insensitivity test: 280c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi chars = [ 290c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN CAPITAL LETTER T", 300c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER H", 310c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER E", 320c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "SPACE", 330c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER R", 340c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN CAPITAL LETTER E", 350c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER D", 360c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "SPACE", 370c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER f", 380c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN CAPITAL LeTtEr o", 390c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMaLl LETTER x", 400c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "SPACE", 410c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER A", 420c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER T", 430c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER E", 440c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "SPACE", 450c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER T", 460c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER H", 470c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER E", 480c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "SpAcE", 490c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER S", 500c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER H", 510c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN small LETTER e", 520c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN small LETTER e", 530c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "LATIN SMALL LETTER P", 540c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "FULL STOP" 550c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ] 560c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi string = u"The rEd fOx ate the sheep." 570c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 580c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertEqual( 590c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi u"".join([self.checkletter(*args) for args in zip(chars, string)]), 600c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi string 610c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ) 620c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 630c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_ascii_letters(self): 640c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi import unicodedata 650c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 660c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi for char in "".join(map(chr, xrange(ord("a"), ord("z")))): 670c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi name = "LATIN SMALL LETTER %s" % char.upper() 680c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi code = unicodedata.lookup(name) 690c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertEqual(unicodedata.name(code), name) 700c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 710c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_hangul_syllables(self): 720c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE GA", u"\uac00") 730c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8") 740c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0") 750c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8") 760c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0") 770c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88") 780c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370") 790c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE YI", u"\uc758") 800c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40") 810c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28") 820c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE PAN", u"\ud310") 830c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8") 840c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3") 850c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 860c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi import unicodedata 870c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises(ValueError, unicodedata.name, u"\ud7a4") 880c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 890c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_cjk_unified_ideographs(self): 900c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400") 910c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5") 920c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00") 930c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5") 940c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000") 950c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6") 960c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 970c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_bmp_characters(self): 980c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi import unicodedata 990c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi count = 0 1000c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi for code in xrange(0x10000): 1010c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi char = unichr(code) 1020c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi name = unicodedata.name(char, None) 1030c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi if name is not None: 1040c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertEqual(unicodedata.lookup(name), char) 1050c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi count += 1 1060c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1070c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_misc_symbols(self): 1080c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("PILCROW SIGN", u"\u00b6") 1090c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD") 1100c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F") 1110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41") 1120c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1130c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_errors(self): 1140c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi import unicodedata 1150c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises(TypeError, unicodedata.name) 1160c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises(TypeError, unicodedata.name, u'xx') 1170c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises(TypeError, unicodedata.lookup) 1180c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises(KeyError, unicodedata.lookup, u'unknown') 1190c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1200c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_strict_eror_handling(self): 1210c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # bogus character name 1220c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises( 1230c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi UnicodeError, 1240c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi unicode, "\\N{blah}", 'unicode-escape', 'strict' 1250c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ) 1260c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # long bogus character name 1270c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises( 1280c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi UnicodeError, 1290c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict' 1300c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ) 1310c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # missing closing brace 1320c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises( 1330c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi UnicodeError, 1340c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi unicode, "\\N{SPACE", 'unicode-escape', 'strict' 1350c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ) 1360c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # missing opening brace 1370c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaises( 1380c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi UnicodeError, 1390c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi unicode, "\\NSPACE", 'unicode-escape', 'strict' 1400c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ) 1410c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1420c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX, 1430c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "needs UINT_MAX < SIZE_MAX") 1440c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi @unittest.skipUnless(_testcapi.UINT_MAX < sys.maxint, 1450c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi "needs UINT_MAX < sys.maxint") 1460c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi @test_support.bigmemtest(minsize=_testcapi.UINT_MAX + 1, 1470c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi memuse=2 + 4 // len(u'\U00010000')) 1480c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi def test_issue16335(self, size): 1490c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi func = self.test_issue16335 1500c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi if size < func.minsize: 1510c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi raise unittest.SkipTest("not enough memory: %.1fG minimum needed" % 1520c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi (func.minsize * func.memuse / float(1024**3),)) 1530c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi # very very long bogus character name 1540c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi x = b'\\N{SPACE' + b'x' * int(_testcapi.UINT_MAX + 1) + b'}' 1550c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertEqual(len(x), len(b'\\N{SPACE}') + 1560c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi (_testcapi.UINT_MAX + 1)) 1570c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi self.assertRaisesRegexp(UnicodeError, 1580c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 'unknown Unicode character name', 1590c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi x.decode, 'unicode-escape' 1600c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi ) 1610c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1620c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1630c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yidef test_main(): 1640c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi test_support.run_unittest(UnicodeNamesTest) 1650c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi 1660c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiif __name__ == "__main__": 1670c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi test_main() 168