""" Test script for the Unicode implementation.

Written by Bill Tutt.
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
90c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
100c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport unittest
110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport sys
120c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport _testcapi
130c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
140c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yifrom test import test_support
150c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
160c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiclass UnicodeNamesTest(unittest.TestCase):
170c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
180c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def checkletter(self, name, code):
190c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # Helper that put all \N escapes inside eval'd raw strings,
200c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # to make sure this script runs even if the compiler
210c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # chokes on \N escapes
220c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        res = eval(ur'u"\N{%s}"' % name)
230c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertEqual(res, code)
240c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        return res
250c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
260c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_general(self):
270c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # General and case insensitivity test:
280c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        chars = [
290c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN CAPITAL LETTER T",
300c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER H",
310c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER E",
320c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "SPACE",
330c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER R",
340c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN CAPITAL LETTER E",
350c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER D",
360c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "SPACE",
370c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER f",
380c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN CAPITAL LeTtEr o",
390c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMaLl LETTER x",
400c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "SPACE",
410c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER A",
420c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER T",
430c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER E",
440c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "SPACE",
450c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER T",
460c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER H",
470c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER E",
480c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "SpAcE",
490c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER S",
500c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER H",
510c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN small LETTER e",
520c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN small LETTER e",
530c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "LATIN SMALL LETTER P",
540c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            "FULL STOP"
550c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        ]
560c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        string = u"The rEd fOx ate the sheep."
570c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
580c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertEqual(
590c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            u"".join([self.checkletter(*args) for args in zip(chars, string)]),
600c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            string
610c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        )
620c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
630c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_ascii_letters(self):
640c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        import unicodedata
650c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
660c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
670c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            name = "LATIN SMALL LETTER %s" % char.upper()
680c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            code = unicodedata.lookup(name)
690c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            self.assertEqual(unicodedata.name(code), name)
700c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
710c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_hangul_syllables(self):
720c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
730c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
740c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
750c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
760c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
770c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
780c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
790c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
800c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
810c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
820c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
830c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
840c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
850c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
860c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        import unicodedata
870c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
880c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
890c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_cjk_unified_ideographs(self):
900c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
910c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
920c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
930c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
940c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
950c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
960c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
970c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_bmp_characters(self):
980c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        import unicodedata
990c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        count = 0
1000c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        for code in xrange(0x10000):
1010c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            char = unichr(code)
1020c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            name = unicodedata.name(char, None)
1030c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            if name is not None:
1040c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                self.assertEqual(unicodedata.lookup(name), char)
1050c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                count += 1
1060c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
1070c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_misc_symbols(self):
1080c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("PILCROW SIGN", u"\u00b6")
1090c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
1100c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
1110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
1120c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
1130c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_errors(self):
1140c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        import unicodedata
1150c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(TypeError, unicodedata.name)
1160c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(TypeError, unicodedata.name, u'xx')
1170c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(TypeError, unicodedata.lookup)
1180c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
1190c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
1200c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_strict_eror_handling(self):
1210c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # bogus character name
1220c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(
1230c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            UnicodeError,
1240c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            unicode, "\\N{blah}", 'unicode-escape', 'strict'
1250c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        )
1260c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # long bogus character name
1270c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(
1280c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            UnicodeError,
1290c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
1300c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        )
1310c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # missing closing brace
1320c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(
1330c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            UnicodeError,
1340c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
1350c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        )
1360c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # missing opening brace
1370c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaises(
1380c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            UnicodeError,
1390c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            unicode, "\\NSPACE", 'unicode-escape', 'strict'
1400c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        )
1410c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
1420c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX,
1430c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                         "needs UINT_MAX < SIZE_MAX")
1440c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    @unittest.skipUnless(_testcapi.UINT_MAX < sys.maxint,
1450c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                         "needs UINT_MAX < sys.maxint")
1460c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    @test_support.bigmemtest(minsize=_testcapi.UINT_MAX + 1,
1470c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                             memuse=2 + 4 // len(u'\U00010000'))
1480c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi    def test_issue16335(self, size):
1490c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        func = self.test_issue16335
1500c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        if size < func.minsize:
1510c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            raise unittest.SkipTest("not enough memory: %.1fG minimum needed" %
1520c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                    (func.minsize * func.memuse / float(1024**3),))
1530c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        # very very long bogus character name
1540c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        x = b'\\N{SPACE' + b'x' * int(_testcapi.UINT_MAX + 1) + b'}'
1550c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertEqual(len(x), len(b'\\N{SPACE}') +
1560c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi                                    (_testcapi.UINT_MAX + 1))
1570c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        self.assertRaisesRegexp(UnicodeError,
1580c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            'unknown Unicode character name',
1590c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi            x.decode, 'unicode-escape'
1600c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi        )
1610c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
1620c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
def test_main():
    """Run every test defined by UnicodeNamesTest via test_support."""
    cases = (UnicodeNamesTest,)
    test_support.run_unittest(*cases)
1650c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# Run the test suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test_main()
168