1""" Test script for the Unicode implementation.
2
3Written by Bill Tutt.
4Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
8"""#"
9
10import unittest
11import sys
12import _testcapi
13
14from test import test_support
15
16class UnicodeNamesTest(unittest.TestCase):
17
18    def checkletter(self, name, code):
19        # Helper that put all \N escapes inside eval'd raw strings,
20        # to make sure this script runs even if the compiler
21        # chokes on \N escapes
22        res = eval(ur'u"\N{%s}"' % name)
23        self.assertEqual(res, code)
24        return res
25
26    def test_general(self):
27        # General and case insensitivity test:
28        chars = [
29            "LATIN CAPITAL LETTER T",
30            "LATIN SMALL LETTER H",
31            "LATIN SMALL LETTER E",
32            "SPACE",
33            "LATIN SMALL LETTER R",
34            "LATIN CAPITAL LETTER E",
35            "LATIN SMALL LETTER D",
36            "SPACE",
37            "LATIN SMALL LETTER f",
38            "LATIN CAPITAL LeTtEr o",
39            "LATIN SMaLl LETTER x",
40            "SPACE",
41            "LATIN SMALL LETTER A",
42            "LATIN SMALL LETTER T",
43            "LATIN SMALL LETTER E",
44            "SPACE",
45            "LATIN SMALL LETTER T",
46            "LATIN SMALL LETTER H",
47            "LATIN SMALL LETTER E",
48            "SpAcE",
49            "LATIN SMALL LETTER S",
50            "LATIN SMALL LETTER H",
51            "LATIN small LETTER e",
52            "LATIN small LETTER e",
53            "LATIN SMALL LETTER P",
54            "FULL STOP"
55        ]
56        string = u"The rEd fOx ate the sheep."
57
58        self.assertEqual(
59            u"".join([self.checkletter(*args) for args in zip(chars, string)]),
60            string
61        )
62
63    def test_ascii_letters(self):
64        import unicodedata
65
66        for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
67            name = "LATIN SMALL LETTER %s" % char.upper()
68            code = unicodedata.lookup(name)
69            self.assertEqual(unicodedata.name(code), name)
70
71    def test_hangul_syllables(self):
72        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
73        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
74        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
75        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
76        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
77        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
78        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
79        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
80        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
81        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
82        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
83        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
84        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
85
86        import unicodedata
87        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
88
89    def test_cjk_unified_ideographs(self):
90        self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
91        self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
92        self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
93        self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
94        self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
95        self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
96
97    def test_bmp_characters(self):
98        import unicodedata
99        count = 0
100        for code in xrange(0x10000):
101            char = unichr(code)
102            name = unicodedata.name(char, None)
103            if name is not None:
104                self.assertEqual(unicodedata.lookup(name), char)
105                count += 1
106
107    def test_misc_symbols(self):
108        self.checkletter("PILCROW SIGN", u"\u00b6")
109        self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
110        self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
111        self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
112
113    def test_errors(self):
114        import unicodedata
115        self.assertRaises(TypeError, unicodedata.name)
116        self.assertRaises(TypeError, unicodedata.name, u'xx')
117        self.assertRaises(TypeError, unicodedata.lookup)
118        self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
119
120    def test_strict_eror_handling(self):
121        # bogus character name
122        self.assertRaises(
123            UnicodeError,
124            unicode, "\\N{blah}", 'unicode-escape', 'strict'
125        )
126        # long bogus character name
127        self.assertRaises(
128            UnicodeError,
129            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
130        )
131        # missing closing brace
132        self.assertRaises(
133            UnicodeError,
134            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
135        )
136        # missing opening brace
137        self.assertRaises(
138            UnicodeError,
139            unicode, "\\NSPACE", 'unicode-escape', 'strict'
140        )
141
142    @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX,
143                         "needs UINT_MAX < SIZE_MAX")
144    @unittest.skipUnless(_testcapi.UINT_MAX < sys.maxint,
145                         "needs UINT_MAX < sys.maxint")
146    @test_support.bigmemtest(minsize=_testcapi.UINT_MAX + 1,
147                             memuse=2 + 4 // len(u'\U00010000'))
148    def test_issue16335(self, size):
149        func = self.test_issue16335
150        if size < func.minsize:
151            raise unittest.SkipTest("not enough memory: %.1fG minimum needed" %
152                    (func.minsize * func.memuse / float(1024**3),))
153        # very very long bogus character name
154        x = b'\\N{SPACE' + b'x' * int(_testcapi.UINT_MAX + 1) + b'}'
155        self.assertEqual(len(x), len(b'\\N{SPACE}') +
156                                    (_testcapi.UINT_MAX + 1))
157        self.assertRaisesRegexp(UnicodeError,
158            'unknown Unicode character name',
159            x.decode, 'unicode-escape'
160        )
161
162
def test_main():
    """Run every test case of this module through test_support."""
    test_cases = (UnicodeNamesTest,)
    test_support.run_unittest(*test_cases)
165
# Run the tests only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_main()
168