""" Test script for the unicodedata module.

    Written by Marc-Andre Lemburg (mal@lemburg.com).

    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport sys
1083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport unittest
1183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport hashlib
1283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport subprocess
1383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport test.test_support
1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Codec used by UnicodeMethodsTest.test_method_checksum to turn each
# per-character unicode record into bytes before it is fed to the hash.
encoding = 'utf-8'
1683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### Run tests
1983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class UnicodeMethodsTest(unittest.TestCase):
    """Checksum the unicode string predicates and case mappings."""

    # update this, if the database changes
    expectedchecksum = '4504dffd035baea02c5b9de82bebc3d65e0e0baf'

    def test_method_checksum(self):
        # Build one record per code point in U+0000..U+FFFF, feed every
        # record to a single SHA-1 and compare the final hex digest with
        # expectedchecksum.  The order of the entries below defines the
        # hashed byte stream, so it must not be changed.
        digest = hashlib.sha1()
        for codepoint in range(0x10000):
            char = unichr(codepoint)
            with_lower = char + u'abc'
            with_upper = char + u'ABC'
            with_digits = char + u'123'

            answers = (
                # Predicates (single char)
                char.isalnum(),
                char.isalpha(),
                char.isdecimal(),
                char.isdigit(),
                char.islower(),
                char.isnumeric(),
                char.isspace(),
                char.istitle(),
                char.isupper(),

                # Predicates (multiple chars)
                with_lower.isalnum(),
                with_lower.isalpha(),
                with_digits.isdecimal(),
                with_digits.isdigit(),
                with_lower.islower(),
                with_digits.isnumeric(),
                (char + u' \t').isspace(),
                with_lower.istitle(),
                with_upper.isupper(),
            )
            mappings = (
                # Mappings (single char)
                char.lower(),
                char.upper(),
                char.title(),

                # Mappings (multiple chars)
                with_lower.lower(),
                with_upper.upper(),
                with_lower.title(),
                with_upper.title(),
            )

            # Each predicate contributes u"1" when true and u"0" when false.
            flags = u''.join([u"1" if answer else u"0" for answer in answers])
            record = flags + u''.join(mappings)
            digest.update(record.encode(encoding))
        self.assertEqual(digest.hexdigest(), self.expectedchecksum)
6783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class UnicodeDatabaseTest(unittest.TestCase):
    """Base fixture that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import happens per test rather than at module level: if
        # unicodedata is not available this raises an ImportError here,
        # but the other test cases will still be run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the reference that setUp() stored on the instance.
        delattr(self, 'db')
7883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Exercise the per-character lookup functions of the unicodedata module.

    The module under test is reached through ``self.db``, which
    UnicodeDatabaseTest.setUp() assigns before every test.
    """

    # update this, if the database changes
    expectedchecksum = '6ccf1b1a36460d2694f9b0b0f0324942fe70ede6'

    def test_function_checksum(self):
        # Concatenate every property of each code point in U+0000..U+FFFF,
        # hash the lot with SHA-1 and compare with expectedchecksum.
        h = hashlib.sha1()

        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # With a default, digit() returns it for characters that have no
        # digit value; without one it raises ValueError.
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)
        self.assertEqual(self.db.digit(u'\U00020000', None), None)

        # Missing argument or a two-character string -> TypeError.
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        # numeric() also covers fractions (U+215B -> 0.125) and falls back
        # to the default / ValueError like digit() does.
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
        self.assertEqual(self.db.numeric(u'\ua627'), 7.0)
        self.assertEqual(self.db.numeric(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        # U+215B and U+2468 have numeric values (see test_numeric) but no
        # decimal value, so the default comes back for them.
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)
        self.assertEqual(self.db.decimal(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        # General category of a few BMP characters and one astral one.
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')
        self.assertEqual(self.db.category(u'\U00020000'), 'Lo')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        # U+FFFE is expected to yield an empty bidirectional class.
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')
        self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        # The decomposition is returned as a string of a tag followed by
        # hexadecimal code points; it is empty when there is none.
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'),
                         '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        # mirrored() yields 1 for U+2201 and 0 for the other samples.
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
        self.assertEqual(self.db.mirrored(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        # combining() yields the canonical combining class (230 for U+20E1).
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)
        self.assertEqual(self.db.combining(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_pr29(self):
        # http://www.unicode.org/review/pr-29.html
        # See issues #1054943 and #10254.
        # Every sample must come back unchanged from NFC normalization.
        composed = (u"\u0b47\u0300\u0b3e", u"\u1100\u0300\u1161",
                    u'Li\u030dt-s\u1e73\u0301',
                    u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c'
                    + u'\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
                    # The continuation has to be a unicode literal too: in a
                    # byte string literal the \u escapes are not decoded and
                    # the sample would end in plain ASCII text.
                    u'\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c'
                    + u'\u0938\u094d\u0924\u093e\u0928')
        for text in composed:
            self.assertEqual(self.db.normalize('NFC', text), text)

    def test_issue10254(self):
        # Crash reported in #10254
        a = u'C\u0338' * 20 + u'C\u0327'
        b = u'C\u0338' * 20 + u'\xC7'
        self.assertEqual(self.db.normalize('NFC', a), b)

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A byte string, an empty string and a two-character string are
        # all rejected with TypeError.
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
        self.assertEqual(eaw(u'\U00020000'), 'W')
21983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Regression tests for individual bug reports and consistency checks.

    The unicodedata module is reached through ``self.db``, which
    UnicodeDatabaseTest.setUp() assigns before every test.
    """

    def test_failed_import_during_compiling(self):
        # Issue 4367
        # Decoding \N escapes requires the unicodedata module. If it can't be
        # imported, we shouldn't segfault.

        # This program should raise a SyntaxError in the eval.
        # The backslash is doubled so the child receives a literal \N
        # without this file relying on an unrecognised escape sequence.
        code = ("import sys;"
                "sys.modules['unicodedata'] = None;"
                """eval("u'\\N{SOFT HYPHEN}'")""")
        args = [sys.executable, "-c", code]
        # We use a subprocess because the unicodedata module may already have
        # been loaded in this process.
        popen = subprocess.Popen(args, stderr=subprocess.PIPE)
        # communicate() drains stderr while waiting for the child and closes
        # the pipe afterwards; wait() followed by read() can deadlock once
        # the child has filled the pipe buffer.
        stderr = popen.communicate()[1]
        self.assertEqual(popen.returncode, 1)
        error = ("SyntaxError: (unicode error) \\N escapes not supported "
                 "(can't load unicodedata module)")
        self.assertIn(error, stderr)

    def _check_consistent_with_numeric(self, lookup):
        # Shared body of the two *_numeric_consistent tests.  `lookup` is
        # self.db.decimal or self.db.digit; every character in
        # U+0000..U+FFFF for which it yields a value must have the same
        # numeric() value.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            value = lookup(c, -1)
            if value != -1:
                self.assertEqual(value, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertGreaterEqual(count, 10)

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.digit)

    def test_bug_1704793(self):
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')

    def test_ucd_510(self):
        # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
        self.assertTrue(self.db.mirrored(u"\u0f3a"))
        self.assertFalse(self.db.ucd_3_2_0.mirrored(u"\u0f3a"))
        # Also, we now have two ways of representing
        # the upper-case mapping: as delta, or as absolute value
        self.assertEqual(u"a".upper(), u'A')
        self.assertEqual(u"\u1d79".upper(), u'\ua77d')
        self.assertEqual(u".".upper(), u".")

    def test_bug_5828(self):
        self.assertEqual(u"\u1d79".lower(), u"\u1d79")
        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
        nul_mapped = []
        # xrange avoids materialising a list of sys.maxunicode+1 integers.
        for c in xrange(sys.maxunicode + 1):
            char = unichr(c)
            if u"\x00" in char.lower() + char.upper() + char.title():
                nul_mapped.append(c)
        self.assertEqual(nul_mapped, [0])

    def test_bug_4971(self):
        # LETTER DZ WITH CARON: DZ, Dz, dz
        self.assertEqual(u"\u01c4".title(), u"\u01c5")
        self.assertEqual(u"\u01c5".title(), u"\u01c5")
        self.assertEqual(u"\u01c6".title(), u"\u01c5")

    def test_linebreak_7643(self):
        # Within U+0000..U+FFFF, exactly these code points must be treated
        # as line boundaries by unicode.splitlines().
        linebreaks = frozenset((0x0a, 0x0b, 0x0c, 0x0d, 0x85,
                                0x1c, 0x1d, 0x1e, 0x2028, 0x2029))
        for i in range(0x10000):
            lines = (unichr(i) + u'A').splitlines()
            if i in linebreaks:
                self.assertEqual(len(lines), 2,
                                 r"\u%.4x should be a linebreak" % i)
            else:
                self.assertEqual(len(lines), 1,
                                 r"\u%.4x should not be a linebreak" % i)
30883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def test_main():
    # Pass the three concrete test classes to
    # test.test_support.run_unittest, in this order.
    test_classes = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()
318