""" Test script for the unicodedata module.

    Written by Marc-Andre Lemburg (mal@lemburg.com).

    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""

import sys
import unittest
import hashlib
import subprocess
import test.test_support

# Encoding applied to the joined unicode results before they are hashed.
encoding = 'utf-8'


### Run tests

class UnicodeMethodsTest(unittest.TestCase):
    """Checksum test of the unicode string predicate and mapping methods."""

    # update this, if the database changes
    expectedchecksum = '4504dffd035baea02c5b9de82bebc3d65e0e0baf'

    def test_method_checksum(self):
        # Feed the result of every predicate/mapping method, for every code
        # point below 0x10000, into one SHA-1 and compare the digest with
        # the recorded value.  The order of the entries in `data` is part of
        # the checksum and must not be changed.
        h = hashlib.sha1()
        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Predicates (single char); the boolean result indexes
                # u"01", yielding u"0" or u"1".
                u"01"[char.isalnum()],
                u"01"[char.isalpha()],
                u"01"[char.isdecimal()],
                u"01"[char.isdigit()],
                u"01"[char.islower()],
                u"01"[char.isnumeric()],
                u"01"[char.isspace()],
                u"01"[char.istitle()],
                u"01"[char.isupper()],

                # Predicates (multiple chars)
                u"01"[(char + u'abc').isalnum()],
                u"01"[(char + u'abc').isalpha()],
                u"01"[(char + u'123').isdecimal()],
                u"01"[(char + u'123').isdigit()],
                u"01"[(char + u'abc').islower()],
                u"01"[(char + u'123').isnumeric()],
                u"01"[(char + u' \t').isspace()],
                u"01"[(char + u'abc').istitle()],
                u"01"[(char + u'ABC').isupper()],

                # Mappings (single char)
                char.lower(),
                char.upper(),
                char.title(),

                # Mappings (multiple chars)
                (char + u'abc').lower(),
                (char + u'ABC').upper(),
                (char + u'abc').title(),
                (char + u'ABC').title(),

                ]
            h.update(u''.join(data).encode(encoding))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)


class UnicodeDatabaseTest(unittest.TestCase):
    """Base class that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # In case unicodedata is not available, this will raise an ImportError,
        # but the other test cases will still be run
        import unicodedata
        self.db = unicodedata

    def tearDown(self):
        del self.db

class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests of the per-character lookup functions reached via ``self.db``."""

    # update this, if the database changes
    expectedchecksum = '6ccf1b1a36460d2694f9b0b0f0324942fe70ede6'

    def test_function_checksum(self):
        # Hash the string form of every property, for every code point below
        # 0x10000, and compare the SHA-1 digest with the recorded value.  The
        # order of the entries in `data` is part of the checksum.
        h = hashlib.sha1()

        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)
        self.assertEqual(self.db.digit(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
        self.assertEqual(self.db.numeric(u'\ua627'), 7.0)
        self.assertEqual(self.db.numeric(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)
        self.assertEqual(self.db.decimal(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')
        self.assertEqual(self.db.category(u'\U00020000'), 'Lo')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')
        self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'),
                         '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
        self.assertEqual(self.db.mirrored(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)
        self.assertEqual(self.db.combining(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_pr29(self):
        # http://www.unicode.org/review/pr-29.html
        # See issues #1054943 and #10254.
        # Each string below is expected to come back unchanged from NFC.
        # The last fragment is a unicode literal (u'...'); written as
        # 'u\u0938...' it would be a byte string holding a literal "u" and
        # unprocessed backslash escapes, so the tail would never be tested.
        composed = (u"\u0b47\u0300\u0b3e", u"\u1100\u0300\u1161",
                    u'Li\u030dt-s\u1e73\u0301',
                    u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c'
                    + u'\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
                    u'\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c'
                    + u'\u0938\u094d\u0924\u093e\u0928')
        for text in composed:
            self.assertEqual(self.db.normalize('NFC', text), text)

    def test_issue10254(self):
        # Crash reported in #10254
        a = u'C\u0338' * 20 + u'C\u0327'
        b = u'C\u0338' * 20 + u'\xC7'
        self.assertEqual(self.db.normalize('NFC', a), b)

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
        self.assertEqual(eaw(u'\U00020000'), 'W')


class UnicodeMiscTest(UnicodeDatabaseTest):
    """Regression tests for individual unicodedata / unicode method bugs."""

    def test_failed_import_during_compiling(self):
        # Issue 4367
        # Decoding \N escapes requires the unicodedata module. If it can't be
        # imported, we shouldn't segfault.

        # This program should raise a SyntaxError in the eval.
        # The backslash is doubled so that the \N escape reaches the child
        # interpreter verbatim instead of depending on how this literal is
        # parsed here.
        code = "import sys;" \
            "sys.modules['unicodedata'] = None;" \
            """eval("u'\\N{SOFT HYPHEN}'")"""
        args = [sys.executable, "-c", code]
        # We use a subprocess because the unicodedata module may already have
        # been loaded in this process.
        popen = subprocess.Popen(args, stderr=subprocess.PIPE)
        # communicate() drains stderr while waiting for the child and closes
        # the pipe afterwards; wait() followed by a separate read() can block
        # if the child fills the pipe, and leaves the pipe open.
        stderr = popen.communicate()[1]
        self.assertEqual(popen.returncode, 1)
        error = "SyntaxError: (unicode error) \\N escapes not supported " \
            "(can't load unicodedata module)"
        self.assertIn(error, stderr)

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            dec = self.db.decimal(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertGreaterEqual(count, 10)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            dec = self.db.digit(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertGreaterEqual(count, 10)

    def test_bug_1704793(self):
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')

    def test_ucd_510(self):
        # self.db is the unicodedata module bound in setUp().
        # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
        self.assertTrue(self.db.mirrored(u"\u0f3a"))
        self.assertFalse(self.db.ucd_3_2_0.mirrored(u"\u0f3a"))
        # Also, we now have two ways of representing
        # the upper-case mapping: as delta, or as absolute value
        self.assertEqual(u"a".upper(), u'A')
        self.assertEqual(u"\u1d79".upper(), u'\ua77d')
        self.assertEqual(u".".upper(), u".")

    def test_bug_5828(self):
        self.assertEqual(u"\u1d79".lower(), u"\u1d79")

        def maps_to_nul(code):
            # True if any case mapping of the code point contains U+0000.
            char = unichr(code)
            return u"\x00" in char.lower() + char.upper() + char.title()

        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
        # (xrange avoids building a list of every code point up front).
        self.assertEqual(
            [c for c in xrange(sys.maxunicode + 1) if maps_to_nul(c)],
            [0]
        )

    def test_bug_4971(self):
        # LETTER DZ WITH CARON: DZ, Dz, dz
        self.assertEqual(u"\u01c4".title(), u"\u01c5")
        self.assertEqual(u"\u01c5".title(), u"\u01c5")
        self.assertEqual(u"\u01c6".title(), u"\u01c5")

    def test_linebreak_7643(self):
        # The only code points below 0x10000 that splitlines() is expected
        # to treat as line boundaries.
        linebreaks = frozenset((0x0a, 0x0b, 0x0c, 0x0d, 0x85,
                                0x1c, 0x1d, 0x1e, 0x2028, 0x2029))
        for i in xrange(0x10000):
            lines = (unichr(i) + u'A').splitlines()
            if i in linebreaks:
                self.assertEqual(len(lines), 2,
                                 r"\u%.4x should be a linebreak" % i)
            else:
                self.assertEqual(len(lines), 1,
                                 r"\u%.4x should not be a linebreak" % i)


def test_main():
    # Run the three concrete test classes through the regression-test helper.
    test.test_support.run_unittest(
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest
    )


if __name__ == "__main__":
    test_main()