"""Conformance test for unicodedata.normalize().

The test data file (TESTDATAFILE) is fetched from TESTDATAURL for the
Unicode database version reported by unicodedata.unidata_version.
"""

from test.test_support import run_unittest, open_urlresource
import unittest

from httplib import HTTPException
import sys
import os
from unicodedata import normalize, unidata_version

TESTDATAFILE = "NormalizationTest.txt"
TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE


def check_version(testfile):
    """Return True if the first line of *testfile* mentions unidata_version.

    Passed to open_urlresource() as its check callback.
    """
    hdr = testfile.readline()
    return unidata_version in hdr


class RangeError(Exception):
    """Raised by unistr() for a code point greater than sys.maxunicode."""


def NFC(str):
    """Return *str* normalized to form NFC."""
    return normalize("NFC", str)


def NFKC(str):
    """Return *str* normalized to form NFKC."""
    return normalize("NFKC", str)


def NFD(str):
    """Return *str* normalized to form NFD."""
    return normalize("NFD", str)


def NFKD(str):
    """Return *str* normalized to form NFKD."""
    return normalize("NFKD", str)


def unistr(data):
    """Convert space-separated hexadecimal code points to a unicode string.

    Raises RangeError if any code point is greater than sys.maxunicode.
    """
    # Parse every field before range-checking, so a malformed field is
    # reported by int() ahead of any RangeError.
    code_points = [int(x, 16) for x in data.split(" ")]
    if any(cp > sys.maxunicode for cp in code_points):
        raise RangeError
    return u"".join([unichr(cp) for cp in code_points])


class NormalizationTest(unittest.TestCase):
    def test_main(self):
        """Check normalize() against every data line of the test file.

        The test is skipped when the data file cannot be retrieved.
        """
        part = None
        # First-column strings seen while in "@Part1"; they are excluded
        # from the identity check over all code points at the end.
        part1_data = set()
        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, check_version)
        except (IOError, HTTPException):
            self.skipTest("Could not retrieve " + TESTDATAURL)
        # Close the data file even when an assertion below fails.
        self.addCleanup(testdata.close)

        for line in testdata:
            # Drop trailing comments and surrounding whitespace.
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            try:
                # [:-1] discards the field after the final ';'.
                c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(line.split(';')[0])
                    except RangeError:
                        pass
                    else:
                        part1_data.add(c1)
                continue

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                            NFKC(c3) == NFKC(c4) == NFKC(c5),
                            line)
            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                            NFKD(c3) == NFKD(c4) == NFKD(c5),
                            line)

            # Record part 1 data
            if part == "@Part1":
                part1_data.add(c1)

        # Perform tests for all other data
        for c in range(sys.maxunicode+1):
            X = unichr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        # Check for bug 834676
        normalize('NFC', u'\ud55c\uae00')


def test_main():
    """Run NormalizationTest through run_unittest()."""
    run_unittest(NormalizationTest)


if __name__ == "__main__":
    test_main()