# test_normalization.py revision 677bde2dd14ac2c8f170779adcc732f991db8bd6
"""Check unicodedata.normalize() against NormalizationTest.txt.

Each data row of the file holds ';'-separated columns; the first five are
read here as c1..c5, each a space-separated list of hexadecimal code
points.  '#' starts a comment and lines starting with "@Part" switch the
current section.  For every row the NFC/NFD/NFKC/NFKD results of the
columns are compared with each other, and afterwards every code point
that did not appear as c1 in "@Part1" is required to be unchanged by all
four normalization forms.
"""
from test.test_support import verbose, TestFailed, TestSkipped, verify
import sys
from unicodedata import normalize

try:
    testfile = open("NormalizationTest.txt", "r")
    # Close the handle explicitly instead of leaving it to the garbage
    # collector.  The try/finally is nested inside the try/except so that
    # an IOError raised while reading is still turned into TestSkipped.
    try:
        data = testfile.readlines()
    finally:
        testfile.close()
except IOError:
    raise TestSkipped("NormalizationTest.txt not found, download from http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt")


class RangeError(Exception):
    """Raised by unistr() for a code point above sys.maxunicode."""
    pass


def NFC(str):
    """Return str normalized to form NFC."""
    return normalize("NFC", str)


def NFKC(str):
    """Return str normalized to form NFKC."""
    return normalize("NFKC", str)


def NFD(str):
    """Return str normalized to form NFD."""
    return normalize("NFD", str)


def NFKD(str):
    """Return str normalized to form NFKD."""
    return normalize("NFKD", str)


def unistr(data):
    """Convert a space-separated list of hex code points to a unicode string.

    Raises RangeError if any code point is larger than sys.maxunicode,
    i.e. cannot be passed to unichr() on this interpreter.
    """
    codepoints = [int(x, 16) for x in data.split(" ")]
    for x in codepoints:
        if x > sys.maxunicode:
            raise RangeError
    return u"".join([unichr(x) for x in codepoints])


# Keys are the c1 strings of all "@Part1" rows; the values are unused.
part1_data = {}
# Current "@Part..." header.  Starts as None so that a data row appearing
# before the first header does not fail with a NameError below.
part = None
for line in data:
    if '#' in line:
        line = line.split('#')[0]
    line = line.strip()
    if not line:
        continue
    if line.startswith("@Part"):
        part = line
        continue
    try:
        # The text after the last ';' is dropped; exactly five columns
        # must remain.
        c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
    except RangeError:
        # Skip unsupported characters
        continue

    if verbose:
        print(line)

    # Perform tests
    verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
    verify(c4 == NFC(c4) == NFC(c5), line)
    verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
    verify(c5 == NFD(c4) == NFD(c5), line)
    verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), line)
    verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), line)

    # Record part 1 data
    if part == "@Part1":
        part1_data[c1] = 1

# Perform tests for all other data
# xrange avoids building a list of every code point up front.
for c in xrange(sys.maxunicode + 1):
    X = unichr(c)
    if X in part1_data:
        continue
    # verify() rather than assert: assert statements are removed under -O,
    # and the checks above already report failures through verify().
    verify(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)