# test_normalization.py revision d2171d2ba414def2ecf27b694ea27c2e9fde0fcf
from test.test_support import verbose, TestFailed, TestSkipped, verify
import sys
import os
from unicodedata import normalize

TESTDATAFILE = "NormalizationTest-3.2.0" + os.extsep + "txt"

# This search allows using a build directory just inside the source
# directory, and saving just one copy of the test data in the source
# tree, rather than having a copy in each build directory.
# There might be a better way to do this.

for path in [os.path.curdir, os.path.pardir]:
    fn = os.path.join(path, TESTDATAFILE)
    skip_expected = not os.path.exists(fn)
    if not skip_expected:
        TESTDATAFILE = fn
        break


class RangeError(Exception):
    """Raised by unistr() when a code point exceeds sys.maxunicode."""


def NFC(str):
    """Return the NFC normalization of the unicode string *str*."""
    return normalize("NFC", str)


def NFKC(str):
    """Return the NFKC normalization of the unicode string *str*."""
    return normalize("NFKC", str)


def NFD(str):
    """Return the NFD normalization of the unicode string *str*."""
    return normalize("NFD", str)


def NFKD(str):
    """Return the NFKD normalization of the unicode string *str*."""
    return normalize("NFKD", str)


def unistr(data):
    """Convert space-separated hexadecimal code points to a unicode string.

    Raises RangeError if any code point is larger than sys.maxunicode,
    i.e. cannot be represented by unichr() on this build.
    """
    codepoints = [int(x, 16) for x in data.split(" ")]
    for codepoint in codepoints:
        if codepoint > sys.maxunicode:
            raise RangeError
    return u"".join([unichr(codepoint) for codepoint in codepoints])


def test_main():
    """Check unicodedata.normalize() against the normalization test data.

    Raises TestSkipped when the data file was not found in the current or
    parent directory.  Individual checks go through verify() (imported
    from test.test_support), which is expected to raise when a condition
    is false.
    """
    if skip_expected:
        raise TestSkipped(TESTDATAFILE + " not found, download from " +
                    "http://www.unicode.org/Public/3.2-Update/" + TESTDATAFILE)

    # Name of the "@Part..." section currently being read.  Initialised so
    # that a data row appearing before any section header cannot trigger
    # an UnboundLocalError further down.
    part = None
    part1_data = {}

    datafile = open(TESTDATAFILE)
    try:
        for line in datafile:
            # Strip trailing comments and surrounding whitespace.
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line
                continue
            try:
                # Every data row ends with ';', so the last split field is
                # dropped; the remaining five fields are the test columns.
                c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters
                continue

            if verbose:
                print(line)

            # Perform tests
            verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            verify(c4 == NFC(c4) == NFC(c5), line)
            verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            verify(c5 == NFD(c4) == NFD(c5), line)
            verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5),
                   line)
            verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5),
                   line)

            # Record part 1 data
            if part == "@Part1":
                part1_data[c1] = 1
    finally:
        # Release the file handle even when a check fails.
        datafile.close()

    # Perform tests for all other data: every character that is not a
    # Part 1 source string must be unchanged by all four forms.
    for c in range(sys.maxunicode+1):
        X = unichr(c)
        if X in part1_data:
            continue
        # verify() rather than assert, so the check is not stripped under
        # -O and fails the same way as the checks above.
        verify(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), "U+%04X" % c)

    # Check for bug 834676
    normalize('NFC', u'\ud55c\uae00')


if __name__ == "__main__":
    test_main()