19405609c1725ea86c3cdc9a9ac665649d80d62c6Collin Winterfrom test.test_support import run_unittest, open_urlresource
29405609c1725ea86c3cdc9a9ac665649d80d62c6Collin Winterimport unittest
39405609c1725ea86c3cdc9a9ac665649d80d62c6Collin Winter
42dab865ff177f167886396bb162285f6dd24e668Antoine Pitroufrom httplib import HTTPException
5677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwisimport sys
61b445d3fcfcc06e5360e83b978efdb9b1c980278Tim Petersimport os
74dd3a50ca480eef7bd898cfbfef8377231e18ae9Martin v. Löwisfrom unicodedata import normalize, unidata_version
81b445d3fcfcc06e5360e83b978efdb9b1c980278Tim Peters
# Name of the conformance data file, and the URL of the copy that matches
# the Unicode database version this interpreter was built with.
TESTDATAFILE = "NormalizationTest.txt"
TESTDATAURL = "http://www.pythontest.net/unicode/%s/%s" % (
    unidata_version, TESTDATAFILE)
114dd3a50ca480eef7bd898cfbfef8377231e18ae9Martin v. Löwis
def check_version(testfile):
    """Return True if the first line of *testfile* mentions unidata_version.

    Exactly one line is consumed from *testfile*; later lines are not
    inspected.
    """
    first_line = testfile.readline()
    found = unidata_version in first_line
    return found
15677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
class RangeError(Exception):
    """Raised by unistr() for a code point greater than sys.maxunicode."""
18677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
def NFC(str):
    """Return *str* normalized to Unicode form NFC."""
    composed = normalize("NFC", str)
    return composed
21677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
def NFKC(str):
    """Return *str* normalized to Unicode form NFKC."""
    composed = normalize("NFKC", str)
    return composed
24677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
def NFD(str):
    """Return *str* normalized to Unicode form NFD."""
    decomposed = normalize("NFD", str)
    return decomposed
27677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
def NFKD(str):
    """Return *str* normalized to Unicode form NFKD."""
    decomposed = normalize("NFKD", str)
    return decomposed
30677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    All fields are parsed before any range check, so a malformed field
    raises ValueError even if another field is out of range.

    Raises RangeError if any code point is greater than sys.maxunicode.
    """
    codepoints = [int(field, 16) for field in data.split(" ")]
    if any(cp > sys.maxunicode for cp in codepoints):
        raise RangeError
    return u"".join(map(unichr, codepoints))
370d4c06e06e5ee1f3bb1fa8068114bd700d74864aNeal Norwitz
class NormalizationTest(unittest.TestCase):
    """Checks unicodedata.normalize() against the NormalizationTest.txt data."""

    def test_main(self):
        """Check every data line, then every code point not listed in Part 1.

        Each data line holds five ';'-separated columns (c1..c5) of
        space-separated hex code points; whatever follows the last ';' is
        dropped.  Lines naming code points above sys.maxunicode are skipped.
        The whole test is skipped when the data file cannot be retrieved.
        """
        part = None
        # Source strings (column c1) seen while in @Part1; only membership
        # is ever queried, so a set is sufficient.
        part1_data = set()
        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, check_version)
        except (IOError, HTTPException):
            self.skipTest("Could not retrieve " + TESTDATAURL)
        # Close the data file when the test ends, including when an
        # assertion fails part-way through the file.
        self.addCleanup(testdata.close)

        for line in testdata:
            # Drop any trailing '#' comment and surrounding whitespace.
            line = line.split('#')[0].strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            try:
                c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(line.split(';')[0])
                    except RangeError:
                        pass
                    else:
                        part1_data.add(c1)
                continue

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                            NFKC(c3) == NFKC(c4) == NFKC(c5),
                            line)
            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                            NFKD(c3) == NFKD(c4) == NFKD(c5),
                            line)

            # Record part 1 data
            if part == "@Part1":
                part1_data.add(c1)

        # Perform tests for all other data: a character that was not
        # recorded from Part 1 must be left unchanged by all four forms.
        for c in range(sys.maxunicode + 1):
            X = unichr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Regression check for bug 834676.

        There is no assertion; the call only has to complete without raising.
        """
        normalize('NFC', u'\ud55c\uae00')
96677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
97677bde2dd14ac2c8f170779adcc732f991db8bd6Martin v. Löwis
def test_main():
    """Run the NormalizationTest test case through run_unittest."""
    run_unittest(NormalizationTest)
100d2171d2ba414def2ecf27b694ea27c2e9fde0fcfMartin v. Löwis
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
103