/*
 *******************************************************************************
 * Copyright (C) 1996-2004, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.normalizer;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Random;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestFmwk;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacterCategory;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class NormalizationMonkeyTest extends TestFmwk {
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int loopCount = 100;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int maxCharCount = 20;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int maxCodePoint = 0x10ffff;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    Random random = null; // initialized in getTestSource
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    UnicodeNormalizer unicode_NFD;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    UnicodeNormalizer unicode_NFC;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    UnicodeNormalizer unicode_NFKD;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    UnicodeNormalizer unicode_NFKC;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public NormalizationMonkeyTest() {
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static void main(String[] args) throws Exception {
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        new NormalizationMonkeyTest().run(args);
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestNormalize() {
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (unicode_NFD == null) {
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                unicode_NFD = new UnicodeNormalizer(UnicodeNormalizer.D, true);
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                unicode_NFC = new UnicodeNormalizer(UnicodeNormalizer.C, true);
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                unicode_NFKD = new UnicodeNormalizer(UnicodeNormalizer.KD, true);
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                unicode_NFKC = new UnicodeNormalizer(UnicodeNormalizer.KC, true);
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            catch (Exception e) {
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String msg = "Normalization tests could not be run: " + e.getMessage();
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (isModularBuild()) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    warnln(msg);
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    errln(msg);
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i = 0;
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (i < loopCount) {
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String source = getTestSource();
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("Test source:" + source);
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //NFD
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String uncodeNorm = unicode_NFD.normalize(source);
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String icuNorm = Normalizer.normalize(source, Normalizer.NFD);
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFD(Unicode): " + uncodeNorm);
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFD(icu4j)  : " + icuNorm);
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!uncodeNorm.equals(icuNorm)) {
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("NFD: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //NFC
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            uncodeNorm = unicode_NFC.normalize(source);
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            icuNorm = Normalizer.normalize(source, Normalizer.NFC);
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFC(Unicode): " + uncodeNorm);
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFC(icu4j)  : " + icuNorm);
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!uncodeNorm.equals(icuNorm)) {
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("NFC: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //NFKD
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            uncodeNorm = unicode_NFKD.normalize(source);
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            icuNorm = Normalizer.normalize(source, Normalizer.NFKD);
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFKD(Unicode): " + uncodeNorm);
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFKD(icu4j)  : " + icuNorm);
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!uncodeNorm.equals(icuNorm)) {
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("NFKD: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //NFKC
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            uncodeNorm = unicode_NFKC.normalize(source);
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            icuNorm = Normalizer.normalize(source, Normalizer.NFKC);
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFKC(Unicode): " + uncodeNorm);
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("\tNFKC(icu4j)  : " + icuNorm);
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!uncodeNorm.equals(icuNorm)) {
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("NFKC: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i++;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String getTestSource() {
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    if (random == null) {
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        random = createRandom(); // use test framework's random seed
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String source = "";
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i = 0;
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (i < (random.nextInt(maxCharCount) + 1)) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int codepoint = random.nextInt(maxCodePoint);
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //Elimate unassigned characters
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (UCharacter.getType(codepoint) == UCharacterCategory.UNASSIGNED) {
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                codepoint = random.nextInt(maxCodePoint);
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            source = source + UTF16.valueOf(codepoint);
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i++;
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return source;
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
111