/*
 *******************************************************************************
 * Copyright (C) 1996-2004, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package com.ibm.icu.dev.test.normalizer;

import java.util.Random;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UTF16;

/**
 * Randomized ("monkey") test that normalizes randomly generated strings with
 * both {@link Normalizer} and the test-local {@code UnicodeNormalizer}
 * (reported in the messages as the "Unicode sample") and reports an error
 * whenever the two outputs differ for NFD, NFC, NFKD or NFKC.
 */
public class NormalizationMonkeyTest extends TestFmwk {
    /** Number of random source strings checked per run of {@link #TestNormalize()}. */
    int loopCount = 100;
    /** Maximum number of code points in one generated source string. */
    int maxCharCount = 20;
    /** Exclusive upper bound passed to {@link Random#nextInt(int)} when drawing code points. */
    int maxCodePoint = 0x10ffff;
    Random random = null; // initialized in getTestSource

    UnicodeNormalizer unicode_NFD;
    UnicodeNormalizer unicode_NFC;
    UnicodeNormalizer unicode_NFKD;
    UnicodeNormalizer unicode_NFKC;

    public NormalizationMonkeyTest() {
    }

    public static void main(String[] args) throws Exception {
        new NormalizationMonkeyTest().run(args);
    }

    /**
     * Generates {@code loopCount} random strings and, for each of the four
     * normalization forms, compares the {@code UnicodeNormalizer} output with
     * the {@link Normalizer} output.
     *
     * <p>If the {@code UnicodeNormalizer} instances cannot be created, the
     * problem is reported (as a warning on modular builds, as an error
     * otherwise) and the test returns without running any comparison.
     */
    public void TestNormalize() {
        if (!ensureReferenceNormalizers()) {
            // Nothing to compare against; continuing would only dereference
            // the unset normalizer fields.
            return;
        }
        for (int i = 0; i < loopCount; i++) {
            String source = getTestSource();
            logln("Test source:" + source);
            compareForm("NFD", unicode_NFD, Normalizer.NFD, source);
            compareForm("NFC", unicode_NFC, Normalizer.NFC, source);
            compareForm("NFKD", unicode_NFKD, Normalizer.NFKD, source);
            compareForm("NFKC", unicode_NFKC, Normalizer.NFKC, source);
        }
    }

    /**
     * Lazily creates the four {@code UnicodeNormalizer} instances.
     *
     * <p>The instances are built into locals first and stored in the fields
     * only once all four exist, so a failure part-way through never leaves
     * the fields half-initialized for a later call.
     *
     * @return {@code true} if all four normalizers are available,
     *         {@code false} if construction failed (already reported)
     */
    private boolean ensureReferenceNormalizers() {
        if (unicode_NFD != null && unicode_NFC != null
                && unicode_NFKD != null && unicode_NFKC != null) {
            return true;
        }
        try {
            UnicodeNormalizer nfd = new UnicodeNormalizer(UnicodeNormalizer.D, true);
            UnicodeNormalizer nfc = new UnicodeNormalizer(UnicodeNormalizer.C, true);
            UnicodeNormalizer nfkd = new UnicodeNormalizer(UnicodeNormalizer.KD, true);
            UnicodeNormalizer nfkc = new UnicodeNormalizer(UnicodeNormalizer.KC, true);
            unicode_NFD = nfd;
            unicode_NFC = nfc;
            unicode_NFKD = nfkd;
            unicode_NFKC = nfkc;
            return true;
        } catch (Exception e) {
            String msg = "Normalization tests could not be run: " + e.getMessage();
            if (isModularBuild()) {
                warnln(msg);
            } else {
                errln(msg);
            }
            return false;
        }
    }

    /**
     * Normalizes {@code source} with both implementations for one form, logs
     * both results, and reports an error if they are not equal.
     *
     * @param form      form name used in the log and error messages, e.g. {@code "NFD"}
     * @param reference the {@code UnicodeNormalizer} configured for this form
     * @param mode      the matching {@link Normalizer} mode
     * @param source    the string to normalize
     */
    private void compareForm(String form, UnicodeNormalizer reference,
                             Normalizer.Mode mode, String source) {
        String unicodeNorm = reference.normalize(source);
        String icuNorm = Normalizer.normalize(source, mode);
        logln("\t" + form + "(Unicode): " + unicodeNorm);
        logln("\t" + form + "(icu4j) : " + icuNorm);
        if (!unicodeNorm.equals(icuNorm)) {
            errln(form + ": Unicode sample output => " + unicodeNorm
                    + "; icu4j output=> " + icuNorm);
        }
    }

    /**
     * Builds a random test string of 1 to {@code maxCharCount} code points,
     * skipping code points whose general category is unassigned.
     *
     * @return the generated string
     */
    String getTestSource() {
        if (random == null) {
            random = createRandom(); // use test framework's random seed
        }
        // Draw the length once. Re-rolling the bound on every loop test (as a
        // condition like "i < random.nextInt(max) + 1" does) skews the lengths
        // toward very short strings.
        int length = random.nextInt(maxCharCount) + 1;
        StringBuilder source = new StringBuilder();
        for (int i = 0; i < length; i++) {
            // nextInt's bound is exclusive, so maxCodePoint itself is never drawn.
            int codepoint = random.nextInt(maxCodePoint);
            // Eliminate unassigned characters
            while (UCharacter.getType(codepoint) == UCharacterCategory.UNASSIGNED) {
                codepoint = random.nextInt(maxCodePoint);
            }
            source.append(UTF16.valueOf(codepoint));
        }
        return source.toString();
    }
}