1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4 ******************************************************************************* 5 * Copyright (C) 1996-2004, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10package com.ibm.icu.dev.test.normalizer; 11 12import java.util.Random; 13 14import org.junit.Test; 15import org.junit.runner.RunWith; 16import org.junit.runners.JUnit4; 17 18import com.ibm.icu.dev.test.TestFmwk; 19import com.ibm.icu.lang.UCharacter; 20import com.ibm.icu.lang.UCharacterCategory; 21import com.ibm.icu.text.Normalizer; 22import com.ibm.icu.text.UTF16; 23 24@RunWith(JUnit4.class) 25public class NormalizationMonkeyTest extends TestFmwk { 26 int loopCount = 100; 27 int maxCharCount = 20; 28 int maxCodePoint = 0x10ffff; 29 Random random = null; // initialized in getTestSource 30 UnicodeNormalizer unicode_NFD; 31 UnicodeNormalizer unicode_NFC; 32 UnicodeNormalizer unicode_NFKD; 33 UnicodeNormalizer unicode_NFKC; 34 35 public NormalizationMonkeyTest() { 36 } 37 38 @Test 39 public void TestNormalize() { 40 if (unicode_NFD == null) { 41 try { 42 unicode_NFD = new UnicodeNormalizer(UnicodeNormalizer.D, true); 43 unicode_NFC = new UnicodeNormalizer(UnicodeNormalizer.C, true); 44 unicode_NFKD = new UnicodeNormalizer(UnicodeNormalizer.KD, true); 45 unicode_NFKC = new UnicodeNormalizer(UnicodeNormalizer.KC, true); 46 } 47 catch (Exception e) { 48 errln("Normalization tests could not be run: " + e.getMessage()); 49 } 50 } 51 int i = 0; 52 while (i < loopCount) { 53 String source = getTestSource(); 54 logln("Test source:" + source); 55 //NFD 56 String uncodeNorm = unicode_NFD.normalize(source); 57 String icuNorm = Normalizer.normalize(source, Normalizer.NFD); 58 logln("\tNFD(Unicode): " + uncodeNorm); 59 logln("\tNFD(icu4j) : " + icuNorm); 60 if (!uncodeNorm.equals(icuNorm)) { 61 errln("NFD: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm); 62 } 63 //NFC 64 uncodeNorm = unicode_NFC.normalize(source); 65 icuNorm = Normalizer.normalize(source, Normalizer.NFC); 66 logln("\tNFC(Unicode): " + uncodeNorm); 67 logln("\tNFC(icu4j) : " + icuNorm); 68 if (!uncodeNorm.equals(icuNorm)) { 69 errln("NFC: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm); 70 } 71 //NFKD 72 uncodeNorm = unicode_NFKD.normalize(source); 73 icuNorm = Normalizer.normalize(source, Normalizer.NFKD); 74 logln("\tNFKD(Unicode): " + uncodeNorm); 75 logln("\tNFKD(icu4j) : " + icuNorm); 76 if (!uncodeNorm.equals(icuNorm)) { 77 errln("NFKD: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm); 78 } 79 //NFKC 80 uncodeNorm = unicode_NFKC.normalize(source); 81 icuNorm = Normalizer.normalize(source, Normalizer.NFKC); 82 logln("\tNFKC(Unicode): " + uncodeNorm); 83 logln("\tNFKC(icu4j) : " + icuNorm); 84 if (!uncodeNorm.equals(icuNorm)) { 85 errln("NFKC: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm); 86 } 87 88 i++; 89 } 90 } 91 92 String getTestSource() { 93 if (random == null) { 94 random = createRandom(); // use test framework's random seed 95 } 96 String source = ""; 97 int i = 0; 98 while (i < (random.nextInt(maxCharCount) + 1)) { 99 int codepoint = random.nextInt(maxCodePoint); 100 //Elimate unassigned characters 101 while (UCharacter.getType(codepoint) == UCharacterCategory.UNASSIGNED) { 102 codepoint = random.nextInt(maxCodePoint); 103 } 104 source = source + UTF16.valueOf(codepoint); 105 i++; 106 } 107 return source; 108 } 109} 110