1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 1996-2004, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 */
9
10package com.ibm.icu.dev.test.normalizer;
11
12import java.util.Random;
13
14import org.junit.Test;
15import org.junit.runner.RunWith;
16import org.junit.runners.JUnit4;
17
18import com.ibm.icu.dev.test.TestFmwk;
19import com.ibm.icu.lang.UCharacter;
20import com.ibm.icu.lang.UCharacterCategory;
21import com.ibm.icu.text.Normalizer;
22import com.ibm.icu.text.UTF16;
23
24@RunWith(JUnit4.class)
25public class NormalizationMonkeyTest extends TestFmwk {
26    int loopCount = 100;
27    int maxCharCount = 20;
28    int maxCodePoint = 0x10ffff;
29    Random random = null; // initialized in getTestSource
30    UnicodeNormalizer unicode_NFD;
31    UnicodeNormalizer unicode_NFC;
32    UnicodeNormalizer unicode_NFKD;
33    UnicodeNormalizer unicode_NFKC;
34
35    public NormalizationMonkeyTest() {
36    }
37
38    @Test
39    public void TestNormalize() {
40        if (unicode_NFD == null) {
41            try {
42                unicode_NFD = new UnicodeNormalizer(UnicodeNormalizer.D, true);
43                unicode_NFC = new UnicodeNormalizer(UnicodeNormalizer.C, true);
44                unicode_NFKD = new UnicodeNormalizer(UnicodeNormalizer.KD, true);
45                unicode_NFKC = new UnicodeNormalizer(UnicodeNormalizer.KC, true);
46            }
47            catch (Exception e) {
48                errln("Normalization tests could not be run: " + e.getMessage());
49            }
50        }
51        int i = 0;
52        while (i < loopCount) {
53            String source = getTestSource();
54            logln("Test source:" + source);
55            //NFD
56            String uncodeNorm = unicode_NFD.normalize(source);
57            String icuNorm = Normalizer.normalize(source, Normalizer.NFD);
58            logln("\tNFD(Unicode): " + uncodeNorm);
59            logln("\tNFD(icu4j)  : " + icuNorm);
60            if (!uncodeNorm.equals(icuNorm)) {
61                errln("NFD: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
62            }
63            //NFC
64            uncodeNorm = unicode_NFC.normalize(source);
65            icuNorm = Normalizer.normalize(source, Normalizer.NFC);
66            logln("\tNFC(Unicode): " + uncodeNorm);
67            logln("\tNFC(icu4j)  : " + icuNorm);
68            if (!uncodeNorm.equals(icuNorm)) {
69                errln("NFC: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
70            }
71            //NFKD
72            uncodeNorm = unicode_NFKD.normalize(source);
73            icuNorm = Normalizer.normalize(source, Normalizer.NFKD);
74            logln("\tNFKD(Unicode): " + uncodeNorm);
75            logln("\tNFKD(icu4j)  : " + icuNorm);
76            if (!uncodeNorm.equals(icuNorm)) {
77                errln("NFKD: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
78            }
79            //NFKC
80            uncodeNorm = unicode_NFKC.normalize(source);
81            icuNorm = Normalizer.normalize(source, Normalizer.NFKC);
82            logln("\tNFKC(Unicode): " + uncodeNorm);
83            logln("\tNFKC(icu4j)  : " + icuNorm);
84            if (!uncodeNorm.equals(icuNorm)) {
85                errln("NFKC: Unicode sample output => " + uncodeNorm + "; icu4j output=> " + icuNorm);
86            }
87
88            i++;
89        }
90    }
91
92    String getTestSource() {
93    if (random == null) {
94        random = createRandom(); // use test framework's random seed
95    }
96        String source = "";
97        int i = 0;
98        while (i < (random.nextInt(maxCharCount) + 1)) {
99            int codepoint = random.nextInt(maxCodePoint);
100            //Elimate unassigned characters
101            while (UCharacter.getType(codepoint) == UCharacterCategory.UNASSIGNED) {
102                codepoint = random.nextInt(maxCodePoint);
103            }
104            source = source + UTF16.valueOf(codepoint);
105            i++;
106        }
107        return source;
108    }
109}
110