1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2017 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4package android.icu.dev.test.util;
5
6
7import java.io.IOException;
8import java.util.List;
9import java.util.Random;
10import java.util.Set;
11import java.util.TreeSet;
12import java.util.regex.Pattern;
13
14import org.junit.Test;
15import org.junit.runner.RunWith;
16import org.junit.runners.JUnit4;
17
18import android.icu.dev.test.TestFmwk;
19import android.icu.impl.locale.XCldrStub.Joiner;
20import android.icu.impl.locale.XCldrStub.Splitter;
21import android.icu.impl.locale.XLocaleDistance;
22import android.icu.impl.locale.XLocaleDistance.DistanceOption;
23import android.icu.impl.locale.XLocaleMatcher;
24import android.icu.text.UnicodeSet;
25import android.icu.util.LocaleMatcher;
26import android.icu.util.LocalePriorityList;
27import android.icu.util.Output;
28import android.icu.util.ULocale;
29import android.icu.testsharding.MainTestShard;
30
31/**
32 * Test the XLocaleMatcher.
33 *
34 * @author markdavis
35 */
36@MainTestShard
37@RunWith(JUnit4.class)
38public class XLocaleMatcherTest extends TestFmwk {
39    private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
40
41    private static final int REGION_DISTANCE = 4;
42
43    private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
44
45    private XLocaleMatcher newXLocaleMatcher() {
46        return new XLocaleMatcher("");
47    }
48
49    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
50        return new XLocaleMatcher(build);
51    }
52
53    private XLocaleMatcher newXLocaleMatcher(String string) {
54        return new XLocaleMatcher(LocalePriorityList.add(string).build());
55    }
56
57    @SuppressWarnings("unused")
58    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d) {
59        return XLocaleMatcher.builder().setSupportedLocales(string).setThresholdDistance(d).build();
60    }
61
62    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d, DistanceOption distanceOption) {
63        return XLocaleMatcher
64            .builder()
65            .setSupportedLocales(string)
66            .setThresholdDistance(d)
67            .setDistanceOption(distanceOption)
68            .build();
69    }
70
71    //    public void testParentLocales() {
72    //        // find all the regions that have a closer relation because of an explicit parent
73    //        Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
74    //        explicitParents.remove("root");
75    //        Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
76    //        for (String locale : explicitParents) {
77    //            while (true) {
78    //                locale = LocaleIDParser.getParent(locale);
79    //                if (locale == null || locale.equals("root")) {
80    //                    break;
81    //                }
82    //                otherParents.add(locale);
83    //            }
84    //        }
85    //        otherParents.remove("root");
86    //
87    //        for (String locale : CONFIG.getCldrFactory().getAvailable()) {
88    //            String parentId = LocaleIDParser.getParent(locale);
89    //            String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
90    //            if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
91    //                continue;
92    //            }
93    //            System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
94    //        }
95    //    }
96
97
98// TBD reenable with override data
99//    public void testOverrideData() {
100//        double threshold = 0.05;
101//        XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
102//        .addDistance("br", "fr", 10, true)
103//        .addDistance("es", "cy", 10, true);
104//        logln(XLocaleMatcherData.toString());
105//
106//        final XLocaleMatcher matcher = newXLocaleMatcher(
107//            LocalePriorityList
108//            .add(ULocale.ENGLISH)
109//            .add(ULocale.FRENCH)
110//            .add(ULocale.UK)
111//            .build(), XLocaleMatcherData, threshold);
112//        logln(matcher.toString());
113//
114//        assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
115//        assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
116//        // way
117//    }
118
119
120    private void assertEquals(Object expected, Object string) {
121        assertEquals("", expected, string);
122    }
123
124    /**
125     * If all the base languages are the same, then each sublocale matches
126     * itself most closely
127     */
128    @Test
129    public void testExactMatches() {
130        String lastBase = "";
131        TreeSet<ULocale> sorted = new TreeSet<ULocale>();
132        for (ULocale loc : ULocale.getAvailableLocales()) {
133            String language = loc.getLanguage();
134            if (!lastBase.equals(language)) {
135                check(sorted);
136                sorted.clear();
137                lastBase = language;
138            }
139            sorted.add(loc);
140        }
141        check(sorted);
142    }
143
144    private void check(Set<ULocale> sorted) {
145        if (sorted.isEmpty()) {
146            return;
147        }
148        check2(sorted);
149        ULocale first = sorted.iterator().next();
150        ULocale max = ULocale.addLikelySubtags(first);
151        sorted.add(max);
152        check2(sorted);
153    }
154
155    /**
156     * @param sorted
157     */
158    private void check2(Set<ULocale> sorted) {
159        // TODO Auto-generated method stub
160        logln("Checking: " + sorted);
161        XLocaleMatcher matcher = newXLocaleMatcher(
162            LocalePriorityList.add(
163                sorted.toArray(new ULocale[sorted.size()]))
164            .build());
165        for (ULocale loc : sorted) {
166            String stringLoc = loc.toString();
167            assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
168        }
169    }
170
171    @Test
172    public void testComputeDistance_monkeyTest() {
173        String[] codes = ULocale.getISOCountries();
174        Random random = new Random();
175        XLocaleMatcher lm = newXLocaleMatcher();
176        for (int i = 0; i < 1000; ++i) {
177            String x = codes[random.nextInt(codes.length)];
178            String y = codes[random.nextInt(codes.length)];
179            double d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
180            if (x.equals("ZZ") || y.equals("ZZ")) {
181                assertEquals("dist(regionDistance," + x + ") = 0", REGION_DISTANCE, d);
182            } else if (x.equals(y)) {
183                assertEquals("dist(x,x) = 0", 0.0, d);
184            } else {
185                assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
186                assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
187            }
188        }
189    }
190
191
192    @Test
193    public void testPerf() {
194        if (LANGUAGE_MATCHER_DATA == null) {
195            return; // skip except when testing data
196        }
197        final ULocale desired = new ULocale("sv");
198
199        final String shortList = "en, sv";
200        final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
201        final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
202
203        final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
204        final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
205        final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
206
207        final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
208        final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
209        final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
210
211        //XLocaleMatcher.DEBUG = true;
212        ULocale expected = new ULocale("sv");
213        assertEquals(expected, matcherShort.getBestMatch(desired));
214        assertEquals(expected, matcherLong.getBestMatch(desired));
215        assertEquals(expected, matcherVeryLong.getBestMatch(desired));
216        //XLocaleMatcher.DEBUG = false;
217
218        long timeShortNew=0;
219        long timeMediumNew=0;
220        long timeLongNew=0;
221
222        for (int i = 0; i < 2; ++i) {
223            int iterations = i == 0 ? 1000 : 1000000;
224            boolean showMessage = i != 0;
225            timeShortNew = timeXLocaleMatcher("Duration (few  supported):\t", desired, matcherShort, showMessage, iterations);
226            timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
227            timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
228        }
229
230        long timeShortOld=0;
231        long timeMediumOld=0;
232        long timeLongOld=0;
233
234        for (int i = 0; i < 2; ++i) {
235            int iterations = i == 0 ? 1000 : 100000;
236            boolean showMessage = i != 0;
237            timeShortOld = timeLocaleMatcher("Old Duration (few  supported):\t", desired, matcherShortOld, showMessage, iterations);
238            timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
239            timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
240        }
241
242        assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
243        assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
244        assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
245
246    }
247
248    private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
249        boolean showmessage, int iterations) {
250        long start = System.nanoTime();
251        for (int i = iterations; i > 0; --i) {
252            matcher.getBestMatch(desired);
253        }
254        long delta = System.nanoTime() - start;
255        if (showmessage) logln(title + (delta / iterations) + " nanos");
256        return (delta / iterations);
257    }
258
259    private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
260        boolean showmessage, int iterations) {
261        long start = System.nanoTime();
262        for (int i = iterations; i > 0; --i) {
263            matcher.getBestMatch(desired);
264        }
265        long delta = System.nanoTime() - start;
266        if (showmessage) logln(title + (delta / iterations) + " nanos");
267        return (delta / iterations);
268    }
269
270    @Test
271    public void testDataDriven() throws IOException {
272        DataDrivenTestHelper tfh = new MyTestFileHandler()
273            .setFramework(this)
274            .run(XLocaleMatcherTest.class, "data/localeMatcherTest.txt");
275        if (REFORMAT) {
276            System.out.println(tfh.appendLines(new StringBuilder()));
277        }
278    }
279
280    private static final Splitter COMMA_SPACE = Splitter.on(Pattern.compile(",\\s*|\\s+")).trimResults();
281    private static final Joiner JOIN_COMMA_SPACE = Joiner.on(", ");
282    @SuppressWarnings("unused")
283    private static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
284
285    class MyTestFileHandler extends DataDrivenTestHelper {
286
287        Output<ULocale> bestDesired = new Output<ULocale>();
288        DistanceOption distanceOption = DistanceOption.NORMAL;
289        int threshold = -1;
290
291        @Override
292        public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
293            List<String> supported = COMMA_SPACE.splitToList(arguments.get(0));
294            final String supportedReformatted = JOIN_COMMA_SPACE.join(supported);
295            LocalePriorityList supportedList = LocalePriorityList.add(supportedReformatted).build();
296
297            Iterable<String> desired = COMMA_SPACE.split(arguments.get(1));
298            final String desiredReformatted = JOIN_COMMA_SPACE.join(desired);
299            LocalePriorityList desiredList = LocalePriorityList.add(desiredReformatted).build();
300
301            String expected = arguments.get(2);
302            String expectedLanguageTag = expected.equals("null") ? null : new ULocale(expected).toLanguageTag();
303
304            String expectedUi = arguments.size() < 4 ? null : arguments.get(3);
305            String expectedUiLanguageTag = expectedUi == null || expectedUi.equals("null") ? null
306                : new ULocale(expectedUi).toLanguageTag();
307
308            if (breakpoint) {
309                breakpoint = false; // put debugger breakpoint here to break at @debug in test file
310            }
311
312            XLocaleMatcher matcher = threshold < 0 && distanceOption == DistanceOption.NORMAL
313                ? newXLocaleMatcher(supportedList)
314                : newXLocaleMatcher(supportedList, threshold, distanceOption);
315            commentBase = "(" + lineNumber + ") " + commentBase;
316
317            ULocale bestSupported;
318            if (expectedUi != null) {
319                bestSupported = matcher.getBestMatch(desiredList, bestDesired);
320                ULocale bestUI = XLocaleMatcher.combine(bestSupported, bestDesired.value);
321                assertEquals(commentBase + " (UI)", expectedUiLanguageTag, bestUI == null ? null : bestUI.toLanguageTag());
322            } else {
323                bestSupported = matcher.getBestMatch(desiredList);
324            }
325            String bestMatchLanguageTag = bestSupported == null ? null : bestSupported.toLanguageTag();
326            assertEquals(commentBase, expectedLanguageTag, bestMatchLanguageTag);
327        }
328
329        @Override
330        public void handleParams(String comment, List<String> arguments) {
331            String switchItem = arguments.get(0);
332            if (switchItem.equals("@DistanceOption")) {
333                distanceOption = DistanceOption.valueOf(arguments.get(1));
334            } else if (switchItem.equals("@Threshold")) {
335                threshold = Integer.valueOf(arguments.get(1));
336            } else {
337                super.handleParams(comment, arguments);
338            }
339            return;
340        }
341    }
342}
343