1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/*
2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru**********************************************************************
31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2011-2015, International Business Machines Corporation
4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* and others.  All Rights Reserved.
5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru**********************************************************************
6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/
/**
 * IntlTestSpoof tests for USpoofChecker
 */
10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utypes.h"
12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO
14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "itspoof.h"
168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normlzr.h"
188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/regex.h"
198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/unistr.h"
208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uscript.h"
218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uspoof.h"
228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "cstring.h"
248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "identifier_info.h"
258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "scriptset.h"
268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uhash.h"
278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdlib.h>
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h>
30b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// TEST_ASSERT_SUCCESS(status)
//     If `status` holds a failure code, report the file, line and ICU error name
//     through errcheckln().  Control then continues with the next statement;
//     the macro itself neither returns nor aborts.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errcheckln(status, "Failure at file %s, line %d, error = %s", \
                   __FILE__, __LINE__, u_errorName(status)); \
    } \
}
33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// TEST_ASSERT(expr)
//     Report a test failure through errln() when `expr` compares equal to FALSE.
//     The message carries the file, the line and the source text of `expr`.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.", \
              __FILE__, __LINE__, #expr); \
    } \
}
368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
// TEST_ASSERT_MSG(expr, msg)
//     Like TEST_ASSERT, but the report also includes the C string `msg` and is
//     issued through dataerrln() rather than errln().
#define TEST_ASSERT_MSG(expr, msg) { \
    if ((expr) == FALSE) { \
        dataerrln("Test Failure at file %s, line %d, %s: \"%s\" is false.", \
                  __FILE__, __LINE__, msg, #expr); \
    } \
}
39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// TEST_ASSERT_EQ(a, b)
//     Report a test failure through errln() when `a` and `b` differ.
//     Both values are printed with %d, so the arguments must be int-compatible.
//     Note: on failure each argument is evaluated a second time to build the
//     message, so avoid passing expressions with side effects.
#define TEST_ASSERT_EQ(a, b) { \
    if ((a) != (b)) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d)", \
              __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}
43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// TEST_ASSERT_NE(a, b)
//     Report a test failure through errln() when `a` and `b` are equal.
//     Both values are printed with %d, so the arguments must be int-compatible.
//     Note: on failure each argument is evaluated a second time to build the
//     message, so avoid passing expressions with side effects.
#define TEST_ASSERT_NE(a, b) { \
    if ((a) == (b)) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d)", \
              __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}
47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Paired macros that wrap the boilerplate around one test case.
 *         TEST_SETUP opens a new scope, declares "status" (initially
 *         U_ZERO_ERROR) and "sc" (a freshly opened USpoofChecker), reports an
 *         open failure, and only enters the test body if the open succeeded.
 *         TEST_TEARDOWN ends the test body, reports any failure left in
 *         "status", closes "sc" (whether or not the body ran) and closes the
 *         scope opened by TEST_SETUP.
 *         Put arbitrary test code between SETUP and TEARDOWN.
 */
#define TEST_SETUP {  \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uspoof_close(sc); \
}
66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{
72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (exec) logln("TestSuite spoof: ");
73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    switch (index) {
74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        case 0:
75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestSpoofAPI";
76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testSpoofAPI();
78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 1:
81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestSkeleton";
82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testSkeleton();
84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 2:
87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestAreConfusable";
88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testAreConfusable();
90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 3:
93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestInvisible";
94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testInvisible();
96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 4:
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            name = "testConfData";
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (exec) {
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                testConfData();
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 5:
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            name = "testBug8654";
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) {
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                testBug8654();
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
1108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 6:
1118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            name = "testIdentifierInfo";
1128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (exec) {
1138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                testIdentifierInfo();
1148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 7:
1178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            name = "testScriptSet";
1188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (exec) {
1198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                testScriptSet();
1208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        case 8:
1238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            name = "testRestrictionLevel";
1248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (exec) {
1258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                testRestrictionLevel();
1268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius       case 9:
1298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            name = "testMixedNumbers";
1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (exec) {
1318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                testMixedNumbers();
1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        default: name=""; break;
137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testSpoofAPI() {
141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
14327f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s("xyz");  // Many latin ranges are whole-script confusable with other scripts.
14427f654740f2a26ad62a5c155af9199af9e69b889claireho                                 // If this test starts failing, consult confusablesWholeScript.txt
145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t position = 666;
146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(0, checkResults);
1498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_EQ(0, position);
150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s1("cxs");
154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape();  // Cyrillic "cxs"
155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s("I1l0O");
162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString dest;
163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
16527f654740f2a26ad62a5c155af9199af9e69b889claireho        TEST_ASSERT(UnicodeString("lllOO") == dest);
166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT(&dest == &retStr);
167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// CHECK_SKELETON(type, input, expected)
//     Run checkSkeleton() against the "sc" checker that is in scope at the
//     point of use, passing along the line number of the invocation so that a
//     failure report identifies the individual test case.
#define CHECK_SKELETON(type, input, expected) { checkSkeleton(sc, type, input, expected, __LINE__); }
174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// testSkeleton.   Spot check a number of confusable skeleton substitutions from the
177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 Unicode data file confusables.txt
178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 Test cases chosen for substitutions of various lengths, and
179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 membership in different mapping tables.
1801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//          Note: for ICU 55, all tables collapsed to the MA table data.
1811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//          TODO: for ICU 56 with Unicode 8, revisit this test.
1821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//
183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testSkeleton() {
184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t ML = 0;
185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t MA = USPOOF_ANY_CASE;
187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "nochange", "nochange");
1911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SA, "nochange", "nochange");
1921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(ML, "nochange", "nochange");
1931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(MA, "nochange", "nochange");
19427f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "love", "love");
19527f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "1ove", "love");   // Digit 1 to letter l
196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(ML, "OOPS", "OOPS");
1971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(ML, "00PS", "OOPS");
19827f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "OOPS", "OOPS");
19927f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "00PS", "OOPS");   // Digit 0 to letter O in any case mode only
200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u059c", "\\u0301");
201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
20227f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029");  // "(ll)"
2031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f");
204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // This mapping exists in the ML and MA tables, does not exist in SL, SA
2061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // 0C83 ;	0983 ;	ML
2071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // 0C83 ;	0983 ;	MA
2081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        //
2091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
2101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SL, "\\u0C83", "\\u0983");
2111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SA, "\\u0C83", "\\u0983");
21227f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(ML, "\\u0C83", "\\u0983");
21327f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "\\u0C83", "\\u0983");
214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // 0391 mappings exist only in MA and SA tables.
216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(MA, "\\u0391", "A");
2171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SA, "\\u0391", "A");
2181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(ML, "\\u0391", "A");
2191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SL, "\\u0391", "A");
220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // 13CF Mappings in all four tables, different in MA.
222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(ML, "\\u13CF", "b");
223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(MA, "\\u13CF", "b");
2241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SL, "\\u13CF", "b");
2251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SA, "\\u13CF", "b");
226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
22727f654740f2a26ad62a5c155af9199af9e69b889claireho        // 0022 ;  0027 0027 ;
228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // all tables.
22927f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027");
23027f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027");
23127f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027");
23227f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027");
233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // 017F mappings exist only in MA and SA tables.
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        CHECK_SKELETON(MA, "\\u017F", "f");
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        CHECK_SKELETON(SA, "\\u017F", "f");
2371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(ML, "\\u017F", "f");
2381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        CHECK_SKELETON(SL, "\\u017F", "f");
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//  Run a single confusable skeleton transformation test case.
246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                  const char *input, const char *expected, int32_t lineNum) {
249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString uInput = UnicodeString(input).unescape();
250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString uExpected = UnicodeString(expected).unescape();
251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString actual;
254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status);
255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (U_FAILURE(status)) {
256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum,
257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru              u_errorName(status));
258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        return;
259b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
260b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (uExpected != actual) {
261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               __FILE__, __LINE__, lineNum);
263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        errln(UnicodeString(" Actual   Skeleton: \"") + actual + UnicodeString("\"\n") +
264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru              UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\""));
265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testAreConfusable() {
269b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
270b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s1("A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
271b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ");
272b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s2("A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
273b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. ");
274b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
275b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
276b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
277b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
278b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
279b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
280b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testInvisible() {
281b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
282b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString  s = UnicodeString("abcd\\u0301ef").unescape();
283b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t position = -42;
284b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));
285b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
2868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT(0 == position);
287b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
288b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString  s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
289b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status));
290b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_EQ(0, position);
292b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Two acute accents, one from the composed a with acute accent, \u00e1,
294b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // and one separate.
295b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        position = -42;
296b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString  s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
297b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status));
298b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_EQ(0, position);
300b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
301b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid IntlTestSpoof::testBug8654() {
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    TEST_SETUP
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape();
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t position = -42;
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &position, &status) & USPOOF_INVISIBLE );
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        TEST_ASSERT_SUCCESS(status);
3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_EQ(0, position);
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    TEST_TEARDOWN;
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UnicodeString parseHex(const UnicodeString &in) {
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Convert a series of hex numbers in a Unicode String to a string with the
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // corresponding characters.
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // The conversion is _really_ annoying.  There must be some function to just do it.
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString result;
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 cc = 0;
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (int32_t i=0; i<in.length(); i++) {
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar c = in.charAt(i);
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (c == 0x20) {   // Space
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (cc > 0) {
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               result.append(cc);
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               cc = 0;
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (c>=0x30 && c<=0x39) {
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cc = (cc<<4) + (c - 0x30);
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if ((c>=0x41 && c<=0x46) || (c>=0x61 && c<=0x66)) {
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cc = (cc<<4) + (c & 0x0f)+9;
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // else do something with bad input.
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (cc > 0) {
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.append(cc);
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return result;
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Append the hex form of a UChar32 to a UnicodeString.
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Used in formatting error messages.
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Match the formatting of numbers in confusables.txt
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Minimum of 4 digits, no leading zeroes for positions 5 and up.
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic void appendHexUChar(UnicodeString &dest, UChar32 c) {
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool   doZeroes = FALSE;
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (int bitNum=28; bitNum>=0; bitNum-=4) {
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (bitNum <= 12) {
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            doZeroes = TRUE;
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int hexDigit = (c>>bitNum) & 0x0f;
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (hexDigit != 0 || doZeroes) {
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            doZeroes = TRUE;
35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            dest.append((UChar)(hexDigit<=9? hexDigit + 0x30: hexDigit -10 + 0x41));
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    dest.append((UChar)0x20);
35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
// Declare LocalStdioFilePointer: a local-pointer style wrapper around a stdio
// FILE * that is paired with fclose() as its close function.
// testConfData() uses it (isNull(), getAlias()) to hold the handle returned by fopen().
// NOTE(review): U_DEFINE_LOCAL_OPEN_POINTER is defined outside this file section;
// presumably the generated class calls fclose() when it goes out of scope - confirm.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  testConfData - Check each data item from the Unicode confusables.txt file,
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                 verify that it transforms correctly in a skeleton.
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid IntlTestSpoof::testConfData() {
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char buffer[2000];
368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (getUnidataPath(buffer) == NULL) {
369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        errln("Skipping test spoof/testConfData. Unable to find path to source/data/unidata/.");
370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return;
371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_strcat(buffer, "confusables.txt");
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalStdioFilePointer f(fopen(buffer, "rb"));
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (f.isNull()) {
37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Skipping test spoof/testConfData.  File confusables.txt not accessible.");
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek(f.getAlias(), 0, SEEK_END);
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  fileSize = ftell(f.getAlias());
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalArray<char> fileBuf(new char[fileSize]);
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek(f.getAlias(), 0, SEEK_SET);
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias());
38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_EQ(amt_read, fileSize);
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT(fileSize>0);
38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (amt_read != fileSize || fileSize <=0) {
38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.getAlias(), fileSize));
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UErrorCode status = U_ZERO_ERROR;
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUSpoofCheckerPointer sc(uspoof_open(&status));
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_SUCCESS(status);
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Parse lines from the confusables.txt file.  Example Line:
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // FF44 ;	0064 ;	SL	# ( d -> d ) FULLWIDTH ....
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Three fields.  The hex fields can contain more than one character,
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //                and each character may be more than 4 digits (for supplemntals)
39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // This regular expression matches lines and splits the fields into capture groups.
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt, 0, status);
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_SUCCESS(status);
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (parseLine.find()) {
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString from = parseHex(parseLine.group(1, status));
404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (!Normalizer::isNormalized(from, UNORM_NFD, status)) {
405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // The source character was not NFD.
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Skip this case; the first step in obtaining a skeleton is to NFD the input,
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  so the mapping in this line of confusables.txt will never be applied.
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString rawExpected = parseHex(parseLine.group(2, status));
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString expected;
413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Normalizer::decompose(rawExpected, FALSE /*NFD*/, 0, expected, status);
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT_SUCCESS(status);
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t skeletonType = 0;
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString tableType = parseLine.group(3, status);
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT_SUCCESS(status);
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (tableType.indexOf("SL") >= 0) {
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (tableType.indexOf("SA") >= 0) {
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (tableType.indexOf("ML") >= 0) {
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = 0;
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (tableType.indexOf("MA") >= 0) {
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = USPOOF_ANY_CASE;
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString actual;
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actual, &status);
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT_SUCCESS(status);
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT(actual == expected);
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (actual != expected) {
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln(parseLine.group(0, status));
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString line = "Actual: ";
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int i = 0;
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while (i < actual.length()) {
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                appendHexUChar(line, actual.char32At(i));
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                i = actual.moveIndex32(i, 1);
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln(line);
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// testIdentifierInfo. Note that IdentifierInfo is not public ICU API at this time
4508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid IntlTestSpoof::testIdentifierInfo() {
4518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
4528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet bitset12; bitset12.set(USCRIPT_LATIN, status).set(USCRIPT_HANGUL, status);
4538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet bitset2;  bitset2.set(USCRIPT_HANGUL, status);
4548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(bitset12.contains(bitset2));
4558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(bitset12.contains(bitset12));
4568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(!bitset2.contains(bitset12));
4578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet arabSet;  arabSet.set(USCRIPT_ARABIC, status);
4598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet latinSet; latinSet.set(USCRIPT_LATIN, status);
4608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UElement arabEl;  arabEl.pointer = &arabSet;
4618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UElement latinEl; latinEl.pointer = &latinSet;
4628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(uhash_compareScriptSet(arabEl, latinEl) < 0);
4638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(uhash_compareScriptSet(latinEl, arabEl) > 0);
4648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UnicodeString scriptString;
4668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    bitset12.displayScripts(scriptString);
4678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString);
4688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    status = U_ZERO_ERROR;
4708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UHashtable *alternates = uhash_open(uhash_hashScriptSet ,uhash_compareScriptSet, NULL, &status);
4718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_puti(alternates, &bitset12, 1, &status);
4728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_puti(alternates, &bitset2, 1, &status);
4738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UnicodeString alternatesString;
4748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    IdentifierInfo::displayAlternates(alternatesString, alternates, status);
4758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang; Hang Latn") == alternatesString);
4768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT_SUCCESS(status);
4778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    status = U_ZERO_ERROR;
4798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet tScriptSet;
4808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    tScriptSet.parseScripts(scriptString, status);
4818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT_SUCCESS(status);
4828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT(bitset12 == tScriptSet);
4838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UnicodeString ss;
4848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ss.remove();
4858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_close(alternates);
4868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    struct Test {
4888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char         *fTestString;
4898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        URestrictionLevel   fRestrictionLevel;
4908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char         *fNumerics;
4918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char         *fScripts;
4928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char         *fAlternates;
4938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char         *fCommonAlternates;
4948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    } tests[] = {
4958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u2665",                USPOOF_UNRESTRICTIVE,      "[]", "Latn", "", ""},
4968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u3006",                USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
4978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u30FC\\u3006",         USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hira Kana", "Hira Kana"},
4988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u30FC\\u3006\\u30A2",  USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""},
4998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u30A2\\u0061\\u30FC\\u3006",  USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""},
5008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u0031\\u0661",         USPOOF_UNRESTRICTIVE,      "[\\u0030\\u0660]", "Latn", "Arab Thaa", "Arab Thaa"},
5018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u0031\\u0661\\u06F1",  USPOOF_UNRESTRICTIVE,      "[\\u0030\\u0660\\u06F0]", "Latn Arab", "", ""},
5028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0661\\u30FC\\u3006\\u0061\\u30A2\\u0031\\u0967\\u06F1",  USPOOF_UNRESTRICTIVE,
503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                  "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
5048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            {"\\u0061\\u30A2\\u30FC\\u3006\\u0031\\u0967\\u0661\\u06F1",  USPOOF_UNRESTRICTIVE,
505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                  "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"}
5068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    };
5078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int testNum;
509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for (testNum = 0; testNum < UPRV_LENGTHOF(tests); testNum++) {
5108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        char testNumStr[40];
5118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        sprintf(testNumStr, "testNum = %d", testNum);
5128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        Test &test = tests[testNum];
5138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_ZERO_ERROR;
5148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeString testString(test.fTestString);  // Note: may do charset conversion.
5158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        testString = testString.unescape();
5168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        IdentifierInfo idInfo(status);
5178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_SUCCESS(status);
5188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status));
5198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        idInfo.setIdentifier(testString, status);
5208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(*idInfo.getIdentifier() == testString, testNumStr);
5218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        URestrictionLevel restrictionLevel = test.fRestrictionLevel;
5238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(restrictionLevel == idInfo.getRestrictionLevel(status), testNumStr);
5248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_ZERO_ERROR;
5268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeSet numerics(UnicodeString(test.fNumerics).unescape(), status);
5278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_SUCCESS(status);
5288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(numerics == *idInfo.getNumerics(), testNumStr);
5298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet scripts;
5318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scripts.parseScripts(UnicodeString(test.fScripts), status);
5328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(scripts == *idInfo.getScripts(), testNumStr);
5338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeString alternatesStr;
5358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        IdentifierInfo::displayAlternates(alternatesStr, idInfo.getAlternates(), status);
5368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(UnicodeString(test.fAlternates) == alternatesStr, testNumStr);
5378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet commonAlternates;
5398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        commonAlternates.parseScripts(UnicodeString(test.fCommonAlternates), status);
5408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(commonAlternates == *idInfo.getCommonAmongAlternates(), testNumStr);
5418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
5428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Test of getScriptCount()
5448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //   Script and or Script Extension for chars used in the tests
5458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u3013  ; Bopo Hang Hani Hira Kana # So       GETA MARK
5468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\uA838  ; Deva Gujr Guru Kthi Takr # Sc       NORTH INDIC RUPEE MARK
5478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u0951  ; Deva Latn                # Mn       DEVANAGARI STRESS SIGN UDATTA
5488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //
5498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u0370  ; Greek                    # L        GREEK CAPITAL LETTER HETA
5508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u0481  ; Cyrillic                 # L&       CYRILLIC SMALL LETTER KOPPA
5518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u0904  ; Devanagari               # Lo       DEVANAGARI LETTER SHORT A
5528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u3041  ; Hiragana                 # Lo       HIRAGANA LETTER SMALL A
5538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     1234     ; Common                   #          ascii digits
5548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //     \\u0300  ; Inherited                # Mn       COMBINING GRAVE ACCENT
5558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    struct ScriptTest {
5578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char *fTestString;
5588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t     fScriptCount;
5598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    } scriptTests[] = {
5608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"Hello", 1},
5618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"Hello\\u0370", 2},
5628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"1234", 0},
5638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"Hello1234\\u0300", 1},   // Common and Inherited are ignored.
5648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0030", 0},
5658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"abc\\u0951", 1},
5668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"abc\\u3013", 2},
5678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\uA838\\u0951", 1},     // Triggers commonAmongAlternates path.
5688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u3013\\uA838", 2}
5698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    };
5708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    status = U_ZERO_ERROR;
5728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    IdentifierInfo identifierInfo(status);
573f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for (testNum=0; testNum<UPRV_LENGTHOF(scriptTests); testNum++) {
5748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptTest &test = scriptTests[testNum];
5758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        char msgBuf[100];
5768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        sprintf(msgBuf, "testNum = %d ", testNum);
5778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeString testString = UnicodeString(test.fTestString).unescape();
5788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_ZERO_ERROR;
5808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        identifierInfo.setIdentifier(testString, status);
5818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t scriptCount = identifierInfo.getScriptCount();
5828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(test.fScriptCount == scriptCount, msgBuf);
5838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
5848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
5858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
// testScriptSet - exercise the basic ScriptSet operations: equality, set/reset/test
//                 of single scripts, setAll/resetAll, contains/intersects,
//                 assignment, intersect, countMembers and nextSetBit iteration.
//                 Each section depends on the state left in s1 / s2 by the previous one.
void IntlTestSpoof::testScriptSet() {
    ScriptSet s1;
    ScriptSet s2;
    UErrorCode status = U_ZERO_ERROR;

    // Two default-constructed sets compare equal; setting one script breaks equality.
    TEST_ASSERT(s1 == s2);
    s1.set(USCRIPT_ARABIC,status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT(!(s1 == s2));
    TEST_ASSERT(s1.test(USCRIPT_ARABIC, status));
    TEST_ASSERT(s1.test(USCRIPT_GREEK, status) == FALSE);

    // Clearing the only member makes s1 equal to the untouched s2 again.
    status = U_ZERO_ERROR;
    s1.reset(USCRIPT_ARABIC, status);
    TEST_ASSERT(s1 == s2);

    // setAll() / resetAll() affect every script, probed here at COMMON, ETHIOPIC
    // and the USCRIPT_CODE_LIMIT value itself.
    status = U_ZERO_ERROR;
    s1.setAll();
    TEST_ASSERT(s1.test(USCRIPT_COMMON, status));
    TEST_ASSERT(s1.test(USCRIPT_ETHIOPIC, status));
    TEST_ASSERT(s1.test(USCRIPT_CODE_LIMIT, status));
    s1.resetAll();
    TEST_ASSERT(!s1.test(USCRIPT_COMMON, status));
    TEST_ASSERT(!s1.test(USCRIPT_ETHIOPIC, status));
    TEST_ASSERT(!s1.test(USCRIPT_CODE_LIMIT, status));

    // contains() / intersects(): s1 = {Takri, Blissymbols}, s2 = everything.
    status = U_ZERO_ERROR;
    s1.set(USCRIPT_TAKRI, status);
    s1.set(USCRIPT_BLISSYMBOLS, status);
    s2.setAll();
    TEST_ASSERT(s2.contains(s1));
    TEST_ASSERT(!s1.contains(s2));
    TEST_ASSERT(s2.intersects(s1));
    TEST_ASSERT(s1.intersects(s2));
    // With Takri removed from s2 neither set contains the other,
    // but they still intersect (Blissymbols is in both).
    s2.reset(USCRIPT_TAKRI, status);
    TEST_ASSERT(!s2.contains(s1));
    TEST_ASSERT(!s1.contains(s2));
    TEST_ASSERT(s1.intersects(s2));
    TEST_ASSERT(s2.intersects(s1));
    TEST_ASSERT_SUCCESS(status);

    // Assignment, countMembers() and intersect(): s1 = {Nko, Common}.
    status = U_ZERO_ERROR;
    s1.resetAll();
    s1.set(USCRIPT_NKO, status);
    s1.set(USCRIPT_COMMON, status);
    s2 = s1;
    TEST_ASSERT(s2 == s1);
    TEST_ASSERT_EQ(2, s2.countMembers());
    s2.intersect(s1);
    TEST_ASSERT(s2 == s1);
    s2.setAll();
    TEST_ASSERT(!(s2 == s1));
    TEST_ASSERT(s2.countMembers() >= USCRIPT_CODE_LIMIT);
    s2.intersect(s1);
    TEST_ASSERT(s2 == s1);

    // Everything except Common, intersected with {Nko, Common}, leaves one member.
    s2.setAll();
    s2.reset(USCRIPT_COMMON, status);
    s2.intersect(s1);
    TEST_ASSERT(s2.countMembers() == 1);

    // nextSetBit() iteration over {Afaka, Vai, Inherited}.
    // The members are expected back as Inherited, Vai, Afaka, and then -1 once
    // the set is exhausted (presumably ascending script code order - confirm).
    s1.resetAll();
    s1.set(USCRIPT_AFAKA, status);
    s1.set(USCRIPT_VAI, status);
    s1.set(USCRIPT_INHERITED, status);
    int32_t n = -1;
    for (int32_t i=0; i<4; i++) {
        n = s1.nextSetBit(n+1);
        switch (i) {
          case 0: TEST_ASSERT_EQ(USCRIPT_INHERITED, n); break;
          case 1: TEST_ASSERT_EQ(USCRIPT_VAI, n); break;
          case 2: TEST_ASSERT_EQ(USCRIPT_AFAKA, n); break;
          case 3: TEST_ASSERT_EQ(-1, (int32_t)n); break;
          default: TEST_ASSERT(FALSE);
        }
    }
    TEST_ASSERT_SUCCESS(status);
}
6648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid IntlTestSpoof::testRestrictionLevel() {
6678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    struct Test {
6688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char         *fId;
6698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        URestrictionLevel   fExpectedRestrictionLevel;
6708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    } tests[] = {
6718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0061\\u03B3\\u2665", USPOOF_UNRESTRICTIVE},
6728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"a",                     USPOOF_ASCII},
673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        {"\\u03B3",               USPOOF_SINGLE_SCRIPT_RESTRICTIVE},
6748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE},
6758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0061\\u0904",        USPOOF_MODERATELY_RESTRICTIVE},
6768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0061\\u03B3",        USPOOF_MINIMALLY_RESTRICTIVE}
6778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    };
6788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    char msgBuffer[100];
6798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE,
681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE,
682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         USPOOF_UNRESTRICTIVE};
6838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
6858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    IdentifierInfo idInfo(status);
6868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT_SUCCESS(status);
6878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status));
6888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    TEST_ASSERT_SUCCESS(status);
689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) {
6908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_ZERO_ERROR;
6918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const Test &test = tests[testNum];
6928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeString testString = UnicodeString(test.fId).unescape();
6938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        URestrictionLevel expectedLevel = test.fExpectedRestrictionLevel;
6948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        idInfo.setIdentifier(testString, status);
6958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        sprintf(msgBuffer, "testNum = %d ", testNum);
6968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_SUCCESS(status);
6978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(expectedLevel == idInfo.getRestrictionLevel(status), msgBuffer);
698f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        for (int levelIndex=0; levelIndex<UPRV_LENGTHOF(restrictionLevels); levelIndex++) {
6998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            status = U_ZERO_ERROR;
7008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            URestrictionLevel levelSetInSpoofChecker = restrictionLevels[levelIndex];
7018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            USpoofChecker *sc = uspoof_open(&status);
7028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status);
7038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
7048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status);
706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
7078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t expectedValue = 0;
709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if (expectedLevel > levelSetInSpoofChecker) {
710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                expectedValue |= USPOOF_RESTRICTION_LEVEL;
711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
712fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString)) {
713fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                expectedValue |= USPOOF_CHAR_LIMIT;
714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x",
716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    testNum, levelIndex, expectedValue, actualValue);
717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer);
718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            TEST_ASSERT_SUCCESS(status);
719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Run the same check again, with the Spoof Checker configured to return
721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // the actual restriction level.
722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status);
723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status);
7268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            TEST_ASSERT_SUCCESS(status);
727fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if (U_SUCCESS(status)) {
728fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_MASK);
729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS);
730fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
7318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            uspoof_close(sc);
7328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
7338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
7348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
7358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
7368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
7378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid IntlTestSpoof::testMixedNumbers() {
7388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    struct Test {
7398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char *fTestString;
7408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const char *fExpectedSet;
7418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    } tests[] = {
7428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"1",              "[0]"},
7438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0967",        "[\\u0966]"},
7448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"1\\u0967",       "[0\\u0966]"},
7458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        {"\\u0661\\u06F1", "[\\u0660\\u06F0]"}
7468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    };
7478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
7488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    IdentifierInfo idInfo(status);
749f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) {
7508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        char msgBuf[100];
7518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        sprintf(msgBuf, "testNum = %d ", testNum);
7528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        Test &test = tests[testNum];
7538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
7548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_ZERO_ERROR;
7558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeString testString = UnicodeString(test.fTestString).unescape();
7568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeSet expectedSet(UnicodeString(test.fExpectedSet).unescape(), status);
7578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        idInfo.setIdentifier(testString, status);
7588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_SUCCESS(status);
7598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG(expectedSet == *idInfo.getNumerics(), msgBuf);
7608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
7618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_ZERO_ERROR;
7628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        USpoofChecker *sc = uspoof_open(&status);
7638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        uspoof_setChecks(sc, USPOOF_MIXED_NUMBERS, &status); // only check this
7648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status);
7658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UBool mixedNumberFailure = ((result & USPOOF_MIXED_NUMBERS) != 0);
7668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        TEST_ASSERT_MSG((expectedSet.size() > 1) == mixedNumberFailure, msgBuf);
7678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        uspoof_close(sc);
7688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
7698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
7708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
7718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */
772