1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/*
2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru**********************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2011, International Business Machines Corporation
4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* and others.  All Rights Reserved.
5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru**********************************************************************
6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/
/**
 * IntlTestSpoof tests for USpoofChecker
 */
10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utypes.h"
12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO
14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "itspoof.h"
16b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uspoof.h"
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/regex.h"
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normlzr.h"
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "cstring.h"
2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdlib.h>
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h>
23b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// Reports a failure through errcheckln() when `status` holds an ICU failure
// code.  The message records the file and line of the macro use together with
// the symbolic name of the error.  Execution continues afterwards; the macro
// does not return from the enclosing function.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errcheckln(status, "Failure at file %s, line %d, error = %s", \
                   __FILE__, __LINE__, u_errorName(status)); \
    } \
}
26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// Reports a test failure through errln() when `expr` evaluates to false.
// The message contains the file, the line and the literal text of `expr`.
// Execution continues afterwards.
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
29b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// Reports a test failure through errln() when `a` and `b` differ.
//
// Each operand is evaluated exactly once and kept in a local, so an operand
// with side effects (for example a uspoof_* call that updates a position or
// status out-parameter) is not run a second time while the failure message is
// being formatted, and the value printed is the value that was compared.
//
// Operands are converted to int32_t, matching the "%d" conversions in the
// message; this macro is intended for integer and enum values only.
#define TEST_ASSERT_EQ(a, b) { \
    const int32_t testAssertEqLhs_ = static_cast<int32_t>(a); \
    const int32_t testAssertEqRhs_ = static_cast<int32_t>(b); \
    if (testAssertEqLhs_ != testAssertEqRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
              __FILE__, __LINE__, #a, testAssertEqLhs_, #b, testAssertEqRhs_); \
    } \
}
33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// Reports a test failure through errln() when `a` and `b` are equal.
//
// Each operand is evaluated exactly once and kept in a local, so an operand
// with side effects is not run a second time while the failure message is
// being formatted, and the value printed is the value that was compared.
//
// Operands are converted to int32_t, matching the "%d" conversions in the
// message; this macro is intended for integer and enum values only.
#define TEST_ASSERT_NE(a, b) { \
    const int32_t testAssertNeLhs_ = static_cast<int32_t>(a); \
    const int32_t testAssertNeRhs_ = static_cast<int32_t>(b); \
    if (testAssertNeLhs_ == testAssertNeRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
              __FILE__, __LINE__, #a, testAssertNeLhs_, #b, testAssertNeRhs_); \
    } \
}
37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         A matched pair of macros wrapping the boilerplate of one test case.
 *
 *         TEST_SETUP opens a new scope, declares a local "status" and opens a
 *         spoof checker "sc".  The code written between the two macros only
 *         runs when the checker was opened successfully.
 *
 *         TEST_TEARDOWN ends the guarded section, checks "status" once more
 *         (so a failure left behind by the test code is reported), closes
 *         "sc" and closes the scope opened by TEST_SETUP.
 */
#define TEST_SETUP { \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uspoof_close(sc); \
}
56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
58b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{
62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (exec) logln("TestSuite spoof: ");
63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    switch (index) {
64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        case 0:
65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestSpoofAPI";
66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testSpoofAPI();
68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru         case 1:
71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestSkeleton";
72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testSkeleton();
74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru         case 2:
77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestAreConfusable";
78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testAreConfusable();
80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru          case 3:
83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            name = "TestInvisible";
84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) {
85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                testInvisible();
86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            }
87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          case 4:
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            name = "testConfData";
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (exec) {
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                testConfData();
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          case 5:
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            name = "testBug8654";
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) {
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                testBug8654();
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         default: name=""; break;
101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testSpoofAPI() {
105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
10727f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s("xyz");  // Many latin ranges are whole-script confusable with other scripts.
10827f654740f2a26ad62a5c155af9199af9e69b889claireho                                 // If this test starts failing, consult confusablesWholeScript.txt
109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t position = 666;
110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(0, checkResults);
113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(666, position);
114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s1("cxs");
118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape();  // Cyrillic "cxs"
119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s("I1l0O");
126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString dest;
127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
12927f654740f2a26ad62a5c155af9199af9e69b889claireho        TEST_ASSERT(UnicodeString("lllOO") == dest);
130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT(&dest == &retStr);
131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
// Runs one skeleton test case through checkSkeleton(), passing along the line
// number of the macro use so a failure can be traced back to its test case.
// Expects a spoof checker named "sc" to be in scope where it is expanded.
// Kept as a plain brace block, so a use without a trailing ';' is still valid.
#define CHECK_SKELETON(type, input, expected) \
    { checkSkeleton(sc, type, input, expected, __LINE__); }
138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// testSkeleton.   Spot check a number of confusable skeleton substitutions from the
141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 Unicode data file confusables.txt
142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 Test cases chosen for substitutions of various lengths, and
143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 membership in different mapping tables.
144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testSkeleton() {
145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t ML = 0;
146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t MA = USPOOF_ANY_CASE;
148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // A long "identifier" that will overflow implementation stack buffers, forcing heap allocations.
15227f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SL, " A 1ong \\u02b9identifier' that will overflow implementation stack buffers, forcing heap allocations."
15327f654740f2a26ad62a5c155af9199af9e69b889claireho                           " A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
15427f654740f2a26ad62a5c155af9199af9e69b889claireho                           " A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
15527f654740f2a26ad62a5c155af9199af9e69b889claireho                           " A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
15727f654740f2a26ad62a5c155af9199af9e69b889claireho               " A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
15827f654740f2a26ad62a5c155af9199af9e69b889claireho               " A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
15927f654740f2a26ad62a5c155af9199af9e69b889claireho               " A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
16027f654740f2a26ad62a5c155af9199af9e69b889claireho               " A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations.")
161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "nochange", "nochange");
16327f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "love", "love");
16427f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "1ove", "love");   // Digit 1 to letter l
165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(ML, "OOPS", "OOPS");
16627f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(ML, "00PS", "00PS");   // Digit 0 unchanged in lower case mode.
16727f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "OOPS", "OOPS");
16827f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "00PS", "OOPS");   // Digit 0 to letter O in any case mode only
169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u059c", "\\u0301");
170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
17127f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029");  // "(ll)"
172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0627\\u0644\\u0647");
173b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // This mapping exists in the ML and MA tables, does not exist in SL, SA
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //0C83 ;	0C03 ;
176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u0C83", "\\u0C83");
177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SA, "\\u0C83", "\\u0C83");
17827f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(ML, "\\u0C83", "\\u0983");
17927f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "\\u0C83", "\\u0983");
180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // 0391 ; 0041 ;
182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // This mapping exists only in the MA table.
183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(MA, "\\u0391", "A");
184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SA, "\\u0391", "\\u0391");
185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(ML, "\\u0391", "\\u0391");
186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u0391", "\\u0391");
187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // 13CF ;  0062 ;
189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // This mapping exists in the ML and MA tables
190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(ML, "\\u13CF", "b");
191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(MA, "\\u13CF", "b");
192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SL, "\\u13CF", "\\u13CF");
193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        CHECK_SKELETON(SA, "\\u13CF", "\\u13CF");
194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
19527f654740f2a26ad62a5c155af9199af9e69b889claireho        // 0022 ;  0027 0027 ;
196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // all tables.
19727f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027");
19827f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027");
19927f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027");
20027f654740f2a26ad62a5c155af9199af9e69b889claireho        CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027");
201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // 017F ;  0066 ;
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // This mapping exists in the SA and MA tables
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        CHECK_SKELETON(MA, "\\u017F", "f");
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        CHECK_SKELETON(SA, "\\u017F", "f");
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//  Run a single confusable skeleton transformation test case.
213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                  const char *input, const char *expected, int32_t lineNum) {
216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString uInput = UnicodeString(input).unescape();
217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString uExpected = UnicodeString(expected).unescape();
218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString actual;
221b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status);
222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (U_FAILURE(status)) {
223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum,
224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru              u_errorName(status));
225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        return;
226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if (uExpected != actual) {
228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               __FILE__, __LINE__, lineNum);
230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        errln(UnicodeString(" Actual   Skeleton: \"") + actual + UnicodeString("\"\n") +
231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru              UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\""));
232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testAreConfusable() {
236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s1("A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
238b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ");
239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString s2("A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. ");
241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid IntlTestSpoof::testInvisible() {
248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_SETUP
249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString  s = UnicodeString("abcd\\u0301ef").unescape();
250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        int32_t position = -42;
251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));
252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT(position == -42);
254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString  s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status));
257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(7, position);
259b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Two acute accents, one from the composed a with acute accent, \u00e1,
261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        // and one separate.
262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        position = -42;
263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString  s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status));
265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        TEST_ASSERT_EQ(7, position);
267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_TEARDOWN;
268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid IntlTestSpoof::testBug8654() {
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    TEST_SETUP
272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape();
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t position = -42;
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &position, &status) & USPOOF_INVISIBLE );
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        TEST_ASSERT_SUCCESS(status);
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        TEST_ASSERT_EQ(3, position);
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    TEST_TEARDOWN;
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UnicodeString parseHex(const UnicodeString &in) {
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Convert a series of hex numbers in a Unicode String to a string with the
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // corresponding characters.
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // The conversion is _really_ annoying.  There must be some function to just do it.
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString result;
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 cc = 0;
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (int32_t i=0; i<in.length(); i++) {
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar c = in.charAt(i);
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (c == 0x20) {   // Space
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (cc > 0) {
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               result.append(cc);
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               cc = 0;
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (c>=0x30 && c<=0x39) {
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cc = (cc<<4) + (c - 0x30);
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if ((c>=0x41 && c<=0x46) || (c>=0x61 && c<=0x66)) {
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cc = (cc<<4) + (c & 0x0f)+9;
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // else do something with bad input.
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (cc > 0) {
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.append(cc);
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return result;
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Append the hex form of a UChar32 to a UnicodeString.
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Used in formatting error messages.
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Match the formatting of numbers in confusables.txt
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Minimum of 4 digits, no leading zeroes for positions 5 and up.
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic void appendHexUChar(UnicodeString &dest, UChar32 c) {
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool   doZeroes = FALSE;
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (int bitNum=28; bitNum>=0; bitNum-=4) {
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (bitNum <= 12) {
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            doZeroes = TRUE;
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int hexDigit = (c>>bitNum) & 0x0f;
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (hexDigit != 0 || doZeroes) {
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            doZeroes = TRUE;
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            dest.append((UChar)(hexDigit<=9? hexDigit + 0x30: hexDigit -10 + 0x41));
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    dest.append((UChar)0x20);
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  testConfData - Check each data item from the Unicode confusables.txt file,
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                 verify that it transforms correctly in a skeleton.
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid IntlTestSpoof::testConfData() {
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *testDataDir = IntlTest::getSourceTestData(status);
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_SUCCESS(status);
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char buffer[2000];
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_strcpy(buffer, testDataDir);
34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uprv_strcat(buffer, "confusables.txt");
34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalStdioFilePointer f(fopen(buffer, "rb"));
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (f.isNull()) {
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Skipping test spoof/testConfData.  File confusables.txt not accessible.");
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek(f.getAlias(), 0, SEEK_END);
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  fileSize = ftell(f.getAlias());
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalArray<char> fileBuf(new char[fileSize]);
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek(f.getAlias(), 0, SEEK_SET);
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias());
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_EQ(amt_read, fileSize);
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT(fileSize>0);
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (amt_read != fileSize || fileSize <=0) {
35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.getAlias(), fileSize));
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUSpoofCheckerPointer sc(uspoof_open(&status));
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_SUCCESS(status);
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Parse lines from the confusables.txt file.  Example Line:
36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // FF44 ;	0064 ;	SL	# ( d -> d ) FULLWIDTH ....
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Three fields.  The hex fields can contain more than one character,
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //                and each character may be more than 4 digits (for supplemntals)
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // This regular expression matches lines and splits the fields into capture groups.
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt, 0, status);
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    TEST_ASSERT_SUCCESS(status);
36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (parseLine.find()) {
37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString from = parseHex(parseLine.group(1, status));
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (!Normalizer::isNormalized(from, UNORM_NFD, status)) {
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // The source character was not NFD.
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Skip this case; the first step in obtaining a skeleton is to NFD the input,
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  so the mapping in this line of confusables.txt will never be applied.
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString rawExpected = parseHex(parseLine.group(2, status));
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString expected;
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Normalizer::decompose(rawExpected, FALSE /*NFD*/, 0, expected, status);
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT_SUCCESS(status);
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t skeletonType = 0;
38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString tableType = parseLine.group(3, status);
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT_SUCCESS(status);
38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (tableType.indexOf("SL") >= 0) {
38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (tableType.indexOf("SA") >= 0) {
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (tableType.indexOf("ML") >= 0) {
39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = 0;
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (tableType.indexOf("MA") >= 0) {
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skeletonType = USPOOF_ANY_CASE;
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString actual;
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actual, &status);
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT_SUCCESS(status);
39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        TEST_ASSERT(actual == expected);
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (actual != expected) {
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln(parseLine.group(0, status));
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString line = "Actual: ";
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int i = 0;
40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while (i < actual.length()) {
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                appendHexUChar(line, actual.char32At(i));
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                i = actual.moveIndex32(i, 1);
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln(line);
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
415b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
416b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
417