1/*
2**********************************************************************
3* Copyright (C) 2009, International Business Machines Corporation
4* and others.  All Rights Reserved.
5**********************************************************************
6*/
7/**
 * IntlTestSpoof tests for USpoofChecker
9 */
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "itspoof.h"
16#include "unicode/uspoof.h"
17
// Assertion helpers for the tests in this file.  Each one reports a failure,
// together with the file and line of the assertion, through the error
// reporting functions that are in scope at the point of use (errln /
// errcheckln).  None of them abort the test; execution continues after
// the failure has been reported.

// Report a failure if "status" holds a failing UErrorCode.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Report a failure if the boolean expression "expr" is false.
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}

// Report a failure if the two integer valued expressions differ.
// Each operand is evaluated exactly once and the captured values are both
// compared and printed, so an operand with side effects (a function call,
// for example) is not run a second time when the assertion fails.
#define TEST_ASSERT_EQ(a, b) { \
    const int32_t testAssertLhs_ = (a); \
    const int32_t testAssertRhs_ = (b); \
    if (testAssertLhs_ != testAssertRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
                 __FILE__, __LINE__, #a, testAssertLhs_, #b, testAssertRhs_); }}

// Report a failure if the two integer valued expressions are equal.
// As with TEST_ASSERT_EQ, each operand is evaluated exactly once.
#define TEST_ASSERT_NE(a, b) { \
    const int32_t testAssertLhs_ = (a); \
    const int32_t testAssertRhs_ = (b); \
    if (testAssertLhs_ == testAssertRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
                 __FILE__, __LINE__, #a, testAssertLhs_, #b, testAssertRhs_); }}
31
/*
 *   TEST_SETUP / TEST_TEARDOWN
 *         A matched pair of macros that wrap the body of a test case.
 *
 *         TEST_SETUP opens a new scope, declares a UErrorCode named "status",
 *         opens a spoof checker named "sc" with uspoof_open(), reports any
 *         failure to open it, and starts a block that is only entered
 *         when the open succeeded.
 *
 *         TEST_TEARDOWN ends that block, reports a failure if "status" is
 *         failing at that point, closes "sc" and closes the scope.
 *
 *         Test code placed between the two may use "sc" and "status" directly.
 */
#define TEST_SETUP {  \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status);  \
    TEST_ASSERT_SUCCESS(status);   \
    if (U_SUCCESS(status)){

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uspoof_close(sc);  \
}
50
51
52
53
54void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
55{
56    if (exec) logln("TestSuite spoof: ");
57    switch (index) {
58        case 0:
59            name = "TestSpoofAPI";
60            if (exec) {
61                testSpoofAPI();
62            }
63            break;
64         case 1:
65            name = "TestSkeleton";
66            if (exec) {
67                testSkeleton();
68            }
69            break;
70         case 2:
71            name = "TestAreConfusable";
72            if (exec) {
73                testAreConfusable();
74            }
75            break;
76          case 3:
77            name = "TestInvisible";
78            if (exec) {
79                testInvisible();
80            }
81            break;
82        default: name=""; break;
83    }
84}
85
86void IntlTestSpoof::testSpoofAPI() {
87
88    TEST_SETUP
89        UnicodeString s("uvw");
90        int32_t position = 666;
91        int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
92        TEST_ASSERT_SUCCESS(status);
93        TEST_ASSERT_EQ(0, checkResults);
94        TEST_ASSERT_EQ(666, position);
95    TEST_TEARDOWN;
96
97    TEST_SETUP
98        UnicodeString s1("cxs");
99        UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape();  // Cyrillic "cxs"
100        int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
101        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
102
103    TEST_TEARDOWN;
104
105    TEST_SETUP
106        UnicodeString s("I1l0O");
107        UnicodeString dest;
108        UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
109        TEST_ASSERT_SUCCESS(status);
110        TEST_ASSERT(UnicodeString("11100") == dest);
111        TEST_ASSERT(&dest == &retStr);
112    TEST_TEARDOWN;
113}
114
115
116#define CHECK_SKELETON(type, input, expected) { \
117    checkSkeleton(sc, type, input, expected, __LINE__); \
118    }
119
120
121// testSkeleton.   Spot check a number of confusable skeleton substitutions from the
122//                 Unicode data file confusables.txt
123//                 Test cases chosen for substitutions of various lengths, and
124//                 membership in different mapping tables.
125void IntlTestSpoof::testSkeleton() {
126    const uint32_t ML = 0;
127    const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
128    const uint32_t MA = USPOOF_ANY_CASE;
129    const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
130
131    TEST_SETUP
132        // A long "identifier" that will overflow implementation stack buffers, forcing heap allocations.
133        CHECK_SKELETON(SL, " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
134                           " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
135                           " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
136                           " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
137
138               " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
139               " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
140               " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
141               " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations.")
142
143        // FC5F ;	FE74 0651 ;   ML  #* ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM to
144        //                                ARABIC KASRATAN ISOLATED FORM, ARABIC SHADDA
145        //    This character NFKD normalizes to \u0020 \u064d \u0651, so its confusable mapping
146        //    is never used in creating a skeleton.
147        CHECK_SKELETON(SL, "\\uFC5F", " \\u064d\\u0651");
148
149        CHECK_SKELETON(SL, "nochange", "nochange");
150        CHECK_SKELETON(MA, "love", "1ove");   // lower case l to digit 1
151        CHECK_SKELETON(ML, "OOPS", "OOPS");
152        CHECK_SKELETON(MA, "OOPS", "00PS");   // Letter O to digit 0 in any case mode only
153        CHECK_SKELETON(SL, "\\u059c", "\\u0301");
154        CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
155        CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u0031\\u0031\\u0029");
156        CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0627\\u0644\\u0647");
157
158        // This mapping exists in the ML and MA tables, does not exist in SL, SA
159        //0C83 ;	0C03 ;	ML	# ( ಠâ à° ) KANNADA SIGN VISARGA â TELUGU SIGN VISARGA	# {source:513}
160        CHECK_SKELETON(SL, "\\u0C83", "\\u0C83");
161        CHECK_SKELETON(SA, "\\u0C83", "\\u0C83");
162        CHECK_SKELETON(ML, "\\u0C83", "\\u0C03");
163        CHECK_SKELETON(MA, "\\u0C83", "\\u0C03");
164
165        // 0391 ; 0041 ; MA # ( Î â A ) GREEK CAPITAL LETTER ALPHA to LATIN CAPITAL LETTER A
166        // This mapping exists only in the MA table.
167        CHECK_SKELETON(MA, "\\u0391", "A");
168        CHECK_SKELETON(SA, "\\u0391", "\\u0391");
169        CHECK_SKELETON(ML, "\\u0391", "\\u0391");
170        CHECK_SKELETON(SL, "\\u0391", "\\u0391");
171
172        // 13CF ;  0062 ;  MA  #  CHEROKEE LETTER SI to LATIN SMALL LETTER B
173        // This mapping exists in the ML and MA tables
174        CHECK_SKELETON(ML, "\\u13CF", "b");
175        CHECK_SKELETON(MA, "\\u13CF", "b");
176        CHECK_SKELETON(SL, "\\u13CF", "\\u13CF");
177        CHECK_SKELETON(SA, "\\u13CF", "\\u13CF");
178
179        // 0022 ;  02B9 02B9 ;  SA  #*  QUOTATION MARK to MODIFIER LETTER PRIME, MODIFIER LETTER PRIME
180        // all tables.
181        CHECK_SKELETON(SL, "\\u0022", "\\u02B9\\u02B9");
182        CHECK_SKELETON(SA, "\\u0022", "\\u02B9\\u02B9");
183        CHECK_SKELETON(ML, "\\u0022", "\\u02B9\\u02B9");
184        CHECK_SKELETON(MA, "\\u0022", "\\u02B9\\u02B9");
185
186    TEST_TEARDOWN;
187}
188
189
190//
191//  Run a single confusable skeleton transformation test case.
192//
193void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
194                                  const char *input, const char *expected, int32_t lineNum) {
195    UnicodeString uInput = UnicodeString(input).unescape();
196    UnicodeString uExpected = UnicodeString(expected).unescape();
197
198    UErrorCode status = U_ZERO_ERROR;
199    UnicodeString actual;
200    uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status);
201    if (U_FAILURE(status)) {
202        errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum,
203              u_errorName(status));
204        return;
205    }
206    if (uExpected != actual) {
207        errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
208               __FILE__, __LINE__, lineNum);
209        errln(UnicodeString(" Actual   Skeleton: \"") + actual + UnicodeString("\"\n") +
210              UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\""));
211    }
212}
213
214void IntlTestSpoof::testAreConfusable() {
215    TEST_SETUP
216        UnicodeString s1("A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
217                         "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ");
218        UnicodeString s2("A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
219                         "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. ");
220        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
221        TEST_ASSERT_SUCCESS(status);
222
223    TEST_TEARDOWN;
224}
225
226void IntlTestSpoof::testInvisible() {
227    TEST_SETUP
228        UnicodeString  s = UnicodeString("abcd\\u0301ef").unescape();
229        int32_t position = -42;
230        TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));
231        TEST_ASSERT_SUCCESS(status);
232        TEST_ASSERT(position == -42);
233
234        UnicodeString  s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
235        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status));
236        TEST_ASSERT_SUCCESS(status);
237        TEST_ASSERT_EQ(7, position);
238
239        // Tow acute accents, one from the composed a with acute accent, \u00e1,
240        // and one separate.
241        position = -42;
242        UnicodeString  s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
243        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status));
244        TEST_ASSERT_SUCCESS(status);
245        TEST_ASSERT_EQ(7, position);
246    TEST_TEARDOWN;
247}
248#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
249
250