1/* 2********************************************************************** 3* Copyright (C) 2009, International Business Machines Corporation 4* and others. All Rights Reserved. 5********************************************************************** 6*/ 7/** 8 * IntlTestSpoof tests for USpoofDetector 9 */ 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_REGULAR_EXPRESSIONS 14 15#include "itspoof.h" 16#include "unicode/uspoof.h" 17 18#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 19 errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 20 21#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 22 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 23 24#define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \ 25 errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \ 26 __FILE__, __LINE__, #a, (a), #b, (b)); }} 27 28#define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \ 29 errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \ 30 __FILE__, __LINE__, #a, (a), #b, (b)); }} 31 32/* 33 * TEST_SETUP and TEST_TEARDOWN 34 * macros to handle the boilerplate around setting up test case. 35 * Put arbitrary test code between SETUP and TEARDOWN. 36 * "sc" is the ready-to-go SpoofChecker for use in the tests. 37 */ 38#define TEST_SETUP { \ 39 UErrorCode status = U_ZERO_ERROR; \ 40 USpoofChecker *sc; \ 41 sc = uspoof_open(&status); \ 42 TEST_ASSERT_SUCCESS(status); \ 43 if (U_SUCCESS(status)){ 44 45#define TEST_TEARDOWN \ 46 } \ 47 TEST_ASSERT_SUCCESS(status); \ 48 uspoof_close(sc); \ 49} 50 51 52 53 54void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 55{ 56 if (exec) logln("TestSuite spoof: "); 57 switch (index) { 58 case 0: 59 name = "TestSpoofAPI"; 60 if (exec) { 61 testSpoofAPI(); 62 } 63 break; 64 case 1: 65 name = "TestSkeleton"; 66 if (exec) { 67 testSkeleton(); 68 } 69 break; 70 case 2: 71 name = "TestAreConfusable"; 72 if (exec) { 73 testAreConfusable(); 74 } 75 break; 76 case 3: 77 name = "TestInvisible"; 78 if (exec) { 79 testInvisible(); 80 } 81 break; 82 default: name=""; break; 83 } 84} 85 86void IntlTestSpoof::testSpoofAPI() { 87 88 TEST_SETUP 89 UnicodeString s("uvw"); 90 int32_t position = 666; 91 int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status); 92 TEST_ASSERT_SUCCESS(status); 93 TEST_ASSERT_EQ(0, checkResults); 94 TEST_ASSERT_EQ(666, position); 95 TEST_TEARDOWN; 96 97 TEST_SETUP 98 UnicodeString s1("cxs"); 99 UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape(); // Cyrillic "cxs" 100 int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status); 101 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults); 102 103 TEST_TEARDOWN; 104 105 TEST_SETUP 106 UnicodeString s("I1l0O"); 107 UnicodeString dest; 108 UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status); 109 TEST_ASSERT_SUCCESS(status); 110 TEST_ASSERT(UnicodeString("11100") == dest); 111 TEST_ASSERT(&dest == &retStr); 112 TEST_TEARDOWN; 113} 114 115 116#define CHECK_SKELETON(type, input, expected) { \ 117 checkSkeleton(sc, type, input, expected, __LINE__); \ 118 } 119 120 121// testSkeleton. Spot check a number of confusable skeleton substitutions from the 122// Unicode data file confusables.txt 123// Test cases chosen for substitutions of various lengths, and 124// membership in different mapping tables. 125void IntlTestSpoof::testSkeleton() { 126 const uint32_t ML = 0; 127 const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE; 128 const uint32_t MA = USPOOF_ANY_CASE; 129 const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE; 130 131 TEST_SETUP 132 // A long "identifier" that will overflow implementation stack buffers, forcing heap allocations. 133 CHECK_SKELETON(SL, " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations." 134 " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations." 135 " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations." 136 " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations.", 137 138 " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations." 139 " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations." 140 " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations." 141 " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations.") 142 143 // FC5F ; FE74 0651 ; ML #* ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM to 144 // ARABIC KASRATAN ISOLATED FORM, ARABIC SHADDA 145 // This character NFKD normalizes to \u0020 \u064d \u0651, so its confusable mapping 146 // is never used in creating a skeleton. 147 CHECK_SKELETON(SL, "\\uFC5F", " \\u064d\\u0651"); 148 149 CHECK_SKELETON(SL, "nochange", "nochange"); 150 CHECK_SKELETON(MA, "love", "1ove"); // lower case l to digit 1 151 CHECK_SKELETON(ML, "OOPS", "OOPS"); 152 CHECK_SKELETON(MA, "OOPS", "00PS"); // Letter O to digit 0 in any case mode only 153 CHECK_SKELETON(SL, "\\u059c", "\\u0301"); 154 CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D"); 155 CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u0031\\u0031\\u0029"); 156 CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0627\\u0644\\u0647"); 157 158 // This mapping exists in the ML and MA tables, does not exist in SL, SA 159 //0C83 ; 0C03 ; ML # ( ಠâ à° ) KANNADA SIGN VISARGA â TELUGU SIGN VISARGA # {source:513} 160 CHECK_SKELETON(SL, "\\u0C83", "\\u0C83"); 161 CHECK_SKELETON(SA, "\\u0C83", "\\u0C83"); 162 CHECK_SKELETON(ML, "\\u0C83", "\\u0C03"); 163 CHECK_SKELETON(MA, "\\u0C83", "\\u0C03"); 164 165 // 0391 ; 0041 ; MA # ( Î â A ) GREEK CAPITAL LETTER ALPHA to LATIN CAPITAL LETTER A 166 // This mapping exists only in the MA table. 167 CHECK_SKELETON(MA, "\\u0391", "A"); 168 CHECK_SKELETON(SA, "\\u0391", "\\u0391"); 169 CHECK_SKELETON(ML, "\\u0391", "\\u0391"); 170 CHECK_SKELETON(SL, "\\u0391", "\\u0391"); 171 172 // 13CF ; 0062 ; MA # CHEROKEE LETTER SI to LATIN SMALL LETTER B 173 // This mapping exists in the ML and MA tables 174 CHECK_SKELETON(ML, "\\u13CF", "b"); 175 CHECK_SKELETON(MA, "\\u13CF", "b"); 176 CHECK_SKELETON(SL, "\\u13CF", "\\u13CF"); 177 CHECK_SKELETON(SA, "\\u13CF", "\\u13CF"); 178 179 // 0022 ; 02B9 02B9 ; SA #* QUOTATION MARK to MODIFIER LETTER PRIME, MODIFIER LETTER PRIME 180 // all tables. 181 CHECK_SKELETON(SL, "\\u0022", "\\u02B9\\u02B9"); 182 CHECK_SKELETON(SA, "\\u0022", "\\u02B9\\u02B9"); 183 CHECK_SKELETON(ML, "\\u0022", "\\u02B9\\u02B9"); 184 CHECK_SKELETON(MA, "\\u0022", "\\u02B9\\u02B9"); 185 186 TEST_TEARDOWN; 187} 188 189 190// 191// Run a single confusable skeleton transformation test case. 192// 193void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type, 194 const char *input, const char *expected, int32_t lineNum) { 195 UnicodeString uInput = UnicodeString(input).unescape(); 196 UnicodeString uExpected = UnicodeString(expected).unescape(); 197 198 UErrorCode status = U_ZERO_ERROR; 199 UnicodeString actual; 200 uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status); 201 if (U_FAILURE(status)) { 202 errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum, 203 u_errorName(status)); 204 return; 205 } 206 if (uExpected != actual) { 207 errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.", 208 __FILE__, __LINE__, lineNum); 209 errln(UnicodeString(" Actual Skeleton: \"") + actual + UnicodeString("\"\n") + 210 UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\"")); 211 } 212} 213 214void IntlTestSpoof::testAreConfusable() { 215 TEST_SETUP 216 UnicodeString s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. " 217 "A long string that will overflow stack buffers. A long string that will overflow stack buffers. "); 218 UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. " 219 "A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "); 220 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status)); 221 TEST_ASSERT_SUCCESS(status); 222 223 TEST_TEARDOWN; 224} 225 226void IntlTestSpoof::testInvisible() { 227 TEST_SETUP 228 UnicodeString s = UnicodeString("abcd\\u0301ef").unescape(); 229 int32_t position = -42; 230 TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status)); 231 TEST_ASSERT_SUCCESS(status); 232 TEST_ASSERT(position == -42); 233 234 UnicodeString s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape(); 235 TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status)); 236 TEST_ASSERT_SUCCESS(status); 237 TEST_ASSERT_EQ(7, position); 238 239 // Tow acute accents, one from the composed a with acute accent, \u00e1, 240 // and one separate. 241 position = -42; 242 UnicodeString s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape(); 243 TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status)); 244 TEST_ASSERT_SUCCESS(status); 245 TEST_ASSERT_EQ(7, position); 246 TEST_TEARDOWN; 247} 248#endif // UCONFIG_NO_REGULAR_EXPRESSIONS 249 250