1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 2003-2013, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* file name: testidn.cpp 11* encoding: UTF-8 12* tab size: 8 (not used) 13* indentation:4 14* 15* created on: 2003-02-06 16* created by: Ram Viswanadha 17* 18* This program reads the rfc3454_*.txt files, 19* parses them, and extracts the data for Nameprep conformance. 20* It then preprocesses it and writes a binary file for efficient use 21* in various IDNA conversion processes. 22*/ 23 24#include "unicode/utypes.h" 25 26#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 27 28#define USPREP_TYPE_NAMES_ARRAY 29 30#include "unicode/uchar.h" 31#include "unicode/putil.h" 32#include "cmemory.h" 33#include "cstring.h" 34#include "unicode/udata.h" 35#include "unicode/utf16.h" 36#include "unewdata.h" 37#include "uoptions.h" 38#include "uparse.h" 39#include "utrie.h" 40#include "umutex.h" 41#include "sprpimpl.h" 42#include "testidna.h" 43#include "punyref.h" 44#include <stdlib.h> 45 46UBool beVerbose=FALSE, haveCopyright=TRUE; 47 48/* prototypes --------------------------------------------------------------- */ 49 50 51static void 52parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode); 53 54static void 55compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength, 56 UStringPrepType option); 57 58static void 59compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option); 60 61static void 62testAllCodepoints(TestIDNA& test); 63 64static TestIDNA* pTestIDNA =NULL; 65 66static const char* fileNames[] = { 67 "rfc3491.txt" 68 }; 69static const UTrie *idnTrie = NULL; 70static const int32_t *indexes = NULL; 71static const uint16_t *mappingData = NULL; 72/* -------------------------------------------------------------------------- */ 73 74/* file definitions */ 75#define DATA_TYPE "icu" 76 77#define SPREP_DIR "sprep" 78 79extern int 80testData(TestIDNA& test) { 81 char *basename=NULL; 82 UErrorCode errorCode=U_ZERO_ERROR; 83 char *saveBasename =NULL; 84 85 LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode)); 86 if(U_FAILURE(errorCode)){ 87 test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode))); 88 return errorCode; 89 } 90 91 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024); 92 //TODO get the srcDir dynamically 93 const char *srcDir=IntlTest::pathToDataDirectory(); 94 95 idnTrie = &profile->sprepTrie; 96 indexes = profile->indexes; 97 mappingData = profile->mappingData; 98 99 //initialize 100 pTestIDNA = &test; 101 102 /* prepare the filename beginning with the source dir */ 103 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){ 104 filename[0] = 0x2E; 105 filename[1] = U_FILE_SEP_CHAR; 106 uprv_strcpy(filename+2,srcDir); 107 }else{ 108 uprv_strcpy(filename, srcDir); 109 } 110 basename=filename+uprv_strlen(filename); 111 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 112 *basename++=U_FILE_SEP_CHAR; 113 } 114 115 /* process unassigned */ 116 basename=filename+uprv_strlen(filename); 117 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 118 *basename++=U_FILE_SEP_CHAR; 119 } 120 121 /* first copy misc directory */ 122 saveBasename = basename; 123 (void)saveBasename; // Suppress set but not used warning. 124 uprv_strcpy(basename,SPREP_DIR); 125 basename = basename + uprv_strlen(SPREP_DIR); 126 *basename++=U_FILE_SEP_CHAR; 127 128 /* process unassigned */ 129 uprv_strcpy(basename,fileNames[0]); 130 parseMappings(filename,TRUE, test,&errorCode); 131 if(U_FAILURE(errorCode)) { 132 test.errln( "Could not open file %s for reading \n", filename); 133 return errorCode; 134 } 135 136 testAllCodepoints(test); 137 138 pTestIDNA = NULL; 139 free(filename); 140 return errorCode; 141} 142U_CDECL_BEGIN 143 144static void U_CALLCONV 145strprepProfileLineFn(void * /*context*/, 146 char *fields[][2], int32_t fieldCount, 147 UErrorCode *pErrorCode) { 148 uint32_t mapping[40]; 149 char *end, *map; 150 uint32_t code; 151 int32_t length; 152 /*UBool* mapWithNorm = (UBool*) context;*/ 153 const char* typeName; 154 uint32_t rangeStart=0,rangeEnd =0; 155 const char *s; 156 157 s = u_skipWhitespace(fields[0][0]); 158 if (*s == '@') { 159 /* a special directive introduced in 4.2 */ 160 return; 161 } 162 163 if(fieldCount != 3){ 164 *pErrorCode = U_INVALID_FORMAT_ERROR; 165 return; 166 } 167 168 typeName = fields[2][0]; 169 map = fields[1][0]; 170 171 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ 172 173 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 174 175 /* store the range */ 176 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED); 177 178 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ 179 180 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 181 182 /* store the range */ 183 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED); 184 185 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ 186 /* get the character code, field 0 */ 187 code=(uint32_t)uprv_strtoul(s, &end, 16); 188 189 /* parse the mapping string */ 190 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); 191 192 /* store the mapping */ 193 compareMapping(code,mapping, length,USPREP_MAP); 194 195 }else{ 196 *pErrorCode = U_INVALID_FORMAT_ERROR; 197 } 198 199} 200 201U_CDECL_END 202 203static void 204parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) { 205 char *fields[3][2]; 206 207 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 208 return; 209 } 210 211 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode); 212 213 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len); 214 215 if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { 216 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); 217 } 218} 219 220 221static inline UStringPrepType 222getValues(uint32_t result, int32_t& value, UBool& isIndex){ 223 224 UStringPrepType type; 225 226 if(result == 0){ 227 /* 228 * Initial value stored in the mapping table 229 * just return USPREP_TYPE_LIMIT .. so that 230 * the source codepoint is copied to the destination 231 */ 232 type = USPREP_TYPE_LIMIT; 233 isIndex =FALSE; 234 value = 0; 235 }else if(result >= _SPREP_TYPE_THRESHOLD){ 236 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); 237 isIndex =FALSE; 238 value = 0; 239 }else{ 240 /* get the state */ 241 type = USPREP_MAP; 242 /* ascertain if the value is index or delta */ 243 if(result & 0x02){ 244 isIndex = TRUE; 245 value = result >> 2; //mask off the lower 2 bits and shift 246 247 }else{ 248 isIndex = FALSE; 249 value = (int16_t)result; 250 value = (value >> 2); 251 252 } 253 if((result>>2) == _SPREP_MAX_INDEX_VALUE){ 254 type = USPREP_DELETE; 255 isIndex =FALSE; 256 value = 0; 257 } 258 } 259 return type; 260} 261 262 263 264static void 265testAllCodepoints(TestIDNA& test){ 266 /* 267 { 268 UChar str[19] = { 269 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 270 0x070F,//prohibited 271 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74 272 }; 273 uint32_t in[19] = {0}; 274 UErrorCode status = U_ZERO_ERROR; 275 int32_t inLength=0, outLength=100; 276 char output[100] = {0}; 277 punycode_status error; 278 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status); 279 280 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output); 281 printf(output); 282 283 } 284 */ 285 286 uint32_t i = 0; 287 int32_t unassigned = 0; 288 int32_t prohibited = 0; 289 int32_t mappedWithNorm = 0; 290 int32_t mapped = 0; 291 int32_t noValueInTrie = 0; 292 293 UStringPrepType type; 294 int32_t value; 295 UBool isIndex = FALSE; 296 297 for(i=0;i<=0x10FFFF;i++){ 298 uint32_t result = 0; 299 UTRIE_GET16(idnTrie,i, result); 300 type = getValues(result,value, isIndex); 301 if(type != USPREP_TYPE_LIMIT ){ 302 if(type == USPREP_UNASSIGNED){ 303 unassigned++; 304 } 305 if(type == USPREP_PROHIBITED){ 306 prohibited++; 307 } 308 if(type == USPREP_MAP){ 309 mapped++; 310 } 311 }else{ 312 noValueInTrie++; 313 if(result > 0){ 314 test.errln("The return value for 0x%06X is wrong. %i\n",i,result); 315 } 316 } 317 } 318 319 test.logln("Number of Unassinged code points : %i \n",unassigned); 320 test.logln("Number of Prohibited code points : %i \n",prohibited); 321 test.logln("Number of Mapped code points : %i \n",mapped); 322 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm); 323 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie); 324 325 326} 327 328static void 329compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength, 330 UStringPrepType type){ 331 uint32_t result = 0; 332 UTRIE_GET16(idnTrie,codepoint, result); 333 334 int32_t length=0; 335 UBool isIndex; 336 UStringPrepType retType; 337 int32_t value, index=0, delta=0; 338 339 retType = getValues(result,value,isIndex); 340 341 342 if(type != retType && retType != USPREP_DELETE){ 343 344 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); 345 346 } 347 348 if(isIndex){ 349 index = value; 350 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 351 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 352 length = 1; 353 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 354 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 355 length = 2; 356 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 357 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 358 length = 3; 359 }else{ 360 length = mappingData[index++]; 361 } 362 }else{ 363 delta = value; 364 length = (retType == USPREP_DELETE)? 0 : 1; 365 } 366 367 int32_t realLength =0; 368 /* figure out the real length */ 369 for(int32_t j=0; j<mapLength; j++){ 370 if(mapping[j] > 0xFFFF){ 371 realLength +=2; 372 }else{ 373 realLength++; 374 } 375 } 376 377 if(realLength != length){ 378 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); 379 } 380 381 if(isIndex){ 382 for(int8_t i =0; i< mapLength; i++){ 383 if(mapping[i] <= 0xFFFF){ 384 if(mappingData[index+i] != (uint16_t)mapping[i]){ 385 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]); 386 } 387 }else{ 388 UChar lead = U16_LEAD(mapping[i]); 389 UChar trail = U16_TRAIL(mapping[i]); 390 if(mappingData[index+i] != lead || 391 mappingData[index+i+1] != trail){ 392 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]); 393 } 394 } 395 } 396 }else{ 397 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ 398 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); 399 } 400 } 401 402} 403 404static void 405compareFlagsForRange(uint32_t start, uint32_t end, 406 UStringPrepType type){ 407 408 uint32_t result =0 ; 409 UStringPrepType retType; 410 UBool isIndex=FALSE; 411 int32_t value=0; 412/* 413 // supplementary code point 414 UChar __lead16=U16_LEAD(0x2323E); 415 int32_t __offset; 416 417 // get data for lead surrogate 418 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); 419 __offset=(&idnTrie)->getFoldingOffset(result); 420 421 // get the real data from the folded lead/trail units 422 if(__offset>0) { 423 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); 424 } else { 425 (result)=(uint32_t)((&idnTrie)->initialValue); 426 } 427 428 UTRIE_GET16(&idnTrie,0x2323E, result); 429*/ 430 while(start < end+1){ 431 UTRIE_GET16(idnTrie,start, result); 432 retType = getValues(result,value,isIndex); 433 if(result > _SPREP_TYPE_THRESHOLD){ 434 if(retType != type){ 435 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 436 } 437 }else{ 438 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ 439 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 440 } 441 } 442 443 start++; 444 } 445 446} 447 448 449#endif /* #if !UCONFIG_NO_IDNA */ 450 451/* 452 * Hey, Emacs, please set the following: 453 * 454 * Local Variables: 455 * indent-tabs-mode: nil 456 * End: 457 * 458 */ 459