1/* 2******************************************************************************* 3* 4* Copyright (C) 2003-2009, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: testidn.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2003-02-06 14* created by: Ram Viswanadha 15* 16* This program reads the rfc3454_*.txt files, 17* parses them, and extracts the data for Nameprep conformance. 18* It then preprocesses it and writes a binary file for efficient use 19* in various IDNA conversion processes. 20*/ 21 22#include "unicode/utypes.h" 23 24#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 25 26#define USPREP_TYPE_NAMES_ARRAY 27 28#include "unicode/uchar.h" 29#include "unicode/putil.h" 30#include "cmemory.h" 31#include "cstring.h" 32#include "unicode/udata.h" 33#include "unewdata.h" 34#include "uoptions.h" 35#include "uparse.h" 36#include "utrie.h" 37#include "umutex.h" 38#include "sprpimpl.h" 39#include "testidna.h" 40#include "punyref.h" 41#include <stdlib.h> 42 43UBool beVerbose=FALSE, haveCopyright=TRUE; 44 45/* prototypes --------------------------------------------------------------- */ 46 47 48static void 49parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode); 50 51static void 52compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength, 53 UStringPrepType option); 54 55static void 56compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option); 57 58static void 59testAllCodepoints(TestIDNA& test); 60 61static TestIDNA* pTestIDNA =NULL; 62 63static const char* fileNames[] = { 64 "rfc3491.txt" 65 }; 66static const UTrie *idnTrie = NULL; 67static const int32_t *indexes = NULL; 68static const uint16_t *mappingData = NULL; 69/* -------------------------------------------------------------------------- */ 70 71/* file definitions */ 72#define DATA_TYPE "icu" 73 74#define SPREP_DIR "sprep" 75 76extern int 77testData(TestIDNA& test) { 78 char *basename=NULL; 79 UErrorCode errorCode=U_ZERO_ERROR; 80 char *saveBasename =NULL; 81 82 LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode)); 83 if(U_FAILURE(errorCode)){ 84 test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode))); 85 return errorCode; 86 } 87 88 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024); 89 //TODO get the srcDir dynamically 90 const char *srcDir=IntlTest::pathToDataDirectory(); 91 92 idnTrie = &profile->sprepTrie; 93 indexes = profile->indexes; 94 mappingData = profile->mappingData; 95 96 //initialize 97 pTestIDNA = &test; 98 99 /* prepare the filename beginning with the source dir */ 100 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){ 101 filename[0] = 0x2E; 102 filename[1] = U_FILE_SEP_CHAR; 103 uprv_strcpy(filename+2,srcDir); 104 }else{ 105 uprv_strcpy(filename, srcDir); 106 } 107 basename=filename+uprv_strlen(filename); 108 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 109 *basename++=U_FILE_SEP_CHAR; 110 } 111 112 /* process unassigned */ 113 basename=filename+uprv_strlen(filename); 114 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 115 *basename++=U_FILE_SEP_CHAR; 116 } 117 118 /* first copy misc directory */ 119 saveBasename = basename; 120 uprv_strcpy(basename,SPREP_DIR); 121 basename = basename + uprv_strlen(SPREP_DIR); 122 *basename++=U_FILE_SEP_CHAR; 123 124 /* process unassigned */ 125 uprv_strcpy(basename,fileNames[0]); 126 parseMappings(filename,TRUE, test,&errorCode); 127 if(U_FAILURE(errorCode)) { 128 test.errln( "Could not open file %s for reading \n", filename); 129 return errorCode; 130 } 131 132 testAllCodepoints(test); 133 134 pTestIDNA = NULL; 135 free(filename); 136 return errorCode; 137} 138U_CDECL_BEGIN 139 140static void U_CALLCONV 141strprepProfileLineFn(void * /*context*/, 142 char *fields[][2], int32_t fieldCount, 143 UErrorCode *pErrorCode) { 144 uint32_t mapping[40]; 145 char *end, *map; 146 uint32_t code; 147 int32_t length; 148 /*UBool* mapWithNorm = (UBool*) context;*/ 149 const char* typeName; 150 uint32_t rangeStart=0,rangeEnd =0; 151 const char *s; 152 153 s = u_skipWhitespace(fields[0][0]); 154 if (*s == '@') { 155 /* a special directive introduced in 4.2 */ 156 return; 157 } 158 159 if(fieldCount != 3){ 160 *pErrorCode = U_INVALID_FORMAT_ERROR; 161 return; 162 } 163 164 typeName = fields[2][0]; 165 map = fields[1][0]; 166 167 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ 168 169 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 170 171 /* store the range */ 172 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED); 173 174 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ 175 176 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 177 178 /* store the range */ 179 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED); 180 181 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ 182 /* get the character code, field 0 */ 183 code=(uint32_t)uprv_strtoul(s, &end, 16); 184 185 /* parse the mapping string */ 186 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); 187 188 /* store the mapping */ 189 compareMapping(code,mapping, length,USPREP_MAP); 190 191 }else{ 192 *pErrorCode = U_INVALID_FORMAT_ERROR; 193 } 194 195} 196 197U_CDECL_END 198 199static void 200parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) { 201 char *fields[3][2]; 202 203 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 204 return; 205 } 206 207 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode); 208 209 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len); 210 211 if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { 212 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); 213 } 214} 215 216 217static inline UStringPrepType 218getValues(uint32_t result, int32_t& value, UBool& isIndex){ 219 220 UStringPrepType type; 221 222 if(result == 0){ 223 /* 224 * Initial value stored in the mapping table 225 * just return USPREP_TYPE_LIMIT .. so that 226 * the source codepoint is copied to the destination 227 */ 228 type = USPREP_TYPE_LIMIT; 229 isIndex =FALSE; 230 value = 0; 231 }else if(result >= _SPREP_TYPE_THRESHOLD){ 232 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); 233 isIndex =FALSE; 234 value = 0; 235 }else{ 236 /* get the state */ 237 type = USPREP_MAP; 238 /* ascertain if the value is index or delta */ 239 if(result & 0x02){ 240 isIndex = TRUE; 241 value = result >> 2; //mask off the lower 2 bits and shift 242 243 }else{ 244 isIndex = FALSE; 245 value = (int16_t)result; 246 value = (value >> 2); 247 248 } 249 if((result>>2) == _SPREP_MAX_INDEX_VALUE){ 250 type = USPREP_DELETE; 251 isIndex =FALSE; 252 value = 0; 253 } 254 } 255 return type; 256} 257 258 259 260static void 261testAllCodepoints(TestIDNA& test){ 262 /* 263 { 264 UChar str[19] = { 265 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 266 0x070F,//prohibited 267 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74 268 }; 269 uint32_t in[19] = {0}; 270 UErrorCode status = U_ZERO_ERROR; 271 int32_t inLength=0, outLength=100; 272 char output[100] = {0}; 273 punycode_status error; 274 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status); 275 276 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output); 277 printf(output); 278 279 } 280 */ 281 282 uint32_t i = 0; 283 int32_t unassigned = 0; 284 int32_t prohibited = 0; 285 int32_t mappedWithNorm = 0; 286 int32_t mapped = 0; 287 int32_t noValueInTrie = 0; 288 289 UStringPrepType type; 290 int32_t value; 291 UBool isIndex = FALSE; 292 293 for(i=0;i<=0x10FFFF;i++){ 294 uint32_t result = 0; 295 UTRIE_GET16(idnTrie,i, result); 296 type = getValues(result,value, isIndex); 297 if(type != USPREP_TYPE_LIMIT ){ 298 if(type == USPREP_UNASSIGNED){ 299 unassigned++; 300 } 301 if(type == USPREP_PROHIBITED){ 302 prohibited++; 303 } 304 if(type == USPREP_MAP){ 305 mapped++; 306 } 307 }else{ 308 noValueInTrie++; 309 if(result > 0){ 310 test.errln("The return value for 0x%06X is wrong. %i\n",i,result); 311 } 312 } 313 } 314 315 test.logln("Number of Unassinged code points : %i \n",unassigned); 316 test.logln("Number of Prohibited code points : %i \n",prohibited); 317 test.logln("Number of Mapped code points : %i \n",mapped); 318 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm); 319 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie); 320 321 322} 323 324static void 325compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength, 326 UStringPrepType type){ 327 uint32_t result = 0; 328 UTRIE_GET16(idnTrie,codepoint, result); 329 330 int32_t length=0; 331 UBool isIndex; 332 UStringPrepType retType; 333 int32_t value, index=0, delta=0; 334 335 retType = getValues(result,value,isIndex); 336 337 338 if(type != retType && retType != USPREP_DELETE){ 339 340 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); 341 342 } 343 344 if(isIndex){ 345 index = value; 346 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 347 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 348 length = 1; 349 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 350 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 351 length = 2; 352 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 353 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 354 length = 3; 355 }else{ 356 length = mappingData[index++]; 357 } 358 }else{ 359 delta = value; 360 length = (retType == USPREP_DELETE)? 0 : 1; 361 } 362 363 int32_t realLength =0; 364 /* figure out the real length */ 365 for(int32_t j=0; j<mapLength; j++){ 366 if(mapping[j] > 0xFFFF){ 367 realLength +=2; 368 }else{ 369 realLength++; 370 } 371 } 372 373 if(realLength != length){ 374 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); 375 } 376 377 if(isIndex){ 378 for(int8_t i =0; i< mapLength; i++){ 379 if(mapping[i] <= 0xFFFF){ 380 if(mappingData[index+i] != (uint16_t)mapping[i]){ 381 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]); 382 } 383 }else{ 384 UChar lead = UTF16_LEAD(mapping[i]); 385 UChar trail = UTF16_TRAIL(mapping[i]); 386 if(mappingData[index+i] != lead || 387 mappingData[index+i+1] != trail){ 388 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]); 389 } 390 } 391 } 392 }else{ 393 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ 394 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); 395 } 396 } 397 398} 399 400static void 401compareFlagsForRange(uint32_t start, uint32_t end, 402 UStringPrepType type){ 403 404 uint32_t result =0 ; 405 UStringPrepType retType; 406 UBool isIndex=FALSE; 407 int32_t value=0; 408/* 409 // supplementary code point 410 UChar __lead16=UTF16_LEAD(0x2323E); 411 int32_t __offset; 412 413 // get data for lead surrogate 414 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); 415 __offset=(&idnTrie)->getFoldingOffset(result); 416 417 // get the real data from the folded lead/trail units 418 if(__offset>0) { 419 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); 420 } else { 421 (result)=(uint32_t)((&idnTrie)->initialValue); 422 } 423 424 UTRIE_GET16(&idnTrie,0x2323E, result); 425*/ 426 while(start < end+1){ 427 UTRIE_GET16(idnTrie,start, result); 428 retType = getValues(result,value,isIndex); 429 if(result > _SPREP_TYPE_THRESHOLD){ 430 if(retType != type){ 431 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 432 } 433 }else{ 434 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ 435 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 436 } 437 } 438 439 start++; 440 } 441 442} 443 444 445#endif /* #if !UCONFIG_NO_IDNA */ 446 447/* 448 * Hey, Emacs, please set the following: 449 * 450 * Local Variables: 451 * indent-tabs-mode: nil 452 * End: 453 * 454 */ 455