testidn.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1/* 2******************************************************************************* 3* 4* Copyright (C) 2003-2006, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: testidn.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2003-02-06 14* created by: Ram Viswanadha 15* 16* This program reads the rfc3454_*.txt files, 17* parses them, and extracts the data for Nameprep conformance. 18* It then preprocesses it and writes a binary file for efficient use 19* in various IDNA conversion processes. 20*/ 21 22#include "unicode/utypes.h" 23 24#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 25 26#define USPREP_TYPE_NAMES_ARRAY 27 28#include "unicode/uchar.h" 29#include "unicode/putil.h" 30#include "cmemory.h" 31#include "cstring.h" 32#include "unicode/udata.h" 33#include "unewdata.h" 34#include "uoptions.h" 35#include "uparse.h" 36#include "utrie.h" 37#include "umutex.h" 38#include "sprpimpl.h" 39#include "testidna.h" 40#include "punyref.h" 41#include <stdlib.h> 42 43UBool beVerbose=FALSE, haveCopyright=TRUE; 44 45/* prototypes --------------------------------------------------------------- */ 46 47 48static void 49parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode); 50 51static void 52compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength, 53 UStringPrepType option); 54 55static void 56compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option); 57 58static void 59testAllCodepoints(TestIDNA& test); 60 61static TestIDNA* pTestIDNA =NULL; 62 63static const char* fileNames[] = { 64 "NamePrepProfile.txt" 65 }; 66static UStringPrepProfile *profile = NULL; 67static const UTrie *idnTrie = NULL; 68static const int32_t *indexes = NULL; 69static const uint16_t *mappingData = NULL; 70/* -------------------------------------------------------------------------- */ 71 72/* file definitions */ 73#define DATA_NAME "uidna" 74#define DATA_TYPE "icu" 75 76#define MISC_DIR "misc" 77 78extern int 79testData(TestIDNA& test) { 80 char *basename=NULL; 81 UErrorCode errorCode=U_ZERO_ERROR; 82 char *saveBasename =NULL; 83 84 profile = usprep_open(NULL, DATA_NAME, &errorCode); 85 if(U_FAILURE(errorCode)){ 86 test.errln("Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode))); 87 return errorCode; 88 } 89 90 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024); 91 //TODO get the srcDir dynamically 92 const char *srcDir=IntlTest::pathToDataDirectory(); 93 94 idnTrie = &profile->sprepTrie; 95 indexes = profile->indexes; 96 mappingData = profile->mappingData; 97 98 //initialize 99 pTestIDNA = &test; 100 101 /* prepare the filename beginning with the source dir */ 102 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){ 103 filename[0] = 0x2E; 104 filename[1] = U_FILE_SEP_CHAR; 105 uprv_strcpy(filename+2,srcDir); 106 }else{ 107 uprv_strcpy(filename, srcDir); 108 } 109 basename=filename+uprv_strlen(filename); 110 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 111 *basename++=U_FILE_SEP_CHAR; 112 } 113 114 /* process unassigned */ 115 basename=filename+uprv_strlen(filename); 116 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 117 *basename++=U_FILE_SEP_CHAR; 118 } 119 120 /* first copy misc directory */ 121 saveBasename = basename; 122 uprv_strcpy(basename,MISC_DIR); 123 basename = basename + uprv_strlen(MISC_DIR); 124 *basename++=U_FILE_SEP_CHAR; 125 126 /* process unassigned */ 127 uprv_strcpy(basename,fileNames[0]); 128 parseMappings(filename,TRUE, test,&errorCode); 129 if(U_FAILURE(errorCode)) { 130 test.errln( "Could not open file %s for reading \n", filename); 131 return errorCode; 132 } 133 134 testAllCodepoints(test); 135 136 usprep_close(profile); 137 pTestIDNA = NULL; 138 free(filename); 139 return errorCode; 140} 141U_CDECL_BEGIN 142 143static void U_CALLCONV 144strprepProfileLineFn(void * /*context*/, 145 char *fields[][2], int32_t fieldCount, 146 UErrorCode *pErrorCode) { 147 uint32_t mapping[40]; 148 char *end, *map; 149 uint32_t code; 150 int32_t length; 151 /*UBool* mapWithNorm = (UBool*) context;*/ 152 const char* typeName; 153 uint32_t rangeStart=0,rangeEnd =0; 154 155 if(fieldCount != 3){ 156 *pErrorCode = U_INVALID_FORMAT_ERROR; 157 return; 158 } 159 160 typeName = fields[2][0]; 161 map = fields[1][0]; 162 163 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ 164 165 u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode); 166 167 /* store the range */ 168 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED); 169 170 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ 171 172 u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode); 173 174 /* store the range */ 175 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED); 176 177 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ 178 /* get the character code, field 0 */ 179 code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16); 180 181 /* parse the mapping string */ 182 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); 183 184 /* store the mapping */ 185 compareMapping(code,mapping, length,USPREP_MAP); 186 187 }else{ 188 *pErrorCode = U_INVALID_FORMAT_ERROR; 189 } 190 191} 192 193U_CDECL_END 194 195static void 196parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) { 197 char *fields[3][2]; 198 199 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 200 return; 201 } 202 203 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode); 204 205 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len); 206 207 if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { 208 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); 209 } 210} 211 212 213static inline UStringPrepType 214getValues(uint32_t result, int32_t& value, UBool& isIndex){ 215 216 UStringPrepType type; 217 218 if(result == 0){ 219 /* 220 * Initial value stored in the mapping table 221 * just return USPREP_TYPE_LIMIT .. so that 222 * the source codepoint is copied to the destination 223 */ 224 type = USPREP_TYPE_LIMIT; 225 isIndex =FALSE; 226 value = 0; 227 }else if(result >= _SPREP_TYPE_THRESHOLD){ 228 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); 229 isIndex =FALSE; 230 value = 0; 231 }else{ 232 /* get the state */ 233 type = USPREP_MAP; 234 /* ascertain if the value is index or delta */ 235 if(result & 0x02){ 236 isIndex = TRUE; 237 value = result >> 2; //mask off the lower 2 bits and shift 238 239 }else{ 240 isIndex = FALSE; 241 value = (int16_t)result; 242 value = (value >> 2); 243 244 } 245 if((result>>2) == _SPREP_MAX_INDEX_VALUE){ 246 type = USPREP_DELETE; 247 isIndex =FALSE; 248 value = 0; 249 } 250 } 251 return type; 252} 253 254 255 256static void 257testAllCodepoints(TestIDNA& test){ 258 /* 259 { 260 UChar str[19] = { 261 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 262 0x070F,//prohibited 263 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74 264 }; 265 uint32_t in[19] = {0}; 266 UErrorCode status = U_ZERO_ERROR; 267 int32_t inLength=0, outLength=100; 268 char output[100] = {0}; 269 punycode_status error; 270 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status); 271 272 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output); 273 printf(output); 274 275 } 276 */ 277 278 uint32_t i = 0; 279 int32_t unassigned = 0; 280 int32_t prohibited = 0; 281 int32_t mappedWithNorm = 0; 282 int32_t mapped = 0; 283 int32_t noValueInTrie = 0; 284 285 UStringPrepType type; 286 int32_t value; 287 UBool isIndex = FALSE; 288 289 for(i=0;i<=0x10FFFF;i++){ 290 uint32_t result = 0; 291 UTRIE_GET16(idnTrie,i, result); 292 type = getValues(result,value, isIndex); 293 if(type != USPREP_TYPE_LIMIT ){ 294 if(type == USPREP_UNASSIGNED){ 295 unassigned++; 296 } 297 if(type == USPREP_PROHIBITED){ 298 prohibited++; 299 } 300 if(type == USPREP_MAP){ 301 mapped++; 302 } 303 }else{ 304 noValueInTrie++; 305 if(result > 0){ 306 test.errln("The return value for 0x%06X is wrong. %i\n",i,result); 307 } 308 } 309 } 310 311 test.logln("Number of Unassinged code points : %i \n",unassigned); 312 test.logln("Number of Prohibited code points : %i \n",prohibited); 313 test.logln("Number of Mapped code points : %i \n",mapped); 314 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm); 315 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie); 316 317 318} 319 320static void 321compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength, 322 UStringPrepType type){ 323 uint32_t result = 0; 324 UTRIE_GET16(idnTrie,codepoint, result); 325 326 int32_t length=0; 327 UBool isIndex; 328 UStringPrepType retType; 329 int32_t value, index=0, delta=0; 330 331 retType = getValues(result,value,isIndex); 332 333 334 if(type != retType && retType != USPREP_DELETE){ 335 336 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); 337 338 } 339 340 if(isIndex){ 341 index = value; 342 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 343 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 344 length = 1; 345 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 346 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 347 length = 2; 348 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 349 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 350 length = 3; 351 }else{ 352 length = mappingData[index++]; 353 } 354 }else{ 355 delta = value; 356 length = (retType == USPREP_DELETE)? 0 : 1; 357 } 358 359 int32_t realLength =0; 360 /* figure out the real length */ 361 for(int32_t j=0; j<mapLength; j++){ 362 if(mapping[j] > 0xFFFF){ 363 realLength +=2; 364 }else{ 365 realLength++; 366 } 367 } 368 369 if(realLength != length){ 370 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); 371 } 372 373 if(isIndex){ 374 for(int8_t i =0; i< mapLength; i++){ 375 if(mapping[i] <= 0xFFFF){ 376 if(mappingData[index+i] != (uint16_t)mapping[i]){ 377 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]); 378 } 379 }else{ 380 UChar lead = UTF16_LEAD(mapping[i]); 381 UChar trail = UTF16_TRAIL(mapping[i]); 382 if(mappingData[index+i] != lead || 383 mappingData[index+i+1] != trail){ 384 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]); 385 } 386 } 387 } 388 }else{ 389 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ 390 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); 391 } 392 } 393 394} 395 396static void 397compareFlagsForRange(uint32_t start, uint32_t end, 398 UStringPrepType type){ 399 400 uint32_t result =0 ; 401 UStringPrepType retType; 402 UBool isIndex=FALSE; 403 int32_t value=0; 404/* 405 // supplementary code point 406 UChar __lead16=UTF16_LEAD(0x2323E); 407 int32_t __offset; 408 409 // get data for lead surrogate 410 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); 411 __offset=(&idnTrie)->getFoldingOffset(result); 412 413 // get the real data from the folded lead/trail units 414 if(__offset>0) { 415 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); 416 } else { 417 (result)=(uint32_t)((&idnTrie)->initialValue); 418 } 419 420 UTRIE_GET16(&idnTrie,0x2323E, result); 421*/ 422 while(start < end+1){ 423 UTRIE_GET16(idnTrie,start, result); 424 retType = getValues(result,value,isIndex); 425 if(result > _SPREP_TYPE_THRESHOLD){ 426 if(retType != type){ 427 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 428 } 429 }else{ 430 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ 431 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 432 } 433 } 434 435 start++; 436 } 437 438} 439 440 441#endif /* #if !UCONFIG_NO_IDNA */ 442 443/* 444 * Hey, Emacs, please set the following: 445 * 446 * Local Variables: 447 * indent-tabs-mode: nil 448 * End: 449 * 450 */ 451