/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  testidn.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003-02-06
 *   created by: Ram Viswanadha
 *
 *   This test parses the Nameprep source table (rfc3491.txt) and compares
 *   every entry (unassigned ranges, prohibited ranges and mappings) with the
 *   trie and mapping data of the StringPrep profile that is opened at run time.
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION

/* must be defined before sprpimpl.h is included; usprepTypeNames[] is used below */
#define USPREP_TYPE_NAMES_ARRAY

#include "unicode/uchar.h"
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/udata.h"
#include "unicode/utf16.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uparse.h"
#include "utrie.h"
#include "umutex.h"
#include "sprpimpl.h"
#include "testidna.h"
#include "punyref.h"
#include <stdlib.h>

/* NOTE(review): neither flag is read anywhere in this file -- confirm whether other files still need them */
UBool beVerbose=FALSE, haveCopyright=TRUE;

/* prototypes --------------------------------------------------------------- */

/* Parses one source table and checks every line against the loaded profile. */
static void
parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);

/* Checks the stored mapping of a single code point against the expected one. */
static void
compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
               UStringPrepType option);

/* Checks the stored type of every code point in [start, end]. */
static void
compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);

/* Walks all code points and logs how many fall into each category. */
static void
testAllCodepoints(TestIDNA& test);

/* test object used for error reporting by the comparison helpers; set by testData() */
static TestIDNA* pTestIDNA =NULL;

/* source tables to verify, relative to the sprep data directory */
static const char* fileNames[] = {
                                    "rfc3491.txt"
                                 };
/* views into the profile opened by testData(): trie, index array and mapping table */
static const UTrie *idnTrie              = NULL;
static const int32_t *indexes            = NULL;
static const uint16_t *mappingData = NULL;
/* -------------------------------------------------------------------------- */

72/* file definitions */ 73#define DATA_TYPE "icu" 74 75#define SPREP_DIR "sprep" 76 77extern int 78testData(TestIDNA& test) { 79 char *basename=NULL; 80 UErrorCode errorCode=U_ZERO_ERROR; 81 char *saveBasename =NULL; 82 83 LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode)); 84 if(U_FAILURE(errorCode)){ 85 test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode))); 86 return errorCode; 87 } 88 89 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024); 90 //TODO get the srcDir dynamically 91 const char *srcDir=IntlTest::pathToDataDirectory(); 92 93 idnTrie = &profile->sprepTrie; 94 indexes = profile->indexes; 95 mappingData = profile->mappingData; 96 97 //initialize 98 pTestIDNA = &test; 99 100 /* prepare the filename beginning with the source dir */ 101 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){ 102 filename[0] = 0x2E; 103 filename[1] = U_FILE_SEP_CHAR; 104 uprv_strcpy(filename+2,srcDir); 105 }else{ 106 uprv_strcpy(filename, srcDir); 107 } 108 basename=filename+uprv_strlen(filename); 109 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 110 *basename++=U_FILE_SEP_CHAR; 111 } 112 113 /* process unassigned */ 114 basename=filename+uprv_strlen(filename); 115 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 116 *basename++=U_FILE_SEP_CHAR; 117 } 118 119 /* first copy misc directory */ 120 saveBasename = basename; 121 uprv_strcpy(basename,SPREP_DIR); 122 basename = basename + uprv_strlen(SPREP_DIR); 123 *basename++=U_FILE_SEP_CHAR; 124 125 /* process unassigned */ 126 uprv_strcpy(basename,fileNames[0]); 127 parseMappings(filename,TRUE, test,&errorCode); 128 if(U_FAILURE(errorCode)) { 129 test.errln( "Could not open file %s for reading \n", filename); 130 return errorCode; 131 } 132 133 testAllCodepoints(test); 134 135 pTestIDNA = NULL; 136 free(filename); 137 return errorCode; 138} 139U_CDECL_BEGIN 140 141static void U_CALLCONV 
142strprepProfileLineFn(void * /*context*/, 143 char *fields[][2], int32_t fieldCount, 144 UErrorCode *pErrorCode) { 145 uint32_t mapping[40]; 146 char *end, *map; 147 uint32_t code; 148 int32_t length; 149 /*UBool* mapWithNorm = (UBool*) context;*/ 150 const char* typeName; 151 uint32_t rangeStart=0,rangeEnd =0; 152 const char *s; 153 154 s = u_skipWhitespace(fields[0][0]); 155 if (*s == '@') { 156 /* a special directive introduced in 4.2 */ 157 return; 158 } 159 160 if(fieldCount != 3){ 161 *pErrorCode = U_INVALID_FORMAT_ERROR; 162 return; 163 } 164 165 typeName = fields[2][0]; 166 map = fields[1][0]; 167 168 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ 169 170 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 171 172 /* store the range */ 173 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED); 174 175 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ 176 177 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 178 179 /* store the range */ 180 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED); 181 182 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ 183 /* get the character code, field 0 */ 184 code=(uint32_t)uprv_strtoul(s, &end, 16); 185 186 /* parse the mapping string */ 187 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); 188 189 /* store the mapping */ 190 compareMapping(code,mapping, length,USPREP_MAP); 191 192 }else{ 193 *pErrorCode = U_INVALID_FORMAT_ERROR; 194 } 195 196} 197 198U_CDECL_END 199 200static void 201parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) { 202 char *fields[3][2]; 203 204 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 205 return; 206 } 207 208 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode); 209 210 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len); 211 212 
if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { 213 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); 214 } 215} 216 217 218static inline UStringPrepType 219getValues(uint32_t result, int32_t& value, UBool& isIndex){ 220 221 UStringPrepType type; 222 223 if(result == 0){ 224 /* 225 * Initial value stored in the mapping table 226 * just return USPREP_TYPE_LIMIT .. so that 227 * the source codepoint is copied to the destination 228 */ 229 type = USPREP_TYPE_LIMIT; 230 isIndex =FALSE; 231 value = 0; 232 }else if(result >= _SPREP_TYPE_THRESHOLD){ 233 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); 234 isIndex =FALSE; 235 value = 0; 236 }else{ 237 /* get the state */ 238 type = USPREP_MAP; 239 /* ascertain if the value is index or delta */ 240 if(result & 0x02){ 241 isIndex = TRUE; 242 value = result >> 2; //mask off the lower 2 bits and shift 243 244 }else{ 245 isIndex = FALSE; 246 value = (int16_t)result; 247 value = (value >> 2); 248 249 } 250 if((result>>2) == _SPREP_MAX_INDEX_VALUE){ 251 type = USPREP_DELETE; 252 isIndex =FALSE; 253 value = 0; 254 } 255 } 256 return type; 257} 258 259 260 261static void 262testAllCodepoints(TestIDNA& test){ 263 /* 264 { 265 UChar str[19] = { 266 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 267 0x070F,//prohibited 268 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74 269 }; 270 uint32_t in[19] = {0}; 271 UErrorCode status = U_ZERO_ERROR; 272 int32_t inLength=0, outLength=100; 273 char output[100] = {0}; 274 punycode_status error; 275 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status); 276 277 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output); 278 printf(output); 279 280 } 281 */ 282 283 uint32_t i = 0; 284 int32_t unassigned = 0; 285 int32_t prohibited = 0; 286 int32_t mappedWithNorm = 0; 287 int32_t mapped = 0; 288 int32_t noValueInTrie = 0; 289 290 
UStringPrepType type; 291 int32_t value; 292 UBool isIndex = FALSE; 293 294 for(i=0;i<=0x10FFFF;i++){ 295 uint32_t result = 0; 296 UTRIE_GET16(idnTrie,i, result); 297 type = getValues(result,value, isIndex); 298 if(type != USPREP_TYPE_LIMIT ){ 299 if(type == USPREP_UNASSIGNED){ 300 unassigned++; 301 } 302 if(type == USPREP_PROHIBITED){ 303 prohibited++; 304 } 305 if(type == USPREP_MAP){ 306 mapped++; 307 } 308 }else{ 309 noValueInTrie++; 310 if(result > 0){ 311 test.errln("The return value for 0x%06X is wrong. %i\n",i,result); 312 } 313 } 314 } 315 316 test.logln("Number of Unassinged code points : %i \n",unassigned); 317 test.logln("Number of Prohibited code points : %i \n",prohibited); 318 test.logln("Number of Mapped code points : %i \n",mapped); 319 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm); 320 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie); 321 322 323} 324 325static void 326compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength, 327 UStringPrepType type){ 328 uint32_t result = 0; 329 UTRIE_GET16(idnTrie,codepoint, result); 330 331 int32_t length=0; 332 UBool isIndex; 333 UStringPrepType retType; 334 int32_t value, index=0, delta=0; 335 336 retType = getValues(result,value,isIndex); 337 338 339 if(type != retType && retType != USPREP_DELETE){ 340 341 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. 
Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); 342 343 } 344 345 if(isIndex){ 346 index = value; 347 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 348 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 349 length = 1; 350 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 351 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 352 length = 2; 353 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 354 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 355 length = 3; 356 }else{ 357 length = mappingData[index++]; 358 } 359 }else{ 360 delta = value; 361 length = (retType == USPREP_DELETE)? 0 : 1; 362 } 363 364 int32_t realLength =0; 365 /* figure out the real length */ 366 for(int32_t j=0; j<mapLength; j++){ 367 if(mapping[j] > 0xFFFF){ 368 realLength +=2; 369 }else{ 370 realLength++; 371 } 372 } 373 374 if(realLength != length){ 375 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); 376 } 377 378 if(isIndex){ 379 for(int8_t i =0; i< mapLength; i++){ 380 if(mapping[i] <= 0xFFFF){ 381 if(mappingData[index+i] != (uint16_t)mapping[i]){ 382 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]); 383 } 384 }else{ 385 UChar lead = U16_LEAD(mapping[i]); 386 UChar trail = U16_TRAIL(mapping[i]); 387 if(mappingData[index+i] != lead || 388 mappingData[index+i+1] != trail){ 389 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]); 390 } 391 } 392 } 393 }else{ 394 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ 395 pTestIDNA->errln("Did not get the expected result. 
Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); 396 } 397 } 398 399} 400 401static void 402compareFlagsForRange(uint32_t start, uint32_t end, 403 UStringPrepType type){ 404 405 uint32_t result =0 ; 406 UStringPrepType retType; 407 UBool isIndex=FALSE; 408 int32_t value=0; 409/* 410 // supplementary code point 411 UChar __lead16=U16_LEAD(0x2323E); 412 int32_t __offset; 413 414 // get data for lead surrogate 415 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); 416 __offset=(&idnTrie)->getFoldingOffset(result); 417 418 // get the real data from the folded lead/trail units 419 if(__offset>0) { 420 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); 421 } else { 422 (result)=(uint32_t)((&idnTrie)->initialValue); 423 } 424 425 UTRIE_GET16(&idnTrie,0x2323E, result); 426*/ 427 while(start < end+1){ 428 UTRIE_GET16(idnTrie,start, result); 429 retType = getValues(result,value,isIndex); 430 if(result > _SPREP_TYPE_THRESHOLD){ 431 if(retType != type){ 432 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 433 } 434 }else{ 435 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ 436 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 437 } 438 } 439 440 start++; 441 } 442 443} 444 445 446#endif /* #if !UCONFIG_NO_IDNA */ 447 448/* 449 * Hey, Emacs, please set the following: 450 * 451 * Local Variables: 452 * indent-tabs-mode: nil 453 * End: 454 * 455 */ 456