1/* 2******************************************************************************* 3* 4* Copyright (C) 2003-2013, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: testidn.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2003-02-06 14* created by: Ram Viswanadha 15* 16* This program reads the rfc3454_*.txt files, 17* parses them, and extracts the data for Nameprep conformance. 18* It then preprocesses it and writes a binary file for efficient use 19* in various IDNA conversion processes. 20*/ 21 22#include "unicode/utypes.h" 23 24#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 25 26#define USPREP_TYPE_NAMES_ARRAY 27 28#include "unicode/uchar.h" 29#include "unicode/putil.h" 30#include "cmemory.h" 31#include "cstring.h" 32#include "unicode/udata.h" 33#include "unicode/utf16.h" 34#include "unewdata.h" 35#include "uoptions.h" 36#include "uparse.h" 37#include "utrie.h" 38#include "umutex.h" 39#include "sprpimpl.h" 40#include "testidna.h" 41#include "punyref.h" 42#include <stdlib.h> 43 44UBool beVerbose=FALSE, haveCopyright=TRUE; 45 46/* prototypes --------------------------------------------------------------- */ 47 48 49static void 50parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode); 51 52static void 53compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength, 54 UStringPrepType option); 55 56static void 57compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option); 58 59static void 60testAllCodepoints(TestIDNA& test); 61 62static TestIDNA* pTestIDNA =NULL; 63 64static const char* fileNames[] = { 65 "rfc3491.txt" 66 }; 67static const UTrie *idnTrie = NULL; 68static const int32_t *indexes = NULL; 69static const uint16_t *mappingData = NULL; 70/* -------------------------------------------------------------------------- */ 71 72/* file definitions */ 73#define DATA_TYPE "icu" 74 75#define SPREP_DIR "sprep" 76 77extern int 78testData(TestIDNA& test) { 79 char *basename=NULL; 80 UErrorCode errorCode=U_ZERO_ERROR; 81 char *saveBasename =NULL; 82 83 LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode)); 84 if(U_FAILURE(errorCode)){ 85 test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode))); 86 return errorCode; 87 } 88 89 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024); 90 //TODO get the srcDir dynamically 91 const char *srcDir=IntlTest::pathToDataDirectory(); 92 93 idnTrie = &profile->sprepTrie; 94 indexes = profile->indexes; 95 mappingData = profile->mappingData; 96 97 //initialize 98 pTestIDNA = &test; 99 100 /* prepare the filename beginning with the source dir */ 101 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){ 102 filename[0] = 0x2E; 103 filename[1] = U_FILE_SEP_CHAR; 104 uprv_strcpy(filename+2,srcDir); 105 }else{ 106 uprv_strcpy(filename, srcDir); 107 } 108 basename=filename+uprv_strlen(filename); 109 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 110 *basename++=U_FILE_SEP_CHAR; 111 } 112 113 /* process unassigned */ 114 basename=filename+uprv_strlen(filename); 115 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 116 *basename++=U_FILE_SEP_CHAR; 117 } 118 119 /* first copy misc directory */ 120 saveBasename = basename; 121 (void)saveBasename; // Suppress set but not used warning. 122 uprv_strcpy(basename,SPREP_DIR); 123 basename = basename + uprv_strlen(SPREP_DIR); 124 *basename++=U_FILE_SEP_CHAR; 125 126 /* process unassigned */ 127 uprv_strcpy(basename,fileNames[0]); 128 parseMappings(filename,TRUE, test,&errorCode); 129 if(U_FAILURE(errorCode)) { 130 test.errln( "Could not open file %s for reading \n", filename); 131 return errorCode; 132 } 133 134 testAllCodepoints(test); 135 136 pTestIDNA = NULL; 137 free(filename); 138 return errorCode; 139} 140U_CDECL_BEGIN 141 142static void U_CALLCONV 143strprepProfileLineFn(void * /*context*/, 144 char *fields[][2], int32_t fieldCount, 145 UErrorCode *pErrorCode) { 146 uint32_t mapping[40]; 147 char *end, *map; 148 uint32_t code; 149 int32_t length; 150 /*UBool* mapWithNorm = (UBool*) context;*/ 151 const char* typeName; 152 uint32_t rangeStart=0,rangeEnd =0; 153 const char *s; 154 155 s = u_skipWhitespace(fields[0][0]); 156 if (*s == '@') { 157 /* a special directive introduced in 4.2 */ 158 return; 159 } 160 161 if(fieldCount != 3){ 162 *pErrorCode = U_INVALID_FORMAT_ERROR; 163 return; 164 } 165 166 typeName = fields[2][0]; 167 map = fields[1][0]; 168 169 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ 170 171 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 172 173 /* store the range */ 174 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED); 175 176 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ 177 178 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 179 180 /* store the range */ 181 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED); 182 183 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ 184 /* get the character code, field 0 */ 185 code=(uint32_t)uprv_strtoul(s, &end, 16); 186 187 /* parse the mapping string */ 188 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); 189 190 /* store the mapping */ 191 compareMapping(code,mapping, length,USPREP_MAP); 192 193 }else{ 194 *pErrorCode = U_INVALID_FORMAT_ERROR; 195 } 196 197} 198 199U_CDECL_END 200 201static void 202parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) { 203 char *fields[3][2]; 204 205 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 206 return; 207 } 208 209 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode); 210 211 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len); 212 213 if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { 214 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); 215 } 216} 217 218 219static inline UStringPrepType 220getValues(uint32_t result, int32_t& value, UBool& isIndex){ 221 222 UStringPrepType type; 223 224 if(result == 0){ 225 /* 226 * Initial value stored in the mapping table 227 * just return USPREP_TYPE_LIMIT .. so that 228 * the source codepoint is copied to the destination 229 */ 230 type = USPREP_TYPE_LIMIT; 231 isIndex =FALSE; 232 value = 0; 233 }else if(result >= _SPREP_TYPE_THRESHOLD){ 234 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); 235 isIndex =FALSE; 236 value = 0; 237 }else{ 238 /* get the state */ 239 type = USPREP_MAP; 240 /* ascertain if the value is index or delta */ 241 if(result & 0x02){ 242 isIndex = TRUE; 243 value = result >> 2; //mask off the lower 2 bits and shift 244 245 }else{ 246 isIndex = FALSE; 247 value = (int16_t)result; 248 value = (value >> 2); 249 250 } 251 if((result>>2) == _SPREP_MAX_INDEX_VALUE){ 252 type = USPREP_DELETE; 253 isIndex =FALSE; 254 value = 0; 255 } 256 } 257 return type; 258} 259 260 261 262static void 263testAllCodepoints(TestIDNA& test){ 264 /* 265 { 266 UChar str[19] = { 267 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 268 0x070F,//prohibited 269 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74 270 }; 271 uint32_t in[19] = {0}; 272 UErrorCode status = U_ZERO_ERROR; 273 int32_t inLength=0, outLength=100; 274 char output[100] = {0}; 275 punycode_status error; 276 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status); 277 278 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output); 279 printf(output); 280 281 } 282 */ 283 284 uint32_t i = 0; 285 int32_t unassigned = 0; 286 int32_t prohibited = 0; 287 int32_t mappedWithNorm = 0; 288 int32_t mapped = 0; 289 int32_t noValueInTrie = 0; 290 291 UStringPrepType type; 292 int32_t value; 293 UBool isIndex = FALSE; 294 295 for(i=0;i<=0x10FFFF;i++){ 296 uint32_t result = 0; 297 UTRIE_GET16(idnTrie,i, result); 298 type = getValues(result,value, isIndex); 299 if(type != USPREP_TYPE_LIMIT ){ 300 if(type == USPREP_UNASSIGNED){ 301 unassigned++; 302 } 303 if(type == USPREP_PROHIBITED){ 304 prohibited++; 305 } 306 if(type == USPREP_MAP){ 307 mapped++; 308 } 309 }else{ 310 noValueInTrie++; 311 if(result > 0){ 312 test.errln("The return value for 0x%06X is wrong. %i\n",i,result); 313 } 314 } 315 } 316 317 test.logln("Number of Unassinged code points : %i \n",unassigned); 318 test.logln("Number of Prohibited code points : %i \n",prohibited); 319 test.logln("Number of Mapped code points : %i \n",mapped); 320 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm); 321 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie); 322 323 324} 325 326static void 327compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength, 328 UStringPrepType type){ 329 uint32_t result = 0; 330 UTRIE_GET16(idnTrie,codepoint, result); 331 332 int32_t length=0; 333 UBool isIndex; 334 UStringPrepType retType; 335 int32_t value, index=0, delta=0; 336 337 retType = getValues(result,value,isIndex); 338 339 340 if(type != retType && retType != USPREP_DELETE){ 341 342 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); 343 344 } 345 346 if(isIndex){ 347 index = value; 348 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 349 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 350 length = 1; 351 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 352 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 353 length = 2; 354 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 355 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 356 length = 3; 357 }else{ 358 length = mappingData[index++]; 359 } 360 }else{ 361 delta = value; 362 length = (retType == USPREP_DELETE)? 0 : 1; 363 } 364 365 int32_t realLength =0; 366 /* figure out the real length */ 367 for(int32_t j=0; j<mapLength; j++){ 368 if(mapping[j] > 0xFFFF){ 369 realLength +=2; 370 }else{ 371 realLength++; 372 } 373 } 374 375 if(realLength != length){ 376 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); 377 } 378 379 if(isIndex){ 380 for(int8_t i =0; i< mapLength; i++){ 381 if(mapping[i] <= 0xFFFF){ 382 if(mappingData[index+i] != (uint16_t)mapping[i]){ 383 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]); 384 } 385 }else{ 386 UChar lead = U16_LEAD(mapping[i]); 387 UChar trail = U16_TRAIL(mapping[i]); 388 if(mappingData[index+i] != lead || 389 mappingData[index+i+1] != trail){ 390 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]); 391 } 392 } 393 } 394 }else{ 395 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ 396 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); 397 } 398 } 399 400} 401 402static void 403compareFlagsForRange(uint32_t start, uint32_t end, 404 UStringPrepType type){ 405 406 uint32_t result =0 ; 407 UStringPrepType retType; 408 UBool isIndex=FALSE; 409 int32_t value=0; 410/* 411 // supplementary code point 412 UChar __lead16=U16_LEAD(0x2323E); 413 int32_t __offset; 414 415 // get data for lead surrogate 416 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); 417 __offset=(&idnTrie)->getFoldingOffset(result); 418 419 // get the real data from the folded lead/trail units 420 if(__offset>0) { 421 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); 422 } else { 423 (result)=(uint32_t)((&idnTrie)->initialValue); 424 } 425 426 UTRIE_GET16(&idnTrie,0x2323E, result); 427*/ 428 while(start < end+1){ 429 UTRIE_GET16(idnTrie,start, result); 430 retType = getValues(result,value,isIndex); 431 if(result > _SPREP_TYPE_THRESHOLD){ 432 if(retType != type){ 433 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 434 } 435 }else{ 436 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ 437 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 438 } 439 } 440 441 start++; 442 } 443 444} 445 446 447#endif /* #if !UCONFIG_NO_IDNA */ 448 449/* 450 * Hey, Emacs, please set the following: 451 * 452 * Local Variables: 453 * indent-tabs-mode: nil 454 * End: 455 * 456 */ 457