1/* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2005-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * created on: 2005jun15 10 * created by: Raymond Yang 11 */ 12 13#if !UCONFIG_NO_IDNA 14 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include "unicode/utypes.h" 19#include "unicode/ucnv.h" 20#include "unicode/ustring.h" 21#include "unicode/uidna.h" 22 23#include "idnaconf.h" 24 25static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // ===== 26static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone 27static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase 28static const UChar C_NAMEUTF8[] = {0x6E, 0x61, 0x6D, 0x65, 0x75, 0x74, 0x66, 0x38, 0}; // nameutf8 29 30static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type 31static const UChar C_TOASCII[] = {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0}; // toascii 32static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode 33 34static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail 35static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass 36static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail 37 38static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc 39static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44, 40 0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules 41 42IdnaConfTest::IdnaConfTest(){ 43 base = NULL; 44 len = 0; 45 curOffset = 0; 46 47 type = option = passfail = -1; 48 namebase.setToBogus(); 49 namezone.setToBogus(); 50} 51IdnaConfTest::~IdnaConfTest(){ 52 delete [] base; 53} 54 55#if !UCONFIG_NO_IDNA 56/* this function is modified from RBBITest::ReadAndConvertFile() 57 * 58 */ 59UBool IdnaConfTest::ReadAndConvertFile(){ 60 61 char * source = NULL; 62 size_t source_len; 63 64 // read the test data file to memory 65 FILE* f = NULL; 66 UErrorCode status = U_ZERO_ERROR; 67 68 const char *path = IntlTest::getSourceTestData(status); 69 if (U_FAILURE(status)) { 70 errln("%s", u_errorName(status)); 71 return FALSE; 72 } 73 74 const char* name = "idna_conf.txt"; // test data file 75 int t = strlen(path) + strlen(name) + 1; 76 char* absolute_name = new char[t]; 77 strcpy(absolute_name, path); 78 strcat(absolute_name, name); 79 f = fopen(absolute_name, "rb"); 80 delete [] absolute_name; 81 82 if (f == NULL){ 83 dataerrln("fopen error on %s", name); 84 return FALSE; 85 } 86 87 fseek( f, 0, SEEK_END); 88 if ((source_len = ftell(f)) <= 0){ 89 errln("Error reading test data file."); 90 fclose(f); 91 return FALSE; 92 } 93 94 source = new char[source_len]; 95 fseek(f, 0, SEEK_SET); 96 if (fread(source, 1, source_len, f) != source_len) { 97 errln("Error reading test data file."); 98 delete [] source; 99 fclose(f); 100 return FALSE; 101 } 102 fclose(f); 103 104 // convert the UTF-8 encoded stream to UTF-16 stream 105 UConverter* conv = ucnv_open("utf-8", &status); 106 int dest_len = ucnv_toUChars(conv, 107 NULL, // dest, 108 0, // destCapacity, 109 source, 110 source_len, 111 &status); 112 if (status == U_BUFFER_OVERFLOW_ERROR) { 113 // Buffer Overflow is expected from the preflight operation. 114 status = U_ZERO_ERROR; 115 UChar * dest = NULL; 116 dest = new UChar[ dest_len + 1]; 117 ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status); 118 // Do not know the "if possible" behavior of ucnv_toUChars() 119 // Do it by ourself. 120 dest[dest_len] = 0; 121 len = dest_len; 122 base = dest; 123 delete [] source; 124 ucnv_close(conv); 125 return TRUE; // The buffer will owned by caller. 126 } 127 errln("UConverter error: %s", u_errorName(status)); 128 delete [] source; 129 ucnv_close(conv); 130 return FALSE; 131} 132 133int IdnaConfTest::isNewlineMark(){ 134 static const UChar LF = 0x0a; 135 static const UChar CR = 0x0d; 136 UChar c = base[curOffset]; 137 // CR LF 138 if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){ 139 return 2; 140 } 141 142 // CR or LF 143 if ( c == CR || c == LF) { 144 return 1; 145 } 146 147 return 0; 148} 149 150/* Read a logical line. 151 * 152 * All lines ending in a backslash (\) and immediately followed by a newline 153 * character are joined with the next line in the source file forming logical 154 * lines from the physical lines. 155 * 156 */ 157UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){ 158 if ( !(curOffset < len) ) return FALSE; // stream end 159 160 static const UChar BACKSLASH = 0x5c; 161 buf.remove(); 162 int t = 0; 163 while (curOffset < len){ 164 if ((t = isNewlineMark())) { // end of line 165 curOffset += t; 166 break; 167 } 168 UChar c = base[curOffset]; 169 if (c == BACKSLASH && curOffset < len -1){ // escaped new line mark 170 if ((t = isNewlineMark())){ 171 curOffset += 1 + t; // BACKSLAH and NewlineMark 172 continue; 173 } 174 }; 175 buf.append(c); 176 curOffset++; 177 } 178 return TRUE; 179} 180 181// 182//=============================================================== 183// 184 185/* Explain <xxxxx> tag to a native value 186 * 187 * Since <xxxxx> is always larger than the native value, 188 * the operation will replace the tag directly in the buffer, 189 * and, of course, will shift tail elements. 190 */ 191void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){ 192 buf.append((UChar)0); // add a terminal NULL 193 UChar* bufBase = buf.getBuffer(buf.length()); 194 UChar* p = bufBase; 195 while (*p != 0){ 196 if ( *p != 0x3C){ // < 197 *bufBase++ = *p++; 198 } else { 199 p++; // skip < 200 UChar32 cp = 0; 201 for ( ;*p != 0x3E; p++){ // > 202 if (0x30 <= *p && *p <= 0x39){ // 0-9 203 cp = (cp * 16) + (*p - 0x30); 204 } else if (0x61 <= *p && *p <= 0x66){ // a-f 205 cp = (cp * 16) + (*p - 0x61) + 10; 206 } else if (0x41 <= *p && *p <= 0x46) {// A-F 207 cp = (cp * 16) + (*p - 0x41) + 10; 208 } 209 // no else. hope everything is good. 210 } 211 p++; // skip > 212 if (U_IS_BMP(cp)){ 213 *bufBase++ = cp; 214 } else { 215 *bufBase++ = U16_LEAD(cp); 216 *bufBase++ = U16_TRAIL(cp); 217 } 218 } 219 } 220 *bufBase = 0; // close our buffer 221 buf.releaseBuffer(); 222} 223 224void IdnaConfTest::Call(){ 225 if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){ 226 errln("Incomplete record"); 227 } else { 228 UErrorCode status = U_ZERO_ERROR; 229 UChar result[200] = {0,}; // simple life 230 const UChar *p = namebase.getTerminatedBuffer(); 231 const int p_len = namebase.length(); 232 233 if (type == 0 && option == 0){ 234 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status); 235 } else if (type == 0 && option == 1){ 236 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status); 237 } else if (type == 1 && option == 0){ 238 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status); 239 } else if (type == 1 && option == 1){ 240 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status); 241 } 242 if (passfail == 0){ 243 if (U_FAILURE(status)){ 244 id.append(" should pass, but failed. - "); 245 id.append(u_errorName(status)); 246 errcheckln(status, id); 247 } else{ 248 if (namezone.compare(result, -1) == 0){ 249 // expected 250 logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result)); 251 } else { 252 id.append(" no error, but result is not as expected."); 253 errln(id); 254 } 255 } 256 } else if (passfail == 1){ 257 if (U_FAILURE(status)){ 258 // expected 259 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName 260 //logln("Got the expected error: " + UnicodeString(u_errorName(status))); 261 } else{ 262 if (namebase.compare(result, -1) == 0){ 263 // garbage in -> garbage out 264 logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result)); 265 } else { 266 id.append(" should fail, but not failed. "); 267 id.append(u_errorName(status)); 268 errln(id); 269 } 270 } 271 } 272 } 273 type = option = passfail = -1; 274 namebase.setToBogus(); 275 namezone.setToBogus(); 276 id.remove(); 277 return; 278} 279 280void IdnaConfTest::Test(void){ 281 if (!ReadAndConvertFile())return; 282 283 UnicodeString s; 284 UnicodeString key; 285 UnicodeString value; 286 287 // skip everything before the first "=====" and "=====" itself 288 do { 289 if (!ReadOneLine(s)) { 290 errln("End of file prematurely found"); 291 break; 292 } 293 } 294 while (s.compare(C_TAG, -1) != 0); //"=====" 295 296 while(ReadOneLine(s)){ 297 s.trim(); 298 key.remove(); 299 value.remove(); 300 if (s.compare(C_TAG, -1) == 0){ //"=====" 301 Call(); 302 } else { 303 // explain key:value 304 int p = s.indexOf((UChar)0x3A); // : 305 key.setTo(s,0,p).trim(); 306 value.setTo(s,p+1).trim(); 307 if (key.compare(C_TYPE, -1) == 0){ 308 if (value.compare(C_TOASCII, -1) == 0) { 309 type = 0; 310 } else if (value.compare(C_TOUNICODE, -1) == 0){ 311 type = 1; 312 } 313 } else if (key.compare(C_PASSFAIL, -1) == 0){ 314 if (value.compare(C_PASS, -1) == 0){ 315 passfail = 0; 316 } else if (value.compare(C_FAIL, -1) == 0){ 317 passfail = 1; 318 } 319 } else if (key.compare(C_DESC, -1) == 0){ 320 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){ 321 option = 1; // not found 322 } else { 323 option = 0; 324 } 325 id.setTo(value, 0, value.indexOf((UChar)0x20)); // space 326 } else if (key.compare(C_NAMEZONE, -1) == 0){ 327 ExplainCodePointTag(value); 328 namezone.setTo(value); 329 } else if (key.compare(C_NAMEBASE, -1) == 0){ 330 ExplainCodePointTag(value); 331 namebase.setTo(value); 332 } 333 // just skip other lines 334 } 335 } 336 337 Call(); // for last record 338} 339#else 340void IdnaConfTest::Test(void) 341{ 342 // test nothing... 343} 344#endif 345 346void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){ 347 switch (index) { 348 TESTCASE(0,Test); 349 default: name = ""; break; 350 } 351} 352 353#endif 354