1/* 2 ******************************************************************************* 3 * Copyright (C) 1996-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8package com.ibm.icu.dev.test.normalizer; 9 10import java.io.BufferedReader; 11import java.io.IOException; 12import java.text.StringCharacterIterator; 13 14import com.ibm.icu.dev.test.TestFmwk; 15import com.ibm.icu.dev.test.TestUtil; 16import com.ibm.icu.impl.Utility; 17import com.ibm.icu.text.Normalizer; 18import com.ibm.icu.text.UTF16; 19import com.ibm.icu.text.UnicodeSet; 20 21public class ConformanceTest extends TestFmwk { 22 23 Normalizer normalizer; 24 25 public static void main(String[] args) throws Exception { 26 new ConformanceTest().run(args); 27 } 28 29 public ConformanceTest() { 30 // Doesn't matter what the string and mode are; we'll change 31 // them later as needed. 32 normalizer = new Normalizer("", Normalizer.NFC, 0); 33 } 34 // more interesting conformance test cases, not in the unicode.org NormalizationTest.txt 35 static String[] moreCases ={ 36 // Markus 2001aug30 37 "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0", 38 39 // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129 40 "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1" 41 }; 42 43 /** 44 * Test the conformance of Normalizer to 45 * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt.* http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt 46 * This file must be located at the path specified as TEST_SUITE_FILE. 47 */ 48 public void TestConformance() throws Exception{ 49 runConformance("unicode/NormalizationTest.txt",0); 50 } 51 public void TestConformance_3_2() throws Exception{ 52 runConformance("unicode/NormalizationTest-3.2.0.txt",Normalizer.UNICODE_3_2); 53 } 54 55 public void runConformance(String fileName, int options) throws Exception{ 56 String line = null; 57 String[] fields = new String[5]; 58 StringBuffer buf = new StringBuffer(); 59 int passCount = 0; 60 int failCount = 0; 61 UnicodeSet other = new UnicodeSet(0, 0x10ffff); 62 int c=0; 63 BufferedReader input = null; 64 try { 65 input = TestUtil.getDataReader(fileName); 66 for (int count = 0;;++count) { 67 line = input.readLine(); 68 if (line == null) { 69 //read the extra test cases 70 if(count > moreCases.length) { 71 count = 0; 72 } else if(count == moreCases.length) { 73 // all done 74 break; 75 } 76 line = moreCases[count++]; 77 } 78 if (line.length() == 0) continue; 79 80 // Expect 5 columns of this format: 81 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments> 82 83 // Skip comments 84 if (line.charAt(0) == '#' || line.charAt(0)=='@') continue; 85 86 // Parse out the fields 87 hexsplit(line, ';', fields, buf); 88 89 // Remove a single code point from the "other" UnicodeSet 90 if(fields[0].length()==UTF16.moveCodePointOffset(fields[0],0, 1)) { 91 c=UTF16.charAt(fields[0],0); 92 if(0xac20<=c && c<=0xd73f) { 93 // not an exhaustive test run: skip most Hangul syllables 94 if(c==0xac20) { 95 other.remove(0xac20, 0xd73f); 96 } 97 continue; 98 } 99 other.remove(c); 100 } 101 if (checkConformance(fields, line,options)) { 102 ++passCount; 103 } else { 104 ++failCount; 105 } 106 if ((count % 1000) == 999) { 107 logln("Line " + (count+1)); 108 } 109 } 110 } catch (IOException ex) { 111 ex.printStackTrace(); 112 throw new IllegalArgumentException("Couldn't read file " 113 + ex.getClass().getName() + " " + ex.getMessage() 114 + " line = " + line 115 ); 116 } finally { 117 if (input != null) { 118 try { 119 input.close(); 120 } catch (IOException ignored) { 121 } 122 } 123 } 124 125 if (failCount != 0) { 126 errln("Total: " + failCount + " lines failed, " + 127 passCount + " lines passed"); 128 } else { 129 logln("Total: " + passCount + " lines passed"); 130 } 131 } 132 133 /** 134 * Verify the conformance of the given line of the Unicode 135 * normalization (UTR 15) test suite file. For each line, 136 * there are five columns, corresponding to field[0]..field[4]. 137 * 138 * The following invariants must be true for all conformant implementations 139 * c2 == NFC(c1) == NFC(c2) == NFC(c3) 140 * c3 == NFD(c1) == NFD(c2) == NFD(c3) 141 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 142 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 143 * 144 * @param field the 5 columns 145 * @param line the source line from the test suite file 146 * @return true if the test passes 147 */ 148 private boolean checkConformance(String[] field, String line, int options) throws Exception{ 149 boolean pass = true; 150 StringBuffer buf = new StringBuffer(); // scratch 151 String out,fcd; 152 int i=0; 153 for (i=0; i<5; ++i) { 154 if (i<3) { 155 out = Normalizer.normalize(field[i], Normalizer.NFC, options); 156 pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c" + (i+1)); 157 158 out = iterativeNorm(field[i], Normalizer.NFC, buf, +1,options); 159 pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c" + (i+1)); 160 161 out = iterativeNorm(field[i], Normalizer.NFC, buf, -1,options); 162 pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c" + (i+1)); 163 164 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFC, buf, +1,options); 165 pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c" + (i+1)); 166 167 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFC, buf, -1,options); 168 pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c" + (i+1)); 169 170 out = Normalizer.normalize(field[i], Normalizer.NFD); 171 pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c" + (i+1)); 172 173 out = iterativeNorm(field[i], Normalizer.NFD, buf, +1,options); 174 pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c" + (i+1)); 175 176 out = iterativeNorm(field[i], Normalizer.NFD, buf, -1,options); 177 pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c" + (i+1)); 178 179 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFD, buf, +1,options); 180 pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c" + (i+1)); 181 182 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFD, buf, -1,options); 183 pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c" + (i+1)); 184 185 cross(field[2] /*NFD String*/, field[1]/*NFC String*/, Normalizer.NFC); 186 cross(field[1] /*NFC String*/, field[2]/*NFD String*/, Normalizer.NFD); 187 } 188 out = Normalizer.normalize(field[i], Normalizer.NFKC,options); 189 pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c" + (i+1)); 190 191 out = iterativeNorm(field[i], Normalizer.NFKC, buf, +1,options); 192 pass &= assertEqual("KD(+1)", field[i], out, field[3], "c4!=KC(c" + (i+1)); 193 194 out = iterativeNorm(field[i], Normalizer.NFKC, buf, -1,options); 195 pass &= assertEqual("KD(-1)", field[i], out, field[3], "c4!=KC(c" + (i+1)); 196 197 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKC, buf, +1,options); 198 pass &= assertEqual("KD(+1)", field[i], out, field[3], "c4!=KC(c" + (i+1)); 199 200 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKC, buf, -1,options); 201 pass &= assertEqual("KD(-1)", field[i], out, field[3], "c4!=KC(c" + (i+1)); 202 203 204 out = Normalizer.normalize(field[i], Normalizer.NFKD,options); 205 pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c" + (i+1)); 206 207 out = iterativeNorm(field[i], Normalizer.NFKD, buf, +1,options); 208 pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c" + (i+1)); 209 210 out = iterativeNorm(field[i], Normalizer.NFKD, buf, -1,options); 211 pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c" + (i+1)); 212 213 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKD, buf, +1,options); 214 pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c" + (i+1)); 215 216 out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKD, buf, -1,options); 217 pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c" + (i+1)); 218 219 cross(field[4] /*NFKD String*/, field[3]/*NFKC String*/, Normalizer.NFKC); 220 cross(field[3] /*NFKC String*/, field[4]/*NFKD String*/, Normalizer.NFKD); 221 222 } 223 compare(field[1],field[2]); 224 compare(field[0],field[1]); 225 compare(field[0],field[2]); 226 // test quick checks 227 if(Normalizer.NO == Normalizer.quickCheck(field[1], Normalizer.NFC,options)) { 228 errln("Normalizer error: quickCheck(NFC(s), Normalizer.NFC) is Normalizer.NO"); 229 pass = false; 230 } 231 if(Normalizer.NO == Normalizer.quickCheck(field[2], Normalizer.NFD,options)) { 232 errln("Normalizer error: quickCheck(NFD(s), Normalizer.NFD) is Normalizer.NO"); 233 pass = false; 234 } 235 if(Normalizer.NO == Normalizer.quickCheck(field[3], Normalizer.NFKC,options)) { 236 errln("Normalizer error: quickCheck(NFKC(s), Normalizer.NFKC) is Normalizer.NO"); 237 pass = false; 238 } 239 if(Normalizer.NO == Normalizer.quickCheck(field[4], Normalizer.NFKD,options)) { 240 errln("Normalizer error: quickCheck(NFKD(s), Normalizer.NFKD) is Normalizer.NO"); 241 pass = false; 242 } 243 244 if(!Normalizer.isNormalized(field[1], Normalizer.NFC, options)) { 245 errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false"); 246 pass = false; 247 } 248 if(!field[0].equals(field[1]) && Normalizer.isNormalized(field[0], Normalizer.NFC, options)) { 249 errln("Normalizer error: isNormalized(s, Normalizer.NFC) is TRUE"); 250 pass = false; 251 } 252 if(!Normalizer.isNormalized(field[3], Normalizer.NFKC, options)) { 253 errln("Normalizer error: isNormalized(NFKC(s), Normalizer.NFKC) is false"); 254 pass = false; 255 } 256 if(!field[0].equals(field[3]) && Normalizer.isNormalized(field[0], Normalizer.NFKC, options)) { 257 errln("Normalizer error: isNormalized(s, Normalizer.NFKC) is TRUE"); 258 pass = false; 259 } 260 // test api that takes a char[] 261 if(!Normalizer.isNormalized(field[1].toCharArray(),0,field[1].length(), Normalizer.NFC,options)) { 262 errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false"); 263 pass = false; 264 } 265 // test api that takes a codepoint 266 if(!Normalizer.isNormalized(UTF16.charAt(field[1],0), Normalizer.NFC,options)) { 267 errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false"); 268 pass = false; 269 } 270 // test FCD quick check and "makeFCD" 271 fcd=Normalizer.normalize(field[0], Normalizer.FCD); 272 if(Normalizer.NO == Normalizer.quickCheck(fcd, Normalizer.FCD,options)) { 273 errln("Normalizer error: quickCheck(FCD(s), Normalizer.FCD) is Normalizer.NO"); 274 pass = false; 275 } 276 // check FCD return length 277 { 278 char[] fcd2 = new char[ fcd.length() * 2 ]; 279 char[] src = field[0].toCharArray(); 280 int fcdLen = Normalizer.normalize(src, 0, src.length, fcd2, fcd.length(), fcd2.length,Normalizer.FCD, 0); 281 if(fcdLen != fcd.length()){ 282 errln("makeFCD did not return the correct length"); 283 } 284 } 285 if(Normalizer.NO == Normalizer.quickCheck(fcd, Normalizer.FCD, options)) { 286 errln("Normalizer error: quickCheck(FCD(s), Normalizer.FCD) is Normalizer.NO"); 287 pass = false; 288 } 289 if(Normalizer.NO == Normalizer.quickCheck(field[2], Normalizer.FCD, options)) { 290 errln("Normalizer error: quickCheck(NFD(s), Normalizer.FCD) is Normalizer.NO"); 291 pass = false; 292 } 293 294 if(Normalizer.NO == Normalizer.quickCheck(field[4], Normalizer.FCD, options)) { 295 errln("Normalizer error: quickCheck(NFKD(s), Normalizer.FCD) is Normalizer.NO"); 296 pass = false; 297 } 298 299 out = iterativeNorm(new StringCharacterIterator(field[0]), Normalizer.FCD, buf, +1,options); 300 out = iterativeNorm(new StringCharacterIterator(field[0]), Normalizer.FCD, buf, -1,options); 301 302 out = iterativeNorm(new StringCharacterIterator(field[2]), Normalizer.FCD, buf, +1,options); 303 out = iterativeNorm(new StringCharacterIterator(field[2]), Normalizer.FCD, buf, -1,options); 304 305 out = iterativeNorm(new StringCharacterIterator(field[4]), Normalizer.FCD, buf, +1,options); 306 out = iterativeNorm(new StringCharacterIterator(field[4]), Normalizer.FCD, buf, -1,options); 307 308 out=Normalizer.normalize(fcd, Normalizer.NFD); 309 if(!out.equals(field[2])) { 310 errln("Normalizer error: NFD(FCD(s))!=NFD(s)"); 311 pass = false; 312 } 313 if (!pass) { 314 errln("FAIL: " + line); 315 } 316 if(field[0]!=field[2]) { 317 // two strings that are canonically equivalent must test 318 // equal under a canonical caseless match 319 // see UAX #21 Case Mappings and Jitterbug 2021 and 320 // Unicode Technical Committee meeting consensus 92-C31 321 int rc; 322 if((rc = Normalizer.compare(field[0], field[2], (options<<Normalizer.COMPARE_NORM_OPTIONS_SHIFT)|Normalizer.COMPARE_IGNORE_CASE))!=0){ 323 errln("Normalizer.compare(original, NFD, case-insensitive) returned "+rc+" instead of 0 for equal"); 324 pass=false; 325 } 326 } 327 328 return pass; 329 } 330 // two strings that are canonically equivalent must test 331 // equal under a canonical caseless match 332 // see UAX #21 Case Mappings and Jitterbug 2021 and 333 // Unicode Technical Committee meeting consensus 92-C31 334 private void compare(String s1, String s2){ 335 if(s1.length()==1 && s2.length()==1){ 336 if(Normalizer.compare(UTF16.charAt(s1,0),UTF16.charAt(s2,0),Normalizer.COMPARE_IGNORE_CASE)!=0){ 337 errln("Normalizer.compare(int,int) failed for s1: " 338 +Utility.hex(s1) + " s2: " + Utility.hex(s2)); 339 } 340 } 341 if(s1.length()==1 && s2.length()>1){ 342 if(Normalizer.compare(UTF16.charAt(s1,0),s2,Normalizer.COMPARE_IGNORE_CASE)!=0){ 343 errln("Normalizer.compare(int,String) failed for s1: " 344 +Utility.hex(s1) + " s2: " + Utility.hex(s2)); 345 } 346 } 347 if(s1.length()>1 && s2.length()>1){ 348 // TODO: Re-enable this tests after UTC fixes UAX 21 349 if(Normalizer.compare(s1.toCharArray(),s2.toCharArray(),Normalizer.COMPARE_IGNORE_CASE)!=0){ 350 errln("Normalizer.compare(char[],char[]) failed for s1: " 351 +Utility.hex(s1) + " s2: " + Utility.hex(s2)); 352 } 353 } 354 } 355 private void cross(String s1, String s2,Normalizer.Mode mode){ 356 String result = Normalizer.normalize(s1,mode); 357 if(!result.equals(s2)){ 358 errln("cross test failed s1: " + Utility.hex(s1) + " s2: " 359 +Utility.hex(s2)); 360 } 361 } 362 /** 363 * Do a normalization using the iterative API in the given direction. 364 * @param buf scratch buffer 365 * @param dir either +1 or -1 366 */ 367 private String iterativeNorm(String str, Normalizer.Mode mode, 368 StringBuffer buf, int dir ,int options) throws Exception{ 369 normalizer.setText(str); 370 normalizer.setMode(mode); 371 buf.setLength(0); 372 normalizer.setOption(-1, false); // reset all options 373 normalizer.setOption(options, true); // set desired options 374 375 int ch; 376 if (dir > 0) { 377 for (ch = normalizer.first(); ch != Normalizer.DONE; 378 ch = normalizer.next()) { 379 buf.append(UTF16.valueOf(ch)); 380 } 381 } else { 382 for (ch = normalizer.last(); ch != Normalizer.DONE; 383 ch = normalizer.previous()) { 384 buf.insert(0, UTF16.valueOf(ch)); 385 } 386 } 387 return buf.toString(); 388 } 389 390 /** 391 * Do a normalization using the iterative API in the given direction. 392 * @param str a Java StringCharacterIterator 393 * @param buf scratch buffer 394 * @param dir either +1 or -1 395 */ 396 private String iterativeNorm(StringCharacterIterator str, Normalizer.Mode mode, 397 StringBuffer buf, int dir,int options) throws Exception{ 398 normalizer.setText(str); 399 normalizer.setMode(mode); 400 buf.setLength(0); 401 normalizer.setOption(-1, false); // reset all options 402 normalizer.setOption(options, true); // set desired options 403 404 int ch; 405 if (dir > 0) { 406 for (ch = normalizer.first(); ch != Normalizer.DONE; 407 ch = normalizer.next()) { 408 buf.append(UTF16.valueOf(ch)); 409 } 410 } else { 411 for (ch = normalizer.last(); ch != Normalizer.DONE; 412 ch = normalizer.previous()) { 413 buf.insert(0, UTF16.valueOf(ch)); 414 } 415 } 416 return buf.toString(); 417 } 418 419 /** 420 * @param op name of normalization form, e.g., "KC" 421 * @param s string being normalized 422 * @param got value received 423 * @param exp expected value 424 * @param msg description of this test 425 * @returns true if got == exp 426 */ 427 private boolean assertEqual(String op, String s, String got, 428 String exp, String msg) { 429 if (exp.equals(got)) { 430 return true; 431 } 432 errln((" " + msg + ") " + op + "(" + s + ")=" + hex(got) + 433 ", exp. " + hex(exp))); 434 return false; 435 } 436 437 /** 438 * Split a string into pieces based on the given delimiter 439 * character. Then, parse the resultant fields from hex into 440 * characters. That is, "0040 0400;0C00;0899" -> new String[] { 441 * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to 442 * be of the proper length already, and exactly output.length 443 * fields are parsed. If there are too few an exception is 444 * thrown. If there are too many the extras are ignored. 445 * 446 * @param buf scratch buffer 447 */ 448 private static void hexsplit(String s, char delimiter, 449 String[] output, StringBuffer buf) { 450 int i; 451 int pos = 0; 452 for (i=0; i<output.length; ++i) { 453 int delim = s.indexOf(delimiter, pos); 454 if (delim < 0) { 455 throw new IllegalArgumentException("Missing field in " + s); 456 } 457 // Our field is from pos..delim-1. 458 buf.setLength(0); 459 460 String toHex = s.substring(pos,delim); 461 pos = delim; 462 int index = 0; 463 int len = toHex.length(); 464 while(index< len){ 465 if(toHex.charAt(index)==' '){ 466 index++; 467 }else{ 468 int spacePos = toHex.indexOf(' ', index); 469 if(spacePos==-1){ 470 appendInt(buf,toHex.substring(index,len),s); 471 spacePos = len; 472 }else{ 473 appendInt(buf,toHex.substring(index, spacePos),s); 474 } 475 index = spacePos+1; 476 } 477 } 478 479 if (buf.length() < 1) { 480 throw new IllegalArgumentException("Empty field " + i + " in " + s); 481 } 482 output[i] = buf.toString(); 483 ++pos; // Skip over delim 484 } 485 } 486 public static void appendInt(StringBuffer buf, String strToHex, String s){ 487 int hex = Integer.parseInt(strToHex,16); 488 if (hex < 0 ) { 489 throw new IllegalArgumentException("Out of range hex " + 490 hex + " in " + s); 491 }else if (hex > 0xFFFF){ 492 buf.append((char)((hex>>10)+0xd7c0)); 493 buf.append((char)((hex&0x3ff)|0xdc00)); 494 }else{ 495 buf.append((char) hex); 496 } 497 } 498 499 // Specific tests for debugging. These are generally failures 500 // taken from the conformance file, but culled out to make 501 // debugging easier. These can be eliminated without affecting 502 // coverage. 503 504 public void _hideTestCase6(int options) throws Exception{ 505 _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;",options); 506 } 507 508 public void _testOneLine(String line,int options) throws Exception{ 509 String[] fields = new String[5]; 510 StringBuffer buf = new StringBuffer(); 511 // Parse out the fields 512 hexsplit(line, ';', fields, buf); 513 checkConformance(fields, line,options); 514 } 515 516 517} 518