1/* 2 ******************************************************************************* 3 * Copyright (C) 1996-2010, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7package com.ibm.icu.dev.test.compression; 8 9import com.ibm.icu.dev.test.TestFmwk; 10import com.ibm.icu.text.UnicodeCompressor; 11import com.ibm.icu.text.UnicodeDecompressor; 12 13public class ExhaustiveTest extends TestFmwk { 14 public static void main(String args[]) throws Exception { 15 new ExhaustiveTest().run(args); 16 } 17 18 /** Test simple compress/decompress API, returning # of errors */ 19 public void testSimple() throws Exception { 20 for(int i = 0; i < fTestCases.length; i++) { 21 simpleTest(fTestCases[i]); 22 } 23 } 24 private void simpleTest(String s) throws Exception { 25 byte [] compressed = UnicodeCompressor.compress(s); 26 String res = UnicodeDecompressor.decompress(compressed); 27 if (logDiffs(s.toCharArray(), s.length(), 28 res.toCharArray(), res.length()) == false) { 29 logln(s.length() + " chars ===> " 30 + compressed.length + " bytes ===> " 31 + res.length() + " chars"); 32 } else { 33 logln("Compressed:"); 34 printBytes(compressed, compressed.length); 35 errln("testSimple did not compress correctly"); 36 } 37 } 38 39 /** Test iterative compress/decompress API, returning # of errors */ 40 public void testIterative() throws Exception { 41 for(int i = 0; i < fTestCases.length; i++) { 42 myTest(fTestCases[i].toCharArray(), fTestCases[i].length()); 43 } 44 } 45 private void myTest(char[] chars, int len) { 46 UnicodeCompressor myCompressor = new UnicodeCompressor(); 47 UnicodeDecompressor myDecompressor = new UnicodeDecompressor(); 48 49 // variables for my compressor 50 int myByteCount = 0; 51 int myCharCount = 0; 52 int myCompressedSize = Math.max(512, 3*len); 53 byte[] myCompressed = new byte[myCompressedSize]; 54 int myDecompressedSize = Math.max(2, 2 * len); 55 char[] myDecompressed = new char[myDecompressedSize]; 56 int[] unicharsRead = new int[1]; 57 int[] bytesRead = new int[1]; 58 59 myByteCount = myCompressor.compress(chars, 0, len, unicharsRead, 60 myCompressed, 0, myCompressedSize); 61 62 myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount, 63 bytesRead, myDecompressed, 0, myDecompressedSize); 64 65 if (logDiffs(chars, len, myDecompressed, myCharCount) == false) { 66 logln(len + " chars ===> " 67 + myByteCount + " bytes ===> " 68 + myCharCount + " chars"); 69 } else { 70 logln("Compressed:"); 71 printBytes(myCompressed, myByteCount); 72 errln("Iterative test failed"); 73 } 74 } 75 76 /** Test iterative compress/decompress API */ 77 public void testMultipass() throws Exception { 78 for(int i = 0; i < fTestCases.length; i++) { 79 myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length()); 80 } 81 } 82 private void myMultipassTest(char [] chars, int len) throws Exception { 83 UnicodeCompressor myCompressor = new UnicodeCompressor(); 84 UnicodeDecompressor myDecompressor = new UnicodeDecompressor(); 85 86 // variables for my compressor 87 88 // for looping 89 int byteBufferSize = 4;//Math.max(4, len / 4); 90 byte[] byteBuffer = new byte [byteBufferSize]; 91 // real target 92 int compressedSize = Math.max(512, 3 * len); 93 byte[] compressed = new byte[compressedSize]; 94 95 // for looping 96 int unicharBufferSize = 2;//byteBufferSize; 97 char[] unicharBuffer = new char[unicharBufferSize]; 98 // real target 99 int decompressedSize = Math.max(2, 2 * len); 100 char[] decompressed = new char[decompressedSize]; 101 102 int bytesWritten = 0; 103 int unicharsWritten = 0; 104 105 int[] unicharsRead = new int[1]; 106 int[] bytesRead = new int[1]; 107 108 int totalCharsCompressed = 0; 109 int totalBytesWritten = 0; 110 111 int totalBytesDecompressed = 0; 112 int totalCharsWritten = 0; 113 114 // not used boolean err = false; 115 116 117 // perform the compression in a loop 118 do { 119 120 // do the compression 121 bytesWritten = myCompressor.compress(chars, totalCharsCompressed, 122 len, unicharsRead, byteBuffer, 0, byteBufferSize); 123 124 // copy the current set of bytes into the target buffer 125 System.arraycopy(byteBuffer, 0, compressed, 126 totalBytesWritten, bytesWritten); 127 128 // update the no. of characters compressed 129 totalCharsCompressed += unicharsRead[0]; 130 131 // update the no. of bytes written 132 totalBytesWritten += bytesWritten; 133 134 /*System.out.logln("Compression pass complete. Compressed " 135 + unicharsRead[0] + " chars into " 136 + bytesWritten + " bytes.");*/ 137 } while(totalCharsCompressed < len); 138 139 if (totalCharsCompressed != len) { 140 errln("ERROR: Number of characters compressed(" 141 + totalCharsCompressed + ") != len(" + len + ")"); 142 } else { 143 logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes."); 144 } 145 146 // perform the decompression in a loop 147 do { 148 149 // do the decompression 150 unicharsWritten = myDecompressor.decompress(compressed, 151 totalBytesDecompressed, totalBytesWritten, 152 bytesRead, unicharBuffer, 0, unicharBufferSize); 153 154 // copy the current set of chars into the target buffer 155 System.arraycopy(unicharBuffer, 0, decompressed, 156 totalCharsWritten, unicharsWritten); 157 158 // update the no. of bytes decompressed 159 totalBytesDecompressed += bytesRead[0]; 160 161 // update the no. of chars written 162 totalCharsWritten += unicharsWritten; 163 164 /*System.out.logln("Decompression pass complete. Decompressed " 165 + bytesRead[0] + " bytes into " 166 + unicharsWritten + " chars.");*/ 167 } while (totalBytesDecompressed < totalBytesWritten); 168 169 if (totalBytesDecompressed != totalBytesWritten) { 170 errln("ERROR: Number of bytes decompressed(" 171 + totalBytesDecompressed 172 + ") != totalBytesWritten(" 173 + totalBytesWritten + ")"); 174 } else { 175 logln("MP: " + totalBytesWritten 176 + " bytes ===> " + totalCharsWritten + " chars."); 177 } 178 179 if (logDiffs(chars, len, decompressed, totalCharsWritten)) { 180 errln("ERROR: buffer contents incorrect"); 181 } 182 } 183 184 /** Print differences between two character buffers */ 185 private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) { 186 boolean result = false; 187 188 if(s1len != s2len) { 189 logln("===================="); 190 logln("Length doesn't match: expected " + s1len 191 + ", got " + s2len); 192 logln("Expected:"); 193 printChars(s1, s1len); 194 logln("Got:"); 195 printChars(s2, s2len); 196 result = true; 197 } 198 199 int len = Math.min(s1len, s2len); 200 for(int i = 0; i < len; ++i) { 201 if(s1[i] != s2[i]) { 202 if(result == false) { 203 logln("===================="); 204 } 205 logln("First difference at char " + i); 206 logln("Exp. char: " + Integer.toHexString(s1[i])); 207 logln("Got char : " + Integer.toHexString(s2[i])); 208 logln("Expected:"); 209 printChars(s1, s1len); 210 logln("Got:"); 211 printChars(s2, s2len); 212 result = true; 213 break; 214 } 215 } 216 217 return result; 218 } 219 220 // generate a string of characters, with simulated runs of characters 221 /*private static char[] randomChars(int len, Random random) { 222 char[] result = new char [len]; 223 int runLen = 0; 224 int used = 0; 225 226 while(used < len) { 227 runLen = (int) (30 * random.nextDouble()); 228 if(used + runLen >= len) { 229 runLen = len - used; 230 } 231 randomRun(result, used, runLen, random); 232 used += runLen; 233 } 234 235 return result; 236 }*/ 237 238 // generate a run of characters in a "window" 239 /*private static void randomRun(char[] target, int pos, int len, Random random) { 240 int offset = (int) (0xFFFF * random.nextDouble()); 241 242 // don't overflow 16 bits 243 if(offset > 0xFF80) { 244 offset = 0xFF80; 245 } 246 247 for(int i = pos; i < pos + len; i++) { 248 target[i] = (char)(offset + (0x7F * random.nextDouble())); 249 } 250 }*/ 251 252 private static final String [] fTestCases = { 253 "Hello \u9292 \u9192 World!", 254 "Hell\u0429o \u9292 \u9192 W\u0084rld!", 255 "Hell\u0429o \u9292 \u9292W\u0084rld!", 256 257 "\u0648\u06c8", // catch missing reset 258 "\u0648\u06c8", 259 260 "\u4444\uE001", // lowest quotable 261 "\u4444\uf2FF", // highest quotable 262 "\u4444\uf188\u4444", 263 "\u4444\uf188\uf288", 264 "\u4444\uf188abc\0429\uf288", 265 "\u9292\u2222", 266 "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!", 267 "Hell\u0429o \u9292 \u9292W\u0084rld!", 268 "Hello World!123456", 269 "Hello W\u0081\u011f\u0082!", // Latin 1 run 270 271 "abc\u0301\u0302", // uses SQn for u301 u302 272 "abc\u4411d", // uses SQU 273 "abc\u4411\u4412d",// uses SCU 274 "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5 275 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data 276 "\u9292\u2222", 277 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 278 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 279 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 280 281 "", // empty input 282 "\u0000", // smallest BMP character 283 "\uFFFF", // largest BMP character 284 285 "\ud800\udc00", // smallest surrogate 286 "\ud8ff\udcff", // largest surrogate pair 287 288 // regression tests 289 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 290 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 291 "\u0041\u00df\u0401\u015f", 292 "\u9066\u2123abc", 293 "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 294 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489" 295 296 }; 297 298 //========================== 299 // Compression modes 300 //========================== 301 private final static int SINGLEBYTEMODE = 0; 302 private final static int UNICODEMODE = 1; 303 304 //========================== 305 // Single-byte mode tags 306 //========================== 307 private final static int SDEFINEX = 0x0B; 308 //private final static int SRESERVED = 0x0C; // this is a reserved value 309 private final static int SQUOTEU = 0x0E; 310 private final static int SSWITCHU = 0x0F; 311 312 private final static int SQUOTE0 = 0x01; 313 private final static int SQUOTE1 = 0x02; 314 private final static int SQUOTE2 = 0x03; 315 private final static int SQUOTE3 = 0x04; 316 private final static int SQUOTE4 = 0x05; 317 private final static int SQUOTE5 = 0x06; 318 private final static int SQUOTE6 = 0x07; 319 private final static int SQUOTE7 = 0x08; 320 321 private final static int SSWITCH0 = 0x10; 322 private final static int SSWITCH1 = 0x11; 323 private final static int SSWITCH2 = 0x12; 324 private final static int SSWITCH3 = 0x13; 325 private final static int SSWITCH4 = 0x14; 326 private final static int SSWITCH5 = 0x15; 327 private final static int SSWITCH6 = 0x16; 328 private final static int SSWITCH7 = 0x17; 329 330 private final static int SDEFINE0 = 0x18; 331 private final static int SDEFINE1 = 0x19; 332 private final static int SDEFINE2 = 0x1A; 333 private final static int SDEFINE3 = 0x1B; 334 private final static int SDEFINE4 = 0x1C; 335 private final static int SDEFINE5 = 0x1D; 336 private final static int SDEFINE6 = 0x1E; 337 private final static int SDEFINE7 = 0x1F; 338 339 //========================== 340 // Unicode mode tags 341 //========================== 342 private final static int USWITCH0 = 0xE0; 343 private final static int USWITCH1 = 0xE1; 344 private final static int USWITCH2 = 0xE2; 345 private final static int USWITCH3 = 0xE3; 346 private final static int USWITCH4 = 0xE4; 347 private final static int USWITCH5 = 0xE5; 348 private final static int USWITCH6 = 0xE6; 349 private final static int USWITCH7 = 0xE7; 350 351 private final static int UDEFINE0 = 0xE8; 352 private final static int UDEFINE1 = 0xE9; 353 private final static int UDEFINE2 = 0xEA; 354 private final static int UDEFINE3 = 0xEB; 355 private final static int UDEFINE4 = 0xEC; 356 private final static int UDEFINE5 = 0xED; 357 private final static int UDEFINE6 = 0xEE; 358 private final static int UDEFINE7 = 0xEF; 359 360 private final static int UQUOTEU = 0xF0; 361 private final static int UDEFINEX = 0xF1; 362 //private final static int URESERVED = 0xF2; // this is a reserved value 363 364 /* Print out an array of characters, with non-printables (for me) 365 displayed as hex values */ 366 private void printChars(char[] chars, int len) { 367 for(int i = 0; i < len; i++) { 368 int c = (int)chars[i]; 369 if(c < 0x0020 || c >= 0x7f) { 370 log("[0x"); 371 log(Integer.toHexString(c)); 372 log("]"); 373 } else { 374 log(String.valueOf((char)c)); 375 } 376 } 377 logln(""); 378 } 379 380 private void printBytes(byte[] byteBuffer, int len) { 381 int curByteIndex = 0; 382 int byteBufferLimit = len; 383 int mode = SINGLEBYTEMODE; 384 int aByte = 0x00; 385 386 if(len > byteBuffer.length) { 387 logln("Warning: printBytes called with length too large. Truncating"); 388 byteBufferLimit = byteBuffer.length; 389 } 390 391 while(curByteIndex < byteBufferLimit) { 392 switch(mode) { 393 case SINGLEBYTEMODE: 394 while(curByteIndex < byteBufferLimit 395 && mode == SINGLEBYTEMODE) { 396 aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF; 397 switch(aByte) { 398 default: 399 log(Integer.toHexString(((int) aByte) & 0xFF) + " "); 400 break; 401 // quote unicode 402 case SQUOTEU: 403 log("SQUOTEU "); 404 if (curByteIndex < byteBufferLimit) { 405 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 406 } 407 if (curByteIndex < byteBufferLimit) { 408 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 409 } 410 break; 411 412 // switch to Unicode mode 413 case SSWITCHU: 414 log("SSWITCHU "); 415 mode = UNICODEMODE; 416 break; 417 418 // handle all quote tags 419 case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3: 420 case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7: 421 log("SQUOTE" + (aByte - SQUOTE0) + " "); 422 if(curByteIndex < byteBufferLimit) { 423 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 424 } 425 break; 426 427 // handle all switch tags 428 case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3: 429 case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7: 430 log("SSWITCH" + (aByte - SSWITCH0) + " "); 431 break; 432 433 // handle all define tags 434 case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3: 435 case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7: 436 log("SDEFINE" + (aByte - SDEFINE0) + " "); 437 if (curByteIndex < byteBufferLimit) { 438 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 439 } 440 break; 441 442 // handle define extended tag 443 case SDEFINEX: 444 log("SDEFINEX "); 445 if (curByteIndex < byteBufferLimit) { 446 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 447 } 448 if (curByteIndex < byteBufferLimit) { 449 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 450 } 451 break; 452 453 } // end switch 454 } // end while 455 break; 456 457 case UNICODEMODE: 458 while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) { 459 aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF; 460 switch(aByte) { 461 // handle all define tags 462 case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3: 463 case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7: 464 log("UDEFINE" + (aByte - UDEFINE0) + " "); 465 if (curByteIndex < byteBufferLimit) { 466 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 467 } 468 mode = SINGLEBYTEMODE; 469 break; 470 471 // handle define extended tag 472 case UDEFINEX: 473 log("UDEFINEX "); 474 if (curByteIndex < byteBufferLimit) { 475 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 476 } 477 if (curByteIndex < byteBufferLimit) { 478 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 479 } 480 break; 481 482 // handle all switch tags 483 case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3: 484 case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7: 485 log("USWITCH" + (aByte - USWITCH0) + " "); 486 mode = SINGLEBYTEMODE; 487 break; 488 489 // quote unicode 490 case UQUOTEU: 491 log("UQUOTEU "); 492 if (curByteIndex < byteBufferLimit) { 493 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 494 } 495 if (curByteIndex < byteBufferLimit) { 496 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 497 } 498 break; 499 500 default: 501 log(Integer.toHexString(((int) aByte) & 0xFF) + " "); 502 if (curByteIndex < byteBufferLimit) { 503 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); 504 } 505 break; 506 507 } // end switch 508 } // end while 509 break; 510 511 } // end switch( mode ) 512 } // end while 513 514 logln(""); 515 } 516} 517 518 519 520 521 522 523