1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7package com.ibm.icu.dev.test.compression;
8
9import com.ibm.icu.dev.test.TestFmwk;
10import com.ibm.icu.text.UnicodeCompressor;
11import com.ibm.icu.text.UnicodeDecompressor;
12
13public class ExhaustiveTest extends TestFmwk {
14    public static void main(String args[]) throws Exception {
15        new ExhaustiveTest().run(args);
16    }
17
18    /** Test simple compress/decompress API, returning # of errors */
19    public void testSimple() throws Exception {
20        for(int i = 0; i < fTestCases.length; i++) {
21            simpleTest(fTestCases[i]);
22        }
23    }
24    private void simpleTest(String s) throws Exception {
25        byte [] compressed = UnicodeCompressor.compress(s);
26        String res = UnicodeDecompressor.decompress(compressed);
27        if (logDiffs(s.toCharArray(), s.length(),
28                res.toCharArray(), res.length()) == false) {
29            logln(s.length() + " chars ===> "
30                    + compressed.length + " bytes ===> "
31                    + res.length() + " chars");
32        } else {
33            logln("Compressed:");
34            printBytes(compressed, compressed.length);
35            errln("testSimple did not compress correctly");
36        }
37    }
38
39    /** Test iterative compress/decompress API, returning # of errors */
40    public void testIterative() throws Exception {
41        for(int i = 0; i < fTestCases.length; i++) {
42            myTest(fTestCases[i].toCharArray(), fTestCases[i].length());
43        }
44    }
45    private void myTest(char[] chars, int len) {
46        UnicodeCompressor myCompressor = new UnicodeCompressor();
47        UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
48
49        // variables for my compressor
50        int myByteCount = 0;
51        int myCharCount = 0;
52        int myCompressedSize = Math.max(512, 3*len);
53        byte[] myCompressed = new byte[myCompressedSize];
54        int myDecompressedSize = Math.max(2, 2 * len);
55        char[] myDecompressed = new char[myDecompressedSize];
56        int[] unicharsRead = new int[1];
57        int[] bytesRead = new int[1];
58
59        myByteCount = myCompressor.compress(chars, 0, len, unicharsRead,
60                myCompressed, 0, myCompressedSize);
61
62        myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount,
63                bytesRead, myDecompressed, 0, myDecompressedSize);
64
65        if (logDiffs(chars, len, myDecompressed, myCharCount) == false) {
66            logln(len + " chars ===> "
67                    + myByteCount + " bytes ===> "
68                    + myCharCount + " chars");
69        } else {
70            logln("Compressed:");
71            printBytes(myCompressed, myByteCount);
72            errln("Iterative test failed");
73        }
74    }
75
76    /** Test iterative compress/decompress API */
77    public void testMultipass() throws Exception {
78        for(int i = 0; i < fTestCases.length; i++) {
79            myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length());
80        }
81    }
82    private void myMultipassTest(char [] chars, int len) throws Exception {
83        UnicodeCompressor myCompressor = new UnicodeCompressor();
84        UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
85
86        // variables for my compressor
87
88        // for looping
89        int byteBufferSize = 4;//Math.max(4, len / 4);
90        byte[] byteBuffer = new byte [byteBufferSize];
91        // real target
92        int compressedSize = Math.max(512, 3 * len);
93        byte[] compressed = new byte[compressedSize];
94
95        // for looping
96        int unicharBufferSize = 2;//byteBufferSize;
97        char[] unicharBuffer = new char[unicharBufferSize];
98        // real target
99        int decompressedSize = Math.max(2, 2 * len);
100        char[] decompressed = new char[decompressedSize];
101
102        int bytesWritten = 0;
103        int unicharsWritten = 0;
104
105        int[] unicharsRead = new int[1];
106        int[] bytesRead = new int[1];
107
108        int totalCharsCompressed = 0;
109        int totalBytesWritten = 0;
110
111        int totalBytesDecompressed  = 0;
112        int totalCharsWritten = 0;
113
114        // not used boolean err = false;
115
116
117        // perform the compression in a loop
118        do {
119
120            // do the compression
121            bytesWritten = myCompressor.compress(chars, totalCharsCompressed,
122                   len, unicharsRead, byteBuffer, 0, byteBufferSize);
123
124            // copy the current set of bytes into the target buffer
125            System.arraycopy(byteBuffer, 0, compressed,
126                   totalBytesWritten, bytesWritten);
127
128            // update the no. of characters compressed
129            totalCharsCompressed += unicharsRead[0];
130
131            // update the no. of bytes written
132            totalBytesWritten += bytesWritten;
133
134            /*System.out.logln("Compression pass complete.  Compressed "
135                               + unicharsRead[0] + " chars into "
136                               + bytesWritten + " bytes.");*/
137        } while(totalCharsCompressed < len);
138
139        if (totalCharsCompressed != len) {
140            errln("ERROR: Number of characters compressed("
141                    + totalCharsCompressed + ") != len(" + len + ")");
142        } else {
143            logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes.");
144        }
145
146        // perform the decompression in a loop
147        do {
148
149            // do the decompression
150            unicharsWritten = myDecompressor.decompress(compressed,
151                    totalBytesDecompressed, totalBytesWritten,
152                    bytesRead, unicharBuffer, 0, unicharBufferSize);
153
154            // copy the current set of chars into the target buffer
155            System.arraycopy(unicharBuffer, 0, decompressed,
156                    totalCharsWritten, unicharsWritten);
157
158            // update the no. of bytes decompressed
159            totalBytesDecompressed += bytesRead[0];
160
161            // update the no. of chars written
162            totalCharsWritten += unicharsWritten;
163
164            /*System.out.logln("Decompression pass complete.  Decompressed "
165                               + bytesRead[0] + " bytes into "
166                               + unicharsWritten + " chars.");*/
167        } while (totalBytesDecompressed < totalBytesWritten);
168
169        if (totalBytesDecompressed != totalBytesWritten) {
170            errln("ERROR: Number of bytes decompressed("
171                    + totalBytesDecompressed
172                    + ") != totalBytesWritten("
173                    + totalBytesWritten + ")");
174        } else {
175            logln("MP: " + totalBytesWritten
176                    + " bytes ===> " + totalCharsWritten + " chars.");
177        }
178
179        if (logDiffs(chars, len, decompressed, totalCharsWritten)) {
180            errln("ERROR: buffer contents incorrect");
181        }
182    }
183
184    /** Print differences between two character buffers */
185    private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) {
186        boolean result  = false;
187
188        if(s1len != s2len) {
189            logln("====================");
190            logln("Length doesn't match: expected " + s1len
191                               + ", got " + s2len);
192            logln("Expected:");
193            printChars(s1, s1len);
194            logln("Got:");
195            printChars(s2, s2len);
196            result = true;
197        }
198
199        int len = Math.min(s1len, s2len);
200        for(int i = 0; i < len; ++i) {
201            if(s1[i] != s2[i]) {
202                if(result == false) {
203                    logln("====================");
204                }
205                logln("First difference at char " + i);
206                logln("Exp. char: " + Integer.toHexString(s1[i]));
207                logln("Got char : " + Integer.toHexString(s2[i]));
208                logln("Expected:");
209                printChars(s1, s1len);
210                logln("Got:");
211                printChars(s2, s2len);
212                result = true;
213                break;
214            }
215        }
216
217        return result;
218    }
219
220    // generate a string of characters, with simulated runs of characters
221    /*private static char[] randomChars(int len, Random random) {
222        char[] result = new char [len];
223        int runLen = 0;
224        int used = 0;
225
226        while(used < len) {
227            runLen = (int) (30 * random.nextDouble());
228            if(used + runLen >= len) {
229                runLen = len - used;
230            }
231            randomRun(result, used, runLen, random);
232            used += runLen;
233        }
234
235        return result;
236    }*/
237
238    // generate a run of characters in a "window"
239    /*private static void randomRun(char[] target, int pos, int len, Random random) {
240        int offset = (int) (0xFFFF * random.nextDouble());
241
242        // don't overflow 16 bits
243        if(offset > 0xFF80) {
244            offset = 0xFF80;
245        }
246
247        for(int i = pos; i < pos + len; i++) {
248            target[i] = (char)(offset + (0x7F * random.nextDouble()));
249        }
250    }*/
251
252    private static final String [] fTestCases = {
253        "Hello \u9292 \u9192 World!",
254        "Hell\u0429o \u9292 \u9192 W\u0084rld!",
255        "Hell\u0429o \u9292 \u9292W\u0084rld!",
256
257        "\u0648\u06c8", // catch missing reset
258        "\u0648\u06c8",
259
260        "\u4444\uE001", // lowest quotable
261        "\u4444\uf2FF", // highest quotable
262        "\u4444\uf188\u4444",
263        "\u4444\uf188\uf288",
264        "\u4444\uf188abc\0429\uf288",
265        "\u9292\u2222",
266        "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!",
267        "Hell\u0429o \u9292 \u9292W\u0084rld!",
268        "Hello World!123456",
269        "Hello W\u0081\u011f\u0082!", // Latin 1 run
270
271        "abc\u0301\u0302",  // uses SQn for u301 u302
272        "abc\u4411d",      // uses SQU
273        "abc\u4411\u4412d",// uses SCU
274        "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5
275        "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data
276        "\u9292\u2222",
277        "\u9191\u9191\u3041\u9191\u3041\u3041\u3000",
278        "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c",
279        "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002",
280
281        "", // empty input
282        "\u0000", // smallest BMP character
283        "\uFFFF", // largest BMP character
284
285        "\ud800\udc00", // smallest surrogate
286        "\ud8ff\udcff", // largest surrogate pair
287
288        // regression tests
289        "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa",
290        "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c",
291        "\u0041\u00df\u0401\u015f",
292        "\u9066\u2123abc",
293        "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5",
294        "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489"
295
296    };
297
298    //==========================
299    // Compression modes
300    //==========================
301    private final static int SINGLEBYTEMODE                 = 0;
302    private final static int UNICODEMODE                    = 1;
303
304    //==========================
305    // Single-byte mode tags
306    //==========================
307    private final static int SDEFINEX                   = 0x0B;
308    //private final static int SRESERVED                  = 0x0C;             // this is a reserved value
309    private final static int SQUOTEU                    = 0x0E;
310    private final static int SSWITCHU                   = 0x0F;
311
312    private final static int SQUOTE0                        = 0x01;
313    private final static int SQUOTE1                        = 0x02;
314    private final static int SQUOTE2                        = 0x03;
315    private final static int SQUOTE3                        = 0x04;
316    private final static int SQUOTE4                        = 0x05;
317    private final static int SQUOTE5                        = 0x06;
318    private final static int SQUOTE6                        = 0x07;
319    private final static int SQUOTE7                        = 0x08;
320
321    private final static int SSWITCH0                       = 0x10;
322    private final static int SSWITCH1                       = 0x11;
323    private final static int SSWITCH2                       = 0x12;
324    private final static int SSWITCH3                       = 0x13;
325    private final static int SSWITCH4                       = 0x14;
326    private final static int SSWITCH5                       = 0x15;
327    private final static int SSWITCH6                       = 0x16;
328    private final static int SSWITCH7                       = 0x17;
329
330    private final static int SDEFINE0                       = 0x18;
331    private final static int SDEFINE1                       = 0x19;
332    private final static int SDEFINE2                       = 0x1A;
333    private final static int SDEFINE3                       = 0x1B;
334    private final static int SDEFINE4                       = 0x1C;
335    private final static int SDEFINE5                       = 0x1D;
336    private final static int SDEFINE6                       = 0x1E;
337    private final static int SDEFINE7                       = 0x1F;
338
339    //==========================
340    // Unicode mode tags
341    //==========================
342    private final static int USWITCH0                       = 0xE0;
343    private final static int USWITCH1                       = 0xE1;
344    private final static int USWITCH2                       = 0xE2;
345    private final static int USWITCH3                       = 0xE3;
346    private final static int USWITCH4                       = 0xE4;
347    private final static int USWITCH5                       = 0xE5;
348    private final static int USWITCH6                       = 0xE6;
349    private final static int USWITCH7                       = 0xE7;
350
351    private final static int UDEFINE0                       = 0xE8;
352    private final static int UDEFINE1                       = 0xE9;
353    private final static int UDEFINE2                       = 0xEA;
354    private final static int UDEFINE3                       = 0xEB;
355    private final static int UDEFINE4                       = 0xEC;
356    private final static int UDEFINE5                       = 0xED;
357    private final static int UDEFINE6                       = 0xEE;
358    private final static int UDEFINE7                       = 0xEF;
359
360    private final static int UQUOTEU                        = 0xF0;
361    private final static int UDEFINEX                       = 0xF1;
362    //private final static int URESERVED                      = 0xF2;         // this is a reserved value
363
364    /* Print out an array of characters, with non-printables (for me)
365       displayed as hex values */
366    private void printChars(char[] chars, int len) {
367        for(int i = 0; i < len; i++) {
368            int c = (int)chars[i];
369            if(c < 0x0020 || c >= 0x7f) {
370                log("[0x");
371                log(Integer.toHexString(c));
372                log("]");
373            } else {
374                log(String.valueOf((char)c));
375            }
376        }
377        logln("");
378    }
379
380    private void printBytes(byte[] byteBuffer, int len) {
381        int curByteIndex = 0;
382        int byteBufferLimit = len;
383        int mode = SINGLEBYTEMODE;
384        int aByte = 0x00;
385
386        if(len > byteBuffer.length) {
387            logln("Warning: printBytes called with length too large. Truncating");
388            byteBufferLimit = byteBuffer.length;
389        }
390
391        while(curByteIndex < byteBufferLimit) {
392            switch(mode) {
393            case SINGLEBYTEMODE:
394                while(curByteIndex < byteBufferLimit
395                      && mode == SINGLEBYTEMODE)  {
396                    aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
397                    switch(aByte) {
398                    default:
399                        log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
400                        break;
401                        // quote unicode
402                    case SQUOTEU:
403                        log("SQUOTEU ");
404                        if (curByteIndex < byteBufferLimit) {
405                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
406                        }
407                        if (curByteIndex < byteBufferLimit) {
408                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
409                        }
410                        break;
411
412                        // switch to Unicode mode
413                    case SSWITCHU:
414                        log("SSWITCHU ");
415                        mode = UNICODEMODE;
416                        break;
417
418                        // handle all quote tags
419                    case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
420                    case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
421                        log("SQUOTE" + (aByte - SQUOTE0) + " ");
422                        if(curByteIndex < byteBufferLimit) {
423                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
424                        }
425                        break;
426
427                        // handle all switch tags
428                    case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3:
429                    case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7:
430                        log("SSWITCH" + (aByte - SSWITCH0) + " ");
431                        break;
432
433                        // handle all define tags
434                    case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
435                    case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
436                        log("SDEFINE" + (aByte - SDEFINE0) + " ");
437                        if (curByteIndex < byteBufferLimit) {
438                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
439                        }
440                        break;
441
442                        // handle define extended tag
443                    case SDEFINEX:
444                        log("SDEFINEX ");
445                        if (curByteIndex < byteBufferLimit) {
446                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
447                        }
448                        if (curByteIndex < byteBufferLimit) {
449                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
450                        }
451                        break;
452
453                    } // end switch
454                } // end while
455                break;
456
457            case UNICODEMODE:
458                while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) {
459                    aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
460                    switch(aByte) {
461                        // handle all define tags
462                    case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
463                    case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
464                        log("UDEFINE" + (aByte - UDEFINE0) + " ");
465                        if (curByteIndex < byteBufferLimit) {
466                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
467                        }
468                        mode = SINGLEBYTEMODE;
469                        break;
470
471                        // handle define extended tag
472                    case UDEFINEX:
473                        log("UDEFINEX ");
474                        if (curByteIndex < byteBufferLimit) {
475                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
476                        }
477                        if (curByteIndex < byteBufferLimit) {
478                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
479                        }
480                        break;
481
482                        // handle all switch tags
483                    case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3:
484                    case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7:
485                        log("USWITCH" + (aByte - USWITCH0) + " ");
486                        mode = SINGLEBYTEMODE;
487                        break;
488
489                        // quote unicode
490                    case UQUOTEU:
491                        log("UQUOTEU ");
492                        if (curByteIndex < byteBufferLimit) {
493                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
494                        }
495                        if (curByteIndex < byteBufferLimit) {
496                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
497                        }
498                        break;
499
500                    default:
501                        log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
502                        if (curByteIndex < byteBufferLimit) {
503                            log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
504                        }
505                        break;
506
507                    } // end switch
508                } // end while
509                break;
510
511            } // end switch( mode )
512        } // end while
513
514        logln("");
515    }
516}
517
518
519
520
521
522
523