]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/compression/ExhaustiveTest.java
Clean up imports.
[Dictionary.git] / jars / icu4j-52_1 / main / tests / core / src / com / ibm / icu / dev / test / compression / ExhaustiveTest.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.test.compression;
8
9 import com.ibm.icu.dev.test.TestFmwk;
10 import com.ibm.icu.text.UnicodeCompressor;
11 import com.ibm.icu.text.UnicodeDecompressor;
12
13 public class ExhaustiveTest extends TestFmwk {
14     public static void main(String args[]) throws Exception {
15         new ExhaustiveTest().run(args);
16     }
17
18     /** Test simple compress/decompress API, returning # of errors */
19     public void testSimple() throws Exception {
20         for(int i = 0; i < fTestCases.length; i++) {
21             simpleTest(fTestCases[i]);
22         }
23     }
24     private void simpleTest(String s) throws Exception {
25         byte [] compressed = UnicodeCompressor.compress(s);
26         String res = UnicodeDecompressor.decompress(compressed);
27         if (logDiffs(s.toCharArray(), s.length(), 
28                 res.toCharArray(), res.length()) == false) {
29             logln(s.length() + " chars ===> " 
30                     + compressed.length + " bytes ===> " 
31                     + res.length() + " chars");
32         } else {
33             logln("Compressed:");
34             printBytes(compressed, compressed.length);
35             errln("testSimple did not compress correctly");
36         }
37     }
38
39     /** Test iterative compress/decompress API, returning # of errors */
40     public void testIterative() throws Exception {
41         for(int i = 0; i < fTestCases.length; i++) {
42             myTest(fTestCases[i].toCharArray(), fTestCases[i].length());
43         }
44     }
45     private void myTest(char[] chars, int len) {
46         UnicodeCompressor myCompressor = new UnicodeCompressor();
47         UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
48         
49         // variables for my compressor
50         int myByteCount = 0;
51         int myCharCount = 0;
52         int myCompressedSize = Math.max(512, 3*len);
53         byte[] myCompressed = new byte[myCompressedSize];
54         int myDecompressedSize = Math.max(2, 2 * len);
55         char[] myDecompressed = new char[myDecompressedSize];
56         int[] unicharsRead = new int[1];
57         int[] bytesRead = new int[1];
58         
59         myByteCount = myCompressor.compress(chars, 0, len, unicharsRead,
60                 myCompressed, 0, myCompressedSize);
61
62         myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount,
63                 bytesRead, myDecompressed, 0, myDecompressedSize);
64
65         if (logDiffs(chars, len, myDecompressed, myCharCount) == false) {
66             logln(len + " chars ===> " 
67                     + myByteCount + " bytes ===> " 
68                     + myCharCount + " chars");
69         } else {
70             logln("Compressed:");
71             printBytes(myCompressed, myByteCount);
72             errln("Iterative test failed");
73         }
74     }
75
76     /** Test iterative compress/decompress API */
77     public void testMultipass() throws Exception {
78         for(int i = 0; i < fTestCases.length; i++) {
79             myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length());
80         }
81     }
82     private void myMultipassTest(char [] chars, int len) throws Exception {
83         UnicodeCompressor myCompressor = new UnicodeCompressor();
84         UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
85         
86         // variables for my compressor
87         
88         // for looping
89         int byteBufferSize = 4;//Math.max(4, len / 4);
90         byte[] byteBuffer = new byte [byteBufferSize];
91         // real target
92         int compressedSize = Math.max(512, 3 * len);
93         byte[] compressed = new byte[compressedSize];
94
95         // for looping
96         int unicharBufferSize = 2;//byteBufferSize;
97         char[] unicharBuffer = new char[unicharBufferSize];
98         // real target
99         int decompressedSize = Math.max(2, 2 * len);
100         char[] decompressed = new char[decompressedSize];
101
102         int bytesWritten = 0;
103         int unicharsWritten = 0;
104
105         int[] unicharsRead = new int[1];
106         int[] bytesRead = new int[1];
107         
108         int totalCharsCompressed = 0;
109         int totalBytesWritten = 0;
110
111         int totalBytesDecompressed  = 0;
112         int totalCharsWritten = 0;
113
114         // not used boolean err = false;
115
116
117         // perform the compression in a loop
118         do {
119             
120             // do the compression
121             bytesWritten = myCompressor.compress(chars, totalCharsCompressed, 
122                    len, unicharsRead, byteBuffer, 0, byteBufferSize);
123
124             // copy the current set of bytes into the target buffer
125             System.arraycopy(byteBuffer, 0, compressed, 
126                    totalBytesWritten, bytesWritten);
127             
128             // update the no. of characters compressed
129             totalCharsCompressed += unicharsRead[0];
130             
131             // update the no. of bytes written
132             totalBytesWritten += bytesWritten;
133             
134             /*System.out.logln("Compression pass complete.  Compressed "
135                                + unicharsRead[0] + " chars into "
136                                + bytesWritten + " bytes.");*/
137         } while(totalCharsCompressed < len);
138
139         if (totalCharsCompressed != len) {
140             errln("ERROR: Number of characters compressed("
141                     + totalCharsCompressed + ") != len(" + len + ")");
142         } else {
143             logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes.");
144         }
145         
146         // perform the decompression in a loop
147         do {
148             
149             // do the decompression
150             unicharsWritten = myDecompressor.decompress(compressed, 
151                     totalBytesDecompressed, totalBytesWritten, 
152                     bytesRead, unicharBuffer, 0, unicharBufferSize);
153
154             // copy the current set of chars into the target buffer
155             System.arraycopy(unicharBuffer, 0, decompressed, 
156                     totalCharsWritten, unicharsWritten);
157             
158             // update the no. of bytes decompressed
159             totalBytesDecompressed += bytesRead[0];
160             
161             // update the no. of chars written
162             totalCharsWritten += unicharsWritten;
163             
164             /*System.out.logln("Decompression pass complete.  Decompressed "
165                                + bytesRead[0] + " bytes into "
166                                + unicharsWritten + " chars.");*/
167         } while (totalBytesDecompressed < totalBytesWritten);
168
169         if (totalBytesDecompressed != totalBytesWritten) {
170             errln("ERROR: Number of bytes decompressed(" 
171                     + totalBytesDecompressed 
172                     + ") != totalBytesWritten(" 
173                     + totalBytesWritten + ")");
174         } else {
175             logln("MP: " + totalBytesWritten
176                     + " bytes ===> " + totalCharsWritten + " chars.");
177         }
178         
179         if (logDiffs(chars, len, decompressed, totalCharsWritten)) {
180             errln("ERROR: buffer contents incorrect");
181         }
182     }
183
184     /** Print differences between two character buffers */
185     private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) {
186         boolean result  = false;
187         
188         if(s1len != s2len) {
189             logln("====================");
190             logln("Length doesn't match: expected " + s1len
191                                + ", got " + s2len);
192             logln("Expected:");
193             printChars(s1, s1len);
194             logln("Got:");
195             printChars(s2, s2len);
196             result = true;
197         }
198         
199         int len = Math.min(s1len, s2len);
200         for(int i = 0; i < len; ++i) {
201             if(s1[i] != s2[i]) {
202                 if(result == false) {
203                     logln("====================");
204                 }
205                 logln("First difference at char " + i);
206                 logln("Exp. char: " + Integer.toHexString(s1[i]));
207                 logln("Got char : " + Integer.toHexString(s2[i]));
208                 logln("Expected:");
209                 printChars(s1, s1len);
210                 logln("Got:");
211                 printChars(s2, s2len);
212                 result = true;
213                 break;
214             }
215         }
216     
217         return result;
218     }
219
220     // generate a string of characters, with simulated runs of characters
221     /*private static char[] randomChars(int len, Random random) {
222         char[] result = new char [len];
223         int runLen = 0;
224         int used = 0;
225         
226         while(used < len) {
227             runLen = (int) (30 * random.nextDouble());
228             if(used + runLen >= len) {
229                 runLen = len - used;
230             }
231             randomRun(result, used, runLen, random);
232             used += runLen;
233         }
234     
235         return result;
236     }*/
237
238     // generate a run of characters in a "window"
239     /*private static void randomRun(char[] target, int pos, int len, Random random) {
240         int offset = (int) (0xFFFF * random.nextDouble());
241
242         // don't overflow 16 bits
243         if(offset > 0xFF80) {
244             offset = 0xFF80;
245         }
246
247         for(int i = pos; i < pos + len; i++) {
248             target[i] = (char)(offset + (0x7F * random.nextDouble()));
249         }
250     }*/
251
252     private static final String [] fTestCases = {
253         "Hello \u9292 \u9192 World!",
254         "Hell\u0429o \u9292 \u9192 W\u0084rld!",
255         "Hell\u0429o \u9292 \u9292W\u0084rld!",
256
257         "\u0648\u06c8", // catch missing reset
258         "\u0648\u06c8",
259
260         "\u4444\uE001", // lowest quotable
261         "\u4444\uf2FF", // highest quotable
262         "\u4444\uf188\u4444",
263         "\u4444\uf188\uf288",
264         "\u4444\uf188abc\0429\uf288",
265         "\u9292\u2222",
266         "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!",
267         "Hell\u0429o \u9292 \u9292W\u0084rld!",
268         "Hello World!123456",
269         "Hello W\u0081\u011f\u0082!", // Latin 1 run
270
271         "abc\u0301\u0302",  // uses SQn for u301 u302
272         "abc\u4411d",      // uses SQU
273         "abc\u4411\u4412d",// uses SCU
274         "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5
275         "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data
276         "\u9292\u2222",
277         "\u9191\u9191\u3041\u9191\u3041\u3041\u3000",
278         "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c",
279         "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002",
280
281         "", // empty input
282         "\u0000", // smallest BMP character
283         "\uFFFF", // largest BMP character
284
285         "\ud800\udc00", // smallest surrogate
286         "\ud8ff\udcff", // largest surrogate pair
287         
288         // regression tests
289         "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa",
290         "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c",
291         "\u0041\u00df\u0401\u015f",
292         "\u9066\u2123abc",
293         "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5",
294         "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489"
295
296     };
297
298     //==========================
299     // Compression modes
300     //==========================
301     private final static int SINGLEBYTEMODE                 = 0;
302     private final static int UNICODEMODE                    = 1;
303     
304     //==========================
305     // Single-byte mode tags
306     //==========================
307     private final static int SDEFINEX                   = 0x0B;
308     //private final static int SRESERVED                  = 0x0C;             // this is a reserved value
309     private final static int SQUOTEU                    = 0x0E;
310     private final static int SSWITCHU                   = 0x0F;
311
312     private final static int SQUOTE0                        = 0x01;
313     private final static int SQUOTE1                        = 0x02;
314     private final static int SQUOTE2                        = 0x03;
315     private final static int SQUOTE3                        = 0x04;
316     private final static int SQUOTE4                        = 0x05;
317     private final static int SQUOTE5                        = 0x06;
318     private final static int SQUOTE6                        = 0x07;
319     private final static int SQUOTE7                        = 0x08;
320
321     private final static int SSWITCH0                       = 0x10;
322     private final static int SSWITCH1                       = 0x11;
323     private final static int SSWITCH2                       = 0x12;
324     private final static int SSWITCH3                       = 0x13;
325     private final static int SSWITCH4                       = 0x14;
326     private final static int SSWITCH5                       = 0x15;
327     private final static int SSWITCH6                       = 0x16;
328     private final static int SSWITCH7                       = 0x17;
329
330     private final static int SDEFINE0                       = 0x18;
331     private final static int SDEFINE1                       = 0x19;
332     private final static int SDEFINE2                       = 0x1A;
333     private final static int SDEFINE3                       = 0x1B;
334     private final static int SDEFINE4                       = 0x1C;
335     private final static int SDEFINE5                       = 0x1D;
336     private final static int SDEFINE6                       = 0x1E;
337     private final static int SDEFINE7                       = 0x1F;
338
339     //==========================
340     // Unicode mode tags
341     //==========================
342     private final static int USWITCH0                       = 0xE0;
343     private final static int USWITCH1                       = 0xE1;
344     private final static int USWITCH2                       = 0xE2;
345     private final static int USWITCH3                       = 0xE3;
346     private final static int USWITCH4                       = 0xE4;
347     private final static int USWITCH5                       = 0xE5;
348     private final static int USWITCH6                       = 0xE6;
349     private final static int USWITCH7                       = 0xE7;
350
351     private final static int UDEFINE0                       = 0xE8;
352     private final static int UDEFINE1                       = 0xE9;
353     private final static int UDEFINE2                       = 0xEA;
354     private final static int UDEFINE3                       = 0xEB;
355     private final static int UDEFINE4                       = 0xEC;
356     private final static int UDEFINE5                       = 0xED;
357     private final static int UDEFINE6                       = 0xEE;
358     private final static int UDEFINE7                       = 0xEF;
359
360     private final static int UQUOTEU                        = 0xF0;
361     private final static int UDEFINEX                       = 0xF1;
362     //private final static int URESERVED                      = 0xF2;         // this is a reserved value
363
364     /* Print out an array of characters, with non-printables (for me) 
365        displayed as hex values */
366     private void printChars(char[] chars, int len) {
367         for(int i = 0; i < len; i++) {
368             int c = (int)chars[i];
369             if(c < 0x0020 || c >= 0x7f) {
370                 log("[0x");
371                 log(Integer.toHexString(c));
372                 log("]");
373             } else {
374                 log(String.valueOf((char)c));
375             }
376         }
377         logln("");
378     }
379
380     private void printBytes(byte[] byteBuffer, int len) {
381         int curByteIndex = 0;
382         int byteBufferLimit = len;
383         int mode = SINGLEBYTEMODE;
384         int aByte = 0x00;
385         
386         if(len > byteBuffer.length) {
387             logln("Warning: printBytes called with length too large. Truncating");
388             byteBufferLimit = byteBuffer.length;
389         }
390         
391         while(curByteIndex < byteBufferLimit) {
392             switch(mode) {  
393             case SINGLEBYTEMODE:
394                 while(curByteIndex < byteBufferLimit 
395                       && mode == SINGLEBYTEMODE)  {
396                     aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
397                     switch(aByte) {
398                     default:
399                         log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
400                         break;
401                         // quote unicode
402                     case SQUOTEU:
403                         log("SQUOTEU ");
404                         if (curByteIndex < byteBufferLimit) {
405                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
406                         }
407                         if (curByteIndex < byteBufferLimit) {
408                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
409                         }
410                         break;
411                         
412                         // switch to Unicode mode
413                     case SSWITCHU:
414                         log("SSWITCHU ");
415                         mode = UNICODEMODE;
416                         break;
417                         
418                         // handle all quote tags
419                     case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
420                     case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
421                         log("SQUOTE" + (aByte - SQUOTE0) + " ");
422                         if(curByteIndex < byteBufferLimit) {
423                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
424                         }
425                         break;
426                         
427                         // handle all switch tags
428                     case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3:
429                     case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7:
430                         log("SSWITCH" + (aByte - SSWITCH0) + " ");
431                         break;
432                                         
433                         // handle all define tags
434                     case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
435                     case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
436                         log("SDEFINE" + (aByte - SDEFINE0) + " ");
437                         if (curByteIndex < byteBufferLimit) {
438                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
439                         }
440                         break;
441                         
442                         // handle define extended tag
443                     case SDEFINEX:
444                         log("SDEFINEX ");
445                         if (curByteIndex < byteBufferLimit) {
446                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
447                         }
448                         if (curByteIndex < byteBufferLimit) {
449                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
450                         }
451                         break;
452                         
453                     } // end switch
454                 } // end while
455                 break;
456                 
457             case UNICODEMODE:
458                 while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) {
459                     aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
460                     switch(aByte) {
461                         // handle all define tags
462                     case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
463                     case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
464                         log("UDEFINE" + (aByte - UDEFINE0) + " ");
465                         if (curByteIndex < byteBufferLimit) {
466                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
467                         }
468                         mode = SINGLEBYTEMODE;
469                         break;
470                         
471                         // handle define extended tag
472                     case UDEFINEX:
473                         log("UDEFINEX ");
474                         if (curByteIndex < byteBufferLimit) {
475                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
476                         }
477                         if (curByteIndex < byteBufferLimit) {
478                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
479                         }
480                         break;
481                         
482                         // handle all switch tags
483                     case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3:
484                     case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7:
485                         log("USWITCH" + (aByte - USWITCH0) + " ");
486                         mode = SINGLEBYTEMODE;
487                         break;
488                         
489                         // quote unicode
490                     case UQUOTEU:
491                         log("UQUOTEU ");
492                         if (curByteIndex < byteBufferLimit) {
493                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
494                         }
495                         if (curByteIndex < byteBufferLimit) {
496                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
497                         }
498                         break;
499                         
500                     default:
501                         log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
502                         if (curByteIndex < byteBufferLimit) {
503                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
504                         }
505                         break;
506                         
507                     } // end switch
508                 } // end while
509                 break;
510                 
511             } // end switch( mode )
512         } // end while
513         
514         logln("");
515     }    
516 }
517
518
519
520
521
522