// Source: gitweb.fperrin.net Git - Dictionary.git / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / compression / ExhaustiveTest.java
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
7 package com.ibm.icu.dev.test.compression;\r
8 \r
9 import com.ibm.icu.dev.test.TestFmwk;\r
10 import com.ibm.icu.text.UnicodeCompressor;\r
11 import com.ibm.icu.text.UnicodeDecompressor;\r
12 \r
13 public class ExhaustiveTest extends TestFmwk {\r
14     public static void main(String args[]) throws Exception {\r
15         new ExhaustiveTest().run(args);\r
16     }\r
17 \r
18     /** Test simple compress/decompress API, returning # of errors */\r
19     public void testSimple() throws Exception {\r
20         for(int i = 0; i < fTestCases.length; i++) {\r
21             simpleTest(fTestCases[i]);\r
22         }\r
23     }\r
24     private void simpleTest(String s) throws Exception {\r
25         byte [] compressed = UnicodeCompressor.compress(s);\r
26         String res = UnicodeDecompressor.decompress(compressed);\r
27         if (logDiffs(s.toCharArray(), s.length(), \r
28                 res.toCharArray(), res.length()) == false) {\r
29             logln(s.length() + " chars ===> " \r
30                     + compressed.length + " bytes ===> " \r
31                     + res.length() + " chars");\r
32         } else {\r
33             logln("Compressed:");\r
34             printBytes(compressed, compressed.length);\r
35             errln("testSimple did not compress correctly");\r
36         }\r
37     }\r
38 \r
39     /** Test iterative compress/decompress API, returning # of errors */\r
40     public void testIterative() throws Exception {\r
41         for(int i = 0; i < fTestCases.length; i++) {\r
42             myTest(fTestCases[i].toCharArray(), fTestCases[i].length());\r
43         }\r
44     }\r
45     private void myTest(char[] chars, int len) {\r
46         UnicodeCompressor myCompressor = new UnicodeCompressor();\r
47         UnicodeDecompressor myDecompressor = new UnicodeDecompressor();\r
48         \r
49         // variables for my compressor\r
50         int myByteCount = 0;\r
51         int myCharCount = 0;\r
52         int myCompressedSize = Math.max(512, 3*len);\r
53         byte[] myCompressed = new byte[myCompressedSize];\r
54         int myDecompressedSize = Math.max(2, 2 * len);\r
55         char[] myDecompressed = new char[myDecompressedSize];\r
56         int[] unicharsRead = new int[1];\r
57         int[] bytesRead = new int[1];\r
58         \r
59         myByteCount = myCompressor.compress(chars, 0, len, unicharsRead,\r
60                 myCompressed, 0, myCompressedSize);\r
61 \r
62         myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount,\r
63                 bytesRead, myDecompressed, 0, myDecompressedSize);\r
64 \r
65         if (logDiffs(chars, len, myDecompressed, myCharCount) == false) {\r
66             logln(len + " chars ===> " \r
67                     + myByteCount + " bytes ===> " \r
68                     + myCharCount + " chars");\r
69         } else {\r
70             logln("Compressed:");\r
71             printBytes(myCompressed, myByteCount);\r
72             errln("Iterative test failed");\r
73         }\r
74     }\r
75 \r
76     /** Test iterative compress/decompress API */\r
77     public void testMultipass() throws Exception {\r
78         for(int i = 0; i < fTestCases.length; i++) {\r
79             myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length());\r
80         }\r
81     }\r
82     private void myMultipassTest(char [] chars, int len) throws Exception {\r
83         UnicodeCompressor myCompressor = new UnicodeCompressor();\r
84         UnicodeDecompressor myDecompressor = new UnicodeDecompressor();\r
85         \r
86         // variables for my compressor\r
87         \r
88         // for looping\r
89         int byteBufferSize = 4;//Math.max(4, len / 4);\r
90         byte[] byteBuffer = new byte [byteBufferSize];\r
91         // real target\r
92         int compressedSize = Math.max(512, 3 * len);\r
93         byte[] compressed = new byte[compressedSize];\r
94 \r
95         // for looping\r
96         int unicharBufferSize = 2;//byteBufferSize;\r
97         char[] unicharBuffer = new char[unicharBufferSize];\r
98         // real target\r
99         int decompressedSize = Math.max(2, 2 * len);\r
100         char[] decompressed = new char[decompressedSize];\r
101 \r
102         int bytesWritten = 0;\r
103         int unicharsWritten = 0;\r
104 \r
105         int[] unicharsRead = new int[1];\r
106         int[] bytesRead = new int[1];\r
107         \r
108         int totalCharsCompressed = 0;\r
109         int totalBytesWritten = 0;\r
110 \r
111         int totalBytesDecompressed  = 0;\r
112         int totalCharsWritten = 0;\r
113 \r
114         // not used boolean err = false;\r
115 \r
116 \r
117         // perform the compression in a loop\r
118         do {\r
119             \r
120             // do the compression\r
121             bytesWritten = myCompressor.compress(chars, totalCharsCompressed, \r
122                    len, unicharsRead, byteBuffer, 0, byteBufferSize);\r
123 \r
124             // copy the current set of bytes into the target buffer\r
125             System.arraycopy(byteBuffer, 0, compressed, \r
126                    totalBytesWritten, bytesWritten);\r
127             \r
128             // update the no. of characters compressed\r
129             totalCharsCompressed += unicharsRead[0];\r
130             \r
131             // update the no. of bytes written\r
132             totalBytesWritten += bytesWritten;\r
133             \r
134             /*System.out.logln("Compression pass complete.  Compressed "\r
135                                + unicharsRead[0] + " chars into "\r
136                                + bytesWritten + " bytes.");*/\r
137         } while(totalCharsCompressed < len);\r
138 \r
139         if (totalCharsCompressed != len) {\r
140             errln("ERROR: Number of characters compressed("\r
141                     + totalCharsCompressed + ") != len(" + len + ")");\r
142         } else {\r
143             logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes.");\r
144         }\r
145         \r
146         // perform the decompression in a loop\r
147         do {\r
148             \r
149             // do the decompression\r
150             unicharsWritten = myDecompressor.decompress(compressed, \r
151                     totalBytesDecompressed, totalBytesWritten, \r
152                     bytesRead, unicharBuffer, 0, unicharBufferSize);\r
153 \r
154             // copy the current set of chars into the target buffer\r
155             System.arraycopy(unicharBuffer, 0, decompressed, \r
156                     totalCharsWritten, unicharsWritten);\r
157             \r
158             // update the no. of bytes decompressed\r
159             totalBytesDecompressed += bytesRead[0];\r
160             \r
161             // update the no. of chars written\r
162             totalCharsWritten += unicharsWritten;\r
163             \r
164             /*System.out.logln("Decompression pass complete.  Decompressed "\r
165                                + bytesRead[0] + " bytes into "\r
166                                + unicharsWritten + " chars.");*/\r
167         } while (totalBytesDecompressed < totalBytesWritten);\r
168 \r
169         if (totalBytesDecompressed != totalBytesWritten) {\r
170             errln("ERROR: Number of bytes decompressed(" \r
171                     + totalBytesDecompressed \r
172                     + ") != totalBytesWritten(" \r
173                     + totalBytesWritten + ")");\r
174         } else {\r
175             logln("MP: " + totalBytesWritten\r
176                     + " bytes ===> " + totalCharsWritten + " chars.");\r
177         }\r
178         \r
179         if (logDiffs(chars, len, decompressed, totalCharsWritten)) {\r
180             errln("ERROR: buffer contents incorrect");\r
181         }\r
182     }\r
183 \r
184     /** Print differences between two character buffers */\r
185     private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) {\r
186         boolean result  = false;\r
187         \r
188         if(s1len != s2len) {\r
189             logln("====================");\r
190             logln("Length doesn't match: expected " + s1len\r
191                                + ", got " + s2len);\r
192             logln("Expected:");\r
193             printChars(s1, s1len);\r
194             logln("Got:");\r
195             printChars(s2, s2len);\r
196             result = true;\r
197         }\r
198         \r
199         int len = Math.min(s1len, s2len);\r
200         for(int i = 0; i < len; ++i) {\r
201             if(s1[i] != s2[i]) {\r
202                 if(result == false) {\r
203                     logln("====================");\r
204                 }\r
205                 logln("First difference at char " + i);\r
206                 logln("Exp. char: " + Integer.toHexString(s1[i]));\r
207                 logln("Got char : " + Integer.toHexString(s2[i]));\r
208                 logln("Expected:");\r
209                 printChars(s1, s1len);\r
210                 logln("Got:");\r
211                 printChars(s2, s2len);\r
212                 result = true;\r
213                 break;\r
214             }\r
215         }\r
216     \r
217         return result;\r
218     }\r
219 \r
220     // generate a string of characters, with simulated runs of characters\r
221     /*private static char[] randomChars(int len, Random random) {\r
222         char[] result = new char [len];\r
223         int runLen = 0;\r
224         int used = 0;\r
225         \r
226         while(used < len) {\r
227             runLen = (int) (30 * random.nextDouble());\r
228             if(used + runLen >= len) {\r
229                 runLen = len - used;\r
230             }\r
231             randomRun(result, used, runLen, random);\r
232             used += runLen;\r
233         }\r
234     \r
235         return result;\r
236     }*/\r
237 \r
238     // generate a run of characters in a "window"\r
239     /*private static void randomRun(char[] target, int pos, int len, Random random) {\r
240         int offset = (int) (0xFFFF * random.nextDouble());\r
241 \r
242         // don't overflow 16 bits\r
243         if(offset > 0xFF80) {\r
244             offset = 0xFF80;\r
245         }\r
246 \r
247         for(int i = pos; i < pos + len; i++) {\r
248             target[i] = (char)(offset + (0x7F * random.nextDouble()));\r
249         }\r
250     }*/\r
251 \r
252     private static final String [] fTestCases = {\r
253         "Hello \u9292 \u9192 World!",\r
254         "Hell\u0429o \u9292 \u9192 W\u0084rld!",\r
255         "Hell\u0429o \u9292 \u9292W\u0084rld!",\r
256 \r
257         "\u0648\u06c8", // catch missing reset\r
258         "\u0648\u06c8",\r
259 \r
260         "\u4444\uE001", // lowest quotable\r
261         "\u4444\uf2FF", // highest quotable\r
262         "\u4444\uf188\u4444",\r
263         "\u4444\uf188\uf288",\r
264         "\u4444\uf188abc\0429\uf288",\r
265         "\u9292\u2222",\r
266         "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!",\r
267         "Hell\u0429o \u9292 \u9292W\u0084rld!",\r
268         "Hello World!123456",\r
269         "Hello W\u0081\u011f\u0082!", // Latin 1 run\r
270 \r
271         "abc\u0301\u0302",  // uses SQn for u301 u302\r
272         "abc\u4411d",      // uses SQU\r
273         "abc\u4411\u4412d",// uses SCU\r
274         "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5\r
275         "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data\r
276         "\u9292\u2222",\r
277         "\u9191\u9191\u3041\u9191\u3041\u3041\u3000",\r
278         "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c",\r
279         "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002",\r
280 \r
281         "", // empty input\r
282         "\u0000", // smallest BMP character\r
283         "\uFFFF", // largest BMP character\r
284 \r
285         "\ud800\udc00", // smallest surrogate\r
286         "\ud8ff\udcff", // largest surrogate pair\r
287         \r
288         // regression tests\r
289         "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa",\r
290         "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c",\r
291         "\u0041\u00df\u0401\u015f",\r
292         "\u9066\u2123abc",\r
293         "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5",\r
294         "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489"\r
295 \r
296     };\r
297 \r
298     //==========================\r
299     // Compression modes\r
300     //==========================\r
301     private final static int SINGLEBYTEMODE                 = 0;\r
302     private final static int UNICODEMODE                    = 1;\r
303     \r
304     //==========================\r
305     // Single-byte mode tags\r
306     //==========================\r
307     private final static int SDEFINEX                   = 0x0B;\r
308     //private final static int SRESERVED                  = 0x0C;             // this is a reserved value\r
309     private final static int SQUOTEU                    = 0x0E;\r
310     private final static int SSWITCHU                   = 0x0F;\r
311 \r
312     private final static int SQUOTE0                        = 0x01;\r
313     private final static int SQUOTE1                        = 0x02;\r
314     private final static int SQUOTE2                        = 0x03;\r
315     private final static int SQUOTE3                        = 0x04;\r
316     private final static int SQUOTE4                        = 0x05;\r
317     private final static int SQUOTE5                        = 0x06;\r
318     private final static int SQUOTE6                        = 0x07;\r
319     private final static int SQUOTE7                        = 0x08;\r
320 \r
321     private final static int SSWITCH0                       = 0x10;\r
322     private final static int SSWITCH1                       = 0x11;\r
323     private final static int SSWITCH2                       = 0x12;\r
324     private final static int SSWITCH3                       = 0x13;\r
325     private final static int SSWITCH4                       = 0x14;\r
326     private final static int SSWITCH5                       = 0x15;\r
327     private final static int SSWITCH6                       = 0x16;\r
328     private final static int SSWITCH7                       = 0x17;\r
329 \r
330     private final static int SDEFINE0                       = 0x18;\r
331     private final static int SDEFINE1                       = 0x19;\r
332     private final static int SDEFINE2                       = 0x1A;\r
333     private final static int SDEFINE3                       = 0x1B;\r
334     private final static int SDEFINE4                       = 0x1C;\r
335     private final static int SDEFINE5                       = 0x1D;\r
336     private final static int SDEFINE6                       = 0x1E;\r
337     private final static int SDEFINE7                       = 0x1F;\r
338 \r
339     //==========================\r
340     // Unicode mode tags\r
341     //==========================\r
342     private final static int USWITCH0                       = 0xE0;\r
343     private final static int USWITCH1                       = 0xE1;\r
344     private final static int USWITCH2                       = 0xE2;\r
345     private final static int USWITCH3                       = 0xE3;\r
346     private final static int USWITCH4                       = 0xE4;\r
347     private final static int USWITCH5                       = 0xE5;\r
348     private final static int USWITCH6                       = 0xE6;\r
349     private final static int USWITCH7                       = 0xE7;\r
350 \r
351     private final static int UDEFINE0                       = 0xE8;\r
352     private final static int UDEFINE1                       = 0xE9;\r
353     private final static int UDEFINE2                       = 0xEA;\r
354     private final static int UDEFINE3                       = 0xEB;\r
355     private final static int UDEFINE4                       = 0xEC;\r
356     private final static int UDEFINE5                       = 0xED;\r
357     private final static int UDEFINE6                       = 0xEE;\r
358     private final static int UDEFINE7                       = 0xEF;\r
359 \r
360     private final static int UQUOTEU                        = 0xF0;\r
361     private final static int UDEFINEX                       = 0xF1;\r
362     //private final static int URESERVED                      = 0xF2;         // this is a reserved value\r
363 \r
364     /* Print out an array of characters, with non-printables (for me) \r
365        displayed as hex values */\r
366     private void printChars(char[] chars, int len) {\r
367         for(int i = 0; i < len; i++) {\r
368             int c = (int)chars[i];\r
369             if(c < 0x0020 || c >= 0x7f) {\r
370                 log("[0x");\r
371                 log(Integer.toHexString(c));\r
372                 log("]");\r
373             } else {\r
374                 log(String.valueOf((char)c));\r
375             }\r
376         }\r
377         logln("");\r
378     }\r
379 \r
380     private void printBytes(byte[] byteBuffer, int len) {\r
381         int curByteIndex = 0;\r
382         int byteBufferLimit = len;\r
383         int mode = SINGLEBYTEMODE;\r
384         int aByte = 0x00;\r
385         \r
386         if(len > byteBuffer.length) {\r
387             logln("Warning: printBytes called with length too large. Truncating");\r
388             byteBufferLimit = byteBuffer.length;\r
389         }\r
390         \r
391         while(curByteIndex < byteBufferLimit) {\r
392             switch(mode) {  \r
393             case SINGLEBYTEMODE:\r
394                 while(curByteIndex < byteBufferLimit \r
395                       && mode == SINGLEBYTEMODE)  {\r
396                     aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;\r
397                     switch(aByte) {\r
398                     default:\r
399                         log(Integer.toHexString(((int) aByte) & 0xFF) + " ");\r
400                         break;\r
401                         // quote unicode\r
402                     case SQUOTEU:\r
403                         log("SQUOTEU ");\r
404                         if (curByteIndex < byteBufferLimit) {\r
405                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
406                         }\r
407                         if (curByteIndex < byteBufferLimit) {\r
408                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
409                         }\r
410                         break;\r
411                         \r
412                         // switch to Unicode mode\r
413                     case SSWITCHU:\r
414                         log("SSWITCHU ");\r
415                         mode = UNICODEMODE;\r
416                         break;\r
417                         \r
418                         // handle all quote tags\r
419                     case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:\r
420                     case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:\r
421                         log("SQUOTE" + (aByte - SQUOTE0) + " ");\r
422                         if(curByteIndex < byteBufferLimit) {\r
423                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
424                         }\r
425                         break;\r
426                         \r
427                         // handle all switch tags\r
428                     case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3:\r
429                     case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7:\r
430                         log("SSWITCH" + (aByte - SSWITCH0) + " ");\r
431                         break;\r
432                                         \r
433                         // handle all define tags\r
434                     case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:\r
435                     case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:\r
436                         log("SDEFINE" + (aByte - SDEFINE0) + " ");\r
437                         if (curByteIndex < byteBufferLimit) {\r
438                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
439                         }\r
440                         break;\r
441                         \r
442                         // handle define extended tag\r
443                     case SDEFINEX:\r
444                         log("SDEFINEX ");\r
445                         if (curByteIndex < byteBufferLimit) {\r
446                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
447                         }\r
448                         if (curByteIndex < byteBufferLimit) {\r
449                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
450                         }\r
451                         break;\r
452                         \r
453                     } // end switch\r
454                 } // end while\r
455                 break;\r
456                 \r
457             case UNICODEMODE:\r
458                 while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) {\r
459                     aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;\r
460                     switch(aByte) {\r
461                         // handle all define tags\r
462                     case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:\r
463                     case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:\r
464                         log("UDEFINE" + (aByte - UDEFINE0) + " ");\r
465                         if (curByteIndex < byteBufferLimit) {\r
466                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
467                         }\r
468                         mode = SINGLEBYTEMODE;\r
469                         break;\r
470                         \r
471                         // handle define extended tag\r
472                     case UDEFINEX:\r
473                         log("UDEFINEX ");\r
474                         if (curByteIndex < byteBufferLimit) {\r
475                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
476                         }\r
477                         if (curByteIndex < byteBufferLimit) {\r
478                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
479                         }\r
480                         break;\r
481                         \r
482                         // handle all switch tags\r
483                     case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3:\r
484                     case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7:\r
485                         log("USWITCH" + (aByte - USWITCH0) + " ");\r
486                         mode = SINGLEBYTEMODE;\r
487                         break;\r
488                         \r
489                         // quote unicode\r
490                     case UQUOTEU:\r
491                         log("UQUOTEU ");\r
492                         if (curByteIndex < byteBufferLimit) {\r
493                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
494                         }\r
495                         if (curByteIndex < byteBufferLimit) {\r
496                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
497                         }\r
498                         break;\r
499                         \r
500                     default:\r
501                         log(Integer.toHexString(((int) aByte) & 0xFF) + " ");\r
502                         if (curByteIndex < byteBufferLimit) {\r
503                             log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");\r
504                         }\r
505                         break;\r
506                         \r
507                     } // end switch\r
508                 } // end while\r
509                 break;\r
510                 \r
511             } // end switch( mode )\r
512         } // end while\r
513         \r
514         logln("");\r
515     }    \r
516 }\r
517 \r
518 \r
519 \r
520 \r
521 \r
522 \r