2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.compression;
\r
9 import com.ibm.icu.dev.test.TestFmwk;
\r
10 import com.ibm.icu.text.UnicodeCompressor;
\r
11 import com.ibm.icu.text.UnicodeDecompressor;
\r
13 public class ExhaustiveTest extends TestFmwk {
\r
14 public static void main(String args[]) throws Exception {
\r
15 new ExhaustiveTest().run(args);
\r
18 /** Test simple compress/decompress API, returning # of errors */
\r
19 public void testSimple() throws Exception {
\r
20 for(int i = 0; i < fTestCases.length; i++) {
\r
21 simpleTest(fTestCases[i]);
\r
24 private void simpleTest(String s) throws Exception {
\r
25 byte [] compressed = UnicodeCompressor.compress(s);
\r
26 String res = UnicodeDecompressor.decompress(compressed);
\r
27 if (logDiffs(s.toCharArray(), s.length(),
\r
28 res.toCharArray(), res.length()) == false) {
\r
29 logln(s.length() + " chars ===> "
\r
30 + compressed.length + " bytes ===> "
\r
31 + res.length() + " chars");
\r
33 logln("Compressed:");
\r
34 printBytes(compressed, compressed.length);
\r
35 errln("testSimple did not compress correctly");
\r
39 /** Test iterative compress/decompress API, returning # of errors */
\r
40 public void testIterative() throws Exception {
\r
41 for(int i = 0; i < fTestCases.length; i++) {
\r
42 myTest(fTestCases[i].toCharArray(), fTestCases[i].length());
\r
45 private void myTest(char[] chars, int len) {
\r
46 UnicodeCompressor myCompressor = new UnicodeCompressor();
\r
47 UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
\r
49 // variables for my compressor
\r
50 int myByteCount = 0;
\r
51 int myCharCount = 0;
\r
52 int myCompressedSize = Math.max(512, 3*len);
\r
53 byte[] myCompressed = new byte[myCompressedSize];
\r
54 int myDecompressedSize = Math.max(2, 2 * len);
\r
55 char[] myDecompressed = new char[myDecompressedSize];
\r
56 int[] unicharsRead = new int[1];
\r
57 int[] bytesRead = new int[1];
\r
59 myByteCount = myCompressor.compress(chars, 0, len, unicharsRead,
\r
60 myCompressed, 0, myCompressedSize);
\r
62 myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount,
\r
63 bytesRead, myDecompressed, 0, myDecompressedSize);
\r
65 if (logDiffs(chars, len, myDecompressed, myCharCount) == false) {
\r
66 logln(len + " chars ===> "
\r
67 + myByteCount + " bytes ===> "
\r
68 + myCharCount + " chars");
\r
70 logln("Compressed:");
\r
71 printBytes(myCompressed, myByteCount);
\r
72 errln("Iterative test failed");
\r
76 /** Test iterative compress/decompress API */
\r
77 public void testMultipass() throws Exception {
\r
78 for(int i = 0; i < fTestCases.length; i++) {
\r
79 myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length());
\r
// Round-trips chars[0..len) through the buffer-based API in several passes:
// each compress() call writes into a 4-byte scratch buffer and each
// decompress() call into a 2-char scratch buffer; every pass's output is
// appended to a large accumulation buffer. The accumulated characters are
// finally compared with the input via logDiffs().
// NOTE(review): this listing omits several short lines of the method (loop
// openers, closing braces, possibly `else` keywords) - confirm the exact
// structure against the full file before changing the logic.
82 private void myMultipassTest(char [] chars, int len) throws Exception {
\r
83 UnicodeCompressor myCompressor = new UnicodeCompressor();
\r
84 UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
\r
86 // variables for my compressor
\r
// per-pass scratch buffer for compressed bytes; fixed at 4 (the
// length-dependent sizing is commented out)
89 int byteBufferSize = 4;//Math.max(4, len / 4);
\r
90 byte[] byteBuffer = new byte [byteBufferSize];
\r
// accumulation buffer that receives the bytes of every compression pass
92 int compressedSize = Math.max(512, 3 * len);
\r
93 byte[] compressed = new byte[compressedSize];
\r
// per-pass scratch buffer for decompressed chars; fixed at 2
96 int unicharBufferSize = 2;//byteBufferSize;
\r
97 char[] unicharBuffer = new char[unicharBufferSize];
\r
// accumulation buffer that receives the chars of every decompression pass
99 int decompressedSize = Math.max(2, 2 * len);
\r
100 char[] decompressed = new char[decompressedSize];
\r
// return values of the most recent compress() / decompress() call
102 int bytesWritten = 0;
\r
103 int unicharsWritten = 0;
\r
// single-element out-parameters; element 0 is added to the running totals
// after each pass
105 int[] unicharsRead = new int[1];
\r
106 int[] bytesRead = new int[1];
\r
// running totals for the compression phase
108 int totalCharsCompressed = 0;
\r
109 int totalBytesWritten = 0;
\r
// running totals for the decompression phase
111 int totalBytesDecompressed = 0;
\r
112 int totalCharsWritten = 0;
\r
114 // not used boolean err = false;
\r
117 // perform the compression in a loop
\r
// NOTE(review): the loop opener matching the `} while(totalCharsCompressed < len)`
// below is not present in this listing - presumably `do {`; confirm.
120 // do the compression
\r
// the source window starts where the previous pass stopped and ends at len
121 bytesWritten = myCompressor.compress(chars, totalCharsCompressed,
\r
122 len, unicharsRead, byteBuffer, 0, byteBufferSize);
\r
124 // copy the current set of bytes into the target buffer
\r
125 System.arraycopy(byteBuffer, 0, compressed,
\r
126 totalBytesWritten, bytesWritten);
\r
128 // update the no. of characters compressed
\r
129 totalCharsCompressed += unicharsRead[0];
\r
131 // update the no. of bytes written
\r
132 totalBytesWritten += bytesWritten;
\r
134 /*System.out.logln("Compression pass complete. Compressed "
\r
135 + unicharsRead[0] + " chars into "
\r
136 + bytesWritten + " bytes.");*/
\r
137 } while(totalCharsCompressed < len);
\r
// the loop can only exit with the total >= len, so this reports an overshoot
139 if (totalCharsCompressed != len) {
\r
140 errln("ERROR: Number of characters compressed("
\r
141 + totalCharsCompressed + ") != len(" + len + ")");
\r
// NOTE(review): listing line 142 is absent, so it is unclear whether the
// summary below is unconditional or sits in an else branch - confirm.
143 logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes.");
\r
146 // perform the decompression in a loop
\r
// NOTE(review): the loop opener matching the
// `} while (totalBytesDecompressed < totalBytesWritten)` below is not
// present in this listing - presumably `do {`; confirm.
149 // do the decompression
\r
// the source window starts where the previous pass stopped and ends at the
// number of bytes produced by the compression phase
150 unicharsWritten = myDecompressor.decompress(compressed,
\r
151 totalBytesDecompressed, totalBytesWritten,
\r
152 bytesRead, unicharBuffer, 0, unicharBufferSize);
\r
154 // copy the current set of chars into the target buffer
\r
155 System.arraycopy(unicharBuffer, 0, decompressed,
\r
156 totalCharsWritten, unicharsWritten);
\r
158 // update the no. of bytes decompressed
\r
159 totalBytesDecompressed += bytesRead[0];
\r
161 // update the no. of chars written
\r
162 totalCharsWritten += unicharsWritten;
\r
164 /*System.out.logln("Decompression pass complete. Decompressed "
\r
165 + bytesRead[0] + " bytes into "
\r
166 + unicharsWritten + " chars.");*/
\r
167 } while (totalBytesDecompressed < totalBytesWritten);
\r
// the loop can only exit with the total >= totalBytesWritten, so this
// reports an overshoot
169 if (totalBytesDecompressed != totalBytesWritten) {
\r
170 errln("ERROR: Number of bytes decompressed("
\r
171 + totalBytesDecompressed
\r
172 + ") != totalBytesWritten("
\r
173 + totalBytesWritten + ")");
\r
// NOTE(review): listing line 174 is absent, so it is unclear whether the
// summary below is unconditional or sits in an else branch - confirm.
175 logln("MP: " + totalBytesWritten
\r
176 + " bytes ===> " + totalCharsWritten + " chars.");
\r
// a true result from logDiffs() is treated as a mismatch here
179 if (logDiffs(chars, len, decompressed, totalCharsWritten)) {
\r
180 errln("ERROR: buffer contents incorrect");
\r
184 /** Print differences between two character buffers */
\r
// Compares the first s1len chars of s1 (expected) with the first s2len
// chars of s2 (actual) and logs what differs: a length mismatch, and the
// first index at which the two buffers hold different chars.
// myMultipassTest() in this file reports an error when this method returns
// true.
// NOTE(review): the statements that set and return `result` (and any early
// exit from the loop) are not present in this listing - confirm the exact
// return contract against the full file.
185 private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) {
\r
186 boolean result = false;
\r
// length mismatch: log both lengths and dump both buffers
188 if(s1len != s2len) {
\r
189 logln("====================");
\r
190 logln("Length doesn't match: expected " + s1len
\r
191 + ", got " + s2len);
\r
192 logln("Expected:");
\r
193 printChars(s1, s1len);
\r
// NOTE(review): listing line 194 is absent; presumably a label for the
// second dump - confirm.
195 printChars(s2, s2len);
\r
// only the range present in both buffers is compared char by char
199 int len = Math.min(s1len, s2len);
\r
200 for(int i = 0; i < len; ++i) {
\r
201 if(s1[i] != s2[i]) {
\r
// the separator is logged here only while result is still false
202 if(result == false) {
\r
203 logln("====================");
\r
// report the index and both chars (as hex), then dump both buffers
205 logln("First difference at char " + i);
\r
206 logln("Exp. char: " + Integer.toHexString(s1[i]));
\r
207 logln("Got char : " + Integer.toHexString(s2[i]));
\r
208 logln("Expected:");
\r
209 printChars(s1, s1len);
\r
// NOTE(review): listing line 210 is absent; presumably a label for the
// second dump - confirm.
211 printChars(s2, s2len);
\r
220 // generate a string of characters, with simulated runs of characters
\r
221 /*private static char[] randomChars(int len, Random random) {
\r
222 char[] result = new char [len];
\r
226 while(used < len) {
\r
227 runLen = (int) (30 * random.nextDouble());
\r
228 if(used + runLen >= len) {
\r
229 runLen = len - used;
\r
231 randomRun(result, used, runLen, random);
\r
238 // generate a run of characters in a "window"
\r
239 /*private static void randomRun(char[] target, int pos, int len, Random random) {
\r
240 int offset = (int) (0xFFFF * random.nextDouble());
\r
242 // don't overflow 16 bits
\r
243 if(offset > 0xFF80) {
\r
247 for(int i = pos; i < pos + len; i++) {
\r
248 target[i] = (char)(offset + (0x7F * random.nextDouble()));
\r
252 private static final String [] fTestCases = {
\r
253 "Hello \u9292 \u9192 World!",
\r
254 "Hell\u0429o \u9292 \u9192 W\u0084rld!",
\r
255 "Hell\u0429o \u9292 \u9292W\u0084rld!",
\r
257 "\u0648\u06c8", // catch missing reset
\r
260 "\u4444\uE001", // lowest quotable
\r
261 "\u4444\uf2FF", // highest quotable
\r
262 "\u4444\uf188\u4444",
\r
263 "\u4444\uf188\uf288",
\r
264 "\u4444\uf188abc\0429\uf288",
\r
266 "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!",
\r
267 "Hell\u0429o \u9292 \u9292W\u0084rld!",
\r
268 "Hello World!123456",
\r
269 "Hello W\u0081\u011f\u0082!", // Latin 1 run
\r
271 "abc\u0301\u0302", // uses SQn for u301 u302
\r
272 "abc\u4411d", // uses SQU
\r
273 "abc\u4411\u4412d",// uses SCU
\r
274 "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5
\r
275 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data
\r
277 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000",
\r
278 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c",
\r
279 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002",
\r
282 "\u0000", // smallest BMP character
\r
283 "\uFFFF", // largest BMP character
\r
285 "\ud800\udc00", // smallest surrogate
\r
286 "\ud8ff\udcff", // largest surrogate pair
\r
288 // regression tests
\r
289 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa",
\r
290 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c",
\r
291 "\u0041\u00df\u0401\u015f",
\r
293 "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5",
\r
294 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489"
\r
298 //==========================
\r
299 // Compression modes
\r
300 //==========================
\r
301 private final static int SINGLEBYTEMODE = 0;
\r
302 private final static int UNICODEMODE = 1;
\r
304 //==========================
\r
305 // Single-byte mode tags
\r
306 //==========================
\r
307 private final static int SDEFINEX = 0x0B;
\r
308 //private final static int SRESERVED = 0x0C; // this is a reserved value
\r
309 private final static int SQUOTEU = 0x0E;
\r
310 private final static int SSWITCHU = 0x0F;
\r
312 private final static int SQUOTE0 = 0x01;
\r
313 private final static int SQUOTE1 = 0x02;
\r
314 private final static int SQUOTE2 = 0x03;
\r
315 private final static int SQUOTE3 = 0x04;
\r
316 private final static int SQUOTE4 = 0x05;
\r
317 private final static int SQUOTE5 = 0x06;
\r
318 private final static int SQUOTE6 = 0x07;
\r
319 private final static int SQUOTE7 = 0x08;
\r
321 private final static int SSWITCH0 = 0x10;
\r
322 private final static int SSWITCH1 = 0x11;
\r
323 private final static int SSWITCH2 = 0x12;
\r
324 private final static int SSWITCH3 = 0x13;
\r
325 private final static int SSWITCH4 = 0x14;
\r
326 private final static int SSWITCH5 = 0x15;
\r
327 private final static int SSWITCH6 = 0x16;
\r
328 private final static int SSWITCH7 = 0x17;
\r
330 private final static int SDEFINE0 = 0x18;
\r
331 private final static int SDEFINE1 = 0x19;
\r
332 private final static int SDEFINE2 = 0x1A;
\r
333 private final static int SDEFINE3 = 0x1B;
\r
334 private final static int SDEFINE4 = 0x1C;
\r
335 private final static int SDEFINE5 = 0x1D;
\r
336 private final static int SDEFINE6 = 0x1E;
\r
337 private final static int SDEFINE7 = 0x1F;
\r
339 //==========================
\r
340 // Unicode mode tags
\r
341 //==========================
\r
342 private final static int USWITCH0 = 0xE0;
\r
343 private final static int USWITCH1 = 0xE1;
\r
344 private final static int USWITCH2 = 0xE2;
\r
345 private final static int USWITCH3 = 0xE3;
\r
346 private final static int USWITCH4 = 0xE4;
\r
347 private final static int USWITCH5 = 0xE5;
\r
348 private final static int USWITCH6 = 0xE6;
\r
349 private final static int USWITCH7 = 0xE7;
\r
351 private final static int UDEFINE0 = 0xE8;
\r
352 private final static int UDEFINE1 = 0xE9;
\r
353 private final static int UDEFINE2 = 0xEA;
\r
354 private final static int UDEFINE3 = 0xEB;
\r
355 private final static int UDEFINE4 = 0xEC;
\r
356 private final static int UDEFINE5 = 0xED;
\r
357 private final static int UDEFINE6 = 0xEE;
\r
358 private final static int UDEFINE7 = 0xEF;
\r
360 private final static int UQUOTEU = 0xF0;
\r
361 private final static int UDEFINEX = 0xF1;
\r
362 //private final static int URESERVED = 0xF2; // this is a reserved value
\r
364 /* Print out an array of characters, with non-printables (for me)
\r
365 displayed as hex values */
\r
// Walks the first len entries of chars and writes each one through log().
// NOTE(review): this listing omits several short lines of the method
// (around the two log() calls and after the loop) - confirm the exact
// output format against the full file.
366 private void printChars(char[] chars, int len) {
\r
367 for(int i = 0; i < len; i++) {
\r
368 int c = (int)chars[i];
\r
// values below 0x20 or at/above 0x7f are the ones shown as hex
369 if(c < 0x0020 || c >= 0x7f) {
\r
371 log(Integer.toHexString(c));
\r
// presumably the else branch (the `else` line itself is not in this
// listing): the character is logged as-is
374 log(String.valueOf((char)c));
\r
380 private void printBytes(byte[] byteBuffer, int len) {
\r
381 int curByteIndex = 0;
\r
382 int byteBufferLimit = len;
\r
383 int mode = SINGLEBYTEMODE;
\r
386 if(len > byteBuffer.length) {
\r
387 logln("Warning: printBytes called with length too large. Truncating");
\r
388 byteBufferLimit = byteBuffer.length;
\r
391 while(curByteIndex < byteBufferLimit) {
\r
393 case SINGLEBYTEMODE:
\r
394 while(curByteIndex < byteBufferLimit
\r
395 && mode == SINGLEBYTEMODE) {
\r
396 aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
\r
399 log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
\r
404 if (curByteIndex < byteBufferLimit) {
\r
405 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
407 if (curByteIndex < byteBufferLimit) {
\r
408 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
412 // switch to Unicode mode
\r
415 mode = UNICODEMODE;
\r
418 // handle all quote tags
\r
419 case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
\r
420 case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
\r
421 log("SQUOTE" + (aByte - SQUOTE0) + " ");
\r
422 if(curByteIndex < byteBufferLimit) {
\r
423 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
427 // handle all switch tags
\r
428 case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3:
\r
429 case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7:
\r
430 log("SSWITCH" + (aByte - SSWITCH0) + " ");
\r
433 // handle all define tags
\r
434 case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
\r
435 case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
\r
436 log("SDEFINE" + (aByte - SDEFINE0) + " ");
\r
437 if (curByteIndex < byteBufferLimit) {
\r
438 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
442 // handle define extended tag
\r
445 if (curByteIndex < byteBufferLimit) {
\r
446 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
448 if (curByteIndex < byteBufferLimit) {
\r
449 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
458 while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) {
\r
459 aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
\r
461 // handle all define tags
\r
462 case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
\r
463 case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
\r
464 log("UDEFINE" + (aByte - UDEFINE0) + " ");
\r
465 if (curByteIndex < byteBufferLimit) {
\r
466 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
468 mode = SINGLEBYTEMODE;
\r
471 // handle define extended tag
\r
474 if (curByteIndex < byteBufferLimit) {
\r
475 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
477 if (curByteIndex < byteBufferLimit) {
\r
478 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
482 // handle all switch tags
\r
483 case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3:
\r
484 case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7:
\r
485 log("USWITCH" + (aByte - USWITCH0) + " ");
\r
486 mode = SINGLEBYTEMODE;
\r
492 if (curByteIndex < byteBufferLimit) {
\r
493 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
495 if (curByteIndex < byteBufferLimit) {
\r
496 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
501 log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
\r
502 if (curByteIndex < byteBufferLimit) {
\r
503 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
\r
511 } // end switch( mode )
\r