/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
7 package com.ibm.icu.dev.test.compression;
9 import com.ibm.icu.dev.test.TestFmwk;
10 import com.ibm.icu.text.UnicodeCompressor;
11 import com.ibm.icu.text.UnicodeDecompressor;
13 public class ExhaustiveTest extends TestFmwk {
14 public static void main(String args[]) throws Exception {
15 new ExhaustiveTest().run(args);
18 /** Test simple compress/decompress API, returning # of errors */
19 public void testSimple() throws Exception {
20 for(int i = 0; i < fTestCases.length; i++) {
21 simpleTest(fTestCases[i]);
24 private void simpleTest(String s) throws Exception {
25 byte [] compressed = UnicodeCompressor.compress(s);
26 String res = UnicodeDecompressor.decompress(compressed);
27 if (logDiffs(s.toCharArray(), s.length(),
28 res.toCharArray(), res.length()) == false) {
29 logln(s.length() + " chars ===> "
30 + compressed.length + " bytes ===> "
31 + res.length() + " chars");
34 printBytes(compressed, compressed.length);
35 errln("testSimple did not compress correctly");
39 /** Test iterative compress/decompress API, returning # of errors */
40 public void testIterative() throws Exception {
41 for(int i = 0; i < fTestCases.length; i++) {
42 myTest(fTestCases[i].toCharArray(), fTestCases[i].length());
45 private void myTest(char[] chars, int len) {
46 UnicodeCompressor myCompressor = new UnicodeCompressor();
47 UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
49 // variables for my compressor
52 int myCompressedSize = Math.max(512, 3*len);
53 byte[] myCompressed = new byte[myCompressedSize];
54 int myDecompressedSize = Math.max(2, 2 * len);
55 char[] myDecompressed = new char[myDecompressedSize];
56 int[] unicharsRead = new int[1];
57 int[] bytesRead = new int[1];
59 myByteCount = myCompressor.compress(chars, 0, len, unicharsRead,
60 myCompressed, 0, myCompressedSize);
62 myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount,
63 bytesRead, myDecompressed, 0, myDecompressedSize);
65 if (logDiffs(chars, len, myDecompressed, myCharCount) == false) {
66 logln(len + " chars ===> "
67 + myByteCount + " bytes ===> "
68 + myCharCount + " chars");
71 printBytes(myCompressed, myByteCount);
72 errln("Iterative test failed");
76 /** Test iterative compress/decompress API */
77 public void testMultipass() throws Exception {
78 for(int i = 0; i < fTestCases.length; i++) {
79 myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length());
/**
 * Round-trips a character buffer through the compressor and decompressor in
 * several passes: each call works through a very small scratch buffer
 * (4 bytes when compressing, 2 chars when decompressing) and the pieces are
 * appended to a full-size buffer. Errors are reported through errln when the
 * character or byte totals do not add up, or when logDiffs returns true for
 * the reassembled text.
 *
 * NOTE(review): this listing carries a stray number at the start of every
 * line and is missing lines - at least the "do {" openers matching the two
 * "} while" lines, and the closing braces. Whether the two "MP:" log lines
 * sit in an else branch cannot be told from here. Restore from the upstream
 * file before compiling.
 *
 * @param chars the characters to compress
 * @param len   passed to compress as its third argument and used as the
 *              expected total of characters compressed
 * @throws Exception declared by the signature; no throw site is visible here
 */
82 private void myMultipassTest(char [] chars, int len) throws Exception {
83 UnicodeCompressor myCompressor = new UnicodeCompressor();
84 UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
86 // variables for my compressor
// Scratch output for a single compress call; fixed at 4 bytes (the
// len-dependent size is commented out).
89 int byteBufferSize = 4;//Math.max(4, len / 4);
90 byte[] byteBuffer = new byte [byteBufferSize];
// Accumulates the compressed bytes of all passes.
92 int compressedSize = Math.max(512, 3 * len);
93 byte[] compressed = new byte[compressedSize];
// Scratch output for a single decompress call; fixed at 2 chars.
96 int unicharBufferSize = 2;//byteBufferSize;
97 char[] unicharBuffer = new char[unicharBufferSize];
// Accumulates the decompressed chars of all passes.
99 int decompressedSize = Math.max(2, 2 * len);
100 char[] decompressed = new char[decompressedSize];
// Per-pass results returned by compress / decompress.
102 int bytesWritten = 0;
103 int unicharsWritten = 0;
// One-element arrays handed to compress / decompress; element 0 is read
// back after each call as the amount of input consumed.
105 int[] unicharsRead = new int[1];
106 int[] bytesRead = new int[1];
// Running totals across passes.
108 int totalCharsCompressed = 0;
109 int totalBytesWritten = 0;
111 int totalBytesDecompressed = 0;
112 int totalCharsWritten = 0;
114 // not used boolean err = false;
117 // perform the compression in a loop
// NOTE(review): the loop opener for the "} while" below is missing here.
120 // do the compression
121 bytesWritten = myCompressor.compress(chars, totalCharsCompressed,
122 len, unicharsRead, byteBuffer, 0, byteBufferSize);
124 // copy the current set of bytes into the target buffer
125 System.arraycopy(byteBuffer, 0, compressed,
126 totalBytesWritten, bytesWritten);
128 // update the no. of characters compressed
129 totalCharsCompressed += unicharsRead[0];
131 // update the no. of bytes written
132 totalBytesWritten += bytesWritten;
134 /*System.out.logln("Compression pass complete. Compressed "
135 + unicharsRead[0] + " chars into "
136 + bytesWritten + " bytes.");*/
137 } while(totalCharsCompressed < len);
// The loop exits once at least len chars were consumed; anything other
// than exactly len is reported as an error.
139 if (totalCharsCompressed != len) {
140 errln("ERROR: Number of characters compressed("
141 + totalCharsCompressed + ") != len(" + len + ")");
143 logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes.");
146 // perform the decompression in a loop
// NOTE(review): the loop opener for the "} while" below is missing here.
149 // do the decompression
150 unicharsWritten = myDecompressor.decompress(compressed,
151 totalBytesDecompressed, totalBytesWritten,
152 bytesRead, unicharBuffer, 0, unicharBufferSize);
154 // copy the current set of chars into the target buffer
155 System.arraycopy(unicharBuffer, 0, decompressed,
156 totalCharsWritten, unicharsWritten);
158 // update the no. of bytes decompressed
159 totalBytesDecompressed += bytesRead[0];
161 // update the no. of chars written
162 totalCharsWritten += unicharsWritten;
164 /*System.out.logln("Decompression pass complete. Decompressed "
165 + bytesRead[0] + " bytes into "
166 + unicharsWritten + " chars.");*/
167 } while (totalBytesDecompressed < totalBytesWritten);
// Same check as for compression: every compressed byte must have been
// consumed, no more and no fewer.
169 if (totalBytesDecompressed != totalBytesWritten) {
170 errln("ERROR: Number of bytes decompressed("
171 + totalBytesDecompressed
172 + ") != totalBytesWritten("
173 + totalBytesWritten + ")");
175 logln("MP: " + totalBytesWritten
176 + " bytes ===> " + totalCharsWritten + " chars.");
// A true result from logDiffs is treated as "the buffers differ".
179 if (logDiffs(chars, len, decompressed, totalCharsWritten)) {
180 errln("ERROR: buffer contents incorrect");
184 /**
 * Logs the differences between two character buffers: a length mismatch
 * report followed by a dump of both buffers, and a "First difference"
 * report giving the index and the hex values of the expected (s1) and
 * actual (s2) characters.
 *
 * NOTE(review): this listing carries a stray number at the start of every
 * line and is missing lines - the conditions guarding both reports, the
 * tail of the length message, the lines between the printChars calls, every
 * assignment to result, and the return statement. Callers treat a true
 * result as "the buffers differ"; presumably result is set to true whenever
 * a report is written - confirm against the upstream file.
 *
 * @param s1    expected characters
 * @param s1len number of valid characters in s1
 * @param s2    actual characters
 * @param s2len number of valid characters in s2
 * @return the value of result; it starts out false
 */
185 private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) {
186 boolean result = false;
// Length report. NOTE(review): the guarding condition is missing here.
189 logln("====================");
190 logln("Length doesn't match: expected " + s1len
// NOTE(review): the remainder of the logln(...) expression above is missing.
193 printChars(s1, s1len);
195 printChars(s2, s2len);
// Only the range present in both buffers is walked.
199 int len = Math.min(s1len, s2len);
200 for(int i = 0; i < len; ++i) {
// NOTE(review): the per-character comparison is missing here.
// The separator is written only while result is still false.
202 if(result == false) {
203 logln("====================");
205 logln("First difference at char " + i);
206 logln("Exp. char: " + Integer.toHexString(s1[i]));
207 logln("Got char : " + Integer.toHexString(s2[i]));
209 printChars(s1, s1len);
211 printChars(s2, s2len);
// generate a string of characters, with simulated runs of characters
/* Disabled helper. Only fragments of its body survive in this listing; each
   gap is marked with "...".
private static char[] randomChars(int len, Random random) {
    char[] result = new char [len];
    ...
    runLen = (int) (30 * random.nextDouble());
    if(used + runLen >= len) {
    ...
    randomRun(result, used, runLen, random);
    ...
}*/
// generate a run of characters in a "window"
/* Disabled helper. Only fragments of its body survive in this listing; each
   gap is marked with "...".
private static void randomRun(char[] target, int pos, int len, Random random) {
    int offset = (int) (0xFFFF * random.nextDouble());

    // don't overflow 16 bits
    if(offset > 0xFF80) {
    ...
    for(int i = pos; i < pos + len; i++) {
        target[i] = (char)(offset + (0x7F * random.nextDouble()));
    ...
}*/
252 private static final String [] fTestCases = {
253 "Hello \u9292 \u9192 World!",
254 "Hell\u0429o \u9292 \u9192 W\u0084rld!",
255 "Hell\u0429o \u9292 \u9292W\u0084rld!",
257 "\u0648\u06c8", // catch missing reset
260 "\u4444\uE001", // lowest quotable
261 "\u4444\uf2FF", // highest quotable
262 "\u4444\uf188\u4444",
263 "\u4444\uf188\uf288",
264 "\u4444\uf188abc\0429\uf288",
266 "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!",
267 "Hell\u0429o \u9292 \u9292W\u0084rld!",
268 "Hello World!123456",
269 "Hello W\u0081\u011f\u0082!", // Latin 1 run
271 "abc\u0301\u0302", // uses SQn for u301 u302
272 "abc\u4411d", // uses SQU
273 "abc\u4411\u4412d",// uses SCU
274 "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5
275 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data
277 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000",
278 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c",
279 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002",
282 "\u0000", // smallest BMP character
283 "\uFFFF", // largest BMP character
285 "\ud800\udc00", // smallest surrogate
286 "\ud8ff\udcff", // largest surrogate pair
289 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa",
290 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c",
291 "\u0041\u00df\u0401\u015f",
293 "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5",
294 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489"
298 //==========================
300 //==========================
301 private final static int SINGLEBYTEMODE = 0;
302 private final static int UNICODEMODE = 1;
304 //==========================
305 // Single-byte mode tags
306 //==========================
307 private final static int SDEFINEX = 0x0B;
308 //private final static int SRESERVED = 0x0C; // this is a reserved value
309 private final static int SQUOTEU = 0x0E;
310 private final static int SSWITCHU = 0x0F;
312 private final static int SQUOTE0 = 0x01;
313 private final static int SQUOTE1 = 0x02;
314 private final static int SQUOTE2 = 0x03;
315 private final static int SQUOTE3 = 0x04;
316 private final static int SQUOTE4 = 0x05;
317 private final static int SQUOTE5 = 0x06;
318 private final static int SQUOTE6 = 0x07;
319 private final static int SQUOTE7 = 0x08;
321 private final static int SSWITCH0 = 0x10;
322 private final static int SSWITCH1 = 0x11;
323 private final static int SSWITCH2 = 0x12;
324 private final static int SSWITCH3 = 0x13;
325 private final static int SSWITCH4 = 0x14;
326 private final static int SSWITCH5 = 0x15;
327 private final static int SSWITCH6 = 0x16;
328 private final static int SSWITCH7 = 0x17;
330 private final static int SDEFINE0 = 0x18;
331 private final static int SDEFINE1 = 0x19;
332 private final static int SDEFINE2 = 0x1A;
333 private final static int SDEFINE3 = 0x1B;
334 private final static int SDEFINE4 = 0x1C;
335 private final static int SDEFINE5 = 0x1D;
336 private final static int SDEFINE6 = 0x1E;
337 private final static int SDEFINE7 = 0x1F;
339 //==========================
341 //==========================
342 private final static int USWITCH0 = 0xE0;
343 private final static int USWITCH1 = 0xE1;
344 private final static int USWITCH2 = 0xE2;
345 private final static int USWITCH3 = 0xE3;
346 private final static int USWITCH4 = 0xE4;
347 private final static int USWITCH5 = 0xE5;
348 private final static int USWITCH6 = 0xE6;
349 private final static int USWITCH7 = 0xE7;
351 private final static int UDEFINE0 = 0xE8;
352 private final static int UDEFINE1 = 0xE9;
353 private final static int UDEFINE2 = 0xEA;
354 private final static int UDEFINE3 = 0xEB;
355 private final static int UDEFINE4 = 0xEC;
356 private final static int UDEFINE5 = 0xED;
357 private final static int UDEFINE6 = 0xEE;
358 private final static int UDEFINE7 = 0xEF;
360 private final static int UQUOTEU = 0xF0;
361 private final static int UDEFINEX = 0xF1;
362 //private final static int URESERVED = 0xF2; // this is a reserved value
364 /* Print out an array of characters, with non-printables (for me)
365 displayed as hex values */
/*
 * Writes the first len characters of chars to the test log using log().
 * Characters below 0x20 or at/above 0x7f are written as their hex value;
 * the String.valueOf branch presumably handles the remaining (printable
 * ASCII) characters.
 *
 * NOTE(review): this listing carries a stray number at the start of every
 * line and is missing lines - whatever surrounds the hex output, the
 * separation between the two log calls (presumably an else), and the
 * closing braces. Confirm against the upstream file.
 *
 * chars: characters to print
 * len:   number of characters of chars to print
 */
366 private void printChars(char[] chars, int len) {
367 for(int i = 0; i < len; i++) {
368 int c = (int)chars[i];
// Control characters and everything from DEL upwards count as non-printable.
369 if(c < 0x0020 || c >= 0x7f) {
371 log(Integer.toHexString(c));
374 log(String.valueOf((char)c));
380 private void printBytes(byte[] byteBuffer, int len) {
381 int curByteIndex = 0;
382 int byteBufferLimit = len;
383 int mode = SINGLEBYTEMODE;
386 if(len > byteBuffer.length) {
387 logln("Warning: printBytes called with length too large. Truncating");
388 byteBufferLimit = byteBuffer.length;
391 while(curByteIndex < byteBufferLimit) {
394 while(curByteIndex < byteBufferLimit
395 && mode == SINGLEBYTEMODE) {
396 aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
399 log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
404 if (curByteIndex < byteBufferLimit) {
405 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
407 if (curByteIndex < byteBufferLimit) {
408 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
412 // switch to Unicode mode
418 // handle all quote tags
419 case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
420 case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
421 log("SQUOTE" + (aByte - SQUOTE0) + " ");
422 if(curByteIndex < byteBufferLimit) {
423 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
427 // handle all switch tags
428 case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3:
429 case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7:
430 log("SSWITCH" + (aByte - SSWITCH0) + " ");
433 // handle all define tags
434 case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
435 case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
436 log("SDEFINE" + (aByte - SDEFINE0) + " ");
437 if (curByteIndex < byteBufferLimit) {
438 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
442 // handle define extended tag
445 if (curByteIndex < byteBufferLimit) {
446 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
448 if (curByteIndex < byteBufferLimit) {
449 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
458 while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) {
459 aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF;
461 // handle all define tags
462 case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
463 case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
464 log("UDEFINE" + (aByte - UDEFINE0) + " ");
465 if (curByteIndex < byteBufferLimit) {
466 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
468 mode = SINGLEBYTEMODE;
471 // handle define extended tag
474 if (curByteIndex < byteBufferLimit) {
475 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
477 if (curByteIndex < byteBufferLimit) {
478 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
482 // handle all switch tags
483 case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3:
484 case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7:
485 log("USWITCH" + (aByte - USWITCH0) + " ");
486 mode = SINGLEBYTEMODE;
492 if (curByteIndex < byteBufferLimit) {
493 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
495 if (curByteIndex < byteBufferLimit) {
496 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
501 log(Integer.toHexString(((int) aByte) & 0xFF) + " ");
502 if (curByteIndex < byteBufferLimit) {
503 log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " ");
511 } // end switch( mode )