/* ******************************************************************************* * Copyright (C) 1996-2010, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.compression; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.text.UnicodeCompressor; import com.ibm.icu.text.UnicodeDecompressor; public class ExhaustiveTest extends TestFmwk { public static void main(String args[]) throws Exception { new ExhaustiveTest().run(args); } /** Test simple compress/decompress API, returning # of errors */ public void testSimple() throws Exception { for(int i = 0; i < fTestCases.length; i++) { simpleTest(fTestCases[i]); } } private void simpleTest(String s) throws Exception { byte [] compressed = UnicodeCompressor.compress(s); String res = UnicodeDecompressor.decompress(compressed); if (logDiffs(s.toCharArray(), s.length(), res.toCharArray(), res.length()) == false) { logln(s.length() + " chars ===> " + compressed.length + " bytes ===> " + res.length() + " chars"); } else { logln("Compressed:"); printBytes(compressed, compressed.length); errln("testSimple did not compress correctly"); } } /** Test iterative compress/decompress API, returning # of errors */ public void testIterative() throws Exception { for(int i = 0; i < fTestCases.length; i++) { myTest(fTestCases[i].toCharArray(), fTestCases[i].length()); } } private void myTest(char[] chars, int len) { UnicodeCompressor myCompressor = new UnicodeCompressor(); UnicodeDecompressor myDecompressor = new UnicodeDecompressor(); // variables for my compressor int myByteCount = 0; int myCharCount = 0; int myCompressedSize = Math.max(512, 3*len); byte[] myCompressed = new byte[myCompressedSize]; int myDecompressedSize = Math.max(2, 2 * len); char[] myDecompressed = new char[myDecompressedSize]; int[] unicharsRead = new int[1]; int[] bytesRead = new int[1]; myByteCount = myCompressor.compress(chars, 0, len, unicharsRead, myCompressed, 0, myCompressedSize); myCharCount = myDecompressor.decompress(myCompressed, 0, myByteCount, bytesRead, myDecompressed, 0, myDecompressedSize); if (logDiffs(chars, len, myDecompressed, myCharCount) == false) { logln(len + " chars ===> " + myByteCount + " bytes ===> " + myCharCount + " chars"); } else { logln("Compressed:"); printBytes(myCompressed, myByteCount); errln("Iterative test failed"); } } /** Test iterative compress/decompress API */ public void testMultipass() throws Exception { for(int i = 0; i < fTestCases.length; i++) { myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i].length()); } } private void myMultipassTest(char [] chars, int len) throws Exception { UnicodeCompressor myCompressor = new UnicodeCompressor(); UnicodeDecompressor myDecompressor = new UnicodeDecompressor(); // variables for my compressor // for looping int byteBufferSize = 4;//Math.max(4, len / 4); byte[] byteBuffer = new byte [byteBufferSize]; // real target int compressedSize = Math.max(512, 3 * len); byte[] compressed = new byte[compressedSize]; // for looping int unicharBufferSize = 2;//byteBufferSize; char[] unicharBuffer = new char[unicharBufferSize]; // real target int decompressedSize = Math.max(2, 2 * len); char[] decompressed = new char[decompressedSize]; int bytesWritten = 0; int unicharsWritten = 0; int[] unicharsRead = new int[1]; int[] bytesRead = new int[1]; int totalCharsCompressed = 0; int totalBytesWritten = 0; int totalBytesDecompressed = 0; int totalCharsWritten = 0; // not used boolean err = false; // perform the compression in a loop do { // do the compression bytesWritten = myCompressor.compress(chars, totalCharsCompressed, len, unicharsRead, byteBuffer, 0, byteBufferSize); // copy the current set of bytes into the target buffer System.arraycopy(byteBuffer, 0, compressed, totalBytesWritten, bytesWritten); // update the no. of characters compressed totalCharsCompressed += unicharsRead[0]; // update the no. of bytes written totalBytesWritten += bytesWritten; /*System.out.logln("Compression pass complete. Compressed " + unicharsRead[0] + " chars into " + bytesWritten + " bytes.");*/ } while(totalCharsCompressed < len); if (totalCharsCompressed != len) { errln("ERROR: Number of characters compressed(" + totalCharsCompressed + ") != len(" + len + ")"); } else { logln("MP: " + len + " chars ===> " + totalBytesWritten + " bytes."); } // perform the decompression in a loop do { // do the decompression unicharsWritten = myDecompressor.decompress(compressed, totalBytesDecompressed, totalBytesWritten, bytesRead, unicharBuffer, 0, unicharBufferSize); // copy the current set of chars into the target buffer System.arraycopy(unicharBuffer, 0, decompressed, totalCharsWritten, unicharsWritten); // update the no. of bytes decompressed totalBytesDecompressed += bytesRead[0]; // update the no. of chars written totalCharsWritten += unicharsWritten; /*System.out.logln("Decompression pass complete. Decompressed " + bytesRead[0] + " bytes into " + unicharsWritten + " chars.");*/ } while (totalBytesDecompressed < totalBytesWritten); if (totalBytesDecompressed != totalBytesWritten) { errln("ERROR: Number of bytes decompressed(" + totalBytesDecompressed + ") != totalBytesWritten(" + totalBytesWritten + ")"); } else { logln("MP: " + totalBytesWritten + " bytes ===> " + totalCharsWritten + " chars."); } if (logDiffs(chars, len, decompressed, totalCharsWritten)) { errln("ERROR: buffer contents incorrect"); } } /** Print differences between two character buffers */ private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) { boolean result = false; if(s1len != s2len) { logln("===================="); logln("Length doesn't match: expected " + s1len + ", got " + s2len); logln("Expected:"); printChars(s1, s1len); logln("Got:"); printChars(s2, s2len); result = true; } int len = Math.min(s1len, s2len); for(int i = 0; i < len; ++i) { if(s1[i] != s2[i]) { if(result == false) { logln("===================="); } logln("First difference at char " + i); logln("Exp. char: " + Integer.toHexString(s1[i])); logln("Got char : " + Integer.toHexString(s2[i])); logln("Expected:"); printChars(s1, s1len); logln("Got:"); printChars(s2, s2len); result = true; break; } } return result; } // generate a string of characters, with simulated runs of characters /*private static char[] randomChars(int len, Random random) { char[] result = new char [len]; int runLen = 0; int used = 0; while(used < len) { runLen = (int) (30 * random.nextDouble()); if(used + runLen >= len) { runLen = len - used; } randomRun(result, used, runLen, random); used += runLen; } return result; }*/ // generate a run of characters in a "window" /*private static void randomRun(char[] target, int pos, int len, Random random) { int offset = (int) (0xFFFF * random.nextDouble()); // don't overflow 16 bits if(offset > 0xFF80) { offset = 0xFF80; } for(int i = pos; i < pos + len; i++) { target[i] = (char)(offset + (0x7F * random.nextDouble())); } }*/ private static final String [] fTestCases = { "Hello \u9292 \u9192 World!", "Hell\u0429o \u9292 \u9192 W\u0084rld!", "Hell\u0429o \u9292 \u9292W\u0084rld!", "\u0648\u06c8", // catch missing reset "\u0648\u06c8", "\u4444\uE001", // lowest quotable "\u4444\uf2FF", // highest quotable "\u4444\uf188\u4444", "\u4444\uf188\uf288", "\u4444\uf188abc\0429\uf288", "\u9292\u2222", "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!", "Hell\u0429o \u9292 \u9292W\u0084rld!", "Hello World!123456", "Hello W\u0081\u011f\u0082!", // Latin 1 run "abc\u0301\u0302", // uses SQn for u301 u302 "abc\u4411d", // uses SQU "abc\u4411\u4412d",// uses SCU "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data "\u9292\u2222", "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", "", // empty input "\u0000", // smallest BMP character "\uFFFF", // largest BMP character "\ud800\udc00", // smallest surrogate "\ud8ff\udcff", // largest surrogate pair // regression tests "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", "\u0041\u00df\u0401\u015f", "\u9066\u2123abc", "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489" }; //========================== // Compression modes //========================== private final static int SINGLEBYTEMODE = 0; private final static int UNICODEMODE = 1; //========================== // Single-byte mode tags //========================== private final static int SDEFINEX = 0x0B; //private final static int SRESERVED = 0x0C; // this is a reserved value private final static int SQUOTEU = 0x0E; private final static int SSWITCHU = 0x0F; private final static int SQUOTE0 = 0x01; private final static int SQUOTE1 = 0x02; private final static int SQUOTE2 = 0x03; private final static int SQUOTE3 = 0x04; private final static int SQUOTE4 = 0x05; private final static int SQUOTE5 = 0x06; private final static int SQUOTE6 = 0x07; private final static int SQUOTE7 = 0x08; private final static int SSWITCH0 = 0x10; private final static int SSWITCH1 = 0x11; private final static int SSWITCH2 = 0x12; private final static int SSWITCH3 = 0x13; private final static int SSWITCH4 = 0x14; private final static int SSWITCH5 = 0x15; private final static int SSWITCH6 = 0x16; private final static int SSWITCH7 = 0x17; private final static int SDEFINE0 = 0x18; private final static int SDEFINE1 = 0x19; private final static int SDEFINE2 = 0x1A; private final static int SDEFINE3 = 0x1B; private final static int SDEFINE4 = 0x1C; private final static int SDEFINE5 = 0x1D; private final static int SDEFINE6 = 0x1E; private final static int SDEFINE7 = 0x1F; //========================== // Unicode mode tags //========================== private final static int USWITCH0 = 0xE0; private final static int USWITCH1 = 0xE1; private final static int USWITCH2 = 0xE2; private final static int USWITCH3 = 0xE3; private final static int USWITCH4 = 0xE4; private final static int USWITCH5 = 0xE5; private final static int USWITCH6 = 0xE6; private final static int USWITCH7 = 0xE7; private final static int UDEFINE0 = 0xE8; private final static int UDEFINE1 = 0xE9; private final static int UDEFINE2 = 0xEA; private final static int UDEFINE3 = 0xEB; private final static int UDEFINE4 = 0xEC; private final static int UDEFINE5 = 0xED; private final static int UDEFINE6 = 0xEE; private final static int UDEFINE7 = 0xEF; private final static int UQUOTEU = 0xF0; private final static int UDEFINEX = 0xF1; //private final static int URESERVED = 0xF2; // this is a reserved value /* Print out an array of characters, with non-printables (for me) displayed as hex values */ private void printChars(char[] chars, int len) { for(int i = 0; i < len; i++) { int c = (int)chars[i]; if(c < 0x0020 || c >= 0x7f) { log("[0x"); log(Integer.toHexString(c)); log("]"); } else { log(String.valueOf((char)c)); } } logln(""); } private void printBytes(byte[] byteBuffer, int len) { int curByteIndex = 0; int byteBufferLimit = len; int mode = SINGLEBYTEMODE; int aByte = 0x00; if(len > byteBuffer.length) { logln("Warning: printBytes called with length too large. Truncating"); byteBufferLimit = byteBuffer.length; } while(curByteIndex < byteBufferLimit) { switch(mode) { case SINGLEBYTEMODE: while(curByteIndex < byteBufferLimit && mode == SINGLEBYTEMODE) { aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF; switch(aByte) { default: log(Integer.toHexString(((int) aByte) & 0xFF) + " "); break; // quote unicode case SQUOTEU: log("SQUOTEU "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; // switch to Unicode mode case SSWITCHU: log("SSWITCHU "); mode = UNICODEMODE; break; // handle all quote tags case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3: case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7: log("SQUOTE" + (aByte - SQUOTE0) + " "); if(curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; // handle all switch tags case SSWITCH0: case SSWITCH1: case SSWITCH2: case SSWITCH3: case SSWITCH4: case SSWITCH5: case SSWITCH6: case SSWITCH7: log("SSWITCH" + (aByte - SSWITCH0) + " "); break; // handle all define tags case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3: case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7: log("SDEFINE" + (aByte - SDEFINE0) + " "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; // handle define extended tag case SDEFINEX: log("SDEFINEX "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; } // end switch } // end while break; case UNICODEMODE: while(curByteIndex < byteBufferLimit && mode == UNICODEMODE) { aByte = ((int)byteBuffer[curByteIndex++]) & 0xFF; switch(aByte) { // handle all define tags case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3: case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7: log("UDEFINE" + (aByte - UDEFINE0) + " "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } mode = SINGLEBYTEMODE; break; // handle define extended tag case UDEFINEX: log("UDEFINEX "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; // handle all switch tags case USWITCH0: case USWITCH1: case USWITCH2: case USWITCH3: case USWITCH4: case USWITCH5: case USWITCH6: case USWITCH7: log("USWITCH" + (aByte - USWITCH0) + " "); mode = SINGLEBYTEMODE; break; // quote unicode case UQUOTEU: log("UQUOTEU "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; default: log(Integer.toHexString(((int) aByte) & 0xFF) + " "); if (curByteIndex < byteBufferLimit) { log(Integer.toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF) + " "); } break; } // end switch } // end while break; } // end switch( mode ) } // end while logln(""); } }