jars/icu4j-52_1/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java

   1 /*
   2  *******************************************************************************
   3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
   4  * others. All Rights Reserved.                                                *
   5  *******************************************************************************
   6  *
   7  *******************************************************************************
   8  */
   9
  10 package com.ibm.icu.dev.test.charset;
  11
  12 import java.nio.ByteBuffer;
  13 import java.nio.CharBuffer;
  14 import java.nio.charset.Charset;
  15 import java.nio.charset.CharsetDecoder;
  16 import java.nio.charset.CharsetEncoder;
  17 import java.nio.charset.CoderResult;
  18 import java.nio.charset.CodingErrorAction;
  19 import java.util.Iterator;
  20
  21 import com.ibm.icu.charset.CharsetCallback;
  22 import com.ibm.icu.charset.CharsetDecoderICU;
  23 import com.ibm.icu.charset.CharsetEncoderICU;
  24 import com.ibm.icu.charset.CharsetICU;
  25 import com.ibm.icu.charset.CharsetProviderICU;
  26 import com.ibm.icu.dev.test.ModuleTest;
  27 import com.ibm.icu.dev.test.TestDataModule.DataMap;
  28 import com.ibm.icu.impl.ICUResourceBundle;
  29 import com.ibm.icu.text.UnicodeSet;
  30
  31 /**
  32  * This maps to convtest.c which tests the test file for data-driven conversion tests.
  33  *
  34  */
  35 public class TestConversion extends ModuleTest {
  36     /**
  37      * This maps to the C struct of conversion case in convtest.h that stores the
  38      * data for a conversion test
  39      *
  40      */
  41     private class ConversionCase {
  42         int caseNr;                                             // testcase index
  43         String option = null;                                   // callback options
  44         CodingErrorAction cbErrorAction = null;                 // callback action type
  45         CharBuffer toUnicodeResult = null;
  46         ByteBuffer fromUnicodeResult = null;
  47
  48         // data retrieved from a test case conversion.txt
  49         String charset;                                         // charset
  50         String unicode;                                         // unicode string
  51         ByteBuffer bytes;                                       // byte
  52         int[] offsets;                                          // offsets
  53         boolean finalFlush;                                     // flush
  54         boolean fallbacks;                                      // fallback
  55         String outErrorCode;                                    // errorCode
  56         String cbopt;                                           // callback
  57
  58         // TestGetUnicodeSet variables
  59         String map;
  60         String mapnot;
  61         int which;
  62
  63         // CharsetCallback encoder and decoder
  64         CharsetCallback.Decoder cbDecoder = null;
  65         CharsetCallback.Encoder cbEncoder = null;
  66
  67         String caseNrAsString() {
  68             return "[" + caseNr + "]";
  69         }
  70     }
  71
  72     /* In the data-driven conversion test, converters that are not available in
  73      * ICU4J are marked with the following leading symbol.
  74      */
  75     private static final char UNSUPPORTED_CHARSET_SYMBOL = '+';
  76
  77     // public methods --------------------------------------------------------
  78
  79     public static void main(String[] args) throws Exception {
  80         new TestConversion().run(args);
  81     }
  82
    /**
     * Creates the test by handing two fixed strings to the {@code ModuleTest}
     * constructor.
     * NOTE(review): presumably the first is the test-data base path and the
     * second the name of the data module to load - confirm against ModuleTest.
     */
    public TestConversion() {
        super("com/ibm/icu/dev/data/testdata/", "conversion");
    }
  86
  87     /*
  88      * This method maps to the convtest.cpp runIndexedTest() method to run each
  89      * type of conversion.
  90      */
  91     public void processModules() {
  92         try {
  93             int testFromUnicode = 0;
  94             int testToUnicode = 0;
  95             String testName = t.getName().toString();
  96
  97             // Iterate through and get each of the test case to process
  98             for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
  99                 DataMap testcase = (DataMap) iter.next();
 100
 101                 if (testName.equalsIgnoreCase("toUnicode")) {
 102                     TestToUnicode(testcase, testToUnicode);
 103                     testToUnicode++;
 104
 105                 } else if (testName.equalsIgnoreCase("fromUnicode")) {
 106                     TestFromUnicode(testcase, testFromUnicode);
 107                     testFromUnicode++;
 108                 } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
 109                     TestGetUnicodeSet(testcase);
 110                 } else {
 111                     warnln("Could not load the test cases for conversion");
 112                     continue;
 113                 }
 114             }
 115         } catch (Exception e) {
 116             e.printStackTrace();
 117         }
 118
 119     }
 120
 121     // private methods -------------------------------------------------------
 122
 123
 124     // fromUnicode test worker functions ---------------------------------------
 125     private void TestFromUnicode(DataMap testcase, int caseNr) {
 126
 127         ConversionCase cc = new ConversionCase();
 128
 129        try {
 130             // retrieve test case data
 131             cc.caseNr = caseNr;
 132             cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString();
 133             cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString();
 134             cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary();
 135             cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector();
 136             cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0;
 137             cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0;
 138             cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString();
 139             cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString();
 140
 141         } catch (Exception e) {
 142             errln("Skipping test:");
 143             errln("error parsing conversion/toUnicode test case " + cc.caseNr);
 144             return;
 145         }
 146
 147         /*
 148          * Skip the following data driven converter tests.
 149          * These tests were added to the data driven conversion test in ICU
 150          * to test direct-from-UTF-8 m:n Unicode:charset conversion.
 151          * This feature is not in ICU4J.
 152          * See #9601
 153          */
 154         String [] testsToSkip = {
 155                 "*test2"
 156         };
 157         for (int i = 0; i < testsToSkip.length; i++) {
 158             if (cc.charset.equals(testsToSkip[i])) {
 159                 logln("");
 160                 logln("Skipping: " + cc.charset);
 161                 logln("...............................................");
 162                 return;
 163             }
 164         }
 165
 166         // ----for debugging only
 167         logln("");
 168         logln("TestFromUnicode[" + caseNr + "] " + cc.charset + " ");
 169         logln("Unicode:   " + cc.unicode);
 170         logln("Bytes:    " + printbytes(cc.bytes, cc.bytes.limit()));
 171         ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
 172         logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")");
 173         logln("...............................................");
 174
 175         // process the retrieved test data case
 176         if (cc.offsets.length == 0) {
 177             cc.offsets = null;
 178         } else if (cc.offsets.length != cc.bytes.limit()) {
 179             errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
 180                     + "] and offsets[" + cc.offsets.length
 181                     + "] must have the same length");
 182             return;
 183         }
 184
 185         // check the callback replacement value
 186         if (cc.cbopt.length() > 0) {
 187
 188             switch ((cc.cbopt).charAt(0)) {
 189             case '?':
 190                 cc.cbErrorAction = CodingErrorAction.REPLACE;
 191                 break;
 192             case '0':
 193                 cc.cbErrorAction = CodingErrorAction.IGNORE;
 194                 break;
 195             case '.':
 196                 cc.cbErrorAction = CodingErrorAction.REPORT;
 197                 break;
 198             case '&':
 199                 cc.cbErrorAction = CodingErrorAction.REPLACE;
 200                 cc.cbEncoder = CharsetCallback.FROM_U_CALLBACK_ESCAPE;
 201                 break;
 202             default:
 203                 cc.cbErrorAction = null;
 204                 break;
 205             }
 206
 207             // check for any options for the callback value --
 208             cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt
 209                     .substring(1);
 210             if (cc.option == null) {
 211                 cc.option = null;
 212             }
 213         }
 214         FromUnicodeCase(cc);
 215     }
 216
 217
 218     private void FromUnicodeCase(ConversionCase cc) {
 219
 220         // create charset encoder for conversion test
 221         CharsetProviderICU provider = new CharsetProviderICU();
 222         CharsetEncoder encoder = null;
 223         Charset charset = null;
 224         try {
 225             // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
 226             charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
 227                     ? (Charset) provider.charsetForName(cc.charset.substring(1),
 228                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
 229                     : (Charset) provider.charsetForName(cc.charset);
 230             encoder = (CharsetEncoder) charset.newEncoder();
 231             encoder.onMalformedInput(CodingErrorAction.REPLACE);
 232             encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
 233             if (encoder instanceof CharsetEncoderICU) {
 234                 ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
 235                 if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
 236                     errln("Fallback could not be set for " + cc.charset);
 237                 }
 238             }
 239
 240         } catch (Exception e) {
 241             if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
 242                 logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
 243             } else {
 244                 errln(cc.charset + " was not found");
 245             }
 246             return;
 247         }
 248
 249         // set the callback for the encoder
 250         if (cc.cbErrorAction != null) {
 251             if (cc.cbEncoder != null) {
 252                 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.malformedForLength(1), cc.cbEncoder, cc.option);
 253                 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), cc.cbEncoder, cc.option);
 254             } else {
 255                 encoder.onUnmappableCharacter(cc.cbErrorAction);
 256                 encoder.onMalformedInput(cc.cbErrorAction);
 257             }
 258
 259             // if action has an option, put in the option for the case
 260             if (cc.option.equals("i")) {
 261                 encoder.onMalformedInput(CodingErrorAction.REPORT);
 262             }
 263
 264             // if callback action is replace,
 265           //   and there is a subchar
 266             // replace the decoder's default replacement value
 267             // if substring, skip test due to current api not supporting
 268             // substring
 269             if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
 270                 if (cc.cbopt.length() > 1) {
 271                     if (cc.cbopt.length() > 1 && cc.cbopt.charAt(1) == '=') {
 272                         logln("Skipping test due to limitation in Java API - substitution string not supported");
 273                         return;
 274                     } else {
 275                         // // read NUL-separated subchar first, if any
 276                         // copy the subchar from Latin-1 characters
 277                         // start after the NUL
 278                         if (cc.cbopt.charAt(1) == 0x00) {
 279                             cc.cbopt = cc.cbopt.substring(2);
 280
 281                             try {
 282                                 encoder.replaceWith(toByteArray(cc.cbopt));
 283                             } catch (Exception e) {
 284                                 logln("Skipping test due to limitation in Java API - substitution character sequence size error");
 285                                 return;
 286                             }
 287                         }
 288                     }
 289                 }
 290             }
 291         }
 292
 293         // do charset encoding from unicode
 294
 295         // testing by steps using charset.encoder(in,out,flush)
 296         int resultLength;
 297         boolean ok;
 298         String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
 299                 { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
 300         int i, step;
 301
 302         ok = true;
 303
 304         for (i = 0; i < steps.length && ok; ++i) {
 305             step = Integer.parseInt(steps[i][0]);
 306
 307             logln("Testing step:[" + step + "]");
 308             try {
 309                 resultLength = stepFromUnicode(cc, encoder, step);
 310                 ok = checkFromUnicode(cc, resultLength);
 311             } catch (Exception ex) {
 312                 errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]");
 313                 ex.printStackTrace(System.out);
 314                 return;
 315             }
 316
 317         }
 318         // testing by whole buffer using out = charset.encoder(in)
 319         while (ok && cc.finalFlush) {
 320             logln("Testing java API charset.encoder(in):");
 321             cc.fromUnicodeResult = null;
 322             ByteBuffer out = null;
 323
 324             try {
 325                 out = encoder.encode(CharBuffer.wrap(cc.unicode.toCharArray()));
 326                 out.position(out.limit());
 327                 if (out.limit() != out.capacity() || cc.finalFlush) {
 328                     int pos = out.position();
 329                     byte[] temp = out.array();
 330                     out = ByteBuffer.allocate(temp.length * 4);
 331                     out.put(temp);
 332                     out.position(pos);
 333                     CoderResult cr = encoder.flush(out);
 334                     if (cr.isOverflow()) {
 335                         logln("Overflow error with flushing encoder");
 336                     }
 337                 }
 338                 cc.fromUnicodeResult = out;
 339
 340                 ok = checkFromUnicode(cc, out.limit());
 341                 if (!ok) {
 342                     break;
 343                 }
 344             } catch (Exception e) {
 345                 //check the error code to see if it matches cc.errorCode
 346                 logln("Encoder returned an error code");
 347                 logln("ErrorCode expected is: " + cc.outErrorCode);
 348                 logln("Error Result is: " + e.toString());
 349             }
 350             break;
 351         }
 352     }
 353
 354     private int stepFromUnicode(ConversionCase cc, CharsetEncoder encoder, int step) {
 355         if (step < 0) {
 356             errln("Negative step size, test internal error.");
 357             return 0;
 358         }
 359
 360         int sourceLen = cc.unicode.length();
 361         int targetLen = cc.bytes.capacity() + 20;  // for BOM, and to let failures produce excess output
 362         CharBuffer source = CharBuffer.wrap(cc.unicode.toCharArray());
 363         ByteBuffer target = ByteBuffer.allocate(targetLen);
 364         cc.fromUnicodeResult = null;
 365         encoder.reset();
 366
 367         int currentSourceLimit;
 368         int currentTargetLimit;
 369         if (step > 0) {
 370             currentSourceLimit = Math.min(step, sourceLen);
 371             currentTargetLimit = Math.min(step, targetLen);
 372         } else {
 373             currentSourceLimit = sourceLen;
 374             currentTargetLimit = targetLen;
 375         }
 376
 377         CoderResult cr = null;
 378
 379         for (;;) {
 380             source.limit(currentSourceLimit);
 381             target.limit(currentTargetLimit);
 382
 383             cr = encoder.encode(source, target, currentSourceLimit == sourceLen);
 384
 385             if (cr.isUnderflow()) {
 386                 if (currentSourceLimit == sourceLen) {
 387                     if (target.position() == cc.bytes.limit()) {
 388                         // target contains the correct number of bytes
 389                         break;
 390                     }
 391                     // Do a final flush for cleanup, then break out
 392                     // Encode loop, exits with cr==underflow in normal operation.
 393                     //target.limit(targetLen);
 394                     target.limit(targetLen);
 395                     cr = encoder.flush(target);
 396                     if (cr.isUnderflow()) {
 397                         // good
 398                     } else if (cr.isOverflow()) {
 399                         errln(cc.caseNrAsString() + " Flush is producing excessive output");
 400                     } else {
 401                         errln(cc.caseNrAsString() + " Flush operation failed.  CoderResult = \""
 402                                 + cr.toString() + "\"");
 403                     }
 404                     break;
 405                 }
 406                 currentSourceLimit = Math.min(currentSourceLimit + step, sourceLen);
 407             } else if (cr.isOverflow()) {
 408                 if (currentTargetLimit == targetLen) {
 409                     errln(cc.caseNrAsString() + " encode() is producing excessive output");
 410                     break;
 411                 }
 412                 currentTargetLimit = Math.min(currentTargetLimit + step, targetLen);
 413             } else {
 414                 // check the error code to see if it matches cc.errorCode
 415                 logln("Encoder returned an error code");
 416                 logln("ErrorCode expected is: " + cc.outErrorCode);
 417                 logln("Error Result is: " + cr.toString());
 418                 break;
 419             }
 420
 421         }
 422
 423         cc.fromUnicodeResult = target;
 424         return target.position();
 425     }
 426
 427     private boolean checkFromUnicode(ConversionCase cc, int resultLength) {
 428         return checkResultsFromUnicode(cc, cc.bytes, cc.fromUnicodeResult);
 429     }
 430
 431     // toUnicode test worker functions ----------------------------------------- ***
 432
 433     private void TestToUnicode(DataMap testcase, int caseNr) {
 434         // create Conversion case to store the test case data
 435         ConversionCase cc = new ConversionCase();
 436
 437         try {
 438             // retrieve test case data
 439             cc.caseNr = caseNr;
 440             cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString();
 441             cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary();
 442             cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString();
 443             cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector();
 444             cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0;
 445             cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0;
 446             cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString();
 447             cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString();
 448
 449         } catch (Exception e) {
 450             errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr);
 451             return;
 452         }
 453
 454         // ----for debugging only
 455         logln("");
 456         logln("TestToUnicode[" + caseNr + "] " + cc.charset + " ");
 457         logln("Unicode:   " + hex(cc.unicode));
 458         logln("Bytes:    " + printbytes(cc.bytes, cc.bytes.limit()));
 459         ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
 460         logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")");
 461         logln("...............................................");
 462
 463         // process the retrieved test data case
 464         if (cc.offsets.length == 0) {
 465             cc.offsets = null;
 466         } else if (cc.offsets.length != cc.unicode.length()) {
 467             errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode["
 468                     + cc.unicode.length() + "] and offsets["
 469                     + cc.offsets.length + "] must have the same length");
 470             return;
 471         }
 472         // check for the callback replacement value for unmappable
 473         // characters or malformed errors
 474         if (cc.cbopt.length() > 0) {
 475             switch ((cc.cbopt).charAt(0)) {
 476             case '?': // CALLBACK_SUBSTITUTE
 477                 cc.cbErrorAction = CodingErrorAction.REPLACE;
 478                 break;
 479             case '0': // CALLBACK_SKIP
 480                 cc.cbErrorAction = CodingErrorAction.IGNORE;
 481                 break;
 482             case '.': // CALLBACK_STOP
 483                 cc.cbErrorAction = CodingErrorAction.REPORT;
 484                 break;
 485             case '&': // CALLBACK_ESCAPE
 486                 cc.cbErrorAction = CodingErrorAction.REPORT;
 487                 cc.cbDecoder = CharsetCallback.TO_U_CALLBACK_ESCAPE;
 488                 break;
 489             default:
 490                 cc.cbErrorAction = null;
 491                 break;
 492             }
 493         }
 494         // check for any options for the callback value
 495         cc.option = cc.cbErrorAction == null ? null : cc.cbopt.substring(1);
 496         if (cc.option == null) {
 497             cc.option = null;
 498         }
 499
 500         ToUnicodeCase(cc);
 501
 502     }
 503
 504     private void ToUnicodeCase(ConversionCase cc) {
 505
 506         // create converter for charset and decoder for each test case
 507         CharsetProviderICU provider = new CharsetProviderICU();
 508         CharsetDecoder decoder = null;
 509         Charset charset = null;
 510
 511         try {
 512             // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
 513             charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
 514                     ? (Charset) provider.charsetForName(cc.charset.substring(1),
 515                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
 516                     : (Charset) provider.charsetForName(cc.charset);
 517             decoder = (CharsetDecoder) charset.newDecoder();
 518             decoder.onMalformedInput(CodingErrorAction.REPLACE);
 519             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
 520
 521         } catch (Exception e) {
 522             // TODO implement loading of test data.
 523             if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
 524                 logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
 525             } else {
 526                 errln(cc.charset + " was not found");
 527             }
 528             return;
 529         }
 530
 531         // set the callback for the decoder
 532         if (cc.cbErrorAction != null) {
 533             if (cc.cbDecoder != null) {
 534                 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), cc.cbDecoder, cc.option);
 535                 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.unmappableForLength(1), cc.cbDecoder, cc.option);
 536             } else {
 537                 decoder.onMalformedInput(cc.cbErrorAction);
 538                 decoder.onUnmappableCharacter(cc.cbErrorAction);
 539             }
 540
 541             // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
 542             if (cc.option.equals("i")) {
 543                 decoder.onMalformedInput(CodingErrorAction.REPORT);
 544             }
 545
 546             // if callback action is replace, and there is a subchar
 547             // replace the decoder's default replacement value
 548             // if substring, skip test due to current api not supporting
 549             // substring replacement
 550             if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
 551                 if (cc.cbopt.length() > 1) {
 552                     if (cc.cbopt.charAt(1) == '=') {
 553                         logln("Skipping test due to limitation in Java API - substitution string not supported");
 554
 555                     } else {
 556                         // // read NUL-separated subchar first, if any
 557                         // copy the subchar from Latin-1 characters
 558                         // start after the NUL
 559                         if (cc.cbopt.charAt(1) == 0x00) {
 560                             cc.cbopt = cc.cbopt.substring(2);
 561
 562                             try {
 563                                 decoder.replaceWith(cc.cbopt);
 564                             } catch (Exception e) {
 565                                 logln("Skipping test due to limitation in Java API - substitution character sequence size error");
 566                             }
 567                         }
 568                     }
 569                 }
 570             }
 571         }
 572
 573         //      Check the step to unicode
 574         boolean ok;
 575         int resultLength;
 576
 577         String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
 578                 { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
 579         /* TODO: currently not supported test steps, getNext API is not supported for now
 580          { "-1", "getNext" },
 581          { "-2", "toU(bulk)+getNext" },
 582          { "-3", "getNext+toU(bulk)" },
 583          { "-4", "toU(1)+getNext" },
 584          { "-5", "getNext+toU(1)" },
 585          { "-12", "toU(5)+getNext" },
 586          { "-13", "getNext+toU(5)" }};*/
 587
 588         ok = true;
 589         int step;
 590         // testing by steps using the CoderResult cr = charset.decoder(in,out,flush) api
 591         for (int i = 0; i < steps.length && ok; ++i) {
 592             step = Integer.parseInt(steps[i][0]);
 593
 594             if (step < 0 && !cc.finalFlush) {
 595                 continue;
 596             }
 597             logln("Testing step:[" + step + "]");
 598
 599             try {
 600                 resultLength = stepToUnicode(cc, decoder, step);
 601                 ok = checkToUnicode(cc, resultLength);
 602             } catch (Exception ex) {
 603                 errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]");
 604                 ex.printStackTrace(System.out);
 605                 return;
 606             }
 607         }
 608
 609         //testing the java's out = charset.decoder(in) api
 610         while (ok && cc.finalFlush) {
 611             logln("Testing java charset.decoder(in):");
 612             cc.toUnicodeResult = null;
 613             CharBuffer out = null;
 614
 615             try {
 616                 cc.bytes.rewind();
 617                 out = decoder.decode(cc.bytes);
 618                 out.position(out.limit());
 619                 if (out.limit() < cc.unicode.length()) {
 620                     int pos = out.position();
 621                     char[] temp = out.array();
 622                     out = CharBuffer.allocate(cc.bytes.limit());
 623                     out.put(temp);
 624                     out.position(pos);
 625                     CoderResult cr = decoder.flush(out);
 626                     if (cr.isOverflow()) {
 627                         logln("Overflow error with flushing decodering");
 628                     }
 629                 }
 630
 631                 cc.toUnicodeResult = out;
 632
 633                 ok = checkToUnicode(cc, out.limit());
 634                 if (!ok) {
 635                     break;
 636                 }
 637             } catch (Exception e) {
 638                 //check the error code to see if it matches cc.errorCode
 639                 logln("Decoder returned an error code");
 640                 logln("ErrorCode expected is: " + cc.outErrorCode);
 641                 logln("Error Result is: " + e.toString());
 642             }
 643             break;
 644         }
 645
 646         return;
 647     }
 648
 649
 650
 651
 652     private int stepToUnicode(ConversionCase cc, CharsetDecoder decoder,
 653             int step)
 654
 655     {
 656         ByteBuffer source;
 657         CharBuffer target;
 658         boolean flush = false;
 659         int sourceLen;
 660         source = cc.bytes;
 661         sourceLen = cc.bytes.limit();
 662         source.position(0);
 663         target = CharBuffer.allocate(cc.unicode.length() + 4);
 664         target.position(0);
 665         cc.toUnicodeResult = null;
 666         decoder.reset();
 667
 668         if (step >= 0) {
 669
 670             int iStep = step;
 671             int oStep = step;
 672
 673             for (;;) {
 674
 675                 if (step != 0) {
 676                     source.limit((iStep <= sourceLen) ? iStep : sourceLen);
 677                     target.limit((oStep <= target.capacity()) ? oStep : target
 678                             .capacity());
 679                     flush = (cc.finalFlush && source.limit() == sourceLen);
 680
 681                 } else {
 682                     //bulk mode
 683                     source.limit(sourceLen);
 684                     target.limit(target.capacity());
 685                     flush = cc.finalFlush;
 686                 }
 687                 // convert
 688                 CoderResult cr = null;
 689                 if (source.hasRemaining()) {
 690
 691                     cr = decoder.decode(source, target, flush);
 692                     // check pointers and errors
 693                     if (cr.isOverflow()) {
 694                         // the partial target is filled, set a new limit,
 695                         oStep = (target.position() + step);
 696                         target.limit((oStep < target.capacity()) ? oStep
 697                                 : target.capacity());
 698                         if (target.limit() > target.capacity()) {
 699                             //target has reached its limit, an error occurred or test case has an error code
 700                             //check error code
 701                             logln("UnExpected error: Target Buffer is larger than capacity");
 702                             break;
 703                         }
 704
 705                     } else if (cr.isError()) {
 706                         //check the error code to see if it matches cc.errorCode
 707                         logln("Decoder returned an error code");
 708                         logln("ErrorCode expected is: " + cc.outErrorCode);
 709                         logln("Error Result is: " + cr.toString());
 710                         break;
 711                     }
 712
 713                 } else {
 714                     if (source.limit() == sourceLen) {
 715
 716                         cr = decoder.decode(source, target, true);
 717
 718                         //due to limitation of the API we need to check for target limit for expected
 719                         if (target.position() != cc.unicode.length()) {
 720                             if (target.limit() != cc.unicode.length()) {
 721                                 target.limit(cc.unicode.length());
 722                             }
 723                             cr = decoder.flush(target);
 724                             if (cr.isError()) {
 725                                 errln("Flush operation failed");
 726                             }
 727                         }
 728                         break;
 729                     }
 730                 }
 731                 iStep += step;
 732
 733             }
 734
 735         }// if(step ==0)
 736
 737         //--------------------------------------------------------------------------
 738         else /* step<0 */{
 739             /*
 740              * step==-1: call only ucnv_getNextUChar()
 741              * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
 742              *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
 743              *   else give it at most (-step-2)/2 bytes
 744              */
 745
 746             for (;;) {
 747                 // convert
 748                 if ((step & 1) != 0 /* odd: -1, -3, -5, ... */) {
 749
 750                     target.limit(target.position() < target.capacity() ? target
 751                             .position() + 1 : target.capacity());
 752
 753                     // decode behavior is return to output target 1 character
 754                     CoderResult cr = null;
 755
 756                     //similar to getNextUChar() , input is the whole string, while outputs only 1 character
 757                     source.limit(sourceLen);
 758                     while (target.position() != target.limit()
 759                             && source.hasRemaining()) {
 760                         cr = decoder.decode(source, target,
 761                                 source.limit() == sourceLen);
 762
 763                         if (cr.isOverflow()) {
 764
 765                             if (target.limit() >= target.capacity()) {
 766                                 // target has reached its limit, an error occurred
 767                                 logln("UnExpected error: Target Buffer is larger than capacity");
 768                                 break;
 769                             } else {
 770                                 //1 character has been consumed
 771                                 target.limit(target.position() + 1);
 772                                 break;
 773                             }
 774                         } else if (cr.isError()) {
 775                             logln("Decoder returned an error code");
 776                             logln("ErrorCode expected is: " + cc.outErrorCode);
 777                             logln("Error Result is: " + cr.toString());
 778
 779                             cc.toUnicodeResult = target;
 780                             return target.position();
 781                         }
 782
 783                         else {
 784                             // one character has been consumed
 785                             if (target.limit() == target.position()) {
 786                                 target.limit(target.position() + 1);
 787                                 break;
 788                             }
 789                         }
 790
 791                     }
 792                     if (source.position() == sourceLen) {
 793
 794                         // due to limitation of the API we need to check
 795                         // for target limit for expected
 796                         cr = decoder.decode(source, target, true);
 797                         if (target.position() != cc.unicode.length()) {
 798
 799                             target.limit(cc.unicode.length());
 800                             cr = decoder.flush(target);
 801                             if (cr.isError()) {
 802                                 errln("Flush operation failed");
 803                             }
 804                         }
 805                         break;
 806                     }
 807                     // alternate between -n-1 and -n but leave -1 alone
 808                     if (step < -1) {
 809                         ++step;
 810                     }
 811                 } else {/* step is even */
 812                     // allow only one UChar output
 813
 814                     target.limit(target.position() < target.capacity() ? target
 815                             .position() + 1 : target.capacity());
 816                     if (step == -2) {
 817                         source.limit(sourceLen);
 818                     } else {
 819                         source.limit(source.position() + (-step - 2) / 2);
 820                         if (source.limit() > sourceLen) {
 821                             source.limit(sourceLen);
 822                         }
 823                     }
 824                     CoderResult cr = decoder.decode(source, target, source
 825                             .limit() == sourceLen);
 826                     // check pointers and errors
 827                     if (cr.isOverflow()) {
 828                         // one character has been consumed
 829                         if (target.limit() >= target.capacity()) {
 830                             // target has reached its limit, an error occurred
 831                             logln("Unexpected error: Target Buffer is larger than capacity");
 832                             break;
 833                         }
 834                     } else if (cr.isError()) {
 835                         logln("Decoder returned an error code");
 836                         logln("ErrorCode expected is: " + cc.outErrorCode);
 837                         logln("Error Result is: " + cr.toString());
 838                         break;
 839                     }
 840
 841                     --step;
 842                 }
 843             }
 844         }
 845
 846         //--------------------------------------------------------------------------
 847
 848         cc.toUnicodeResult = target;
 849         return target.position();
 850     }
 851
 852
 853
 854     private boolean checkToUnicode(ConversionCase cc, int resultLength) {
 855         return checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult);
 856     }
 857
 858
 859     private void TestGetUnicodeSet(DataMap testcase) {
 860         /*
 861          * charset - will be opened, and ucnv_getUnicodeSet() called on it //
 862          * map - set of code points and strings that must be in the returned set //
 863          * mapnot - set of code points and strings that must *not* be in the //
 864          * returned set // which - numeric UConverterUnicodeSet value Headers {
 865          * "charset", "map", "mapnot", "which" }
 866          */
 867
 868
 869         // retrieve test case data
 870         ConversionCase cc = new ConversionCase();
 871         CharsetProviderICU provider = new CharsetProviderICU();
 872         CharsetICU charset  ;
 873
 874
 875         UnicodeSet mapset = new UnicodeSet();
 876         UnicodeSet mapnotset = new UnicodeSet();
 877         UnicodeSet unicodeset = new UnicodeSet();
 878         String ellipsis = "0x2e";
 879         cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
 880                 .getString();
 881         cc.map = ((ICUResourceBundle) testcase.getObject("map")).getString();
 882         cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
 883                 .getString();
 884
 885
 886         cc.which = ((ICUResourceBundle) testcase.getObject("which")).getInt(); // only checking for ROUNDTRIP_SET
 887
 888         // ----for debugging only
 889         logln("");
 890         logln("TestGetUnicodeSet[" + cc.charset + "] ");
 891         logln("...............................................");
 892
 893         try{
 894            // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
 895            charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
 896                     ? (CharsetICU) provider.charsetForName(cc.charset.substring(1),
 897                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
 898                     : (CharsetICU) provider.charsetForName(cc.charset);
 899
 900            //checking for converter that are not supported at this point
 901            try{
 902                if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
 903                       charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
 904                       charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" ||
 905                       charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
 906
 907                    logln("Converter not supported at this point :" +charset.displayName());
 908                    return;
 909                }
 910
 911                if(cc.which==1){
 912                    logln("Fallback set not supported at this point for converter : "+charset.displayName());
 913                   return;
 914                }
 915
 916            }catch(Exception e){
 917                return;
 918            }
 919
 920            mapset.clear();
 921            mapnotset.clear();
 922
 923            mapset.applyPattern(cc.map,false);
 924            mapnotset.applyPattern(cc.mapnot,false);
 925
 926            charset.getUnicodeSet(unicodeset, cc.which);
 927            UnicodeSet diffset = new UnicodeSet();
 928
 929            //are there items that must be in unicodeset but are not?
 930            (diffset = mapset).removeAll(unicodeset);
 931            if(!diffset.isEmpty()){
 932                StringBuffer s = new StringBuffer(diffset.toPattern(true));
 933                if(s.length()>100){
 934                    s.replace(0, 0x7fffffff, ellipsis);
 935                }
 936                errln("error in missing items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
 937            }
 938
 939           //are the items that must not be in unicodeset but are?
 940            (diffset=mapnotset).retainAll(unicodeset);
 941            if(!diffset.isEmpty()){
 942                StringBuffer s = new StringBuffer(diffset.toPattern(true));
 943                if(s.length()>100){
 944                    s.replace(0, 0x7fffffff, ellipsis);
 945                }
 946                errln("contains unexpected items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
 947            }
 948          } catch (Exception e) {
 949              errln("getUnicodeSet returned an error code");
 950              errln("ErrorCode expected is: " + cc.outErrorCode);
 951              errln("Error Result is: " + e.toString());
 952              return;
 953          }
 954     }
 955
 956     /**
 957      * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
 958      * start of the stream for example U+FEFF (the Unicode BOM/signature
 959      * character) that can be ignored.
 960      *
 961      * Detects Unicode signature byte sequences at the start of the byte stream
 962      * and returns number of bytes of the BOM of the indicated Unicode charset.
 963      * 0 is returned when no Unicode signature is recognized.
 964      *
 965      */
 966
 967     private String detectUnicodeSignature(ByteBuffer source) {
 968         int signatureLength = 0; // number of bytes of the signature
 969         final int SIG_MAX_LEN = 5;
 970         String sigUniCharset = null; // states what unicode charset is the BOM
 971         int i = 0;
 972
 973         /*
 974          * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
 975          * don't misdetect something
 976          */
 977         byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
 978                 (byte) 0xa5 };
 979
 980         while (i < source.limit() && i < SIG_MAX_LEN) {
 981             start[i] = source.get(i);
 982             i++;
 983         }
 984
 985         if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
 986             signatureLength = 2;
 987             sigUniCharset = "UTF-16BE";
 988             source.position(signatureLength);
 989             return sigUniCharset;
 990         } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
 991             if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
 992                 signatureLength = 4;
 993                 sigUniCharset = "UTF-32LE";
 994                 source.position(signatureLength);
 995                 return sigUniCharset;
 996             } else {
 997                 signatureLength = 2;
 998                 sigUniCharset = "UTF-16LE";
 999                 source.position(signatureLength);
1000                 return sigUniCharset;
1001             }
1002         } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
1003                 && start[2] == (byte) 0xBF) {
1004             signatureLength = 3;
1005             sigUniCharset = "UTF-8";
1006             source.position(signatureLength);
1007             return sigUniCharset;
1008         } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
1009                 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
1010             signatureLength = 4;
1011             sigUniCharset = "UTF-32BE";
1012             source.position(signatureLength);
1013             return sigUniCharset;
1014         } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
1015                 && start[2] == (byte) 0xFF) {
1016             signatureLength = 3;
1017             sigUniCharset = "SCSU";
1018             source.position(signatureLength);
1019             return sigUniCharset;
1020         } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
1021                 && start[2] == (byte) 0x28) {
1022             signatureLength = 3;
1023             sigUniCharset = "BOCU-1";
1024             source.position(signatureLength);
1025             return sigUniCharset;
1026         } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
1027                 && start[2] == (byte) 0x76) {
1028
1029             if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
1030                 signatureLength = 5;
1031                 sigUniCharset = "UTF-7";
1032                 source.position(signatureLength);
1033                 return sigUniCharset;
1034             } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
1035                     || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
1036                 signatureLength = 4;
1037                 sigUniCharset = "UTF-7";
1038                 source.position(signatureLength);
1039                 return sigUniCharset;
1040             }
1041         } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
1042                 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
1043             signatureLength = 4;
1044             sigUniCharset = "UTF-EBCDIC";
1045             source.position(signatureLength);
1046             return sigUniCharset;
1047         }
1048
1049         /* no known Unicode signature byte sequence recognized */
1050         return null;
1051     }
1052
1053     String printbytes(ByteBuffer buf, int pos) {
1054         int cur = buf.position();
1055         String res = " (" + pos + ")==[";
1056         for (int i = 0; i < pos; i++) {
1057             res += "(" + i + ")" + hex(buf.get(i) & 0xff).substring(2) + " ";
1058         }
1059         buf.position(cur);
1060         return res + "]";
1061     }
1062
1063     String printchars(CharBuffer buf, int pos) {
1064         int cur = buf.position();
1065         String res = " (" + pos + ")==[";
1066         for (int i = 0; i < pos; i++) {
1067             res += "(" + i + ")" + hex(buf.get(i)) + " ";
1068         }
1069         buf.position(cur);
1070         return res + "]";
1071     }
1072
1073     private boolean checkResultsFromUnicode(ConversionCase cc, ByteBuffer expected,
1074             ByteBuffer output) {
1075
1076         boolean res = true;
1077         expected.rewind();
1078         output.limit(output.position());
1079         output.rewind();
1080
1081         // remove any BOM signature before checking
1082         if (!cc.charset.contains("UnicodeLittle") && !cc.charset.contains("UnicodeBig")) {
1083             detectUnicodeSignature(output); // sets the position to after the BOM
1084             output = output.slice(); // removes anything before the current position
1085         }
1086
1087         if (output.limit() != expected.limit()) {
1088             errln("Test failed: output length does not match expected for charset: " + cc.charset
1089                     + " [" + cc.caseNr + "]");
1090             res = false;
1091         } else {
1092             while (output.hasRemaining()) {
1093                 if (output.get() != expected.get()) {
1094                     errln("Test failed: output does not match expected for charset: " + cc.charset
1095                             + " [" + cc.caseNr + "]");
1096                     res = false;
1097                     break;
1098                 }
1099             }
1100         }
1101
1102         if (res) {
1103             logln("[" + cc.caseNr + "]:" + cc.charset);
1104             logln("Input:       " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length()));
1105             logln("Output:      " + printbytes(output, output.limit()));
1106             logln("Expected:    " + printbytes(expected, expected.limit()));
1107             logln("Passed");
1108         }
1109         else {
1110             errln("[" + cc.caseNr + "]:" + cc.charset);
1111             errln("Input:       " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length()));
1112             errln("Output:      " + printbytes(output, output.limit()));
1113             errln("Expected:    " + printbytes(expected, expected.limit()));
1114             errln("Failed");
1115         }
1116         return res;
1117     }
1118
1119     private boolean checkResultsToUnicode(ConversionCase cc, String expected, CharBuffer output) {
1120
1121         boolean res = true;
1122         output.limit(output.position());
1123         output.rewind();
1124
1125         // test to see if the conversion matches actual results
1126         if (output.limit() != expected.length()) {
1127             errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
1128             res = false;
1129         } else {
1130             for (int i = 0; i < expected.length(); i++) {
1131                 if (output.get(i) != expected.charAt(i)) {
1132                     errln("Test failed: output does not match expected for charset: " + cc.charset
1133                             + " [" + cc.caseNr + "]");
1134                     res = false;
1135                     break;
1136                 }
1137             }
1138         }
1139
1140         if (res) {
1141             logln("[" + cc.caseNr + "]:" + cc.charset);
1142             logln("Input:       " + printbytes(cc.bytes, cc.bytes.limit()));
1143             logln("Output:      " + printchars(output, output.limit()));
1144             logln("Expected:    " + printchars(CharBuffer.wrap(expected), expected.length()));
1145             logln("Passed");
1146         } else {
1147             errln("[" + cc.caseNr + "]:" + cc.charset);
1148             errln("Input:       " + printbytes(cc.bytes, cc.bytes.limit()));
1149             errln("Output:      " + printchars(output, output.limit()));
1150             errln("Expected:    " + printchars(CharBuffer.wrap(expected), expected.length()));
1151             errln("Failed");
1152         }
1153         return res;
1154     }
1155
1156     private byte[] toByteArray(String str) {
1157         byte[] ret = new byte[str.length()];
1158         for (int i = 0; i < ret.length; i++) {
1159             char ch = str.charAt(i);
1160             if (ch <= 0xFF) {
1161                 ret[i] = (byte) ch;
1162             } else {
1163                 throw new IllegalArgumentException(" byte value out of range: " + ch);
1164             }
1165         }
1166         return ret;
1167     }
1168 }