2 *******************************************************************************
\r
3 * Copyright (C) 2002-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 *******************************************************************************
\r
10 package com.ibm.icu.dev.test.charset;
\r
12 import java.nio.ByteBuffer;
\r
13 import java.nio.CharBuffer;
\r
14 import java.nio.charset.Charset;
\r
15 import java.nio.charset.CharsetDecoder;
\r
16 import java.nio.charset.CharsetEncoder;
\r
17 import java.nio.charset.CoderResult;
\r
18 import java.nio.charset.CodingErrorAction;
\r
19 import java.util.Iterator;
\r
21 import com.ibm.icu.charset.CharsetCallback;
\r
22 import com.ibm.icu.charset.CharsetDecoderICU;
\r
23 import com.ibm.icu.charset.CharsetEncoderICU;
\r
24 import com.ibm.icu.charset.CharsetICU;
\r
25 import com.ibm.icu.charset.CharsetProviderICU;
\r
26 import com.ibm.icu.dev.test.ModuleTest;
\r
27 import com.ibm.icu.dev.test.TestDataModule.DataMap;
\r
28 import com.ibm.icu.impl.ICUResourceBundle;
\r
29 import com.ibm.icu.text.UnicodeSet;
\r
32 * This maps to convtest.c which tests the test file for data-driven conversion tests.
\r
35 public class TestConversion extends ModuleTest {
\r
37 * This maps to the C struct of conversion case in convtest.h that stores the
\r
38 * data for a conversion test
\r
41 private class ConversionCase {
\r
42 int caseNr; // testcase index
\r
43 String option = null; // callback options
\r
44 CodingErrorAction cbErrorAction = null; // callback action type
\r
45 CharBuffer toUnicodeResult = null;
\r
46 ByteBuffer fromUnicodeResult = null;
\r
48 // data retrieved from a test case conversion.txt
\r
49 String charset; // charset
\r
50 String unicode; // unicode string
\r
51 ByteBuffer bytes; // byte
\r
52 int[] offsets; // offsets
\r
53 boolean finalFlush; // flush
\r
54 boolean fallbacks; // fallback
\r
55 String outErrorCode; // errorCode
\r
56 String cbopt; // callback
\r
58 // TestGetUnicodeSet variables
\r
63 // CharsetCallback encoder and decoder
\r
64 CharsetCallback.Decoder cbDecoder = null;
\r
65 CharsetCallback.Encoder cbEncoder = null;
\r
67 String caseNrAsString() {
\r
68 return "[" + caseNr + "]";
\r
72 // public methods --------------------------------------------------------
\r
74 public static void main(String[] args) throws Exception {
\r
75 new TestConversion().run(args);
\r
/**
 * Creates the test, handing the test-data location and the name
 * "conversion" to the superclass constructor.
 * NOTE(review): presumably these are the resource path and the module name
 * that ModuleTest loads -- confirm against ModuleTest.
 */
public TestConversion() {
    super("com/ibm/icu/dev/data/testdata/", "conversion");
}
\r
83 * This method maps to the convtest.cpp runIndexedTest() method to run each
\r
84 * type of conversion.
\r
86 public void processModules() {
\r
88 int testFromUnicode = 0;
\r
89 int testToUnicode = 0;
\r
90 String testName = t.getName().toString();
\r
92 // Iterate through and get each of the test case to process
\r
93 for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
\r
94 DataMap testcase = (DataMap) iter.next();
\r
96 if (testName.equalsIgnoreCase("toUnicode")) {
\r
97 TestToUnicode(testcase, testToUnicode);
\r
100 } else if (testName.equalsIgnoreCase("fromUnicode")) {
\r
101 TestFromUnicode(testcase, testFromUnicode);
\r
103 } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
\r
104 TestGetUnicodeSet(testcase);
\r
106 warnln("Could not load the test cases for conversion");
\r
110 } catch (Exception e) {
\r
111 e.printStackTrace();
\r
116 // private methods -------------------------------------------------------
\r
119 // fromUnicode test worker functions ---------------------------------------
\r
120 private void TestFromUnicode(DataMap testcase, int caseNr) {
\r
122 ConversionCase cc = new ConversionCase();
\r
125 // retrieve test case data
\r
126 cc.caseNr = caseNr;
\r
127 cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString();
\r
128 cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString();
\r
129 cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary();
\r
130 cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector();
\r
131 cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0;
\r
132 cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0;
\r
133 cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString();
\r
134 cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString();
\r
136 } catch (Exception e) {
\r
137 errln("Skipping test:");
\r
138 errln("error parsing conversion/toUnicode test case " + cc.caseNr);
\r
142 // ----for debugging only
\r
144 logln("TestFromUnicode[" + caseNr + "] " + cc.charset + " ");
\r
145 logln("Unicode: " + cc.unicode);
\r
146 logln("Bytes: " + printbytes(cc.bytes, cc.bytes.limit()));
\r
147 ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
\r
148 logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")");
\r
149 logln("...............................................");
\r
151 // process the retrieved test data case
\r
152 if (cc.offsets.length == 0) {
\r
154 } else if (cc.offsets.length != cc.bytes.limit()) {
\r
155 errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
\r
156 + "] and offsets[" + cc.offsets.length
\r
157 + "] must have the same length");
\r
161 // check the callback replacement value
\r
162 if (cc.cbopt.length() > 0) {
\r
164 switch ((cc.cbopt).charAt(0)) {
\r
166 cc.cbErrorAction = CodingErrorAction.REPLACE;
\r
169 cc.cbErrorAction = CodingErrorAction.IGNORE;
\r
172 cc.cbErrorAction = CodingErrorAction.REPORT;
\r
175 cc.cbErrorAction = CodingErrorAction.REPLACE;
\r
176 cc.cbEncoder = CharsetCallback.FROM_U_CALLBACK_ESCAPE;
\r
179 cc.cbErrorAction = null;
\r
183 // check for any options for the callback value --
\r
184 cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt
\r
186 if (cc.option == null) {
\r
190 FromUnicodeCase(cc);
\r
194 private void FromUnicodeCase(ConversionCase cc) {
\r
196 // create charset encoder for conversion test
\r
197 CharsetProviderICU provider = new CharsetProviderICU();
\r
198 CharsetEncoder encoder = null;
\r
199 Charset charset = null;
\r
201 // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
\r
202 charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
\r
203 ? (Charset) provider.charsetForName(cc.charset.substring(1),
\r
204 "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
\r
205 : (Charset) provider.charsetForName(cc.charset);
\r
206 encoder = (CharsetEncoder) charset.newEncoder();
\r
207 encoder.onMalformedInput(CodingErrorAction.REPLACE);
\r
208 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
\r
209 if (encoder instanceof CharsetEncoderICU) {
\r
210 ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
\r
211 if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
\r
212 errln("Fallback could not be set for " + cc.charset);
\r
216 } catch (Exception e) {
\r
217 if (skipIfBeforeICU(4,5,0)) { // TIME BOMB
\r
218 logln("Skipping test:(" + cc.charset + ") due to ICU Charset not supported at this time");
\r
220 errln(cc.charset + " was not found");
\r
225 // set the callback for the encoder
\r
226 if (cc.cbErrorAction != null) {
\r
227 if (cc.cbEncoder != null) {
\r
228 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.malformedForLength(1), cc.cbEncoder, cc.option);
\r
229 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), cc.cbEncoder, cc.option);
\r
231 encoder.onUnmappableCharacter(cc.cbErrorAction);
\r
232 encoder.onMalformedInput(cc.cbErrorAction);
\r
235 // if action has an option, put in the option for the case
\r
236 if (cc.option.equals("i")) {
\r
237 encoder.onMalformedInput(CodingErrorAction.REPORT);
\r
240 // if callback action is replace,
\r
241 // and there is a subchar
\r
242 // replace the decoder's default replacement value
\r
243 // if substring, skip test due to current api not supporting
\r
245 if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
\r
246 if (cc.cbopt.length() > 1) {
\r
247 if (cc.cbopt.length() > 1 && cc.cbopt.charAt(1) == '=') {
\r
248 logln("Skipping test due to limitation in Java API - substitution string not supported");
\r
251 // // read NUL-separated subchar first, if any
\r
252 // copy the subchar from Latin-1 characters
\r
253 // start after the NUL
\r
254 if (cc.cbopt.charAt(1) == 0x00) {
\r
255 cc.cbopt = cc.cbopt.substring(2);
\r
258 encoder.replaceWith(toByteArray(cc.cbopt));
\r
259 } catch (Exception e) {
\r
260 logln("Skipping test due to limitation in Java API - substitution character sequence size error");
\r
269 // do charset encoding from unicode
\r
271 // testing by steps using charset.encoder(in,out,flush)
\r
274 String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
\r
275 { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
\r
280 for (i = 0; i < steps.length && ok; ++i) {
\r
281 step = Integer.parseInt(steps[i][0]);
\r
283 logln("Testing step:[" + step + "]");
\r
285 resultLength = stepFromUnicode(cc, encoder, step);
\r
286 ok = checkFromUnicode(cc, resultLength);
\r
287 } catch (Exception ex) {
\r
288 errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]");
\r
289 ex.printStackTrace(System.out);
\r
294 // testing by whole buffer using out = charset.encoder(in)
\r
295 while (ok && cc.finalFlush) {
\r
296 logln("Testing java API charset.encoder(in):");
\r
297 cc.fromUnicodeResult = null;
\r
298 ByteBuffer out = null;
\r
301 out = encoder.encode(CharBuffer.wrap(cc.unicode.toCharArray()));
\r
302 out.position(out.limit());
\r
303 if (out.limit() != out.capacity() || cc.finalFlush) {
\r
304 int pos = out.position();
\r
305 byte[] temp = out.array();
\r
306 out = ByteBuffer.allocate(temp.length * 4);
\r
309 CoderResult cr = encoder.flush(out);
\r
310 if (cr.isOverflow()) {
\r
311 logln("Overflow error with flushing encoder");
\r
314 cc.fromUnicodeResult = out;
\r
316 ok = checkFromUnicode(cc, out.limit());
\r
320 } catch (Exception e) {
\r
321 //check the error code to see if it matches cc.errorCode
\r
322 logln("Encoder returned an error code");
\r
323 logln("ErrorCode expected is: " + cc.outErrorCode);
\r
324 logln("Error Result is: " + e.toString());
\r
330 private int stepFromUnicode(ConversionCase cc, CharsetEncoder encoder, int step) {
\r
332 errln("Negative step size, test internal error.");
\r
336 int sourceLen = cc.unicode.length();
\r
337 int targetLen = cc.bytes.capacity() + 20; // for BOM, and to let failures produce excess output
\r
338 CharBuffer source = CharBuffer.wrap(cc.unicode.toCharArray());
\r
339 ByteBuffer target = ByteBuffer.allocate(targetLen);
\r
340 cc.fromUnicodeResult = null;
\r
343 int currentSourceLimit;
\r
344 int currentTargetLimit;
\r
346 currentSourceLimit = Math.min(step, sourceLen);
\r
347 currentTargetLimit = Math.min(step, targetLen);
\r
349 currentSourceLimit = sourceLen;
\r
350 currentTargetLimit = targetLen;
\r
353 CoderResult cr = null;
\r
356 source.limit(currentSourceLimit);
\r
357 target.limit(currentTargetLimit);
\r
359 cr = encoder.encode(source, target, currentSourceLimit == sourceLen);
\r
361 if (cr.isUnderflow()) {
\r
362 if (currentSourceLimit == sourceLen) {
\r
363 if (target.position() == cc.bytes.limit()) {
\r
364 // target contains the correct number of bytes
\r
367 // Do a final flush for cleanup, then break out
\r
368 // Encode loop, exits with cr==underflow in normal operation.
\r
369 //target.limit(targetLen);
\r
370 target.limit(targetLen);
\r
371 cr = encoder.flush(target);
\r
372 if (cr.isUnderflow()) {
\r
374 } else if (cr.isOverflow()) {
\r
375 errln(cc.caseNrAsString() + " Flush is producing excessive output");
\r
377 errln(cc.caseNrAsString() + " Flush operation failed. CoderResult = \""
\r
378 + cr.toString() + "\"");
\r
382 currentSourceLimit = Math.min(currentSourceLimit + step, sourceLen);
\r
383 } else if (cr.isOverflow()) {
\r
384 if (currentTargetLimit == targetLen) {
\r
385 errln(cc.caseNrAsString() + " encode() is producing excessive output");
\r
388 currentTargetLimit = Math.min(currentTargetLimit + step, targetLen);
\r
390 // check the error code to see if it matches cc.errorCode
\r
391 logln("Encoder returned an error code");
\r
392 logln("ErrorCode expected is: " + cc.outErrorCode);
\r
393 logln("Error Result is: " + cr.toString());
\r
399 cc.fromUnicodeResult = target;
\r
400 return target.position();
\r
403 private boolean checkFromUnicode(ConversionCase cc, int resultLength) {
\r
404 return checkResultsFromUnicode(cc, cc.bytes, cc.fromUnicodeResult);
\r
407 // toUnicode test worker functions ----------------------------------------- ***
\r
409 private void TestToUnicode(DataMap testcase, int caseNr) {
\r
410 // create Conversion case to store the test case data
\r
411 ConversionCase cc = new ConversionCase();
\r
414 // retrieve test case data
\r
415 cc.caseNr = caseNr;
\r
416 cc.charset = ((ICUResourceBundle) testcase.getObject("charset")).getString();
\r
417 cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes")).getBinary();
\r
418 cc.unicode = ((ICUResourceBundle) testcase.getObject("unicode")).getString();
\r
419 cc.offsets = ((ICUResourceBundle) testcase.getObject("offsets")).getIntVector();
\r
420 cc.finalFlush = ((ICUResourceBundle) testcase.getObject("flush")).getUInt() != 0;
\r
421 cc.fallbacks = ((ICUResourceBundle) testcase.getObject("fallbacks")).getUInt() != 0;
\r
422 cc.outErrorCode = ((ICUResourceBundle) testcase.getObject("errorCode")).getString();
\r
423 cc.cbopt = ((ICUResourceBundle) testcase.getObject("callback")).getString();
\r
425 } catch (Exception e) {
\r
426 errln("Skipping test: error parsing conversion/toUnicode test case " + cc.caseNr);
\r
430 // ----for debugging only
\r
432 logln("TestToUnicode[" + caseNr + "] " + cc.charset + " ");
\r
433 logln("Unicode: " + hex(cc.unicode));
\r
434 logln("Bytes: " + printbytes(cc.bytes, cc.bytes.limit()));
\r
435 ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
\r
436 logln("Callback: " + printbytes(c, c.limit()) + " (" + cc.cbopt + ")");
\r
437 logln("...............................................");
\r
439 // process the retrieved test data case
\r
440 if (cc.offsets.length == 0) {
\r
442 } else if (cc.offsets.length != cc.unicode.length()) {
\r
443 errln("Skipping test: toUnicode[" + cc.caseNr + "] unicode["
\r
444 + cc.unicode.length() + "] and offsets["
\r
445 + cc.offsets.length + "] must have the same length");
\r
448 // check for the callback replacement value for unmappable
\r
449 // characters or malformed errors
\r
450 if (cc.cbopt.length() > 0) {
\r
451 switch ((cc.cbopt).charAt(0)) {
\r
452 case '?': // CALLBACK_SUBSTITUTE
\r
453 cc.cbErrorAction = CodingErrorAction.REPLACE;
\r
455 case '0': // CALLBACK_SKIP
\r
456 cc.cbErrorAction = CodingErrorAction.IGNORE;
\r
458 case '.': // CALLBACK_STOP
\r
459 cc.cbErrorAction = CodingErrorAction.REPORT;
\r
461 case '&': // CALLBACK_ESCAPE
\r
462 cc.cbErrorAction = CodingErrorAction.REPORT;
\r
463 cc.cbDecoder = CharsetCallback.TO_U_CALLBACK_ESCAPE;
\r
466 cc.cbErrorAction = null;
\r
470 // check for any options for the callback value
\r
471 cc.option = cc.cbErrorAction == null ? null : cc.cbopt.substring(1);
\r
472 if (cc.option == null) {
\r
480 private void ToUnicodeCase(ConversionCase cc) {
\r
482 // create converter for charset and decoder for each test case
\r
483 CharsetProviderICU provider = new CharsetProviderICU();
\r
484 CharsetDecoder decoder = null;
\r
485 Charset charset = null;
\r
488 // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
\r
489 charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
\r
490 ? (Charset) provider.charsetForName(cc.charset.substring(1),
\r
491 "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
\r
492 : (Charset) provider.charsetForName(cc.charset);
\r
493 decoder = (CharsetDecoder) charset.newDecoder();
\r
494 decoder.onMalformedInput(CodingErrorAction.REPLACE);
\r
495 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
\r
497 } catch (Exception e) {
\r
498 // TODO implement loading of test data.
\r
499 if (skipIfBeforeICU(4,5,0)) {
\r
500 logln("Skipping test:(" + cc.charset + ") due to ICU Charset not supported at this time");
\r
502 errln(cc.charset + " was not found");
\r
507 // set the callback for the decoder
\r
508 if (cc.cbErrorAction != null) {
\r
509 if (cc.cbDecoder != null) {
\r
510 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), cc.cbDecoder, cc.option);
\r
511 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.unmappableForLength(1), cc.cbDecoder, cc.option);
\r
513 decoder.onMalformedInput(cc.cbErrorAction);
\r
514 decoder.onUnmappableCharacter(cc.cbErrorAction);
\r
517 // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
\r
518 if (cc.option.equals("i")) {
\r
519 decoder.onMalformedInput(CodingErrorAction.REPORT);
\r
522 // if callback action is replace, and there is a subchar
\r
523 // replace the decoder's default replacement value
\r
524 // if substring, skip test due to current api not supporting
\r
525 // substring replacement
\r
526 if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
\r
527 if (cc.cbopt.length() > 1) {
\r
528 if (cc.cbopt.charAt(1) == '=') {
\r
529 logln("Skipping test due to limitation in Java API - substitution string not supported");
\r
532 // // read NUL-separated subchar first, if any
\r
533 // copy the subchar from Latin-1 characters
\r
534 // start after the NUL
\r
535 if (cc.cbopt.charAt(1) == 0x00) {
\r
536 cc.cbopt = cc.cbopt.substring(2);
\r
539 decoder.replaceWith(cc.cbopt);
\r
540 } catch (Exception e) {
\r
541 logln("Skipping test due to limitation in Java API - substitution character sequence size error");
\r
549 // Check the step to unicode
\r
553 String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
\r
554 { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
\r
555 /* TODO: currently not supported test steps, getNext API is not supported for now
\r
556 { "-1", "getNext" },
\r
557 { "-2", "toU(bulk)+getNext" },
\r
558 { "-3", "getNext+toU(bulk)" },
\r
559 { "-4", "toU(1)+getNext" },
\r
560 { "-5", "getNext+toU(1)" },
\r
561 { "-12", "toU(5)+getNext" },
\r
562 { "-13", "getNext+toU(5)" }};*/
\r
566 // testing by steps using the CoderResult cr = charset.decoder(in,out,flush) api
\r
567 for (int i = 0; i < steps.length && ok; ++i) {
\r
568 step = Integer.parseInt(steps[i][0]);
\r
570 if (step < 0 && !cc.finalFlush) {
\r
573 logln("Testing step:[" + step + "]");
\r
576 resultLength = stepToUnicode(cc, decoder, step);
\r
577 ok = checkToUnicode(cc, resultLength);
\r
578 } catch (Exception ex) {
\r
579 errln("Test failed: " + ex.getClass().getName() + " thrown: " + cc.charset+ " [" + cc.caseNr + "]");
\r
580 ex.printStackTrace(System.out);
\r
585 //testing the java's out = charset.decoder(in) api
\r
586 while (ok && cc.finalFlush) {
\r
587 logln("Testing java charset.decoder(in):");
\r
588 cc.toUnicodeResult = null;
\r
589 CharBuffer out = null;
\r
593 out = decoder.decode(cc.bytes);
\r
594 out.position(out.limit());
\r
595 if (out.limit() < cc.unicode.length()) {
\r
596 int pos = out.position();
\r
597 char[] temp = out.array();
\r
598 out = CharBuffer.allocate(cc.bytes.limit());
\r
601 CoderResult cr = decoder.flush(out);
\r
602 if (cr.isOverflow()) {
\r
603 logln("Overflow error with flushing decodering");
\r
607 cc.toUnicodeResult = out;
\r
609 ok = checkToUnicode(cc, out.limit());
\r
613 } catch (Exception e) {
\r
614 //check the error code to see if it matches cc.errorCode
\r
615 logln("Decoder returned an error code");
\r
616 logln("ErrorCode expected is: " + cc.outErrorCode);
\r
617 logln("Error Result is: " + e.toString());
\r
628 private int stepToUnicode(ConversionCase cc, CharsetDecoder decoder,
\r
634 boolean flush = false;
\r
637 sourceLen = cc.bytes.limit();
\r
638 source.position(0);
\r
639 target = CharBuffer.allocate(cc.unicode.length() + 4);
\r
640 target.position(0);
\r
641 cc.toUnicodeResult = null;
\r
652 source.limit((iStep <= sourceLen) ? iStep : sourceLen);
\r
653 target.limit((oStep <= target.capacity()) ? oStep : target
\r
655 flush = (cc.finalFlush && source.limit() == sourceLen);
\r
659 source.limit(sourceLen);
\r
660 target.limit(target.capacity());
\r
661 flush = cc.finalFlush;
\r
664 CoderResult cr = null;
\r
665 if (source.hasRemaining()) {
\r
667 cr = decoder.decode(source, target, flush);
\r
668 // check pointers and errors
\r
669 if (cr.isOverflow()) {
\r
670 // the partial target is filled, set a new limit,
\r
671 oStep = (target.position() + step);
\r
672 target.limit((oStep < target.capacity()) ? oStep
\r
673 : target.capacity());
\r
674 if (target.limit() > target.capacity()) {
\r
675 //target has reached its limit, an error occurred or test case has an error code
\r
677 logln("UnExpected error: Target Buffer is larger than capacity");
\r
681 } else if (cr.isError()) {
\r
682 //check the error code to see if it matches cc.errorCode
\r
683 logln("Decoder returned an error code");
\r
684 logln("ErrorCode expected is: " + cc.outErrorCode);
\r
685 logln("Error Result is: " + cr.toString());
\r
690 if (source.limit() == sourceLen) {
\r
692 cr = decoder.decode(source, target, true);
\r
694 //due to limitation of the API we need to check for target limit for expected
\r
695 if (target.limit() != cc.unicode.length()) {
\r
696 target.limit(cc.unicode.length());
\r
697 cr = decoder.flush(target);
\r
698 if (cr.isError()) {
\r
699 errln("Flush operation failed");
\r
711 //--------------------------------------------------------------------------
\r
714 * step==-1: call only ucnv_getNextUChar()
\r
715 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
\r
716 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
\r
717 * else give it at most (-step-2)/2 bytes
\r
722 if ((step & 1) != 0 /* odd: -1, -3, -5, ... */) {
\r
724 target.limit(target.position() < target.capacity() ? target
\r
725 .position() + 1 : target.capacity());
\r
727 // decode behavior is return to output target 1 character
\r
728 CoderResult cr = null;
\r
730 //similar to getNextUChar() , input is the whole string, while outputs only 1 character
\r
731 source.limit(sourceLen);
\r
732 while (target.position() != target.limit()
\r
733 && source.hasRemaining()) {
\r
734 cr = decoder.decode(source, target,
\r
735 source.limit() == sourceLen);
\r
737 if (cr.isOverflow()) {
\r
739 if (target.limit() >= target.capacity()) {
\r
740 // target has reached its limit, an error occurred
\r
741 logln("UnExpected error: Target Buffer is larger than capacity");
\r
744 //1 character has been consumed
\r
745 target.limit(target.position() + 1);
\r
748 } else if (cr.isError()) {
\r
749 logln("Decoder returned an error code");
\r
750 logln("ErrorCode expected is: " + cc.outErrorCode);
\r
751 logln("Error Result is: " + cr.toString());
\r
753 cc.toUnicodeResult = target;
\r
754 return target.position();
\r
758 // one character has been consumed
\r
759 if (target.limit() == target.position()) {
\r
760 target.limit(target.position() + 1);
\r
766 if (source.position() == sourceLen) {
\r
768 // due to limitation of the API we need to check
\r
769 // for target limit for expected
\r
770 cr = decoder.decode(source, target, true);
\r
771 if (target.position() != cc.unicode.length()) {
\r
773 target.limit(cc.unicode.length());
\r
774 cr = decoder.flush(target);
\r
775 if (cr.isError()) {
\r
776 errln("Flush operation failed");
\r
781 // alternate between -n-1 and -n but leave -1 alone
\r
785 } else {/* step is even */
\r
786 // allow only one UChar output
\r
788 target.limit(target.position() < target.capacity() ? target
\r
789 .position() + 1 : target.capacity());
\r
791 source.limit(sourceLen);
\r
793 source.limit(source.position() + (-step - 2) / 2);
\r
794 if (source.limit() > sourceLen) {
\r
795 source.limit(sourceLen);
\r
798 CoderResult cr = decoder.decode(source, target, source
\r
799 .limit() == sourceLen);
\r
800 // check pointers and errors
\r
801 if (cr.isOverflow()) {
\r
802 // one character has been consumed
\r
803 if (target.limit() >= target.capacity()) {
\r
804 // target has reached its limit, an error occurred
\r
805 logln("Unexpected error: Target Buffer is larger than capacity");
\r
808 } else if (cr.isError()) {
\r
809 logln("Decoder returned an error code");
\r
810 logln("ErrorCode expected is: " + cc.outErrorCode);
\r
811 logln("Error Result is: " + cr.toString());
\r
820 //--------------------------------------------------------------------------
\r
822 cc.toUnicodeResult = target;
\r
823 return target.position();
\r
828 private boolean checkToUnicode(ConversionCase cc, int resultLength) {
\r
829 return checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult);
\r
833 private void TestGetUnicodeSet(DataMap testcase) {
\r
835 * charset - will be opened, and ucnv_getUnicodeSet() called on it //
\r
836 * map - set of code points and strings that must be in the returned set //
\r
837 * mapnot - set of code points and strings that must *not* be in the //
\r
838 * returned set // which - numeric UConverterUnicodeSet value Headers {
\r
839 * "charset", "map", "mapnot", "which" }
\r
843 // retrieve test case data
\r
844 ConversionCase cc = new ConversionCase();
\r
845 CharsetProviderICU provider = new CharsetProviderICU();
\r
846 CharsetICU charset ;
\r
849 UnicodeSet mapset = new UnicodeSet();
\r
850 UnicodeSet mapnotset = new UnicodeSet();
\r
851 UnicodeSet unicodeset = new UnicodeSet();
\r
852 String ellipsis = "0x2e";
\r
853 cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
\r
855 cc.map = ((ICUResourceBundle) testcase.getObject("map")).getString();
\r
856 cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
\r
860 cc.which = ((ICUResourceBundle) testcase.getObject("which")).getInt(); // only checking for ROUNDTRIP_SET
\r
862 // ----for debugging only
\r
864 logln("TestGetUnicodeSet[" + cc.charset + "] ");
\r
865 logln("...............................................");
\r
868 // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
\r
869 charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
\r
870 ? (CharsetICU) provider.charsetForName(cc.charset.substring(1),
\r
871 "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
\r
872 : (CharsetICU) provider.charsetForName(cc.charset);
\r
874 //checking for converter that are not supported at this point
\r
876 if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
\r
877 charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
\r
878 charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" ||
\r
879 charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
\r
881 logln("Converter not supported at this point :" +charset.displayName());
\r
886 logln("Fallback set not supported at this point for converter : "+charset.displayName());
\r
890 }catch(Exception e){
\r
897 mapset.applyPattern(cc.map,false);
\r
898 mapnotset.applyPattern(cc.mapnot,false);
\r
900 charset.getUnicodeSet(unicodeset, cc.which);
\r
901 UnicodeSet diffset = new UnicodeSet();
\r
903 //are there items that must be in unicodeset but are not?
\r
904 (diffset = mapset).removeAll(unicodeset);
\r
905 if(!diffset.isEmpty()){
\r
906 StringBuffer s = new StringBuffer(diffset.toPattern(true));
\r
907 if(s.length()>100){
\r
908 s.replace(0, 0x7fffffff, ellipsis);
\r
910 errln("error in missing items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
\r
913 //are the items that must not be in unicodeset but are?
\r
914 (diffset=mapnotset).retainAll(unicodeset);
\r
915 if(!diffset.isEmpty()){
\r
916 StringBuffer s = new StringBuffer(diffset.toPattern(true));
\r
917 if(s.length()>100){
\r
918 s.replace(0, 0x7fffffff, ellipsis);
\r
920 errln("contains unexpected items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
\r
922 } catch (Exception e) {
\r
923 errln("getUnicodeSet returned an error code");
\r
924 errln("ErrorCode expected is: " + cc.outErrorCode);
\r
925 errln("Error Result is: " + e.toString());
\r
931 * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
\r
932 * start of the stream for example U+FEFF (the Unicode BOM/signature
\r
933 * character) that can be ignored.
\r
935 * Detects Unicode signature byte sequences at the start of the byte stream
\r
936 * and returns number of bytes of the BOM of the indicated Unicode charset.
\r
937 * 0 is returned when no Unicode signature is recognized.
\r
941 private String detectUnicodeSignature(ByteBuffer source) {
\r
942 int signatureLength = 0; // number of bytes of the signature
\r
943 final int SIG_MAX_LEN = 5;
\r
944 String sigUniCharset = null; // states what unicode charset is the BOM
\r
948 * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
\r
949 * don't misdetect something
\r
951 byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
\r
954 while (i < source.limit() && i < SIG_MAX_LEN) {
\r
955 start[i] = source.get(i);
\r
959 if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
\r
960 signatureLength = 2;
\r
961 sigUniCharset = "UTF-16BE";
\r
962 source.position(signatureLength);
\r
963 return sigUniCharset;
\r
964 } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
\r
965 if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
\r
966 signatureLength = 4;
\r
967 sigUniCharset = "UTF-32LE";
\r
968 source.position(signatureLength);
\r
969 return sigUniCharset;
\r
971 signatureLength = 2;
\r
972 sigUniCharset = "UTF-16LE";
\r
973 source.position(signatureLength);
\r
974 return sigUniCharset;
\r
976 } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
\r
977 && start[2] == (byte) 0xBF) {
\r
978 signatureLength = 3;
\r
979 sigUniCharset = "UTF-8";
\r
980 source.position(signatureLength);
\r
981 return sigUniCharset;
\r
982 } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
\r
983 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
\r
984 signatureLength = 4;
\r
985 sigUniCharset = "UTF-32BE";
\r
986 source.position(signatureLength);
\r
987 return sigUniCharset;
\r
988 } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
\r
989 && start[2] == (byte) 0xFF) {
\r
990 signatureLength = 3;
\r
991 sigUniCharset = "SCSU";
\r
992 source.position(signatureLength);
\r
993 return sigUniCharset;
\r
994 } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
\r
995 && start[2] == (byte) 0x28) {
\r
996 signatureLength = 3;
\r
997 sigUniCharset = "BOCU-1";
\r
998 source.position(signatureLength);
\r
999 return sigUniCharset;
\r
1000 } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
\r
1001 && start[2] == (byte) 0x76) {
\r
1003 if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
\r
1004 signatureLength = 5;
\r
1005 sigUniCharset = "UTF-7";
\r
1006 source.position(signatureLength);
\r
1007 return sigUniCharset;
\r
1008 } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
\r
1009 || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
\r
1010 signatureLength = 4;
\r
1011 sigUniCharset = "UTF-7";
\r
1012 source.position(signatureLength);
\r
1013 return sigUniCharset;
\r
1015 } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
\r
1016 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
\r
1017 signatureLength = 4;
\r
1018 sigUniCharset = "UTF-EBCDIC";
\r
1019 source.position(signatureLength);
\r
1020 return sigUniCharset;
\r
1023 /* no known Unicode signature byte sequence recognized */
\r
1027 String printbytes(ByteBuffer buf, int pos) {
\r
1028 int cur = buf.position();
\r
1029 String res = " (" + pos + ")==[";
\r
1030 for (int i = 0; i < pos; i++) {
\r
1031 res += "(" + i + ")" + hex(buf.get(i) & 0xff).substring(2) + " ";
\r
1033 buf.position(cur);
\r
1037 String printchars(CharBuffer buf, int pos) {
\r
1038 int cur = buf.position();
\r
1039 String res = " (" + pos + ")==[";
\r
1040 for (int i = 0; i < pos; i++) {
\r
1041 res += "(" + i + ")" + hex(buf.get(i)) + " ";
\r
1043 buf.position(cur);
\r
1047 private boolean checkResultsFromUnicode(ConversionCase cc, ByteBuffer expected,
\r
1048 ByteBuffer output) {
\r
1050 boolean res = true;
\r
1051 expected.rewind();
\r
1052 output.limit(output.position());
\r
1055 // remove any BOM signature before checking
\r
1056 detectUnicodeSignature(output); // sets the position to after the BOM
\r
1057 output = output.slice(); // removes anything before the current position
\r
1059 if (output.limit() != expected.limit()) {
\r
1060 errln("Test failed: output length does not match expected for charset: " + cc.charset
\r
1061 + " [" + cc.caseNr + "]");
\r
1064 while (output.hasRemaining()) {
\r
1065 if (output.get() != expected.get()) {
\r
1066 errln("Test failed: output does not match expected for charset: " + cc.charset
\r
1067 + " [" + cc.caseNr + "]");
\r
1075 logln("[" + cc.caseNr + "]:" + cc.charset);
\r
1076 logln("Input: " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length()));
\r
1077 logln("Output: " + printbytes(output, output.limit()));
\r
1078 logln("Expected: " + printbytes(expected, expected.limit()));
\r
1082 errln("[" + cc.caseNr + "]:" + cc.charset);
\r
1083 errln("Input: " + printchars(CharBuffer.wrap(cc.unicode), cc.unicode.length()));
\r
1084 errln("Output: " + printbytes(output, output.limit()));
\r
1085 errln("Expected: " + printbytes(expected, expected.limit()));
\r
1091 private boolean checkResultsToUnicode(ConversionCase cc, String expected, CharBuffer output) {
\r
1093 boolean res = true;
\r
1094 output.limit(output.position());
\r
1097 // test to see if the conversion matches actual results
\r
1098 if (output.limit() != expected.length()) {
\r
1099 if (skipIfBeforeICU(4,5,0)) { // TIME BOMB
\r
1100 logln("Skipping test:(" + cc.charset + ") due to time bomb");
\r
1102 errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
\r
1106 for (int i = 0; i < expected.length(); i++) {
\r
1107 if (output.get(i) != expected.charAt(i)) {
\r
1108 errln("Test failed: output does not match expected for charset: " + cc.charset
\r
1109 + " [" + cc.caseNr + "]");
\r
1117 logln("[" + cc.caseNr + "]:" + cc.charset);
\r
1118 logln("Input: " + printbytes(cc.bytes, cc.bytes.limit()));
\r
1119 logln("Output: " + printchars(output, output.limit()));
\r
1120 logln("Expected: " + printchars(CharBuffer.wrap(expected), expected.length()));
\r
1123 else if (skipIfBeforeICU(4,5,0)) {
\r
1126 errln("[" + cc.caseNr + "]:" + cc.charset);
\r
1127 errln("Input: " + printbytes(cc.bytes, cc.bytes.limit()));
\r
1128 errln("Output: " + printchars(output, output.limit()));
\r
1129 errln("Expected: " + printchars(CharBuffer.wrap(expected), expected.length()));
\r
1135 private byte[] toByteArray(String str) {
\r
1136 byte[] ret = new byte[str.length()];
\r
1137 for (int i = 0; i < ret.length; i++) {
\r
1138 char ch = str.charAt(i);
\r
1140 ret[i] = (byte) ch;
\r
1142 throw new IllegalArgumentException(" byte value out of range: " + ch);
\r