2 *******************************************************************************
\r
3 * Copyright (C) 2006-2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 *******************************************************************************
\r
9 package com.ibm.icu.charset;
\r
11 import java.nio.BufferOverflowException;
\r
12 import java.nio.BufferUnderflowException;
\r
13 import java.nio.ByteBuffer;
\r
14 import java.nio.CharBuffer;
\r
15 import java.nio.IntBuffer;
\r
16 import java.nio.charset.CharsetDecoder;
\r
17 import java.nio.charset.CharsetEncoder;
\r
18 import java.nio.charset.CoderResult;
\r
20 import com.ibm.icu.text.UTF16;
\r
21 import com.ibm.icu.text.UnicodeSet;
\r
23 class CharsetASCII extends CharsetICU {
\r
24 protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a };
\r
26 public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
\r
27 super(icuCanonicalName, javaCanonicalName, aliases);
\r
28 maxBytesPerChar = 1;
\r
29 minBytesPerChar = 1;
\r
30 maxCharsPerByte = 1;
\r
33 class CharsetDecoderASCII extends CharsetDecoderICU {
\r
35 public CharsetDecoderASCII(CharsetICU cs) {
\r
39 protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
41 if (!source.hasRemaining()) {
\r
42 /* no input, nothing to do */
\r
43 return CoderResult.UNDERFLOW;
\r
45 if (!target.hasRemaining()) {
\r
46 /* no output available, can't do anything */
\r
47 return CoderResult.OVERFLOW;
\r
51 int oldSource = source.position();
\r
52 int oldTarget = target.position();
\r
54 if (source.hasArray() && target.hasArray()) {
\r
55 /* optimized loop */
\r
58 * extract arrays from the buffers and obtain various constant values that will be
\r
59 * necessary in the core loop
\r
61 byte[] sourceArray = source.array();
\r
62 int sourceOffset = source.arrayOffset();
\r
63 int sourceIndex = oldSource + sourceOffset;
\r
64 int sourceLength = source.limit() - oldSource;
\r
66 char[] targetArray = target.array();
\r
67 int targetOffset = target.arrayOffset();
\r
68 int targetIndex = oldTarget + targetOffset;
\r
69 int targetLength = target.limit() - oldTarget;
\r
71 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
\r
73 int offset = targetIndex - sourceIndex;
\r
76 * perform the core loop... if it returns null, it must be due to an overflow or
\r
79 cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit);
\r
81 if (sourceLength <= targetLength) {
\r
82 source.position(oldSource + sourceLength);
\r
83 target.position(oldTarget + sourceLength);
\r
84 cr = CoderResult.UNDERFLOW;
\r
86 source.position(oldSource + targetLength);
\r
87 target.position(oldTarget + targetLength);
\r
88 cr = CoderResult.OVERFLOW;
\r
92 /* unoptimized loop */
\r
96 * perform the core loop... if it throws an exception, it must be due to an
\r
97 * overflow or underflow
\r
99 cr = decodeLoopCoreUnoptimized(source, target);
\r
101 } catch (BufferUnderflowException ex) {
\r
102 /* all of the source has been read */
\r
103 cr = CoderResult.UNDERFLOW;
\r
104 } catch (BufferOverflowException ex) {
\r
105 /* the target is full */
\r
106 source.position(source.position() - 1); /* rewind by 1 */
\r
107 cr = CoderResult.OVERFLOW;
\r
111 /* set offsets since the start */
\r
112 if (offsets != null) {
\r
113 int count = target.position() - oldTarget;
\r
114 int sourceIndex = -1;
\r
115 while (--count >= 0) offsets.put(++sourceIndex);
\r
121 protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
\r
122 byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
\r
126 * perform ascii conversion from the source array to the target array, making sure each
\r
127 * byte in the source is within the correct range
\r
129 for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++)
\r
130 targetArray[i + offset] = (char) ch;
\r
133 * if some byte was not in the correct range, we need to deal with this byte by calling
\r
134 * decodeMalformedOrUnmappable and move the source and target positions to reflect the
\r
135 * early termination of the loop
\r
137 if ((ch & 0x80) != 0) {
\r
138 source.position(i + 1);
\r
139 target.position(i + offset);
\r
140 return decodeMalformedOrUnmappable(ch);
\r
145 protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target)
\r
146 throws BufferUnderflowException, BufferOverflowException {
\r
150 * perform ascii conversion from the source buffer to the target buffer, making sure
\r
151 * each byte in the source is within the correct range
\r
153 while (((ch = (source.get() & 0xff)) & 0x80) == 0)
\r
154 target.put((char) ch);
\r
157 * if we reach here, it's because a character was not in the correct range, and we need
\r
158 * to deal with this by calling decodeMalformedOrUnmappable
\r
160 return decodeMalformedOrUnmappable(ch);
\r
163 protected CoderResult decodeMalformedOrUnmappable(int ch) {
\r
165 * put the guilty character into toUBytesArray and return a message saying that the
\r
166 * character was malformed and of length 1.
\r
168 toUBytesArray[0] = (byte) ch;
\r
170 return CoderResult.malformedForLength(1);
\r
174 class CharsetEncoderASCII extends CharsetEncoderICU {
\r
176 public CharsetEncoderASCII(CharsetICU cs) {
\r
177 super(cs, fromUSubstitution);
\r
181 private final static int NEED_TO_WRITE_BOM = 1;
\r
183 protected void implReset() {
\r
185 fromUnicodeStatus = NEED_TO_WRITE_BOM;
\r
188 protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
190 if (!source.hasRemaining()) {
\r
191 /* no input, nothing to do */
\r
192 return CoderResult.UNDERFLOW;
\r
194 if (!target.hasRemaining()) {
\r
195 /* no output available, can't do anything */
\r
196 return CoderResult.OVERFLOW;
\r
200 int oldSource = source.position();
\r
201 int oldTarget = target.position();
\r
203 if (fromUChar32 != 0) {
\r
205 * if we have a leading character in fromUChar32 that needs to be dealt with, we
\r
206 * need to check for a matching trail character and taking the appropriate action as
\r
207 * dictated by encodeTrail.
\r
209 cr = encodeTrail(source, (char) fromUChar32, flush);
\r
211 if (source.hasArray() && target.hasArray()) {
\r
212 /* optimized loop */
\r
215 * extract arrays from the buffers and obtain various constant values that will
\r
216 * be necessary in the core loop
\r
218 char[] sourceArray = source.array();
\r
219 int sourceOffset = source.arrayOffset();
\r
220 int sourceIndex = oldSource + sourceOffset;
\r
221 int sourceLength = source.limit() - oldSource;
\r
223 byte[] targetArray = target.array();
\r
224 int targetOffset = target.arrayOffset();
\r
225 int targetIndex = oldTarget + targetOffset;
\r
226 int targetLength = target.limit() - oldTarget;
\r
228 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
\r
230 int offset = targetIndex - sourceIndex;
\r
233 * perform the core loop... if it returns null, it must be due to an overflow or
\r
236 cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush);
\r
238 if (sourceLength <= targetLength) {
\r
239 source.position(oldSource + sourceLength);
\r
240 target.position(oldTarget + sourceLength);
\r
241 cr = CoderResult.UNDERFLOW;
\r
243 source.position(oldSource + targetLength);
\r
244 target.position(oldTarget + targetLength);
\r
245 cr = CoderResult.OVERFLOW;
\r
249 /* unoptimized loop */
\r
253 * perform the core loop... if it throws an exception, it must be due to an
\r
254 * overflow or underflow
\r
256 cr = encodeLoopCoreUnoptimized(source, target, flush);
\r
258 } catch (BufferUnderflowException ex) {
\r
259 cr = CoderResult.UNDERFLOW;
\r
260 } catch (BufferOverflowException ex) {
\r
261 source.position(source.position() - 1); /* rewind by 1 */
\r
262 cr = CoderResult.OVERFLOW;
\r
267 /* set offsets since the start */
\r
268 if (offsets != null) {
\r
269 int count = target.position() - oldTarget;
\r
270 int sourceIndex = -1;
\r
271 while (--count >= 0) offsets.put(++sourceIndex);
\r
277 protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
\r
278 char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
\r
283 * perform ascii conversion from the source array to the target array, making sure each
\r
284 * char in the source is within the correct range
\r
286 for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++)
\r
287 targetArray[i + offset] = (byte) ch;
\r
290 * if some byte was not in the correct range, we need to deal with this byte by calling
\r
291 * encodeMalformedOrUnmappable and move the source and target positions to reflect the
\r
292 * early termination of the loop
\r
294 if ((ch & 0xff80) != 0) {
\r
295 source.position(i + 1);
\r
296 target.position(i + offset);
\r
297 return encodeMalformedOrUnmappable(source, ch, flush);
\r
302 protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,
\r
303 boolean flush) throws BufferUnderflowException, BufferOverflowException {
\r
307 * perform ascii conversion from the source buffer to the target buffer, making sure
\r
308 * each char in the source is within the correct range
\r
310 while (((ch = (int) source.get()) & 0xff80) == 0)
\r
311 target.put((byte) ch);
\r
314 * if we reach here, it's because a character was not in the correct range, and we need
\r
315 * to deal with this by calling encodeMalformedOrUnmappable.
\r
317 return encodeMalformedOrUnmappable(source, ch, flush);
\r
320 protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) {
\r
322 * if the character is a lead surrogate, we need to call encodeTrail to attempt to match
\r
323 * it up with a trail surrogate. if not, the character is unmappable.
\r
325 return (UTF16.isSurrogate((char) ch))
\r
326 ? encodeTrail(source, (char) ch, flush)
\r
327 : CoderResult.unmappableForLength(1);
\r
330 private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
\r
332 * ASCII doesn't support characters outside the BMP, so if handleSurrogates returns null,
\r
333 * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable.
\r
335 CoderResult cr = handleSurrogates(source, lead);
\r
339 //source.position(source.position() - 2);
\r
340 return CoderResult.unmappableForLength(2);
\r
346 public CharsetDecoder newDecoder() {
\r
347 return new CharsetDecoderASCII(this);
\r
350 public CharsetEncoder newEncoder() {
\r
351 return new CharsetEncoderASCII(this);
\r
354 void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
\r
355 setFillIn.add(0,0x7f);
\r