2 *******************************************************************************
\r
3 * Copyright (C) 2006-2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 *******************************************************************************
\r
10 package com.ibm.icu.charset;
\r
12 import java.nio.ByteBuffer;
\r
13 import java.nio.CharBuffer;
\r
14 import java.nio.IntBuffer;
\r
15 import java.nio.charset.CharsetDecoder;
\r
16 import java.nio.charset.CharsetEncoder;
\r
17 import java.nio.charset.CoderResult;
\r
19 import com.ibm.icu.text.UTF16;
\r
20 import com.ibm.icu.text.UnicodeSet;
\r
23 * @author Niti Hantaweepant
\r
25 class CharsetUTF8 extends CharsetICU {
\r
/** Substitution bytes handed to the encoder's superclass constructor: EF BF BD is U+FFFD encoded in UTF-8. */
27 private static final byte[] fromUSubstitution = new byte[] { (byte) 0xef, (byte) 0xbf, (byte) 0xbd };
\r
/**
 * Forwards the three name arguments to the superclass constructor and then sets
 * the byte/char ratio fields for this charset.
 *
 * @param icuCanonicalName passed unchanged to the superclass constructor
 * @param javaCanonicalName passed unchanged to the superclass constructor
 * @param aliases passed unchanged to the superclass constructor
 */
29 public CharsetUTF8(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
\r
30 super(icuCanonicalName, javaCanonicalName, aliases);
\r
31 /* max 3 bytes per code unit from UTF-8 (4 bytes from surrogate _pair_) */
\r
32 maxBytesPerChar = 3;
\r
/* the encoder in this file emits a single byte for chars <= 0x7f */
33 minBytesPerChar = 1;
\r
34 maxCharsPerByte = 1;
\r
/*
 * Indexed by the sequence length taken from BYTES_FROM_UTF8. ANDing a lead byte with the
 * entry strips the bits that only announce the length; index 0 is -1 (all bits kept).
 */
37 private static final int BITMASK_FROM_UTF8[] = { -1, 0x7f, 0x1f, 0xf, 0x7, 0x3, 0x1 };
\r
/*
 * Indexed by an unsigned byte value (0..255); gives the length of the sequence that the
 * byte would start. The decoder treats 0 as an invalid lead byte.
 */
39 private static final byte BYTES_FROM_UTF8[] = {
\r
/* 0x00..0x7f: one byte */
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
\r
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
\r
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
\r
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
\r
/* 0x80..0xbf: bytes of the form 10xxxxxx (trail bytes), never a lead */
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
\r
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
\r
/* 0xc0..0xdf: two bytes */
46 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
\r
/* 0xe0..0xef: 3, 0xf0..0xf7: 4, 0xf8..0xfb: 5, 0xfc..0xfd: 6, 0xfe..0xff: 0 */
47 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
\r
51 * Starting with Unicode 3.0.1: UTF-8 byte sequences of length N _must_ encode code points of or
\r
52 * above utf8_minChar32[N]; byte sequences with more than 4 bytes are illegal in UTF-8, which is
\r
53 * tested with impossible values for them
\r
/*
 * Indexed by sequence length. Entries 5 and 6 are Integer.MAX_VALUE, so the decoder's test
 * "UTF8_MIN_CHAR32[bytesExpected] <= char32 && char32 <= 0x10ffff" cannot pass for them.
 */
55 private static final int UTF8_MIN_CHAR32[] = { 0, 0, 0x80, 0x800, 0x10000,
\r
56 Integer.MAX_VALUE, Integer.MAX_VALUE };
\r
/* true when this object is a CharsetCESU8; read by both the decode and encode loops */
58 private final boolean isCESU8 = this instanceof CharsetCESU8;
\r
60 class CharsetDecoderUTF8 extends CharsetDecoderICU {
\r
62 public CharsetDecoderUTF8(CharsetICU cs) {
\r
/**
 * Decodes UTF-8 bytes from source into UTF-16 chars in target. When isCESU8 is set,
 * the acceptance test differs (see the condition using isCESU8 below).
 *
 * Returns UNDERFLOW at once when source is empty and OVERFLOW at once when target is full.
 * Otherwise one of two loops with the same structure runs: an array-based loop when both
 * buffers expose a backing array, and a loop using absolute get/put calls when they do not.
 *
 * A sequence cut off by the end of source is carried to the next call in toUnicodeStatus
 * (bits collected so far), mode (expected length) and toULength (bytes read so far); the
 * raw bytes are kept in toUBytesArray. The outcome is recorded in cr as UNDERFLOW,
 * OVERFLOW, or malformedForLength(bytesSoFar).
 *
 * @param source bytes to decode; its position is set past the bytes consumed
 * @param target receives the decoded chars; its position is set past the chars written
 */
66 protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
68 if (!source.hasRemaining()) {
\r
69 /* no input, nothing to do */
\r
70 return CoderResult.UNDERFLOW;
\r
72 if (!target.hasRemaining()) {
\r
73 /* no output available, can't do anything */
\r
74 return CoderResult.OVERFLOW;
\r
77 if (source.hasArray() && target.hasArray()) {
\r
78 /* source and target are backed by arrays, so use the arrays for optimal performance */
\r
79 byte[] sourceArray = source.array();
\r
/* indices and limits below are array indices: arrayOffset() is added to each buffer value */
80 int sourceIndex = source.arrayOffset() + source.position();
\r
81 int sourceLimit = source.arrayOffset() + source.limit();
\r
82 char[] targetArray = target.array();
\r
83 int targetIndex = target.arrayOffset() + target.position();
\r
84 int targetLimit = target.arrayOffset() + target.limit();
\r
87 int char32, bytesExpected, bytesSoFar;
\r
91 /* nothing is stored in toUnicodeStatus, read a byte as input */
\r
92 char32 = (toUBytesArray[0] = sourceArray[sourceIndex++]) & 0xff;
\r
93 bytesExpected = BYTES_FROM_UTF8[char32];
\r
94 char32 &= BITMASK_FROM_UTF8[bytesExpected];
\r
97 /* a partially or fully built code point is stored in toUnicodeStatus */
\r
98 char32 = toUnicodeStatus;
\r
99 bytesExpected = mode;
\r
100 bytesSoFar = toULength;
\r
102 toUnicodeStatus = 0;
\r
107 outer: while (true) {
\r
108 if (bytesSoFar < bytesExpected) {
\r
109 /* read a trail byte and insert its relevant bits into char32 */
\r
110 if (sourceIndex >= sourceLimit) {
\r
111 /* no source left, save the state for later and break out of the loop */
\r
112 toUnicodeStatus = char32;
\r
113 mode = bytesExpected;
\r
114 toULength = bytesSoFar;
\r
115 cr = CoderResult.UNDERFLOW;
\r
/* the byte is stored in toUBytesArray before it is tested, so it is kept even when rejected */
118 if (((ch = toUBytesArray[bytesSoFar] = sourceArray[sourceIndex++]) & 0xc0) != 0x80) {
\r
119 /* not a trail byte (is not of the form 10xxxxxx) */
\r
121 toULength = bytesSoFar;
\r
122 cr = CoderResult.malformedForLength(bytesSoFar);
\r
/* append the six payload bits of the trail byte */
125 char32 = (char32 << 6) | (ch & 0x3f);
\r
127 } else if (bytesSoFar == bytesExpected && UTF8_MIN_CHAR32[bytesExpected] <= char32 && char32 <= 0x10ffff
\r
/* CESU-8: accept only sequences of at most 3 bytes; UTF-8: reject surrogate values */
128 && (isCESU8 ? bytesExpected <= 3 : !UTF16.isSurrogate((char) char32))) {
\r
130 * char32 is a valid code point and is composed of the correct number of
\r
131 * bytes ... we now need to output it in UTF-16
\r
134 if (char32 <= UConverterConstants.MAXIMUM_UCS2) {
\r
135 /* fits in 16 bits */
\r
136 targetArray[targetIndex++] = (char) char32;
\r
138 /* fit char32 into 20 bits */
\r
139 char32 -= UConverterConstants.HALF_BASE;
\r
141 /* write out the surrogates */
\r
142 targetArray[targetIndex++] = (char) ((char32 >>> UConverterConstants.HALF_SHIFT) + UConverterConstants.SURROGATE_HIGH_START);
\r
144 if (targetIndex >= targetLimit) {
\r
145 /* put in overflow buffer (not handled here) */
\r
/*
 * NOTE(review): this queues (char) char32, i.e. the low 16 bits of the HALF_BASE-reduced
 * value, while the non-overflow path just below writes
 * (char32 & HALF_MASK) + SURROGATE_LOW_START. The queued char therefore does not match the
 * low surrogate that was due - confirm. Also confirm that charErrorBufferBegin is the right
 * cursor to advance; the encoder in this file appends with errorBuffer[errorBufferLength++].
 */
146 charErrorBufferArray[charErrorBufferBegin++] = (char) char32;
\r
147 cr = CoderResult.OVERFLOW;
\r
150 targetArray[targetIndex++] = (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START);
\r
154 * we're finished outputing, so now we need to read in the first byte of the
\r
155 * next byte sequence that could form a code point
\r
158 if (sourceIndex >= sourceLimit) {
\r
159 cr = CoderResult.UNDERFLOW;
\r
162 if (targetIndex >= targetLimit) {
\r
163 cr = CoderResult.OVERFLOW;
\r
167 /* keep reading the next input (and writing it) while bytes == 1 */
\r
168 while ((bytesExpected = BYTES_FROM_UTF8[char32 = (toUBytesArray[0] = sourceArray[sourceIndex++]) & 0xff]) == 1) {
\r
169 targetArray[targetIndex++] = (char) char32;
\r
170 if (sourceIndex >= sourceLimit) {
\r
171 cr = CoderResult.UNDERFLOW;
\r
174 if (targetIndex >= targetLimit) {
\r
175 cr = CoderResult.OVERFLOW;
\r
180 /* remove the bits that indicate the number of bytes */
\r
181 char32 &= BITMASK_FROM_UTF8[bytesExpected];
\r
185 * either the lead byte in the code sequence is invalid (bytes == 0) or the
\r
186 * lead byte combined with all the trail chars does not form a valid code
\r
189 toULength = bytesSoFar;
\r
190 cr = CoderResult.malformedForLength(bytesSoFar);
\r
/* convert the array indices back to buffer positions by removing arrayOffset() */
195 source.position(sourceIndex - source.arrayOffset());
\r
196 target.position(targetIndex - target.arrayOffset());
\r
/* second variant: same loop, but the buffers are accessed with absolute get/put */
201 int sourceIndex = source.position();
\r
202 int sourceLimit = source.limit();
\r
203 int targetIndex = target.position();
\r
204 int targetLimit = target.limit();
\r
207 int char32, bytesExpected, bytesSoFar;
\r
211 /* nothing is stored in toUnicodeStatus, read a byte as input */
\r
212 char32 = (toUBytesArray[0] = source.get(sourceIndex++)) & 0xff;
\r
213 bytesExpected = BYTES_FROM_UTF8[char32];
\r
214 char32 &= BITMASK_FROM_UTF8[bytesExpected];
\r
217 /* a partially or fully built code point is stored in toUnicodeStatus */
\r
218 char32 = toUnicodeStatus;
\r
219 bytesExpected = mode;
\r
220 bytesSoFar = toULength;
\r
222 toUnicodeStatus = 0;
\r
227 outer: while (true) {
\r
228 if (bytesSoFar < bytesExpected) {
\r
229 /* read a trail byte and insert its relevant bits into char32 */
\r
230 if (sourceIndex >= sourceLimit) {
\r
231 /* no source left, save the state for later and break out of the loop */
\r
232 toUnicodeStatus = char32;
\r
233 mode = bytesExpected;
\r
234 toULength = bytesSoFar;
\r
235 cr = CoderResult.UNDERFLOW;
\r
238 if (((ch = toUBytesArray[bytesSoFar] = source.get(sourceIndex++)) & 0xc0) != 0x80) {
\r
239 /* not a trail byte (is not of the form 10xxxxxx) */
\r
241 toULength = bytesSoFar;
\r
242 cr = CoderResult.malformedForLength(bytesSoFar);
\r
245 char32 = (char32 << 6) | (ch & 0x3f);
\r
249 * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
\r
250 * - use only trail bytes after a lead byte (checked above)
\r
251 * - use the right number of trail bytes for a given lead byte
\r
252 * - encode a code point <= U+10ffff
\r
253 * - use the fewest possible number of bytes for their code points
\r
254 * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
\r
256 * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
\r
257 * There are no irregular sequences any more.
\r
258 * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
\r
260 else if (bytesSoFar == bytesExpected && UTF8_MIN_CHAR32[bytesExpected] <= char32 && char32 <= 0x10ffff
\r
261 && (isCESU8 ? bytesExpected <= 3 : !UTF16.isSurrogate((char) char32))) {
\r
263 * char32 is a valid code point and is composed of the correct number of
\r
264 * bytes ... we now need to output it in UTF-16
\r
267 if (char32 <= UConverterConstants.MAXIMUM_UCS2) {
\r
268 /* fits in 16 bits */
\r
269 target.put(targetIndex++, (char) char32);
\r
271 /* fit char32 into 20 bits */
\r
272 char32 -= UConverterConstants.HALF_BASE;
\r
274 /* write out the surrogates */
\r
277 (char) ((char32 >>> UConverterConstants.HALF_SHIFT) + UConverterConstants.SURROGATE_HIGH_START));
\r
279 if (targetIndex >= targetLimit) {
\r
280 /* put in overflow buffer (not handled here) */
\r
/*
 * NOTE(review): same concern as in the array-based loop of this method: (char) char32 is
 * queued rather than (char32 & HALF_MASK) + SURROGATE_LOW_START, and charErrorBufferBegin
 * is the cursor being advanced - confirm both.
 */
281 charErrorBufferArray[charErrorBufferBegin++] = (char) char32;
\r
282 cr = CoderResult.OVERFLOW;
\r
287 (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START));
\r
291 * we're finished outputing, so now we need to read in the first byte of the
\r
292 * next byte sequence that could form a code point
\r
295 if (sourceIndex >= sourceLimit) {
\r
296 cr = CoderResult.UNDERFLOW;
\r
299 if (targetIndex >= targetLimit) {
\r
300 cr = CoderResult.OVERFLOW;
\r
304 /* keep reading the next input (and writing it) while bytes == 1 */
\r
305 while ((bytesExpected = BYTES_FROM_UTF8[char32 = (toUBytesArray[0] = source.get(sourceIndex++)) & 0xff]) == 1) {
\r
306 target.put(targetIndex++, (char) char32);
\r
307 if (sourceIndex >= sourceLimit) {
\r
308 cr = CoderResult.UNDERFLOW;
\r
311 if (targetIndex >= targetLimit) {
\r
312 cr = CoderResult.OVERFLOW;
\r
317 /* remove the bits that indicate the number of bytes */
\r
318 char32 &= BITMASK_FROM_UTF8[bytesExpected];
\r
322 * either the lead byte in the code sequence is invalid (bytes == 0) or the
\r
323 * lead byte combined with all the trail chars does not form a valid code
\r
326 toULength = bytesSoFar;
\r
327 cr = CoderResult.malformedForLength(bytesSoFar);
\r
/* absolute get/put did not move the buffers, so publish the final positions here */
332 source.position(sourceIndex);
\r
333 target.position(targetIndex);
\r
340 class CharsetEncoderUTF8 extends CharsetEncoderICU {
\r
/**
 * Passes the charset and the fromUSubstitution byte array to the superclass constructor.
 *
 * @param cs forwarded unchanged to the superclass constructor
 */
342 public CharsetEncoderUTF8(CharsetICU cs) {
\r
343 super(cs, fromUSubstitution);
\r
347 protected void implReset() {
\r
/**
 * Encodes UTF-16 chars from source into bytes in target.
 *
 * Returns UNDERFLOW at once when source is empty and OVERFLOW at once when target is full.
 * Otherwise one of two variants runs: an array-based one when both buffers expose a backing
 * array, and one using relative get/put calls when they do not.
 *
 * If fromUChar32 is non-zero on entry it is first completed through encodeFourBytes. After
 * that, each char is written as 1 byte (<= 0x7f), 2 bytes (<= 0x7ff), 3 bytes (any other
 * non-surrogate, or any char at all when isCESU8 is set), or handed to encodeFourBytes.
 * Bytes of a sequence that no longer fit in target are appended to errorBuffer and cr is
 * set to OVERFLOW.
 *
 * @param source chars to encode
 * @param target receives the encoded bytes
 */
351 protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
353 if (!source.hasRemaining()) {
\r
354 /* no input, nothing to do */
\r
355 return CoderResult.UNDERFLOW;
\r
357 if (!target.hasRemaining()) {
\r
358 /* no output available, can't do anything */
\r
359 return CoderResult.OVERFLOW;
\r
362 if (source.hasArray() && target.hasArray()) {
\r
363 /* source and target are backed by arrays, so use the arrays for optimal performance */
\r
364 char[] sourceArray = source.array();
\r
/* indices and limits below are array indices: arrayOffset() is added to each buffer value */
365 int srcIdx = source.arrayOffset() + source.position();
\r
366 int sourceLimit = source.arrayOffset() + source.limit();
\r
367 byte[] targetArray = target.array();
\r
368 int tgtIdx = target.arrayOffset() + target.position();
\r
369 int targetLimit = target.arrayOffset() + target.limit();
\r
374 /* take care of the special condition of fromUChar32 not being 0 (it is a surrogate) */
\r
375 if (fromUChar32 != 0) {
\r
376 /* 4 bytes to encode from char32 and a following char in source */
\r
/* the array overload of encodeFourBytes works on the sourceIndex/targetIndex fields */
378 sourceIndex = srcIdx;
\r
379 targetIndex = tgtIdx;
\r
380 cr = encodeFourBytes(sourceArray, targetArray, sourceLimit, targetLimit,
\r
/* read the advanced cursors back into the locals */
382 srcIdx = sourceIndex;
\r
383 tgtIdx = targetIndex;
\r
385 source.position(srcIdx - source.arrayOffset());
\r
386 target.position(tgtIdx - target.arrayOffset());
\r
392 if (srcIdx >= sourceLimit) {
\r
393 /* nothing left to read */
\r
394 cr = CoderResult.UNDERFLOW;
\r
397 if (tgtIdx >= targetLimit) {
\r
398 /* no space left to write */
\r
399 cr = CoderResult.OVERFLOW;
\r
403 /* read the next char into char32 */
\r
404 char32 = sourceArray[srcIdx++];
\r
406 if (char32 <= 0x7f) {
\r
407 /* 1 byte to encode from char32 */
\r
409 targetArray[tgtIdx++] = encodeHeadOf1(char32);
\r
411 } else if (char32 <= 0x7ff) {
\r
412 /* 2 bytes to encode from char32 */
\r
414 targetArray[tgtIdx++] = encodeHeadOf2(char32);
\r
416 if (tgtIdx >= targetLimit) {
\r
/* target is full: queue the remaining byte in errorBuffer */
417 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
418 cr = CoderResult.OVERFLOW;
\r
421 targetArray[tgtIdx++] = encodeLastTail(char32);
\r
/* with isCESU8 set, surrogate code units also take this 3-byte branch */
423 } else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
\r
424 /* 3 bytes to encode from char32 */
\r
426 targetArray[tgtIdx++] = encodeHeadOf3(char32);
\r
428 if (tgtIdx >= targetLimit) {
\r
429 errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
\r
430 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
431 cr = CoderResult.OVERFLOW;
\r
434 targetArray[tgtIdx++] = encodeSecondToLastTail(char32);
\r
436 if (tgtIdx >= targetLimit) {
\r
437 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
438 cr = CoderResult.OVERFLOW;
\r
441 targetArray[tgtIdx++] = encodeLastTail(char32);
\r
444 /* 4 bytes to encode from char32 and a following char in source */
\r
446 sourceIndex = srcIdx;
\r
447 targetIndex = tgtIdx;
\r
448 cr = encodeFourBytes(sourceArray, targetArray, sourceLimit, targetLimit,
\r
450 srcIdx = sourceIndex;
\r
451 tgtIdx = targetIndex;
\r
457 /* set the new source and target positions and return the CoderResult stored in cr */
\r
458 source.position(srcIdx - source.arrayOffset());
\r
459 target.position(tgtIdx - target.arrayOffset());
\r
/* second variant: relative get()/put() calls move the buffer positions directly */
466 /* take care of the special condition of fromUChar32 not being 0 (it is a surrogate) */
\r
467 if (fromUChar32 != 0) {
\r
468 /* 4 bytes to encode from char32 and a following char in source */
\r
470 cr = encodeFourBytes(source, target, fromUChar32);
\r
476 if (!source.hasRemaining()) {
\r
477 /* nothing left to read */
\r
478 cr = CoderResult.UNDERFLOW;
\r
481 if (!target.hasRemaining()) {
\r
482 /* no space left to write */
\r
483 cr = CoderResult.OVERFLOW;
\r
487 /* read the next char into char32 */
\r
488 char32 = source.get();
\r
490 if (char32 <= 0x7f) {
\r
491 /* 1 byte to encode from char32 */
\r
493 target.put(encodeHeadOf1(char32));
\r
495 } else if (char32 <= 0x7ff) {
\r
496 /* 2 bytes to encode from char32 */
\r
498 target.put(encodeHeadOf2(char32));
\r
500 if (!target.hasRemaining()) {
\r
501 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
502 cr = CoderResult.OVERFLOW;
\r
505 target.put(encodeLastTail(char32));
\r
507 } else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
\r
508 /* 3 bytes to encode from char32 */
\r
510 target.put(encodeHeadOf3(char32));
\r
512 if (!target.hasRemaining()) {
\r
513 errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
\r
514 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
515 cr = CoderResult.OVERFLOW;
\r
518 target.put(encodeSecondToLastTail(char32));
\r
520 if (!target.hasRemaining()) {
\r
521 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
522 cr = CoderResult.OVERFLOW;
\r
525 target.put(encodeLastTail(char32));
\r
528 /* 4 bytes to encode from char32 and a following char in source */
\r
530 cr = encodeFourBytes(source, target, char32);
\r
536 /* set the new source and target positions and return the CoderResult stored in cr */
\r
/**
 * Array variant: pairs the surrogate in char32 via handleSurrogates, reloads char32 from
 * fromUChar32, and writes a 4-byte sequence into targetArray at the targetIndex field.
 * The head byte is stored without a limit check. Before each later byte the limit is
 * tested; if target is full, the remaining bytes are appended to errorBuffer and OVERFLOW
 * is returned.
 *
 * Per the in-line comments, a non-null result of handleSurrogates is returned to the caller
 * and null signals success.
 *
 * @param sourceArray chars being encoded; read position is the sourceIndex field
 * @param targetArray destination bytes; write position is the targetIndex field
 * @param sourceLimit passed on to handleSurrogates
 * @param targetLimit index at which targetArray counts as full
 * @param char32 the surrogate code unit to complete
 */
541 private final CoderResult encodeFourBytes(char[] sourceArray, byte[] targetArray,
\r
542 int sourceLimit, int targetLimit, int char32) {
\r
544 /* we need to read another char to match up the surrogate stored in char32 */
\r
545 /* handle the surrogate stuff, returning on a non-null CoderResult */
\r
546 CoderResult cr = handleSurrogates(sourceArray, sourceIndex, sourceLimit, (char)char32);
\r
/* from here on char32 is whatever handleSurrogates left in fromUChar32 */
551 char32 = fromUChar32;
\r
554 /* the rest is routine -- encode four bytes, stopping on overflow */
\r
556 targetArray[targetIndex++] = encodeHeadOf4(char32);
\r
558 if (targetIndex >= targetLimit) {
\r
559 errorBuffer[errorBufferLength++] = encodeThirdToLastTail(char32);
\r
560 errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
\r
561 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
562 return CoderResult.OVERFLOW;
\r
564 targetArray[targetIndex++] = encodeThirdToLastTail(char32);
\r
566 if (targetIndex >= targetLimit) {
\r
567 errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
\r
568 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
569 return CoderResult.OVERFLOW;
\r
571 targetArray[targetIndex++] = encodeSecondToLastTail(char32);
\r
573 if (targetIndex >= targetLimit) {
\r
574 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
575 return CoderResult.OVERFLOW;
\r
577 targetArray[targetIndex++] = encodeLastTail(char32);
\r
579 /* return null for success */
\r
/**
 * Buffer variant: pairs the surrogate in char32 via handleSurrogates, reloads char32 from
 * fromUChar32, and writes a 4-byte sequence with relative put() calls. The head byte is
 * put without a remaining-space check. Before each later byte hasRemaining() is tested; if
 * target is full, the remaining bytes are appended to errorBuffer and OVERFLOW is returned.
 *
 * Per the in-line comments, a non-null result of handleSurrogates is returned to the caller
 * and null signals success.
 *
 * @param source passed on to handleSurrogates
 * @param target destination for the encoded bytes
 * @param char32 the surrogate code unit to complete
 */
583 private final CoderResult encodeFourBytes(CharBuffer source, ByteBuffer target, int char32) {
\r
585 /* handle the surrogate stuff, returning on a non-null CoderResult */
\r
586 CoderResult cr = handleSurrogates(source, (char)char32);
\r
/* from here on char32 is whatever handleSurrogates left in fromUChar32 */
590 char32 = fromUChar32;
\r
593 /* the rest is routine -- encode four bytes, stopping on overflow */
\r
595 target.put(encodeHeadOf4(char32));
\r
597 if (!target.hasRemaining()) {
\r
598 errorBuffer[errorBufferLength++] = encodeThirdToLastTail(char32);
\r
599 errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
\r
600 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
601 return CoderResult.OVERFLOW;
\r
603 target.put(encodeThirdToLastTail(char32));
\r
605 if (!target.hasRemaining()) {
\r
606 errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
\r
607 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
608 return CoderResult.OVERFLOW;
\r
610 target.put(encodeSecondToLastTail(char32));
\r
612 if (!target.hasRemaining()) {
\r
613 errorBuffer[errorBufferLength++] = encodeLastTail(char32);
\r
614 return CoderResult.OVERFLOW;
\r
616 target.put(encodeLastTail(char32));
\r
618 /* return null for success */
\r
/*
 * Cursors used by the array variant of encodeFourBytes. The array path of encodeLoop copies
 * its local indices into these fields before the call and reads them back afterwards.
 */
622 private int sourceIndex;
\r
624 private int targetIndex;
\r
/**
 * Returns char32 narrowed to a byte. The encode loop calls this only for char32 <= 0x7f,
 * where the narrowing loses nothing.
 */
628 private static final byte encodeHeadOf1(int char32) {
\r
629 return (byte) char32;
\r
/**
 * Builds the first byte of a 2-byte sequence: 0xc0 combined with the bits of char32 above
 * the low six. No mask is applied, so the result has the 110xxxxx form only for
 * char32 <= 0x7ff, which is the range the encode loop uses it for.
 */
632 private static final byte encodeHeadOf2(int char32) {
\r
633 return (byte) (0xc0 | (char32 >>> 6));
\r
/**
 * Builds the first byte of a 3-byte sequence: 0xe0 combined with the bits of char32 above
 * the low twelve. No mask is applied, so the result has the 1110xxxx form only for
 * char32 <= 0xffff.
 */
636 private static final byte encodeHeadOf3(int char32) {
\r
637 return (byte) (0xe0 | ((char32 >>> 12)));
\r
/**
 * Builds the first byte of a 4-byte sequence: 0xf0 combined with the bits of char32 above
 * the low eighteen. No mask is applied, so the result has the 11110xxx form only for
 * char32 <= 0x1fffff.
 */
640 private static final byte encodeHeadOf4(int char32) {
\r
641 return (byte) (0xf0 | ((char32 >>> 18)));
\r
/** Builds a 10xxxxxx byte carrying bits 12..17 of char32. */
644 private static final byte encodeThirdToLastTail(int char32) {
\r
645 return (byte) (0x80 | ((char32 >>> 12) & 0x3f));
\r
/** Builds a 10xxxxxx byte carrying bits 6..11 of char32. */
648 private static final byte encodeSecondToLastTail(int char32) {
\r
649 return (byte) (0x80 | ((char32 >>> 6) & 0x3f));
\r
/** Builds a 10xxxxxx byte carrying bits 0..5 of char32. */
652 private static final byte encodeLastTail(int char32) {
\r
653 return (byte) (0x80 | (char32 & 0x3f));
\r
656 /* single-code point definitions -------------------------------------------- */
\r
659 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
\r
660 * @param c 8-bit code unit (byte)
\r
661 * @return TRUE or FALSE
\r
663 // static final boolean isSingle(byte c) {return (((c)&0x80)==0);}
\r
665 * Is this code unit (byte) a UTF-8 lead byte?
\r
666 * @param c 8-bit code unit (byte)
\r
667 * @return TRUE or FALSE
\r
669 // static final boolean isLead(byte c) {return ((((c)-0xc0) &
\r
670 // UConverterConstants.UNSIGNED_BYTE_MASK)<0x3e);}
\r
672 * Is this code unit (byte) a UTF-8 trail byte?
\r
675 * 8-bit code unit (byte)
\r
676 * @return TRUE or FALSE
\r
678 /*private static final boolean isTrail(byte c) {
\r
679 return (((c) & 0xc0) == 0x80);
\r
/** Returns a new CharsetDecoderUTF8 constructed with this charset. */
682 public CharsetDecoder newDecoder() {
\r
683 return new CharsetDecoderUTF8(this);
\r
/** Returns a new CharsetEncoderUTF8 constructed with this charset. */
686 public CharsetEncoder newEncoder() {
\r
687 return new CharsetEncoderUTF8(this);
\r
/**
 * Delegates to getNonSurrogateUnicodeSet(setFillIn).
 *
 * @param setFillIn passed unchanged to getNonSurrogateUnicodeSet
 * @param which not used in the statement shown here
 */
691 void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
\r
692 getNonSurrogateUnicodeSet(setFillIn);
\r