2 *******************************************************************************
\r
3 * Copyright (C) 2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.charset;
\r
9 import java.nio.ByteBuffer;
\r
10 import java.nio.CharBuffer;
\r
11 import java.nio.IntBuffer;
\r
12 import java.nio.charset.CharsetDecoder;
\r
13 import java.nio.charset.CharsetEncoder;
\r
14 import java.nio.charset.CoderResult;
\r
17 import com.ibm.icu.text.UnicodeSet;
\r
18 import com.ibm.icu.text.UTF16;
\r
19 import com.ibm.icu.lang.UCharacter;
\r
25 class CharsetSCSU extends CharsetICU{
\r
26 /* SCSU definitions --------------------------------------------------------- */
\r
28 /* SCSU command byte values */
\r
30 private static final short SQ0=0x01; /* Quote from window pair 0 */
\r
31 private static final short SQ7=0x08; /* Quote from window pair 7 */
\r
32 private static final short SDX=0x0B; /* Define a window as extended */
\r
33 //private static final short Srs=0x0C; /* reserved */
\r
34 private static final short SQU=0x0E; /* Quote a single Unicode character */
\r
35 private static final short SCU=0x0F; /* Change to Unicode mode */
\r
36 private static final short SC0=0x10; /* Select window 0 */
\r
37 private static final short SC7=0x17; /* Select window 7 */
\r
38 private static final short SD0=0x18; /* Define and select window 0 */
\r
39 //private static final short SD7=0x1F; /* Define and select window 7 */
\r
41 private static final short UC0=0xE0; /* Select window 0 */
\r
42 private static final short UC7=0xE7; /* Select window 7 */
\r
43 private static final short UD0=0xE8; /* Define and select window 0 */
\r
44 private static final short UD7=0xEF; /* Define and select window 7 */
\r
45 private static final short UQU=0xF0; /* Quote a single Unicode character */
\r
46 private static final short UDX=0xF1; /* Define a Window as extended */
\r
47 private static final short Urs=0xF2; /* reserved */
\r
52 * Unicode code points from 3400 to E000 are not addressable by
\r
53 * dynamic window, since in these areas no short run alphabets are
\r
54 * found. Therefore add gapOffset to all values from gapThreshold.
\r
56 private static final int gapThreshold=0x68;
\r
57 private static final int gapOffset = 0xAC00 ;
\r
58 /* values between reservedStart and fixedThreshold are reserved */
\r
59 private static final int reservedStart=0xA8;
\r
60 /* use table of predefined fixed offsets for values from fixedThreshold */
\r
61 private static final int fixedThreshold=0xF;
\r
64 protected byte[] fromUSubstitution = new byte[]{(byte)0x0E,(byte)0xFF, (byte)0xFD};
\r
66 /* constant offsets for the 8 static windows */
\r
67 private static final int staticOffsets[]={
\r
68 0x0000, /* ASCII for quoted tags */
\r
69 0x0080, /* Latin - 1 Supplement (for access to punctuation) */
\r
70 0x0100, /* Latin Extended-A */
\r
71 0x0300, /* Combining Diacritical Marks */
\r
72 0x2000, /* General Punctuation */
\r
73 0x2080, /* Currency Symbols */
\r
74 0x2100, /* Letterlike Symbols and Number Forms */
\r
75 0x3000 /* CJK Symbols and punctuation */
\r
78 /* initial offsets for the 8 dynamic (sliding) windows */
\r
79 private static final int initialDynamicOffsets[]={
\r
80 0x0080, /* Latin-1 */
\r
81 0x00C0, /* Latin Extended A */
\r
82 0x0400, /* Cyrillic */
\r
83 0x0600, /* Arabic */
\r
84 0x0900, /* Devanagari */
\r
85 0x3040, /* Hiragana */
\r
86 0x30A0, /* Katakana */
\r
87 0xFF00 /* Fullwidth ASCII */
\r
90 /* Table of fixed predefined Offsets */
\r
91 private static final int fixedOffsets[]={
\r
92 /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
\r
93 /* 0xFA */ 0x0250, /* IPA extensions */
\r
94 /* 0xFB */ 0x0370, /* Greek */
\r
95 /* 0xFC */ 0x0530, /* Armenian */
\r
96 /* 0xFD */ 0x3040, /* Hiragana */
\r
97 /* 0xFE */ 0x30A0, /* Katakana */
\r
98 /* 0xFF */ 0xFF60 /* Halfwidth Katakana */
\r
103 private static final int readCommand=0;
\r
104 private static final int quotePairOne=1;
\r
105 private static final int quotePairTwo=2;
\r
106 private static final int quoteOne=3;
\r
107 private static final int definePairOne=4;
\r
108 private static final int definePairTwo=5;
\r
109 private static final int defineOne=6;
\r
112 private final class SCSUData{
\r
113 /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
\r
114 int toUDynamicOffsets[] = new int[8] ;
\r
115 int fromUDynamicOffsets[] = new int[8] ;
\r
117 /* state machine state - toUnicode */
\r
118 boolean toUIsSingleByteMode;
\r
120 byte toUQuoteWindow, toUDynamicWindow;
\r
122 short toUPadding[];
\r
124 /* state machine state - fromUnicode */
\r
125 boolean fromUIsSingleByteMode;
\r
126 byte fromUDynamicWindow;
\r
129 * windowUse[] keeps track of the use of the dynamic windows:
\r
130 * At nextWindowUseIndex there is the least recently used window,
\r
131 * and the following windows (in a wrapping manner) are more and more
\r
133 * At nextWindowUseIndex-1 there is the most recently used window.
\r
136 byte nextWindowUseIndex;
\r
137 byte windowUse[] = new byte[8];
\r
144 for(int i=0;i<8;i++){
\r
145 this.toUDynamicOffsets[i] = initialDynamicOffsets[i];
\r
147 this.toUIsSingleByteMode = true;
\r
148 this.toUState = readCommand;
\r
149 this.toUQuoteWindow = 0;
\r
150 this.toUDynamicWindow = 0;
\r
151 this.toUByteOne = 0;
\r
152 this.fromUIsSingleByteMode = true;
\r
153 this.fromUDynamicWindow = 0;
\r
154 for(int i=0;i<8;i++){
\r
155 this.fromUDynamicOffsets[i] = initialDynamicOffsets[i];
\r
157 this.nextWindowUseIndex = 0;
\r
158 switch(this.locale){
\r
160 for(int i=0;i<8;i++){
\r
161 this.windowUse[i] = initialWindowUse_ja[i];
\r
165 for(int i=0;i<8;i++){
\r
166 this.windowUse[i] = initialWindowUse[i];
\r
173 static final byte initialWindowUse[]={ 7, 0, 3, 2, 4, 5, 6, 1 };
\r
174 static final byte initialWindowUse_ja[]={ 3, 2, 4, 1, 0, 7, 5, 6 };
\r
177 //private static final int lGeneric = 0;
\r
178 private static final int l_ja = 1;
\r
181 private SCSUData extraInfo = null;
\r
/**
 * Creates the SCSU charset.
 * Forwards the three name arguments to the CharsetICU constructor, sets the
 * per-character size limits (1 to 3 bytes per char, at most 1 char per byte)
 * and allocates the SCSUData instance kept in extraInfo.
 *
 * NOTE(review): extraInfo is a field of the charset, and the decoder and encoder
 * implReset() bodies both call extraInfo.initialize(); coders created from the same
 * charset instance therefore appear to share one state object - confirm this is intended.
 *
 * @param icuCanonicalName passed through to the superclass constructor
 * @param javaCanonicalName passed through to the superclass constructor
 * @param aliases passed through to the superclass constructor
 */
183 public CharsetSCSU(String icuCanonicalName, String javaCanonicalName, String[] aliases){
\r
184 super(icuCanonicalName, javaCanonicalName, aliases);
\r
185 maxBytesPerChar = 3;
\r
186 minBytesPerChar = 1;
\r
187 maxCharsPerByte = 1;
\r
188 extraInfo = new SCSUData();
\r
191 class CharsetDecoderSCSU extends CharsetDecoderICU {
\r
192 /* label values for supporting behavior similar to goto in C */
\r
193 private static final int FastSingle=0;
\r
194 private static final int SingleByteMode=1;
\r
195 private static final int EndLoop=2;
\r
198 private static final int ByteMode = 0;
\r
199 private static final int UnicodeMode =1;
\r
201 public CharsetDecoderSCSU(CharsetICU cs) {
\r
206 //private SCSUData data ;
\r
207 protected void implReset(){
\r
210 extraInfo.initialize();
\r
215 //Get the state machine state
\r
216 private boolean isSingleByteMode ;
\r
217 private short state ;
\r
218 private byte quoteWindow ;
\r
219 private byte dynamicWindow ;
\r
220 private short byteOne;
\r
223 //sourceIndex=-1 if the current character began in the previous buffer
\r
224 private int sourceIndex ;
\r
225 private int nextSourceIndex ;
\r
229 private boolean LabelLoop;// used to break the while loop
\r
231 protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
235 //Get the state machine state
\r
236 isSingleByteMode = data.toUIsSingleByteMode;
\r
237 state = data.toUState;
\r
238 quoteWindow = data.toUQuoteWindow;
\r
239 dynamicWindow = data.toUDynamicWindow;
\r
240 byteOne = data.toUByteOne;
\r
244 //sourceIndex=-1 if the current character began in the previous buffer
\r
245 sourceIndex = data.toUState == readCommand ? 0: -1 ;
\r
246 nextSourceIndex = 0;
\r
248 cr = CoderResult.UNDERFLOW;
\r
251 if(isSingleByteMode){
\r
254 /*fast path for single-byte mode*/
\r
255 labelType = fastSingle(source, target, offsets, ByteMode);
\r
257 case SingleByteMode:
\r
258 /* normal state machine for single-byte mode, minus handling for what fastSingleCovers */
\r
259 labelType = singleByteMode(source, target, offsets, ByteMode);
\r
262 endLoop(source, target, offsets);
\r
268 /*fast path for single-byte mode*/
\r
269 labelType = fastSingle(source, target, offsets, UnicodeMode);
\r
271 case SingleByteMode:
\r
272 /* normal state machine for single-byte mode, minus handling for what fastSingleCovers */
\r
273 labelType = singleByteMode(source, target, offsets, UnicodeMode);
\r
276 endLoop(source, target, offsets);
\r
279 //LabelLoop = false;
\r
285 private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){
\r
287 if(modeType==ByteMode){
\r
289 if(state==readCommand){
\r
290 while(source.hasRemaining() && target.hasRemaining() && (b=(short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK)) >= 0x20){
\r
291 source.position(source.position()+1);
\r
294 /*Write US graphic character or DEL*/
\r
295 target.put((char)b);
\r
296 if(offsets != null){
\r
297 offsets.put(sourceIndex);
\r
300 /*Write from dynamic window*/
\r
301 int c = data.toUDynamicOffsets[dynamicWindow] + (b&0x7f);
\r
303 target.put((char)c);
\r
304 if(offsets != null){
\r
305 offsets.put(sourceIndex);
\r
308 /*Output surrogate pair */
\r
309 target.put((char)(0xd7c0 + (c>>10)));
\r
310 if(target.hasRemaining()){
\r
311 target.put((char)(0xdc00 | (c&0x3ff)));
\r
312 if(offsets != null){
\r
313 offsets.put(sourceIndex);
\r
314 offsets.put(sourceIndex);
\r
317 /* target overflow */
\r
318 if(offsets != null){
\r
319 offsets.put(sourceIndex);
\r
321 charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff));
\r
322 charErrorBufferLength = 1;
\r
324 cr = CoderResult.OVERFLOW;
\r
330 sourceIndex = nextSourceIndex;
\r
332 // label = SingleByteMode;
\r
334 }else if(modeType==UnicodeMode){
\r
335 /* fast path for unicode mode */
\r
336 if(state == readCommand){
\r
337 while((source.position()+1)<source.limit() && target.hasRemaining() && (((b=source.get(source.position()))-UC0)&UConverterConstants.UNSIGNED_BYTE_MASK)>(Urs-UC0)){
\r
338 target.put((char)((b<<8)|(source.get(source.position()+1)&UConverterConstants.UNSIGNED_BYTE_MASK)));
\r
339 if(offsets != null){
\r
340 offsets.put(sourceIndex);
\r
342 sourceIndex = nextSourceIndex;
\r
343 nextSourceIndex+=2;
\r
344 source.position(source.position()+2);
\r
348 label = SingleByteMode;
\r
/*
 * Byte-at-a-time decoder state machine, used for whatever the fast path does not handle.
 * modeType selects the branch: ByteMode (SCSU single-byte mode) or UnicodeMode.
 * Each branch loops while source has bytes, sets cr to OVERFLOW when target is full,
 * and advances state (readCommand, quotePairOne/Two, definePairOne/Two, ...) per input byte.
 * label starts as SingleByteMode and is set to FastSingle once a complete unit has been
 * handled; presumably it is the value returned to the caller (the return is not visible here).
 * Several lines of this method (switch/case/brace lines) are absent from this listing.
 */
352 private int singleByteMode(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){
\r
353 int label = SingleByteMode;
\r
354 if(modeType == ByteMode){
\r
355 while(source.hasRemaining()){
\r
356 if(!target.hasRemaining()){
\r
357 cr = CoderResult.OVERFLOW;
\r
/* b is loaded as an unsigned value 0..255 in this branch */
361 b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
\r
365 /*redundant conditions are commented out */
\r
/* 0x2601 has bits 0, 9, 10 and 13 set: NUL, TAB, LF and CR are written through unchanged */
366 if(((1L<<b)&0x2601)!=0){
\r
367 target.put((char)b);
\r
368 if(offsets != null){
\r
369 offsets.put(sourceIndex);
\r
371 sourceIndex = nextSourceIndex;
\r
372 label = FastSingle;
\r
374 }else if(SC0 <= b){
\r
376 dynamicWindow = (byte)(b-SC0);
\r
377 sourceIndex = nextSourceIndex;
\r
378 label = FastSingle;
\r
380 }else /* if(SD0<=b && b<=SQ7)*/{
\r
381 dynamicWindow = (byte)(b - SD0);
\r
384 }else if(/* SQ0<=b &&*/b <= SQ7){
\r
385 quoteWindow = (byte)(b - SQ0);
\r
388 state = definePairOne;
\r
390 state = quotePairOne;
\r
392 sourceIndex = nextSourceIndex;
\r
393 isSingleByteMode = false;
\r
394 label = FastSingle;
\r
397 /*callback (illegal)*/
\r
398 cr = CoderResult.malformedForLength(1);
\r
399 toUBytesArray[0] = (byte)b;
\r
405 /* Store the first byte of a multibyte sequence in toUByte[] */
\r
406 toUBytesArray[0] = (byte)b;
\r
411 toUBytesArray[1] = (byte)b;
\r
413 state = quotePairTwo;
\r
/* second byte of a quoted UTF-16 unit: combine with the stored high byte */
416 target.put((char)((byteOne<< 8) | b));
\r
417 if(offsets != null){
\r
418 offsets.put(sourceIndex);
\r
420 sourceIndex = nextSourceIndex;
\r
421 state = readCommand;
\r
422 label = FastSingle;
\r
426 /* all static offsets are in the BMP */
\r
427 target.put((char)(staticOffsets[quoteWindow] + b));
\r
428 if(offsets != null){
\r
429 offsets.put(sourceIndex);
\r
432 /*write from dynamic window */
\r
433 int c = data.toUDynamicOffsets[quoteWindow] + (b&0x7f);
\r
435 target.put((char)c);
\r
436 if(offsets != null){
\r
437 offsets.put(sourceIndex);
\r
440 /* output surrogate pair */
\r
/* 0xd7c0 + (c>>10) equals 0xd800 + ((c-0x10000)>>10), i.e. the lead surrogate */
441 target.put((char)(0xd7c0+(c>>10)));
\r
442 if(target.hasRemaining()){
\r
443 target.put((char)(0xdc00 | (c&0x3ff)));
\r
444 if(offsets != null){
\r
445 offsets.put(sourceIndex);
\r
446 offsets.put(sourceIndex);
\r
449 /* target overflow */
\r
450 if(offsets != null){
\r
451 offsets.put(sourceIndex);
\r
/* no room for the trail surrogate: park it in the char error buffer */
453 charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff));
\r
454 charErrorBufferLength = 1;
\r
456 cr = CoderResult.OVERFLOW;
\r
462 sourceIndex = nextSourceIndex;
\r
463 state = readCommand;
\r
464 label = FastSingle;
\r
466 case definePairOne:
\r
/* top 3 bits select the window, low 5 bits are the high part of the extended offset */
467 dynamicWindow = (byte)((b>>5)&7);
\r
468 byteOne = (byte)(b&0x1f);
\r
469 toUBytesArray[1] = (byte)b;
\r
471 state = definePairTwo;
\r
473 case definePairTwo:
\r
474 data.toUDynamicOffsets[dynamicWindow] = 0x10000 + (byteOne<<15L | b<<7L);
\r
475 sourceIndex = nextSourceIndex;
\r
476 state = readCommand;
\r
477 label = FastSingle;
\r
481 /*callback (illegal)*/
\r
482 toUBytesArray[1] = (byte)b;
\r
486 }else if(b<gapThreshold){
\r
487 data.toUDynamicOffsets[dynamicWindow] = b<<7L;
\r
/*
 * NOTE(review): the (byte) cast makes this a signed comparison. b is 0..255 here, so for
 * b >= gapThreshold+0x80 the difference turns negative and passes the test; such bytes are
 * then treated as gap-range codes and never reach the branches below. An unsigned 8-bit
 * compare (e.g. masking with UNSIGNED_BYTE_MASK) looks intended - confirm.
 */
488 }else if((byte)(b - gapThreshold)<(reservedStart - gapThreshold)){
\r
489 data.toUDynamicOffsets[dynamicWindow] = (b<<7L) + gapOffset;
\r
490 }else if(b>=fixedThreshold){
\r
/*
 * NOTE(review): fixedOffsets has 7 entries, so b-fixedThreshold is only a valid index
 * when fixedThreshold is 0xF9 - verify the constant's declared value.
 */
491 data.toUDynamicOffsets[dynamicWindow] = fixedOffsets[b-fixedThreshold];
\r
493 /*callback (illegal)*/
\r
494 toUBytesArray[1] = (byte)b;
\r
499 sourceIndex = nextSourceIndex;
\r
500 state = readCommand;
\r
501 label = FastSingle;
\r
506 }else if(modeType==UnicodeMode){
\r
507 while(source.hasRemaining()){
\r
508 if(!target.hasRemaining()){
\r
509 cr = CoderResult.OVERFLOW;
\r
/*
 * NOTE(review): signed (byte) comparison again. Whenever (b-UC0) modulo 256 is 0x80 or more
 * the cast yields a negative value and this test fails, so some ordinary lead bytes would
 * fall through to the command branches below. The load of b for this branch is not visible
 * in this listing - confirm its range and whether an unsigned compare was intended.
 */
517 if((byte)(b -UC0)>(Urs - UC0)){
\r
519 toUBytesArray[0] = (byte)b;
\r
521 state = quotePairOne;
\r
522 }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UC7){
\r
523 dynamicWindow = (byte)(b - UC0);
\r
524 sourceIndex = nextSourceIndex;
\r
525 isSingleByteMode = true;
\r
526 label = FastSingle;
\r
528 }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UD7){
\r
529 dynamicWindow = (byte)(b - UD0);
\r
530 isSingleByteMode = true;
\r
531 toUBytesArray[0] = (byte)b;
\r
534 label = SingleByteMode;
\r
536 }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UDX){
\r
537 isSingleByteMode = true;
\r
538 toUBytesArray[0] = (byte)b;
\r
540 state = definePairOne;
\r
541 label = SingleByteMode;
\r
543 }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UQU){
\r
544 toUBytesArray[0] = (byte)b;
\r
546 state = quotePairOne;
\r
548 /* callback (illegal)*/
\r
549 cr = CoderResult.malformedForLength(1);
\r
550 toUBytesArray[0] = (byte)b;
\r
558 toUBytesArray[1] = (byte)b;
\r
560 state = quotePairTwo;
\r
563 target.put((char)((byteOne<<8) | b));
\r
564 if(offsets != null){
\r
565 offsets.put(sourceIndex);
\r
567 sourceIndex = nextSourceIndex;
\r
568 state = readCommand;
\r
569 label = FastSingle;
\r
578 private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
\r
579 if(cr==CoderResult.OVERFLOW){
\r
580 state = readCommand;
\r
581 }else if(state == readCommand){
\r
584 data.toUIsSingleByteMode = isSingleByteMode;
\r
585 data.toUState = state;
\r
586 data.toUQuoteWindow = quoteWindow;
\r
587 data.toUDynamicWindow = dynamicWindow;
\r
588 data.toUByteOne = byteOne;
\r
593 class CharsetEncoderSCSU extends CharsetEncoderICU{
\r
594 public CharsetEncoderSCSU(CharsetICU cs) {
\r
595 super(cs, fromUSubstitution);
\r
599 //private SCSUData data;
\r
600 protected void implReset() {
\r
602 extraInfo.initialize();
\r
605 /* label values for supporting behavior similar to goto in C */
\r
606 private static final int Loop=0;
\r
607 private static final int GetTrailUnicode=1;
\r
608 private static final int OutputBytes=2;
\r
609 private static final int EndLoop =3;
\r
612 private int length;
\r
614 ///variables of compression heuristics
\r
615 private int offset;
\r
616 private char lead, trail;
\r
618 private byte window;
\r
620 //Get the state machine state
\r
621 private boolean isSingleByteMode;
\r
622 private byte dynamicWindow ;
\r
623 private int currentOffset;
\r
628 //sourceIndex=-1 if the current character began in the previous buffer
\r
629 private int sourceIndex ;
\r
630 private int nextSourceIndex;
\r
631 private int targetCapacity;
\r
633 private boolean LabelLoop;//used to break the while loop
\r
634 private boolean AfterGetTrail;// its value is set to true in order to ignore the code before getTrailSingle:
\r
635 private boolean AfterGetTrailUnicode;// is value is set to true in order to ignore the code before getTrailUnicode:
\r
639 protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
\r
641 cr = CoderResult.UNDERFLOW;
\r
643 //Get the state machine state
\r
644 isSingleByteMode = data.fromUIsSingleByteMode;
\r
645 dynamicWindow = data.fromUDynamicWindow;
\r
646 currentOffset = data.fromUDynamicOffsets[dynamicWindow];
\r
649 sourceIndex = c== 0 ? 0: -1 ;
\r
650 nextSourceIndex = 0;
\r
653 targetCapacity = target.limit()-target.position();
\r
655 //sourceIndex=-1 if the current character began in the previous buffer
\r
656 sourceIndex = c== 0 ? 0: -1 ;
\r
657 nextSourceIndex = 0;
\r
659 int labelType = Loop; // set to Loop so that the code starts from loop:
\r
661 AfterGetTrail = false;
\r
662 AfterGetTrailUnicode = false;
\r
667 labelType = loop(source, target, offsets);
\r
669 case GetTrailUnicode:
\r
670 labelType = getTrailUnicode(source, target, offsets);
\r
673 labelType = outputBytes(source, target, offsets);
\r
676 endLoop(source, target, offsets);
\r
683 private byte getWindow(int[] offsets){
\r
686 if(((c-offsets[i]) & UConverterConstants.UNSIGNED_INT_MASK) <= 0x7f){
\r
693 private boolean isInOffsetWindowOrDirect(int offsetValue, int a){
\r
694 return (boolean)((a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f &
\r
695 ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) ||
\r
696 ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20
\r
697 || ((1L<<(a & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0))));
\r
700 private byte getNextDynamicWindow(){
\r
701 byte windowValue = data.windowUse[data.nextWindowUseIndex];
\r
702 if(++data.nextWindowUseIndex==8){
\r
703 data.nextWindowUseIndex=0;
\r
705 return windowValue;
\r
708 private void useDynamicWindow(byte windowValue){
\r
709 /*first find the index of the window*/
\r
711 i = data.nextWindowUseIndex;
\r
716 }while(data.windowUse[i]!=windowValue);
\r
718 /*now copy each window[i+1] to [i]*/
\r
723 while(j!=data.nextWindowUseIndex){
\r
724 data.windowUse[i] = data.windowUse[j];
\r
731 /*finally, set the window into the most recently used index*/
\r
732 data.windowUse[i]= windowValue;
\r
736 private int getDynamicOffset(){
\r
739 if(((c-fixedOffsets[i])&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){
\r
740 offset = fixedOffsets[i];
\r
744 if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x80){
\r
745 /*No dynamic window for US-ASCII*/
\r
747 }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) ||
\r
748 ((c-0x1d000)&UConverterConstants.UNSIGNED_INT_MASK)<=(0x1ffff-0x1d000)){
\r
749 /*This character is in the code range for a "small", i.e, reasonably windowable, script*/
\r
750 offset = c&0x7fffff80;
\r
751 return (int)(c>>7);
\r
752 }else if(0xe000<=(c&UConverterConstants.UNSIGNED_INT_MASK) && (c&UConverterConstants.UNSIGNED_INT_MASK)!=0xfeff && (c&UConverterConstants.UNSIGNED_INT_MASK) < 0xfff0){
\r
753 /*for these characters we need to take the gapOffset into account*/
\r
754 offset=(c)&0x7fffff80;
\r
755 return (int)((c-gapOffset)>>7);
\r
761 private int loop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
\r
763 if(isSingleByteMode){
\r
764 if(c!=0 && targetCapacity>0 && !AfterGetTrail){
\r
765 label = getTrail(source, target, offsets);
\r
768 /*state machine for single byte mode*/
\r
769 while(AfterGetTrail || source.hasRemaining()){
\r
770 if(targetCapacity<=0 && !AfterGetTrail){
\r
772 cr = CoderResult.OVERFLOW;
\r
776 if(!AfterGetTrail){
\r
781 if(((c -0x20)&UConverterConstants.UNSIGNED_INT_MASK)<=0x5f && !AfterGetTrail){
\r
782 /*pass US-ASCII graphic character through*/
\r
783 target.put((byte)c);
\r
785 offsets.put(sourceIndex);
\r
788 }else if((c & UConverterConstants.UNSIGNED_INT_MASK)<0x20 && !AfterGetTrail){
\r
789 if(((1L<<(c & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0){
\r
791 target.put((byte)c);
\r
793 offsets.put(sourceIndex);
\r
797 /*quote c0 control character*/
\r
800 label = OutputBytes;
\r
803 } else if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && !AfterGetTrail){
\r
804 /*use the current dynamic window*/
\r
805 target.put((byte)(delta|0x80));
\r
807 offsets.put(sourceIndex);
\r
810 } else if(AfterGetTrail || UTF16.isSurrogate((char)c)){
\r
811 if(!AfterGetTrail){
\r
812 if(UTF16.isLeadSurrogate((char)c)){
\r
813 label = getTrail(source, target, offsets);
\r
814 if(label==EndLoop){
\r
818 /*this is unmatched lead code unit (2nd Surrogate)*/
\r
819 /*callback(illegal)*/
\r
820 cr = CoderResult.malformedForLength(1);
\r
828 AfterGetTrail = false;
\r
831 /*Compress supplementary character U+10000...U+10ffff */
\r
832 if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){
\r
833 /*use the current dynamic window*/
\r
834 target.put((byte)(delta|0x80));
\r
836 offsets.put(sourceIndex);
\r
839 } else if((window=getWindow(data.fromUDynamicOffsets))>=0){
\r
840 /*there is a dynamic window that contains this character, change to it*/
\r
841 dynamicWindow = window;
\r
842 currentOffset = data.fromUDynamicOffsets[dynamicWindow];
\r
843 useDynamicWindow(dynamicWindow);
\r
844 c = (((int)(SC0+dynamicWindow))<<8 | (c-currentOffset)|0x80);
\r
846 label = OutputBytes;
\r
848 } else if((code=getDynamicOffset())>=0){
\r
849 /*might check if there are more characters in this window to come */
\r
850 /*define an extended window with this character*/
\r
852 dynamicWindow=getNextDynamicWindow();
\r
853 currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
\r
854 useDynamicWindow(dynamicWindow);
\r
855 c = ((int)(SDX<<24) | (int)(dynamicWindow<<21)|
\r
856 (int)(code<<8)| (c- currentOffset) |0x80 );
\r
857 // c = (((SDX)<<25) | (dynamicWindow<<21)|
\r
858 // (code<<8)| (c- currentOffset) |0x80 );
\r
860 label = OutputBytes;
\r
863 /*change to unicode mode and output this (lead, trail) pair*/
\r
864 isSingleByteMode = false;
\r
865 target.put((byte)SCU);
\r
867 offsets.put(sourceIndex);
\r
870 c = ((int)(lead<<16))|trail;
\r
872 label = OutputBytes;
\r
875 } else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0xa0){
\r
876 /*quote C1 control character*/
\r
877 c = (c&0x7f) | (SQ0+1)<<8; /*SQ0+1 == SQ1*/
\r
879 label = OutputBytes;
\r
881 } else if((c&UConverterConstants.UNSIGNED_INT_MASK)==0xfeff || (c&UConverterConstants.UNSIGNED_INT_MASK)>= 0xfff0){
\r
882 /*quote signature character = byte order mark and specials*/
\r
885 label = OutputBytes;
\r
888 /*compress all other BMP characters*/
\r
889 if((window=getWindow(data.fromUDynamicOffsets))>=0){
\r
890 /*there is a window defined that contains this character - switch to it or quote from it*/
\r
891 if(source.position()>=source.limit() || isInOffsetWindowOrDirect(data.fromUDynamicOffsets[window], source.get(source.position()))){
\r
892 /*change to dynamic window*/
\r
893 dynamicWindow = window;
\r
894 currentOffset = data.fromUDynamicOffsets[dynamicWindow];
\r
895 useDynamicWindow(dynamicWindow);
\r
896 c = ((int)((SC0+window)<<8)) | (c- currentOffset) | 0x80;
\r
898 label = OutputBytes;
\r
901 /*quote from dynamic window*/
\r
902 c = ((int)((SQ0+window)<<8)) | (c - data.fromUDynamicOffsets[window]) |
\r
905 label = OutputBytes;
\r
908 } else if((window = getWindow(staticOffsets))>=0){
\r
909 /*quote from static window*/
\r
910 c = ((int)((SQ0+window)<<8)) | (c - staticOffsets[window]);
\r
912 label = OutputBytes;
\r
914 }else if((code=getDynamicOffset())>=0){
\r
915 /*define a dynamic window with this character*/
\r
916 dynamicWindow = getNextDynamicWindow();
\r
917 currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
\r
918 useDynamicWindow(dynamicWindow);
\r
919 c = ((int)((SD0+dynamicWindow)<<16)) | (int)(code<<8)|
\r
920 (c- currentOffset) | 0x80;
\r
922 label = OutputBytes;
\r
924 } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() ||
\r
925 ((int)((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))< (0xd800 - 0x3400))){
\r
928 * this character is not compressible (a BMP ideograph or similar)
\r
929 * switch to Unicode mode if this is the last character in the block
\r
930 * or there is at least one more ideograph following immediately
\r
932 isSingleByteMode = false;
\r
935 label = OutputBytes;
\r
941 label = OutputBytes;
\r
945 /*normal end of conversion : prepare for new character */
\r
947 sourceIndex = nextSourceIndex;
\r
950 if(c!=0 && targetCapacity>0 && !AfterGetTrailUnicode){
\r
951 label = GetTrailUnicode;
\r
955 /*state machine for Unicode*/
\r
956 /*unicodeByteMode*/
\r
957 while(AfterGetTrailUnicode || source.hasRemaining()){
\r
958 if(targetCapacity<=0 && !AfterGetTrailUnicode){
\r
960 cr = CoderResult.OVERFLOW;
\r
964 if(!AfterGetTrailUnicode){
\r
969 if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && !AfterGetTrailUnicode){
\r
970 /*not compressible, write character directly */
\r
971 if(targetCapacity>=2){
\r
972 target.put((byte)(c>>8));
\r
973 target.put((byte)c);
\r
975 offsets.put(sourceIndex);
\r
976 offsets.put(sourceIndex);
\r
981 label = OutputBytes;
\r
984 } else if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300*/&& !AfterGetTrailUnicode){
\r
985 /*compress BMP character if the following one is not an uncompressible ideograph*/
\r
986 if(!(source.hasRemaining() && (((source.get(source.position())-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400))){
\r
987 if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26
\r
988 || (((c-0x41)&UConverterConstants.UNSIGNED_INT_MASK))<26)){
\r
989 /*ASCII digit or letter*/
\r
990 isSingleByteMode = true;
\r
991 c |=((int)((UC0+dynamicWindow)<<8))|c;
\r
993 label = OutputBytes;
\r
995 } else if((window=getWindow(data.fromUDynamicOffsets))>=0){
\r
996 /*there is a dynamic window that contains this character, change to it*/
\r
997 isSingleByteMode = true;
\r
998 dynamicWindow = window;
\r
999 currentOffset = data.fromUDynamicOffsets[dynamicWindow];
\r
1000 useDynamicWindow(dynamicWindow);
\r
1001 c = ((int)((UC0+dynamicWindow)<<8)) | (c- currentOffset) | 0x80;
\r
1003 label = OutputBytes;
\r
1005 } else if((code=getDynamicOffset())>=0){
\r
1006 /*define a dynamic window with this character*/
\r
1007 isSingleByteMode = true;
\r
1008 dynamicWindow = getNextDynamicWindow();
\r
1009 currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
\r
1010 useDynamicWindow(dynamicWindow);
\r
1011 c = ((int)((UD0+dynamicWindow)<<16)) | (int)(code<<8)
\r
1012 |(c- currentOffset) | 0x80;
\r
1014 label = OutputBytes;
\r
1019 /*don't know how to compress these character, just write it directly*/
\r
1021 label = OutputBytes;
\r
1023 } else if(c<0xe000 && !AfterGetTrailUnicode){
\r
1024 label = GetTrailUnicode;
\r
1027 /*quote to avoid SCSU tags*/
\r
1030 label = OutputBytes;
\r
1034 if(AfterGetTrailUnicode){
\r
1035 AfterGetTrailUnicode = false;
\r
1037 /*normal end of conversion, prepare for a new character*/
\r
1039 sourceIndex = nextSourceIndex;
\r
1046 private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
\r
1049 if(source.hasRemaining()){
\r
1050 /*test the following code unit*/
\r
1051 trail = source.get(source.position());
\r
1052 if(UTF16.isTrailSurrogate((char)trail)){
\r
1053 source.position(source.position()+1);
\r
1054 ++nextSourceIndex;
\r
1055 c = UCharacter.getCodePoint((char)c, trail);
\r
1058 /*this is unmatched lead code unit (1st Surrogate)*/
\r
1059 /*callback(illegal)*/
\r
1060 cr = CoderResult.malformedForLength(1);
\r
1067 AfterGetTrail = true;
\r
1071 private int getTrailUnicode(CharBuffer source, ByteBuffer target, IntBuffer offsets){
\r
1072 int label = EndLoop;
\r
1073 AfterGetTrailUnicode = true;
\r
1074 /*c is surrogate*/
\r
1075 if(UTF16.isLeadSurrogate((char)c)){
\r
1076 // getTrailUnicode:
\r
1078 if(source.hasRemaining()){
\r
1079 /*test the following code unit*/
\r
1080 trail = source.get(source.position());
\r
1081 if(UTF16.isTrailSurrogate(trail)){
\r
1083 ++nextSourceIndex;
\r
1084 c = UCharacter.getCodePoint((char)c, trail);
\r
1085 /*convert this surrogate code point*/
\r
1086 /*exit this condition tree*/
\r
1088 /*this is unmatched lead code unit(1st surrogate)*/
\r
1089 /*callback(illegal)*/
\r
1090 cr = CoderResult.malformedForLength(1);
\r
1100 /*this is an unmatched trail code point (2nd surrogate)*/
\r
1101 /*callback (illegal)*/
\r
1102 cr = CoderResult.malformedForLength(1);
\r
1107 /*compress supplementary character*/
\r
1108 if((window=getWindow(data.fromUDynamicOffsets))>=0 &&
\r
1109 !(source.hasRemaining() && ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) <
\r
1110 (0xd800 - 0x3400))){
\r
1112 * this is the dynamic window that contains this character and the following
\r
1113 * character is not uncompressible,
\r
1114 * change to the window
\r
1116 isSingleByteMode = true;
\r
1117 dynamicWindow = window;
\r
1118 currentOffset = data.fromUDynamicOffsets[dynamicWindow];
\r
1119 useDynamicWindow(dynamicWindow);
\r
1120 c = ((UC0+dynamicWindow)<<8 | (c-currentOffset) | 0x80);
\r
1122 label = OutputBytes;
\r
1124 } else if(source.hasRemaining() && lead == source.get(source.position()) && (code=getDynamicOffset())>=0){
\r
1125 /*two supplementary characters in (probably) the same window - define an extended one*/
\r
1126 isSingleByteMode = true;
\r
1127 dynamicWindow = getNextDynamicWindow();
\r
1128 currentOffset = data.fromUDynamicOffsets[dynamicWindow] = offset;
\r
1129 useDynamicWindow(dynamicWindow);
\r
1130 c = (UDX<<24) | (dynamicWindow<<21) |(code<<8) |(c - currentOffset) | 0x80;
\r
1132 label = OutputBytes;
\r
1135 /*don't know how to compress this character, just write it directly*/
\r
1136 c = (lead<<16)|trail;
\r
1138 label = OutputBytes;
\r
1144 private void endLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
\r
1145 /*set the converter state back to UConverter*/
\r
1146 data.fromUIsSingleByteMode = isSingleByteMode;
\r
1147 data.fromUDynamicWindow = dynamicWindow;
\r
1149 LabelLoop = false;
\r
1152 private int outputBytes(CharBuffer source, ByteBuffer target, IntBuffer offsets){
\r
1154 //int targetCapacity = target.limit()-target.position();
\r
1155 /*write the output character byte from c and length*/
\r
1156 /*from the first if in the loop we know that targetCapacity>0*/
\r
1157 if(length<=targetCapacity){
\r
1158 if(offsets==null){
\r
1160 /*each branch falls through the next one*/
\r
1162 target.put((byte)(c>>24));
\r
1164 target.put((byte)(c>>16));
\r
1166 target.put((byte)(c>>8));
\r
1168 target.put((byte)c);
\r
1170 /*will never occur*/
\r
1175 /*each branch falls through to the next one*/
\r
1177 target.put((byte)(c>>24));
\r
1178 if(offsets!=null){
\r
1179 offsets.put(sourceIndex);
\r
1182 target.put((byte)(c>>16));
\r
1183 if(offsets!=null){
\r
1184 offsets.put(sourceIndex);
\r
1187 target.put((byte)(c>>8));
\r
1188 if(offsets!=null){
\r
1189 offsets.put(sourceIndex);
\r
1192 target.put((byte)c);
\r
1193 if(offsets!=null){
\r
1194 offsets.put(sourceIndex);
\r
1197 /*will never occur*/
\r
1201 targetCapacity-=length;
\r
1203 /*normal end of conversion: prepare for a new character*/
\r
1205 sourceIndex = nextSourceIndex;
\r
1209 ByteBuffer p = ByteBuffer.wrap(errorBuffer);
\r
1211 * We actually do this backwards here:
\r
1212 * In order to save an intermediate variable, we output
\r
1213 * first to the overflow buffer what does not fit into the
\r
1216 /* we know that 0<=targetCapacity<length<=4 */
\r
1217 /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
\r
1218 length -= targetCapacity;
\r
1220 /*each branch falls through the next one*/
\r
1222 p.put((byte)(c>>24));
\r
1224 p.put((byte)(c>>16));
\r
1226 p.put((byte)(c>>8));
\r
1230 /*will never occur*/
\r
1233 errorBufferLength = length;
\r
1235 /*now output what fits into the regular target*/
\r
1236 c>>=8*length; //length was reduced by targetCapacity
\r
1237 switch(targetCapacity){
\r
1238 /*each branch falls through the next one*/
\r
1240 target.put((byte)(c>>16));
\r
1241 if(offsets!=null){
\r
1242 offsets.put(sourceIndex);
\r
1245 target.put((byte)(c>>8));
\r
1246 if(offsets!=null){
\r
1247 offsets.put(sourceIndex);
\r
1250 target.put((byte)c);
\r
1251 if(offsets!=null){
\r
1252 offsets.put(sourceIndex);
\r
1258 /*target overflow*/
\r
1259 targetCapacity = 0;
\r
1260 cr = CoderResult.OVERFLOW;
\r
/**
 * Creates a new SCSU decoder for this charset.
 *
 * @return a new CharsetDecoderSCSU constructed with this charset
 */
1269 public CharsetDecoder newDecoder() {
\r
1270 return new CharsetDecoderSCSU(this);
\r
/**
 * Creates a new SCSU encoder for this charset.
 *
 * @return a new CharsetEncoderSCSU constructed with this charset
 */
1273 public CharsetEncoder newEncoder() {
\r
1274 return new CharsetEncoderSCSU(this);
\r
/**
 * Fills setFillIn by delegating to CharsetICU.getCompleteUnicodeSet.
 * The which argument is not read in the visible body.
 *
 * @param setFillIn the set handed to CharsetICU.getCompleteUnicodeSet
 * @param which unused here
 */
1277 void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
\r
1278 CharsetICU.getCompleteUnicodeSet(setFillIn);
\r