/* ******************************************************************************* * Copyright (C) 2008-2010, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.charset; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; /** * @author krajwade * */ class CharsetSCSU extends CharsetICU{ /* SCSU definitions --------------------------------------------------------- */ /* SCSU command byte values */ //enum { private static final short SQ0=0x01; /* Quote from window pair 0 */ private static final short SQ7=0x08; /* Quote from window pair 7 */ private static final short SDX=0x0B; /* Define a window as extended */ //private static final short Srs=0x0C; /* reserved */ private static final short SQU=0x0E; /* Quote a single Unicode character */ private static final short SCU=0x0F; /* Change to Unicode mode */ private static final short SC0=0x10; /* Select window 0 */ private static final short SC7=0x17; /* Select window 7 */ private static final short SD0=0x18; /* Define and select window 0 */ //private static final short SD7=0x1F; /* Define and select window 7 */ private static final short UC0=0xE0; /* Select window 0 */ private static final short UC7=0xE7; /* Select window 7 */ private static final short UD0=0xE8; /* Define and select window 0 */ private static final short UD7=0xEF; /* Define and select window 7 */ private static final short UQU=0xF0; /* Quote a single Unicode character */ private static final short UDX=0xF1; /* Define a Window as extended */ private static final short Urs=0xF2; /* reserved */ // }; // enum { /* * Unicode code points from 3400 to E000 are not adressible by * dynamic window, since in these areas no short run alphabets are * found. Therefore add gapOffset to all values from gapThreshold. */ private static final int gapThreshold=0x68; private static final int gapOffset = 0xAC00 ; /* values between reservedStart and fixedThreshold are reserved */ private static final int reservedStart=0xA8; /* use table of predefined fixed offsets for values from fixedThreshold */ private static final int fixedThreshold=0xF9; //}; protected byte[] fromUSubstitution = new byte[]{(byte)0x0E,(byte)0xFF, (byte)0xFD}; /* constant offsets for the 8 static windows */ private static final int staticOffsets[]={ 0x0000, /* ASCII for quoted tags */ 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ 0x0100, /* Latin Extended-A */ 0x0300, /* Combining Diacritical Marks */ 0x2000, /* General Punctuation */ 0x2080, /* Currency Symbols */ 0x2100, /* Letterlike Symbols and Number Forms */ 0x3000 /* CJK Symbols and punctuation */ }; /* initial offsets for the 8 dynamic (sliding) windows */ private static final int initialDynamicOffsets[]={ 0x0080, /* Latin-1 */ 0x00C0, /* Latin Extended A */ 0x0400, /* Cyrillic */ 0x0600, /* Arabic */ 0x0900, /* Devanagari */ 0x3040, /* Hiragana */ 0x30A0, /* Katakana */ 0xFF00 /* Fullwidth ASCII */ }; /* Table of fixed predefined Offsets */ private static final int fixedOffsets[]={ /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ /* 0xFA */ 0x0250, /* IPA extensions */ /* 0xFB */ 0x0370, /* Greek */ /* 0xFC */ 0x0530, /* Armenian */ /* 0xFD */ 0x3040, /* Hiragana */ /* 0xFE */ 0x30A0, /* Katakana */ /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ }; /* state values */ //enum { private static final int readCommand=0; private static final int quotePairOne=1; private static final int quotePairTwo=2; private static final int quoteOne=3; private static final int definePairOne=4; private static final int definePairTwo=5; private static final int defineOne=6; // }; @SuppressWarnings("unused") private final class SCSUData{ /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ int toUDynamicOffsets[] = new int[8] ; int fromUDynamicOffsets[] = new int[8] ; /* state machine state - toUnicode */ boolean toUIsSingleByteMode; short toUState; byte toUQuoteWindow, toUDynamicWindow; short toUByteOne; short toUPadding[]; /* state machine state - fromUnicode */ boolean fromUIsSingleByteMode; byte fromUDynamicWindow; /* * windowUse[] keeps track of the use of the dynamic windows: * At nextWindowUseIndex there is the least recently used window, * and the following windows (in a wrapping manner) are more and more * recently used. * At nextWindowUseIndex-1 there is the most recently used window. */ byte locale; byte nextWindowUseIndex; byte windowUse[] = new byte[8]; SCSUData(){ initialize(); } void initialize(){ for(int i=0;i<8;i++){ this.toUDynamicOffsets[i] = initialDynamicOffsets[i]; } this.toUIsSingleByteMode = true; this.toUState = readCommand; this.toUQuoteWindow = 0; this.toUDynamicWindow = 0; this.toUByteOne = 0; this.fromUIsSingleByteMode = true; this.fromUDynamicWindow = 0; for(int i=0;i<8;i++){ this.fromUDynamicOffsets[i] = initialDynamicOffsets[i]; } this.nextWindowUseIndex = 0; switch(this.locale){ /* Note being used right now because "SCSU,locale=ja" does not work in ICU4J. */ /* case l_ja: for(int i=0;i<8;i++){ this.windowUse[i] = initialWindowUse_ja[i]; } break; */ default: for(int i=0;i<8;i++){ this.windowUse[i] = initialWindowUse[i]; } } } } static final byte initialWindowUse[]={ 7, 0, 3, 2, 4, 5, 6, 1 }; /* Note being used right now because "SCSU,locale=ja" does not work in ICU4J. */ // static final byte initialWindowUse_ja[]={ 3, 2, 4, 1, 0, 7, 5, 6 }; //enum { //private static final int lGeneric = 0; /* Note being used right now because "SCSU,locale=ja" does not work in ICU4J. */ // private static final int l_ja = 1; //}; private SCSUData extraInfo = null; public CharsetSCSU(String icuCanonicalName, String javaCanonicalName, String[] aliases){ super(icuCanonicalName, javaCanonicalName, aliases); maxBytesPerChar = 3; minBytesPerChar = 1; maxCharsPerByte = 1; extraInfo = new SCSUData(); } class CharsetDecoderSCSU extends CharsetDecoderICU { /* label values for supporting behavior similar to goto in C */ private static final int FastSingle=0; private static final int SingleByteMode=1; private static final int EndLoop=2; /* Mode Type */ private static final int ByteMode = 0; private static final int UnicodeMode =1; public CharsetDecoderSCSU(CharsetICU cs) { super(cs); implReset(); } //private SCSUData data ; protected void implReset(){ super.implReset(); toULength = 0; extraInfo.initialize(); } short b; //Get the state machine state private boolean isSingleByteMode ; private short state ; private byte quoteWindow ; private byte dynamicWindow ; private short byteOne; //sourceIndex=-1 if the current character began in the previous buffer private int sourceIndex ; private int nextSourceIndex ; CoderResult cr; SCSUData data ; private boolean LabelLoop;// used to break the while loop protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){ data = extraInfo; //Get the state machine state isSingleByteMode = data.toUIsSingleByteMode; state = data.toUState; quoteWindow = data.toUQuoteWindow; dynamicWindow = data.toUDynamicWindow; byteOne = data.toUByteOne; LabelLoop = true; //sourceIndex=-1 if the current character began in the previous buffer sourceIndex = data.toUState == readCommand ? 0: -1 ; nextSourceIndex = 0; cr = CoderResult.UNDERFLOW; int labelType = 0; while(LabelLoop){ if(isSingleByteMode){ switch(labelType){ case FastSingle: /*fast path for single-byte mode*/ labelType = fastSingle(source, target, offsets, ByteMode); break; case SingleByteMode: /* normal state machine for single-byte mode, minus handling for what fastSingleCovers */ labelType = singleByteMode(source, target, offsets, ByteMode); break; case EndLoop: endLoop(source, target, offsets); break; } }else{ switch(labelType){ case FastSingle: /*fast path for single-byte mode*/ labelType = fastSingle(source, target, offsets, UnicodeMode); break; case SingleByteMode: /* normal state machine for single-byte mode, minus handling for what fastSingleCovers */ labelType = singleByteMode(source, target, offsets, UnicodeMode); break; case EndLoop: endLoop(source, target, offsets); break; } //LabelLoop = false; } } return cr; } private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){ int label = 0; if(modeType==ByteMode){ if(state==readCommand){ while(source.hasRemaining() && target.hasRemaining() && (b=(short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK)) >= 0x20){ source.position(source.position()+1); ++nextSourceIndex; if(b <= 0x7f){ /*Write US graphic character or DEL*/ target.put((char)b); if(offsets != null){ offsets.put(sourceIndex); } }else{ /*Write from dynamic window*/ int c = data.toUDynamicOffsets[dynamicWindow] + (b&0x7f); if(c <= 0xffff){ target.put((char)c); if(offsets != null){ offsets.put(sourceIndex); } }else{ /*Output surrogate pair */ target.put((char)(0xd7c0 + (c>>10))); if(target.hasRemaining()){ target.put((char)(0xdc00 | (c&0x3ff))); if(offsets != null){ offsets.put(sourceIndex); offsets.put(sourceIndex); } }else{ /* target overflow */ if(offsets != null){ offsets.put(sourceIndex); } charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff)); charErrorBufferLength = 1; label = EndLoop; cr = CoderResult.OVERFLOW; return label; } } } sourceIndex = nextSourceIndex; } // label = SingleByteMode; } }else if(modeType==UnicodeMode){ /* fast path for unicode mode */ if(state == readCommand){ while((source.position()+1)(Urs-UC0)){ target.put((char)((b<<8)|(source.get(source.position()+1)&UConverterConstants.UNSIGNED_BYTE_MASK))); if(offsets != null){ offsets.put(sourceIndex); } sourceIndex = nextSourceIndex; nextSourceIndex+=2; source.position(source.position()+2); } } } label = SingleByteMode; return label; } private int singleByteMode(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){ int label = SingleByteMode; if(modeType == ByteMode){ while(source.hasRemaining()){ if(!target.hasRemaining()){ cr = CoderResult.OVERFLOW; LabelLoop = false; return label; } b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); ++nextSourceIndex; switch(state){ case readCommand: /*redundant conditions are commented out */ if(((1L<>10))); if(target.hasRemaining()){ target.put((char)(0xdc00 | (c&0x3ff))); if(offsets != null){ offsets.put(sourceIndex); offsets.put(sourceIndex); } }else { /* target overflow */ if(offsets != null){ offsets.put(sourceIndex); } charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff)); charErrorBufferLength = 1; label = EndLoop; cr = CoderResult.OVERFLOW; LabelLoop = false; return label; } } } sourceIndex = nextSourceIndex; state = readCommand; label = FastSingle; return label; case definePairOne: dynamicWindow = (byte)((b>>5)&7); byteOne = (byte)(b&0x1f); toUBytesArray[1] = (byte)b; toULength = 2; state = definePairTwo; break; case definePairTwo: data.toUDynamicOffsets[dynamicWindow] = 0x10000 + (byteOne<<15L | b<<7L); sourceIndex = nextSourceIndex; state = readCommand; label = FastSingle; return label; case defineOne: if(b==0){ /*callback (illegal)*/ toUBytesArray[1] = (byte)b; toULength =2; label = EndLoop; return label; }else if(b=fixedThreshold){ data.toUDynamicOffsets[dynamicWindow] = fixedOffsets[b-fixedThreshold]; }else{ /*callback (illegal)*/ toUBytesArray[1] = (byte)b; toULength =2; label = EndLoop; return label; } sourceIndex = nextSourceIndex; state = readCommand; label = FastSingle; return label; } } }else if(modeType==UnicodeMode){ while(source.hasRemaining()){ if(!target.hasRemaining()){ cr = CoderResult.OVERFLOW; LabelLoop = false; return label; } b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); ++nextSourceIndex; switch(state){ case readCommand: if((short)((b -UC0)&UConverterConstants.UNSIGNED_BYTE_MASK)>(Urs - UC0)){ byteOne = b; toUBytesArray[0] = (byte)b; toULength = 1; state = quotePairTwo; }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UC7){ dynamicWindow = (byte)(b - UC0); sourceIndex = nextSourceIndex; isSingleByteMode = true; label = FastSingle; return label; }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UD7){ dynamicWindow = (byte)(b - UD0); isSingleByteMode = true; toUBytesArray[0] = (byte)b; toULength = 1; state = defineOne; label = SingleByteMode; return label; }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UDX){ isSingleByteMode = true; toUBytesArray[0] = (byte)b; toULength = 1; state = definePairOne; label = SingleByteMode; return label; }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UQU){ toUBytesArray[0] = (byte)b; toULength = 1; state = quotePairOne; }else { /* callback (illegal)*/ cr = CoderResult.malformedForLength(1); toUBytesArray[0] = (byte)b; toULength = 1; label = EndLoop; return label; } break; case quotePairOne: byteOne = b; toUBytesArray[1] = (byte)b; toULength = 2; state = quotePairTwo; break; case quotePairTwo: target.put((char)((byteOne<<8) | b)); if(offsets != null){ offsets.put(sourceIndex); } sourceIndex = nextSourceIndex; state = readCommand; label = FastSingle; return label; } } } label = EndLoop; return label; } private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){ if(cr==CoderResult.OVERFLOW){ state = readCommand; }else if(state == readCommand){ toULength = 0; } data.toUIsSingleByteMode = isSingleByteMode; data.toUState = state; data.toUQuoteWindow = quoteWindow; data.toUDynamicWindow = dynamicWindow; data.toUByteOne = byteOne; LabelLoop = false; } } class CharsetEncoderSCSU extends CharsetEncoderICU{ public CharsetEncoderSCSU(CharsetICU cs) { super(cs, fromUSubstitution); implReset(); } //private SCSUData data; protected void implReset() { super.implReset(); extraInfo.initialize(); } /* label values for supporting behavior similar to goto in C */ private static final int Loop=0; private static final int GetTrailUnicode=1; private static final int OutputBytes=2; private static final int EndLoop =3; private int delta; private int length; ///variables of compression heuristics private int offset; private char lead, trail; private int code; private byte window; //Get the state machine state private boolean isSingleByteMode; private byte dynamicWindow ; private int currentOffset; int c; SCSUData data ; //sourceIndex=-1 if the current character began in the previous buffer private int sourceIndex ; private int nextSourceIndex; private int targetCapacity; private boolean LabelLoop;//used to break the while loop private boolean AfterGetTrail;// its value is set to true in order to ignore the code before getTrailSingle: private boolean AfterGetTrailUnicode;// is value is set to true in order to ignore the code before getTrailUnicode: CoderResult cr; protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { data = extraInfo; cr = CoderResult.UNDERFLOW; //Get the state machine state isSingleByteMode = data.fromUIsSingleByteMode; dynamicWindow = data.fromUDynamicWindow; currentOffset = data.fromUDynamicOffsets[dynamicWindow]; c = fromUChar32; sourceIndex = c== 0 ? 0: -1 ; nextSourceIndex = 0; targetCapacity = target.limit()-target.position(); //sourceIndex=-1 if the current character began in the previous buffer sourceIndex = c== 0 ? 0: -1 ; nextSourceIndex = 0; int labelType = Loop; // set to Loop so that the code starts from loop: LabelLoop = true; AfterGetTrail = false; AfterGetTrailUnicode = false; while(LabelLoop){ switch(labelType){ case Loop: labelType = loop(source, target, offsets); break; case GetTrailUnicode: labelType = getTrailUnicode(source, target, offsets); break; case OutputBytes: labelType = outputBytes(source, target, offsets); break; case EndLoop: endLoop(source, target, offsets); break; } } return cr; } private byte getWindow(int[] offsets){ int i; for (i=0;i<8;i++){ if(((c-offsets[i]) & UConverterConstants.UNSIGNED_INT_MASK) <= 0x7f){ return (byte)i; } } return -1; } private boolean isInOffsetWindowOrDirect(int offsetValue, int a){ return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f & ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) || ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20 || ((1L<<(a & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0))); } private byte getNextDynamicWindow(){ byte windowValue = data.windowUse[data.nextWindowUseIndex]; if(++data.nextWindowUseIndex==8){ data.nextWindowUseIndex=0; } return windowValue; } private void useDynamicWindow(byte windowValue){ /*first find the index of the window*/ int i,j; i = data.nextWindowUseIndex; do{ if(--i<0){ i=7; } }while(data.windowUse[i]!=windowValue); /*now copy each window[i+1] to [i]*/ j= i+1; if(j==8){ j=0; } while(j!=data.nextWindowUseIndex){ data.windowUse[i] = data.windowUse[j]; i=j; if(++j==8){ j=0; } } /*finally, set the window into the most recently used index*/ data.windowUse[i]= windowValue; } private int getDynamicOffset(){ int i; for(i=0;i<7;++i){ if(((c-fixedOffsets[i])&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){ offset = fixedOffsets[i]; return 0xf9+i; } } if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x80){ /*No dynamic window for US-ASCII*/ return -1; }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) || ((c-0x1d000)&UConverterConstants.UNSIGNED_INT_MASK)<=(0x1ffff-0x1d000)){ /*This character is in the code range for a "small", i.e, reasonably windowable, script*/ offset = c&0x7fffff80; return (c>>7); }else if(0xe000<=(c&UConverterConstants.UNSIGNED_INT_MASK) && (c&UConverterConstants.UNSIGNED_INT_MASK)!=0xfeff && (c&UConverterConstants.UNSIGNED_INT_MASK) < 0xfff0){ /*for these characters we need to take the gapOffset into account*/ offset=(c)&0x7fffff80; return ((c-gapOffset)>>7); }else{ return -1; } } private int loop(CharBuffer source, ByteBuffer target, IntBuffer offsets){ int label = 0; if(isSingleByteMode){ if(c!=0 && targetCapacity>0 && !AfterGetTrail){ label = getTrail(source, target, offsets); return label; } /*state machine for single byte mode*/ while(AfterGetTrail || source.hasRemaining()){ if(targetCapacity<=0 && !AfterGetTrail){ /*target is full*/ cr = CoderResult.OVERFLOW; label = EndLoop; return label; } if(!AfterGetTrail){ c = source.get(); ++nextSourceIndex; } if(((c -0x20)&UConverterConstants.UNSIGNED_INT_MASK)<=0x5f && !AfterGetTrail){ /*pass US-ASCII graphic character through*/ target.put((byte)c); if(offsets!=null){ offsets.put(sourceIndex); } --targetCapacity; }else if((c & UConverterConstants.UNSIGNED_INT_MASK)<0x20 && !AfterGetTrail){ if(((1L<<(c & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0){ /*CR/LF/TAB/NUL*/ target.put((byte)c); if(offsets!=null){ offsets.put(sourceIndex); } --targetCapacity; } else { /*quote c0 control character*/ c|=SQ0<<8; length = 2; label = OutputBytes; return label; } } else if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && !AfterGetTrail){ /*use the current dynamic window*/ target.put((byte)(delta|0x80)); if(offsets!=null){ offsets.put(sourceIndex); } --targetCapacity; } else if(AfterGetTrail || UTF16.isSurrogate((char)c)){ if(!AfterGetTrail){ if(UTF16.isLeadSurrogate((char)c)){ label = getTrail(source, target, offsets); if(label==EndLoop){ return label; } } else { /*this is unmatched lead code unit (2nd Surrogate)*/ /*callback(illegal)*/ cr = CoderResult.malformedForLength(1); label = EndLoop; return label; } } if(AfterGetTrail){ AfterGetTrail = false; } /*Compress supplementary character U+10000...U+10ffff */ if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){ /*use the current dynamic window*/ target.put((byte)(delta|0x80)); if(offsets!=null){ offsets.put(sourceIndex); } --targetCapacity; } else if((window=getWindow(data.fromUDynamicOffsets))>=0){ /*there is a dynamic window that contains this character, change to it*/ dynamicWindow = window; currentOffset = data.fromUDynamicOffsets[dynamicWindow]; useDynamicWindow(dynamicWindow); c = ((SC0+dynamicWindow)<<8 | (c-currentOffset)|0x80); length = 2; label = OutputBytes; return label; } else if((code=getDynamicOffset())>=0){ /*might check if there are come character in this window to come */ /*define an extended window with this character*/ code-=0x200; dynamicWindow=getNextDynamicWindow(); currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; useDynamicWindow(dynamicWindow); c = ((SDX<<24) | (dynamicWindow<<21)| (code<<8)| (c- currentOffset) |0x80); // c = (((SDX)<<25) | (dynamicWindow<<21)| // (code<<8)| (c- currentOffset) |0x80 ); length = 4; label = OutputBytes; return label; } else { /*change to unicode mode and output this (lead, trail) pair*/ isSingleByteMode = false; target.put((byte)SCU); if(offsets!=null){ offsets.put(sourceIndex); } --targetCapacity; c = (lead<<16)|trail; length = 4; label = OutputBytes; return label; } } else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0xa0){ /*quote C1 control character*/ c = (c&0x7f) | (SQ0+1)<<8; /*SQ0+1 == SQ1*/ length = 2; label = OutputBytes; return label; } else if((c&UConverterConstants.UNSIGNED_INT_MASK)==0xfeff || (c&UConverterConstants.UNSIGNED_INT_MASK)>= 0xfff0){ /*quote signature character = byte order mark and specials*/ c |= SQU<<16; length = 3; label = OutputBytes; return label; } else { /*compress all other BMP characters*/ if((window=getWindow(data.fromUDynamicOffsets))>=0){ /*there is a window defined that contains this character - switch to it or quote from it*/ if(source.position()>=source.limit() || isInOffsetWindowOrDirect(data.fromUDynamicOffsets[window], source.get(source.position()))){ /*change to dynamic window*/ dynamicWindow = window; currentOffset = data.fromUDynamicOffsets[dynamicWindow]; useDynamicWindow(dynamicWindow); c = ((SC0+window)<<8) | (c- currentOffset) | 0x80; length = 2; label = OutputBytes; return label; } else { /*quote from dynamic window*/ c = ((SQ0+window)<<8) | (c - data.fromUDynamicOffsets[window]) | 0x80; length = 2; label = OutputBytes; return label; } } else if((window = getWindow(staticOffsets))>=0){ /*quote from static window*/ c = ((SQ0+window)<<8) | (c - staticOffsets[window]); length = 2; label = OutputBytes; return label; }else if((code=getDynamicOffset())>=0){ /*define a dynamic window with this character*/ dynamicWindow = getNextDynamicWindow(); currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; useDynamicWindow(dynamicWindow); c = ((SD0+dynamicWindow)<<16) | (code<<8)| (c - currentOffset) | 0x80; length = 3; label = OutputBytes; return label; } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() || ((int)((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))< (0xd800 - 0x3400))){ /* * this character is not compressible (a BMP ideograph of similar) * switch to Unicode mode if this is the last character in the block * or there is at least one more ideograph following immediately */ isSingleByteMode = false; c|=SCU<<16; length =3; label = OutputBytes; return label; } else { /*quote Unicode*/ c|=SQU<<16; length = 3; label = OutputBytes; return label; } } /*normal end of conversion : prepare for new character */ c = 0; sourceIndex = nextSourceIndex; } } else { if(c!=0 && targetCapacity>0 && !AfterGetTrailUnicode){ label = GetTrailUnicode; return label; } /*state machine for Unicode*/ /*unicodeByteMode*/ while(AfterGetTrailUnicode || source.hasRemaining()){ if(targetCapacity<=0 && !AfterGetTrailUnicode){ /*target is full*/ cr = CoderResult.OVERFLOW; LabelLoop = false; break; } if(!AfterGetTrailUnicode){ c = source.get(); ++nextSourceIndex; } if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && !AfterGetTrailUnicode){ /*not compressible, write character directly */ if(targetCapacity>=2){ target.put((byte)(c>>8)); target.put((byte)c); if(offsets!=null){ offsets.put(sourceIndex); offsets.put(sourceIndex); } targetCapacity-=2; } else { length =2; label = OutputBytes; return label; } } else if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300*/&& !AfterGetTrailUnicode){ /*compress BMP character if the following one is not an uncompressible ideograph*/ if(!(source.hasRemaining() && (((source.get(source.position())-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400))){ if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26 || (((c-0x41)&UConverterConstants.UNSIGNED_INT_MASK))<26)){ /*ASCII digit or letter*/ isSingleByteMode = true; c |=((UC0+dynamicWindow)<<8)|c; length = 2; label = OutputBytes; return label; } else if((window=getWindow(data.fromUDynamicOffsets))>=0){ /*there is a dynamic window that contains this character, change to it*/ isSingleByteMode = true; dynamicWindow = window; currentOffset = data.fromUDynamicOffsets[dynamicWindow]; useDynamicWindow(dynamicWindow); c = ((UC0+dynamicWindow)<<8) | (c- currentOffset) | 0x80; length = 2; label = OutputBytes; return label; } else if((code=getDynamicOffset())>=0){ /*define a dynamic window with this character*/ isSingleByteMode = true; dynamicWindow = getNextDynamicWindow(); currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; useDynamicWindow(dynamicWindow); c = ((UD0+dynamicWindow)<<16) | (code<<8) |(c - currentOffset) | 0x80; length = 3; label = OutputBytes; return label; } } /*don't know how to compress these character, just write it directly*/ length = 2; label = OutputBytes; return label; } else if(c<0xe000 && !AfterGetTrailUnicode){ label = GetTrailUnicode; return label; } else if (!AfterGetTrailUnicode){ /*quote to avoid SCSU tags*/ c|=UQU<<16; length = 3; label = OutputBytes; return label; } if(AfterGetTrailUnicode){ AfterGetTrailUnicode = false; } /*normal end of conversion, prepare for a new character*/ c = 0; sourceIndex = nextSourceIndex; } } label = EndLoop; return label; } private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){ lead = (char)c; int label = Loop; if(source.hasRemaining()){ /*test the following code unit*/ trail = source.get(source.position()); if(UTF16.isTrailSurrogate(trail)){ source.position(source.position()+1); ++nextSourceIndex; c = UCharacter.getCodePoint((char)c, trail); label = Loop; } else { /*this is unmatched lead code unit (1st Surrogate)*/ /*callback(illegal)*/ cr = CoderResult.malformedForLength(1); label = EndLoop; } }else { /*no more input*/ label = EndLoop; } AfterGetTrail = true; return label; } private int getTrailUnicode(CharBuffer source, ByteBuffer target, IntBuffer offsets){ int label = EndLoop; AfterGetTrailUnicode = true; /*c is surrogate*/ if(UTF16.isLeadSurrogate((char)c)){ // getTrailUnicode: lead = (char)c; if(source.hasRemaining()){ /*test the following code unit*/ trail = source.get(source.position()); if(UTF16.isTrailSurrogate(trail)){ source.get(); ++nextSourceIndex; c = UCharacter.getCodePoint((char)c, trail); /*convert this surrogate code point*/ /*exit this condition tree*/ } else { /*this is unmatched lead code unit(1st surrogate)*/ /*callback(illegal)*/ cr = CoderResult.malformedForLength(1); label = EndLoop; return label; } } else { /*no more input*/ label = EndLoop; return label; } } else { /*this is an unmatched trail code point (2nd surrogate)*/ /*callback (illegal)*/ cr = CoderResult.malformedForLength(1); label = EndLoop; return label; } /*compress supplementary character*/ if((window=getWindow(data.fromUDynamicOffsets))>=0 && !(source.hasRemaining() && ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) < (0xd800 - 0x3400))){ /* * this is the dynamic window that contains this character and the following * character is not uncompressible, * change to the window */ isSingleByteMode = true; dynamicWindow = window; currentOffset = data.fromUDynamicOffsets[dynamicWindow]; useDynamicWindow(dynamicWindow); c = ((UC0+dynamicWindow)<<8 | (c-currentOffset) | 0x80); length = 2; label = OutputBytes; return label; } else if(source.hasRemaining() && lead == source.get(source.position()) && (code=getDynamicOffset())>=0){ /*two supplementary characters in (probably) the same window - define an extended one*/ isSingleByteMode = true; dynamicWindow = getNextDynamicWindow(); currentOffset = data.fromUDynamicOffsets[dynamicWindow] = offset; useDynamicWindow(dynamicWindow); c = (UDX<<24) | (dynamicWindow<<21) |(code<<8) |(c - currentOffset) | 0x80; length = 4; label = OutputBytes; return label; } else { /*don't know how to compress this character, just write it directly*/ c = (lead<<16)|trail; length = 4; label = OutputBytes; return label; } } private void endLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){ /*set the converter state back to UConverter*/ data.fromUIsSingleByteMode = isSingleByteMode; data.fromUDynamicWindow = dynamicWindow; fromUChar32 = c; LabelLoop = false; } @SuppressWarnings("fallthrough") private int outputBytes(CharBuffer source, ByteBuffer target, IntBuffer offsets){ int label; //int targetCapacity = target.limit()-target.position(); /*write the output character byte from c and length*/ /*from the first if in the loop we know that targetCapacity>0*/ if(length<=targetCapacity){ switch(length){ /*each branch falls through the next one*/ case 4: target.put((byte)(c>>24)); if(offsets!=null){ offsets.put(sourceIndex); } case 3: target.put((byte)(c>>16)); if(offsets!=null){ offsets.put(sourceIndex); } case 2: target.put((byte)(c>>8)); if(offsets!=null){ offsets.put(sourceIndex); } case 1: target.put((byte)c); if(offsets!=null){ offsets.put(sourceIndex); } default: /*will never occur*/ break; } targetCapacity-=length; /*normal end of conversion: prepare for a new character*/ c = 0; sourceIndex = nextSourceIndex; label = Loop; return label; } else { ByteBuffer p = ByteBuffer.wrap(errorBuffer); /* * We actually do this backwards here: * In order to save an intermediate variable, we output * first to the overflow buffer what does not fit into the * regular target */ /* we know that 0<=targetCapacity>24)); case 3: p.put((byte)(c>>16)); case 2: p.put((byte)(c>>8)); case 1: p.put((byte)c); default: /*will never occur*/ break; } errorBufferLength = length; /*now output what fits into the regular target*/ c>>=8*length; //length was reduced by targetCapacity switch(targetCapacity){ /*each branch falls through the next one*/ case 3: target.put((byte)(c>>16)); if(offsets!=null){ offsets.put(sourceIndex); } case 2: target.put((byte)(c>>8)); if(offsets!=null){ offsets.put(sourceIndex); } case 1: target.put((byte)c); if(offsets!=null){ offsets.put(sourceIndex); } default: break; } /*target overflow*/ targetCapacity = 0; cr = CoderResult.OVERFLOW; c = 0; label = EndLoop; return label; } } } public CharsetDecoder newDecoder() { return new CharsetDecoderSCSU(this); } public CharsetEncoder newEncoder() { return new CharsetEncoderSCSU(this); } void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ CharsetICU.getCompleteUnicodeSet(setFillIn); } }