jars/icu4j-4_4_2-src/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java

   1 /**\r
   2  *******************************************************************************\r
   3  * Copyright (C) 2006-2008, International Business Machines Corporation and    *\r
   4  * others. All Rights Reserved.                                                *\r
   5  *******************************************************************************\r
   6  *\r
   7  *******************************************************************************\r
   8  */\r
   9 package com.ibm.icu.charset;\r
  10 \r
  11 import java.nio.BufferOverflowException;\r
  12 import java.nio.BufferUnderflowException;\r
  13 import java.nio.ByteBuffer;\r
  14 import java.nio.CharBuffer;\r
  15 import java.nio.IntBuffer;\r
  16 import java.nio.charset.CharsetDecoder;\r
  17 import java.nio.charset.CharsetEncoder;\r
  18 import java.nio.charset.CoderResult;\r
  19 \r
  20 import com.ibm.icu.text.UTF16;\r
  21 import com.ibm.icu.text.UnicodeSet;\r
  22 \r
  23 class CharsetASCII extends CharsetICU {\r
  24     protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a };\r
  25 \r
  26     public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {\r
  27         super(icuCanonicalName, javaCanonicalName, aliases);\r
  28         maxBytesPerChar = 1;\r
  29         minBytesPerChar = 1;\r
  30         maxCharsPerByte = 1;\r
  31     }\r
  32 \r
  33     class CharsetDecoderASCII extends CharsetDecoderICU {\r
  34 \r
  35         public CharsetDecoderASCII(CharsetICU cs) {\r
  36             super(cs);\r
  37         }\r
  38 \r
  39         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,\r
  40                 boolean flush) {\r
  41             if (!source.hasRemaining()) {\r
  42                 /* no input, nothing to do */\r
  43                 return CoderResult.UNDERFLOW;\r
  44             }\r
  45             if (!target.hasRemaining()) {\r
  46                 /* no output available, can't do anything */\r
  47                 return CoderResult.OVERFLOW;\r
  48             }\r
  49 \r
  50             CoderResult cr;\r
  51             int oldSource = source.position();\r
  52             int oldTarget = target.position();\r
  53 \r
  54             if (source.hasArray() && target.hasArray()) {\r
  55                 /* optimized loop */\r
  56 \r
  57                 /*\r
  58                  * extract arrays from the buffers and obtain various constant values that will be\r
  59                  * necessary in the core loop\r
  60                  */\r
  61                 byte[] sourceArray = source.array();\r
  62                 int sourceOffset = source.arrayOffset();\r
  63                 int sourceIndex = oldSource + sourceOffset;\r
  64                 int sourceLength = source.limit() - oldSource;\r
  65                 \r
  66                 char[] targetArray = target.array();\r
  67                 int targetOffset = target.arrayOffset();\r
  68                 int targetIndex = oldTarget + targetOffset;\r
  69                 int targetLength = target.limit() - oldTarget;\r
  70 \r
  71                 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)\r
  72                         + sourceIndex;\r
  73                 int offset = targetIndex - sourceIndex;\r
  74 \r
  75                 /*\r
  76                  * perform the core loop... if it returns null, it must be due to an overflow or\r
  77                  * underflow\r
  78                  */\r
  79                 cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit);\r
  80                 if (cr == null) {\r
  81                     if (sourceLength <= targetLength) {\r
  82                         source.position(oldSource + sourceLength);\r
  83                         target.position(oldTarget + sourceLength);\r
  84                         cr = CoderResult.UNDERFLOW;\r
  85                     } else {\r
  86                         source.position(oldSource + targetLength);\r
  87                         target.position(oldTarget + targetLength);\r
  88                         cr = CoderResult.OVERFLOW;\r
  89                     }\r
  90                 }\r
  91             } else {\r
  92                 /* unoptimized loop */\r
  93 \r
  94                 try {\r
  95                     /*\r
  96                      * perform the core loop... if it throws an exception, it must be due to an\r
  97                      * overflow or underflow\r
  98                      */\r
  99                     cr = decodeLoopCoreUnoptimized(source, target);\r
 100 \r
 101                 } catch (BufferUnderflowException ex) {\r
 102                     /* all of the source has been read */\r
 103                     cr = CoderResult.UNDERFLOW;\r
 104                 } catch (BufferOverflowException ex) {\r
 105                     /* the target is full */\r
 106                     source.position(source.position() - 1); /* rewind by 1 */\r
 107                     cr = CoderResult.OVERFLOW;\r
 108                 }\r
 109             }\r
 110 \r
 111             /* set offsets since the start */\r
 112             if (offsets != null) {\r
 113                 int count = target.position() - oldTarget;\r
 114                 int sourceIndex = -1;\r
 115                 while (--count >= 0) offsets.put(++sourceIndex);\r
 116             }\r
 117 \r
 118             return cr;\r
 119         }\r
 120 \r
 121         protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,\r
 122                 byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {\r
 123             int i, ch = 0;\r
 124 \r
 125             /*\r
 126              * perform ascii conversion from the source array to the target array, making sure each\r
 127              * byte in the source is within the correct range\r
 128              */\r
 129             for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++)\r
 130                 targetArray[i + offset] = (char) ch;\r
 131 \r
 132             /*\r
 133              * if some byte was not in the correct range, we need to deal with this byte by calling\r
 134              * decodeMalformedOrUnmappable and move the source and target positions to reflect the\r
 135              * early termination of the loop\r
 136              */\r
 137             if ((ch & 0x80) != 0) {\r
 138                 source.position(i + 1);\r
 139                 target.position(i + offset);\r
 140                 return decodeMalformedOrUnmappable(ch);\r
 141             } else\r
 142                 return null;\r
 143         }\r
 144 \r
 145         protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target)\r
 146                 throws BufferUnderflowException, BufferOverflowException {\r
 147             int ch = 0;\r
 148 \r
 149             /*\r
 150              * perform ascii conversion from the source buffer to the target buffer, making sure\r
 151              * each byte in the source is within the correct range\r
 152              */\r
 153             while (((ch = (source.get() & 0xff)) & 0x80) == 0)\r
 154                 target.put((char) ch);\r
 155 \r
 156             /*\r
 157              * if we reach here, it's because a character was not in the correct range, and we need\r
 158              * to deak with this by calling decodeMalformedOrUnmappable\r
 159              */\r
 160             return decodeMalformedOrUnmappable(ch);\r
 161         }\r
 162 \r
 163         protected CoderResult decodeMalformedOrUnmappable(int ch) {\r
 164             /*\r
 165              * put the guilty character into toUBytesArray and return a message saying that the\r
 166              * character was malformed and of length 1.\r
 167              */\r
 168             toUBytesArray[0] = (byte) ch;\r
 169             toULength = 1;\r
 170             return CoderResult.malformedForLength(1);\r
 171         }\r
 172     }\r
 173 \r
 174     class CharsetEncoderASCII extends CharsetEncoderICU {\r
 175 \r
 176         public CharsetEncoderASCII(CharsetICU cs) {\r
 177             super(cs, fromUSubstitution);\r
 178             implReset();\r
 179         }\r
 180 \r
 181         private final static int NEED_TO_WRITE_BOM = 1;\r
 182 \r
 183         protected void implReset() {\r
 184             super.implReset();\r
 185             fromUnicodeStatus = NEED_TO_WRITE_BOM;\r
 186         }\r
 187 \r
 188         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,\r
 189                 boolean flush) {\r
 190             if (!source.hasRemaining()) {\r
 191                 /* no input, nothing to do */\r
 192                 return CoderResult.UNDERFLOW;\r
 193             }\r
 194             if (!target.hasRemaining()) {\r
 195                 /* no output available, can't do anything */\r
 196                 return CoderResult.OVERFLOW;\r
 197             }\r
 198 \r
 199             CoderResult cr;\r
 200             int oldSource = source.position();\r
 201             int oldTarget = target.position();\r
 202 \r
 203             if (fromUChar32 != 0) {\r
 204                 /*\r
 205                  * if we have a leading character in fromUChar32 that needs to be dealt with, we\r
 206                  * need to check for a matching trail character and taking the appropriate action as\r
 207                  * dictated by encodeTrail.\r
 208                  */\r
 209                 cr = encodeTrail(source, (char) fromUChar32, flush);\r
 210             } else {\r
 211                 if (source.hasArray() && target.hasArray()) {\r
 212                     /* optimized loop */\r
 213 \r
 214                     /*\r
 215                      * extract arrays from the buffers and obtain various constant values that will\r
 216                      * be necessary in the core loop\r
 217                      */\r
 218                     char[] sourceArray = source.array();\r
 219                     int sourceOffset = source.arrayOffset();\r
 220                     int sourceIndex = oldSource + sourceOffset;\r
 221                     int sourceLength = source.limit() - oldSource;\r
 222 \r
 223                     byte[] targetArray = target.array();\r
 224                     int targetOffset = target.arrayOffset();\r
 225                     int targetIndex = oldTarget + targetOffset;\r
 226                     int targetLength = target.limit() - oldTarget;\r
 227 \r
 228                     int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)\r
 229                             + sourceIndex;\r
 230                     int offset = targetIndex - sourceIndex;\r
 231 \r
 232                     /*\r
 233                      * perform the core loop... if it returns null, it must be due to an overflow or\r
 234                      * underflow\r
 235                      */\r
 236                     cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush);\r
 237                     if (cr == null) {\r
 238                         if (sourceLength <= targetLength) {\r
 239                             source.position(oldSource + sourceLength);\r
 240                             target.position(oldTarget + sourceLength);\r
 241                             cr = CoderResult.UNDERFLOW;\r
 242                         } else {\r
 243                             source.position(oldSource + targetLength);\r
 244                             target.position(oldTarget + targetLength);\r
 245                             cr = CoderResult.OVERFLOW;\r
 246                         }\r
 247                     }\r
 248                 } else {\r
 249                     /* unoptimized loop */\r
 250 \r
 251                     try {\r
 252                         /*\r
 253                          * perform the core loop... if it throws an exception, it must be due to an\r
 254                          * overflow or underflow\r
 255                          */\r
 256                         cr = encodeLoopCoreUnoptimized(source, target, flush);\r
 257 \r
 258                     } catch (BufferUnderflowException ex) {\r
 259                         cr = CoderResult.UNDERFLOW;\r
 260                     } catch (BufferOverflowException ex) {\r
 261                         source.position(source.position() - 1); /* rewind by 1 */\r
 262                         cr = CoderResult.OVERFLOW;\r
 263                     }\r
 264                 }\r
 265             }\r
 266 \r
 267             /* set offsets since the start */\r
 268             if (offsets != null) {\r
 269                 int count = target.position() - oldTarget;\r
 270                 int sourceIndex = -1;\r
 271                 while (--count >= 0) offsets.put(++sourceIndex);\r
 272             }\r
 273 \r
 274             return cr;\r
 275         }\r
 276 \r
 277         protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,\r
 278                 char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,\r
 279                 boolean flush) {\r
 280             int i, ch = 0;\r
 281 \r
 282             /*\r
 283              * perform ascii conversion from the source array to the target array, making sure each\r
 284              * char in the source is within the correct range\r
 285              */\r
 286             for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++)\r
 287                 targetArray[i + offset] = (byte) ch;\r
 288 \r
 289             /*\r
 290              * if some byte was not in the correct range, we need to deal with this byte by calling\r
 291              * encodeMalformedOrUnmappable and move the source and target positions to reflect the\r
 292              * early termination of the loop\r
 293              */\r
 294             if ((ch & 0xff80) != 0) {\r
 295                 source.position(i + 1);\r
 296                 target.position(i + offset);\r
 297                 return encodeMalformedOrUnmappable(source, ch, flush);\r
 298             } else\r
 299                 return null;\r
 300         }\r
 301 \r
 302         protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,\r
 303                 boolean flush) throws BufferUnderflowException, BufferOverflowException {\r
 304             int ch;\r
 305 \r
 306             /*\r
 307              * perform ascii conversion from the source buffer to the target buffer, making sure\r
 308              * each char in the source is within the correct range\r
 309              */\r
 310             while (((ch = (int) source.get()) & 0xff80) == 0)\r
 311                 target.put((byte) ch);\r
 312 \r
 313             /*\r
 314              * if we reach here, it's because a character was not in the correct range, and we need\r
 315              * to deak with this by calling encodeMalformedOrUnmappable.\r
 316              */\r
 317             return encodeMalformedOrUnmappable(source, ch, flush);\r
 318         }\r
 319 \r
 320         protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) {\r
 321             /*\r
 322              * if the character is a lead surrogate, we need to call encodeTrail to attempt to match\r
 323              * it up with a trail surrogate. if not, the character is unmappable.\r
 324              */\r
 325             return (UTF16.isSurrogate((char) ch))\r
 326                     ? encodeTrail(source, (char) ch, flush)\r
 327                     : CoderResult.unmappableForLength(1);\r
 328         }\r
 329 \r
 330         private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {\r
 331             /*\r
 332              * ASCII doesn't support characters in the BMP, so if handleSurrogates returns null,\r
 333              * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable. \r
 334              */\r
 335             CoderResult cr = handleSurrogates(source, lead);\r
 336             if (cr != null) {\r
 337                 return cr;\r
 338             } else {\r
 339                 //source.position(source.position() - 2);\r
 340                 return CoderResult.unmappableForLength(2);\r
 341             }\r
 342         }\r
 343 \r
 344     }\r
 345 \r
 346     public CharsetDecoder newDecoder() {\r
 347         return new CharsetDecoderASCII(this);\r
 348     }\r
 349 \r
 350     public CharsetEncoder newEncoder() {\r
 351         return new CharsetEncoderASCII(this);\r
 352     }\r
 353     \r
 354     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){\r
 355         setFillIn.add(0,0x7f);\r
 356      }\r
 357 }\r