jars/icu4j-4_4_2-src/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java

   1 /*\r
   2  *******************************************************************************\r
   3  * Copyright (C) 2007-2009, International Business Machines Corporation and         *\r
   4  * others. All Rights Reserved.                                                *\r
   5  *******************************************************************************\r
   6  */\r
   7 package com.ibm.icu.charset;\r
   8 \r
   9 import java.nio.ByteBuffer;\r
  10 import java.nio.CharBuffer;\r
  11 import java.nio.IntBuffer;\r
  12 import java.nio.charset.CharsetDecoder;\r
  13 import java.nio.charset.CharsetEncoder;\r
  14 import java.nio.charset.CoderResult;\r
  15 \r
  16 import com.ibm.icu.text.UnicodeSet;\r
  17 \r
  18 /**\r
  19  * @author Michael Ow\r
  20  *\r
  21  */\r
  22 class CharsetUTF7 extends CharsetICU {\r
  23     private final String IMAP_NAME="IMAP-mailbox-name";\r
  24     private boolean useIMAP;\r
  25     protected byte[] fromUSubstitution=new byte[]{0x3F};\r
  26    \r
  27     public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) {\r
  28         super(icuCanonicalName, javaCanonicalName, aliases);\r
  29         maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64) */\r
  30         minBytesPerChar=1;\r
  31         maxCharsPerByte=1;\r
  32         \r
  33         useIMAP=false;\r
  34         \r
  35         if (icuCanonicalName.equals(IMAP_NAME)) {\r
  36             useIMAP=true;\r
  37         }\r
  38     }\r
  39     \r
  40     //private static boolean inSetD(char c) {\r
  41     //    return (\r
  42     //            (char)(c - 97) < 26 || (char)(c - 65) < 26 || /* letters */\r
  43     //            (char)(c - 48) < 10 ||                        /* digits */\r
  44     //            (char)(c - 39) < 3 ||                          /* ' () */\r
  45     //            (char)(c - 44) < 4 ||                          /* ,-./ */\r
  46     //            (c==58) || (c==63)            /* :? */\r
  47     //            );\r
  48     //}\r
  49     \r
  50     //private static boolean inSetO(char c) {\r
  51     //    return (\r
  52     //            (char)(c - 33) < 6 ||                           /* !"#$%& */\r
  53     //            (char)(c - 59) < 4 ||                           /* ;<=> */\r
  54     //            (char)(c - 93) < 4 ||                           /* ]^_` */\r
  55     //            (char)(c - 123) < 3 ||                         /* {|} */\r
    //            (c==42) || (c==64) || (c==91)  /* *@[ */
  57     //            );\r
  58     //}\r
  59     \r
  60     private static boolean isCRLFTAB(char c) {\r
  61         return (\r
  62                 (c==13) || (c==10) || (c==9)\r
  63                 );\r
  64     }\r
  65     \r
  66     //private static boolean isCRLFSPTAB(char c) {\r
  67     //   return (\r
  68     //            (c==32) || (c==13) || (c==10) || (c==9)\r
  69     //            );\r
  70     //}\r
  71     \r
  72     private static final byte PLUS=43;\r
  73     private static final byte MINUS=45;\r
  74     private static final byte BACKSLASH=92;\r
  75     //private static final byte TILDE=126;\r
  76     private static final byte AMPERSAND=0x26;\r
  77     private static final byte COMMA=0x2c;\r
  78     private static final byte SLASH=0x2f;\r
  79     \r
  80     // legal byte values: all US-ASCII graphic characters 0x20..0x7e\r
  81     private static boolean isLegal(char c, boolean useIMAP) {\r
  82         if (useIMAP) {\r
  83             return (\r
  84                     (0x20 <= c) && (c <= 0x7e)\r
  85                     );\r
  86         } else {\r
  87             return (\r
  88                     ((char)(c - 32) < 94 && (c != BACKSLASH)) || isCRLFTAB(c)\r
  89                     );\r
  90         }\r
  91     }\r
  92     \r
  93     // directly encode all of printable ASCII 0x20..0x7e except '&' 0x26\r
  94     private static boolean inSetDIMAP(char c) {\r
  95         return (\r
  96                 (isLegal(c, true) && c != AMPERSAND)\r
  97                 );\r
  98     }\r
  99     \r
 100     private static byte TO_BASE64_IMAP(int n) {\r
 101         return (n < 63 ? TO_BASE_64[n] : COMMA);\r
 102     }\r
 103     \r
 104     private static byte FROM_BASE64_IMAP(char c) {\r
 105         return (c==COMMA ? 63 : c==SLASH ? -1 : FROM_BASE_64[c]);\r
 106     }\r
 107     \r
 108     /* encode directly sets D and O and CR LF SP TAB */\r
 109     private static final byte ENCODE_DIRECTLY_MAXIMUM[] =\r
 110     {\r
 111      /*0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f*/\r
 112         0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,\r
 113         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
 114         \r
 115         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\r
 116         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
 117         \r
 118         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
 119         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,\r
 120         \r
 121         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
 122         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0\r
 123     };\r
 124     \r
 125     /* encode directly set D and CR LF SP TAB but not set O */\r
 126     private static final byte ENCODE_DIRECTLY_RESTRICTED[] =\r
 127     {\r
 128      /*0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f*/\r
 129         0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,\r
 130         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
 131         \r
 132         1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,\r
 133         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,\r
 134         \r
 135         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
 136         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, \r
 137         \r
 138         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
 139         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0\r
 140     };\r
 141     \r
 142     private static final byte TO_BASE_64[] =\r
 143     {\r
 144        /* A-Z */\r
 145        65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,\r
 146        78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,\r
 147        /* a-z */\r
 148        97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,\r
 149        110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,\r
 150        /* 0-9 */\r
 151        48, 49, 50, 51, 52, 53, 54, 55, 56, 57,\r
 152        /* +/ */\r
 153        43, 47\r
 154     };\r
 155     \r
 156     private static final byte FROM_BASE_64[] =\r
 157     {\r
 158        /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */\r
 159        -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,\r
 160        -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,\r
 161        /* general punctuation with + and / and a special value (-2) for - */\r
 162        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,\r
 163        /* digits */\r
 164        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,\r
 165        /* A-Z */\r
 166        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,\r
 167        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,       \r
 168        /* a-z*/\r
 169        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,\r
 170        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3\r
 171     };\r
 172     \r
 173     class CharsetDecoderUTF7 extends CharsetDecoderICU {\r
 174         public CharsetDecoderUTF7(CharsetICU cs) {\r
 175             super(cs);\r
 176             implReset();\r
 177         }\r
 178     \r
 179         protected void implReset() {\r
 180             super.implReset();\r
 181             toUnicodeStatus=(toUnicodeStatus & 0xf0000000) | 0x1000000;\r
 182         }\r
 183         \r
 184         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { \r
 185             CoderResult cr=CoderResult.UNDERFLOW;\r
 186             byte base64Value;\r
 187             byte base64Counter;\r
 188             byte inDirectMode;\r
 189             char bits;\r
 190             int byteIndex;\r
 191             int sourceIndex, nextSourceIndex;\r
 192             \r
 193             int length;\r
 194             \r
 195             char b;\r
 196             char c;\r
 197             \r
 198             int sourceArrayIndex=source.position();\r
 199             \r
 200             //get the state of the machine state\r
 201             {\r
 202             int status=toUnicodeStatus;\r
 203             inDirectMode=(byte)((status >> 24) & 1);\r
 204             base64Counter=(byte)(status >> 16);\r
 205             bits=(char)status;\r
 206             }\r
 207             byteIndex=toULength;\r
 208             /* sourceIndex=-1 if the current character began in the previous buffer */\r
 209             sourceIndex=byteIndex==0 ? 0 : -1;\r
 210             nextSourceIndex=0;            \r
 211             \r
 212             directMode:  while (true) {\r
 213                 if (inDirectMode==1) {\r
 214                     /* \r
 215                      * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,\r
 216                      * with their US-ASCII byte values.\r
 217                      * Backslash and Tilde and most control characters are not alled in UTF-7.\r
 218                      * A plus sign starts Unicode (or "escape") Mode.\r
 219                      * An ampersand starts Unicode Mode for IMAP.\r
 220                      * \r
 221                      * In Direct Mode, only the sourceIndex is used.\r
 222                      */\r
 223                     byteIndex=0;\r
 224                     length=source.remaining();\r
 225                     //targetCapacity=target.remaining();\r
 226                     //Commented out because length of source may be larger than target when it comes to bytes \r
 227                     /*if (useIMAP && length > targetCapacity) {\r
 228                         length=targetCapacity;\r
 229                     }*/\r
 230                     while (length > 0) {\r
 231                         b=(char)(source.get());\r
 232                         sourceArrayIndex++;\r
 233                         if (!isLegal(b, useIMAP)) {\r
 234                             toUBytesArray[0]=(byte)b;\r
 235                             byteIndex=1;\r
 236                             cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 237                             break;\r
 238                         } else if ((!useIMAP && b!=PLUS) || (useIMAP && b!=AMPERSAND)) {\r
 239                             // write directly encoded character\r
 240                             if (target.hasRemaining()) { // Check to make sure that there is room in target.\r
 241                                 target.put(b);\r
 242                                 if (offsets!= null) {\r
 243                                     offsets.put(sourceIndex++);\r
 244                                 }\r
 245                             } else {  // Get out and set the CoderResult.\r
 246                                 break;\r
 247                             }\r
 248                         } else { /* PLUS or (AMPERSAND in IMAP)*/\r
 249                             /* switch to Unicode mode */\r
 250                             nextSourceIndex=++sourceIndex;\r
 251                             inDirectMode=0;\r
 252                             byteIndex=0;\r
 253                             bits=0;\r
 254                             base64Counter=-1;\r
 255                             continue directMode;\r
 256                         }\r
 257                         --length;\r
 258                     }//end of while\r
 259                     if (source.hasRemaining() && target.position() >= target.limit()) {\r
 260                         /* target is full */\r
 261                         cr=CoderResult.OVERFLOW;\r
 262                     }\r
 263                     break directMode;\r
 264                 } else { /* Unicode Mode*/\r
 265                     /* \r
 266                      * In Unicode Mode, UTF-16BE is base64-encoded.\r
 267                      * The base64 sequence ends with any character that is not in the base64 alphabet.\r
 268                      * A terminating minus sign is consumed.\r
 269                      * \r
 270                      * In Unicode Mode, the sourceIndex has the index to the start of the current\r
 271                      * base64 bytes, while nextSourceIndex is precisely parallel to source,\r
 272                      * keeping the index to the following byte.\r
 273                      */\r
 274                     while(source.hasRemaining()) {\r
 275                         if (target.hasRemaining()) {\r
 276                             b=(char)source.get();\r
 277                             sourceArrayIndex++;\r
 278                             toUBytesArray[byteIndex++]=(byte)b;\r
 279                             if ((!useIMAP && b>=126) || (useIMAP && b>0x7e)) {\r
 280                                 /* illegal - test other illegal US-ASCII values by base64Value==-3 */\r
 281                                 inDirectMode=1;\r
 282                                 cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 283                                 break directMode;\r
 284                             } else if (((base64Value=FROM_BASE_64[b])>=0 && !useIMAP) || ((base64Value=FROM_BASE64_IMAP(b))>=0) && useIMAP) {\r
 285                                 /* collect base64 bytes */\r
 286                                 switch (base64Counter) {\r
 287                                 case -1: /* -1 is immediately after the + */\r
 288                                 case 0:\r
 289                                     bits=(char)base64Value;\r
 290                                     base64Counter=1;\r
 291                                     break;\r
 292                                 case 1:\r
 293                                 case 3:\r
 294                                 case 4:\r
 295                                 case 6:\r
 296                                     bits=(char)((bits<<6) | base64Value);\r
 297                                     ++base64Counter;\r
 298                                     break;\r
 299                                 case 2:\r
 300                                     c=(char)((bits<<4) | (base64Value>>2));\r
 301                                     if (useIMAP && isLegal(c, useIMAP)) {\r
 302                                         // illegal\r
 303                                         inDirectMode=1;\r
 304                                         cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 305                                         // goto endloop;\r
 306                                         break directMode;\r
 307                                     }\r
 308                                     target.put(c);\r
 309                                     if (offsets != null) {\r
 310                                         offsets.put(sourceIndex);\r
 311                                         sourceIndex=nextSourceIndex - 1;\r
 312                                     }\r
 313                                     toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */\r
 314                                     byteIndex=1;\r
 315                                     bits=(char)(base64Value&3);\r
 316                                     base64Counter=3;\r
 317                                     break;\r
 318                                 case 5:\r
 319                                     c=(char)((bits<<2) | (base64Value>>4));\r
 320                                     if(useIMAP && isLegal(c, useIMAP)) {\r
 321                                         // illegal\r
 322                                         inDirectMode=1;\r
 323                                         cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 324                                         // goto endloop;\r
 325                                         break directMode;\r
 326                                     }\r
 327                                     target.put(c);\r
 328                                     if (offsets != null) {\r
 329                                         offsets.put(sourceIndex);\r
 330                                         sourceIndex=nextSourceIndex - 1;\r
 331                                     }\r
 332                                     toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */\r
 333                                     byteIndex=1;\r
 334                                     bits=(char)(base64Value&15);\r
 335                                     base64Counter=6;\r
 336                                     break;\r
 337                                 case 7:\r
 338                                     c=(char)((bits<<6) | base64Value);\r
 339                                     if (useIMAP && isLegal(c, useIMAP)) {\r
 340                                         // illegal\r
 341                                         inDirectMode=1;\r
 342                                         cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 343                                         // goto endloop;\r
 344                                         break directMode;\r
 345                                     }\r
 346                                     target.put(c);\r
 347                                     if (offsets != null) {\r
 348                                         offsets.put(sourceIndex);\r
 349                                         sourceIndex=nextSourceIndex;\r
 350                                     }\r
 351                                     byteIndex=0;\r
 352                                     bits=0;\r
 353                                     base64Counter=0;\r
 354                                     break;\r
 355                                 //default:                  \r
 356                                     /* will never occur */\r
 357                                     //break;                                                           \r
 358                                 }//end of switch\r
 359                             } else if (base64Value==-2) {\r
 360                                 /* minus sign terminates the base64 sequence */\r
 361                                 inDirectMode=1;\r
 362                                 if (base64Counter==-1) {\r
 363                                     /* +- i.e. a minus immediately following a plus */\r
 364                                     target.put(useIMAP ? (char)AMPERSAND : (char)PLUS);\r
 365                                     if (offsets != null) {\r
 366                                         offsets.put(sourceIndex - 1);\r
 367                                     }\r
 368                                 } else {\r
 369                                     /* absorb the minus and leave the Unicode Mode */\r
 370                                     if (bits!=0 || (useIMAP && base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {\r
 371                                         /*bits are illegally left over, a unicode character is incomplete */\r
 372                                         cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 373                                         break;\r
 374                                     }\r
 375                                 }\r
 376                                 sourceIndex=nextSourceIndex;\r
 377                                 continue directMode;\r
 378                             } else if (!useIMAP && base64Value==-1) { /* for any legal character except base64 and minus sign */\r
 379                                 /* leave the Unicode Mode */\r
 380                                 inDirectMode=1;\r
 381                                 if (base64Counter==-1) {\r
 382                                     /* illegal:  + immediately followed by something other than base64 minus sign */\r
 383                                     /* include the plus sign in the reported sequence */\r
 384                                     --sourceIndex;\r
 385                                     toUBytesArray[0]=PLUS;\r
 386                                     toUBytesArray[1]=(byte)b;\r
 387                                     byteIndex=2;\r
 388                                     cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 389                                     break;\r
 390                                 } else if (bits==0) {\r
 391                                     /* un-read the character in case it is a plus sign */\r
 392                                     source.position(--sourceArrayIndex);\r
 393                                     sourceIndex=nextSourceIndex - 1;\r
 394                                     continue directMode;\r
 395                                 } else {\r
 396                                     /* bits are illegally left over, a unicode character is incomplete */\r
 397                                     cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 398                                     break;\r
 399                                 }\r
 400                             } else { \r
 401                                 if (useIMAP && base64Counter==-1) {\r
 402                                     // illegal: & immediately followed by something other than base64 or minus sign\r
 403                                     // include the ampersand in the reported sequence\r
 404                                     --sourceIndex;\r
 405                                     toUBytesArray[0]=AMPERSAND;\r
 406                                     toUBytesArray[1]=(byte)b;\r
 407                                     byteIndex=2;\r
 408                                 }\r
 409                                 /* base64Value==-3 for illegal characters */\r
 410                                 /* illegal */\r
 411                                 inDirectMode=1;\r
 412                                 cr=CoderResult.malformedForLength(sourceArrayIndex);\r
 413                                 break;\r
 414                             }\r
 415                         } else {\r
 416                             /* target is full */\r
 417                             cr=CoderResult.OVERFLOW;\r
 418                             break;\r
 419                         }\r
 420                     } //end of while\r
 421                     break directMode;\r
 422                 }\r
 423             }//end of direct mode label\r
 424             if (useIMAP) {\r
 425                 if (!cr.isError() && inDirectMode==0 && flush && byteIndex==0 && !source.hasRemaining()) {\r
 426                     if (base64Counter==-1) {\r
 427                         /* & at the very end of the input */\r
 428                         /* make the ampersand the reported sequence */\r
 429                         toUBytesArray[0]=AMPERSAND;\r
 430                         byteIndex=1;\r
 431                     }\r
 432                     /* else if (base64Counter!=-1) byteIndex remains 0 because ther is no particular byte sequence */\r
 433                     inDirectMode=1;\r
 434                     cr=CoderResult.malformedForLength(sourceIndex);\r
 435                 }\r
 436                 \r
 437             } else {\r
 438                 if (!cr.isError() && flush && !source.hasRemaining() && bits  ==0) {\r
 439                     /*\r
 440                      * if we are in Unicode Mode, then the byteIndex might not be 0,\r
 441                      * but that is ok if bits -- 0\r
 442                      * -> we set byteIndex=0 at the end of the stream to avoid a truncated error \r
 443                      * (not true for IMAP-mailbox-name where we must end in direct mode)\r
 444                      */\r
 445                     if (!cr.isOverflow()) {\r
 446                         byteIndex=0;\r
 447                     }\r
 448                 }\r
 449             }\r
 450             /* set the converter state */\r
 451             toUnicodeStatus=(inDirectMode<<24 | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (int)bits);\r
 452             toULength=byteIndex;\r
 453    \r
 454             return cr;\r
 455         }\r
 456     }\r
 457     \r
 458     class CharsetEncoderUTF7 extends CharsetEncoderICU {\r
 459         public CharsetEncoderUTF7(CharsetICU cs) {\r
 460             super(cs, fromUSubstitution);\r
 461             implReset();\r
 462         }\r
 463         \r
 464         protected void implReset() {\r
 465             super.implReset();\r
 466             fromUnicodeStatus=(fromUnicodeStatus & 0xf0000000) | 0x1000000;\r
 467         }\r
 468         \r
 469         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
 470             CoderResult cr=CoderResult.UNDERFLOW;\r
 471             byte inDirectMode;\r
 472             byte encodeDirectly[];\r
 473             int status;\r
 474             \r
 475             int length, targetCapacity, sourceIndex;\r
 476             \r
 477             byte base64Counter;\r
 478             char bits;\r
 479             char c;\r
 480             char b;\r
 481             /* get the state machine state */\r
 482             {\r
 483                 status=fromUnicodeStatus;\r
 484                 encodeDirectly=(((long)status) < 0x10000000) ? ENCODE_DIRECTLY_MAXIMUM : ENCODE_DIRECTLY_RESTRICTED;\r
 485                 inDirectMode=(byte)((status >> 24) & 1);\r
 486                 base64Counter=(byte)(status >> 16);\r
 487                 bits=(char)((byte)status);\r
 488             }\r
 489             /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */\r
 490             sourceIndex=0;\r
 491             \r
 492             directMode: while(true) {\r
 493             if(inDirectMode==1) {\r
 494                 length=source.remaining();\r
 495                 targetCapacity=target.remaining();\r
 496                 if(length > targetCapacity) {\r
 497                     length=targetCapacity;\r
 498                 }\r
 499                 while (length > 0) {\r
 500                     c=source.get();\r
 501                     /* UTF7: currently always encode CR LF SP TAB directly */\r
 502                     /* IMAP: encode 0x20..0x7e except '&' directly */\r
 503                     if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && inSetDIMAP(c))) {\r
 504                         /* encode directly */\r
 505                         target.put((byte)c);\r
 506                         if (offsets != null) {\r
 507                             offsets.put(sourceIndex++);\r
 508                         }\r
 509                     } else if ((!useIMAP && c==PLUS) || (useIMAP && c==AMPERSAND)) {\r
 510                         /* IMAP: output &- for & */\r
 511                         /* UTF-7: output +- for + */\r
 512                         target.put(useIMAP ? AMPERSAND : PLUS);\r
 513                         if (target.hasRemaining()) {\r
 514                             target.put(MINUS);\r
 515                             if (offsets != null) {\r
 516                                 offsets.put(sourceIndex);\r
 517                                 offsets.put(sourceIndex++);\r
 518                             }\r
 519                             /* realign length and targetCapacity */\r
 520                             continue directMode;\r
 521                         } else {\r
 522                             if (offsets != null) {\r
 523                                 offsets.put(sourceIndex++);\r
 524                             }\r
 525                             errorBuffer[0]=MINUS;\r
 526                             errorBufferLength=1;\r
 527                             cr=CoderResult.OVERFLOW;\r
 528                             break;\r
 529                         }\r
 530                     } else {\r
 531                         /* un-read this character and switch to unicode mode */\r
 532                         source.position(source.position() - 1);\r
 533                         target.put(useIMAP ? AMPERSAND : PLUS);\r
 534                         if (offsets != null) {\r
 535                             offsets.put(sourceIndex);\r
 536                         }\r
 537                         inDirectMode=0;\r
 538                         base64Counter=0;\r
 539                         continue directMode;\r
 540                     }\r
 541                     --length;\r
 542                 } //end of while\r
 543                 if (source.hasRemaining() && !target.hasRemaining()) {\r
 544                     /* target is full */\r
 545                     cr=CoderResult.OVERFLOW;\r
 546                 }\r
 547                 break directMode;\r
 548             } else { \r
 549                 /* Unicode Mode */\r
 550                 while (source.hasRemaining()) {\r
 551                     if (target.hasRemaining()) {\r
 552                         c=source.get();\r
 553                         if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && isLegal(c, useIMAP))) {\r
 554                             /* encode directly */\r
 555                             inDirectMode=1;\r
 556                             \r
 557                             /* trick: back out this character to make this easier */\r
 558                             source.position(source.position() - 1);\r
 559                             \r
 560                             /* terminate the base64 sequence */\r
 561                             if (base64Counter!=0) {\r
 562                                 /* write remaining bits for the previous character */\r
 563                                 target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]);\r
 564                                 if (offsets!=null) {\r
 565                                     offsets.put(sourceIndex-1);\r
 566                                 }\r
 567                             }\r
 568                             if (FROM_BASE_64[c]!=-1 || useIMAP) {\r
 569                                 /* need to terminate with a minus */\r
 570                                 if (target.hasRemaining()) {\r
 571                                     target.put(MINUS);\r
 572                                     if (offsets!=null) {\r
 573                                         offsets.put(sourceIndex-1);\r
 574                                     }\r
 575                                 } else {\r
 576                                     errorBuffer[0]=MINUS;\r
 577                                     errorBufferLength=1;\r
 578                                     cr=CoderResult.OVERFLOW;\r
 579                                     break;\r
 580                                 }\r
 581                             }\r
 582                             continue directMode;\r
 583                         } else {\r
 584                             /*\r
 585                              * base64 this character:\r
 586                              * Output 2 or 3 base64 bytres for the remaining bits of the previous character\r
 587                              * and the bits of this character, each implicitly in UTF-16BE.\r
 588                              * \r
 589                              * Here, bits is an 8-bit variable because only 6 bits need to be kept from one\r
 590                              * character to the next.  The actual 2 or 4 bits are shifted to the left edge\r
 591                              * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.\r
 592                              */\r
 593                             switch (base64Counter) {\r
 594                             case 0:\r
 595                                 b=(char)(c>>10);\r
 596                                 target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 597                                 if (target.hasRemaining()) {\r
 598                                     b=(char)((c>>4)&0x3f);\r
 599                                     target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 600                                     if (offsets!=null) {\r
 601                                         offsets.put(sourceIndex);\r
 602                                         offsets.put(sourceIndex++);\r
 603                                     }\r
 604                                 } else {\r
 605                                     if (offsets!=null) {\r
 606                                         offsets.put(sourceIndex++);\r
 607                                     }\r
 608                                     b=(char)((c>>4)&0x3f);\r
 609                                     errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 610                                     errorBufferLength=1;\r
 611                                     cr=CoderResult.OVERFLOW;\r
 612                                 }\r
 613                                 bits=(char)((c&15)<<2);\r
 614                                 base64Counter=1;\r
 615                                 break;\r
 616                             case 1:\r
 617                                 b=(char)(bits|(c>>14));\r
 618                                 target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 619                                 if (target.hasRemaining()) {\r
 620                                     b=(char)((c>>8)&0x3f);\r
 621                                     target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 622                                     if (target.hasRemaining()) {\r
 623                                         b=(char)((c>>2)&0x3f);\r
 624                                         target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 625                                         if (offsets!=null) {\r
 626                                             offsets.put(sourceIndex);\r
 627                                             offsets.put(sourceIndex);\r
 628                                             offsets.put(sourceIndex++);\r
 629                                         }\r
 630                                     } else {\r
 631                                         if (offsets!=null) {\r
 632                                             offsets.put(sourceIndex);\r
 633                                             offsets.put(sourceIndex++);\r
 634                                         }\r
 635                                         b=(char)((c>>2)&0x3f);\r
 636                                         errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 637                                         errorBufferLength=1;\r
 638                                         cr=CoderResult.OVERFLOW;\r
 639                                     }\r
 640                                 } else {\r
 641                                     if (offsets!=null) {\r
 642                                         offsets.put(sourceIndex++);\r
 643                                     }\r
 644                                     b=(char)((c>>8)&0x3f);\r
 645                                     errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 646                                     b=(char)((c>>2)&0x3f);\r
 647                                     errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 648                                     errorBufferLength=2;\r
 649                                     cr=CoderResult.OVERFLOW;\r
 650                                 }\r
 651                                 bits=(char)((c&3)<<4);\r
 652                                 base64Counter=2;\r
 653                                 break;\r
 654                             case 2:\r
 655                                 b=(char)(bits|(c>>12));\r
 656                                 target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 657                                 if (target.hasRemaining()) {\r
 658                                     b=(char)((c>>6)&0x3f);\r
 659                                     target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 660                                     if (target.hasRemaining()) {\r
 661                                         b=(char)(c&0x3f);\r
 662                                         target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);\r
 663                                         if (offsets!=null) {\r
 664                                             offsets.put(sourceIndex);\r
 665                                             offsets.put(sourceIndex);\r
 666                                             offsets.put(sourceIndex++);\r
 667                                         }\r
 668                                     } else {\r
 669                                         if (offsets!=null) {\r
 670                                             offsets.put(sourceIndex);\r
 671                                             offsets.put(sourceIndex++);\r
 672                                         }\r
 673                                         b=(char)(c&0x3f);\r
 674                                         errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 675                                         errorBufferLength=1;\r
 676                                         cr=CoderResult.OVERFLOW;\r
 677                                     }\r
 678                                 } else {\r
 679                                     if (offsets!=null) {\r
 680                                         offsets.put(sourceIndex++);\r
 681                                     }\r
 682                                     b=(char)((c>>6)&0x3f);\r
 683                                     errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 684                                     b=(char)(c&0x3f);\r
 685                                     errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];\r
 686                                     errorBufferLength=2;\r
 687                                     cr=CoderResult.OVERFLOW;\r
 688                                 }\r
 689                                 bits=0;\r
 690                                 base64Counter=0;\r
 691                                 break;\r
 692                            //default:\r
 693                                /* will never occur */\r
 694                                //break;\r
 695                            } //end of switch \r
 696                         }                      \r
 697                     } else {\r
 698                         /* target is full */\r
 699                         cr=CoderResult.OVERFLOW;\r
 700                         break;\r
 701                     }\r
 702                 } //end of while\r
 703                 break directMode;\r
 704             }\r
 705             } //end of directMode label\r
 706             \r
 707             if (flush && !source.hasRemaining()) {\r
 708                 /* flush remaining bits to the target */\r
 709                 if (inDirectMode==0) {\r
 710                     if (base64Counter!=0) {\r
 711                         if (target.hasRemaining()) {\r
 712                             target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]);\r
 713                             if (offsets!=null) {\r
 714                                 offsets.put(sourceIndex - 1);\r
 715                             }\r
 716                         } else {\r
 717                             errorBuffer[errorBufferLength++]=useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits];\r
 718                             cr=CoderResult.OVERFLOW;\r
 719                         }\r
 720                     }\r
 721                     if (useIMAP) {\r
 722                         /* IMAP: need to terminate with a minus */\r
 723                         if (target.hasRemaining()) {\r
 724                             target.put(MINUS);\r
 725                             if (offsets!=null) {\r
 726                                 offsets.put(sourceIndex - 1);\r
 727                             }\r
 728                         } else {\r
 729                             errorBuffer[errorBufferLength++]=MINUS;\r
 730                             cr=CoderResult.OVERFLOW;\r
 731                         }\r
 732                     }\r
 733                 }\r
 734                 /*reset the state for the next conversion */\r
 735                 fromUnicodeStatus=((status&0xf0000000) | 0x1000000); /* keep version, inDirectMode=TRUE */\r
 736             } else {\r
 737                 /* set the converter state back */\r
 738                 fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | ((int)bits));\r
 739             }\r
 740             \r
 741             return cr;\r
 742         }\r
 743     }\r
 744     \r
 745     public CharsetDecoder newDecoder() {\r
 746         return new CharsetDecoderUTF7(this);\r
 747     }\r
 748     \r
 749     public CharsetEncoder newEncoder() {\r
 750         return new CharsetEncoderUTF7(this);\r
 751     }\r
 752     \r
 753     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){\r
 754         getCompleteUnicodeSet(setFillIn);\r
 755     }\r
 756 }\r