jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java

   1 /*
   2  *******************************************************************************
   3  * Copyright (C) 2003-2011, International Business Machines Corporation and    *
   4  * others. All Rights Reserved.                                                *
   5  *******************************************************************************
   6 */
   7 package com.ibm.icu.dev.test.stringprep;
   8
   9 import com.ibm.icu.text.StringPrepParseException;
  10 import com.ibm.icu.text.UCharacterIterator;
  11
  12 /**
  13  * @author ram
  14  *
  15  * To change the template for this generated type comment go to
  16  * Window>Preferences>Java>Code Generation>Code and Comments
  17  */
  18 public class IDNAReference {
  19
  20     private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
  21     private static final int ACE_PREFIX_LENGTH  = 4;
  22
  23     private static final int MAX_LABEL_LENGTH   = 63;
  24     private static final int HYPHEN             = 0x002D;
  25     private static final int CAPITAL_A          = 0x0041;
  26     private static final int CAPITAL_Z          = 0x005A;
  27     private static final int LOWER_CASE_DELTA   = 0x0020;
  28     private static final int FULL_STOP          = 0x002E;
  29
  30
  31     public static final int DEFAULT             = 0x0000;
  32     public static final int ALLOW_UNASSIGNED    = 0x0001;
  33     public static final int USE_STD3_RULES      = 0x0002;
  34     public static final NamePrepTransform transform = NamePrepTransform.getInstance();
  35
  36     public static boolean isReady() {
  37         return transform.isReady();
  38     }
  39
  40     private static boolean startsWithPrefix(StringBuffer src){
  41         boolean startsWithPrefix = true;
  42
  43         if(src.length() < ACE_PREFIX_LENGTH){
  44             return false;
  45         }
  46         for(int i=0; i<ACE_PREFIX_LENGTH;i++){
  47             if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
  48                 startsWithPrefix = false;
  49             }
  50         }
  51         return startsWithPrefix;
  52     }
  53
  54     private static char toASCIILower(char ch){
  55         if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
  56             return (char)(ch + LOWER_CASE_DELTA);
  57         }
  58         return ch;
  59     }
  60
  61     private static StringBuffer toASCIILower(StringBuffer src){
  62         StringBuffer dest = new StringBuffer();
  63         for(int i=0; i<src.length();i++){
  64             dest.append(toASCIILower(src.charAt(i)));
  65         }
  66         return dest;
  67     }
  68
  69     private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
  70         char c1,c2;
  71         int rc;
  72         for(int i =0;/* no condition */;i++) {
  73             /* If we reach the ends of both strings then they match */
  74             if(i == s1.length()) {
  75                 return 0;
  76             }
  77
  78             c1 = s1.charAt(i);
  79             c2 = s2.charAt(i);
  80
  81             /* Case-insensitive comparison */
  82             if(c1!=c2) {
  83                 rc=toASCIILower(c1)-toASCIILower(c2);
  84                 if(rc!=0) {
  85                     return rc;
  86                 }
  87             }
  88         }
  89     }
  90
  91     private static int getSeparatorIndex(char[] src,int start, int limit){
  92         for(; start<limit;start++){
  93             if(NamePrepTransform.isLabelSeparator(src[start])){
  94                 return start;
  95             }
  96         }
  97         // we have not found the separator just return length
  98         return start;
  99     }
 100
 101     private static boolean isLDHChar(int ch){
 102         // high runner case
 103         if(ch>0x007A){
 104             return false;
 105         }
 106         //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
 107         if( (ch==0x002D) ||
 108             (0x0030 <= ch && ch <= 0x0039) ||
 109             (0x0041 <= ch && ch <= 0x005A) ||
 110             (0x0061 <= ch && ch <= 0x007A)
 111           ){
 112             return true;
 113         }
 114         return false;
 115     }
 116
 117     public static StringBuffer convertToASCII(String src, int options)
 118         throws StringPrepParseException{
 119         UCharacterIterator iter = UCharacterIterator.getInstance(src);
 120         return convertToASCII(iter,options);
 121     }
 122     public static StringBuffer convertToASCII(StringBuffer src, int options)
 123         throws StringPrepParseException{
 124         UCharacterIterator iter = UCharacterIterator.getInstance(src);
 125         return convertToASCII(iter,options);
 126     }
 127     public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
 128                 throws StringPrepParseException{
 129
 130         char[] caseFlags = null;
 131
 132         // the source contains all ascii codepoints
 133         boolean srcIsASCII  = true;
 134         // assume the source contains all LDH codepoints
 135         boolean srcIsLDH = true;
 136
 137         //get the options
 138         boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
 139
 140         int ch;
 141         // step 1
 142         while((ch = srcIter.next())!= UCharacterIterator.DONE){
 143             if(ch> 0x7f){
 144                 srcIsASCII = false;
 145             }
 146         }
 147         int failPos = -1;
 148         srcIter.setToStart();
 149         StringBuffer processOut = null;
 150         // step 2 is performed only if the source contains non ASCII
 151         if(!srcIsASCII){
 152             // step 2
 153             processOut =  transform.prepare(srcIter,options);
 154         }else{
 155             processOut = new StringBuffer(srcIter.getText());
 156         }
 157         int poLen = processOut.length();
 158         if(poLen==0){
 159             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
 160         }
 161         StringBuffer dest = new StringBuffer();
 162
 163         // reset the variable to verify if output of prepare is ASCII or not
 164         srcIsASCII = true;
 165
 166         // step 3 & 4
 167         for(int j=0;j<poLen;j++ ){
 168             ch=processOut.charAt(j);
 169             if(ch > 0x7F){
 170                 srcIsASCII = false;
 171             }else if(isLDHChar(ch)==false){
 172                 // here we do not assemble surrogates
 173                 // since we know that LDH code points
 174                 // are in the ASCII range only
 175                 srcIsLDH = false;
 176                 failPos = j;
 177             }
 178         }
 179
 180         if(useSTD3ASCIIRules == true){
 181             // verify 3a and 3b
 182             if( srcIsLDH == false /* source contains some non-LDH characters */
 183                 || processOut.charAt(0) ==  HYPHEN
 184                 || processOut.charAt(processOut.length()-1) == HYPHEN){
 185
 186                 /* populate the parseError struct */
 187                 if(srcIsLDH==false){
 188                      throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
 189                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
 190                                               processOut.toString(),
 191                                              (failPos>0) ? (failPos-1) : failPos);
 192                 }else if(processOut.charAt(0) == HYPHEN){
 193                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
 194                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
 195
 196                 }else{
 197                      throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
 198                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
 199                                               processOut.toString(),
 200                                               (poLen>0) ? poLen-1 : poLen);
 201
 202                 }
 203             }
 204         }
 205         if(srcIsASCII){
 206             dest =  processOut;
 207         }else{
 208             // step 5 : verify the sequence does not begin with ACE prefix
 209             if(!startsWithPrefix(processOut)){
 210
 211                 //step 6: encode the sequence with punycode
 212                 StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);
 213
 214                 // convert all codepoints to lower case ASCII
 215                 StringBuffer lowerOut = toASCIILower(punyout);
 216
 217                 //Step 7: prepend the ACE prefix
 218                 dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
 219                 //Step 6: copy the contents in b2 into dest
 220                 dest.append(lowerOut);
 221             }else{
 222                 throw new StringPrepParseException("The input does not start with the ACE Prefix.",
 223                                    StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
 224             }
 225         }
 226         if(dest.length() > MAX_LABEL_LENGTH){
 227             throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
 228                                     StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
 229         }
 230         return dest;
 231     }
 232
 233     public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
 234             throws StringPrepParseException{
 235             return convertIDNToASCII(iter.getText(), options);
 236     }
 237     public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
 238             throws StringPrepParseException{
 239             return convertIDNToASCII(str.toString(), options);
 240     }
 241     public static StringBuffer convertIDNToASCII(String src,int options)
 242             throws StringPrepParseException{
 243         char[] srcArr = src.toCharArray();
 244         StringBuffer result = new StringBuffer();
 245         int sepIndex=0;
 246         int oldSepIndex = 0;
 247         for(;;){
 248             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
 249             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
 250             //make sure this is not a root label separator.
 251             if(!(label.length()==0 && sepIndex==srcArr.length)){
 252                 UCharacterIterator iter = UCharacterIterator.getInstance(label);
 253                 result.append(convertToASCII(iter,options));
 254             }
 255             if(sepIndex==srcArr.length){
 256                 break;
 257             }
 258             // increment the sepIndex to skip past the separator
 259             sepIndex++;
 260             oldSepIndex = sepIndex;
 261             result.append((char)FULL_STOP);
 262         }
 263         return result;
 264     }
 265
 266     public static StringBuffer convertToUnicode(String src, int options)
 267            throws StringPrepParseException{
 268         UCharacterIterator iter = UCharacterIterator.getInstance(src);
 269         return convertToUnicode(iter,options);
 270     }
 271     public static StringBuffer convertToUnicode(StringBuffer src, int options)
 272            throws StringPrepParseException{
 273         UCharacterIterator iter = UCharacterIterator.getInstance(src);
 274         return convertToUnicode(iter,options);
 275     }
 276     public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
 277            throws StringPrepParseException{
 278
 279         // the source contains all ascii codepoints
 280         boolean srcIsASCII = true;
 281
 282         int ch;
 283         int saveIndex = iter.getIndex();
 284         // step 1: find out if all the codepoints in src are ASCII
 285         while ((ch = iter.next()) != UCharacterIterator.DONE) {
 286             if (ch > 0x7F) {
 287                 srcIsASCII = false;
 288                 break;
 289             }
 290         }
 291
 292         // The RFC states that
 293         // <quote>
 294         // ToUnicode never fails. If any step fails, then the original input
 295         // is returned immediately in that step.
 296         // </quote>
 297         do {
 298             StringBuffer processOut;
 299             if (srcIsASCII == false) {
 300                 // step 2: process the string
 301                 iter.setIndex(saveIndex);
 302                 try {
 303                     processOut = transform.prepare(iter, options);
 304                 } catch (StringPrepParseException e) {
 305                     break;
 306                 }
 307             } else {
 308                 // just point to source
 309                 processOut = new StringBuffer(iter.getText());
 310             }
 311
 312             // step 3: verify ACE Prefix
 313             if (startsWithPrefix(processOut)) {
 314
 315                 // step 4: Remove the ACE Prefix
 316                 String temp = processOut.substring(ACE_PREFIX_LENGTH, processOut.length());
 317
 318                 // step 5: Decode using punycode
 319                 StringBuffer decodeOut = null;
 320                 try {
 321                     decodeOut = PunycodeReference.decode(new StringBuffer(temp), null);
 322                 } catch (StringPrepParseException e) {
 323                     break;
 324                 }
 325
 326                 // step 6:Apply toASCII
 327                 StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
 328
 329                 // step 7: verify
 330                 if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) {
 331                     break;
 332                 }
 333                 // step 8: return output of step 5
 334                 return decodeOut;
 335             }
 336         } while (false);
 337
 338         return new StringBuffer(iter.getText());
 339     }
 340
 341     public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
 342         throws StringPrepParseException{
 343         return convertIDNToUnicode(iter.getText(), options);
 344     }
 345     public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
 346         throws StringPrepParseException{
 347         return convertIDNToUnicode(str.toString(), options);
 348     }
 349     public static StringBuffer convertIDNToUnicode(String src, int options)
 350         throws StringPrepParseException{
 351
 352         char[] srcArr = src.toCharArray();
 353         StringBuffer result = new StringBuffer();
 354         int sepIndex=0;
 355         int oldSepIndex=0;
 356         for(;;){
 357             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
 358             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
 359             if(label.length()==0 && sepIndex!=srcArr.length ){
 360                 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
 361             }
 362             UCharacterIterator iter = UCharacterIterator.getInstance(label);
 363             result.append(convertToUnicode(iter,options));
 364             if(sepIndex==srcArr.length){
 365                 break;
 366             }
 367             // increment the sepIndex to skip past the separator
 368             sepIndex++;
 369             oldSepIndex = sepIndex;
 370             result.append((char)FULL_STOP);
 371         }
 372         return result;
 373     }
 374     //  TODO: optimize
 375     public static int compare(StringBuffer s1, StringBuffer s2, int options)
 376         throws StringPrepParseException{
 377         if(s1==null || s2 == null){
 378             throw new IllegalArgumentException("One of the source buffers is null");
 379         }
 380         StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
 381         StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
 382         return compareCaseInsensitiveASCII(s1Out,s2Out);
 383     }
 384     //  TODO: optimize
 385     public static int compare(String s1, String s2, int options)
 386         throws StringPrepParseException{
 387         if(s1==null || s2 == null){
 388             throw new IllegalArgumentException("One of the source buffers is null");
 389         }
 390         StringBuffer s1Out = convertIDNToASCII(s1, options);
 391         StringBuffer s2Out = convertIDNToASCII(s2, options);
 392         return compareCaseInsensitiveASCII(s1Out,s2Out);
 393     }
 394     //  TODO: optimize
 395     public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
 396         throws StringPrepParseException{
 397         if(i1==null || i2 == null){
 398             throw new IllegalArgumentException("One of the source buffers is null");
 399         }
 400         StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
 401         StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
 402         return compareCaseInsensitiveASCII(s1Out,s2Out);
 403     }
 404
 405 }