/* ******************************************************************************* * Copyright (C) 2003-2010, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.stringprep; import com.ibm.icu.text.StringPrepParseException; import com.ibm.icu.text.UCharacterIterator; /** * @author ram * * To change the template for this generated type comment go to * Window>Preferences>Java>Code Generation>Code and Comments */ public class IDNAReference { private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ; private static final int ACE_PREFIX_LENGTH = 4; private static final int MAX_LABEL_LENGTH = 63; private static final int HYPHEN = 0x002D; private static final int CAPITAL_A = 0x0041; private static final int CAPITAL_Z = 0x005A; private static final int LOWER_CASE_DELTA = 0x0020; private static final int FULL_STOP = 0x002E; public static final int DEFAULT = 0x0000; public static final int ALLOW_UNASSIGNED = 0x0001; public static final int USE_STD3_RULES = 0x0002; public static final NamePrepTransform transform = NamePrepTransform.getInstance(); public static boolean isReady() { return transform.isReady(); } private static boolean startsWithPrefix(StringBuffer src){ boolean startsWithPrefix = true; if(src.length() < ACE_PREFIX_LENGTH){ return false; } for(int i=0; i0x007A){ return false; } //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] if( (ch==0x002D) || (0x0030 <= ch && ch <= 0x0039) || (0x0041 <= ch && ch <= 0x005A) || (0x0061 <= ch && ch <= 0x007A) ){ return true; } return false; } public static StringBuffer convertToASCII(String src, int options) throws StringPrepParseException{ UCharacterIterator iter = UCharacterIterator.getInstance(src); return convertToASCII(iter,options); } public static StringBuffer convertToASCII(StringBuffer src, int options) throws StringPrepParseException{ UCharacterIterator iter = UCharacterIterator.getInstance(src); return convertToASCII(iter,options); } public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options) throws StringPrepParseException{ char[] caseFlags = null; // the source contains all ascii codepoints boolean srcIsASCII = true; // assume the source contains all LDH codepoints boolean srcIsLDH = true; //get the options boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0); int ch; // step 1 while((ch = srcIter.next())!= UCharacterIterator.DONE){ if(ch> 0x7f){ srcIsASCII = false; } } int failPos = -1; srcIter.setToStart(); StringBuffer processOut = null; // step 2 is performed only if the source contains non ASCII if(!srcIsASCII){ // step 2 processOut = transform.prepare(srcIter,options); }else{ processOut = new StringBuffer(srcIter.getText()); } int poLen = processOut.length(); if(poLen==0){ throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); } StringBuffer dest = new StringBuffer(); // reset the variable to verify if output of prepare is ASCII or not srcIsASCII = true; // step 3 & 4 for(int j=0;j 0x7F){ srcIsASCII = false; }else if(isLDHChar(ch)==false){ // here we do not assemble surrogates // since we know that LDH code points // are in the ASCII range only srcIsLDH = false; failPos = j; } } if(useSTD3ASCIIRules == true){ // verify 3a and 3b if( srcIsLDH == false /* source contains some non-LDH characters */ || processOut.charAt(0) == HYPHEN || processOut.charAt(processOut.length()-1) == HYPHEN){ /* populate the parseError struct */ if(srcIsLDH==false){ throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules", StringPrepParseException.STD3_ASCII_RULES_ERROR, processOut.toString(), (failPos>0) ? (failPos-1) : failPos); }else if(processOut.charAt(0) == HYPHEN){ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0); }else{ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepParseException.STD3_ASCII_RULES_ERROR, processOut.toString(), (poLen>0) ? poLen-1 : poLen); } } } if(srcIsASCII){ dest = processOut; }else{ // step 5 : verify the sequence does not begin with ACE prefix if(!startsWithPrefix(processOut)){ //step 6: encode the sequence with punycode StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags); // convert all codepoints to lower case ASCII StringBuffer lowerOut = toASCIILower(punyout); //Step 7: prepend the ACE prefix dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH); //Step 6: copy the contents in b2 into dest dest.append(lowerOut); }else{ throw new StringPrepParseException("The input does not start with the ACE Prefix.", StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0); } } if(dest.length() > MAX_LABEL_LENGTH){ throw new StringPrepParseException("The labels in the input are too long. Length > 64.", StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0); } return dest; } public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options) throws StringPrepParseException{ return convertIDNToASCII(iter.getText(), options); } public static StringBuffer convertIDNtoASCII(StringBuffer str,int options) throws StringPrepParseException{ return convertIDNToASCII(str.toString(), options); } public static StringBuffer convertIDNToASCII(String src,int options) throws StringPrepParseException{ char[] srcArr = src.toCharArray(); StringBuffer result = new StringBuffer(); int sepIndex=0; int oldSepIndex = 0; for(;;){ sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); //make sure this is not a root label separator. if(!(label.length()==0 && sepIndex==srcArr.length)){ UCharacterIterator iter = UCharacterIterator.getInstance(label); result.append(convertToASCII(iter,options)); } if(sepIndex==srcArr.length){ break; } // increment the sepIndex to skip past the separator sepIndex++; oldSepIndex = sepIndex; result.append((char)FULL_STOP); } return result; } public static StringBuffer convertToUnicode(String src, int options) throws StringPrepParseException{ UCharacterIterator iter = UCharacterIterator.getInstance(src); return convertToUnicode(iter,options); } public static StringBuffer convertToUnicode(StringBuffer src, int options) throws StringPrepParseException{ UCharacterIterator iter = UCharacterIterator.getInstance(src); return convertToUnicode(iter,options); } public static StringBuffer convertToUnicode(UCharacterIterator iter, int options) throws StringPrepParseException{ char[] caseFlags = null; //get the options boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0); // the source contains all ascii codepoints boolean srcIsASCII = true; // assume the source contains all LDH codepoints boolean srcIsLDH = true; int failPos = -1; int ch; int saveIndex = iter.getIndex(); // step 1: find out if all the codepoints in src are ASCII while((ch=iter.next())!= UCharacterIterator.DONE){ if(ch>0x7F){ srcIsASCII = false; }else if(isLDHChar(ch)==false){ failPos = iter.getIndex(); srcIsLDH = false; } } StringBuffer processOut; if(srcIsASCII == false){ // step 2: process the string iter.setIndex(saveIndex); processOut = transform.prepare(iter,options); }else{ //just point to source processOut = new StringBuffer(iter.getText()); } // TODO: // The RFC states that // // ToUnicode never fails. If any step fails, then the original input // is returned immediately in that step. // //step 3: verify ACE Prefix if(startsWithPrefix(processOut)){ //step 4: Remove the ACE Prefix String temp = processOut.substring(ACE_PREFIX_LENGTH,processOut.length()); //step 5: Decode using punycode StringBuffer decodeOut = PunycodeReference.decode(new StringBuffer(temp),caseFlags); //step 6:Apply toASCII StringBuffer toASCIIOut = convertToASCII(decodeOut, options); //step 7: verify if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){ throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed", StringPrepParseException.VERIFICATION_ERROR); } //step 8: return output of step 5 return decodeOut; }else{ // verify that STD3 ASCII rules are satisfied if(useSTD3ASCIIRules == true){ if( srcIsLDH == false /* source contains some non-LDH characters */ || processOut.charAt(0) == HYPHEN || processOut.charAt(processOut.length()-1) == HYPHEN){ if(srcIsLDH==false){ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(), (failPos>0) ? (failPos-1) : failPos); }else if(processOut.charAt(0) == HYPHEN){ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepParseException.STD3_ASCII_RULES_ERROR, processOut.toString(),0); }else{ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepParseException.STD3_ASCII_RULES_ERROR, processOut.toString(), processOut.length()); } } } // just return the source return new StringBuffer(iter.getText()); } } public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options) throws StringPrepParseException{ return convertIDNToUnicode(iter.getText(), options); } public static StringBuffer convertIDNToUnicode(StringBuffer str, int options) throws StringPrepParseException{ return convertIDNToUnicode(str.toString(), options); } public static StringBuffer convertIDNToUnicode(String src, int options) throws StringPrepParseException{ char[] srcArr = src.toCharArray(); StringBuffer result = new StringBuffer(); int sepIndex=0; int oldSepIndex=0; for(;;){ sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); if(label.length()==0 && sepIndex!=srcArr.length ){ throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); } UCharacterIterator iter = UCharacterIterator.getInstance(label); result.append(convertToUnicode(iter,options)); if(sepIndex==srcArr.length){ break; } // increment the sepIndex to skip past the separator sepIndex++; oldSepIndex = sepIndex; result.append((char)FULL_STOP); } return result; } // TODO: optimize public static int compare(StringBuffer s1, StringBuffer s2, int options) throws StringPrepParseException{ if(s1==null || s2 == null){ throw new IllegalArgumentException("One of the source buffers is null"); } StringBuffer s1Out = convertIDNToASCII(s1.toString(), options); StringBuffer s2Out = convertIDNToASCII(s2.toString(), options); return compareCaseInsensitiveASCII(s1Out,s2Out); } // TODO: optimize public static int compare(String s1, String s2, int options) throws StringPrepParseException{ if(s1==null || s2 == null){ throw new IllegalArgumentException("One of the source buffers is null"); } StringBuffer s1Out = convertIDNToASCII(s1, options); StringBuffer s2Out = convertIDNToASCII(s2, options); return compareCaseInsensitiveASCII(s1Out,s2Out); } // TODO: optimize public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options) throws StringPrepParseException{ if(i1==null || i2 == null){ throw new IllegalArgumentException("One of the source buffers is null"); } StringBuffer s1Out = convertIDNToASCII(i1.getText(), options); StringBuffer s2Out = convertIDNToASCII(i2.getText(), options); return compareCaseInsensitiveASCII(s1Out,s2Out); } }