2 *******************************************************************************
\r
3 * Copyright (C) 2003-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.stringprep;
\r
9 import com.ibm.icu.text.StringPrepParseException;
\r
10 import com.ibm.icu.text.UCharacterIterator;
\r
15 * To change the template for this generated type comment go to
\r
16 * Window>Preferences>Java>Code Generation>Code and Comments
\r
18 public class IDNAReference {
\r
20 private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
\r
21 private static final int ACE_PREFIX_LENGTH = 4;
\r
23 private static final int MAX_LABEL_LENGTH = 63;
\r
24 private static final int HYPHEN = 0x002D;
\r
25 private static final int CAPITAL_A = 0x0041;
\r
26 private static final int CAPITAL_Z = 0x005A;
\r
27 private static final int LOWER_CASE_DELTA = 0x0020;
\r
28 private static final int FULL_STOP = 0x002E;
\r
31 public static final int DEFAULT = 0x0000;
\r
32 public static final int ALLOW_UNASSIGNED = 0x0001;
\r
33 public static final int USE_STD3_RULES = 0x0002;
\r
34 public static final NamePrepTransform transform = NamePrepTransform.getInstance();
\r
36 public static boolean isReady() {
\r
37 return transform.isReady();
\r
40 private static boolean startsWithPrefix(StringBuffer src){
\r
41 boolean startsWithPrefix = true;
\r
43 if(src.length() < ACE_PREFIX_LENGTH){
\r
46 for(int i=0; i<ACE_PREFIX_LENGTH;i++){
\r
47 if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
\r
48 startsWithPrefix = false;
\r
51 return startsWithPrefix;
\r
54 private static char toASCIILower(char ch){
\r
55 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
\r
56 return (char)(ch + LOWER_CASE_DELTA);
\r
61 private static StringBuffer toASCIILower(StringBuffer src){
\r
62 StringBuffer dest = new StringBuffer();
\r
63 for(int i=0; i<src.length();i++){
\r
64 dest.append(toASCIILower(src.charAt(i)));
\r
69 private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
\r
72 for(int i =0;/* no condition */;i++) {
\r
73 /* If we reach the ends of both strings then they match */
\r
74 if(i == s1.length()) {
\r
81 /* Case-insensitive comparison */
\r
83 rc=toASCIILower(c1)-toASCIILower(c2);
\r
91 private static int getSeparatorIndex(char[] src,int start, int limit){
\r
92 for(; start<limit;start++){
\r
93 if(NamePrepTransform.isLabelSeparator(src[start])){
\r
97 // we have not found the separator just return length
\r
101 private static boolean isLDHChar(int ch){
\r
102 // high runner case
\r
106 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
\r
107 if( (ch==0x002D) ||
\r
108 (0x0030 <= ch && ch <= 0x0039) ||
\r
109 (0x0041 <= ch && ch <= 0x005A) ||
\r
110 (0x0061 <= ch && ch <= 0x007A)
\r
117 public static StringBuffer convertToASCII(String src, int options)
\r
118 throws StringPrepParseException{
\r
119 UCharacterIterator iter = UCharacterIterator.getInstance(src);
\r
120 return convertToASCII(iter,options);
\r
122 public static StringBuffer convertToASCII(StringBuffer src, int options)
\r
123 throws StringPrepParseException{
\r
124 UCharacterIterator iter = UCharacterIterator.getInstance(src);
\r
125 return convertToASCII(iter,options);
\r
127 public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
\r
128 throws StringPrepParseException{
\r
130 char[] caseFlags = null;
\r
132 // the source contains all ascii codepoints
\r
133 boolean srcIsASCII = true;
\r
134 // assume the source contains all LDH codepoints
\r
135 boolean srcIsLDH = true;
\r
138 boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
\r
142 while((ch = srcIter.next())!= UCharacterIterator.DONE){
\r
144 srcIsASCII = false;
\r
148 srcIter.setToStart();
\r
149 StringBuffer processOut = null;
\r
150 // step 2 is performed only if the source contains non ASCII
\r
153 processOut = transform.prepare(srcIter,options);
\r
155 processOut = new StringBuffer(srcIter.getText());
\r
157 int poLen = processOut.length();
\r
159 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
\r
161 StringBuffer dest = new StringBuffer();
\r
163 // reset the variable to verify if output of prepare is ASCII or not
\r
167 for(int j=0;j<poLen;j++ ){
\r
168 ch=processOut.charAt(j);
\r
170 srcIsASCII = false;
\r
171 }else if(isLDHChar(ch)==false){
\r
172 // here we do not assemble surrogates
\r
173 // since we know that LDH code points
\r
174 // are in the ASCII range only
\r
180 if(useSTD3ASCIIRules == true){
\r
181 // verify 3a and 3b
\r
182 if( srcIsLDH == false /* source contains some non-LDH characters */
\r
183 || processOut.charAt(0) == HYPHEN
\r
184 || processOut.charAt(processOut.length()-1) == HYPHEN){
\r
186 /* populate the parseError struct */
\r
187 if(srcIsLDH==false){
\r
188 throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
\r
189 StringPrepParseException.STD3_ASCII_RULES_ERROR,
\r
190 processOut.toString(),
\r
191 (failPos>0) ? (failPos-1) : failPos);
\r
192 }else if(processOut.charAt(0) == HYPHEN){
\r
193 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
\r
194 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
\r
197 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
\r
198 StringPrepParseException.STD3_ASCII_RULES_ERROR,
\r
199 processOut.toString(),
\r
200 (poLen>0) ? poLen-1 : poLen);
\r
208 // step 5 : verify the sequence does not begin with ACE prefix
\r
209 if(!startsWithPrefix(processOut)){
\r
211 //step 6: encode the sequence with punycode
\r
212 StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);
\r
214 // convert all codepoints to lower case ASCII
\r
215 StringBuffer lowerOut = toASCIILower(punyout);
\r
217 //Step 7: prepend the ACE prefix
\r
218 dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
\r
219 //Step 6: copy the contents in b2 into dest
\r
220 dest.append(lowerOut);
\r
222 throw new StringPrepParseException("The input does not start with the ACE Prefix.",
\r
223 StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
\r
226 if(dest.length() > MAX_LABEL_LENGTH){
\r
227 throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
\r
228 StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
\r
233 public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
\r
234 throws StringPrepParseException{
\r
235 return convertIDNToASCII(iter.getText(), options);
\r
237 public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
\r
238 throws StringPrepParseException{
\r
239 return convertIDNToASCII(str.toString(), options);
\r
241 public static StringBuffer convertIDNToASCII(String src,int options)
\r
242 throws StringPrepParseException{
\r
243 char[] srcArr = src.toCharArray();
\r
244 StringBuffer result = new StringBuffer();
\r
246 int oldSepIndex = 0;
\r
248 sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
\r
249 String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
\r
250 //make sure this is not a root label separator.
\r
251 if(!(label.length()==0 && sepIndex==srcArr.length)){
\r
252 UCharacterIterator iter = UCharacterIterator.getInstance(label);
\r
253 result.append(convertToASCII(iter,options));
\r
255 if(sepIndex==srcArr.length){
\r
258 // increment the sepIndex to skip past the separator
\r
260 oldSepIndex = sepIndex;
\r
261 result.append((char)FULL_STOP);
\r
266 public static StringBuffer convertToUnicode(String src, int options)
\r
267 throws StringPrepParseException{
\r
268 UCharacterIterator iter = UCharacterIterator.getInstance(src);
\r
269 return convertToUnicode(iter,options);
\r
271 public static StringBuffer convertToUnicode(StringBuffer src, int options)
\r
272 throws StringPrepParseException{
\r
273 UCharacterIterator iter = UCharacterIterator.getInstance(src);
\r
274 return convertToUnicode(iter,options);
\r
276 public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
\r
277 throws StringPrepParseException{
\r
279 char[] caseFlags = null;
\r
282 boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
\r
284 // the source contains all ascii codepoints
\r
285 boolean srcIsASCII = true;
\r
286 // assume the source contains all LDH codepoints
\r
287 boolean srcIsLDH = true;
\r
291 int saveIndex = iter.getIndex();
\r
292 // step 1: find out if all the codepoints in src are ASCII
\r
293 while((ch=iter.next())!= UCharacterIterator.DONE){
\r
295 srcIsASCII = false;
\r
296 }else if(isLDHChar(ch)==false){
\r
297 failPos = iter.getIndex();
\r
301 StringBuffer processOut;
\r
303 if(srcIsASCII == false){
\r
304 // step 2: process the string
\r
305 iter.setIndex(saveIndex);
\r
306 processOut = transform.prepare(iter,options);
\r
309 //just point to source
\r
310 processOut = new StringBuffer(iter.getText());
\r
313 // The RFC states that
\r
315 // ToUnicode never fails. If any step fails, then the original input
\r
316 // is returned immediately in that step.
\r
319 //step 3: verify ACE Prefix
\r
320 if(startsWithPrefix(processOut)){
\r
322 //step 4: Remove the ACE Prefix
\r
323 String temp = processOut.substring(ACE_PREFIX_LENGTH,processOut.length());
\r
325 //step 5: Decode using punycode
\r
326 StringBuffer decodeOut = PunycodeReference.decode(new StringBuffer(temp),caseFlags);
\r
328 //step 6:Apply toASCII
\r
329 StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
\r
332 if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
\r
333 throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed",
\r
334 StringPrepParseException.VERIFICATION_ERROR);
\r
337 //step 8: return output of step 5
\r
341 // verify that STD3 ASCII rules are satisfied
\r
342 if(useSTD3ASCIIRules == true){
\r
343 if( srcIsLDH == false /* source contains some non-LDH characters */
\r
344 || processOut.charAt(0) == HYPHEN
\r
345 || processOut.charAt(processOut.length()-1) == HYPHEN){
\r
347 if(srcIsLDH==false){
\r
348 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
\r
349 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
\r
350 (failPos>0) ? (failPos-1) : failPos);
\r
351 }else if(processOut.charAt(0) == HYPHEN){
\r
352 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
\r
353 StringPrepParseException.STD3_ASCII_RULES_ERROR,
\r
354 processOut.toString(),0);
\r
357 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
\r
358 StringPrepParseException.STD3_ASCII_RULES_ERROR,
\r
359 processOut.toString(),
\r
360 processOut.length());
\r
365 // just return the source
\r
366 return new StringBuffer(iter.getText());
\r
369 public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
\r
370 throws StringPrepParseException{
\r
371 return convertIDNToUnicode(iter.getText(), options);
\r
373 public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
\r
374 throws StringPrepParseException{
\r
375 return convertIDNToUnicode(str.toString(), options);
\r
377 public static StringBuffer convertIDNToUnicode(String src, int options)
\r
378 throws StringPrepParseException{
\r
380 char[] srcArr = src.toCharArray();
\r
381 StringBuffer result = new StringBuffer();
\r
385 sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
\r
386 String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
\r
387 if(label.length()==0 && sepIndex!=srcArr.length ){
\r
388 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
\r
390 UCharacterIterator iter = UCharacterIterator.getInstance(label);
\r
391 result.append(convertToUnicode(iter,options));
\r
392 if(sepIndex==srcArr.length){
\r
395 // increment the sepIndex to skip past the separator
\r
397 oldSepIndex = sepIndex;
\r
398 result.append((char)FULL_STOP);
\r
403 public static int compare(StringBuffer s1, StringBuffer s2, int options)
\r
404 throws StringPrepParseException{
\r
405 if(s1==null || s2 == null){
\r
406 throw new IllegalArgumentException("One of the source buffers is null");
\r
408 StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
\r
409 StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
\r
410 return compareCaseInsensitiveASCII(s1Out,s2Out);
\r
413 public static int compare(String s1, String s2, int options)
\r
414 throws StringPrepParseException{
\r
415 if(s1==null || s2 == null){
\r
416 throw new IllegalArgumentException("One of the source buffers is null");
\r
418 StringBuffer s1Out = convertIDNToASCII(s1, options);
\r
419 StringBuffer s2Out = convertIDNToASCII(s2, options);
\r
420 return compareCaseInsensitiveASCII(s1Out,s2Out);
\r
423 public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
\r
424 throws StringPrepParseException{
\r
425 if(i1==null || i2 == null){
\r
426 throw new IllegalArgumentException("One of the source buffers is null");
\r
428 StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
\r
429 StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
\r
430 return compareCaseInsensitiveASCII(s1Out,s2Out);
\r