]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / tests / core / src / com / ibm / icu / dev / test / stringprep / IDNAReference.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 2003-2011, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6 */
7 package com.ibm.icu.dev.test.stringprep;
8
9 import com.ibm.icu.text.StringPrepParseException;
10 import com.ibm.icu.text.UCharacterIterator;
11
/**
 * Reference implementation of the IDNA ToASCII / ToUnicode operations and of
 * IDN comparison, built on {@link NamePrepTransform} and
 * {@link PunycodeReference}.
 *
 * @author ram
 */
18 public class IDNAReference {
19     
20     private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
21     private static final int ACE_PREFIX_LENGTH  = 4;
22
23     private static final int MAX_LABEL_LENGTH   = 63;
24     private static final int HYPHEN             = 0x002D;
25     private static final int CAPITAL_A          = 0x0041;
26     private static final int CAPITAL_Z          = 0x005A;
27     private static final int LOWER_CASE_DELTA   = 0x0020;
28     private static final int FULL_STOP          = 0x002E;
29
30
31     public static final int DEFAULT             = 0x0000;
32     public static final int ALLOW_UNASSIGNED    = 0x0001;
33     public static final int USE_STD3_RULES      = 0x0002;
34     public static final NamePrepTransform transform = NamePrepTransform.getInstance();
35   
36     public static boolean isReady() {
37         return transform.isReady();
38     }
39
40     private static boolean startsWithPrefix(StringBuffer src){
41         boolean startsWithPrefix = true;
42
43         if(src.length() < ACE_PREFIX_LENGTH){
44             return false;
45         }
46         for(int i=0; i<ACE_PREFIX_LENGTH;i++){
47             if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
48                 startsWithPrefix = false;
49             }
50         }
51         return startsWithPrefix;
52     }
53
54     private static char toASCIILower(char ch){
55         if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
56             return (char)(ch + LOWER_CASE_DELTA);
57         }
58         return ch;
59     }
60
61     private static StringBuffer toASCIILower(StringBuffer src){
62         StringBuffer dest = new StringBuffer();
63         for(int i=0; i<src.length();i++){
64             dest.append(toASCIILower(src.charAt(i)));
65         }
66         return dest;
67     }
68
69     private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
70         char c1,c2;
71         int rc;
72         for(int i =0;/* no condition */;i++) {
73             /* If we reach the ends of both strings then they match */
74             if(i == s1.length()) {
75                 return 0;
76             }
77
78             c1 = s1.charAt(i);
79             c2 = s2.charAt(i);
80         
81             /* Case-insensitive comparison */
82             if(c1!=c2) {
83                 rc=toASCIILower(c1)-toASCIILower(c2);
84                 if(rc!=0) {
85                     return rc;
86                 }
87             }
88         }
89     }
90     
91     private static int getSeparatorIndex(char[] src,int start, int limit){
92         for(; start<limit;start++){
93             if(NamePrepTransform.isLabelSeparator(src[start])){
94                 return start;
95             }
96         }
97         // we have not found the separator just return length
98         return start;
99     }
100     
101     private static boolean isLDHChar(int ch){
102         // high runner case
103         if(ch>0x007A){
104             return false;
105         }
106         //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
107         if( (ch==0x002D) || 
108             (0x0030 <= ch && ch <= 0x0039) ||
109             (0x0041 <= ch && ch <= 0x005A) ||
110             (0x0061 <= ch && ch <= 0x007A)
111           ){
112             return true;
113         }
114         return false;
115     }
116         
117     public static StringBuffer convertToASCII(String src, int options)
118         throws StringPrepParseException{
119         UCharacterIterator iter = UCharacterIterator.getInstance(src);
120         return convertToASCII(iter,options);
121     }
122     public static StringBuffer convertToASCII(StringBuffer src, int options)
123         throws StringPrepParseException{
124         UCharacterIterator iter = UCharacterIterator.getInstance(src);
125         return convertToASCII(iter,options);
126     }
127     public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
128                 throws StringPrepParseException{
129     
130         char[] caseFlags = null;
131     
132         // the source contains all ascii codepoints
133         boolean srcIsASCII  = true;
134         // assume the source contains all LDH codepoints
135         boolean srcIsLDH = true; 
136
137         //get the options
138         boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
139
140         int ch;
141         // step 1
142         while((ch = srcIter.next())!= UCharacterIterator.DONE){
143             if(ch> 0x7f){
144                 srcIsASCII = false;
145             }
146         }
147         int failPos = -1;
148         srcIter.setToStart();
149         StringBuffer processOut = null;
150         // step 2 is performed only if the source contains non ASCII
151         if(!srcIsASCII){
152             // step 2
153             processOut =  transform.prepare(srcIter,options);
154         }else{
155             processOut = new StringBuffer(srcIter.getText());
156         }
157         int poLen = processOut.length();
158         if(poLen==0){
159             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
160         }
161         StringBuffer dest = new StringBuffer();
162         
163         // reset the variable to verify if output of prepare is ASCII or not
164         srcIsASCII = true;
165         
166         // step 3 & 4
167         for(int j=0;j<poLen;j++ ){
168             ch=processOut.charAt(j);
169             if(ch > 0x7F){
170                 srcIsASCII = false;
171             }else if(isLDHChar(ch)==false){
172                 // here we do not assemble surrogates
173                 // since we know that LDH code points
174                 // are in the ASCII range only
175                 srcIsLDH = false;
176                 failPos = j;
177             }
178         }
179     
180         if(useSTD3ASCIIRules == true){
181             // verify 3a and 3b
182             if( srcIsLDH == false /* source contains some non-LDH characters */
183                 || processOut.charAt(0) ==  HYPHEN 
184                 || processOut.charAt(processOut.length()-1) == HYPHEN){
185
186                 /* populate the parseError struct */
187                 if(srcIsLDH==false){
188                      throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
189                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
190                                               processOut.toString(),
191                                              (failPos>0) ? (failPos-1) : failPos);
192                 }else if(processOut.charAt(0) == HYPHEN){
193                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
194                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
195      
196                 }else{
197                      throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
198                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
199                                               processOut.toString(),
200                                               (poLen>0) ? poLen-1 : poLen);
201
202                 }
203             }
204         }
205         if(srcIsASCII){
206             dest =  processOut;
207         }else{
208             // step 5 : verify the sequence does not begin with ACE prefix
209             if(!startsWithPrefix(processOut)){
210
211                 //step 6: encode the sequence with punycode
212                 StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);
213                 
214                 // convert all codepoints to lower case ASCII
215                 StringBuffer lowerOut = toASCIILower(punyout);
216
217                 //Step 7: prepend the ACE prefix
218                 dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
219                 //Step 6: copy the contents in b2 into dest
220                 dest.append(lowerOut);
221             }else{
222                 throw new StringPrepParseException("The input does not start with the ACE Prefix.",
223                                    StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
224             }
225         }
226         if(dest.length() > MAX_LABEL_LENGTH){
227             throw new StringPrepParseException("The labels in the input are too long. Length > 64.", 
228                                     StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
229         }
230         return dest;
231     }
232     
233     public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
234             throws StringPrepParseException{
235             return convertIDNToASCII(iter.getText(), options);          
236     }
237     public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
238             throws StringPrepParseException{
239             return convertIDNToASCII(str.toString(), options);          
240     }
241     public static StringBuffer convertIDNToASCII(String src,int options)
242             throws StringPrepParseException{
243         char[] srcArr = src.toCharArray();
244         StringBuffer result = new StringBuffer();
245         int sepIndex=0;
246         int oldSepIndex = 0;
247         for(;;){
248             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
249             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
250             //make sure this is not a root label separator.
251             if(!(label.length()==0 && sepIndex==srcArr.length)){
252                 UCharacterIterator iter = UCharacterIterator.getInstance(label);
253                 result.append(convertToASCII(iter,options));
254             }
255             if(sepIndex==srcArr.length){
256                 break;
257             }
258             // increment the sepIndex to skip past the separator
259             sepIndex++;
260             oldSepIndex = sepIndex;
261             result.append((char)FULL_STOP);
262         }
263         return result;
264     }
265
266     public static StringBuffer convertToUnicode(String src, int options)
267            throws StringPrepParseException{
268         UCharacterIterator iter = UCharacterIterator.getInstance(src);
269         return convertToUnicode(iter,options);
270     }
271     public static StringBuffer convertToUnicode(StringBuffer src, int options)
272            throws StringPrepParseException{
273         UCharacterIterator iter = UCharacterIterator.getInstance(src);
274         return convertToUnicode(iter,options);
275     }   
276     public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
277            throws StringPrepParseException{
278
279         // the source contains all ascii codepoints
280         boolean srcIsASCII = true;
281
282         int ch;
283         int saveIndex = iter.getIndex();
284         // step 1: find out if all the codepoints in src are ASCII
285         while ((ch = iter.next()) != UCharacterIterator.DONE) {
286             if (ch > 0x7F) {
287                 srcIsASCII = false;
288                 break;
289             }
290         }
291
292         // The RFC states that
293         // <quote>
294         // ToUnicode never fails. If any step fails, then the original input
295         // is returned immediately in that step.
296         // </quote>
297         do {
298             StringBuffer processOut;
299             if (srcIsASCII == false) {
300                 // step 2: process the string
301                 iter.setIndex(saveIndex);
302                 try {
303                     processOut = transform.prepare(iter, options);
304                 } catch (StringPrepParseException e) {
305                     break;
306                 }
307             } else {
308                 // just point to source
309                 processOut = new StringBuffer(iter.getText());
310             }
311
312             // step 3: verify ACE Prefix
313             if (startsWithPrefix(processOut)) {
314
315                 // step 4: Remove the ACE Prefix
316                 String temp = processOut.substring(ACE_PREFIX_LENGTH, processOut.length());
317
318                 // step 5: Decode using punycode
319                 StringBuffer decodeOut = null;
320                 try {
321                     decodeOut = PunycodeReference.decode(new StringBuffer(temp), null);
322                 } catch (StringPrepParseException e) {
323                     break;
324                 }
325
326                 // step 6:Apply toASCII
327                 StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
328
329                 // step 7: verify
330                 if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) {
331                     break;
332                 }
333                 // step 8: return output of step 5
334                 return decodeOut;
335             }
336         } while (false);
337
338         return new StringBuffer(iter.getText());
339     }
340
341     public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
342         throws StringPrepParseException{
343         return convertIDNToUnicode(iter.getText(), options);
344     }
345     public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
346         throws StringPrepParseException{
347         return convertIDNToUnicode(str.toString(), options);
348     }
349     public static StringBuffer convertIDNToUnicode(String src, int options)
350         throws StringPrepParseException{
351             
352         char[] srcArr = src.toCharArray();
353         StringBuffer result = new StringBuffer();
354         int sepIndex=0;
355         int oldSepIndex=0;
356         for(;;){
357             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
358             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
359             if(label.length()==0 && sepIndex!=srcArr.length ){
360                 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
361             }
362             UCharacterIterator iter = UCharacterIterator.getInstance(label);
363             result.append(convertToUnicode(iter,options));
364             if(sepIndex==srcArr.length){
365                 break;
366             }
367             // increment the sepIndex to skip past the separator
368             sepIndex++;
369             oldSepIndex = sepIndex;
370             result.append((char)FULL_STOP);
371         }
372         return result;
373     }
374     //  TODO: optimize
375     public static int compare(StringBuffer s1, StringBuffer s2, int options)
376         throws StringPrepParseException{
377         if(s1==null || s2 == null){
378             throw new IllegalArgumentException("One of the source buffers is null");
379         }
380         StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
381         StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
382         return compareCaseInsensitiveASCII(s1Out,s2Out);
383     }
384     //  TODO: optimize
385     public static int compare(String s1, String s2, int options)
386         throws StringPrepParseException{
387         if(s1==null || s2 == null){
388             throw new IllegalArgumentException("One of the source buffers is null");
389         }
390         StringBuffer s1Out = convertIDNToASCII(s1, options);
391         StringBuffer s2Out = convertIDNToASCII(s2, options);
392         return compareCaseInsensitiveASCII(s1Out,s2Out);
393     }
394     //  TODO: optimize
395     public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
396         throws StringPrepParseException{
397         if(i1==null || i2 == null){
398             throw new IllegalArgumentException("One of the source buffers is null");
399         }
400         StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
401         StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
402         return compareCaseInsensitiveASCII(s1Out,s2Out);
403     }
404
405 }