]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/core/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / stringprep / IDNAReference.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2003-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6 */\r
7 package com.ibm.icu.dev.test.stringprep;\r
8 \r
9 import com.ibm.icu.text.StringPrepParseException;\r
10 import com.ibm.icu.text.UCharacterIterator;\r
11 \r
/**
 * Reference implementation of the IDNA ToASCII, ToUnicode and compare
 * operations used by the stringprep tests. It is built on
 * NamePrepTransform for the preparation step and on PunycodeReference for
 * the Punycode encoding and decoding steps.
 *
 * @author ram
 */
18 public class IDNAReference {\r
19     \r
20     private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;\r
21     private static final int ACE_PREFIX_LENGTH  = 4;\r
22 \r
23     private static final int MAX_LABEL_LENGTH   = 63;\r
24     private static final int HYPHEN             = 0x002D;\r
25     private static final int CAPITAL_A          = 0x0041;\r
26     private static final int CAPITAL_Z          = 0x005A;\r
27     private static final int LOWER_CASE_DELTA   = 0x0020;\r
28     private static final int FULL_STOP          = 0x002E;\r
29 \r
30 \r
31     public static final int DEFAULT             = 0x0000;\r
32     public static final int ALLOW_UNASSIGNED    = 0x0001;\r
33     public static final int USE_STD3_RULES      = 0x0002;\r
34     public static final NamePrepTransform transform = NamePrepTransform.getInstance();\r
35   \r
36     public static boolean isReady() {\r
37         return transform.isReady();\r
38     }\r
39 \r
40     private static boolean startsWithPrefix(StringBuffer src){\r
41         boolean startsWithPrefix = true;\r
42 \r
43         if(src.length() < ACE_PREFIX_LENGTH){\r
44             return false;\r
45         }\r
46         for(int i=0; i<ACE_PREFIX_LENGTH;i++){\r
47             if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){\r
48                 startsWithPrefix = false;\r
49             }\r
50         }\r
51         return startsWithPrefix;\r
52     }\r
53 \r
54     private static char toASCIILower(char ch){\r
55         if(CAPITAL_A <= ch && ch <= CAPITAL_Z){\r
56             return (char)(ch + LOWER_CASE_DELTA);\r
57         }\r
58         return ch;\r
59     }\r
60 \r
61     private static StringBuffer toASCIILower(StringBuffer src){\r
62         StringBuffer dest = new StringBuffer();\r
63         for(int i=0; i<src.length();i++){\r
64             dest.append(toASCIILower(src.charAt(i)));\r
65         }\r
66         return dest;\r
67     }\r
68 \r
69     private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){\r
70         char c1,c2;\r
71         int rc;\r
72         for(int i =0;/* no condition */;i++) {\r
73             /* If we reach the ends of both strings then they match */\r
74             if(i == s1.length()) {\r
75                 return 0;\r
76             }\r
77 \r
78             c1 = s1.charAt(i);\r
79             c2 = s2.charAt(i);\r
80         \r
81             /* Case-insensitive comparison */\r
82             if(c1!=c2) {\r
83                 rc=toASCIILower(c1)-toASCIILower(c2);\r
84                 if(rc!=0) {\r
85                     return rc;\r
86                 }\r
87             }\r
88         }\r
89     }\r
90     \r
91     private static int getSeparatorIndex(char[] src,int start, int limit){\r
92         for(; start<limit;start++){\r
93             if(NamePrepTransform.isLabelSeparator(src[start])){\r
94                 return start;\r
95             }\r
96         }\r
97         // we have not found the separator just return length\r
98         return start;\r
99     }\r
100     \r
101     private static boolean isLDHChar(int ch){\r
102         // high runner case\r
103         if(ch>0x007A){\r
104             return false;\r
105         }\r
106         //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]\r
107         if( (ch==0x002D) || \r
108             (0x0030 <= ch && ch <= 0x0039) ||\r
109             (0x0041 <= ch && ch <= 0x005A) ||\r
110             (0x0061 <= ch && ch <= 0x007A)\r
111           ){\r
112             return true;\r
113         }\r
114         return false;\r
115     }\r
116         \r
117     public static StringBuffer convertToASCII(String src, int options)\r
118         throws StringPrepParseException{\r
119         UCharacterIterator iter = UCharacterIterator.getInstance(src);\r
120         return convertToASCII(iter,options);\r
121     }\r
122     public static StringBuffer convertToASCII(StringBuffer src, int options)\r
123         throws StringPrepParseException{\r
124         UCharacterIterator iter = UCharacterIterator.getInstance(src);\r
125         return convertToASCII(iter,options);\r
126     }\r
127     public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)\r
128                 throws StringPrepParseException{\r
129     \r
130         char[] caseFlags = null;\r
131     \r
132         // the source contains all ascii codepoints\r
133         boolean srcIsASCII  = true;\r
134         // assume the source contains all LDH codepoints\r
135         boolean srcIsLDH = true; \r
136 \r
137         //get the options\r
138         boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);\r
139 \r
140         int ch;\r
141         // step 1\r
142         while((ch = srcIter.next())!= UCharacterIterator.DONE){\r
143             if(ch> 0x7f){\r
144                 srcIsASCII = false;\r
145             }\r
146         }\r
147         int failPos = -1;\r
148         srcIter.setToStart();\r
149         StringBuffer processOut = null;\r
150         // step 2 is performed only if the source contains non ASCII\r
151         if(!srcIsASCII){\r
152             // step 2\r
153             processOut =  transform.prepare(srcIter,options);\r
154         }else{\r
155             processOut = new StringBuffer(srcIter.getText());\r
156         }\r
157         int poLen = processOut.length();\r
158         if(poLen==0){\r
159             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);\r
160         }\r
161         StringBuffer dest = new StringBuffer();\r
162         \r
163         // reset the variable to verify if output of prepare is ASCII or not\r
164         srcIsASCII = true;\r
165         \r
166         // step 3 & 4\r
167         for(int j=0;j<poLen;j++ ){\r
168             ch=processOut.charAt(j);\r
169             if(ch > 0x7F){\r
170                 srcIsASCII = false;\r
171             }else if(isLDHChar(ch)==false){\r
172                 // here we do not assemble surrogates\r
173                 // since we know that LDH code points\r
174                 // are in the ASCII range only\r
175                 srcIsLDH = false;\r
176                 failPos = j;\r
177             }\r
178         }\r
179     \r
180         if(useSTD3ASCIIRules == true){\r
181             // verify 3a and 3b\r
182             if( srcIsLDH == false /* source contains some non-LDH characters */\r
183                 || processOut.charAt(0) ==  HYPHEN \r
184                 || processOut.charAt(processOut.length()-1) == HYPHEN){\r
185 \r
186                 /* populate the parseError struct */\r
187                 if(srcIsLDH==false){\r
188                      throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",\r
189                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,\r
190                                               processOut.toString(),\r
191                                              (failPos>0) ? (failPos-1) : failPos);\r
192                 }else if(processOut.charAt(0) == HYPHEN){\r
193                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",\r
194                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);\r
195      \r
196                 }else{\r
197                      throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",\r
198                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,\r
199                                               processOut.toString(),\r
200                                               (poLen>0) ? poLen-1 : poLen);\r
201 \r
202                 }\r
203             }\r
204         }\r
205         if(srcIsASCII){\r
206             dest =  processOut;\r
207         }else{\r
208             // step 5 : verify the sequence does not begin with ACE prefix\r
209             if(!startsWithPrefix(processOut)){\r
210 \r
211                 //step 6: encode the sequence with punycode\r
212                 StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);\r
213                 \r
214                 // convert all codepoints to lower case ASCII\r
215                 StringBuffer lowerOut = toASCIILower(punyout);\r
216 \r
217                 //Step 7: prepend the ACE prefix\r
218                 dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);\r
219                 //Step 6: copy the contents in b2 into dest\r
220                 dest.append(lowerOut);\r
221             }else{\r
222                 throw new StringPrepParseException("The input does not start with the ACE Prefix.",\r
223                                    StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);\r
224             }\r
225         }\r
226         if(dest.length() > MAX_LABEL_LENGTH){\r
227             throw new StringPrepParseException("The labels in the input are too long. Length > 64.", \r
228                                     StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);\r
229         }\r
230         return dest;\r
231     }\r
232     \r
233     public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)\r
234             throws StringPrepParseException{\r
235             return convertIDNToASCII(iter.getText(), options);          \r
236     }\r
237     public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)\r
238             throws StringPrepParseException{\r
239             return convertIDNToASCII(str.toString(), options);          \r
240     }\r
241     public static StringBuffer convertIDNToASCII(String src,int options)\r
242             throws StringPrepParseException{\r
243         char[] srcArr = src.toCharArray();\r
244         StringBuffer result = new StringBuffer();\r
245         int sepIndex=0;\r
246         int oldSepIndex = 0;\r
247         for(;;){\r
248             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);\r
249             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);\r
250             //make sure this is not a root label separator.\r
251             if(!(label.length()==0 && sepIndex==srcArr.length)){\r
252                 UCharacterIterator iter = UCharacterIterator.getInstance(label);\r
253                 result.append(convertToASCII(iter,options));\r
254             }\r
255             if(sepIndex==srcArr.length){\r
256                 break;\r
257             }\r
258             // increment the sepIndex to skip past the separator\r
259             sepIndex++;\r
260             oldSepIndex = sepIndex;\r
261             result.append((char)FULL_STOP);\r
262         }\r
263         return result;\r
264     }\r
265 \r
266     public static StringBuffer convertToUnicode(String src, int options)\r
267            throws StringPrepParseException{\r
268         UCharacterIterator iter = UCharacterIterator.getInstance(src);\r
269         return convertToUnicode(iter,options);\r
270     }\r
271     public static StringBuffer convertToUnicode(StringBuffer src, int options)\r
272            throws StringPrepParseException{\r
273         UCharacterIterator iter = UCharacterIterator.getInstance(src);\r
274         return convertToUnicode(iter,options);\r
275     }   \r
276     public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)\r
277            throws StringPrepParseException{\r
278 \r
279         char[] caseFlags = null;\r
280         \r
281         //get the options\r
282         boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);\r
283 \r
284         // the source contains all ascii codepoints\r
285         boolean srcIsASCII  = true;\r
286         // assume the source contains all LDH codepoints\r
287         boolean srcIsLDH = true; \r
288                \r
289         int failPos = -1;\r
290         int ch;\r
291         int saveIndex = iter.getIndex();\r
292         // step 1: find out if all the codepoints in src are ASCII  \r
293         while((ch=iter.next())!= UCharacterIterator.DONE){\r
294             if(ch>0x7F){\r
295                 srcIsASCII = false;\r
296             }else if(isLDHChar(ch)==false){\r
297                 failPos = iter.getIndex();\r
298                 srcIsLDH = false;\r
299             }\r
300         }\r
301         StringBuffer processOut;\r
302         \r
303         if(srcIsASCII == false){\r
304             // step 2: process the string\r
305             iter.setIndex(saveIndex);\r
306             processOut = transform.prepare(iter,options);\r
307 \r
308         }else{\r
309             //just point to source\r
310             processOut = new StringBuffer(iter.getText());\r
311         }\r
312         // TODO:\r
313         // The RFC states that \r
314         // <quote>\r
315         // ToUnicode never fails. If any step fails, then the original input\r
316         // is returned immediately in that step.\r
317         // </quote>\r
318         \r
319         //step 3: verify ACE Prefix\r
320         if(startsWithPrefix(processOut)){\r
321 \r
322            //step 4: Remove the ACE Prefix\r
323            String temp = processOut.substring(ACE_PREFIX_LENGTH,processOut.length());\r
324 \r
325            //step 5: Decode using punycode\r
326            StringBuffer decodeOut = PunycodeReference.decode(new StringBuffer(temp),caseFlags);\r
327         \r
328             //step 6:Apply toASCII\r
329             StringBuffer toASCIIOut = convertToASCII(decodeOut, options);\r
330 \r
331             //step 7: verify\r
332             if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){\r
333                 throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed",\r
334                                           StringPrepParseException.VERIFICATION_ERROR); \r
335              }\r
336 \r
337             //step 8: return output of step 5\r
338             return decodeOut;\r
339             \r
340         }else{\r
341             // verify that STD3 ASCII rules are satisfied\r
342             if(useSTD3ASCIIRules == true){\r
343                 if( srcIsLDH == false /* source contains some non-LDH characters */\r
344                     || processOut.charAt(0) ==  HYPHEN \r
345                     || processOut.charAt(processOut.length()-1) == HYPHEN){\r
346     \r
347                         if(srcIsLDH==false){\r
348                             throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",\r
349                                                      StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),\r
350                                                      (failPos>0) ? (failPos-1) : failPos);\r
351                         }else if(processOut.charAt(0) == HYPHEN){\r
352                             throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",\r
353                                                      StringPrepParseException.STD3_ASCII_RULES_ERROR,\r
354                                                      processOut.toString(),0);\r
355          \r
356                         }else{\r
357                             throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",\r
358                                                      StringPrepParseException.STD3_ASCII_RULES_ERROR,\r
359                                                      processOut.toString(),\r
360                                                      processOut.length());\r
361     \r
362                         }\r
363                   }\r
364             }\r
365             // just return the source\r
366             return new StringBuffer(iter.getText());\r
367         }  \r
368     }\r
369     public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)\r
370         throws StringPrepParseException{\r
371         return convertIDNToUnicode(iter.getText(), options);\r
372     }\r
373     public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)\r
374         throws StringPrepParseException{\r
375         return convertIDNToUnicode(str.toString(), options);\r
376     }\r
377     public static StringBuffer convertIDNToUnicode(String src, int options)\r
378         throws StringPrepParseException{\r
379             \r
380         char[] srcArr = src.toCharArray();\r
381         StringBuffer result = new StringBuffer();\r
382         int sepIndex=0;\r
383         int oldSepIndex=0;\r
384         for(;;){\r
385             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);\r
386             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);\r
387             if(label.length()==0 && sepIndex!=srcArr.length ){\r
388                 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);\r
389             }\r
390             UCharacterIterator iter = UCharacterIterator.getInstance(label);\r
391             result.append(convertToUnicode(iter,options));\r
392             if(sepIndex==srcArr.length){\r
393                 break;\r
394             }\r
395             // increment the sepIndex to skip past the separator\r
396             sepIndex++;\r
397             oldSepIndex = sepIndex;\r
398             result.append((char)FULL_STOP);\r
399         }\r
400         return result;\r
401     }\r
402     //  TODO: optimize\r
403     public static int compare(StringBuffer s1, StringBuffer s2, int options)\r
404         throws StringPrepParseException{\r
405         if(s1==null || s2 == null){\r
406             throw new IllegalArgumentException("One of the source buffers is null");\r
407         }\r
408         StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);\r
409         StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);\r
410         return compareCaseInsensitiveASCII(s1Out,s2Out);\r
411     }\r
412     //  TODO: optimize\r
413     public static int compare(String s1, String s2, int options)\r
414         throws StringPrepParseException{\r
415         if(s1==null || s2 == null){\r
416             throw new IllegalArgumentException("One of the source buffers is null");\r
417         }\r
418         StringBuffer s1Out = convertIDNToASCII(s1, options);\r
419         StringBuffer s2Out = convertIDNToASCII(s2, options);\r
420         return compareCaseInsensitiveASCII(s1Out,s2Out);\r
421     }\r
422     //  TODO: optimize\r
423     public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)\r
424         throws StringPrepParseException{\r
425         if(i1==null || i2 == null){\r
426             throw new IllegalArgumentException("One of the source buffers is null");\r
427         }\r
428         StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);\r
429         StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);\r
430         return compareCaseInsensitiveASCII(s1Out,s2Out);\r
431     }\r
432 \r
433 }\r