/*
 *******************************************************************************
 * Copyright (C) 2003-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.dev.test.stringprep;
\r
10 import java.lang.reflect.InvocationTargetException;
\r
11 import java.lang.reflect.Method;
\r
13 import com.ibm.icu.impl.ICUResourceBundle;
\r
14 import com.ibm.icu.lang.UCharacter;
\r
15 import com.ibm.icu.lang.UCharacterDirection;
\r
16 import com.ibm.icu.text.StringPrepParseException;
\r
17 import com.ibm.icu.text.UCharacterIterator;
\r
18 import com.ibm.icu.text.UnicodeSet;
\r
23 * To change the template for this generated type comment go to
\r
24 * Window>Preferences>Java>Code Generation>Code and Comments
\r
26 public class NamePrepTransform {
\r
28 private static final NamePrepTransform transform = new NamePrepTransform();
\r
30 private UnicodeSet labelSeparatorSet;
\r
31 private UnicodeSet prohibitedSet;
\r
32 private UnicodeSet unassignedSet;
\r
33 private MapTransform mapTransform;
\r
34 public static final int NONE = 0;
\r
35 public static final int ALLOW_UNASSIGNED = 1;
\r
37 private NamePrepTransform(){
\r
38 // load the resource bundle
\r
39 ICUResourceBundle bundle = (ICUResourceBundle)ICUResourceBundle.getBundleInstance("com/ibm/icu/dev/data/testdata","idna_rules", NamePrepTransform.class.getClassLoader(), true);
\r
40 String mapRules = bundle.getString("MapNoNormalization");
\r
41 mapRules += bundle.getString("MapNFKC");
\r
43 mapTransform = new MapTransform("CaseMap", mapRules, 0 /*Transliterator.FORWARD*/);
\r
44 labelSeparatorSet = new UnicodeSet(bundle.getString("LabelSeparatorSet"));
\r
45 prohibitedSet = new UnicodeSet(bundle.getString("ProhibitedSet"));
\r
46 unassignedSet = new UnicodeSet(bundle.getString("UnassignedSet"));
\r
49 public static final NamePrepTransform getInstance(){
\r
52 public static boolean isLabelSeparator(int ch){
\r
53 return transform.labelSeparatorSet.contains(ch);
\r
57 1) Map -- For each character in the input, check if it has a mapping
\r
58 and, if so, replace it with its mapping.
\r
60 2) Normalize -- Possibly normalize the result of step 1 using Unicode
\r
63 3) Prohibit -- Check for any characters that are not allowed in the
\r
64 output. If any are found, return an error.
\r
66 4) Check bidi -- Possibly check for right-to-left characters, and if
\r
67 any are found, make sure that the whole string satisfies the
\r
68 requirements for bidirectional strings. If the string does not
\r
69 satisfy the requirements for bidirectional strings, return an
\r
71 [Unicode3.2] defines several bidirectional categories; each character
\r
72 has one bidirectional category assigned to it. For the purposes of
\r
73 the requirements below, an "RandALCat character" is a character that
\r
74 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
\r
75 is a character that has Unicode bidirectional category "L". Note
\r
78 that there are many characters which fall in neither of the above
\r
79 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
\r
80 this because they have bidirectional category "EN".
\r
82 In any profile that specifies bidirectional character handling, all
\r
83 three of the following requirements MUST be met:
\r
85 1) The characters in section 5.8 MUST be prohibited.
\r
87 2) If a string contains any RandALCat character, the string MUST NOT
\r
88 contain any LCat character.
\r
90 3) If a string contains any RandALCat character, a RandALCat
\r
91 character MUST be the first character of the string, and a
\r
92 RandALCat character MUST be the last character of the string.
\r
95 public boolean isReady() {
\r
96 return mapTransform.isReady();
\r
99 public StringBuffer prepare(UCharacterIterator src,
\r
101 throws StringPrepParseException{
\r
102 return prepare(src.getText(),options);
\r
105 private String map ( String src, int options)
\r
106 throws StringPrepParseException{
\r
108 boolean allowUnassigned = ((options & ALLOW_UNASSIGNED)>0);
\r
110 String caseMapOut = mapTransform.transliterate(src);
\r
111 UCharacterIterator iter = UCharacterIterator.getInstance(caseMapOut);
\r
113 while((ch=iter.nextCodePoint())!=UCharacterIterator.DONE){
\r
114 if(transform.unassignedSet.contains(ch)==true && allowUnassigned ==false){
\r
115 throw new StringPrepParseException("An unassigned code point was found in the input",
\r
116 StringPrepParseException.UNASSIGNED_ERROR);
\r
121 public StringBuffer prepare(String src,int options)
\r
122 throws StringPrepParseException{
\r
125 String mapOut = map(src,options);
\r
126 UCharacterIterator iter = UCharacterIterator.getInstance(mapOut);
\r
128 int direction=UCharacterDirection.CHAR_DIRECTION_COUNT,
\r
129 firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT;
\r
130 int rtlPos=-1, ltrPos=-1;
\r
131 boolean rightToLeft=false, leftToRight=false;
\r
133 while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
\r
136 if(transform.prohibitedSet.contains(ch)==true && ch!=0x0020){
\r
137 throw new StringPrepParseException("A prohibited code point was found in the input",
\r
138 StringPrepParseException.PROHIBITED_ERROR,
\r
139 iter.getText(),iter.getIndex());
\r
142 direction = UCharacter.getDirection(ch);
\r
143 if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
\r
144 firstCharDir = direction;
\r
146 if(direction == UCharacterDirection.LEFT_TO_RIGHT){
\r
147 leftToRight = true;
\r
148 ltrPos = iter.getIndex()-1;
\r
150 if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
\r
151 rightToLeft = true;
\r
152 rtlPos = iter.getIndex()-1;
\r
157 if( leftToRight == true && rightToLeft == true){
\r
158 throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
\r
159 StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
\r
163 if( rightToLeft == true &&
\r
164 !((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) &&
\r
165 (direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))
\r
167 throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
\r
168 StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
\r
171 return new StringBuffer(mapOut);
\r
175 private static class MapTransform {
\r
176 private Object translitInstance;
\r
177 private Method translitMethod;
\r
178 private boolean isReady;
\r
180 MapTransform(String id, String rule, int direction) {
\r
181 isReady = initialize(id, rule, direction);
\r
184 boolean initialize(String id, String rule, int direction) {
\r
186 Class cls = Class.forName("com.ibm.icu.text.Transliterator");
\r
187 Method createMethod = cls.getMethod("createFromRules", String.class, String.class, Integer.TYPE);
\r
188 translitInstance = createMethod.invoke(null, id, rule, Integer.valueOf(direction));
\r
189 translitMethod = cls.getMethod("transliterate", String.class);
\r
190 } catch (Throwable e) {
\r
196 boolean isReady() {
\r
200 String transliterate(String text) {
\r
202 throw new IllegalStateException("Transliterator is not ready");
\r
204 String result = null;
\r
206 result = (String)translitMethod.invoke(translitInstance, text);
\r
207 } catch (InvocationTargetException ite) {
\r
208 throw new RuntimeException(ite);
\r
209 } catch (IllegalAccessException iae) {
\r
210 throw new RuntimeException(iae);
\r