/*
 *******************************************************************************
 * Copyright (C) 2003-2007, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.dev.test.stringprep;
\r
10 //import com.ibm.icu.impl.ICULocaleData;
\r
11 import com.ibm.icu.impl.ICUResourceBundle;
\r
12 import com.ibm.icu.lang.UCharacter;
\r
13 import com.ibm.icu.lang.UCharacterDirection;
\r
14 import com.ibm.icu.text.StringPrepParseException;
\r
15 import com.ibm.icu.text.UCharacterIterator;
\r
16 import com.ibm.icu.text.UnicodeSet;
\r
17 import com.ibm.icu.text.Transliterator;
\r
/**
 * To change the template for this generated type comment go to
 * Window&gt;Preferences&gt;Java&gt;Code Generation&gt;Code and Comments
 */
\r
25 public class NamePrepTransform {
\r
27 private static final NamePrepTransform transform = new NamePrepTransform();
\r
29 private UnicodeSet labelSeparatorSet;
\r
30 private UnicodeSet prohibitedSet;
\r
31 private UnicodeSet unassignedSet;
\r
32 private Transliterator mapTransform;
\r
33 public static final int NONE = 0;
\r
34 public static final int ALLOW_UNASSIGNED = 1;
\r
36 private NamePrepTransform(){
\r
37 // load the resource bundle
\r
38 ICUResourceBundle bundle = (ICUResourceBundle)ICUResourceBundle.getBundleInstance("com/ibm/icu/dev/data/testdata","idna_rules", NamePrepTransform.class.getClassLoader(), true);
\r
39 String mapRules = bundle.getString("MapNoNormalization");
\r
40 mapRules += bundle.getString("MapNFKC");
\r
41 mapTransform = Transliterator.createFromRules("CaseMap",mapRules,Transliterator.FORWARD);
\r
42 labelSeparatorSet = new UnicodeSet(bundle.getString("LabelSeparatorSet"));
\r
43 prohibitedSet = new UnicodeSet(bundle.getString("ProhibitedSet"));
\r
44 unassignedSet = new UnicodeSet(bundle.getString("UnassignedSet"));
\r
47 public static final NamePrepTransform getInstance(){
\r
50 public static boolean isLabelSeparator(int ch){
\r
51 return transform.labelSeparatorSet.contains(ch);
\r
/*
   1) Map -- For each character in the input, check if it has a mapping
      and, if so, replace it with its mapping.

   2) Normalize -- Possibly normalize the result of step 1 using Unicode
      normalization.

   3) Prohibit -- Check for any characters that are not allowed in the
      output. If any are found, return an error.

   4) Check bidi -- Possibly check for right-to-left characters, and if
      any are found, make sure that the whole string satisfies the
      requirements for bidirectional strings. If the string does not
      satisfy the requirements for bidirectional strings, return an
      error.

   [Unicode3.2] defines several bidirectional categories; each character
   has one bidirectional category assigned to it. For the purposes of
   the requirements below, an "RandALCat character" is a character that
   has Unicode bidirectional categories "R" or "AL"; an "LCat character"
   is a character that has Unicode bidirectional category "L". Note
   that there are many characters which fall in neither of the above
   definitions; Latin digits (<U+0030> through <U+0039>) are examples of
   this because they have bidirectional category "EN".

   In any profile that specifies bidirectional character handling, all
   three of the following requirements MUST be met:

   1) The characters in section 5.8 MUST be prohibited.

   2) If a string contains any RandALCat character, the string MUST NOT
      contain any LCat character.

   3) If a string contains any RandALCat character, a RandALCat
      character MUST be the first character of the string, and a
      RandALCat character MUST be the last character of the string.
*/
\r
92 public StringBuffer prepare(UCharacterIterator src,
\r
94 throws StringPrepParseException{
\r
95 return prepare(src.getText(),options);
\r
97 private String map ( String src, int options)
\r
98 throws StringPrepParseException{
\r
100 boolean allowUnassigned = ((options & ALLOW_UNASSIGNED)>0);
\r
101 String caseMapOut = transform.mapTransform.transliterate(src);
\r
102 UCharacterIterator iter = UCharacterIterator.getInstance(caseMapOut);
\r
104 while((ch=iter.nextCodePoint())!=UCharacterIterator.DONE){
\r
105 if(transform.unassignedSet.contains(ch)==true && allowUnassigned ==false){
\r
106 throw new StringPrepParseException("An unassigned code point was found in the input",
\r
107 StringPrepParseException.UNASSIGNED_ERROR);
\r
112 public StringBuffer prepare(String src,int options)
\r
113 throws StringPrepParseException{
\r
116 String mapOut = map(src,options);
\r
117 UCharacterIterator iter = UCharacterIterator.getInstance(mapOut);
\r
119 int direction=UCharacterDirection.CHAR_DIRECTION_COUNT,
\r
120 firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT;
\r
121 int rtlPos=-1, ltrPos=-1;
\r
122 boolean rightToLeft=false, leftToRight=false;
\r
124 while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
\r
127 if(transform.prohibitedSet.contains(ch)==true && ch!=0x0020){
\r
128 throw new StringPrepParseException("A prohibited code point was found in the input",
\r
129 StringPrepParseException.PROHIBITED_ERROR,
\r
130 iter.getText(),iter.getIndex());
\r
133 direction = UCharacter.getDirection(ch);
\r
134 if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
\r
135 firstCharDir = direction;
\r
137 if(direction == UCharacterDirection.LEFT_TO_RIGHT){
\r
138 leftToRight = true;
\r
139 ltrPos = iter.getIndex()-1;
\r
141 if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
\r
142 rightToLeft = true;
\r
143 rtlPos = iter.getIndex()-1;
\r
148 if( leftToRight == true && rightToLeft == true){
\r
149 throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
\r
150 StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
\r
154 if( rightToLeft == true &&
\r
155 !((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) &&
\r
156 (direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))
\r
158 throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
\r
159 StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
\r
162 return new StringBuffer(mapOut);
\r