/*
*******************************************************************************
-* Copyright (C) 2010, International Business Machines
+* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.lang.UCharacterDirection;
-import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.IDNA;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.StringPrepParseException;
) {
addLabelError(info, Error.CONTEXTJ);
}
+ if((options&CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {
+ checkLabelContextO(labelString, labelStart, labelLength, info);
+ }
if(toASCII) {
if(wasPunycode) {
// Leave a Punycode label unchanged if it has no severe errors.
int i=labelStart;
c=Character.codePointAt(label, i);
i+=Character.charCount(c);
- int firstMask=U_MASK(UCharacter.getDirection(c));
+ int firstMask=U_MASK(UBiDiProps.INSTANCE.getClass(c));
// 1. The first character must be a character with BIDI property L, R
// or AL. If it has the R or AL property, it is an RTL label; if it
// has the L property, it is an LTR label.
}
c=Character.codePointBefore(label, labelLimit);
labelLimit-=Character.charCount(c);
- int dir=UCharacter.getDirection(c);
+ int dir=UBiDiProps.INSTANCE.getClass(c);
if(dir!=UCharacterDirection.DIR_NON_SPACING_MARK) {
lastMask=U_MASK(dir);
break;
while(i<labelLimit) {
c=Character.codePointAt(label, i);
i+=Character.charCount(c);
- mask|=U_MASK(UCharacter.getDirection(c));
+ mask|=U_MASK(UBiDiProps.INSTANCE.getClass(c));
}
if((firstMask&L_MASK)!=0) {
// 5. In an LTR label, only characters with the BIDI properties L, EN,
int j=i;
c=Character.codePointBefore(label, j);
j-=Character.charCount(c);
- if(UCharacter.getCombiningClass(c)==9) {
+ if(uts46Norm2.getCombiningClass(c)==9) {
continue;
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
for(;;) {
- /* UJoiningType */ int type=UCharacter.getIntPropertyValue(c, UProperty.JOINING_TYPE);
+ /* UJoiningType */ int type=UBiDiProps.INSTANCE.getJoiningType(c);
if(type==UCharacter.JoiningType.TRANSPARENT) {
if(j==0) {
return false;
}
c=Character.codePointAt(label, j);
j+=Character.charCount(c);
- /* UJoiningType */ int type=UCharacter.getIntPropertyValue(c, UProperty.JOINING_TYPE);
+ /* UJoiningType */ int type=UBiDiProps.INSTANCE.getJoiningType(c);
if(type==UCharacter.JoiningType.TRANSPARENT) {
// just skip this character
} else if(type==UCharacter.JoiningType.RIGHT_JOINING || type==UCharacter.JoiningType.DUAL_JOINING) {
return false;
}
int c=Character.codePointBefore(label, i);
- if(UCharacter.getCombiningClass(c)!=9) {
+ if(uts46Norm2.getCombiningClass(c)!=9) {
return false;
}
}
return true;
}
+ private void
+ checkLabelContextO(CharSequence label, int labelStart, int labelLength, Info info) {
+ int labelEnd=labelStart+labelLength-1; // inclusive
+ int arabicDigits=0; // -1 for 066x, +1 for 06Fx
+ for(int i=labelStart; i<=labelEnd; ++i) {
+ int c=label.charAt(i);
+ if(c<0xb7) {
+ // ASCII fastpath
+ } else if(c<=0x6f9) {
+ if(c==0xb7) {
+ // Appendix A.3. MIDDLE DOT (U+00B7)
+ // Rule Set:
+ // False;
+ // If Before(cp) .eq. U+006C And
+ // After(cp) .eq. U+006C Then True;
+ if(!(labelStart<i && label.charAt(i-1)=='l' &&
+ i<labelEnd && label.charAt(i+1)=='l')) {
+ addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+ }
+ } else if(c==0x375) {
+ // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375)
+ // Rule Set:
+ // False;
+ // If Script(After(cp)) .eq. Greek Then True;
+ if(!(i<labelEnd &&
+ UScript.GREEK==UScript.getScript(Character.codePointAt(label, i+1)))) {
+ addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+ }
+ } else if(c==0x5f3 || c==0x5f4) {
+ // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3)
+ // Rule Set:
+ // False;
+ // If Script(Before(cp)) .eq. Hebrew Then True;
+ //
+ // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4)
+ // Rule Set:
+ // False;
+ // If Script(Before(cp)) .eq. Hebrew Then True;
+ if(!(labelStart<i &&
+ UScript.HEBREW==UScript.getScript(Character.codePointBefore(label, i)))) {
+ addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+ }
+ } else if(0x660<=c /* && c<=0x6f9 */) {
+ // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669)
+ // Rule Set:
+ // True;
+ // For All Characters:
+ // If cp .in. 06F0..06F9 Then False;
+ // End For;
+ //
+ // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9)
+ // Rule Set:
+ // True;
+ // For All Characters:
+ // If cp .in. 0660..0669 Then False;
+ // End For;
+ if(c<=0x669) {
+ if(arabicDigits>0) {
+ addLabelError(info, Error.CONTEXTO_DIGITS);
+ }
+ arabicDigits=-1;
+ } else if(0x6f0<=c) {
+ if(arabicDigits<0) {
+ addLabelError(info, Error.CONTEXTO_DIGITS);
+ }
+ arabicDigits=1;
+ }
+ }
+ } else if(c==0x30fb) {
+ // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB)
+ // Rule Set:
+ // False;
+ // For All Characters:
+ // If Script(cp) .in. {Hiragana, Katakana, Han} Then True;
+ // End For;
+ for(int j=labelStart;; j+=Character.charCount(c)) {
+ if(j>labelEnd) {
+ addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+ break;
+ }
+ c=Character.codePointAt(label, j);
+ int script=UScript.getScript(c);
+ if(script==UScript.HIRAGANA || script==UScript.KATAKANA || script==UScript.HAN) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
// TODO: make public(?) -- in C, these are public in uchar.h
private static int U_MASK(int x) {
return 1<<x;