]> gitweb.fperrin.net Git - Dictionary.git/blobdiff - jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/impl/UTS46.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / classes / core / src / com / ibm / icu / impl / UTS46.java
similarity index 86%
rename from jars/icu4j-4_8_1_1/main/classes/core/src/com/ibm/icu/impl/UTS46.java
rename to jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/impl/UTS46.java
index 809b772eda189d72995243be8d9a6bfcb2182b45..610995518760a448ac4c85a18e9fd44df49b3c95 100644 (file)
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2010, International Business Machines
+* Copyright (C) 2010-2011, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */
@@ -11,7 +11,7 @@ import java.util.EnumSet;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UCharacterCategory;
 import com.ibm.icu.lang.UCharacterDirection;
-import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.IDNA;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.StringPrepParseException;
@@ -438,6 +438,9 @@ public final class UTS46 extends IDNA {
             ) {
                 addLabelError(info, Error.CONTEXTJ);
             }
+            if((options&CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {
+                checkLabelContextO(labelString, labelStart, labelLength, info);
+            }
             if(toASCII) {
                 if(wasPunycode) {
                     // Leave a Punycode label unchanged if it has no severe errors.
@@ -551,7 +554,7 @@ public final class UTS46 extends IDNA {
         int i=labelStart;
         c=Character.codePointAt(label, i);
         i+=Character.charCount(c);
-        int firstMask=U_MASK(UCharacter.getDirection(c));
+        int firstMask=U_MASK(UBiDiProps.INSTANCE.getClass(c));
         // 1. The first character must be a character with BIDI property L, R
         // or AL.  If it has the R or AL property, it is an RTL label; if it
         // has the L property, it is an LTR label.
@@ -568,7 +571,7 @@ public final class UTS46 extends IDNA {
             }
             c=Character.codePointBefore(label, labelLimit);
             labelLimit-=Character.charCount(c);
-            int dir=UCharacter.getDirection(c);
+            int dir=UBiDiProps.INSTANCE.getClass(c);
             if(dir!=UCharacterDirection.DIR_NON_SPACING_MARK) {
                 lastMask=U_MASK(dir);
                 break;
@@ -591,7 +594,7 @@ public final class UTS46 extends IDNA {
         while(i<labelLimit) {
             c=Character.codePointAt(label, i);
             i+=Character.charCount(c);
-            mask|=U_MASK(UCharacter.getDirection(c));
+            mask|=U_MASK(UBiDiProps.INSTANCE.getClass(c));
         }
         if((firstMask&L_MASK)!=0) {
             // 5. In an LTR label, only characters with the BIDI properties L, EN,
@@ -684,12 +687,12 @@ public final class UTS46 extends IDNA {
                 int j=i;
                 c=Character.codePointBefore(label, j);
                 j-=Character.charCount(c);
-                if(UCharacter.getCombiningClass(c)==9) {
+                if(uts46Norm2.getCombiningClass(c)==9) {
                     continue;
                 }
                 // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
                 for(;;) {
-                    /* UJoiningType */ int type=UCharacter.getIntPropertyValue(c, UProperty.JOINING_TYPE);
+                    /* UJoiningType */ int type=UBiDiProps.INSTANCE.getJoiningType(c);
                     if(type==UCharacter.JoiningType.TRANSPARENT) {
                         if(j==0) {
                             return false;
@@ -709,7 +712,7 @@ public final class UTS46 extends IDNA {
                     }
                     c=Character.codePointAt(label, j);
                     j+=Character.charCount(c);
-                    /* UJoiningType */ int type=UCharacter.getIntPropertyValue(c, UProperty.JOINING_TYPE);
+                    /* UJoiningType */ int type=UBiDiProps.INSTANCE.getJoiningType(c);
                     if(type==UCharacter.JoiningType.TRANSPARENT) {
                         // just skip this character
                     } else if(type==UCharacter.JoiningType.RIGHT_JOINING || type==UCharacter.JoiningType.DUAL_JOINING) {
@@ -727,7 +730,7 @@ public final class UTS46 extends IDNA {
                     return false;
                 }
                 int c=Character.codePointBefore(label, i);
-                if(UCharacter.getCombiningClass(c)!=9) {
+                if(uts46Norm2.getCombiningClass(c)!=9) {
                     return false;
                 }
             }
@@ -735,6 +738,96 @@ public final class UTS46 extends IDNA {
         return true;
     }
 
+    private void
+    checkLabelContextO(CharSequence label, int labelStart, int labelLength, Info info) {
+        int labelEnd=labelStart+labelLength-1;  // inclusive
+        int arabicDigits=0;  // -1 for 066x, +1 for 06Fx
+        for(int i=labelStart; i<=labelEnd; ++i) {
+            int c=label.charAt(i);
+            if(c<0xb7) {
+                // ASCII fastpath
+            } else if(c<=0x6f9) {
+                if(c==0xb7) {
+                    // Appendix A.3. MIDDLE DOT (U+00B7)
+                    // Rule Set:
+                    //  False;
+                    //  If Before(cp) .eq.  U+006C And
+                    //     After(cp) .eq.  U+006C Then True;
+                    if(!(labelStart<i && label.charAt(i-1)=='l' &&
+                         i<labelEnd && label.charAt(i+1)=='l')) {
+                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+                    }
+                } else if(c==0x375) {
+                    // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375)
+                    // Rule Set:
+                    //  False;
+                    //  If Script(After(cp)) .eq.  Greek Then True;
+                    if(!(i<labelEnd &&
+                         UScript.GREEK==UScript.getScript(Character.codePointAt(label, i+1)))) {
+                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+                    }
+                } else if(c==0x5f3 || c==0x5f4) {
+                    // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3)
+                    // Rule Set:
+                    //  False;
+                    //  If Script(Before(cp)) .eq.  Hebrew Then True;
+                    //
+                    // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4)
+                    // Rule Set:
+                    //  False;
+                    //  If Script(Before(cp)) .eq.  Hebrew Then True;
+                    if(!(labelStart<i &&
+                         UScript.HEBREW==UScript.getScript(Character.codePointBefore(label, i)))) {
+                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+                    }
+                } else if(0x660<=c /* && c<=0x6f9 */) {
+                    // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669)
+                    // Rule Set:
+                    //  True;
+                    //  For All Characters:
+                    //    If cp .in. 06F0..06F9 Then False;
+                    //  End For;
+                    //
+                    // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9)
+                    // Rule Set:
+                    //  True;
+                    //  For All Characters:
+                    //    If cp .in. 0660..0669 Then False;
+                    //  End For;
+                    if(c<=0x669) {
+                        if(arabicDigits>0) {
+                            addLabelError(info, Error.CONTEXTO_DIGITS);
+                        }
+                        arabicDigits=-1;
+                    } else if(0x6f0<=c) {
+                        if(arabicDigits<0) {
+                            addLabelError(info, Error.CONTEXTO_DIGITS);
+                        }
+                        arabicDigits=1;
+                    }
+                }
+            } else if(c==0x30fb) {
+                // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB)
+                // Rule Set:
+                //  False;
+                //  For All Characters:
+                //    If Script(cp) .in. {Hiragana, Katakana, Han} Then True;
+                //  End For;
+                for(int j=labelStart;; j+=Character.charCount(c)) {
+                    if(j>labelEnd) {
+                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
+                        break;
+                    }
+                    c=Character.codePointAt(label, j);
+                    int script=UScript.getScript(c);
+                    if(script==UScript.HIRAGANA || script==UScript.KATAKANA || script==UScript.HAN) {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
     // TODO: make public(?) -- in C, these are public in uchar.h
     private static int U_MASK(int x) {
         return 1<<x;