jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 2004-2012, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  UCaseProps.java
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2005jan29
  14 *   created by: Markus W. Scherer
  15 *
  16 *   Low-level Unicode character/string case mapping code.
  17 *   Java port of ucase.h/.c.
  18 */
  19
  20 package com.ibm.icu.impl;
  21
  22 import java.io.BufferedInputStream;
  23 import java.io.DataInputStream;
  24 import java.io.IOException;
  25 import java.io.InputStream;
  26 import java.util.Iterator;
  27
  28 import com.ibm.icu.lang.UCharacter;
  29 import com.ibm.icu.lang.UProperty;
  30 import com.ibm.icu.text.UTF16;
  31 import com.ibm.icu.text.UnicodeSet;
  32 import com.ibm.icu.util.ULocale;
  33
  34 public final class UCaseProps {
  35
  36     // constructors etc. --------------------------------------------------- ***
  37
  38     // port of ucase_openProps()
  39     private UCaseProps() throws IOException {
  40         InputStream is=ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
  41         BufferedInputStream b=new BufferedInputStream(is, 4096 /* data buffer size */);
  42         readData(b);
  43         b.close();
  44         is.close();
  45     }
  46
  47     private final void readData(InputStream is) throws IOException {
  48         DataInputStream inputStream=new DataInputStream(is);
  49
  50         // read the header
  51         ICUBinary.readHeader(inputStream, FMT, new IsAcceptable());
  52
  53         // read indexes[]
  54         int i, count;
  55         count=inputStream.readInt();
  56         if(count<IX_TOP) {
  57             throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
  58         }
  59         indexes=new int[count];
  60
  61         indexes[0]=count;
  62         for(i=1; i<count; ++i) {
  63             indexes[i]=inputStream.readInt();
  64         }
  65
  66         // read the trie
  67         trie=Trie2_16.createFromSerialized(inputStream);
  68         int expectedTrieLength=indexes[IX_TRIE_SIZE];
  69         int trieLength=trie.getSerializedLength();
  70         if(trieLength>expectedTrieLength) {
  71             throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
  72         }
  73         // skip padding after trie bytes
  74         inputStream.skipBytes(expectedTrieLength-trieLength);
  75
  76         // read exceptions[]
  77         count=indexes[IX_EXC_LENGTH];
  78         if(count>0) {
  79             exceptions=new char[count];
  80             for(i=0; i<count; ++i) {
  81                 exceptions[i]=inputStream.readChar();
  82             }
  83         }
  84
  85         // read unfold[]
  86         count=indexes[IX_UNFOLD_LENGTH];
  87         if(count>0) {
  88             unfold=new char[count];
  89             for(i=0; i<count; ++i) {
  90                 unfold[i]=inputStream.readChar();
  91             }
  92         }
  93     }
  94
  95     // implement ICUBinary.Authenticate
  96     private final static class IsAcceptable implements ICUBinary.Authenticate {
  97         // @Override when we switch to Java 6
  98         public boolean isDataVersionAcceptable(byte version[]) {
  99             return version[0]==3;
 100         }
 101     }
 102
 103     // set of property starts for UnicodeSet ------------------------------- ***
 104
 105     public final void addPropertyStarts(UnicodeSet set) {
 106         /* add the start code point of each same-value range of the trie */
 107         Iterator<Trie2.Range> trieIterator=trie.iterator();
 108         Trie2.Range range;
 109         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
 110             set.add(range.startCodePoint);
 111         }
 112
 113         /* add code points with hardcoded properties, plus the ones following them */
 114
 115         /* (none right now, see comment below) */
 116
 117         /*
 118          * Omit code points with hardcoded specialcasing properties
 119          * because we do not build property UnicodeSets for them right now.
 120          */
 121     }
 122
 123     // data access primitives ---------------------------------------------- ***
 124     private static final int getExceptionsOffset(int props) {
 125         return props>>EXC_SHIFT;
 126     }
 127
 128     private static final boolean propsHasException(int props) {
 129         return (props&EXCEPTION)!=0;
 130     }
 131
    /*
     * Number of set bits in an 8-bit integer value:
     * flagsOffset[b] is the count of 1-bits in b, for b in 0..255.
     * slotOffset() indexes this table with the flag bits below a slot index
     * to count how many slots are present before that slot.
     */
    private static final byte flagsOffset[/*256*/]={
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
    };
 151
 152     private static final boolean hasSlot(int flags, int index) {
 153         return (flags&(1<<index))!=0;
 154     }
 155     private static final byte slotOffset(int flags, int index) {
 156         return flagsOffset[flags&((1<<index)-1)];
 157     }
 158
 159     /*
 160      * Get the value of an optional-value slot where hasSlot(excWord, index).
 161      *
 162      * @param excWord (in) initial exceptions word
 163      * @param index (in) desired slot index
 164      * @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
 165      * @return bits 31..0: slot value
 166      *             63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
 167      */
 168     private final long getSlotValueAndOffset(int excWord, int index, int excOffset) {
 169         long value;
 170         if((excWord&EXC_DOUBLE_SLOTS)==0) {
 171             excOffset+=slotOffset(excWord, index);
 172             value=exceptions[excOffset];
 173         } else {
 174             excOffset+=2*slotOffset(excWord, index);
 175             value=exceptions[excOffset++];
 176             value=(value<<16)|exceptions[excOffset];
 177         }
 178         return value |((long)excOffset<<32);
 179     }
 180
 181     /* same as getSlotValueAndOffset() but does not return the slot offset */
 182     private final int getSlotValue(int excWord, int index, int excOffset) {
 183         int value;
 184         if((excWord&EXC_DOUBLE_SLOTS)==0) {
 185             excOffset+=slotOffset(excWord, index);
 186             value=exceptions[excOffset];
 187         } else {
 188             excOffset+=2*slotOffset(excWord, index);
 189             value=exceptions[excOffset++];
 190             value=(value<<16)|exceptions[excOffset];
 191         }
 192         return value;
 193     }
 194
 195     // simple case mappings ------------------------------------------------ ***
 196
 197     public final int tolower(int c) {
 198         int props=trie.get(c);
 199         if(!propsHasException(props)) {
 200             if(getTypeFromProps(props)>=UPPER) {
 201                 c+=getDelta(props);
 202             }
 203         } else {
 204             int excOffset=getExceptionsOffset(props);
 205             int excWord=exceptions[excOffset++];
 206             if(hasSlot(excWord, EXC_LOWER)) {
 207                 c=getSlotValue(excWord, EXC_LOWER, excOffset);
 208             }
 209         }
 210         return c;
 211     }
 212
 213     public final int toupper(int c) {
 214         int props=trie.get(c);
 215         if(!propsHasException(props)) {
 216             if(getTypeFromProps(props)==LOWER) {
 217                 c+=getDelta(props);
 218             }
 219         } else {
 220             int excOffset=getExceptionsOffset(props);
 221             int excWord=exceptions[excOffset++];
 222             if(hasSlot(excWord, EXC_UPPER)) {
 223                 c=getSlotValue(excWord, EXC_UPPER, excOffset);
 224             }
 225         }
 226         return c;
 227     }
 228
 229     public final int totitle(int c) {
 230         int props=trie.get(c);
 231         if(!propsHasException(props)) {
 232             if(getTypeFromProps(props)==LOWER) {
 233                 c+=getDelta(props);
 234             }
 235         } else {
 236             int excOffset=getExceptionsOffset(props);
 237             int excWord=exceptions[excOffset++];
 238             int index;
 239             if(hasSlot(excWord, EXC_TITLE)) {
 240                 index=EXC_TITLE;
 241             } else if(hasSlot(excWord, EXC_UPPER)) {
 242                 index=EXC_UPPER;
 243             } else {
 244                 return c;
 245             }
 246             c=getSlotValue(excWord, index, excOffset);
 247         }
 248         return c;
 249     }
 250
 251     /**
 252      * Adds all simple case mappings and the full case folding for c to sa,
 253      * and also adds special case closure mappings.
 254      * c itself is not added.
 255      * For example, the mappings
 256      * - for s include long s
 257      * - for sharp s include ss
 258      * - for k include the Kelvin sign
 259      */
 260     public final void addCaseClosure(int c, UnicodeSet set) {
 261         /*
 262          * Hardcode the case closure of i and its relatives and ignore the
 263          * data file data for these characters.
 264          * The Turkic dotless i and dotted I with their case mapping conditions
 265          * and case folding option make the related characters behave specially.
 266          * This code matches their closure behavior to their case folding behavior.
 267          */
 268
 269         switch(c) {
 270         case 0x49:
 271             /* regular i and I are in one equivalence class */
 272             set.add(0x69);
 273             return;
 274         case 0x69:
 275             set.add(0x49);
 276             return;
 277         case 0x130:
 278             /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
 279             set.add(iDot);
 280             return;
 281         case 0x131:
 282             /* dotless i is in a class by itself */
 283             return;
 284         default:
 285             /* otherwise use the data file data */
 286             break;
 287         }
 288
 289         int props=trie.get(c);
 290         if(!propsHasException(props)) {
 291             if(getTypeFromProps(props)!=NONE) {
 292                 /* add the one simple case mapping, no matter what type it is */
 293                 int delta=getDelta(props);
 294                 if(delta!=0) {
 295                     set.add(c+delta);
 296                 }
 297             }
 298         } else {
 299             /*
 300              * c has exceptions, so there may be multiple simple and/or
 301              * full case mappings. Add them all.
 302              */
 303             int excOffset0, excOffset=getExceptionsOffset(props);
 304             int closureOffset;
 305             int excWord=exceptions[excOffset++];
 306             int index, closureLength, fullLength, length;
 307
 308             excOffset0=excOffset;
 309
 310             /* add all simple case mappings */
 311             for(index=EXC_LOWER; index<=EXC_TITLE; ++index) {
 312                 if(hasSlot(excWord, index)) {
 313                     excOffset=excOffset0;
 314                     c=getSlotValue(excWord, index, excOffset);
 315                     set.add(c);
 316                 }
 317             }
 318
 319             /* get the closure string pointer & length */
 320             if(hasSlot(excWord, EXC_CLOSURE)) {
 321                 excOffset=excOffset0;
 322                 long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
 323                 closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */
 324                 closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */
 325             } else {
 326                 closureLength=0;
 327                 closureOffset=0;
 328             }
 329
 330             /* add the full case folding */
 331             if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
 332                 excOffset=excOffset0;
 333                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
 334                 fullLength=(int)value;
 335
 336                 /* start of full case mapping strings */
 337                 excOffset=(int)(value>>32)+1;
 338
 339                 fullLength&=0xffff; /* bits 16 and higher are reserved */
 340
 341                 /* skip the lowercase result string */
 342                 excOffset+=fullLength&FULL_LOWER;
 343                 fullLength>>=4;
 344
 345                 /* add the full case folding string */
 346                 length=fullLength&0xf;
 347                 if(length!=0) {
 348                     set.add(new String(exceptions, excOffset, length));
 349                     excOffset+=length;
 350                 }
 351
 352                 /* skip the uppercase and titlecase strings */
 353                 fullLength>>=4;
 354                 excOffset+=fullLength&0xf;
 355                 fullLength>>=4;
 356                 excOffset+=fullLength;
 357
 358                 closureOffset=excOffset; /* behind full case mappings */
 359             }
 360
 361             /* add each code point in the closure string */
 362             for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
 363                 c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
 364                 set.add(c);
 365             }
 366         }
 367     }
 368
 369     /*
 370      * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated
 371      * must be s.length()>0 and max>0 and s.length()<=max
 372      */
 373     private final int strcmpMax(String s, int unfoldOffset, int max) {
 374         int i1, length, c1, c2;
 375
 376         length=s.length();
 377         max-=length; /* we require length<=max, so no need to decrement max in the loop */
 378         i1=0;
 379         do {
 380             c1=s.charAt(i1++);
 381             c2=unfold[unfoldOffset++];
 382             if(c2==0) {
 383                 return 1; /* reached the end of t but not of s */
 384             }
 385             c1-=c2;
 386             if(c1!=0) {
 387                 return c1; /* return difference result */
 388             }
 389         } while(--length>0);
 390         /* ends with length==0 */
 391
 392         if(max==0 || unfold[unfoldOffset]==0) {
 393             return 0; /* equal to length of both strings */
 394         } else {
 395             return -max; /* return lengh difference */
 396         }
 397     }
 398
 399     /**
 400      * Maps the string to single code points and adds the associated case closure
 401      * mappings.
 402      * The string is mapped to code points if it is their full case folding string.
 403      * In other words, this performs a reverse full case folding and then
 404      * adds the case closure items of the resulting code points.
 405      * If the string is found and its closure applied, then
 406      * the string itself is added as well as part of its code points' closure.
 407      *
 408      * @return true if the string was found
 409      */
 410     public final boolean addStringCaseClosure(String s, UnicodeSet set) {
 411         int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth;
 412
 413         if(unfold==null || s==null) {
 414             return false; /* no reverse case folding data, or no string */
 415         }
 416         length=s.length();
 417         if(length<=1) {
 418             /* the string is too short to find any match */
 419             /*
 420              * more precise would be:
 421              * if(!u_strHasMoreChar32Than(s, length, 1))
 422              * but this does not make much practical difference because
 423              * a single supplementary code point would just not be found
 424              */
 425             return false;
 426         }
 427
 428         unfoldRows=unfold[UNFOLD_ROWS];
 429         unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH];
 430         unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH];
 431         //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth;
 432
 433         if(length>unfoldStringWidth) {
 434             /* the string is too long to find any match */
 435             return false;
 436         }
 437
 438         /* do a binary search for the string */
 439         start=0;
 440         limit=unfoldRows;
 441         while(start<limit) {
 442             i=(start+limit)/2;
 443             unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above
 444             result=strcmpMax(s, unfoldOffset, unfoldStringWidth);
 445
 446             if(result==0) {
 447                 /* found the string: add each code point, and its case closure */
 448                 int c;
 449
 450                 for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) {
 451                     c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i);
 452                     set.add(c);
 453                     addCaseClosure(c, set);
 454                 }
 455                 return true;
 456             } else if(result<0) {
 457                 limit=i;
 458             } else /* result>0 */ {
 459                 start=i+1;
 460             }
 461         }
 462
 463         return false; /* string not found */
 464     }
 465
 466     /** @return NONE, LOWER, UPPER, TITLE */
 467     public final int getType(int c) {
 468         return getTypeFromProps(trie.get(c));
 469     }
 470
 471     /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
 472     public final int getTypeOrIgnorable(int c) {
 473         return getTypeAndIgnorableFromProps(trie.get(c));
 474     }
 475
 476     /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */
 477     public final int getDotType(int c) {
 478         int props=trie.get(c);
 479         if(!propsHasException(props)) {
 480             return props&DOT_MASK;
 481         } else {
 482             return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
 483         }
 484     }
 485
 486     public final boolean isSoftDotted(int c) {
 487         return getDotType(c)==SOFT_DOTTED;
 488     }
 489
 490     public final boolean isCaseSensitive(int c) {
 491         return (trie.get(c)&SENSITIVE)!=0;
 492     }
 493
 494     // string casing ------------------------------------------------------- ***
 495
 496     /*
 497      * These internal functions form the core of string case mappings.
 498      * They map single code points to result code points or strings and take
 499      * all necessary conditions (context, locale ID, options) into account.
 500      *
 501      * They do not iterate over the source or write to the destination
 502      * so that the same functions are useful for non-standard string storage,
 503      * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
 504      * For the same reason, the "surrounding text" context is passed in as a
 505      * ContextIterator which does not make any assumptions about
 506      * the underlying storage.
 507      *
 508      * This section contains helper functions that check for conditions
 509      * in the input text surrounding the current code point
 510      * according to SpecialCasing.txt.
 511      *
 512      * Each helper function gets the index
 513      * - after the current code point if it looks at following text
 514      * - before the current code point if it looks at preceding text
 515      *
 516      * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
 517      *
 518      * Final_Sigma
 519      *   C is preceded by a sequence consisting of
 520      *     a cased letter and a case-ignorable sequence,
 521      *   and C is not followed by a sequence consisting of
 522      *     an ignorable sequence and then a cased letter.
 523      *
 524      * More_Above
 525      *   C is followed by one or more characters of combining class 230 (ABOVE)
 526      *   in the combining character sequence.
 527      *
 528      * After_Soft_Dotted
 529      *   The last preceding character with combining class of zero before C
 530      *   was Soft_Dotted,
 531      *   and there is no intervening combining character class 230 (ABOVE).
 532      *
 533      * Before_Dot
 534      *   C is followed by combining dot above (U+0307).
 535      *   Any sequence of characters with a combining class that is neither 0 nor 230
 536      *   may intervene between the current character and the combining dot above.
 537      *
 538      * The erratum from 2002-10-31 adds the condition
 539      *
 540      * After_I
 541      *   The last preceding base character was an uppercase I, and there is no
 542      *   intervening combining character class 230 (ABOVE).
 543      *
 544      *   (See Jitterbug 2344 and the comments on After_I below.)
 545      *
 546      * Helper definitions in Unicode 3.2 UAX 21:
 547      *
 548      * D1. A character C is defined to be cased
 549      *     if it meets any of the following criteria:
 550      *
 551      *   - The general category of C is Titlecase Letter (Lt)
 552      *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
 553      *   - Given D = NFD(C), then it is not the case that:
 554      *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
 555      *     (This third criterium does not add any characters to the list
 556      *      for Unicode 3.2. Ignored.)
 557      *
 558      * D2. A character C is defined to be case-ignorable
 559      *     if it meets either of the following criteria:
 560      *
 561      *   - The general category of C is
 562      *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
 563      *     Letter Modifier (Lm), or Symbol Modifier (Sk)
 564      *   - C is one of the following characters
 565      *     U+0027 APOSTROPHE
 566      *     U+00AD SOFT HYPHEN (SHY)
 567      *     U+2019 RIGHT SINGLE QUOTATION MARK
 568      *            (the preferred character for apostrophe)
 569      *
 570      * D3. A case-ignorable sequence is a sequence of
 571      *     zero or more case-ignorable characters.
 572      */
 573
 574     /**
 575      * Iterator for string case mappings, which need to look at the
 576      * context (surrounding text) of a given character for conditional mappings.
 577      *
 578      * The iterator only needs to go backward or forward away from the
 579      * character in question. It does not use any indexes on this interface.
 580      * It does not support random access or an arbitrary change of
 581      * iteration direction.
 582      *
 583      * The code point being case-mapped itself is never returned by
 584      * this iterator.
 585      */
 586     public interface ContextIterator {
 587         /**
 588          * Reset the iterator for forward or backward iteration.
 589          * @param dir >0: Begin iterating forward from the first code point
 590          * after the one that is being case-mapped.
 591          *            <0: Begin iterating backward from the first code point
 592          * before the one that is being case-mapped.
 593          */
 594         public void reset(int dir);
 595         /**
 596          * Iterate and return the next code point, moving in the direction
 597          * determined by the reset() call.
 598          * @return Next code point, or <0 when the iteration is done.
 599          */
 600         public int next();
 601     }
 602
 603     /**
 604      * For string case mappings, a single character (a code point) is mapped
 605      * either to itself (in which case in-place mapping functions do nothing),
 606      * or to another single code point, or to a string.
 607      * Aside from the string contents, these are indicated with a single int
 608      * value as follows:
 609      *
 610      * Mapping to self: Negative values (~self instead of -self to support U+0000)
 611      *
 612      * Mapping to another code point: Positive values >MAX_STRING_LENGTH
 613      *
 614      * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is
 615      * returned. Note that the string result may indeed have zero length.
 616      */
    public static final int MAX_STRING_LENGTH=0x1f;

    /* Case-mapping locale types, as determined and cached by getCaseLocale(). */
    /* not determined yet; getCaseLocale() treats a locCache[0] with this value as empty */
    private static final int LOC_UNKNOWN=0;
    /* any language that getCaseLocale() does not single out */
    private static final int LOC_ROOT=1;
    /* language codes "tr", "tur", "az", "aze" */
    private static final int LOC_TURKISH=2;
    /* language codes "lt", "lit" */
    private static final int LOC_LITHUANIAN=3;
 623
 624     /*
 625      * Checks and caches the type of locale ID as it is relevant for case mapping.
 626      * If the locCache is not null, then it must be initialized with locCache[0]=0 .
 627      */
 628     private static final int getCaseLocale(ULocale locale, int[] locCache) {
 629         int result;
 630
 631         if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
 632             return result;
 633         }
 634
 635         result=LOC_ROOT;
 636
 637         String language=locale.getLanguage();
 638         if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
 639             result=LOC_TURKISH;
 640         } else if(language.equals("lt") || language.equals("lit")) {
 641             result=LOC_LITHUANIAN;
 642         }
 643
 644         if(locCache!=null) {
 645             locCache[0]=result;
 646         }
 647         return result;
 648     }
 649
 650     /* Is followed by {case-ignorable}* cased  ? (dir determines looking forward/backward) */
 651     private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) {
 652         int c;
 653
 654         if(iter==null) {
 655             return false;
 656         }
 657
 658         for(iter.reset(dir); (c=iter.next())>=0;) {
 659             int type=getTypeOrIgnorable(c);
 660             if((type&4)!=0) {
 661                 /* case-ignorable, continue with the loop */
 662             } else if(type!=NONE) {
 663                 return true; /* followed by cased letter */
 664             } else {
 665                 return false; /* uncased and not case-ignorable */
 666             }
 667         }
 668
 669         return false; /* not followed by cased letter */
 670     }
 671
 672     /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
 673     private final boolean isPrecededBySoftDotted(ContextIterator iter) {
 674         int c;
 675         int dotType;
 676
 677         if(iter==null) {
 678             return false;
 679         }
 680
 681         for(iter.reset(-1); (c=iter.next())>=0;) {
 682             dotType=getDotType(c);
 683             if(dotType==SOFT_DOTTED) {
 684                 return true; /* preceded by TYPE_i */
 685             } else if(dotType!=OTHER_ACCENT) {
 686                 return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
 687             }
 688         }
 689
 690         return false; /* not preceded by TYPE_i */
 691     }
 692
 693     /*
 694      * See Jitterbug 2344:
 695      * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
 696      * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
 697      * we made those releases compatible with Unicode 3.2 which had not fixed
 698      * a related bug in SpecialCasing.txt.
 699      *
 700      * From the Jitterbug 2344 text:
 701      * ... this bug is listed as a Unicode erratum
 702      * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
 703      * <quote>
 704      * There are two errors in SpecialCasing.txt.
 705      * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
 706      * 2. An incorrect context definition. Correct as follows:
 707      * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
 708      * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
 709      * ---
 710      * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
 711      * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
 712      * where the context After_I is defined as:
 713      * The last preceding base character was an uppercase I, and there is no
 714      * intervening combining character class 230 (ABOVE).
 715      * </quote>
 716      *
 717      * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
 718      *
 719      * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 720      * # This matches the behavior of the canonically equivalent I-dot_above
 721      *
 722      * See also the description in this place in older versions of uchar.c (revision 1.100).
 723      *
 724      * Markus W. Scherer 2003-feb-15
 725      */
 726
 727     /* Is preceded by base character 'I' with no intervening cc=230 ? */
 728     private final boolean isPrecededBy_I(ContextIterator iter) {
 729         int c;
 730         int dotType;
 731
 732         if(iter==null) {
 733             return false;
 734         }
 735
 736         for(iter.reset(-1); (c=iter.next())>=0;) {
 737             if(c==0x49) {
 738                 return true; /* preceded by I */
 739             }
 740             dotType=getDotType(c);
 741             if(dotType!=OTHER_ACCENT) {
 742                 return false; /* preceded by different base character (not I), or intervening cc==230 */
 743             }
 744         }
 745
 746         return false; /* not preceded by I */
 747     }
 748
 749     /* Is followed by one or more cc==230 ? */
 750     private final boolean isFollowedByMoreAbove(ContextIterator iter) {
 751         int c;
 752         int dotType;
 753
 754         if(iter==null) {
 755             return false;
 756         }
 757
 758         for(iter.reset(1); (c=iter.next())>=0;) {
 759             dotType=getDotType(c);
 760             if(dotType==ABOVE) {
 761                 return true; /* at least one cc==230 following */
 762             } else if(dotType!=OTHER_ACCENT) {
 763                 return false; /* next base character, no more cc==230 following */
 764             }
 765         }
 766
 767         return false; /* no more cc==230 following */
 768     }
 769
 770     /* Is followed by a dot above (without cc==230 in between) ? */
 771     private final boolean isFollowedByDotAbove(ContextIterator iter) {
 772         int c;
 773         int dotType;
 774
 775         if(iter==null) {
 776             return false;
 777         }
 778
 779         for(iter.reset(1); (c=iter.next())>=0; ) {
 780             if(c==0x307) {
 781                 return true;
 782             }
 783             dotType=getDotType(c);
 784             if(dotType!=OTHER_ACCENT) {
 785                 return false; /* next base character or cc==230 in between */
 786             }
 787         }
 788
 789         return false; /* no dot above following */
 790     }
 791
 792     private static final String
 793         iDot=       "i\u0307",
 794         jDot=       "j\u0307",
 795         iOgonekDot= "\u012f\u0307",
 796         iDotGrave=  "i\u0307\u0300",
 797         iDotAcute=  "i\u0307\u0301",
 798         iDotTilde=  "i\u0307\u0303";
 799
 800     /**
 801      * Get the full lowercase mapping for c.
 802      *
 803      * @param c Character to be mapped.
 804      * @param iter Character iterator, used for context-sensitive mappings.
 805      *             See ContextIterator for details.
 806      *             If iter==null then a context-independent result is returned.
 807      * @param out If the mapping result is a string, then it is appended to out.
 808      * @param locale Locale ID for locale-dependent mappings.
 809      * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
 810      *                 the locale ID for subsequent calls.
 811      *                 Can be null.
 812      * @return Output code point or string length, see MAX_STRING_LENGTH.
 813      *
 814      * @see ContextIterator
 815      * @see #MAX_STRING_LENGTH
 816      * @internal
 817      */
 818     public final int toFullLower(int c, ContextIterator iter,
 819                                  StringBuilder out,
 820                                  ULocale locale, int[] locCache) {
 821         int result, props;
 822
 823         result=c;
 824         props=trie.get(c);
 825         if(!propsHasException(props)) {
 826             if(getTypeFromProps(props)>=UPPER) {
 827                 result=c+getDelta(props);
 828             }
 829         } else {
 830             int excOffset=getExceptionsOffset(props), excOffset2;
 831             int excWord=exceptions[excOffset++];
 832             int full;
 833
 834             excOffset2=excOffset;
 835
 836             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
 837                 /* use hardcoded conditions and mappings */
 838                 int loc=getCaseLocale(locale, locCache);
 839
 840                 /*
 841                  * Test for conditional mappings first
 842                  *   (otherwise the unconditional default mappings are always taken),
 843                  * then test for characters that have unconditional mappings in SpecialCasing.txt,
 844                  * then get the UnicodeData.txt mappings.
 845                  */
 846                 if( loc==LOC_LITHUANIAN &&
 847                         /* base characters, find accents above */
 848                         (((c==0x49 || c==0x4a || c==0x12e) &&
 849                             isFollowedByMoreAbove(iter)) ||
 850                         /* precomposed with accent above, no need to find one */
 851                         (c==0xcc || c==0xcd || c==0x128))
 852                 ) {
 853                     /*
 854                         # Lithuanian
 855
 856                         # Lithuanian retains the dot in a lowercase i when followed by accents.
 857
 858                         # Introduce an explicit dot above when lowercasing capital I's and J's
 859                         # whenever there are more accents above.
 860                         # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
 861
 862                         0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
 863                         004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
 864                         012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
 865                         00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
 866                         00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
 867                         0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
 868                      */
 869                     switch(c) {
 870                     case 0x49:  /* LATIN CAPITAL LETTER I */
 871                         out.append(iDot);
 872                         return 2;
 873                     case 0x4a:  /* LATIN CAPITAL LETTER J */
 874                         out.append(jDot);
 875                         return 2;
 876                     case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
 877                         out.append(iOgonekDot);
 878                         return 2;
 879                     case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
 880                         out.append(iDotGrave);
 881                         return 3;
 882                     case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
 883                         out.append(iDotAcute);
 884                         return 3;
 885                     case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
 886                         out.append(iDotTilde);
 887                         return 3;
 888                     default:
 889                         return 0; /* will not occur */
 890                     }
 891                 /* # Turkish and Azeri */
 892                 } else if(loc==LOC_TURKISH && c==0x130) {
 893                     /*
 894                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 895                         # The following rules handle those cases.
 896
 897                         0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
 898                         0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
 899                      */
 900                     return 0x69;
 901                 } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
 902                     /*
 903                         # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 904                         # This matches the behavior of the canonically equivalent I-dot_above
 905
 906                         0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
 907                         0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
 908                      */
 909                     return 0; /* remove the dot (continue without output) */
 910                 } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
 911                     /*
 912                         # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
 913
 914                         0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
 915                         0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
 916                      */
 917                     return 0x131;
 918                 } else if(c==0x130) {
 919                     /*
 920                         # Preserve canonical equivalence for I with dot. Turkic is handled below.
 921
 922                         0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 923                      */
 924                     out.append(iDot);
 925                     return 2;
 926                 } else if(  c==0x3a3 &&
 927                             !isFollowedByCasedLetter(iter, 1) &&
 928                             isFollowedByCasedLetter(iter, -1) /* -1=preceded */
 929                 ) {
 930                     /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
 931                     /*
 932                         # Special case for final form of sigma
 933
 934                         03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
 935                      */
 936                     return 0x3c2; /* greek small final sigma */
 937                 } else {
 938                     /* no known conditional special case mapping, use a normal mapping */
 939                 }
 940             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
 941                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
 942                 full=(int)value&FULL_LOWER;
 943                 if(full!=0) {
 944                     /* start of full case mapping strings */
 945                     excOffset=(int)(value>>32)+1;
 946
 947                     /* set the output pointer to the lowercase mapping */
 948                     out.append(exceptions, excOffset, full);
 949
 950                     /* return the string length */
 951                     return full;
 952                 }
 953             }
 954
 955             if(hasSlot(excWord, EXC_LOWER)) {
 956                 result=getSlotValue(excWord, EXC_LOWER, excOffset2);
 957             }
 958         }
 959
 960         return (result==c) ? ~result : result;
 961     }
 962
 963     /* internal */
 964     private final int toUpperOrTitle(int c, ContextIterator iter,
 965                                      StringBuilder out,
 966                                      ULocale locale, int[] locCache,
 967                                      boolean upperNotTitle) {
 968         int result;
 969         int props;
 970
 971         result=c;
 972         props=trie.get(c);
 973         if(!propsHasException(props)) {
 974             if(getTypeFromProps(props)==LOWER) {
 975                 result=c+getDelta(props);
 976             }
 977         } else {
 978             int excOffset=getExceptionsOffset(props), excOffset2;
 979             int excWord=exceptions[excOffset++];
 980             int full, index;
 981
 982             excOffset2=excOffset;
 983
 984             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
 985                 /* use hardcoded conditions and mappings */
 986                 int loc=getCaseLocale(locale, locCache);
 987
 988                 if(loc==LOC_TURKISH && c==0x69) {
 989                     /*
 990                         # Turkish and Azeri
 991
 992                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 993                         # The following rules handle those cases.
 994
 995                         # When uppercasing, i turns into a dotted capital I
 996
 997                         0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
 998                         0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
 999                     */
1000                     return 0x130;
1001                 } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
1002                     /*
1003                         # Lithuanian
1004
1005                         # Lithuanian retains the dot in a lowercase i when followed by accents.
1006
1007                         # Remove DOT ABOVE after "i" with upper or titlecase
1008
1009                         0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
1010                      */
1011                     return 0; /* remove the dot (continue without output) */
1012                 } else {
1013                     /* no known conditional special case mapping, use a normal mapping */
1014                 }
1015             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1016                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1017                 full=(int)value&0xffff;
1018
1019                 /* start of full case mapping strings */
1020                 excOffset=(int)(value>>32)+1;
1021
1022                 /* skip the lowercase and case-folding result strings */
1023                 excOffset+=full&FULL_LOWER;
1024                 full>>=4;
1025                 excOffset+=full&0xf;
1026                 full>>=4;
1027
1028                 if(upperNotTitle) {
1029                     full&=0xf;
1030                 } else {
1031                     /* skip the uppercase result string */
1032                     excOffset+=full&0xf;
1033                     full=(full>>4)&0xf;
1034                 }
1035
1036                 if(full!=0) {
1037                     /* set the output pointer to the result string */
1038                     out.append(exceptions, excOffset, full);
1039
1040                     /* return the string length */
1041                     return full;
1042                 }
1043             }
1044
1045             if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
1046                 index=EXC_TITLE;
1047             } else if(hasSlot(excWord, EXC_UPPER)) {
1048                 /* here, titlecase is same as uppercase */
1049                 index=EXC_UPPER;
1050             } else {
1051                 return ~c;
1052             }
1053             result=getSlotValue(excWord, index, excOffset2);
1054         }
1055
1056         return (result==c) ? ~result : result;
1057     }
1058
1059     public final int toFullUpper(int c, ContextIterator iter,
1060                                  StringBuilder out,
1061                                  ULocale locale, int[] locCache) {
1062         return toUpperOrTitle(c, iter, out, locale, locCache, true);
1063     }
1064
1065     public final int toFullTitle(int c, ContextIterator iter,
1066                                  StringBuilder out,
1067                                  ULocale locale, int[] locCache) {
1068         return toUpperOrTitle(c, iter, out, locale, locCache, false);
1069     }
1070
1071     /* case folding ------------------------------------------------------------- */
1072
1073     /*
1074      * Case folding is similar to lowercasing.
1075      * The result may be a simple mapping, i.e., a single code point, or
1076      * a full mapping, i.e., a string.
1077      * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
1078      * then only the lowercase mapping is stored.
1079      *
1080      * Some special cases are hardcoded because their conditions cannot be
1081      * parsed and processed from CaseFolding.txt.
1082      *
1083      * Unicode 3.2 CaseFolding.txt specifies for its status field:
1084
1085     # C: common case folding, common mappings shared by both simple and full mappings.
1086     # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
1087     # S: simple case folding, mappings to single characters where different from F.
1088     # T: special case for uppercase I and dotted uppercase I
1089     #    - For non-Turkic languages, this mapping is normally not used.
1090     #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
1091     #
1092     # Usage:
1093     #  A. To do a simple case folding, use the mappings with status C + S.
1094     #  B. To do a full case folding, use the mappings with status C + F.
1095     #
1096     #    The mappings with status T can be used or omitted depending on the desired case-folding
1097     #    behavior. (The default option is to exclude them.)
1098
1099      * Unicode 3.2 has 'T' mappings as follows:
1100
1101     0049; T; 0131; # LATIN CAPITAL LETTER I
1102     0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1103
1104      * while the default mappings for these code points are:
1105
1106     0049; C; 0069; # LATIN CAPITAL LETTER I
1107     0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1108
1109      * U+0130 has no simple case folding (simple-case-folds to itself).
1110      */
1111
    /**
     * Bit mask for getting just the options from a string compare options word
     * that are relevant for case folding (of a single string or code point).
     * fold() and toFullFolding() compare the masked options against
     * UCharacter.FOLD_CASE_DEFAULT; any other masked value selects the
     * hardcoded Turkic ('T') mappings for U+0049 and U+0130.
     * @internal
     */
    private static final int FOLD_CASE_OPTIONS_MASK = 0xff;
1118
1119     /* return the simple case folding mapping for c */
1120     public final int fold(int c, int options) {
1121         int props=trie.get(c);
1122         if(!propsHasException(props)) {
1123             if(getTypeFromProps(props)>=UPPER) {
1124                 c+=getDelta(props);
1125             }
1126         } else {
1127             int excOffset=getExceptionsOffset(props);
1128             int excWord=exceptions[excOffset++];
1129             int index;
1130             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
1131                 /* special case folding mappings, hardcoded */
1132                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
1133                     /* default mappings */
1134                     if(c==0x49) {
1135                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1136                         return 0x69;
1137                     } else if(c==0x130) {
1138                         /* no simple case folding for U+0130 */
1139                         return c;
1140                     }
1141                 } else {
1142                     /* Turkic mappings */
1143                     if(c==0x49) {
1144                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1145                         return 0x131;
1146                     } else if(c==0x130) {
1147                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1148                         return 0x69;
1149                     }
1150                 }
1151             }
1152             if(hasSlot(excWord, EXC_FOLD)) {
1153                 index=EXC_FOLD;
1154             } else if(hasSlot(excWord, EXC_LOWER)) {
1155                 index=EXC_LOWER;
1156             } else {
1157                 return c;
1158             }
1159             c=getSlotValue(excWord, index, excOffset);
1160         }
1161         return c;
1162     }
1163
1164     /*
1165      * Issue for canonical caseless match (UAX #21):
1166      * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
1167      * canonical equivalence, unlike default-option casefolding.
1168      * For example, I-grave and I + grave fold to strings that are not canonically
1169      * equivalent.
1170      * For more details, see the comment in unorm_compare() in unorm.cpp
1171      * and the intermediate prototype changes for Jitterbug 2021.
1172      * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
1173      *
1174      * This did not get fixed because it appears that it is not possible to fix
1175      * it for uppercase and lowercase characters (I-grave vs. i-grave)
1176      * together in a way that they still fold to common result strings.
1177      */
1178
1179     public final int toFullFolding(int c, StringBuilder out, int options) {
1180         int result;
1181         int props;
1182
1183         result=c;
1184         props=trie.get(c);
1185         if(!propsHasException(props)) {
1186             if(getTypeFromProps(props)>=UPPER) {
1187                 result=c+getDelta(props);
1188             }
1189         } else {
1190             int excOffset=getExceptionsOffset(props), excOffset2;
1191             int excWord=exceptions[excOffset++];
1192             int full, index;
1193
1194             excOffset2=excOffset;
1195
1196             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
1197                 /* use hardcoded conditions and mappings */
1198                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
1199                     /* default mappings */
1200                     if(c==0x49) {
1201                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1202                         return 0x69;
1203                     } else if(c==0x130) {
1204                         /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1205                         out.append(iDot);
1206                         return 2;
1207                     }
1208                 } else {
1209                     /* Turkic mappings */
1210                     if(c==0x49) {
1211                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1212                         return 0x131;
1213                     } else if(c==0x130) {
1214                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1215                         return 0x69;
1216                     }
1217                 }
1218             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1219                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1220                 full=(int)value&0xffff;
1221
1222                 /* start of full case mapping strings */
1223                 excOffset=(int)(value>>32)+1;
1224
1225                 /* skip the lowercase result string */
1226                 excOffset+=full&FULL_LOWER;
1227                 full=(full>>4)&0xf;
1228
1229                 if(full!=0) {
1230                     /* set the output pointer to the result string */
1231                     out.append(exceptions, excOffset, full);
1232
1233                     /* return the string length */
1234                     return full;
1235                 }
1236             }
1237
1238             if(hasSlot(excWord, EXC_FOLD)) {
1239                 index=EXC_FOLD;
1240             } else if(hasSlot(excWord, EXC_LOWER)) {
1241                 index=EXC_LOWER;
1242             } else {
1243                 return ~c;
1244             }
1245             result=getSlotValue(excWord, index, excOffset2);
1246         }
1247
1248         return (result==c) ? ~result : result;
1249     }
1250
1251     /* case mapping properties API ---------------------------------------------- */
1252
    /*
     * Locale cache preset to LOC_ROOT. hasBinaryProperty() passes it as locCache,
     * together with ULocale.ROOT, to the full case mapping functions.
     */
    private static final int[] rootLocCache = { LOC_ROOT };
    /*
     * We need a StringBuilder for multi-code point output from the
     * full case mapping functions. However, we do not actually use that output,
     * we just check whether the input character was mapped to anything else.
     * We use a shared StringBuilder to avoid allocating a new one in each call.
     * We remove its contents each time so that it does not grow large over time.
     *
     * NOTE(review): this instance is public and shared, and java.lang.StringBuilder
     * is not synchronized; concurrent callers would race on it. Confirm that
     * all users either tolerate this or serialize access.
     *
     * @internal
     */
    public static final StringBuilder dummyStringBuilder = new StringBuilder();
1264
1265     public final boolean hasBinaryProperty(int c, int which) {
1266         switch(which) {
1267         case UProperty.LOWERCASE:
1268             return LOWER==getType(c);
1269         case UProperty.UPPERCASE:
1270             return UPPER==getType(c);
1271         case UProperty.SOFT_DOTTED:
1272             return isSoftDotted(c);
1273         case UProperty.CASE_SENSITIVE:
1274             return isCaseSensitive(c);
1275         case UProperty.CASED:
1276             return NONE!=getType(c);
1277         case UProperty.CASE_IGNORABLE:
1278             return (getTypeOrIgnorable(c)>>2)!=0;
1279         /*
1280          * Note: The following Changes_When_Xyz are defined as testing whether
1281          * the NFD form of the input changes when Xyz-case-mapped.
1282          * However, this simpler implementation of these properties,
1283          * ignoring NFD, passes the tests.
1284          * The implementation needs to be changed if the tests start failing.
1285          * When that happens, optimizations should be used to work with the
1286          * per-single-code point ucase_toFullXyz() functions unless
1287          * the NFD form has more than one code point,
1288          * and the property starts set needs to be the union of the
1289          * start sets for normalization and case mappings.
1290          */
1291         case UProperty.CHANGES_WHEN_LOWERCASED:
1292             dummyStringBuilder.setLength(0);
1293             return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
1294         case UProperty.CHANGES_WHEN_UPPERCASED:
1295             dummyStringBuilder.setLength(0);
1296             return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
1297         case UProperty.CHANGES_WHEN_TITLECASED:
1298             dummyStringBuilder.setLength(0);
1299             return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
1300         /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
1301         case UProperty.CHANGES_WHEN_CASEMAPPED:
1302             dummyStringBuilder.setLength(0);
1303             return
1304                 toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
1305                 toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
1306                 toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
1307         default:
1308             return false;
1309         }
1310     }
1311
1312     // data members -------------------------------------------------------- ***
1313     private int indexes[];
1314     private char exceptions[];
1315     private char unfold[];
1316
1317     private Trie2_16 trie;
1318
1319     // data format constants ----------------------------------------------- ***
1320     private static final String DATA_NAME="ucase";
1321     private static final String DATA_TYPE="icu";
1322     private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;
1323
1324     /* format "cAsE" */
1325     private static final byte FMT[]={ 0x63, 0x41, 0x53, 0x45 };
1326
1327     /* indexes into indexes[] */
1328     //private static final int IX_INDEX_TOP=0;
1329     //private static final int IX_LENGTH=1;
1330     private static final int IX_TRIE_SIZE=2;
1331     private static final int IX_EXC_LENGTH=3;
1332     private static final int IX_UNFOLD_LENGTH=4;
1333
1334     //private static final int IX_MAX_FULL_LENGTH=15;
1335     private static final int IX_TOP=16;
1336
1337     // definitions for 16-bit case properties word ------------------------- ***
1338
1339     /* 2-bit constants for types of cased characters */
1340     public static final int TYPE_MASK=3;
1341     public static final int NONE=0;
1342     public static final int LOWER=1;
1343     public static final int UPPER=2;
1344     public static final int TITLE=3;
1345
1346     private static final int getTypeFromProps(int props) {
1347         return props&TYPE_MASK;
1348     }
1349
1350     private static final int getTypeAndIgnorableFromProps(int props) {
1351         return props&7;
1352     }
1353
    /* single-bit flags of the properties word, directly above the type bits */
    //private static final int IGNORABLE=   4;
    private static final int SENSITIVE=     8;
    /* NOTE(review): presumably the bit tested by propsHasException() -- that helper is not visible here */
    private static final int EXCEPTION=     0x10;

    /* 2-bit dot/accent type field (bits 6..5); the constants below are its four possible values */
    private static final int DOT_MASK=      0x60;
    //private static final int NO_DOT=        0;      /* normal characters with cc=0 */
    private static final int SOFT_DOTTED=   0x20;   /* soft-dotted characters with cc=0 */
    private static final int ABOVE=         0x40;   /* "above" accents with cc=230 */
    private static final int OTHER_ACCENT=  0x60;   /* other accent character (0<cc!=230) */

    /* no exception: bits 15..7 are a 9-bit signed case mapping delta */
    /* getDelta() shifts right by this amount after sign-extending the 16-bit word */
    private static final int DELTA_SHIFT=   7;
    //private static final int DELTA_MASK=    0xff80;
    //private static final int MAX_DELTA=     0xff;
    //private static final int MIN_DELTA=     (-MAX_DELTA-1);
1369
1370     private static final int getDelta(int props) {
1371         return (short)props>>DELTA_SHIFT;
1372     }
1373
    /* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
    private static final int EXC_SHIFT=     5;
    //private static final int EXC_MASK=      0xffe0;
    //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);

    /* definitions for 16-bit main exceptions word ------------------------------ */

    /* first 8 bits indicate values in optional slots */
    /* slot indexes as passed to hasSlot(), getSlotValue() and getSlotValueAndOffset() */
    private static final int EXC_LOWER=0;           /* simple lowercase mapping, see toFullLower() and fold() */
    private static final int EXC_FOLD=1;            /* simple case folding, see fold() and toFullFolding() */
    private static final int EXC_UPPER=2;           /* simple uppercase mapping, see toUpperOrTitle() */
    private static final int EXC_TITLE=3;           /* simple titlecase mapping, see toUpperOrTitle() */
    //private static final int EXC_4=4;           /* reserved */
    //private static final int EXC_5=5;           /* reserved */
    private static final int EXC_CLOSURE=6;
    /* slot value is the lengths word; the full mapping strings start one unit after the slot */
    private static final int EXC_FULL_MAPPINGS=7;
    //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */

    /* each slot is 2 uint16_t instead of 1 */
    private static final int EXC_DOUBLE_SLOTS=          0x100;

    /* reserved: exception bits 11..9 */

    /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
    private static final int EXC_DOT_SHIFT=7;

    /* normally stored in the main word, but pushed out for larger exception indexes */
    //private static final int EXC_DOT_MASK=              0x3000;
    //private static final int EXC_NO_DOT=                0;
    //private static final int EXC_SOFT_DOTTED=           0x1000;
    //private static final int EXC_ABOVE=                 0x2000; /* "above" accents with cc=230 */
    //private static final int EXC_OTHER_ACCENT=          0x3000; /* other character (0<cc!=230) */

    /* complex/conditional mappings */
    /* set: toFullLower() and toUpperOrTitle() test their hardcoded SpecialCasing.txt conditions */
    private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
    /* set: fold() and toFullFolding() use their hardcoded mappings for U+0049 and U+0130 */
    private static final int EXC_CONDITIONAL_FOLD=      0x8000;

    /* definitions for lengths word for full case mappings */
    /* one 4-bit length per mapping string; the lowercase length is the lowest nibble */
    private static final int FULL_LOWER=    0xf;
    //private static final int FULL_FOLDING=  0xf0;
    //private static final int FULL_UPPER=    0xf00;
    //private static final int FULL_TITLE=    0xf000;

    /* maximum lengths */
    //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
    private static final int CLOSURE_MAX_LENGTH=0xf;

    /* constants for reverse case folding ("unfold") data */
    /* NOTE(review): presumably indexes of header fields in unfold[] -- confirm against the code that reads it */
    private static final int UNFOLD_ROWS=0;
    private static final int UNFOLD_ROW_WIDTH=1;
    private static final int UNFOLD_STRING_WIDTH=2;
1425
1426     /*
1427      * public singleton instance
1428      */
1429     public static final UCaseProps INSTANCE;
1430
1431     // This static initializer block must be placed after
1432     // other static member initialization
1433     static {
1434         try {
1435             INSTANCE = new UCaseProps();
1436         } catch (IOException e) {
1437             throw new RuntimeException(e);
1438         }
1439     }
1440 }