jars/icu4j-4_4_2-src/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java

   1 /**\r
   2 *******************************************************************************\r
   3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
   4 * others. All Rights Reserved.                                                *\r
   5 *******************************************************************************\r
   6 */\r
   7 package com.ibm.icu.text;\r
   8 \r
   9 import java.io.BufferedInputStream;\r
  10 import java.io.DataInputStream;\r
  11 import java.io.IOException;\r
  12 import java.io.InputStream;\r
  13 import java.nio.ByteBuffer;\r
  14 \r
  15 import com.ibm.icu.impl.ICUBinary;\r
  16 import com.ibm.icu.impl.ICUData;\r
  17 import com.ibm.icu.impl.ICUResourceBundle;\r
  18 import com.ibm.icu.impl.IntTrie;\r
  19 import com.ibm.icu.lang.UCharacter;\r
  20 import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;\r
  21 import com.ibm.icu.text.RuleBasedCollator.UCAConstants;\r
  22 import com.ibm.icu.util.VersionInfo;\r
  23 \r
/**
* <p>Internal reader class for the ICU data file ucadata.icu, which contains
* Unicode Collation Algorithm data.</p>
* <p>This class simply reads ucadata.icu, authenticates that it is a valid
* ICU data file and splits its contents up into blocks of data for use in
* <a href="Collator.html">com.ibm.icu.text.Collator</a>.
* </p>
* <p>ucadata.icu, which is in big-endian format, is packaged in the same jar
* as this package.</p>
* @author Syn Wee Quek
* @since release 2.2, April 18 2002
*/
  36 \r
  37 final class CollatorReader\r
  38 {          \r
  39     static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {\r
  40         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");\r
  41         BufferedInputStream b = new BufferedInputStream(i, 90000);\r
  42         CollatorReader reader = new CollatorReader(b);\r
  43         char[] result = reader.readImp(rbc, ucac);\r
  44         b.close();\r
  45         return result;\r
  46     }\r
  47 \r
  48     public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {\r
  49         return new InputStream() {\r
  50             public int read() throws IOException {\r
  51                 if (!buf.hasRemaining()) {\r
  52                     return -1;\r
  53                 }\r
  54                 return buf.get() & 0xff;\r
  55             }\r
  56             public int read(byte[] bytes, int off, int len) throws IOException {\r
  57                 len = Math.min(len, buf.remaining());\r
  58                 buf.get(bytes, off, len);\r
  59                 return len;\r
  60             }\r
  61         };\r
  62     }\r
  63 \r
  64     static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {\r
  65         final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;\r
  66         int dataLength = data.remaining();\r
  67         // TODO: Change the rest of this class to use the ByteBuffer directly, rather than\r
  68         // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().\r
  69         // Consider changing ICUBinary to also work with a ByteBuffer.\r
  70         CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);\r
  71         if (dataLength > MIN_BINARY_DATA_SIZE_) {\r
  72             reader.readImp(rbc, null);\r
  73         } else {\r
  74             reader.readHeader(rbc);\r
  75             reader.readOptions(rbc);\r
  76             // duplicating UCA_'s data\r
  77             rbc.setWithUCATables();\r
  78         }\r
  79     }\r
  80     \r
  81     static InverseUCA getInverseUCA() throws IOException {\r
  82         InverseUCA result = null;\r
  83         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");\r
  84 //        try    {\r
  85 //            String invdat = "/com/ibm/icu/impl/data/invuca.icu";\r
  86 //            InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);\r
  87             BufferedInputStream b = new BufferedInputStream(i, 110000);\r
  88             result = CollatorReader.readInverseUCA(b);\r
  89             b.close();\r
  90             i.close();\r
  91             return result;\r
  92 //        } catch (Exception e) {\r
  93 //            throw new RuntimeException(e.getMessage());\r
  94 //        }\r
  95     }\r
  96     \r
  97     // protected constructor ---------------------------------------------\r
  98     \r
    /**
    * <p>Private constructor for a stream that starts with an ICU data
    * header; delegates to the two-argument constructor with header reading
    * enabled.</p>
    * @param inputStream ICU collator file input stream
    * @exception IOException throw if data file fails authentication 
    */
    private CollatorReader(InputStream inputStream) throws IOException
    {
        this(inputStream, true);
    }
 119     \r
 120     /**\r
 121     * <p>Protected constructor.</p>\r
 122     * @param inputStream ICU uprops.icu file input stream\r
 123     * @param readICUHeader flag to indicate if the ICU header has to be read\r
 124     * @exception IOException throw if data file fails authentication \r
 125     */\r
 126     private CollatorReader(InputStream inputStream, boolean readICUHeader) \r
 127                                                             throws IOException\r
 128     {\r
 129         if (readICUHeader) {\r
 130             byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, \r
 131                                  UCA_AUTHENTICATE_);\r
 132             // weiv: check that we have the correct Unicode version in \r
 133             // binary files\r
 134             VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
 135             if(UnicodeVersion[0] != UCDVersion.getMajor() \r
 136             || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
 137                 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
 138             }\r
 139         }\r
 140         m_dataInputStream_ = new DataInputStream(inputStream);\r
 141     }\r
 142   \r
 143     // protected methods -------------------------------------------------\r
 144       \r
 145     /**\r
 146     * Read and break up the header stream of data passed in as arguments into \r
 147     * meaningful Collator data.\r
 148     * @param rbc RuleBasedCollator to populate with header information\r
 149     * @exception IOException thrown when there's a data error.\r
 150     */\r
 151     private void readHeader(RuleBasedCollator rbc) throws IOException\r
 152     {\r
 153         m_size_ = m_dataInputStream_.readInt();\r
 154         // all the offsets are in bytes\r
 155         // to get the address add to the header address and cast properly\r
 156         // Default options int options\r
 157         m_headerSize_ = m_dataInputStream_.readInt(); // start of options\r
 158         int readcount = 8; // for size and headersize\r
 159         // structure which holds values for indirect positioning and implicit\r
 160         // ranges\r
 161         int UCAConst = m_dataInputStream_.readInt();\r
 162         readcount += 4;\r
 163         // this one is needed only for UCA, to copy the appropriate\r
 164         // contractions\r
 165         m_dataInputStream_.skip(4);\r
 166         readcount += 4;\r
 167         // reserved for future use\r
 168         m_dataInputStream_.skipBytes(4);\r
 169         readcount += 4;\r
 170         // const uint8_t *mappingPosition;\r
 171         int mapping = m_dataInputStream_.readInt();\r
 172         readcount += 4;\r
 173         // uint32_t *expansion;\r
 174         rbc.m_expansionOffset_ = m_dataInputStream_.readInt();\r
 175         readcount += 4;\r
 176         // UChar *contractionIndex;\r
 177         rbc.m_contractionOffset_ = m_dataInputStream_.readInt();\r
 178         readcount += 4;\r
 179         // uint32_t *contractionCEs;\r
 180         int contractionCE = m_dataInputStream_.readInt();\r
 181         readcount += 4;\r
 182         // needed for various closures int contractionSize\r
 183         /*int contractionSize = */m_dataInputStream_.readInt();\r
 184         readcount += 4;\r
 185         // array of last collation element in expansion\r
 186         int expansionEndCE = m_dataInputStream_.readInt();\r
 187         readcount += 4;\r
 188         // array of maximum expansion size corresponding to the expansion\r
 189         // collation elements with last element in expansionEndCE\r
 190         int expansionEndCEMaxSize = m_dataInputStream_.readInt();\r
 191         readcount += 4;\r
 192         // size of endExpansionCE int expansionEndCESize\r
 193         m_dataInputStream_.skipBytes(4);\r
 194         readcount += 4;\r
 195         // hash table of unsafe code points\r
 196         int unsafe = m_dataInputStream_.readInt();\r
 197         readcount += 4;\r
 198         // hash table of final code points in contractions.\r
 199         int contractionEnd = m_dataInputStream_.readInt();\r
 200         readcount += 4;\r
 201         // int CEcount = m_dataInputStream_.readInt();\r
 202         m_dataInputStream_.skipBytes(4);\r
 203         readcount += 4;\r
 204         // is jamoSpecial\r
 205         rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();\r
 206         readcount++;\r
 207         // padding\r
 208         m_dataInputStream_.skipBytes(3);\r
 209         readcount += 3;\r
 210         rbc.m_version_ = readVersion(m_dataInputStream_);\r
 211         readcount += 4;\r
 212         rbc.m_UCA_version_ = readVersion(m_dataInputStream_);\r
 213         readcount += 4;\r
 214         rbc.m_UCD_version_ = readVersion(m_dataInputStream_);\r
 215         readcount += 4;\r
 216         // byte charsetName[] = new byte[32]; // for charset CEs\r
 217         m_dataInputStream_.skipBytes(32);\r
 218         readcount += 32;\r
 219         m_dataInputStream_.skipBytes(56); // for future use\r
 220         readcount += 56;\r
 221         if (m_headerSize_ < readcount) {\r
 222             ///CLOVER:OFF\r
 223             throw new IOException("Internal Error: Header size error");\r
 224             ///CLOVER:ON\r
 225         }\r
 226         m_dataInputStream_.skipBytes(m_headerSize_ - readcount);\r
 227 \r
 228         if (rbc.m_contractionOffset_ == 0) { // contraction can be null\r
 229             rbc.m_contractionOffset_ = mapping;\r
 230             contractionCE = mapping;\r
 231         }\r
 232         m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;\r
 233         m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;\r
 234         m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;\r
 235         m_contractionCESize_ = mapping - contractionCE;\r
 236         //m_trieSize_ = expansionEndCE - mapping;\r
 237         m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;\r
 238         m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;\r
 239         m_unsafeSize_ = contractionEnd - unsafe;\r
 240         m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled\r
 241                                                 // later\r
 242         // treat it as normal collator first\r
 243         // for normal collator there is no UCA contraction\r
 244         m_contractionEndSize_ = m_size_ - contractionEnd;\r
 245 \r
 246         rbc.m_contractionOffset_ >>= 1; // casting to ints\r
 247         rbc.m_expansionOffset_ >>= 2; // casting to chars\r
 248     }\r
 249     \r
 250     /**\r
 251      * Read and break up the collation options passed in the stream of data and\r
 252      * update the argument Collator with the results\r
 253      * \r
 254      * @param rbc\r
 255      *            RuleBasedCollator to populate\r
 256      * @exception IOException\r
 257      *                thrown when there's a data error.\r
 258      */\r
 259     private void readOptions(RuleBasedCollator rbc) throws IOException\r
 260     {\r
 261         int readcount = 0;\r
 262         rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();\r
 263         readcount += 4;\r
 264         rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()\r
 265                                       == RuleBasedCollator.AttributeValue.ON_);\r
 266         readcount += 4;\r
 267         rbc.m_defaultIsAlternateHandlingShifted_ \r
 268                                    = (m_dataInputStream_.readInt() == \r
 269                                     RuleBasedCollator.AttributeValue.SHIFTED_);\r
 270         readcount += 4;\r
 271         rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();\r
 272         readcount += 4;\r
 273         rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() \r
 274                                      == RuleBasedCollator.AttributeValue.ON_);\r
 275         readcount += 4;\r
 276         int value = m_dataInputStream_.readInt();\r
 277         readcount += 4;\r
 278         if (value == RuleBasedCollator.AttributeValue.ON_) {\r
 279             value = Collator.CANONICAL_DECOMPOSITION;\r
 280         }\r
 281         else {\r
 282             value = Collator.NO_DECOMPOSITION;\r
 283         }\r
 284         rbc.m_defaultDecomposition_ = value;\r
 285         rbc.m_defaultStrength_ = m_dataInputStream_.readInt();\r
 286         readcount += 4;\r
 287         rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() \r
 288                                      == RuleBasedCollator.AttributeValue.ON_);\r
 289         readcount += 4;\r
 290         rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() \r
 291                                       == RuleBasedCollator.AttributeValue.ON_);\r
 292         readcount += 4;\r
 293         m_dataInputStream_.skip(60); // reserved for future use\r
 294         readcount += 60;\r
 295         m_dataInputStream_.skipBytes(m_optionSize_ - readcount);\r
 296         if (m_optionSize_ < readcount) {\r
 297             ///CLOVER:OFF\r
 298             throw new IOException("Internal Error: Option size error");\r
 299             ///CLOVER:ON\r
 300         }\r
 301     }\r
 302     \r
    /**
    * Read and break up the stream of data passed in as arguments into 
    * meaningful Collator data. The tables are read strictly in the order in
    * which they appear in the stream: header, options, expansions,
    * contraction index and contraction CEs (when present), trie, expansion
    * end CEs, expansion end CE maximum sizes, unsafe table, contraction end
    * table and, for the UCA only, the UCA constants followed by the UCA
    * contractions. The number of bytes consumed is tracked and compared with
    * the size declared in the header.
    * @param rbc RuleBasedCollator to populate
    * @param UCAConst object to fill up with UCA constants if we are reading 
    *                 the UCA collator, if not use a null
    * @return UCAContractions array filled up with the UCA contractions if we
    *                        are reading the UCA collator, null otherwise
    * @exception IOException thrown when there's a data error.
    */
    private char[] readImp(RuleBasedCollator rbc, 
                          RuleBasedCollator.UCAConstants UCAConst) 
                                                            throws IOException
    {
        readHeader(rbc);
        // header size has been checked by readHeader
        int readcount = m_headerSize_; 
        // option size has been checked by readOptions
        readOptions(rbc);
        readcount += m_optionSize_;
        // expansion table: convert the byte size into a count of ints
        m_expansionSize_ >>= 2;
        rbc.m_expansion_ = new int[m_expansionSize_];
        for (int i = 0; i < m_expansionSize_; i ++) {
            rbc.m_expansion_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionSize_ << 2);
        // contraction tables are only read when the index has a positive size
        if (m_contractionIndexSize_ > 0) { 
            // byte size to count of chars
            m_contractionIndexSize_ >>= 1;
            rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
            for (int i = 0; i < m_contractionIndexSize_; i ++) {
                rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
            }
            readcount += (m_contractionIndexSize_ << 1);
            // byte size to count of ints
            m_contractionCESize_ >>= 2;
            rbc.m_contractionCE_ = new int[m_contractionCESize_];
            for (int i = 0; i < m_contractionCESize_; i ++) {
                rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
            }
            readcount += (m_contractionCESize_ << 2);
        }
        // the trie reads itself from the stream and reports how many bytes
        // its serialized form occupies
        rbc.m_trie_ = new IntTrie(m_dataInputStream_, 
                                 RuleBasedCollator.DataManipulate.getInstance());
        if (!rbc.m_trie_.isLatin1Linear()) {
            throw new IOException("Data corrupted, " 
                                  + "Collator Tries expected to have linear "
                                  + "latin one data arrays");
        }
        readcount += rbc.m_trie_.getSerializedDataSize();
        // byte size to count of ints
        m_expansionEndCESize_ >>= 2;
        rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
        for (int i = 0; i < m_expansionEndCESize_; i ++) {
            rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionEndCESize_ << 2);
        // the remaining tables are byte arrays, so their sizes need no conversion
        rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
        for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {
            rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_expansionEndCEMaxSizeSize_;
        rbc.m_unsafe_ = new byte[m_unsafeSize_];
        for (int i = 0; i < m_unsafeSize_; i ++) {
            rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_unsafeSize_;
        if (UCAConst != null) {
            // we are reading the UCA
            // unfortunately the UCA offset in any collator data is not 0 and
            // only refers to the UCA data
            // readHeader sized the contraction end table as running to the end
            // of the data, so the UCA values that follow it are subtracted here
            m_contractionEndSize_ -= m_UCAValuesSize_;       
        }
        rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
        for (int i = 0; i < m_contractionEndSize_; i ++) {
            rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_contractionEndSize_;
        if (UCAConst != null) {
            // UCA constants: a run of two-int entries followed by single-int
            // primary boundaries, read in stream order; readUCAConstcount
            // tracks the bytes consumed from the UCA values area
            UCAConst.FIRST_TERTIARY_IGNORABLE_[0] 
                                               = m_dataInputStream_.readInt();
            int readUCAConstcount = 4;
            UCAConst.FIRST_TERTIARY_IGNORABLE_[1] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[0] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[1] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[0] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[1] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[0] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[1] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[0] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[1] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[0] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[1] 
                                               = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();     
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt(); 
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();                     
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();   
            readUCAConstcount += 4; 
            UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();  
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();   
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();   
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();   
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();   
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();   
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();   
            readUCAConstcount += 4;
            // whatever is left of the UCA values area after the constants is
            // returned to the caller as chars (2 bytes each)
            int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;
            char result[] = new char[resultsize];
            for (int i = 0; i < resultsize; i ++) {
                result[i] = m_dataInputStream_.readChar();
            }
            readcount += m_UCAValuesSize_;
            // the bytes accounted for must match the size declared in the header
            if (readcount != m_size_) {
                ///CLOVER:OFF
                throw new IOException("Internal Error: Data file size error");
                ///CLOVER:ON
            }
            return result;
        }
        // non-UCA collator: same size check, but there are no contractions to return
        if (readcount != m_size_) {
            ///CLOVER:OFF
            throw new IOException("Internal Error: Data file size error");
            ///CLOVER:ON
        }
        return null;
    }
 485     \r
 486     /**\r
 487      * Reads in the inverse uca data\r
 488      * @param input input stream with the inverse uca data\r
 489      * @return an object containing the inverse uca data\r
 490      * @exception IOException thrown when error occurs while reading the \r
 491      *            inverse uca\r
 492      */\r
 493     private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(\r
 494                                                       InputStream inputStream)\r
 495                                                       throws IOException\r
 496     {\r
 497          byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, \r
 498                               INVERSE_UCA_AUTHENTICATE_);\r
 499                               \r
 500         // weiv: check that we have the correct Unicode version in \r
 501         // binary files\r
 502         VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
 503         if(UnicodeVersion[0] != UCDVersion.getMajor() \r
 504         || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
 505             throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
 506         }\r
 507                               \r
 508         CollationParsedRuleBuilder.InverseUCA result = \r
 509                                   new CollationParsedRuleBuilder.InverseUCA();\r
 510         DataInputStream input = new DataInputStream(inputStream);        \r
 511         input.readInt(); // bytesize\r
 512         int tablesize = input.readInt(); // in int size\r
 513         int contsize = input.readInt();  // in char size\r
 514         input.readInt(); // table in bytes\r
 515         input.readInt(); // conts in bytes\r
 516         result.m_UCA_version_ = readVersion(input);\r
 517         input.skipBytes(8); // skip padding\r
 518         \r
 519         int size = tablesize * 3; // one column for each strength\r
 520         result.m_table_ = new int[size];\r
 521         result.m_continuations_ = new char[contsize];\r
 522         \r
 523         for (int i = 0; i < size; i ++) {\r
 524             result.m_table_[i] = input.readInt();\r
 525         }\r
 526         for (int i = 0; i < contsize; i ++) {\r
 527             result.m_continuations_[i] = input.readChar();\r
 528         }\r
 529         input.close();\r
 530         return result;\r
 531     }\r
 532     \r
 533     /**\r
 534      * Reads four bytes from the input and returns a VersionInfo\r
 535      * object. Use it to read different collator versions.\r
 536      * @param input already instantiated DataInputStream, positioned \r
 537      *              at the start of four version bytes\r
 538      * @return a ready VersionInfo object\r
 539      * @throws IOException thrown when error occurs while reading  \r
 540      *            version bytes\r
 541      */\r
 542     \r
 543     protected static VersionInfo readVersion(DataInputStream input) \r
 544         throws IOException {\r
 545         byte[] version = new byte[4];\r
 546         version[0] = input.readByte();\r
 547         version[1] = input.readByte();\r
 548         version[2] = input.readByte();\r
 549         version[3] = input.readByte();\r
 550         \r
 551         VersionInfo result = \r
 552         VersionInfo.getInstance(\r
 553             (int)version[0], (int)version[1], \r
 554             (int)version[2], (int)version[3]);\r
 555         \r
 556         return result;\r
 557     }\r
 558     \r
 559     // private inner class -----------------------------------------------\r
 560     \r
 561     // private variables -------------------------------------------------\r
 562     \r
    /**
     * Authenticates the uca data format version: the first version byte must
     * equal the first byte of DATA_FORMAT_VERSION_ and the second must be at
     * least the second byte of DATA_FORMAT_VERSION_. The remaining two bytes
     * are not checked.
     */
    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ 
                = new ICUBinary.Authenticate() {
                        public boolean isDataVersionAcceptable(byte version[])
                        {
                            return version[0] == DATA_FORMAT_VERSION_[0] 
                                   && version[1] >= DATA_FORMAT_VERSION_[1];
                                   // Requiring an exact match on all four
                                   // bytes was judged too harsh:
                                   //&& version[1] == DATA_FORMAT_VERSION_[1]
                                   //&& version[2] == DATA_FORMAT_VERSION_[2] 
                                   //&& version[3] == DATA_FORMAT_VERSION_[3];
                        }
                };
                
    /**
     * Authenticates the inverse uca data format version: the first version
     * byte must equal the first byte of INVERSE_UCA_DATA_FORMAT_VERSION_ and
     * the second must be at least its second byte.
     */
    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ 
                = new ICUBinary.Authenticate() {
                        public boolean isDataVersionAcceptable(byte version[])
                        {
                            return version[0] 
                                    == INVERSE_UCA_DATA_FORMAT_VERSION_[0] 
                                && version[1] 
                                    >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
                        }
                };
 592   \r
    /**
    * Data input stream wrapping the collation data being read; assigned by
    * the constructor.
    */
    private DataInputStream m_dataInputStream_;
   
    /**
    * File format version and id that this class understands.
    * No guarantees are made if an older version is used.
    * The id bytes are the ASCII characters "UCol".
    */
    private static final byte DATA_FORMAT_VERSION_[] = 
                                   {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};
    private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,  
                                                    (byte)0x6f, (byte)0x6c};
    /**
    * Inverse UCA file format version and id that this class understands.
    * No guarantees are made if an older version is used.
    * The id bytes are the ASCII characters "InvC".
    */
    private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = 
                                   {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};
    private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, 
                                                               (byte)0x6e,  
                                                               (byte)0x76, 
                                                               (byte)0x43};
                                
    /**
    * Error message used when the Unicode version stored in a binary file
    * differs from the runtime's Unicode version
    */
    private static final String WRONG_UNICODE_VERSION_ERROR_ =
                                "Unicode version in binary image is not compatible with the current Unicode version";
 622 \r
    /**
     * Size of expansion table in bytes as computed by readHeader; readImp
     * converts it in place to a count of ints
     */
    private int m_expansionSize_;
    /**
     * Size of contraction index table in bytes as computed by readHeader;
     * readImp converts it in place to a count of chars
     */
    private int m_contractionIndexSize_;
    /**
     * Size of contraction table in bytes as computed by readHeader; readImp
     * converts it in place to a count of ints
     */
    private int m_contractionCESize_;
    /*
     * Size of the Trie in bytes (unused; the trie reports its own serialized
     * size when it is read)
     */
    //private int m_trieSize_;
    /**
     * Size of the table that contains information about collation elements
     * that end with an expansion; in bytes as computed by readHeader, then
     * converted in place by readImp to a count of ints
     */
    private int m_expansionEndCESize_;
    /**
     * Size in bytes of the table that contains information about the maximum
     * size of collation elements that end with a particular expansion CE
     * corresponding to the ones in expansionEndCE
     */
    private int m_expansionEndCEMaxSizeSize_;
    /**
     * Size in bytes of the option table that contains information about the
     * collation options
     */
    private int m_optionSize_;
    /**
     * Size of the whole data file excluding the ICU header
     */
    private int m_size_;
    /**
     * Size of the collation data header
     */
    private int m_headerSize_;
    /**
     * Size in bytes of the table that contains information about the "Unsafe" 
     * codepoints
     */
    private int m_unsafeSize_;
    /**
     * Size in bytes of the table that contains information about codepoints
     * that end with a contraction
     */
    private int m_contractionEndSize_;
    /**
     * Size in bytes of the UCA values area, which holds the UCA constants
     * followed by the UCA contraction information
     */
    private int m_UCAValuesSize_;
 677       \r
 678     // private methods ---------------------------------------------------\r
 679       \r
 680 }\r
 681 \r