jars/icu4j-4_2_1-src/src/com/ibm/icu/text/CollatorReader.java

   1 /**\r
   2 *******************************************************************************\r
   3 * Copyright (C) 1996-2008, International Business Machines Corporation and    *\r
   4 * others. All Rights Reserved.                                                *\r
   5 *******************************************************************************\r
   6 */\r
   7 package com.ibm.icu.text;\r
   8 \r
   9 import java.io.BufferedInputStream;\r
  10 import java.io.ByteArrayInputStream;\r
  11 import java.io.InputStream;\r
  12 import java.io.DataInputStream;\r
  13 import java.io.IOException;\r
  14 \r
  15 import com.ibm.icu.impl.ICUBinary;\r
  16 import com.ibm.icu.impl.ICUData;\r
  17 import com.ibm.icu.impl.ICUResourceBundle;\r
  18 import com.ibm.icu.impl.IntTrie;\r
  19 import com.ibm.icu.lang.UCharacter;\r
  20 import com.ibm.icu.util.VersionInfo;\r
  21 import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;\r
  22 import com.ibm.icu.text.RuleBasedCollator.UCAConstants;\r
  23 \r
  24 /**\r
* <p>Internal reader class for the ICU data file ucadata.icu, which contains
* Unicode Collation Algorithm data.</p>
* <p>This class simply reads ucadata.icu, authenticates that it is a valid
* ICU data file and splits its contents up into blocks of data for use in
* <a href="Collator.html">com.ibm.icu.text.Collator</a>.
* </p>
* <p>ucadata.icu, which is in big-endian format, is jarred together with this
* package.</p>
  33 * @author Syn Wee Quek\r
  34 * @since release 2.2, April 18 2002\r
  35 */\r
  36 \r
  37 final class CollatorReader\r
  38 {          \r
  39     static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {\r
  40         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");\r
  41         BufferedInputStream b = new BufferedInputStream(i, 90000);\r
  42         CollatorReader reader = new CollatorReader(b);\r
  43         char[] result = reader.readImp(rbc, ucac);\r
  44         b.close();\r
  45         return result;\r
  46     }\r
  47     \r
  48     static void initRBC(RuleBasedCollator rbc, byte[] data) throws IOException {\r
  49         final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;\r
  50         \r
  51         InputStream i = new ByteArrayInputStream(data);\r
  52         BufferedInputStream b = new BufferedInputStream(i);\r
  53         CollatorReader reader = new CollatorReader(b, false);\r
  54         if (data.length > MIN_BINARY_DATA_SIZE_) {\r
  55             reader.readImp(rbc, null);\r
  56         } else {\r
  57             reader.readHeader(rbc);\r
  58             reader.readOptions(rbc);\r
  59             // duplicating UCA_'s data\r
  60             rbc.setWithUCATables();\r
  61         }\r
  62     }\r
  63     \r
  64     static InverseUCA getInverseUCA() throws IOException {\r
  65         InverseUCA result = null;\r
  66         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");\r
  67 //        try    {\r
  68 //            String invdat = "/com/ibm/icu/impl/data/invuca.icu";\r
  69 //            InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);\r
  70             BufferedInputStream b = new BufferedInputStream(i, 110000);\r
  71             result = CollatorReader.readInverseUCA(b);\r
  72             b.close();\r
  73             i.close();\r
  74             return result;\r
  75 //        } catch (Exception e) {\r
  76 //            throw new RuntimeException(e.getMessage());\r
  77 //        }\r
  78     }\r
  79     \r
  80     // protected constructor ---------------------------------------------\r
  81     \r
  82     /**\r
  83     * <p>Protected constructor.</p>\r
  84     * @param inputStream ICU collator file input stream\r
  85     * @exception IOException throw if data file fails authentication \r
  86     */\r
  87     private CollatorReader(InputStream inputStream) throws IOException\r
  88     {\r
  89         this(inputStream, true);\r
  90         /*\r
  91         byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);\r
  92         // weiv: check that we have the correct Unicode version in \r
  93         // binary files\r
  94         VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
  95         if(UnicodeVersion[0] != UCDVersion.getMajor() \r
  96         || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
  97             throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
  98         }\r
  99         m_dataInputStream_ = new DataInputStream(inputStream);\r
 100         */\r
 101     }\r
 102     \r
 103     /**\r
 104     * <p>Protected constructor.</p>\r
 105     * @param inputStream ICU uprops.icu file input stream\r
 106     * @param readICUHeader flag to indicate if the ICU header has to be read\r
 107     * @exception IOException throw if data file fails authentication \r
 108     */\r
 109     private CollatorReader(InputStream inputStream, boolean readICUHeader) \r
 110                                                             throws IOException\r
 111     {\r
 112         if (readICUHeader) {\r
 113             byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, \r
 114                                  UCA_AUTHENTICATE_);\r
 115             // weiv: check that we have the correct Unicode version in \r
 116             // binary files\r
 117             VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
 118             if(UnicodeVersion[0] != UCDVersion.getMajor() \r
 119             || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
 120                 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
 121             }\r
 122         }\r
 123         m_dataInputStream_ = new DataInputStream(inputStream);\r
 124     }\r
 125   \r
 126     // protected methods -------------------------------------------------\r
 127       \r
 128     /**\r
 129     * Read and break up the header stream of data passed in as arguments into \r
 130     * meaningful Collator data.\r
 131     * @param rbc RuleBasedCollator to populate with header information\r
 132     * @exception IOException thrown when there's a data error.\r
 133     */\r
 134     private void readHeader(RuleBasedCollator rbc) throws IOException\r
 135     {\r
 136         m_size_ = m_dataInputStream_.readInt();\r
 137         // all the offsets are in bytes\r
 138         // to get the address add to the header address and cast properly\r
 139         // Default options int options\r
 140         m_headerSize_ = m_dataInputStream_.readInt(); // start of options\r
 141         int readcount = 8; // for size and headersize\r
 142         // structure which holds values for indirect positioning and implicit\r
 143         // ranges\r
 144         int UCAConst = m_dataInputStream_.readInt();\r
 145         readcount += 4;\r
 146         // this one is needed only for UCA, to copy the appropriate\r
 147         // contractions\r
 148         m_dataInputStream_.skip(4);\r
 149         readcount += 4;\r
 150         // reserved for future use\r
 151         m_dataInputStream_.skipBytes(4);\r
 152         readcount += 4;\r
 153         // const uint8_t *mappingPosition;\r
 154         int mapping = m_dataInputStream_.readInt();\r
 155         readcount += 4;\r
 156         // uint32_t *expansion;\r
 157         rbc.m_expansionOffset_ = m_dataInputStream_.readInt();\r
 158         readcount += 4;\r
 159         // UChar *contractionIndex;\r
 160         rbc.m_contractionOffset_ = m_dataInputStream_.readInt();\r
 161         readcount += 4;\r
 162         // uint32_t *contractionCEs;\r
 163         int contractionCE = m_dataInputStream_.readInt();\r
 164         readcount += 4;\r
 165         // needed for various closures int contractionSize\r
 166         /*int contractionSize = */m_dataInputStream_.readInt();\r
 167         readcount += 4;\r
 168         // array of last collation element in expansion\r
 169         int expansionEndCE = m_dataInputStream_.readInt();\r
 170         readcount += 4;\r
 171         // array of maximum expansion size corresponding to the expansion\r
 172         // collation elements with last element in expansionEndCE\r
 173         int expansionEndCEMaxSize = m_dataInputStream_.readInt();\r
 174         readcount += 4;\r
 175         // size of endExpansionCE int expansionEndCESize\r
 176         m_dataInputStream_.skipBytes(4);\r
 177         readcount += 4;\r
 178         // hash table of unsafe code points\r
 179         int unsafe = m_dataInputStream_.readInt();\r
 180         readcount += 4;\r
 181         // hash table of final code points in contractions.\r
 182         int contractionEnd = m_dataInputStream_.readInt();\r
 183         readcount += 4;\r
 184         // int CEcount = m_dataInputStream_.readInt();\r
 185         m_dataInputStream_.skipBytes(4);\r
 186         readcount += 4;\r
 187         // is jamoSpecial\r
 188         rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();\r
 189         readcount++;\r
 190         // padding\r
 191         m_dataInputStream_.skipBytes(3);\r
 192         readcount += 3;\r
 193         rbc.m_version_ = readVersion(m_dataInputStream_);\r
 194         readcount += 4;\r
 195         rbc.m_UCA_version_ = readVersion(m_dataInputStream_);\r
 196         readcount += 4;\r
 197         rbc.m_UCD_version_ = readVersion(m_dataInputStream_);\r
 198         readcount += 4;\r
 199         // byte charsetName[] = new byte[32]; // for charset CEs\r
 200         m_dataInputStream_.skipBytes(32);\r
 201         readcount += 32;\r
 202         m_dataInputStream_.skipBytes(56); // for future use\r
 203         readcount += 56;\r
 204         if (m_headerSize_ < readcount) {\r
 205             throw new IOException("Internal Error: Header size error");\r
 206         }\r
 207         m_dataInputStream_.skipBytes(m_headerSize_ - readcount);\r
 208 \r
 209         if (rbc.m_contractionOffset_ == 0) { // contraction can be null\r
 210             rbc.m_contractionOffset_ = mapping;\r
 211             contractionCE = mapping;\r
 212         }\r
 213         m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;\r
 214         m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;\r
 215         m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;\r
 216         m_contractionCESize_ = mapping - contractionCE;\r
 217         //m_trieSize_ = expansionEndCE - mapping;\r
 218         m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;\r
 219         m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;\r
 220         m_unsafeSize_ = contractionEnd - unsafe;\r
 221         m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled\r
 222                                                 // later\r
 223         // treat it as normal collator first\r
 224         // for normal collator there is no UCA contraction\r
 225         m_contractionEndSize_ = m_size_ - contractionEnd;\r
 226 \r
 227         rbc.m_contractionOffset_ >>= 1; // casting to ints\r
 228         rbc.m_expansionOffset_ >>= 2; // casting to chars\r
 229     }\r
 230     \r
 231     /**\r
 232      * Read and break up the collation options passed in the stream of data and\r
 233      * update the argument Collator with the results\r
 234      * \r
 235      * @param rbc\r
 236      *            RuleBasedCollator to populate\r
 237      * @exception IOException\r
 238      *                thrown when there's a data error.\r
 239      */\r
 240     private void readOptions(RuleBasedCollator rbc) throws IOException\r
 241     {\r
 242         int readcount = 0;\r
 243         rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();\r
 244         readcount += 4;\r
 245         rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()\r
 246                                       == RuleBasedCollator.AttributeValue.ON_);\r
 247         readcount += 4;\r
 248         rbc.m_defaultIsAlternateHandlingShifted_ \r
 249                                    = (m_dataInputStream_.readInt() == \r
 250                                     RuleBasedCollator.AttributeValue.SHIFTED_);\r
 251         readcount += 4;\r
 252         rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();\r
 253         readcount += 4;\r
 254         rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() \r
 255                                      == RuleBasedCollator.AttributeValue.ON_);\r
 256         readcount += 4;\r
 257         int value = m_dataInputStream_.readInt();\r
 258         readcount += 4;\r
 259         if (value == RuleBasedCollator.AttributeValue.ON_) {\r
 260             value = Collator.CANONICAL_DECOMPOSITION;\r
 261         }\r
 262         else {\r
 263             value = Collator.NO_DECOMPOSITION;\r
 264         }\r
 265         rbc.m_defaultDecomposition_ = value;\r
 266         rbc.m_defaultStrength_ = m_dataInputStream_.readInt();\r
 267         readcount += 4;\r
 268         rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() \r
 269                                      == RuleBasedCollator.AttributeValue.ON_);\r
 270         readcount += 4;\r
 271         rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() \r
 272                                       == RuleBasedCollator.AttributeValue.ON_);\r
 273         readcount += 4;\r
 274         m_dataInputStream_.skip(60); // reserved for future use\r
 275         readcount += 60;\r
 276         m_dataInputStream_.skipBytes(m_optionSize_ - readcount);\r
 277         if (m_optionSize_ < readcount) {\r
 278             throw new IOException("Internal Error: Option size error");\r
 279         }\r
 280     }\r
 281     \r
 282     /**\r
 283     * Read and break up the stream of data passed in as arguments into \r
 284     * meaningful Collator data.\r
 285     * @param rbc RuleBasedCollator to populate\r
 286     * @param UCAConst object to fill up with UCA constants if we are reading \r
 287     *                 the UCA collator, if not use a null\r
 288     * @return UCAContractions array filled up with the UCA contractions if we\r
 289     *                        are reading the UCA collator\r
 290     * @exception IOException thrown when there's a data error.\r
 291     */\r
 292     private char[] readImp(RuleBasedCollator rbc, \r
 293                           RuleBasedCollator.UCAConstants UCAConst) \r
 294                                                             throws IOException\r
 295     {\r
 296         readHeader(rbc);\r
 297         // header size has been checked by readHeader\r
 298         int readcount = m_headerSize_; \r
 299         // option size has been checked by readOptions\r
 300         readOptions(rbc);\r
 301         readcount += m_optionSize_;\r
 302         m_expansionSize_ >>= 2;\r
 303         rbc.m_expansion_ = new int[m_expansionSize_];\r
 304         for (int i = 0; i < m_expansionSize_; i ++) {\r
 305             rbc.m_expansion_[i] = m_dataInputStream_.readInt();\r
 306         }\r
 307         readcount += (m_expansionSize_ << 2);\r
 308         if (m_contractionIndexSize_ > 0) { \r
 309             m_contractionIndexSize_ >>= 1;\r
 310             rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];\r
 311             for (int i = 0; i < m_contractionIndexSize_; i ++) {\r
 312                 rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();\r
 313             }\r
 314             readcount += (m_contractionIndexSize_ << 1);\r
 315             m_contractionCESize_ >>= 2;\r
 316             rbc.m_contractionCE_ = new int[m_contractionCESize_];\r
 317             for (int i = 0; i < m_contractionCESize_; i ++) {\r
 318                 rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();\r
 319             }\r
 320             readcount += (m_contractionCESize_ << 2);\r
 321         }\r
 322         rbc.m_trie_ = new IntTrie(m_dataInputStream_, \r
 323                                  RuleBasedCollator.DataManipulate.getInstance());\r
 324         if (!rbc.m_trie_.isLatin1Linear()) {\r
 325             throw new IOException("Data corrupted, " \r
 326                                   + "Collator Tries expected to have linear "\r
 327                                   + "latin one data arrays");\r
 328         }\r
 329         readcount += rbc.m_trie_.getSerializedDataSize();\r
 330         m_expansionEndCESize_ >>= 2;\r
 331         rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];\r
 332         for (int i = 0; i < m_expansionEndCESize_; i ++) {\r
 333             rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();\r
 334         }\r
 335         readcount += (m_expansionEndCESize_ << 2);\r
 336         rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];\r
 337         for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {\r
 338             rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();\r
 339         }\r
 340         readcount += m_expansionEndCEMaxSizeSize_;\r
 341         rbc.m_unsafe_ = new byte[m_unsafeSize_];\r
 342         for (int i = 0; i < m_unsafeSize_; i ++) {\r
 343             rbc.m_unsafe_[i] = m_dataInputStream_.readByte();\r
 344         }\r
 345         readcount += m_unsafeSize_;\r
 346         if (UCAConst != null) {\r
 347             // we are reading the UCA\r
 348             // unfortunately the UCA offset in any collator data is not 0 and\r
 349             // only refers to the UCA data\r
 350             m_contractionEndSize_ -= m_UCAValuesSize_;       \r
 351         }\r
 352         rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];\r
 353         for (int i = 0; i < m_contractionEndSize_; i ++) {\r
 354             rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();\r
 355         }\r
 356         readcount += m_contractionEndSize_;\r
 357         if (UCAConst != null) {\r
 358             UCAConst.FIRST_TERTIARY_IGNORABLE_[0] \r
 359                                                = m_dataInputStream_.readInt();\r
 360             int readUCAConstcount = 4;\r
 361             UCAConst.FIRST_TERTIARY_IGNORABLE_[1] \r
 362                                                = m_dataInputStream_.readInt();\r
 363             readUCAConstcount += 4;\r
 364             UCAConst.LAST_TERTIARY_IGNORABLE_[0] \r
 365                                                = m_dataInputStream_.readInt();\r
 366             readUCAConstcount += 4;\r
 367             UCAConst.LAST_TERTIARY_IGNORABLE_[1] \r
 368                                                = m_dataInputStream_.readInt();\r
 369             readUCAConstcount += 4;\r
 370             UCAConst.FIRST_PRIMARY_IGNORABLE_[0] \r
 371                                                = m_dataInputStream_.readInt();\r
 372             readUCAConstcount += 4;\r
 373             UCAConst.FIRST_PRIMARY_IGNORABLE_[1] \r
 374                                                = m_dataInputStream_.readInt();\r
 375             readUCAConstcount += 4;\r
 376             UCAConst.FIRST_SECONDARY_IGNORABLE_[0] \r
 377                                                = m_dataInputStream_.readInt();\r
 378             readUCAConstcount += 4;\r
 379             UCAConst.FIRST_SECONDARY_IGNORABLE_[1] \r
 380                                                = m_dataInputStream_.readInt();\r
 381             readUCAConstcount += 4;\r
 382             UCAConst.LAST_SECONDARY_IGNORABLE_[0] \r
 383                                                = m_dataInputStream_.readInt();\r
 384             readUCAConstcount += 4;\r
 385             UCAConst.LAST_SECONDARY_IGNORABLE_[1] \r
 386                                                = m_dataInputStream_.readInt();\r
 387             readUCAConstcount += 4;\r
 388             UCAConst.LAST_PRIMARY_IGNORABLE_[0] \r
 389                                                = m_dataInputStream_.readInt();\r
 390             readUCAConstcount += 4;\r
 391             UCAConst.LAST_PRIMARY_IGNORABLE_[1] \r
 392                                                = m_dataInputStream_.readInt();\r
 393             readUCAConstcount += 4;\r
 394             UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();     \r
 395             readUCAConstcount += 4;\r
 396             UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();\r
 397             readUCAConstcount += 4;\r
 398             UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt(); \r
 399             readUCAConstcount += 4;\r
 400             UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();                     \r
 401             readUCAConstcount += 4;\r
 402             UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  \r
 403             readUCAConstcount += 4;\r
 404             UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  \r
 405             readUCAConstcount += 4;\r
 406             UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  \r
 407             readUCAConstcount += 4;\r
 408             UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  \r
 409             readUCAConstcount += 4;\r
 410             UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();  \r
 411             readUCAConstcount += 4;\r
 412             UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();  \r
 413             readUCAConstcount += 4;\r
 414             UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();  \r
 415             readUCAConstcount += 4;\r
 416             UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();  \r
 417             readUCAConstcount += 4;\r
 418             UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();  \r
 419             readUCAConstcount += 4;\r
 420             UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();  \r
 421             readUCAConstcount += 4;\r
 422             UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();  \r
 423             readUCAConstcount += 4;\r
 424             UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();  \r
 425             readUCAConstcount += 4;\r
 426             UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();  \r
 427             readUCAConstcount += 4;\r
 428             UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();   \r
 429             readUCAConstcount += 4; \r
 430             UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();  \r
 431             readUCAConstcount += 4;\r
 432             UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();   \r
 433             readUCAConstcount += 4;\r
 434             UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();   \r
 435             readUCAConstcount += 4;\r
 436             UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();   \r
 437             readUCAConstcount += 4;\r
 438             UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();   \r
 439             readUCAConstcount += 4;\r
 440             UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();   \r
 441             readUCAConstcount += 4;\r
 442             UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();   \r
 443             readUCAConstcount += 4;\r
 444             int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;\r
 445             char result[] = new char[resultsize];\r
 446             for (int i = 0; i < resultsize; i ++) {\r
 447                 result[i] = m_dataInputStream_.readChar();\r
 448             }\r
 449             readcount += m_UCAValuesSize_;\r
 450             if (readcount != m_size_) {\r
 451                 throw new IOException("Internal Error: Data file size error");\r
 452             }\r
 453             return result;\r
 454         }\r
 455         if (readcount != m_size_) {\r
 456             throw new IOException("Internal Error: Data file size error");\r
 457         }\r
 458         return null;\r
 459     }\r
 460     \r
 461     /**\r
 462      * Reads in the inverse uca data\r
 463      * @param input input stream with the inverse uca data\r
 464      * @return an object containing the inverse uca data\r
 465      * @exception IOException thrown when error occurs while reading the \r
 466      *            inverse uca\r
 467      */\r
 468     private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(\r
 469                                                       InputStream inputStream)\r
 470                                                       throws IOException\r
 471     {\r
 472          byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, \r
 473                               INVERSE_UCA_AUTHENTICATE_);\r
 474                               \r
 475         // weiv: check that we have the correct Unicode version in \r
 476         // binary files\r
 477         VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
 478         if(UnicodeVersion[0] != UCDVersion.getMajor() \r
 479         || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
 480             throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
 481         }\r
 482                               \r
 483         CollationParsedRuleBuilder.InverseUCA result = \r
 484                                   new CollationParsedRuleBuilder.InverseUCA();\r
 485         DataInputStream input = new DataInputStream(inputStream);        \r
 486         input.readInt(); // bytesize\r
 487         int tablesize = input.readInt(); // in int size\r
 488         int contsize = input.readInt();  // in char size\r
 489         input.readInt(); // table in bytes\r
 490         input.readInt(); // conts in bytes\r
 491         result.m_UCA_version_ = readVersion(input);\r
 492         input.skipBytes(8); // skip padding\r
 493         \r
 494         int size = tablesize * 3; // one column for each strength\r
 495         result.m_table_ = new int[size];\r
 496         result.m_continuations_ = new char[contsize];\r
 497         \r
 498         for (int i = 0; i < size; i ++) {\r
 499             result.m_table_[i] = input.readInt();\r
 500         }\r
 501         for (int i = 0; i < contsize; i ++) {\r
 502             result.m_continuations_[i] = input.readChar();\r
 503         }\r
 504         input.close();\r
 505         return result;\r
 506     }\r
 507     \r
 508     /**\r
 509      * Reads four bytes from the input and returns a VersionInfo\r
 510      * object. Use it to read different collator versions.\r
 511      * @param input already instantiated DataInputStream, positioned \r
 512      *              at the start of four version bytes\r
 513      * @return a ready VersionInfo object\r
 514      * @throws IOException thrown when error occurs while reading  \r
 515      *            version bytes\r
 516      */\r
 517     \r
 518     protected static VersionInfo readVersion(DataInputStream input) \r
 519         throws IOException {\r
 520         byte[] version = new byte[4];\r
 521         version[0] = input.readByte();\r
 522         version[1] = input.readByte();\r
 523         version[2] = input.readByte();\r
 524         version[3] = input.readByte();\r
 525         \r
 526         VersionInfo result = \r
 527         VersionInfo.getInstance(\r
 528             (int)version[0], (int)version[1], \r
 529             (int)version[2], (int)version[3]);\r
 530         \r
 531         return result;\r
 532     }\r
 533     \r
 534     // private inner class -----------------------------------------------\r
 535     \r
 536     // private variables -------------------------------------------------\r
 537     \r
 538     /**\r
 539      * Authenticate uca data format version\r
 540      */\r
 541     private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ \r
 542                 = new ICUBinary.Authenticate() {\r
 543                         public boolean isDataVersionAcceptable(byte version[])\r
 544                         {\r
 545                             return version[0] == DATA_FORMAT_VERSION_[0] \r
 546                                    && version[1] >= DATA_FORMAT_VERSION_[1];\r
 547                                    // Too harsh \r
 548                                    //&& version[1] == DATA_FORMAT_VERSION_[1]\r
 549                                    //&& version[2] == DATA_FORMAT_VERSION_[2] \r
 550                                    //&& version[3] == DATA_FORMAT_VERSION_[3];\r
 551                         }\r
 552                 };\r
 553                 \r
 554     /**\r
 555      * Authenticate uca data format version\r
 556      */\r
 557     private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ \r
 558                 = new ICUBinary.Authenticate() {\r
 559                         public boolean isDataVersionAcceptable(byte version[])\r
 560                         {\r
 561                             return version[0] \r
 562                                     == INVERSE_UCA_DATA_FORMAT_VERSION_[0] \r
 563                                 && version[1] \r
 564                                     >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];\r
 565                         }\r
 566                 };\r
 567   \r
 568     /**\r
 569     * Data input stream for uca.icu \r
 570     */\r
 571     private DataInputStream m_dataInputStream_;\r
 572    \r
 573     /**\r
 574     * File format version and id that this class understands.\r
 575     * No guarantees are made if a older version is used\r
 576     */\r
 577     private static final byte DATA_FORMAT_VERSION_[] = \r
 578                                    {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};\r
 579     private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,  \r
 580                                                     (byte)0x6f, (byte)0x6c};\r
 581     /**\r
 582     * Inverse UCA file format version and id that this class understands.\r
 583     * No guarantees are made if a older version is used\r
 584     */\r
 585     private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = \r
 586                                    {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};\r
 587     private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, \r
 588                                                                (byte)0x6e,  \r
 589                                                                (byte)0x76, \r
 590                                                                (byte)0x43};\r
 591                                 \r
 592     /**\r
 593     * Wrong unicode version error string\r
 594     */\r
 595     private static final String WRONG_UNICODE_VERSION_ERROR_ =\r
 596                                 "Unicode version in binary image is not compatible with the current Unicode version";\r
 597 \r
 598     /**\r
 599      * Size of expansion table in bytes\r
 600      */\r
 601     private int m_expansionSize_;\r
 602     /**\r
 603      * Size of contraction index table in bytes\r
 604      */\r
 605     private int m_contractionIndexSize_;\r
 606     /**\r
 607      * Size of contraction table in bytes\r
 608      */\r
 609     private int m_contractionCESize_;\r
 610     /*\r
 611      * Size of the Trie in bytes\r
 612      */\r
 613     //private int m_trieSize_;\r
 614     /**\r
 615      * Size of the table that contains information about collation elements\r
 616      * that end with an expansion \r
 617      */\r
 618     private int m_expansionEndCESize_;\r
 619     /**\r
 620      * Size of the table that contains information about the maximum size of \r
 621      * collation elements that end with a particular expansion CE corresponding\r
 622      * to the ones in expansionEndCE\r
 623      */\r
 624     private int m_expansionEndCEMaxSizeSize_;\r
 625     /**\r
 626      * Size of the option table that contains information about the collation\r
 627      * options\r
 628      */\r
 629     private int m_optionSize_;\r
 630     /**\r
 631      * Size of the whole data file minusing the ICU header\r
 632      */\r
 633     private int m_size_;\r
 634     /**\r
 635      * Size of the collation data header\r
 636      */\r
 637     private int m_headerSize_;\r
 638     /**\r
 639      * Size of the table that contains information about the "Unsafe" \r
 640      * codepoints\r
 641      */\r
 642     private int m_unsafeSize_;\r
 643     /**\r
 644      * Size of the table that contains information about codepoints that ends\r
 645      * with a contraction\r
 646      */\r
 647     private int m_contractionEndSize_;\r
 648     /**\r
 649      * Size of the table that contains UCA contraction information\r
 650      */\r
 651     private int m_UCAValuesSize_;\r
 652       \r
 653     // private methods ---------------------------------------------------\r
 654       \r
 655 }\r
 656 \r