/**
 *******************************************************************************
 * Copyright (C) 1996-2013, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;

import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;

/**
 * <p>
 * Internal reader class for the ICU data file uca.icu, which contains Unicode Collation Algorithm data.
 * </p>
 * <p>
 * This class simply reads uca.icu, authenticates that it is a valid ICU data file and splits its contents up into
 * blocks of data for use in <a href=Collator.html>com.ibm.icu.text.Collator</a>.
 * </p>
 * <p>
 * uca.icu, which is in big-endian format, is jarred together with this package.
 * </p>
 * 
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 */

final class CollatorReader {
    static char[] read(RuleBasedCollator rbc, UCAConstants ucac,
                       LeadByteConstants leadByteConstants, Output<Integer> maxUCAContractionLength)
            throws IOException {
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
        BufferedInputStream b = new BufferedInputStream(i, 90000);
        CollatorReader reader = new CollatorReader(b);
        char[] ucaContractions = reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength);
        b.close();
        return ucaContractions;
    }

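    // Adapts a ByteBuffer to the InputStream interface so that the DataInputStream-based
    // reading code and ICUBinary.readHeader() can consume ByteBuffer-backed collation data
    // (see the TODO in initRBC about eventually using the ByteBuffer directly).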
    public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
        return new InputStream() {
            public int read() throws IOException {
                if (!buf.hasRemaining()) {
                    return -1;
                }
                return buf.get() & 0xff;
            }

            public int read(byte[] bytes, int off, int len) throws IOException {
                len = Math.min(len, buf.remaining());
                buf.get(bytes, off, len);
                return len;
            }
        };
    }

    static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
        final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
        int dataLength = data.remaining();
        // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
        // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
        // Consider changing ICUBinary to also work with a ByteBuffer.
        CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
        if (dataLength > MIN_BINARY_DATA_SIZE_) {
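            // More than the minimal header-plus-options image is present,
            // so this tailoring carries its own tables; read the full data.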
            reader.readImp(rbc, null, null, null);
        } else {
            reader.readHeader(rbc, null);
            reader.readOptions(rbc);
            // duplicating UCA_'s data
            rbc.setWithUCATables();
        }
    }

    static InverseUCA getInverseUCA() throws IOException {
        InverseUCA result = null;
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
        // try {
        // String invdat = "/com/ibm/icu/impl/data/invuca.icu";
        // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
        BufferedInputStream b = new BufferedInputStream(i, 110000);
        result = CollatorReader.readInverseUCA(b);
        b.close();
        i.close();
        return result;
        // } catch (Exception e) {
        // throw new RuntimeException(e.getMessage());
        // }
    }

    // private constructors ------------------------------------------------

    /**
     * <p>
     * Private constructor.
     * </p>
     * 
     * @param inputStream
     *            ICU collator file input stream
     * @exception IOException
     *                thrown if the data file fails authentication
     */
    private CollatorReader(InputStream inputStream) throws IOException {
        this(inputStream, true);
        /*
         * byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_); // weiv: check
         * that we have the correct Unicode version in // binary files VersionInfo UCDVersion =
         * UCharacter.getUnicodeVersion(); if(UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] !=
         * UCDVersion.getMinor()) { throw new IOException(WRONG_UNICODE_VERSION_ERROR_); } m_dataInputStream_ = new
         * DataInputStream(inputStream);
         */
    }

    /**
     * <p>
     * Private constructor.
     * </p>
     * 
     * @param inputStream
     *            ICU collator data file input stream
     * @param readICUHeader
     *            flag to indicate if the ICU header has to be read
     * @exception IOException
     *                thrown if the data file fails authentication
     */
    private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
        if (readICUHeader) {
            ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
            // Note: In ICU 51 and earlier,
            // we used to check that the UCA data version (readHeader() return value)
            // matches the UCD version (UCharacter.getUnicodeVersion())
            // but that complicated version updates, and
            // a mismatch is "only" a problem for handling canonical equivalence.
            // It need not be a fatal error.
            // throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
        }
        m_dataInputStream_ = new DataInputStream(inputStream);
    }

    // protected methods -------------------------------------------------

    /**
     * Reads the header of the collation data stream and breaks it up into meaningful Collator data.
     * 
     * @param rbc
     *            RuleBasedCollator to populate with header information
     * @param maxUCAContractionLength
     *            if not null, receives the maximum UCA contraction length from the header
     * @exception IOException
     *                thrown when there's a data error.
     */
    private void readHeader(RuleBasedCollator rbc, Output<Integer> maxUCAContractionLength) throws IOException {
        m_size_ = m_dataInputStream_.readInt();
        // all the offsets are in bytes
        // to get the address add to the header address and cast properly
        // Default options int options
        m_headerSize_ = m_dataInputStream_.readInt(); // start of options
        int readcount = 8; // for size and headersize
        // structure which holds values for indirect positioning and implicit
        // ranges
        m_UCAConstOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // this one is needed only for UCA, to copy the appropriate
        // contractions
        /*int contractionUCACombos =*/ m_dataInputStream_.readInt();
        readcount += 4;
        // reserved for future use
        m_dataInputStream_.skipBytes(4);
        readcount += 4;
        // const uint8_t *mappingPosition;
        int mapping = m_dataInputStream_.readInt();
        readcount += 4;
        // uint32_t *expansion;
        rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // UChar *contractionIndex;
        rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // uint32_t *contractionCEs;
        int contractionCE = m_dataInputStream_.readInt();
        readcount += 4;
        // needed for various closures int contractionSize
        int contractionSize = m_dataInputStream_.readInt();
        readcount += 4;
        // array of last collation element in expansion
        int expansionEndCE = m_dataInputStream_.readInt();
        readcount += 4;
        // array of maximum expansion size corresponding to the expansion
        // collation elements with last element in expansionEndCE
        int expansionEndCEMaxSize = m_dataInputStream_.readInt();
        readcount += 4;
        // size of endExpansionCE int expansionEndCESize
        /* int endExpansionCECount = */m_dataInputStream_.readInt();
        readcount += 4;
        // hash table of unsafe code points
        int unsafe = m_dataInputStream_.readInt();
        readcount += 4;
        // hash table of final code points in contractions.
        int contractionEnd = m_dataInputStream_.readInt();
        readcount += 4;
        // int CEcount = m_dataInputStream_.readInt();
        int contractionUCACombosSize = m_dataInputStream_.readInt();
        readcount += 4;
        // is jamoSpecial
        rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
        readcount++;
        // isBigEndian and charSetFamily
        m_dataInputStream_.skipBytes(2);
        readcount += 2;
        int contractionUCACombosWidth = m_dataInputStream_.readByte();
        if (maxUCAContractionLength != null) {
            maxUCAContractionLength.value = contractionUCACombosWidth;
        }
        // We want to be able to output this value if it's not 0.
        assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null;
        readcount += 1;
        rbc.m_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
        readcount += 4;

        // byte charsetName[] = new byte[32]; // for charset CEs
        m_dataInputStream_.skipBytes(32);
        readcount += 32;

        m_dataInputStream_.skipBytes(44); // for future use
        readcount += 44;
        if (m_headerSize_ < readcount) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Header size error");
            // /CLOVER:ON
        }
        m_dataInputStream_.skipBytes(m_headerSize_ - readcount);

        if (rbc.m_contractionOffset_ == 0) { // contraction can be null
            rbc.m_contractionOffset_ = mapping;
            contractionCE = mapping;
        }
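        // The header stores byte offsets of consecutive tables in the data image;
        // each table's size is the difference between its offset and the next one's.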
        m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
        m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
        m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
        m_contractionCESize_ = mapping - contractionCE;
        // m_trieSize_ = expansionEndCE - mapping;
        m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
        m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
        m_unsafeSize_ = contractionEnd - unsafe;
        // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
        m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;

        // treat it as normal collator first
        // for normal collator there is no UCA contraction
        // contractions (UChar[contractionSize] + CE[contractionSize])
        m_contractionSize_ = contractionSize * 2 + contractionSize * 4;

        rbc.m_contractionOffset_ >>= 1; // casting to chars
        rbc.m_expansionOffset_ >>= 2; // casting to ints
    }

    /**
     * Reads and breaks up the collation options passed in the stream of data and updates the argument Collator with
     * the results.
     * 
     * @param rbc
     *            RuleBasedCollator to populate
     * @exception IOException
     *                thrown when there's a data error.
     */
    private void readOptions(RuleBasedCollator rbc) throws IOException {
        int readcount = 0;
        rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
        readcount += 4;
        rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
        readcount += 4;
        // rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
        // == RuleBasedCollator.AttributeValue.ON_);
        int defaultIsCaseLevel = m_dataInputStream_.readInt();
        rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        int value = m_dataInputStream_.readInt();
        readcount += 4;
        if (value == RuleBasedCollator.AttributeValue.ON_) {
            value = Collator.CANONICAL_DECOMPOSITION;
        } else {
            value = Collator.NO_DECOMPOSITION;
        }
        rbc.m_defaultDecomposition_ = value;
        rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        m_dataInputStream_.skip(60); // reserved for future use
        readcount += 60;
        m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
        if (m_optionSize_ < readcount) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Option size error");
            // /CLOVER:ON
        }
    }

    /**
     * Reads and breaks up the stream of data passed in as arguments into meaningful Collator data.
     * 
     * @param rbc
     *            RuleBasedCollator to populate
     * @param UCAConst
     *            object to fill with UCA constants if we are reading the UCA collator; otherwise null
     * @param leadByteConstants
     *            object to fill with the lead byte constant data, or null
     * @param maxUCAContractionLength
     *            if not null, receives the maximum UCA contraction length
     * @return array filled with the UCA contractions if we are reading the UCA collator, otherwise null
     * @exception IOException
     *                thrown when there's a data error.
     */
    private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
            RuleBasedCollator.LeadByteConstants leadByteConstants,
            Output<Integer> maxUCAContractionLength) throws IOException {
        char ucaContractions[] = null; // return result

        readHeader(rbc, maxUCAContractionLength);
        // header size has been checked by readHeader
        int readcount = m_headerSize_;
        // option size has been checked by readOptions
        readOptions(rbc);
        readcount += m_optionSize_;
        m_expansionSize_ >>= 2;
        rbc.m_expansion_ = new int[m_expansionSize_];
        for (int i = 0; i < m_expansionSize_; i++) {
            rbc.m_expansion_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionSize_ << 2);
        if (m_contractionIndexSize_ > 0) {
            m_contractionIndexSize_ >>= 1;
            rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
            for (int i = 0; i < m_contractionIndexSize_; i++) {
                rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
            }
            readcount += (m_contractionIndexSize_ << 1);
            m_contractionCESize_ >>= 2;
            rbc.m_contractionCE_ = new int[m_contractionCESize_];
            for (int i = 0; i < m_contractionCESize_; i++) {
                rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
            }
            readcount += (m_contractionCESize_ << 2);
        }
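        // The code point-to-CE mapping is stored as a serialized IntTrie; the collator
        // requires the trie to have a linear Latin-1 data block.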
        rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
        if (!rbc.m_trie_.isLatin1Linear()) {
            throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
                    + "latin one data arrays");
        }
        readcount += rbc.m_trie_.getSerializedDataSize();
        m_expansionEndCESize_ >>= 2;
        rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
        for (int i = 0; i < m_expansionEndCESize_; i++) {
            rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionEndCESize_ << 2);
        rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
        for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
            rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_expansionEndCEMaxSizeSize_;
        rbc.m_unsafe_ = new byte[m_unsafeSize_];
        for (int i = 0; i < m_unsafeSize_; i++) {
            rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_unsafeSize_;
        if (UCAConst != null) {
            // we are reading the UCA
            // unfortunately the UCA offset in any collator data is not 0 and
            // only refers to the UCA data
            // m_contractionSize_ -= m_UCAValuesSize_;
            m_contractionSize_ = m_UCAConstOffset_ - readcount;
        } else {
            m_contractionSize_ = m_size_ - readcount;
        }
        rbc.m_contractionEnd_ = new byte[m_contractionSize_];
        for (int i = 0; i < m_contractionSize_; i++) {
            rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_contractionSize_;
        if (UCAConst != null) {
            UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            int readUCAConstcount = 4;
            UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;

            readcount += readUCAConstcount;

            int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
            assert resultsize == m_UCAcontractionSize_ / 2;
            ucaContractions = new char[resultsize];
            for (int i = 0; i < resultsize; i++) {
                ucaContractions[i] = m_dataInputStream_.readChar();
            }
            readcount += m_UCAcontractionSize_;
        }

        if (leadByteConstants != null) {
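            // Skip forward to the script-to-lead-byte table offset recorded in the header,
            // then read the lead byte constant data.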
            readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
            leadByteConstants.read(m_dataInputStream_);
            readcount += leadByteConstants.getSerializedDataSize();
        }

        if (readcount != m_size_) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Data file size error");
            // /CLOVER:ON
        }
        return ucaContractions;
    }

    /**
     * Reads in the inverse UCA data.
     * 
     * @param inputStream
     *            input stream with the inverse UCA data
     * @return an object containing the inverse UCA data
     * @exception IOException
     *                thrown when an error occurs while reading the inverse UCA data
     */
    private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
        ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);

        // TODO: Check that the invuca data version (readHeader() return value)
        // matches the ucadata version.
        // throw new IOException(WRONG_UNICODE_VERSION_ERROR_);

        CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
        DataInputStream input = new DataInputStream(inputStream);
        input.readInt(); // bytesize
        int tablesize = input.readInt(); // in int size
        int contsize = input.readInt(); // in char size
        input.readInt(); // table in bytes
        input.readInt(); // conts in bytes
        result.m_UCA_version_ = readVersion(input);
        input.skipBytes(8); // skip padding

        int size = tablesize * 3; // one column for each strength
        result.m_table_ = new int[size];
        result.m_continuations_ = new char[contsize];

        for (int i = 0; i < size; i++) {
            result.m_table_[i] = input.readInt();
        }
        for (int i = 0; i < contsize; i++) {
            result.m_continuations_[i] = input.readChar();
        }
        input.close();
        return result;
    }

    /**
     * Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions.
     * 
     * @param input
     *            already instantiated DataInputStream, positioned at the start of the four version bytes
     * @return a ready VersionInfo object
     * @throws IOException
     *             thrown when an error occurs while reading the version bytes
     */
    protected static VersionInfo readVersion(DataInputStream input) throws IOException {
        byte[] version = new byte[4];
        version[0] = input.readByte();
        version[1] = input.readByte();
        version[2] = input.readByte();
        version[3] = input.readByte();

        VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2],
                (int) version[3]);

        return result;
    }

    // private inner class -----------------------------------------------

    // private variables -------------------------------------------------

    /**
     * Authenticate uca data format version
     */
    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
            // Too harsh
            // && version[1] == DATA_FORMAT_VERSION_[1]
            // && version[2] == DATA_FORMAT_VERSION_[2]
            // && version[3] == DATA_FORMAT_VERSION_[3];
        }
    };

    /**
     * Authenticate inverse uca data format version
     */
    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                    && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
        }
    };

    /**
     * Data input stream for uca.icu
     */
    private DataInputStream m_dataInputStream_;

    /**
     * File format version and id that this class understands. No guarantees are made if an older version is used
     */
    private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
    private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
    /**
     * Inverse UCA file format version and id that this class understands. No guarantees are made if an older version
     * is used
     */
    private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
    private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };

    /**
     * Wrong unicode version error string
     */
    // private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";

    /**
     * Size of expansion table in bytes
     */
    private int m_expansionSize_;
    /**
     * Size of contraction index table in bytes
     */
    private int m_contractionIndexSize_;
    /**
     * Size of contraction table in bytes
     */
    private int m_contractionCESize_;
    /*
     * Size of the Trie in bytes
     */
    // private int m_trieSize_;
    /**
     * Size of the table that contains information about collation elements that end with an expansion
     */
    private int m_expansionEndCESize_;
    /**
     * Size of the table that contains information about the maximum size of collation elements that end with a
     * particular expansion CE corresponding to the ones in expansionEndCE
     */
    private int m_expansionEndCEMaxSizeSize_;
    /**
     * Size of the option table that contains information about the collation options
     */
    private int m_optionSize_;
    /**
     * Size of the whole data file minus the ICU header
     */
    private int m_size_;
    /**
     * Size of the collation data header
     */
    private int m_headerSize_;
    /**
     * Size of the table that contains information about the "Unsafe" code points
     */
    private int m_unsafeSize_;
    /**
     * Size in bytes of the table that contains information about code points that end with a contraction
     */
    private int m_contractionSize_;
    /**
     * Size of the table that contains UCA contraction information in bytes
     */
    private int m_UCAcontractionSize_;
    /**
     * Offset of the UCA Const
     */
    private int m_UCAConstOffset_;

    // private methods ---------------------------------------------------

}