/**
 *******************************************************************************
 * Copyright (C) 1996-2013, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;

import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;

/**
 * <p>
 * Internal reader class for the ICU data file uca.icu, which contains Unicode Collation Algorithm data.
 * </p>
 * <p>
 * This class simply reads uca.icu, authenticates that it is a valid ICU data file and splits its contents up into
 * blocks of data for use in <a href=Collator.html>com.ibm.icu.text.Collator</a>.
 * </p>
 * <p>
 * uca.icu, which is in big-endian format, is jarred together with this package.
 * </p>
 * 
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 */

final class CollatorReader {
    static char[] read(RuleBasedCollator rbc, UCAConstants ucac,
                       LeadByteConstants leadByteConstants, Output<Integer> maxUCAContractionLength)
            throws IOException {
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
        BufferedInputStream b = new BufferedInputStream(i, 90000);
        CollatorReader reader = new CollatorReader(b);
        char[] ucaContractions = reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength);
        b.close();
        return ucaContractions;
    }

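    // Adapts a ByteBuffer to the InputStream interface so that the DataInputStream-based
    // reading code and ICUBinary.readHeader() can consume ByteBuffer-backed collation data
    // (see the TODO in initRBC about eventually using the ByteBuffer directly).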
    public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
        return new InputStream() {
            public int read() throws IOException {
                if (!buf.hasRemaining()) {
                    return -1;
                }
                return buf.get() & 0xff;
            }

            public int read(byte[] bytes, int off, int len) throws IOException {
                len = Math.min(len, buf.remaining());
                buf.get(bytes, off, len);
                return len;
            }
        };
    }

    static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
        final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
        int dataLength = data.remaining();
        // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
        // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
        // Consider changing ICUBinary to also work with a ByteBuffer.
        CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
        if (dataLength > MIN_BINARY_DATA_SIZE_) {
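            // More than the minimal header-plus-options image is present,
            // so this tailoring carries its own tables; read the full data.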
            reader.readImp(rbc, null, null, null);
        } else {
            reader.readHeader(rbc, null);
            reader.readOptions(rbc);
            // duplicating UCA_'s data
            rbc.setWithUCATables();
        }
    }

    static InverseUCA getInverseUCA() throws IOException {
        InverseUCA result = null;
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
        // try {
        // String invdat = "/com/ibm/icu/impl/data/invuca.icu";
        // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
        BufferedInputStream b = new BufferedInputStream(i, 110000);
        result = CollatorReader.readInverseUCA(b);
        b.close();
        i.close();
        return result;
        // } catch (Exception e) {
        // throw new RuntimeException(e.getMessage());
        // }
    }

    // private constructors ------------------------------------------------

    /**
     * <p>
     * Private constructor.
     * </p>
     * 
     * @param inputStream
     *            ICU collator file input stream
     * @exception IOException
     *                thrown if the data file fails authentication
     */
    private CollatorReader(InputStream inputStream) throws IOException {
        this(inputStream, true);
        /*
         * byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_); // weiv: check
         * that we have the correct Unicode version in // binary files VersionInfo UCDVersion =
         * UCharacter.getUnicodeVersion(); if(UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] !=
         * UCDVersion.getMinor()) { throw new IOException(WRONG_UNICODE_VERSION_ERROR_); } m_dataInputStream_ = new
         * DataInputStream(inputStream);
         */
    }

    /**
     * <p>
     * Private constructor.
     * </p>
     * 
     * @param inputStream
     *            ICU collator data file input stream
     * @param readICUHeader
     *            flag to indicate if the ICU header has to be read
     * @exception IOException
     *                thrown if the data file fails authentication
     */
    private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
        if (readICUHeader) {
            ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
            // Note: In ICU 51 and earlier,
            // we used to check that the UCA data version (readHeader() return value)
            // matches the UCD version (UCharacter.getUnicodeVersion())
            // but that complicated version updates, and
            // a mismatch is "only" a problem for handling canonical equivalence.
            // It need not be a fatal error.
            // throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
        }
        m_dataInputStream_ = new DataInputStream(inputStream);
    }

    // protected methods -------------------------------------------------

    /**
     * Reads the header of the collation data stream and breaks it up into meaningful Collator data.
     * 
     * @param rbc
     *            RuleBasedCollator to populate with header information
     * @param maxUCAContractionLength
     *            if not null, receives the maximum UCA contraction length from the header
     * @exception IOException
     *                thrown when there's a data error.
     */
    private void readHeader(RuleBasedCollator rbc, Output<Integer> maxUCAContractionLength) throws IOException {
        m_size_ = m_dataInputStream_.readInt();
        // all the offsets are in bytes
        // to get the address add to the header address and cast properly
        // Default options int options
        m_headerSize_ = m_dataInputStream_.readInt(); // start of options
        int readcount = 8; // for size and headersize
        // structure which holds values for indirect positioning and implicit
        // ranges
        m_UCAConstOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // this one is needed only for UCA, to copy the appropriate
        // contractions
        /*int contractionUCACombos =*/ m_dataInputStream_.readInt();
        readcount += 4;
        // reserved for future use
        m_dataInputStream_.skipBytes(4);
        readcount += 4;
        // const uint8_t *mappingPosition;
        int mapping = m_dataInputStream_.readInt();
        readcount += 4;
        // uint32_t *expansion;
        rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // UChar *contractionIndex;
        rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // uint32_t *contractionCEs;
        int contractionCE = m_dataInputStream_.readInt();
        readcount += 4;
        // needed for various closures int contractionSize
        int contractionSize = m_dataInputStream_.readInt();
        readcount += 4;
        // array of last collation element in expansion
        int expansionEndCE = m_dataInputStream_.readInt();
        readcount += 4;
        // array of maximum expansion size corresponding to the expansion
        // collation elements with last element in expansionEndCE
        int expansionEndCEMaxSize = m_dataInputStream_.readInt();
        readcount += 4;
        // size of endExpansionCE int expansionEndCESize
        /* int endExpansionCECount = */m_dataInputStream_.readInt();
        readcount += 4;
        // hash table of unsafe code points
        int unsafe = m_dataInputStream_.readInt();
        readcount += 4;
        // hash table of final code points in contractions.
        int contractionEnd = m_dataInputStream_.readInt();
        readcount += 4;
        // int CEcount = m_dataInputStream_.readInt();
        int contractionUCACombosSize = m_dataInputStream_.readInt();
        readcount += 4;
        // is jamoSpecial
        rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
        readcount++;
        // isBigEndian and charSetFamily
        m_dataInputStream_.skipBytes(2);
        readcount += 2;
        int contractionUCACombosWidth = m_dataInputStream_.readByte();
        if (maxUCAContractionLength != null) {
            maxUCAContractionLength.value = contractionUCACombosWidth;
        }
        // We want to be able to output this value if it's not 0.
        assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null;
        readcount += 1;
        rbc.m_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
        readcount += 4;

        // byte charsetName[] = new byte[32]; // for charset CEs
        m_dataInputStream_.skipBytes(32);
        readcount += 32;

        m_dataInputStream_.skipBytes(44); // for future use
        readcount += 44;
        if (m_headerSize_ < readcount) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Header size error");
            // /CLOVER:ON
        }
        m_dataInputStream_.skipBytes(m_headerSize_ - readcount);

        if (rbc.m_contractionOffset_ == 0) { // contraction can be null
            rbc.m_contractionOffset_ = mapping;
            contractionCE = mapping;
        }
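        // The header stores byte offsets of consecutive tables in the data image;
        // each table's size is the difference between its offset and the next one's.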
        m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
        m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
        m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
        m_contractionCESize_ = mapping - contractionCE;
        // m_trieSize_ = expansionEndCE - mapping;
        m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
        m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
        m_unsafeSize_ = contractionEnd - unsafe;
        // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
        m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;

        // treat it as normal collator first
        // for normal collator there is no UCA contraction
        // contractions (UChar[contractionSize] + CE[contractionSize])
        m_contractionSize_ = contractionSize * 2 + contractionSize * 4;

        rbc.m_contractionOffset_ >>= 1; // casting to chars
        rbc.m_expansionOffset_ >>= 2; // casting to ints
    }

    /**
     * Reads and breaks up the collation options passed in the stream of data and updates the argument Collator with
     * the results.
     * 
     * @param rbc
     *            RuleBasedCollator to populate
     * @exception IOException
     *                thrown when there's a data error.
     */
    private void readOptions(RuleBasedCollator rbc) throws IOException {
        int readcount = 0;
        rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
        readcount += 4;
        rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
        readcount += 4;
        // rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
        // == RuleBasedCollator.AttributeValue.ON_);
        int defaultIsCaseLevel = m_dataInputStream_.readInt();
        rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        int value = m_dataInputStream_.readInt();
        readcount += 4;
        if (value == RuleBasedCollator.AttributeValue.ON_) {
            value = Collator.CANONICAL_DECOMPOSITION;
        } else {
            value = Collator.NO_DECOMPOSITION;
        }
        rbc.m_defaultDecomposition_ = value;
        rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        m_dataInputStream_.skip(60); // reserved for future use
        readcount += 60;
        m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
        if (m_optionSize_ < readcount) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Option size error");
            // /CLOVER:ON
        }
    }

    /**
     * Reads and breaks up the stream of data passed in as arguments into meaningful Collator data.
     * 
     * @param rbc
     *            RuleBasedCollator to populate
     * @param UCAConst
     *            object to fill with UCA constants if we are reading the UCA collator; otherwise null
     * @param leadByteConstants
     *            object to fill with the lead byte constant data, or null
     * @param maxUCAContractionLength
     *            if not null, receives the maximum UCA contraction length
     * @return array filled with the UCA contractions if we are reading the UCA collator, otherwise null
     * @exception IOException
     *                thrown when there's a data error.
     */
    private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
            RuleBasedCollator.LeadByteConstants leadByteConstants,
            Output<Integer> maxUCAContractionLength) throws IOException {
        char ucaContractions[] = null; // return result

        readHeader(rbc, maxUCAContractionLength);
        // header size has been checked by readHeader
        int readcount = m_headerSize_;
        // option size has been checked by readOptions
        readOptions(rbc);
        readcount += m_optionSize_;
        m_expansionSize_ >>= 2;
        rbc.m_expansion_ = new int[m_expansionSize_];
        for (int i = 0; i < m_expansionSize_; i++) {
            rbc.m_expansion_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionSize_ << 2);
        if (m_contractionIndexSize_ > 0) {
            m_contractionIndexSize_ >>= 1;
            rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
            for (int i = 0; i < m_contractionIndexSize_; i++) {
                rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
            }
            readcount += (m_contractionIndexSize_ << 1);
            m_contractionCESize_ >>= 2;
            rbc.m_contractionCE_ = new int[m_contractionCESize_];
            for (int i = 0; i < m_contractionCESize_; i++) {
                rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
            }
            readcount += (m_contractionCESize_ << 2);
        }
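        // The code point-to-CE mapping is stored as a serialized IntTrie; the collator
        // requires the trie to have a linear Latin-1 data block.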
        rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
        if (!rbc.m_trie_.isLatin1Linear()) {
            throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
                    + "latin one data arrays");
        }
        readcount += rbc.m_trie_.getSerializedDataSize();
        m_expansionEndCESize_ >>= 2;
        rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
        for (int i = 0; i < m_expansionEndCESize_; i++) {
            rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionEndCESize_ << 2);
        rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
        for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
            rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_expansionEndCEMaxSizeSize_;
        rbc.m_unsafe_ = new byte[m_unsafeSize_];
        for (int i = 0; i < m_unsafeSize_; i++) {
            rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_unsafeSize_;
        if (UCAConst != null) {
            // we are reading the UCA
            // unfortunately the UCA offset in any collator data is not 0 and
            // only refers to the UCA data
            // m_contractionSize_ -= m_UCAValuesSize_;
            m_contractionSize_ = m_UCAConstOffset_ - readcount;
        } else {
            m_contractionSize_ = m_size_ - readcount;
        }
        rbc.m_contractionEnd_ = new byte[m_contractionSize_];
        for (int i = 0; i < m_contractionSize_; i++) {
            rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_contractionSize_;
        if (UCAConst != null) {
            UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            int readUCAConstcount = 4;
            UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;

            readcount += readUCAConstcount;

            int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
            assert resultsize == m_UCAcontractionSize_ / 2;
            ucaContractions = new char[resultsize];
            for (int i = 0; i < resultsize; i++) {
                ucaContractions[i] = m_dataInputStream_.readChar();
            }
            readcount += m_UCAcontractionSize_;
        }

        if (leadByteConstants != null) {
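            // Skip forward to the script-to-lead-byte table offset recorded in the header,
            // then read the lead byte constant data.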
            readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
            leadByteConstants.read(m_dataInputStream_);
            readcount += leadByteConstants.getSerializedDataSize();
        }

        if (readcount != m_size_) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Data file size error");
            // /CLOVER:ON
        }
        return ucaContractions;
    }

    /**
     * Reads in the inverse UCA data.
     * 
     * @param inputStream
     *            input stream with the inverse UCA data
     * @return an object containing the inverse UCA data
     * @exception IOException
     *                thrown when an error occurs while reading the inverse UCA data
     */
    private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
        ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);

        // TODO: Check that the invuca data version (readHeader() return value)
        // matches the ucadata version.
        // throw new IOException(WRONG_UNICODE_VERSION_ERROR_);

        CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
        DataInputStream input = new DataInputStream(inputStream);
        input.readInt(); // bytesize
        int tablesize = input.readInt(); // in int size
        int contsize = input.readInt(); // in char size
        input.readInt(); // table in bytes
        input.readInt(); // conts in bytes
        result.m_UCA_version_ = readVersion(input);
        input.skipBytes(8); // skip padding

        int size = tablesize * 3; // one column for each strength
        result.m_table_ = new int[size];
        result.m_continuations_ = new char[contsize];

        for (int i = 0; i < size; i++) {
            result.m_table_[i] = input.readInt();
        }
        for (int i = 0; i < contsize; i++) {
            result.m_continuations_[i] = input.readChar();
        }
        input.close();
        return result;
    }

    /**
     * Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions.
     * 
     * @param input
     *            already instantiated DataInputStream, positioned at the start of the four version bytes
     * @return a ready VersionInfo object
     * @throws IOException
     *             thrown when an error occurs while reading the version bytes
     */
    protected static VersionInfo readVersion(DataInputStream input) throws IOException {
        byte[] version = new byte[4];
        version[0] = input.readByte();
        version[1] = input.readByte();
        version[2] = input.readByte();
        version[3] = input.readByte();

        VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2],
                (int) version[3]);

        return result;
    }

    // private inner class -----------------------------------------------

    // private variables -------------------------------------------------

    /**
     * Authenticate uca data format version
     */
    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
            // Too harsh
            // && version[1] == DATA_FORMAT_VERSION_[1]
            // && version[2] == DATA_FORMAT_VERSION_[2]
            // && version[3] == DATA_FORMAT_VERSION_[3];
        }
    };

    /**
     * Authenticate inverse uca data format version
     */
    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                    && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
        }
    };

    /**
     * Data input stream for uca.icu
     */
    private DataInputStream m_dataInputStream_;

    /**
     * File format version and id that this class understands. No guarantees are made if an older version is used
     */
    private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
    private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
    /**
     * Inverse UCA file format version and id that this class understands. No guarantees are made if an older version
     * is used
     */
    private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
    private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };

    /**
     * Wrong unicode version error string
     */
    // private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";

    /**
     * Size of expansion table in bytes
     */
    private int m_expansionSize_;
    /**
     * Size of contraction index table in bytes
     */
    private int m_contractionIndexSize_;
    /**
     * Size of contraction table in bytes
     */
    private int m_contractionCESize_;
    /*
     * Size of the Trie in bytes
     */
    // private int m_trieSize_;
    /**
     * Size of the table that contains information about collation elements that end with an expansion
     */
    private int m_expansionEndCESize_;
    /**
     * Size of the table that contains information about the maximum size of collation elements that end with a
     * particular expansion CE corresponding to the ones in expansionEndCE
     */
    private int m_expansionEndCEMaxSizeSize_;
    /**
     * Size of the option table that contains information about the collation options
     */
    private int m_optionSize_;
    /**
     * Size of the whole data file minus the ICU header
     */
    private int m_size_;
    /**
     * Size of the collation data header
     */
    private int m_headerSize_;
    /**
     * Size of the table that contains information about the "Unsafe" code points
     */
    private int m_unsafeSize_;
    /**
     * Size in bytes of the table that contains information about code points that end with a contraction
     */
    private int m_contractionSize_;
    /**
     * Size of the table that contains UCA contraction information in bytes
     */
    private int m_UCAcontractionSize_;
    /**
     * Offset of the UCA Const
     */
    private int m_UCAConstOffset_;

    // private methods ---------------------------------------------------

}