]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / classes / collate / src / com / ibm / icu / text / CollatorReader.java
1 /**\r
2 *******************************************************************************\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4 * others. All Rights Reserved.                                                *\r
5 *******************************************************************************\r
6 */\r
7 package com.ibm.icu.text;\r
8 \r
9 import java.io.BufferedInputStream;\r
10 import java.io.DataInputStream;\r
11 import java.io.IOException;\r
12 import java.io.InputStream;\r
13 import java.nio.ByteBuffer;\r
14 \r
15 import com.ibm.icu.impl.ICUBinary;\r
16 import com.ibm.icu.impl.ICUData;\r
17 import com.ibm.icu.impl.ICUResourceBundle;\r
18 import com.ibm.icu.impl.IntTrie;\r
19 import com.ibm.icu.lang.UCharacter;\r
20 import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;\r
21 import com.ibm.icu.text.RuleBasedCollator.UCAConstants;\r
22 import com.ibm.icu.util.VersionInfo;\r
23 \r
24 /**\r
25 * <p>Internal reader class for ICU data file uca.icu containing \r
26 * Unicode Collation Algorithm data.</p> \r
27 * <p>This class simply reads uca.icu, authenticates that it is a valid\r
* ICU data file and splits its contents up into blocks of data for use in
* <a href=Collator.html>com.ibm.icu.text.Collator</a>.
* </p> 
* <p>uca.icu, which is in big-endian format, is packaged in the same jar as
* this package.</p>
33 * @author Syn Wee Quek\r
34 * @since release 2.2, April 18 2002\r
35 */\r
36 \r
37 final class CollatorReader\r
38 {          \r
39     static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {\r
40         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");\r
41         BufferedInputStream b = new BufferedInputStream(i, 90000);\r
42         CollatorReader reader = new CollatorReader(b);\r
43         char[] result = reader.readImp(rbc, ucac);\r
44         b.close();\r
45         return result;\r
46     }\r
47 \r
48     public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {\r
49         return new InputStream() {\r
50             public int read() throws IOException {\r
51                 if (!buf.hasRemaining()) {\r
52                     return -1;\r
53                 }\r
54                 return buf.get() & 0xff;\r
55             }\r
56             public int read(byte[] bytes, int off, int len) throws IOException {\r
57                 len = Math.min(len, buf.remaining());\r
58                 buf.get(bytes, off, len);\r
59                 return len;\r
60             }\r
61         };\r
62     }\r
63 \r
64     static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {\r
65         final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;\r
66         int dataLength = data.remaining();\r
67         // TODO: Change the rest of this class to use the ByteBuffer directly, rather than\r
68         // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().\r
69         // Consider changing ICUBinary to also work with a ByteBuffer.\r
70         CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);\r
71         if (dataLength > MIN_BINARY_DATA_SIZE_) {\r
72             reader.readImp(rbc, null);\r
73         } else {\r
74             reader.readHeader(rbc);\r
75             reader.readOptions(rbc);\r
76             // duplicating UCA_'s data\r
77             rbc.setWithUCATables();\r
78         }\r
79     }\r
80     \r
81     static InverseUCA getInverseUCA() throws IOException {\r
82         InverseUCA result = null;\r
83         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");\r
84 //        try    {\r
85 //            String invdat = "/com/ibm/icu/impl/data/invuca.icu";\r
86 //            InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);\r
87             BufferedInputStream b = new BufferedInputStream(i, 110000);\r
88             result = CollatorReader.readInverseUCA(b);\r
89             b.close();\r
90             i.close();\r
91             return result;\r
92 //        } catch (Exception e) {\r
93 //            throw new RuntimeException(e.getMessage());\r
94 //        }\r
95     }\r
96     \r
97     // protected constructor ---------------------------------------------\r
98     \r
99     /**\r
100     * <p>Protected constructor.</p>\r
101     * @param inputStream ICU collator file input stream\r
102     * @exception IOException throw if data file fails authentication \r
103     */\r
104     private CollatorReader(InputStream inputStream) throws IOException\r
105     {\r
106         this(inputStream, true);\r
107         /*\r
108         byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);\r
109         // weiv: check that we have the correct Unicode version in \r
110         // binary files\r
111         VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
112         if(UnicodeVersion[0] != UCDVersion.getMajor() \r
113         || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
114             throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
115         }\r
116         m_dataInputStream_ = new DataInputStream(inputStream);\r
117         */\r
118     }\r
119     \r
120     /**\r
121     * <p>Protected constructor.</p>\r
122     * @param inputStream ICU uprops.icu file input stream\r
123     * @param readICUHeader flag to indicate if the ICU header has to be read\r
124     * @exception IOException throw if data file fails authentication \r
125     */\r
126     private CollatorReader(InputStream inputStream, boolean readICUHeader) \r
127                                                             throws IOException\r
128     {\r
129         if (readICUHeader) {\r
130             byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, \r
131                                  UCA_AUTHENTICATE_);\r
132             // weiv: check that we have the correct Unicode version in \r
133             // binary files\r
134             VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
135             if(UnicodeVersion[0] != UCDVersion.getMajor() \r
136             || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
137                 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
138             }\r
139         }\r
140         m_dataInputStream_ = new DataInputStream(inputStream);\r
141     }\r
142   \r
143     // protected methods -------------------------------------------------\r
144       \r
145     /**\r
146     * Read and break up the header stream of data passed in as arguments into \r
147     * meaningful Collator data.\r
148     * @param rbc RuleBasedCollator to populate with header information\r
149     * @exception IOException thrown when there's a data error.\r
150     */\r
151     private void readHeader(RuleBasedCollator rbc) throws IOException\r
152     {\r
153         m_size_ = m_dataInputStream_.readInt();\r
154         // all the offsets are in bytes\r
155         // to get the address add to the header address and cast properly\r
156         // Default options int options\r
157         m_headerSize_ = m_dataInputStream_.readInt(); // start of options\r
158         int readcount = 8; // for size and headersize\r
159         // structure which holds values for indirect positioning and implicit\r
160         // ranges\r
161         int UCAConst = m_dataInputStream_.readInt();\r
162         readcount += 4;\r
163         // this one is needed only for UCA, to copy the appropriate\r
164         // contractions\r
165         m_dataInputStream_.skip(4);\r
166         readcount += 4;\r
167         // reserved for future use\r
168         m_dataInputStream_.skipBytes(4);\r
169         readcount += 4;\r
170         // const uint8_t *mappingPosition;\r
171         int mapping = m_dataInputStream_.readInt();\r
172         readcount += 4;\r
173         // uint32_t *expansion;\r
174         rbc.m_expansionOffset_ = m_dataInputStream_.readInt();\r
175         readcount += 4;\r
176         // UChar *contractionIndex;\r
177         rbc.m_contractionOffset_ = m_dataInputStream_.readInt();\r
178         readcount += 4;\r
179         // uint32_t *contractionCEs;\r
180         int contractionCE = m_dataInputStream_.readInt();\r
181         readcount += 4;\r
182         // needed for various closures int contractionSize\r
183         /*int contractionSize = */m_dataInputStream_.readInt();\r
184         readcount += 4;\r
185         // array of last collation element in expansion\r
186         int expansionEndCE = m_dataInputStream_.readInt();\r
187         readcount += 4;\r
188         // array of maximum expansion size corresponding to the expansion\r
189         // collation elements with last element in expansionEndCE\r
190         int expansionEndCEMaxSize = m_dataInputStream_.readInt();\r
191         readcount += 4;\r
192         // size of endExpansionCE int expansionEndCESize\r
193         m_dataInputStream_.skipBytes(4);\r
194         readcount += 4;\r
195         // hash table of unsafe code points\r
196         int unsafe = m_dataInputStream_.readInt();\r
197         readcount += 4;\r
198         // hash table of final code points in contractions.\r
199         int contractionEnd = m_dataInputStream_.readInt();\r
200         readcount += 4;\r
201         // int CEcount = m_dataInputStream_.readInt();\r
202         m_dataInputStream_.skipBytes(4);\r
203         readcount += 4;\r
204         // is jamoSpecial\r
205         rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();\r
206         readcount++;\r
207         // padding\r
208         m_dataInputStream_.skipBytes(3);\r
209         readcount += 3;\r
210         rbc.m_version_ = readVersion(m_dataInputStream_);\r
211         readcount += 4;\r
212         rbc.m_UCA_version_ = readVersion(m_dataInputStream_);\r
213         readcount += 4;\r
214         rbc.m_UCD_version_ = readVersion(m_dataInputStream_);\r
215         readcount += 4;\r
216         // byte charsetName[] = new byte[32]; // for charset CEs\r
217         m_dataInputStream_.skipBytes(32);\r
218         readcount += 32;\r
219         m_dataInputStream_.skipBytes(56); // for future use\r
220         readcount += 56;\r
221         if (m_headerSize_ < readcount) {\r
222             ///CLOVER:OFF\r
223             throw new IOException("Internal Error: Header size error");\r
224             ///CLOVER:ON\r
225         }\r
226         m_dataInputStream_.skipBytes(m_headerSize_ - readcount);\r
227 \r
228         if (rbc.m_contractionOffset_ == 0) { // contraction can be null\r
229             rbc.m_contractionOffset_ = mapping;\r
230             contractionCE = mapping;\r
231         }\r
232         m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;\r
233         m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;\r
234         m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;\r
235         m_contractionCESize_ = mapping - contractionCE;\r
236         //m_trieSize_ = expansionEndCE - mapping;\r
237         m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;\r
238         m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;\r
239         m_unsafeSize_ = contractionEnd - unsafe;\r
240         m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled\r
241                                                 // later\r
242         // treat it as normal collator first\r
243         // for normal collator there is no UCA contraction\r
244         m_contractionEndSize_ = m_size_ - contractionEnd;\r
245 \r
246         rbc.m_contractionOffset_ >>= 1; // casting to ints\r
247         rbc.m_expansionOffset_ >>= 2; // casting to chars\r
248     }\r
249     \r
250     /**\r
251      * Read and break up the collation options passed in the stream of data and\r
252      * update the argument Collator with the results\r
253      * \r
254      * @param rbc\r
255      *            RuleBasedCollator to populate\r
256      * @exception IOException\r
257      *                thrown when there's a data error.\r
258      */\r
259     private void readOptions(RuleBasedCollator rbc) throws IOException\r
260     {\r
261         int readcount = 0;\r
262         rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();\r
263         readcount += 4;\r
264         rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()\r
265                                       == RuleBasedCollator.AttributeValue.ON_);\r
266         readcount += 4;\r
267         rbc.m_defaultIsAlternateHandlingShifted_ \r
268                                    = (m_dataInputStream_.readInt() == \r
269                                     RuleBasedCollator.AttributeValue.SHIFTED_);\r
270         readcount += 4;\r
271         rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();\r
272         readcount += 4;\r
273         rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() \r
274                                      == RuleBasedCollator.AttributeValue.ON_);\r
275         readcount += 4;\r
276         int value = m_dataInputStream_.readInt();\r
277         readcount += 4;\r
278         if (value == RuleBasedCollator.AttributeValue.ON_) {\r
279             value = Collator.CANONICAL_DECOMPOSITION;\r
280         }\r
281         else {\r
282             value = Collator.NO_DECOMPOSITION;\r
283         }\r
284         rbc.m_defaultDecomposition_ = value;\r
285         rbc.m_defaultStrength_ = m_dataInputStream_.readInt();\r
286         readcount += 4;\r
287         rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() \r
288                                      == RuleBasedCollator.AttributeValue.ON_);\r
289         readcount += 4;\r
290         rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() \r
291                                       == RuleBasedCollator.AttributeValue.ON_);\r
292         readcount += 4;\r
293         m_dataInputStream_.skip(60); // reserved for future use\r
294         readcount += 60;\r
295         m_dataInputStream_.skipBytes(m_optionSize_ - readcount);\r
296         if (m_optionSize_ < readcount) {\r
297             ///CLOVER:OFF\r
298             throw new IOException("Internal Error: Option size error");\r
299             ///CLOVER:ON\r
300         }\r
301     }\r
302     \r
303     /**\r
304     * Read and break up the stream of data passed in as arguments into \r
305     * meaningful Collator data.\r
306     * @param rbc RuleBasedCollator to populate\r
307     * @param UCAConst object to fill up with UCA constants if we are reading \r
308     *                 the UCA collator, if not use a null\r
309     * @return UCAContractions array filled up with the UCA contractions if we\r
310     *                        are reading the UCA collator\r
311     * @exception IOException thrown when there's a data error.\r
312     */\r
313     private char[] readImp(RuleBasedCollator rbc, \r
314                           RuleBasedCollator.UCAConstants UCAConst) \r
315                                                             throws IOException\r
316     {\r
317         readHeader(rbc);\r
318         // header size has been checked by readHeader\r
319         int readcount = m_headerSize_; \r
320         // option size has been checked by readOptions\r
321         readOptions(rbc);\r
322         readcount += m_optionSize_;\r
323         m_expansionSize_ >>= 2;\r
324         rbc.m_expansion_ = new int[m_expansionSize_];\r
325         for (int i = 0; i < m_expansionSize_; i ++) {\r
326             rbc.m_expansion_[i] = m_dataInputStream_.readInt();\r
327         }\r
328         readcount += (m_expansionSize_ << 2);\r
329         if (m_contractionIndexSize_ > 0) { \r
330             m_contractionIndexSize_ >>= 1;\r
331             rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];\r
332             for (int i = 0; i < m_contractionIndexSize_; i ++) {\r
333                 rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();\r
334             }\r
335             readcount += (m_contractionIndexSize_ << 1);\r
336             m_contractionCESize_ >>= 2;\r
337             rbc.m_contractionCE_ = new int[m_contractionCESize_];\r
338             for (int i = 0; i < m_contractionCESize_; i ++) {\r
339                 rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();\r
340             }\r
341             readcount += (m_contractionCESize_ << 2);\r
342         }\r
343         rbc.m_trie_ = new IntTrie(m_dataInputStream_, \r
344                                  RuleBasedCollator.DataManipulate.getInstance());\r
345         if (!rbc.m_trie_.isLatin1Linear()) {\r
346             throw new IOException("Data corrupted, " \r
347                                   + "Collator Tries expected to have linear "\r
348                                   + "latin one data arrays");\r
349         }\r
350         readcount += rbc.m_trie_.getSerializedDataSize();\r
351         m_expansionEndCESize_ >>= 2;\r
352         rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];\r
353         for (int i = 0; i < m_expansionEndCESize_; i ++) {\r
354             rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();\r
355         }\r
356         readcount += (m_expansionEndCESize_ << 2);\r
357         rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];\r
358         for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {\r
359             rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();\r
360         }\r
361         readcount += m_expansionEndCEMaxSizeSize_;\r
362         rbc.m_unsafe_ = new byte[m_unsafeSize_];\r
363         for (int i = 0; i < m_unsafeSize_; i ++) {\r
364             rbc.m_unsafe_[i] = m_dataInputStream_.readByte();\r
365         }\r
366         readcount += m_unsafeSize_;\r
367         if (UCAConst != null) {\r
368             // we are reading the UCA\r
369             // unfortunately the UCA offset in any collator data is not 0 and\r
370             // only refers to the UCA data\r
371             m_contractionEndSize_ -= m_UCAValuesSize_;       \r
372         }\r
373         rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];\r
374         for (int i = 0; i < m_contractionEndSize_; i ++) {\r
375             rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();\r
376         }\r
377         readcount += m_contractionEndSize_;\r
378         if (UCAConst != null) {\r
379             UCAConst.FIRST_TERTIARY_IGNORABLE_[0] \r
380                                                = m_dataInputStream_.readInt();\r
381             int readUCAConstcount = 4;\r
382             UCAConst.FIRST_TERTIARY_IGNORABLE_[1] \r
383                                                = m_dataInputStream_.readInt();\r
384             readUCAConstcount += 4;\r
385             UCAConst.LAST_TERTIARY_IGNORABLE_[0] \r
386                                                = m_dataInputStream_.readInt();\r
387             readUCAConstcount += 4;\r
388             UCAConst.LAST_TERTIARY_IGNORABLE_[1] \r
389                                                = m_dataInputStream_.readInt();\r
390             readUCAConstcount += 4;\r
391             UCAConst.FIRST_PRIMARY_IGNORABLE_[0] \r
392                                                = m_dataInputStream_.readInt();\r
393             readUCAConstcount += 4;\r
394             UCAConst.FIRST_PRIMARY_IGNORABLE_[1] \r
395                                                = m_dataInputStream_.readInt();\r
396             readUCAConstcount += 4;\r
397             UCAConst.FIRST_SECONDARY_IGNORABLE_[0] \r
398                                                = m_dataInputStream_.readInt();\r
399             readUCAConstcount += 4;\r
400             UCAConst.FIRST_SECONDARY_IGNORABLE_[1] \r
401                                                = m_dataInputStream_.readInt();\r
402             readUCAConstcount += 4;\r
403             UCAConst.LAST_SECONDARY_IGNORABLE_[0] \r
404                                                = m_dataInputStream_.readInt();\r
405             readUCAConstcount += 4;\r
406             UCAConst.LAST_SECONDARY_IGNORABLE_[1] \r
407                                                = m_dataInputStream_.readInt();\r
408             readUCAConstcount += 4;\r
409             UCAConst.LAST_PRIMARY_IGNORABLE_[0] \r
410                                                = m_dataInputStream_.readInt();\r
411             readUCAConstcount += 4;\r
412             UCAConst.LAST_PRIMARY_IGNORABLE_[1] \r
413                                                = m_dataInputStream_.readInt();\r
414             readUCAConstcount += 4;\r
415             UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();     \r
416             readUCAConstcount += 4;\r
417             UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();\r
418             readUCAConstcount += 4;\r
419             UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt(); \r
420             readUCAConstcount += 4;\r
421             UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();                     \r
422             readUCAConstcount += 4;\r
423             UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  \r
424             readUCAConstcount += 4;\r
425             UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  \r
426             readUCAConstcount += 4;\r
427             UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  \r
428             readUCAConstcount += 4;\r
429             UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  \r
430             readUCAConstcount += 4;\r
431             UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();  \r
432             readUCAConstcount += 4;\r
433             UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();  \r
434             readUCAConstcount += 4;\r
435             UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();  \r
436             readUCAConstcount += 4;\r
437             UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();  \r
438             readUCAConstcount += 4;\r
439             UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();  \r
440             readUCAConstcount += 4;\r
441             UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();  \r
442             readUCAConstcount += 4;\r
443             UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();  \r
444             readUCAConstcount += 4;\r
445             UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();  \r
446             readUCAConstcount += 4;\r
447             UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();  \r
448             readUCAConstcount += 4;\r
449             UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();   \r
450             readUCAConstcount += 4; \r
451             UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();  \r
452             readUCAConstcount += 4;\r
453             UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();   \r
454             readUCAConstcount += 4;\r
455             UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();   \r
456             readUCAConstcount += 4;\r
457             UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();   \r
458             readUCAConstcount += 4;\r
459             UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();   \r
460             readUCAConstcount += 4;\r
461             UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();   \r
462             readUCAConstcount += 4;\r
463             UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();   \r
464             readUCAConstcount += 4;\r
465             int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;\r
466             char result[] = new char[resultsize];\r
467             for (int i = 0; i < resultsize; i ++) {\r
468                 result[i] = m_dataInputStream_.readChar();\r
469             }\r
470             readcount += m_UCAValuesSize_;\r
471             if (readcount != m_size_) {\r
472                 ///CLOVER:OFF\r
473                 throw new IOException("Internal Error: Data file size error");\r
474                 ///CLOVER:ON\r
475             }\r
476             return result;\r
477         }\r
478         if (readcount != m_size_) {\r
479             ///CLOVER:OFF\r
480             throw new IOException("Internal Error: Data file size error");\r
481             ///CLOVER:ON\r
482         }\r
483         return null;\r
484     }\r
485     \r
486     /**\r
487      * Reads in the inverse uca data\r
488      * @param input input stream with the inverse uca data\r
489      * @return an object containing the inverse uca data\r
490      * @exception IOException thrown when error occurs while reading the \r
491      *            inverse uca\r
492      */\r
493     private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(\r
494                                                       InputStream inputStream)\r
495                                                       throws IOException\r
496     {\r
497          byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, \r
498                               INVERSE_UCA_AUTHENTICATE_);\r
499                               \r
500         // weiv: check that we have the correct Unicode version in \r
501         // binary files\r
502         VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
503         if(UnicodeVersion[0] != UCDVersion.getMajor() \r
504         || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
505             throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
506         }\r
507                               \r
508         CollationParsedRuleBuilder.InverseUCA result = \r
509                                   new CollationParsedRuleBuilder.InverseUCA();\r
510         DataInputStream input = new DataInputStream(inputStream);        \r
511         input.readInt(); // bytesize\r
512         int tablesize = input.readInt(); // in int size\r
513         int contsize = input.readInt();  // in char size\r
514         input.readInt(); // table in bytes\r
515         input.readInt(); // conts in bytes\r
516         result.m_UCA_version_ = readVersion(input);\r
517         input.skipBytes(8); // skip padding\r
518         \r
519         int size = tablesize * 3; // one column for each strength\r
520         result.m_table_ = new int[size];\r
521         result.m_continuations_ = new char[contsize];\r
522         \r
523         for (int i = 0; i < size; i ++) {\r
524             result.m_table_[i] = input.readInt();\r
525         }\r
526         for (int i = 0; i < contsize; i ++) {\r
527             result.m_continuations_[i] = input.readChar();\r
528         }\r
529         input.close();\r
530         return result;\r
531     }\r
532     \r
533     /**\r
534      * Reads four bytes from the input and returns a VersionInfo\r
535      * object. Use it to read different collator versions.\r
536      * @param input already instantiated DataInputStream, positioned \r
537      *              at the start of four version bytes\r
538      * @return a ready VersionInfo object\r
539      * @throws IOException thrown when error occurs while reading  \r
540      *            version bytes\r
541      */\r
542     \r
543     protected static VersionInfo readVersion(DataInputStream input) \r
544         throws IOException {\r
545         byte[] version = new byte[4];\r
546         version[0] = input.readByte();\r
547         version[1] = input.readByte();\r
548         version[2] = input.readByte();\r
549         version[3] = input.readByte();\r
550         \r
551         VersionInfo result = \r
552         VersionInfo.getInstance(\r
553             (int)version[0], (int)version[1], \r
554             (int)version[2], (int)version[3]);\r
555         \r
556         return result;\r
557     }\r
558     \r
559     // private inner class -----------------------------------------------\r
560     \r
561     // private variables -------------------------------------------------\r
562     \r
563     /**\r
564      * Authenticate uca data format version\r
565      */\r
566     private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ \r
567                 = new ICUBinary.Authenticate() {\r
568                         public boolean isDataVersionAcceptable(byte version[])\r
569                         {\r
570                             return version[0] == DATA_FORMAT_VERSION_[0] \r
571                                    && version[1] >= DATA_FORMAT_VERSION_[1];\r
572                                    // Too harsh \r
573                                    //&& version[1] == DATA_FORMAT_VERSION_[1]\r
574                                    //&& version[2] == DATA_FORMAT_VERSION_[2] \r
575                                    //&& version[3] == DATA_FORMAT_VERSION_[3];\r
576                         }\r
577                 };\r
578                 \r
579     /**\r
580      * Authenticate uca data format version\r
581      */\r
582     private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ \r
583                 = new ICUBinary.Authenticate() {\r
584                         public boolean isDataVersionAcceptable(byte version[])\r
585                         {\r
586                             return version[0] \r
587                                     == INVERSE_UCA_DATA_FORMAT_VERSION_[0] \r
588                                 && version[1] \r
589                                     >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];\r
590                         }\r
591                 };\r
592   \r
    /**
    * Data input stream that all collation data is read from. It wraps the
    * input stream handed to the constructor, after the ICU header (if any)
    * has been consumed.
    */
    private DataInputStream m_dataInputStream_;
597    \r
598     /**\r
599     * File format version and id that this class understands.\r
600     * No guarantees are made if a older version is used\r
601     */\r
602     private static final byte DATA_FORMAT_VERSION_[] = \r
603                                    {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};\r
604     private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,  \r
605                                                     (byte)0x6f, (byte)0x6c};\r
606     /**\r
607     * Inverse UCA file format version and id that this class understands.\r
608     * No guarantees are made if a older version is used\r
609     */\r
610     private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = \r
611                                    {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};\r
612     private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, \r
613                                                                (byte)0x6e,  \r
614                                                                (byte)0x76, \r
615                                                                (byte)0x43};\r
616                                 \r
617     /**\r
618     * Wrong unicode version error string\r
619     */\r
620     private static final String WRONG_UNICODE_VERSION_ERROR_ =\r
621                                 "Unicode version in binary image is not compatible with the current Unicode version";\r
622 \r
    /**
     * Size of expansion table. Set by readHeader in bytes; readImp converts
     * it to a number of ints before reading the table.
     */
    private int m_expansionSize_;
    /**
     * Size of contraction index table. Set by readHeader in bytes; readImp
     * converts it to a number of chars before reading the table.
     */
    private int m_contractionIndexSize_;
    /**
     * Size of contraction table. Set by readHeader in bytes; readImp
     * converts it to a number of ints before reading the table.
     */
    private int m_contractionCESize_;
    /*
     * Size of the Trie in bytes (currently unused, the trie reports its own
     * serialized size)
     */
    //private int m_trieSize_;
    /**
     * Size of the table that contains information about collation elements
     * that end with an expansion. Set by readHeader in bytes; readImp
     * converts it to a number of ints before reading the table.
     */
    private int m_expansionEndCESize_;
    /**
     * Size in bytes of the table that contains information about the maximum
     * size of collation elements that end with a particular expansion CE
     * corresponding to the ones in expansionEndCE
     */
    private int m_expansionEndCEMaxSizeSize_;
    /**
     * Size in bytes of the option table that contains information about the
     * collation options; computed by readHeader as the distance between the
     * end of the header and the start of the expansion table
     */
    private int m_optionSize_;
    /**
     * Size of the whole data file minus the ICU header; the first value
     * read by readHeader
     */
    private int m_size_;
    /**
     * Size in bytes of the collation data header
     */
    private int m_headerSize_;
    /**
     * Size in bytes of the table that contains information about the
     * "Unsafe" codepoints
     */
    private int m_unsafeSize_;
    /**
     * Size in bytes of the table that contains information about codepoints
     * that end a contraction. readHeader sets it to everything from the
     * table's offset to the end of the data; readImp subtracts the UCA
     * values size from it when reading the UCA collator.
     */
    private int m_contractionEndSize_;
    /**
     * Size in bytes of the trailing UCA values (UCA constants followed by
     * UCA contraction information), measured from the UCA constants offset
     * to the end of the data
     */
    private int m_UCAValuesSize_;
677       \r
678     // private methods ---------------------------------------------------\r
679       \r
680 }\r
681 \r