]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/text/CollatorReader.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / text / CollatorReader.java
1 /**\r
2 *******************************************************************************\r
3 * Copyright (C) 1996-2008, International Business Machines Corporation and    *\r
4 * others. All Rights Reserved.                                                *\r
5 *******************************************************************************\r
6 */\r
7 package com.ibm.icu.text;\r
8 \r
9 import java.io.BufferedInputStream;\r
10 import java.io.ByteArrayInputStream;\r
11 import java.io.InputStream;\r
12 import java.io.DataInputStream;\r
13 import java.io.IOException;\r
14 \r
15 import com.ibm.icu.impl.ICUBinary;\r
16 import com.ibm.icu.impl.ICUData;\r
17 import com.ibm.icu.impl.ICUResourceBundle;\r
18 import com.ibm.icu.impl.IntTrie;\r
19 import com.ibm.icu.lang.UCharacter;\r
20 import com.ibm.icu.util.VersionInfo;\r
21 import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;\r
22 import com.ibm.icu.text.RuleBasedCollator.UCAConstants;\r
23 \r
/**
* <p>Internal reader class for the ICU data files ucadata.icu and invuca.icu,
* which contain Unicode Collation Algorithm data.</p>
* <p>This class reads a data file, authenticates that it is a valid
* ICU data file and splits its contents up into blocks of data for use in
* <a href=Collator.html>com.ibm.icu.text.Collator</a>.
* </p>
* <p>The data files, which are in big-endian format, are packaged in the
* jar together with this package.</p>
* @author Syn Wee Quek
* @since release 2.2, April 18 2002
*/
36 \r
37 final class CollatorReader\r
38 {          \r
39     static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {\r
40         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");\r
41         BufferedInputStream b = new BufferedInputStream(i, 90000);\r
42         CollatorReader reader = new CollatorReader(b);\r
43         char[] result = reader.readImp(rbc, ucac);\r
44         b.close();\r
45         return result;\r
46     }\r
47     \r
48     static void initRBC(RuleBasedCollator rbc, byte[] data) throws IOException {\r
49         final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;\r
50         \r
51         InputStream i = new ByteArrayInputStream(data);\r
52         BufferedInputStream b = new BufferedInputStream(i);\r
53         CollatorReader reader = new CollatorReader(b, false);\r
54         if (data.length > MIN_BINARY_DATA_SIZE_) {\r
55             reader.readImp(rbc, null);\r
56         } else {\r
57             reader.readHeader(rbc);\r
58             reader.readOptions(rbc);\r
59             // duplicating UCA_'s data\r
60             rbc.setWithUCATables();\r
61         }\r
62     }\r
63     \r
64     static InverseUCA getInverseUCA() throws IOException {\r
65         InverseUCA result = null;\r
66         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");\r
67 //        try    {\r
68 //            String invdat = "/com/ibm/icu/impl/data/invuca.icu";\r
69 //            InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);\r
70             BufferedInputStream b = new BufferedInputStream(i, 110000);\r
71             result = CollatorReader.readInverseUCA(b);\r
72             b.close();\r
73             i.close();\r
74             return result;\r
75 //        } catch (Exception e) {\r
76 //            throw new RuntimeException(e.getMessage());\r
77 //        }\r
78     }\r
79     \r
80     // protected constructor ---------------------------------------------\r
81     \r
/**
 * <p>Private constructor for a stream that starts with an ICU data
 * header.</p>
 * <p>Delegates to the two-argument constructor with the header check
 * switched on.</p>
 * @param inputStream ICU collator file input stream
 * @exception IOException thrown by the delegated constructor when the
 *            header check fails
 */
private CollatorReader(InputStream inputStream) throws IOException
{
    this(inputStream, true);
}
102     \r
103     /**\r
104     * <p>Protected constructor.</p>\r
105     * @param inputStream ICU uprops.icu file input stream\r
106     * @param readICUHeader flag to indicate if the ICU header has to be read\r
107     * @exception IOException throw if data file fails authentication \r
108     */\r
109     private CollatorReader(InputStream inputStream, boolean readICUHeader) \r
110                                                             throws IOException\r
111     {\r
112         if (readICUHeader) {\r
113             byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, \r
114                                  UCA_AUTHENTICATE_);\r
115             // weiv: check that we have the correct Unicode version in \r
116             // binary files\r
117             VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
118             if(UnicodeVersion[0] != UCDVersion.getMajor() \r
119             || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
120                 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
121             }\r
122         }\r
123         m_dataInputStream_ = new DataInputStream(inputStream);\r
124     }\r
125   \r
126     // protected methods -------------------------------------------------\r
127       \r
128     /**\r
129     * Read and break up the header stream of data passed in as arguments into \r
130     * meaningful Collator data.\r
131     * @param rbc RuleBasedCollator to populate with header information\r
132     * @exception IOException thrown when there's a data error.\r
133     */\r
134     private void readHeader(RuleBasedCollator rbc) throws IOException\r
135     {\r
136         m_size_ = m_dataInputStream_.readInt();\r
137         // all the offsets are in bytes\r
138         // to get the address add to the header address and cast properly\r
139         // Default options int options\r
140         m_headerSize_ = m_dataInputStream_.readInt(); // start of options\r
141         int readcount = 8; // for size and headersize\r
142         // structure which holds values for indirect positioning and implicit\r
143         // ranges\r
144         int UCAConst = m_dataInputStream_.readInt();\r
145         readcount += 4;\r
146         // this one is needed only for UCA, to copy the appropriate\r
147         // contractions\r
148         m_dataInputStream_.skip(4);\r
149         readcount += 4;\r
150         // reserved for future use\r
151         m_dataInputStream_.skipBytes(4);\r
152         readcount += 4;\r
153         // const uint8_t *mappingPosition;\r
154         int mapping = m_dataInputStream_.readInt();\r
155         readcount += 4;\r
156         // uint32_t *expansion;\r
157         rbc.m_expansionOffset_ = m_dataInputStream_.readInt();\r
158         readcount += 4;\r
159         // UChar *contractionIndex;\r
160         rbc.m_contractionOffset_ = m_dataInputStream_.readInt();\r
161         readcount += 4;\r
162         // uint32_t *contractionCEs;\r
163         int contractionCE = m_dataInputStream_.readInt();\r
164         readcount += 4;\r
165         // needed for various closures int contractionSize\r
166         /*int contractionSize = */m_dataInputStream_.readInt();\r
167         readcount += 4;\r
168         // array of last collation element in expansion\r
169         int expansionEndCE = m_dataInputStream_.readInt();\r
170         readcount += 4;\r
171         // array of maximum expansion size corresponding to the expansion\r
172         // collation elements with last element in expansionEndCE\r
173         int expansionEndCEMaxSize = m_dataInputStream_.readInt();\r
174         readcount += 4;\r
175         // size of endExpansionCE int expansionEndCESize\r
176         m_dataInputStream_.skipBytes(4);\r
177         readcount += 4;\r
178         // hash table of unsafe code points\r
179         int unsafe = m_dataInputStream_.readInt();\r
180         readcount += 4;\r
181         // hash table of final code points in contractions.\r
182         int contractionEnd = m_dataInputStream_.readInt();\r
183         readcount += 4;\r
184         // int CEcount = m_dataInputStream_.readInt();\r
185         m_dataInputStream_.skipBytes(4);\r
186         readcount += 4;\r
187         // is jamoSpecial\r
188         rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();\r
189         readcount++;\r
190         // padding\r
191         m_dataInputStream_.skipBytes(3);\r
192         readcount += 3;\r
193         rbc.m_version_ = readVersion(m_dataInputStream_);\r
194         readcount += 4;\r
195         rbc.m_UCA_version_ = readVersion(m_dataInputStream_);\r
196         readcount += 4;\r
197         rbc.m_UCD_version_ = readVersion(m_dataInputStream_);\r
198         readcount += 4;\r
199         // byte charsetName[] = new byte[32]; // for charset CEs\r
200         m_dataInputStream_.skipBytes(32);\r
201         readcount += 32;\r
202         m_dataInputStream_.skipBytes(56); // for future use\r
203         readcount += 56;\r
204         if (m_headerSize_ < readcount) {\r
205             throw new IOException("Internal Error: Header size error");\r
206         }\r
207         m_dataInputStream_.skipBytes(m_headerSize_ - readcount);\r
208 \r
209         if (rbc.m_contractionOffset_ == 0) { // contraction can be null\r
210             rbc.m_contractionOffset_ = mapping;\r
211             contractionCE = mapping;\r
212         }\r
213         m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;\r
214         m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;\r
215         m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;\r
216         m_contractionCESize_ = mapping - contractionCE;\r
217         //m_trieSize_ = expansionEndCE - mapping;\r
218         m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;\r
219         m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;\r
220         m_unsafeSize_ = contractionEnd - unsafe;\r
221         m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled\r
222                                                 // later\r
223         // treat it as normal collator first\r
224         // for normal collator there is no UCA contraction\r
225         m_contractionEndSize_ = m_size_ - contractionEnd;\r
226 \r
227         rbc.m_contractionOffset_ >>= 1; // casting to ints\r
228         rbc.m_expansionOffset_ >>= 2; // casting to chars\r
229     }\r
230     \r
231     /**\r
232      * Read and break up the collation options passed in the stream of data and\r
233      * update the argument Collator with the results\r
234      * \r
235      * @param rbc\r
236      *            RuleBasedCollator to populate\r
237      * @exception IOException\r
238      *                thrown when there's a data error.\r
239      */\r
240     private void readOptions(RuleBasedCollator rbc) throws IOException\r
241     {\r
242         int readcount = 0;\r
243         rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();\r
244         readcount += 4;\r
245         rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()\r
246                                       == RuleBasedCollator.AttributeValue.ON_);\r
247         readcount += 4;\r
248         rbc.m_defaultIsAlternateHandlingShifted_ \r
249                                    = (m_dataInputStream_.readInt() == \r
250                                     RuleBasedCollator.AttributeValue.SHIFTED_);\r
251         readcount += 4;\r
252         rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();\r
253         readcount += 4;\r
254         rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() \r
255                                      == RuleBasedCollator.AttributeValue.ON_);\r
256         readcount += 4;\r
257         int value = m_dataInputStream_.readInt();\r
258         readcount += 4;\r
259         if (value == RuleBasedCollator.AttributeValue.ON_) {\r
260             value = Collator.CANONICAL_DECOMPOSITION;\r
261         }\r
262         else {\r
263             value = Collator.NO_DECOMPOSITION;\r
264         }\r
265         rbc.m_defaultDecomposition_ = value;\r
266         rbc.m_defaultStrength_ = m_dataInputStream_.readInt();\r
267         readcount += 4;\r
268         rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() \r
269                                      == RuleBasedCollator.AttributeValue.ON_);\r
270         readcount += 4;\r
271         rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() \r
272                                       == RuleBasedCollator.AttributeValue.ON_);\r
273         readcount += 4;\r
274         m_dataInputStream_.skip(60); // reserved for future use\r
275         readcount += 60;\r
276         m_dataInputStream_.skipBytes(m_optionSize_ - readcount);\r
277         if (m_optionSize_ < readcount) {\r
278             throw new IOException("Internal Error: Option size error");\r
279         }\r
280     }\r
281     \r
282     /**\r
283     * Read and break up the stream of data passed in as arguments into \r
284     * meaningful Collator data.\r
285     * @param rbc RuleBasedCollator to populate\r
286     * @param UCAConst object to fill up with UCA constants if we are reading \r
287     *                 the UCA collator, if not use a null\r
288     * @return UCAContractions array filled up with the UCA contractions if we\r
289     *                        are reading the UCA collator\r
290     * @exception IOException thrown when there's a data error.\r
291     */\r
292     private char[] readImp(RuleBasedCollator rbc, \r
293                           RuleBasedCollator.UCAConstants UCAConst) \r
294                                                             throws IOException\r
295     {\r
296         readHeader(rbc);\r
297         // header size has been checked by readHeader\r
298         int readcount = m_headerSize_; \r
299         // option size has been checked by readOptions\r
300         readOptions(rbc);\r
301         readcount += m_optionSize_;\r
302         m_expansionSize_ >>= 2;\r
303         rbc.m_expansion_ = new int[m_expansionSize_];\r
304         for (int i = 0; i < m_expansionSize_; i ++) {\r
305             rbc.m_expansion_[i] = m_dataInputStream_.readInt();\r
306         }\r
307         readcount += (m_expansionSize_ << 2);\r
308         if (m_contractionIndexSize_ > 0) { \r
309             m_contractionIndexSize_ >>= 1;\r
310             rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];\r
311             for (int i = 0; i < m_contractionIndexSize_; i ++) {\r
312                 rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();\r
313             }\r
314             readcount += (m_contractionIndexSize_ << 1);\r
315             m_contractionCESize_ >>= 2;\r
316             rbc.m_contractionCE_ = new int[m_contractionCESize_];\r
317             for (int i = 0; i < m_contractionCESize_; i ++) {\r
318                 rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();\r
319             }\r
320             readcount += (m_contractionCESize_ << 2);\r
321         }\r
322         rbc.m_trie_ = new IntTrie(m_dataInputStream_, \r
323                                  RuleBasedCollator.DataManipulate.getInstance());\r
324         if (!rbc.m_trie_.isLatin1Linear()) {\r
325             throw new IOException("Data corrupted, " \r
326                                   + "Collator Tries expected to have linear "\r
327                                   + "latin one data arrays");\r
328         }\r
329         readcount += rbc.m_trie_.getSerializedDataSize();\r
330         m_expansionEndCESize_ >>= 2;\r
331         rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];\r
332         for (int i = 0; i < m_expansionEndCESize_; i ++) {\r
333             rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();\r
334         }\r
335         readcount += (m_expansionEndCESize_ << 2);\r
336         rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];\r
337         for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {\r
338             rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();\r
339         }\r
340         readcount += m_expansionEndCEMaxSizeSize_;\r
341         rbc.m_unsafe_ = new byte[m_unsafeSize_];\r
342         for (int i = 0; i < m_unsafeSize_; i ++) {\r
343             rbc.m_unsafe_[i] = m_dataInputStream_.readByte();\r
344         }\r
345         readcount += m_unsafeSize_;\r
346         if (UCAConst != null) {\r
347             // we are reading the UCA\r
348             // unfortunately the UCA offset in any collator data is not 0 and\r
349             // only refers to the UCA data\r
350             m_contractionEndSize_ -= m_UCAValuesSize_;       \r
351         }\r
352         rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];\r
353         for (int i = 0; i < m_contractionEndSize_; i ++) {\r
354             rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();\r
355         }\r
356         readcount += m_contractionEndSize_;\r
357         if (UCAConst != null) {\r
358             UCAConst.FIRST_TERTIARY_IGNORABLE_[0] \r
359                                                = m_dataInputStream_.readInt();\r
360             int readUCAConstcount = 4;\r
361             UCAConst.FIRST_TERTIARY_IGNORABLE_[1] \r
362                                                = m_dataInputStream_.readInt();\r
363             readUCAConstcount += 4;\r
364             UCAConst.LAST_TERTIARY_IGNORABLE_[0] \r
365                                                = m_dataInputStream_.readInt();\r
366             readUCAConstcount += 4;\r
367             UCAConst.LAST_TERTIARY_IGNORABLE_[1] \r
368                                                = m_dataInputStream_.readInt();\r
369             readUCAConstcount += 4;\r
370             UCAConst.FIRST_PRIMARY_IGNORABLE_[0] \r
371                                                = m_dataInputStream_.readInt();\r
372             readUCAConstcount += 4;\r
373             UCAConst.FIRST_PRIMARY_IGNORABLE_[1] \r
374                                                = m_dataInputStream_.readInt();\r
375             readUCAConstcount += 4;\r
376             UCAConst.FIRST_SECONDARY_IGNORABLE_[0] \r
377                                                = m_dataInputStream_.readInt();\r
378             readUCAConstcount += 4;\r
379             UCAConst.FIRST_SECONDARY_IGNORABLE_[1] \r
380                                                = m_dataInputStream_.readInt();\r
381             readUCAConstcount += 4;\r
382             UCAConst.LAST_SECONDARY_IGNORABLE_[0] \r
383                                                = m_dataInputStream_.readInt();\r
384             readUCAConstcount += 4;\r
385             UCAConst.LAST_SECONDARY_IGNORABLE_[1] \r
386                                                = m_dataInputStream_.readInt();\r
387             readUCAConstcount += 4;\r
388             UCAConst.LAST_PRIMARY_IGNORABLE_[0] \r
389                                                = m_dataInputStream_.readInt();\r
390             readUCAConstcount += 4;\r
391             UCAConst.LAST_PRIMARY_IGNORABLE_[1] \r
392                                                = m_dataInputStream_.readInt();\r
393             readUCAConstcount += 4;\r
394             UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();     \r
395             readUCAConstcount += 4;\r
396             UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();\r
397             readUCAConstcount += 4;\r
398             UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt(); \r
399             readUCAConstcount += 4;\r
400             UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();                     \r
401             readUCAConstcount += 4;\r
402             UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  \r
403             readUCAConstcount += 4;\r
404             UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  \r
405             readUCAConstcount += 4;\r
406             UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();  \r
407             readUCAConstcount += 4;\r
408             UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();  \r
409             readUCAConstcount += 4;\r
410             UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();  \r
411             readUCAConstcount += 4;\r
412             UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();  \r
413             readUCAConstcount += 4;\r
414             UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();  \r
415             readUCAConstcount += 4;\r
416             UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();  \r
417             readUCAConstcount += 4;\r
418             UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();  \r
419             readUCAConstcount += 4;\r
420             UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();  \r
421             readUCAConstcount += 4;\r
422             UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();  \r
423             readUCAConstcount += 4;\r
424             UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();  \r
425             readUCAConstcount += 4;\r
426             UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();  \r
427             readUCAConstcount += 4;\r
428             UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();   \r
429             readUCAConstcount += 4; \r
430             UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();  \r
431             readUCAConstcount += 4;\r
432             UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();   \r
433             readUCAConstcount += 4;\r
434             UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();   \r
435             readUCAConstcount += 4;\r
436             UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();   \r
437             readUCAConstcount += 4;\r
438             UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();   \r
439             readUCAConstcount += 4;\r
440             UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();   \r
441             readUCAConstcount += 4;\r
442             UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();   \r
443             readUCAConstcount += 4;\r
444             int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;\r
445             char result[] = new char[resultsize];\r
446             for (int i = 0; i < resultsize; i ++) {\r
447                 result[i] = m_dataInputStream_.readChar();\r
448             }\r
449             readcount += m_UCAValuesSize_;\r
450             if (readcount != m_size_) {\r
451                 throw new IOException("Internal Error: Data file size error");\r
452             }\r
453             return result;\r
454         }\r
455         if (readcount != m_size_) {\r
456             throw new IOException("Internal Error: Data file size error");\r
457         }\r
458         return null;\r
459     }\r
460     \r
461     /**\r
462      * Reads in the inverse uca data\r
463      * @param input input stream with the inverse uca data\r
464      * @return an object containing the inverse uca data\r
465      * @exception IOException thrown when error occurs while reading the \r
466      *            inverse uca\r
467      */\r
468     private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(\r
469                                                       InputStream inputStream)\r
470                                                       throws IOException\r
471     {\r
472          byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, \r
473                               INVERSE_UCA_AUTHENTICATE_);\r
474                               \r
475         // weiv: check that we have the correct Unicode version in \r
476         // binary files\r
477         VersionInfo UCDVersion = UCharacter.getUnicodeVersion();\r
478         if(UnicodeVersion[0] != UCDVersion.getMajor() \r
479         || UnicodeVersion[1] != UCDVersion.getMinor()) {\r
480             throw new IOException(WRONG_UNICODE_VERSION_ERROR_);\r
481         }\r
482                               \r
483         CollationParsedRuleBuilder.InverseUCA result = \r
484                                   new CollationParsedRuleBuilder.InverseUCA();\r
485         DataInputStream input = new DataInputStream(inputStream);        \r
486         input.readInt(); // bytesize\r
487         int tablesize = input.readInt(); // in int size\r
488         int contsize = input.readInt();  // in char size\r
489         input.readInt(); // table in bytes\r
490         input.readInt(); // conts in bytes\r
491         result.m_UCA_version_ = readVersion(input);\r
492         input.skipBytes(8); // skip padding\r
493         \r
494         int size = tablesize * 3; // one column for each strength\r
495         result.m_table_ = new int[size];\r
496         result.m_continuations_ = new char[contsize];\r
497         \r
498         for (int i = 0; i < size; i ++) {\r
499             result.m_table_[i] = input.readInt();\r
500         }\r
501         for (int i = 0; i < contsize; i ++) {\r
502             result.m_continuations_[i] = input.readChar();\r
503         }\r
504         input.close();\r
505         return result;\r
506     }\r
507     \r
508     /**\r
509      * Reads four bytes from the input and returns a VersionInfo\r
510      * object. Use it to read different collator versions.\r
511      * @param input already instantiated DataInputStream, positioned \r
512      *              at the start of four version bytes\r
513      * @return a ready VersionInfo object\r
514      * @throws IOException thrown when error occurs while reading  \r
515      *            version bytes\r
516      */\r
517     \r
518     protected static VersionInfo readVersion(DataInputStream input) \r
519         throws IOException {\r
520         byte[] version = new byte[4];\r
521         version[0] = input.readByte();\r
522         version[1] = input.readByte();\r
523         version[2] = input.readByte();\r
524         version[3] = input.readByte();\r
525         \r
526         VersionInfo result = \r
527         VersionInfo.getInstance(\r
528             (int)version[0], (int)version[1], \r
529             (int)version[2], (int)version[3]);\r
530         \r
531         return result;\r
532     }\r
533     \r
534     // private inner class -----------------------------------------------\r
535     \r
536     // private variables -------------------------------------------------\r
537     \r
538     /**\r
539      * Authenticate uca data format version\r
540      */\r
541     private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ \r
542                 = new ICUBinary.Authenticate() {\r
543                         public boolean isDataVersionAcceptable(byte version[])\r
544                         {\r
545                             return version[0] == DATA_FORMAT_VERSION_[0] \r
546                                    && version[1] >= DATA_FORMAT_VERSION_[1];\r
547                                    // Too harsh \r
548                                    //&& version[1] == DATA_FORMAT_VERSION_[1]\r
549                                    //&& version[2] == DATA_FORMAT_VERSION_[2] \r
550                                    //&& version[3] == DATA_FORMAT_VERSION_[3];\r
551                         }\r
552                 };\r
553                 \r
554     /**\r
555      * Authenticate uca data format version\r
556      */\r
557     private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ \r
558                 = new ICUBinary.Authenticate() {\r
559                         public boolean isDataVersionAcceptable(byte version[])\r
560                         {\r
561                             return version[0] \r
562                                     == INVERSE_UCA_DATA_FORMAT_VERSION_[0] \r
563                                 && version[1] \r
564                                     >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];\r
565                         }\r
566                 };\r
567   \r
568     /**\r
569     * Data input stream for uca.icu \r
570     */\r
571     private DataInputStream m_dataInputStream_;\r
572    \r
573     /**\r
574     * File format version and id that this class understands.\r
575     * No guarantees are made if a older version is used\r
576     */\r
577     private static final byte DATA_FORMAT_VERSION_[] = \r
578                                    {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};\r
579     private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,  \r
580                                                     (byte)0x6f, (byte)0x6c};\r
581     /**\r
582     * Inverse UCA file format version and id that this class understands.\r
583     * No guarantees are made if a older version is used\r
584     */\r
585     private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = \r
586                                    {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};\r
587     private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, \r
588                                                                (byte)0x6e,  \r
589                                                                (byte)0x76, \r
590                                                                (byte)0x43};\r
591                                 \r
592     /**\r
593     * Wrong unicode version error string\r
594     */\r
595     private static final String WRONG_UNICODE_VERSION_ERROR_ =\r
596                                 "Unicode version in binary image is not compatible with the current Unicode version";\r
597 \r
598     /**\r
599      * Size of expansion table in bytes\r
600      */\r
601     private int m_expansionSize_;\r
602     /**\r
603      * Size of contraction index table in bytes\r
604      */\r
605     private int m_contractionIndexSize_;\r
606     /**\r
607      * Size of contraction table in bytes\r
608      */\r
609     private int m_contractionCESize_;\r
610     /*\r
611      * Size of the Trie in bytes\r
612      */\r
613     //private int m_trieSize_;\r
614     /**\r
615      * Size of the table that contains information about collation elements\r
616      * that end with an expansion \r
617      */\r
618     private int m_expansionEndCESize_;\r
619     /**\r
620      * Size of the table that contains information about the maximum size of \r
621      * collation elements that end with a particular expansion CE corresponding\r
622      * to the ones in expansionEndCE\r
623      */\r
624     private int m_expansionEndCEMaxSizeSize_;\r
625     /**\r
626      * Size of the option table that contains information about the collation\r
627      * options\r
628      */\r
629     private int m_optionSize_;\r
630     /**\r
631      * Size of the whole data file minusing the ICU header\r
632      */\r
633     private int m_size_;\r
634     /**\r
635      * Size of the collation data header\r
636      */\r
637     private int m_headerSize_;\r
638     /**\r
639      * Size of the table that contains information about the "Unsafe" \r
640      * codepoints\r
641      */\r
642     private int m_unsafeSize_;\r
643     /**\r
644      * Size of the table that contains information about codepoints that ends\r
645      * with a contraction\r
646      */\r
647     private int m_contractionEndSize_;\r
648     /**\r
649      * Size of the table that contains UCA contraction information\r
650      */\r
651     private int m_UCAValuesSize_;\r
652       \r
653     // private methods ---------------------------------------------------\r
654       \r
655 }\r
656 \r