/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 *******************************************************************************
 */
7 package com.ibm.icu.text;
\r
9 import java.io.BufferedInputStream;
\r
10 import java.io.DataInputStream;
\r
11 import java.io.IOException;
\r
12 import java.io.InputStream;
\r
13 import java.nio.ByteBuffer;
\r
15 import com.ibm.icu.impl.ICUBinary;
\r
16 import com.ibm.icu.impl.ICUData;
\r
17 import com.ibm.icu.impl.ICUResourceBundle;
\r
18 import com.ibm.icu.impl.IntTrie;
\r
19 import com.ibm.icu.lang.UCharacter;
\r
20 import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
\r
21 import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
\r
22 import com.ibm.icu.util.VersionInfo;
\r
/**
 * <p>Internal reader class for ICU data file uca.icu containing
 * Unicode Collation Algorithm data.</p>
 * <p>This class simply reads uca.icu, authenticates that it is a valid
 * ICU data file and splits its contents up into blocks of data for use in
 * <a href=Collator.html>com.ibm.icu.text.Collator</a>.</p>
 * <p>uca.icu, which is in big-endian format, is jarred together with this
 * package.</p>
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 */
\r
37 final class CollatorReader
\r
39 static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {
\r
40 InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");
\r
41 BufferedInputStream b = new BufferedInputStream(i, 90000);
\r
42 CollatorReader reader = new CollatorReader(b);
\r
43 char[] result = reader.readImp(rbc, ucac);
\r
48 public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
\r
49 return new InputStream() {
\r
50 public int read() throws IOException {
\r
51 if (!buf.hasRemaining()) {
\r
54 return buf.get() & 0xff;
\r
56 public int read(byte[] bytes, int off, int len) throws IOException {
\r
57 len = Math.min(len, buf.remaining());
\r
58 buf.get(bytes, off, len);
\r
64 static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
\r
65 final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
\r
66 int dataLength = data.remaining();
\r
67 // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
\r
68 // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
\r
69 // Consider changing ICUBinary to also work with a ByteBuffer.
\r
70 CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
\r
71 if (dataLength > MIN_BINARY_DATA_SIZE_) {
\r
72 reader.readImp(rbc, null);
\r
74 reader.readHeader(rbc);
\r
75 reader.readOptions(rbc);
\r
76 // duplicating UCA_'s data
\r
77 rbc.setWithUCATables();
\r
81 static InverseUCA getInverseUCA() throws IOException {
\r
82 InverseUCA result = null;
\r
83 InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");
\r
85 // String invdat = "/com/ibm/icu/impl/data/invuca.icu";
\r
86 // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
\r
87 BufferedInputStream b = new BufferedInputStream(i, 110000);
\r
88 result = CollatorReader.readInverseUCA(b);
\r
92 // } catch (Exception e) {
\r
93 // throw new RuntimeException(e.getMessage());
\r
    // protected constructor ---------------------------------------------
\r
100 * <p>Protected constructor.</p>
\r
101 * @param inputStream ICU collator file input stream
\r
102 * @exception IOException throw if data file fails authentication
\r
104 private CollatorReader(InputStream inputStream) throws IOException
\r
106 this(inputStream, true);
\r
108 byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
\r
109 // weiv: check that we have the correct Unicode version in
\r
111 VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
\r
112 if(UnicodeVersion[0] != UCDVersion.getMajor()
\r
113 || UnicodeVersion[1] != UCDVersion.getMinor()) {
\r
114 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
\r
116 m_dataInputStream_ = new DataInputStream(inputStream);
\r
121 * <p>Protected constructor.</p>
\r
122 * @param inputStream ICU uprops.icu file input stream
\r
123 * @param readICUHeader flag to indicate if the ICU header has to be read
\r
124 * @exception IOException throw if data file fails authentication
\r
126 private CollatorReader(InputStream inputStream, boolean readICUHeader)
\r
129 if (readICUHeader) {
\r
130 byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_,
\r
131 UCA_AUTHENTICATE_);
\r
132 // weiv: check that we have the correct Unicode version in
\r
134 VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
\r
135 if(UnicodeVersion[0] != UCDVersion.getMajor()
\r
136 || UnicodeVersion[1] != UCDVersion.getMinor()) {
\r
137 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
\r
140 m_dataInputStream_ = new DataInputStream(inputStream);
\r
    // protected methods -------------------------------------------------
\r
146 * Read and break up the header stream of data passed in as arguments into
\r
147 * meaningful Collator data.
\r
148 * @param rbc RuleBasedCollator to populate with header information
\r
149 * @exception IOException thrown when there's a data error.
\r
151 private void readHeader(RuleBasedCollator rbc) throws IOException
\r
153 m_size_ = m_dataInputStream_.readInt();
\r
154 // all the offsets are in bytes
\r
155 // to get the address add to the header address and cast properly
\r
156 // Default options int options
\r
157 m_headerSize_ = m_dataInputStream_.readInt(); // start of options
\r
158 int readcount = 8; // for size and headersize
\r
159 // structure which holds values for indirect positioning and implicit
\r
161 int UCAConst = m_dataInputStream_.readInt();
\r
163 // this one is needed only for UCA, to copy the appropriate
\r
165 m_dataInputStream_.skip(4);
\r
167 // reserved for future use
\r
168 m_dataInputStream_.skipBytes(4);
\r
170 // const uint8_t *mappingPosition;
\r
171 int mapping = m_dataInputStream_.readInt();
\r
173 // uint32_t *expansion;
\r
174 rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
\r
176 // UChar *contractionIndex;
\r
177 rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
\r
179 // uint32_t *contractionCEs;
\r
180 int contractionCE = m_dataInputStream_.readInt();
\r
182 // needed for various closures int contractionSize
\r
183 /*int contractionSize = */m_dataInputStream_.readInt();
\r
185 // array of last collation element in expansion
\r
186 int expansionEndCE = m_dataInputStream_.readInt();
\r
188 // array of maximum expansion size corresponding to the expansion
\r
189 // collation elements with last element in expansionEndCE
\r
190 int expansionEndCEMaxSize = m_dataInputStream_.readInt();
\r
192 // size of endExpansionCE int expansionEndCESize
\r
193 m_dataInputStream_.skipBytes(4);
\r
195 // hash table of unsafe code points
\r
196 int unsafe = m_dataInputStream_.readInt();
\r
198 // hash table of final code points in contractions.
\r
199 int contractionEnd = m_dataInputStream_.readInt();
\r
201 // int CEcount = m_dataInputStream_.readInt();
\r
202 m_dataInputStream_.skipBytes(4);
\r
205 rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
\r
208 m_dataInputStream_.skipBytes(3);
\r
210 rbc.m_version_ = readVersion(m_dataInputStream_);
\r
212 rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
\r
214 rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
\r
216 // byte charsetName[] = new byte[32]; // for charset CEs
\r
217 m_dataInputStream_.skipBytes(32);
\r
219 m_dataInputStream_.skipBytes(56); // for future use
\r
221 if (m_headerSize_ < readcount) {
\r
223 throw new IOException("Internal Error: Header size error");
\r
226 m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
\r
228 if (rbc.m_contractionOffset_ == 0) { // contraction can be null
\r
229 rbc.m_contractionOffset_ = mapping;
\r
230 contractionCE = mapping;
\r
232 m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
\r
233 m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
\r
234 m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
\r
235 m_contractionCESize_ = mapping - contractionCE;
\r
236 //m_trieSize_ = expansionEndCE - mapping;
\r
237 m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
\r
238 m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
\r
239 m_unsafeSize_ = contractionEnd - unsafe;
\r
240 m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled
\r
242 // treat it as normal collator first
\r
243 // for normal collator there is no UCA contraction
\r
244 m_contractionEndSize_ = m_size_ - contractionEnd;
\r
246 rbc.m_contractionOffset_ >>= 1; // casting to ints
\r
247 rbc.m_expansionOffset_ >>= 2; // casting to chars
\r
251 * Read and break up the collation options passed in the stream of data and
\r
252 * update the argument Collator with the results
\r
255 * RuleBasedCollator to populate
\r
256 * @exception IOException
\r
257 * thrown when there's a data error.
\r
259 private void readOptions(RuleBasedCollator rbc) throws IOException
\r
262 rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
\r
264 rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()
\r
265 == RuleBasedCollator.AttributeValue.ON_);
\r
267 rbc.m_defaultIsAlternateHandlingShifted_
\r
268 = (m_dataInputStream_.readInt() ==
\r
269 RuleBasedCollator.AttributeValue.SHIFTED_);
\r
271 rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
\r
273 rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
\r
274 == RuleBasedCollator.AttributeValue.ON_);
\r
276 int value = m_dataInputStream_.readInt();
\r
278 if (value == RuleBasedCollator.AttributeValue.ON_) {
\r
279 value = Collator.CANONICAL_DECOMPOSITION;
\r
282 value = Collator.NO_DECOMPOSITION;
\r
284 rbc.m_defaultDecomposition_ = value;
\r
285 rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
\r
287 rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt()
\r
288 == RuleBasedCollator.AttributeValue.ON_);
\r
290 rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt()
\r
291 == RuleBasedCollator.AttributeValue.ON_);
\r
293 m_dataInputStream_.skip(60); // reserved for future use
\r
295 m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
\r
296 if (m_optionSize_ < readcount) {
\r
298 throw new IOException("Internal Error: Option size error");
\r
304 * Read and break up the stream of data passed in as arguments into
\r
305 * meaningful Collator data.
\r
306 * @param rbc RuleBasedCollator to populate
\r
307 * @param UCAConst object to fill up with UCA constants if we are reading
\r
308 * the UCA collator, if not use a null
\r
309 * @return UCAContractions array filled up with the UCA contractions if we
\r
310 * are reading the UCA collator
\r
311 * @exception IOException thrown when there's a data error.
\r
313 private char[] readImp(RuleBasedCollator rbc,
\r
314 RuleBasedCollator.UCAConstants UCAConst)
\r
318 // header size has been checked by readHeader
\r
319 int readcount = m_headerSize_;
\r
320 // option size has been checked by readOptions
\r
322 readcount += m_optionSize_;
\r
323 m_expansionSize_ >>= 2;
\r
324 rbc.m_expansion_ = new int[m_expansionSize_];
\r
325 for (int i = 0; i < m_expansionSize_; i ++) {
\r
326 rbc.m_expansion_[i] = m_dataInputStream_.readInt();
\r
328 readcount += (m_expansionSize_ << 2);
\r
329 if (m_contractionIndexSize_ > 0) {
\r
330 m_contractionIndexSize_ >>= 1;
\r
331 rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
\r
332 for (int i = 0; i < m_contractionIndexSize_; i ++) {
\r
333 rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
\r
335 readcount += (m_contractionIndexSize_ << 1);
\r
336 m_contractionCESize_ >>= 2;
\r
337 rbc.m_contractionCE_ = new int[m_contractionCESize_];
\r
338 for (int i = 0; i < m_contractionCESize_; i ++) {
\r
339 rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
\r
341 readcount += (m_contractionCESize_ << 2);
\r
343 rbc.m_trie_ = new IntTrie(m_dataInputStream_,
\r
344 RuleBasedCollator.DataManipulate.getInstance());
\r
345 if (!rbc.m_trie_.isLatin1Linear()) {
\r
346 throw new IOException("Data corrupted, "
\r
347 + "Collator Tries expected to have linear "
\r
348 + "latin one data arrays");
\r
350 readcount += rbc.m_trie_.getSerializedDataSize();
\r
351 m_expansionEndCESize_ >>= 2;
\r
352 rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
\r
353 for (int i = 0; i < m_expansionEndCESize_; i ++) {
\r
354 rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
\r
356 readcount += (m_expansionEndCESize_ << 2);
\r
357 rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
\r
358 for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {
\r
359 rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
\r
361 readcount += m_expansionEndCEMaxSizeSize_;
\r
362 rbc.m_unsafe_ = new byte[m_unsafeSize_];
\r
363 for (int i = 0; i < m_unsafeSize_; i ++) {
\r
364 rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
\r
366 readcount += m_unsafeSize_;
\r
367 if (UCAConst != null) {
\r
368 // we are reading the UCA
\r
369 // unfortunately the UCA offset in any collator data is not 0 and
\r
370 // only refers to the UCA data
\r
371 m_contractionEndSize_ -= m_UCAValuesSize_;
\r
373 rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
\r
374 for (int i = 0; i < m_contractionEndSize_; i ++) {
\r
375 rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
\r
377 readcount += m_contractionEndSize_;
\r
378 if (UCAConst != null) {
\r
379 UCAConst.FIRST_TERTIARY_IGNORABLE_[0]
\r
380 = m_dataInputStream_.readInt();
\r
381 int readUCAConstcount = 4;
\r
382 UCAConst.FIRST_TERTIARY_IGNORABLE_[1]
\r
383 = m_dataInputStream_.readInt();
\r
384 readUCAConstcount += 4;
\r
385 UCAConst.LAST_TERTIARY_IGNORABLE_[0]
\r
386 = m_dataInputStream_.readInt();
\r
387 readUCAConstcount += 4;
\r
388 UCAConst.LAST_TERTIARY_IGNORABLE_[1]
\r
389 = m_dataInputStream_.readInt();
\r
390 readUCAConstcount += 4;
\r
391 UCAConst.FIRST_PRIMARY_IGNORABLE_[0]
\r
392 = m_dataInputStream_.readInt();
\r
393 readUCAConstcount += 4;
\r
394 UCAConst.FIRST_PRIMARY_IGNORABLE_[1]
\r
395 = m_dataInputStream_.readInt();
\r
396 readUCAConstcount += 4;
\r
397 UCAConst.FIRST_SECONDARY_IGNORABLE_[0]
\r
398 = m_dataInputStream_.readInt();
\r
399 readUCAConstcount += 4;
\r
400 UCAConst.FIRST_SECONDARY_IGNORABLE_[1]
\r
401 = m_dataInputStream_.readInt();
\r
402 readUCAConstcount += 4;
\r
403 UCAConst.LAST_SECONDARY_IGNORABLE_[0]
\r
404 = m_dataInputStream_.readInt();
\r
405 readUCAConstcount += 4;
\r
406 UCAConst.LAST_SECONDARY_IGNORABLE_[1]
\r
407 = m_dataInputStream_.readInt();
\r
408 readUCAConstcount += 4;
\r
409 UCAConst.LAST_PRIMARY_IGNORABLE_[0]
\r
410 = m_dataInputStream_.readInt();
\r
411 readUCAConstcount += 4;
\r
412 UCAConst.LAST_PRIMARY_IGNORABLE_[1]
\r
413 = m_dataInputStream_.readInt();
\r
414 readUCAConstcount += 4;
\r
415 UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
\r
416 readUCAConstcount += 4;
\r
417 UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
\r
418 readUCAConstcount += 4;
\r
419 UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
\r
420 readUCAConstcount += 4;
\r
421 UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
\r
422 readUCAConstcount += 4;
\r
423 UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
\r
424 readUCAConstcount += 4;
\r
425 UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
\r
426 readUCAConstcount += 4;
\r
427 UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
\r
428 readUCAConstcount += 4;
\r
429 UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
\r
430 readUCAConstcount += 4;
\r
431 UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
\r
432 readUCAConstcount += 4;
\r
433 UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
\r
434 readUCAConstcount += 4;
\r
435 UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
\r
436 readUCAConstcount += 4;
\r
437 UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
\r
438 readUCAConstcount += 4;
\r
439 UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
\r
440 readUCAConstcount += 4;
\r
441 UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
\r
442 readUCAConstcount += 4;
\r
443 UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
\r
444 readUCAConstcount += 4;
\r
445 UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
\r
446 readUCAConstcount += 4;
\r
447 UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
\r
448 readUCAConstcount += 4;
\r
449 UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
\r
450 readUCAConstcount += 4;
\r
451 UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
\r
452 readUCAConstcount += 4;
\r
453 UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
\r
454 readUCAConstcount += 4;
\r
455 UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
\r
456 readUCAConstcount += 4;
\r
457 UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
\r
458 readUCAConstcount += 4;
\r
459 UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
\r
460 readUCAConstcount += 4;
\r
461 UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
\r
462 readUCAConstcount += 4;
\r
463 UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
\r
464 readUCAConstcount += 4;
\r
465 int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;
\r
466 char result[] = new char[resultsize];
\r
467 for (int i = 0; i < resultsize; i ++) {
\r
468 result[i] = m_dataInputStream_.readChar();
\r
470 readcount += m_UCAValuesSize_;
\r
471 if (readcount != m_size_) {
\r
473 throw new IOException("Internal Error: Data file size error");
\r
478 if (readcount != m_size_) {
\r
480 throw new IOException("Internal Error: Data file size error");
\r
487 * Reads in the inverse uca data
\r
488 * @param input input stream with the inverse uca data
\r
489 * @return an object containing the inverse uca data
\r
490 * @exception IOException thrown when error occurs while reading the
\r
493 private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
\r
494 InputStream inputStream)
\r
497 byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
\r
498 INVERSE_UCA_AUTHENTICATE_);
\r
500 // weiv: check that we have the correct Unicode version in
\r
502 VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
\r
503 if(UnicodeVersion[0] != UCDVersion.getMajor()
\r
504 || UnicodeVersion[1] != UCDVersion.getMinor()) {
\r
505 throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
\r
508 CollationParsedRuleBuilder.InverseUCA result =
\r
509 new CollationParsedRuleBuilder.InverseUCA();
\r
510 DataInputStream input = new DataInputStream(inputStream);
\r
511 input.readInt(); // bytesize
\r
512 int tablesize = input.readInt(); // in int size
\r
513 int contsize = input.readInt(); // in char size
\r
514 input.readInt(); // table in bytes
\r
515 input.readInt(); // conts in bytes
\r
516 result.m_UCA_version_ = readVersion(input);
\r
517 input.skipBytes(8); // skip padding
\r
519 int size = tablesize * 3; // one column for each strength
\r
520 result.m_table_ = new int[size];
\r
521 result.m_continuations_ = new char[contsize];
\r
523 for (int i = 0; i < size; i ++) {
\r
524 result.m_table_[i] = input.readInt();
\r
526 for (int i = 0; i < contsize; i ++) {
\r
527 result.m_continuations_[i] = input.readChar();
\r
534 * Reads four bytes from the input and returns a VersionInfo
\r
535 * object. Use it to read different collator versions.
\r
536 * @param input already instantiated DataInputStream, positioned
\r
537 * at the start of four version bytes
\r
538 * @return a ready VersionInfo object
\r
539 * @throws IOException thrown when error occurs while reading
\r
543 protected static VersionInfo readVersion(DataInputStream input)
\r
544 throws IOException {
\r
545 byte[] version = new byte[4];
\r
546 version[0] = input.readByte();
\r
547 version[1] = input.readByte();
\r
548 version[2] = input.readByte();
\r
549 version[3] = input.readByte();
\r
551 VersionInfo result =
\r
552 VersionInfo.getInstance(
\r
553 (int)version[0], (int)version[1],
\r
554 (int)version[2], (int)version[3]);
\r
    // private inner class -----------------------------------------------

    // private variables -------------------------------------------------
\r
564 * Authenticate uca data format version
\r
566 private static final ICUBinary.Authenticate UCA_AUTHENTICATE_
\r
567 = new ICUBinary.Authenticate() {
\r
568 public boolean isDataVersionAcceptable(byte version[])
\r
570 return version[0] == DATA_FORMAT_VERSION_[0]
\r
571 && version[1] >= DATA_FORMAT_VERSION_[1];
\r
573 //&& version[1] == DATA_FORMAT_VERSION_[1]
\r
574 //&& version[2] == DATA_FORMAT_VERSION_[2]
\r
575 //&& version[3] == DATA_FORMAT_VERSION_[3];
\r
580 * Authenticate uca data format version
\r
582 private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_
\r
583 = new ICUBinary.Authenticate() {
\r
584 public boolean isDataVersionAcceptable(byte version[])
\r
587 == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
\r
589 >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
\r
594 * Data input stream for uca.icu
\r
596 private DataInputStream m_dataInputStream_;
\r
599 * File format version and id that this class understands.
\r
600 * No guarantees are made if a older version is used
\r
602 private static final byte DATA_FORMAT_VERSION_[] =
\r
603 {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};
\r
604 private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,
\r
605 (byte)0x6f, (byte)0x6c};
\r
607 * Inverse UCA file format version and id that this class understands.
\r
608 * No guarantees are made if a older version is used
\r
610 private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] =
\r
611 {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};
\r
612 private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49,
\r
618 * Wrong unicode version error string
\r
620 private static final String WRONG_UNICODE_VERSION_ERROR_ =
\r
621 "Unicode version in binary image is not compatible with the current Unicode version";
\r
624 * Size of expansion table in bytes
\r
626 private int m_expansionSize_;
\r
628 * Size of contraction index table in bytes
\r
630 private int m_contractionIndexSize_;
\r
632 * Size of contraction table in bytes
\r
634 private int m_contractionCESize_;
\r
636 * Size of the Trie in bytes
\r
638 //private int m_trieSize_;
\r
640 * Size of the table that contains information about collation elements
\r
641 * that end with an expansion
\r
643 private int m_expansionEndCESize_;
\r
645 * Size of the table that contains information about the maximum size of
\r
646 * collation elements that end with a particular expansion CE corresponding
\r
647 * to the ones in expansionEndCE
\r
649 private int m_expansionEndCEMaxSizeSize_;
\r
651 * Size of the option table that contains information about the collation
\r
654 private int m_optionSize_;
\r
656 * Size of the whole data file minusing the ICU header
\r
658 private int m_size_;
\r
660 * Size of the collation data header
\r
662 private int m_headerSize_;
\r
664 * Size of the table that contains information about the "Unsafe"
\r
667 private int m_unsafeSize_;
\r
669 * Size of the table that contains information about codepoints that ends
\r
670 * with a contraction
\r
672 private int m_contractionEndSize_;
\r
674 * Size of the table that contains UCA contraction information
\r
676 private int m_UCAValuesSize_;
\r
    // private methods ---------------------------------------------------
\r