2 ******************************************************************************
\r
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 ******************************************************************************
\r
8 package com.ibm.icu.impl;
\r
10 import com.ibm.icu.lang.UCharacter;
\r
11 import com.ibm.icu.text.UTF16;
\r
12 import java.util.Arrays;
\r
13 import java.io.OutputStream;
\r
14 import java.io.DataOutputStream;
\r
15 import java.io.IOException;
\r
18 * Builder class to manipulate and generate a trie.
\r
19 * This is useful for ICU data in primitive types.
\r
20 * Provides a compact way to store information that is indexed by Unicode
\r
21 * values, such as character properties, types, keyboard values, etc. This is
\r
22 * very useful when you have a block of Unicode data that contains significant
\r
23 * values while the rest of the Unicode data is unused in the application or
\r
24 * when you have a lot of redundance, such as where all 21,000 Han ideographs
\r
25 * have the same value. However, lookup is much faster than a hash table.
\r
26 * A trie of any primitive data type serves two purposes:
\r
28 * <LI>Fast access of the indexed values.
\r
29 * <LI>Smaller memory footprint.
\r
31 * This is a direct port from the ICU4C version
\r
32 * @author Syn Wee Quek
\r
34 public class IntTrieBuilder extends TrieBuilder
\r
36 // public constructor ----------------------------------------------
\r
41 public IntTrieBuilder(IntTrieBuilder table)
\r
44 m_data_ = new int[m_dataCapacity_];
\r
45 System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
\r
46 m_initialValue_ = table.m_initialValue_;
\r
47 m_leadUnitValue_ = table.m_leadUnitValue_;
\r
51 * Constructs a build table
\r
52 * @param aliasdata data to be filled into table
\r
53 * @param maxdatalength maximum data length allowed in table
\r
54 * @param initialvalue inital data value
\r
55 * @param latin1linear is latin 1 to be linear
\r
57 public IntTrieBuilder(int aliasdata[], int maxdatalength,
\r
58 int initialvalue, int leadunitvalue,
\r
59 boolean latin1linear)
\r
62 if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
\r
63 && maxdatalength < 1024)) {
\r
64 throw new IllegalArgumentException(
\r
65 "Argument maxdatalength is too small");
\r
68 if (aliasdata != null) {
\r
69 m_data_ = aliasdata;
\r
72 m_data_ = new int[maxdatalength];
\r
75 // preallocate and reset the first data block (block index 0)
\r
76 int j = DATA_BLOCK_LENGTH;
\r
79 // preallocate and reset the first block (number 0) and Latin-1
\r
80 // (U+0000..U+00ff) after that made sure above that
\r
81 // maxDataLength >= 1024
\r
82 // set indexes to point to consecutive data blocks
\r
85 // do this at least for trie->index[0] even if that block is
\r
86 // only partly used for Latin-1
\r
88 j += DATA_BLOCK_LENGTH;
\r
89 } while (i < (256 >> SHIFT_));
\r
93 // reset the initially allocated blocks to the initial value
\r
94 Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
\r
95 m_initialValue_ = initialvalue;
\r
96 m_leadUnitValue_ = leadunitvalue;
\r
97 m_dataCapacity_ = maxdatalength;
\r
98 m_isLatin1Linear_ = latin1linear;
\r
99 m_isCompacted_ = false;
\r
102 // public methods -------------------------------------------------------
\r
104 /*public final void print()
\r
107 int oldvalue = m_index_[i];
\r
109 System.out.println("index length " + m_indexLength_
\r
110 + " --------------------------");
\r
111 while (i < m_indexLength_) {
\r
112 if (m_index_[i] != oldvalue) {
\r
113 System.out.println("index has " + count + " counts of "
\r
114 + Integer.toHexString(oldvalue));
\r
116 oldvalue = m_index_[i];
\r
121 System.out.println("index has " + count + " counts of "
\r
122 + Integer.toHexString(oldvalue));
\r
124 oldvalue = m_data_[i];
\r
126 System.out.println("data length " + m_dataLength_
\r
127 + " --------------------------");
\r
128 while (i < m_dataLength_) {
\r
129 if (m_data_[i] != oldvalue) {
\r
130 if ((oldvalue & 0xf1000000) == 0xf1000000) {
\r
131 int temp = oldvalue & 0xffffff;
\r
133 oldvalue = 0xf1000000 | temp;
\r
135 if ((oldvalue & 0xf2000000) == 0xf2000000) {
\r
136 int temp = oldvalue & 0xffffff;
\r
138 oldvalue = 0xf2000000 | temp;
\r
140 System.out.println("data has " + count + " counts of "
\r
141 + Integer.toHexString(oldvalue));
\r
143 oldvalue = m_data_[i];
\r
148 if ((oldvalue & 0xf1000000) == 0xf1000000) {
\r
149 int temp = oldvalue & 0xffffff;
\r
151 oldvalue = 0xf1000000 | temp;
\r
153 if ((oldvalue & 0xf2000000) == 0xf2000000) {
\r
154 int temp = oldvalue & 0xffffff;
\r
156 oldvalue = 0xf2000000 | temp;
\r
158 System.out.println("data has " + count + " counts of "
\r
159 + Integer.toHexString(oldvalue));
\r
163 * Gets a 32 bit data from the table data
\r
164 * @param ch codepoint which data is to be retrieved
\r
165 * @return the 32 bit data
\r
167 public int getValue(int ch)
\r
169 // valid, uncompacted trie and valid c?
\r
170 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
\r
174 int block = m_index_[ch >> SHIFT_];
\r
175 return m_data_[Math.abs(block) + (ch & MASK_)];
\r
179 * Get a 32 bit data from the table data
\r
180 * @param ch code point for which data is to be retrieved.
\r
181 * @param inBlockZero Output parameter, inBlockZero[0] returns true if the
\r
182 * char maps into block zero, otherwise false.
\r
183 * @return the 32 bit data value.
\r
185 public int getValue(int ch, boolean [] inBlockZero)
\r
187 // valid, uncompacted trie and valid c?
\r
188 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
\r
189 if (inBlockZero != null) {
\r
190 inBlockZero[0] = true;
\r
195 int block = m_index_[ch >> SHIFT_];
\r
196 if (inBlockZero != null) {
\r
197 inBlockZero[0] = (block == 0);
\r
199 return m_data_[Math.abs(block) + (ch & MASK_)];
\r
204 * Sets a 32 bit data in the table data
\r
205 * @param ch codepoint which data is to be set
\r
206 * @param value to set
\r
207 * @return true if the set is successful, otherwise
\r
208 * if the table has been compacted return false
\r
210 public boolean setValue(int ch, int value)
\r
212 // valid, uncompacted trie and valid c?
\r
213 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
\r
217 int block = getDataBlock(ch);
\r
222 m_data_[block + (ch & MASK_)] = value;
\r
227 * Serializes the build table with 32 bit data
\r
228 * @param datamanipulate builder raw fold method implementation
\r
229 * @param triedatamanipulate result trie fold method
\r
230 * @return a new trie
\r
232 public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
\r
233 Trie.DataManipulate triedatamanipulate)
\r
235 if (datamanipulate == null) {
\r
236 throw new IllegalArgumentException("Parameters can not be null");
\r
238 // fold and compact if necessary, also checks that indexLength is
\r
240 if (!m_isCompacted_) {
\r
241 // compact once without overlap to improve folding
\r
243 // fold the supplementary part of the index array
\r
244 fold(datamanipulate);
\r
245 // compact again with overlap for minimum data array length
\r
247 m_isCompacted_ = true;
\r
249 // is dataLength within limits?
\r
250 if (m_dataLength_ >= MAX_DATA_LENGTH_) {
\r
251 throw new ArrayIndexOutOfBoundsException("Data length too small");
\r
254 char index[] = new char[m_indexLength_];
\r
255 int data[] = new int[m_dataLength_];
\r
256 // write the index (stage 1) array and the 32-bit data (stage 2) array
\r
257 // write 16-bit index values shifted right by INDEX_SHIFT_
\r
258 for (int i = 0; i < m_indexLength_; i ++) {
\r
259 index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
\r
261 // write 32-bit data values
\r
262 System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
\r
264 int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
\r
265 options |= OPTIONS_DATA_IS_32_BIT_;
\r
266 if (m_isLatin1Linear_) {
\r
267 options |= OPTIONS_LATIN1_IS_LINEAR_;
\r
269 return new IntTrie(index, data, m_initialValue_, options,
\r
270 triedatamanipulate);
\r
275 * Serializes the build table to an output stream.
\r
277 * Compacts the build-time trie after all values are set, and then
\r
278 * writes the serialized form onto an output stream.
\r
280 * After this, this build-time Trie can only be serialized again and/or closed;
\r
281 * no further values can be added.
\r
283 * This function is the rough equivalent of utrie_seriaize() in ICU4C.
\r
285 * @param os the output stream to which the seriaized trie will be written.
\r
286 * If nul, the function still returns the size of the serialized Trie.
\r
287 * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting
\r
288 * serialized form can then be used to create a CharTrie.
\r
289 * @param datamanipulate builder raw fold method implementation
\r
290 * @return the number of bytes written to the output stream.
\r
292 public int serialize(OutputStream os, boolean reduceTo16Bits,
\r
293 TrieBuilder.DataManipulate datamanipulate) throws IOException {
\r
294 if (datamanipulate == null) {
\r
295 throw new IllegalArgumentException("Parameters can not be null");
\r
298 // fold and compact if necessary, also checks that indexLength is
\r
300 if (!m_isCompacted_) {
\r
301 // compact once without overlap to improve folding
\r
303 // fold the supplementary part of the index array
\r
304 fold(datamanipulate);
\r
305 // compact again with overlap for minimum data array length
\r
307 m_isCompacted_ = true;
\r
310 // is dataLength within limits?
\r
312 if (reduceTo16Bits) {
\r
313 length = m_dataLength_ + m_indexLength_;
\r
315 length = m_dataLength_;
\r
317 if (length >= MAX_DATA_LENGTH_) {
\r
318 throw new ArrayIndexOutOfBoundsException("Data length too small");
\r
321 // struct UTrieHeader {
\r
322 // int32_t signature;
\r
323 // int32_t options (a bit field)
\r
324 // int32_t indexLength
\r
325 // int32_t dataLength
\r
326 length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
\r
327 if(reduceTo16Bits) {
\r
328 length+=2*m_dataLength_;
\r
330 length+=4*m_dataLength_;
\r
334 // No output stream. Just return the length of the serialized Trie, in bytes.
\r
338 DataOutputStream dos = new DataOutputStream(os);
\r
339 dos.writeInt(Trie.HEADER_SIGNATURE_);
\r
341 int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
\r
342 if(!reduceTo16Bits) {
\r
343 options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
\r
345 if(m_isLatin1Linear_) {
\r
346 options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
\r
348 dos.writeInt(options);
\r
350 dos.writeInt(m_indexLength_);
\r
351 dos.writeInt(m_dataLength_);
\r
353 /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
\r
354 if(reduceTo16Bits) {
\r
355 /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
\r
356 for (int i=0; i<m_indexLength_; i++) {
\r
357 int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
\r
361 /* write 16-bit data values */
\r
362 for(int i=0; i<m_dataLength_; i++) {
\r
363 int v = m_data_[i] & 0x0000ffff;
\r
367 /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
\r
368 for (int i=0; i<m_indexLength_; i++) {
\r
369 int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
\r
373 /* write 32-bit data values */
\r
374 for(int i=0; i<m_dataLength_; i++) {
\r
375 dos.writeInt(m_data_[i]);
\r
385 * Set a value in a range of code points [start..limit].
\r
386 * All code points c with start <= c < limit will get the value if
\r
387 * overwrite is true or if the old value is 0.
\r
388 * @param start the first code point to get the value
\r
389 * @param limit one past the last code point to get the value
\r
390 * @param value the value
\r
391 * @param overwrite flag for whether old non-initial values are to be
\r
393 * @return false if a failure occurred (illegal argument or data array
\r
396 public boolean setRange(int start, int limit, int value,
\r
397 boolean overwrite)
\r
399 // repeat value in [start..limit[
\r
400 // mark index values for repeat-data blocks by setting bit 31 of the
\r
401 // index values fill around existing values if any, if(overwrite)
\r
403 // valid, uncompacted trie and valid indexes?
\r
404 if (m_isCompacted_ || start < UCharacter.MIN_VALUE
\r
405 || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
\r
406 || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
\r
410 if (start == limit) {
\r
411 return true; // nothing to do
\r
414 if ((start & MASK_) != 0) {
\r
415 // set partial block at [start..following block boundary[
\r
416 int block = getDataBlock(start);
\r
421 int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
\r
422 if (nextStart <= limit) {
\r
423 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
\r
428 fillBlock(block, start & MASK_, limit & MASK_,
\r
434 // number of positions in the last, partial block
\r
435 int rest = limit & MASK_;
\r
437 // round down limit to a block boundary
\r
440 // iterate over all-value blocks
\r
441 int repeatBlock = 0;
\r
442 if (value == m_initialValue_) {
\r
443 // repeatBlock = 0; assigned above
\r
448 while (start < limit) {
\r
449 // get index value
\r
450 int block = m_index_[start >> SHIFT_];
\r
452 // already allocated, fill in value
\r
453 fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
\r
455 else if (m_data_[-block] != value && (block == 0 || overwrite)) {
\r
456 // set the repeatBlock instead of the current block 0 or range
\r
458 if (repeatBlock >= 0) {
\r
459 m_index_[start >> SHIFT_] = -repeatBlock;
\r
462 // create and set and fill the repeatBlock
\r
463 repeatBlock = getDataBlock(start);
\r
464 if (repeatBlock < 0) {
\r
468 // set the negative block number to indicate that it is a
\r
470 m_index_[start >> SHIFT_] = -repeatBlock;
\r
471 fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
\r
475 start += DATA_BLOCK_LENGTH;
\r
479 // set partial block at [last block boundary..limit[
\r
480 int block = getDataBlock(start);
\r
484 fillBlock(block, 0, rest, value, overwrite);
\r
490 // protected data member ------------------------------------------------
\r
492 protected int m_data_[];
\r
493 protected int m_initialValue_;
\r
495 // private data member ------------------------------------------------
\r
497 private int m_leadUnitValue_;
\r
499 // private methods ------------------------------------------------------
\r
501 private int allocDataBlock()
\r
503 int newBlock = m_dataLength_;
\r
504 int newTop = newBlock + DATA_BLOCK_LENGTH;
\r
505 if (newTop > m_dataCapacity_) {
\r
506 // out of memory in the data array
\r
509 m_dataLength_ = newTop;
\r
514 * No error checking for illegal arguments.
\r
515 * @param ch codepoint to look for
\r
516 * @return -1 if no new data block available (out of memory in data array)
\r
518 private int getDataBlock(int ch)
\r
521 int indexValue = m_index_[ch];
\r
522 if (indexValue > 0) {
\r
526 // allocate a new data block
\r
527 int newBlock = allocDataBlock();
\r
528 if (newBlock < 0) {
\r
529 // out of memory in the data array
\r
532 m_index_[ch] = newBlock;
\r
534 // copy-on-write for a block from a setRange()
\r
535 System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
\r
536 DATA_BLOCK_LENGTH << 2);
\r
541 * Compact a folded build-time trie.
\r
543 * - removes blocks that are identical with earlier ones
\r
544 * - overlaps adjacent blocks as much as possible (if overlap == true)
\r
545 * - moves blocks in steps of the data granularity
\r
546 * - moves and overlaps blocks that overlap with multiple values in the overlap region
\r
549 * - try to move and overlap blocks that are not already adjacent
\r
550 * @param overlap flag
\r
552 private void compact(boolean overlap)
\r
554 if (m_isCompacted_) {
\r
555 return; // nothing left to do
\r
559 // initialize the index map with "block is used/unused" flags
\r
560 findUnusedBlocks();
\r
562 // if Latin-1 is preallocated and linear, then do not compact Latin-1
\r
564 int overlapStart = DATA_BLOCK_LENGTH;
\r
565 if (m_isLatin1Linear_ && SHIFT_ <= 8) {
\r
566 overlapStart += 256;
\r
569 int newStart = DATA_BLOCK_LENGTH;
\r
571 for (int start = newStart; start < m_dataLength_;) {
\r
572 // start: index of first entry of current block
\r
573 // newStart: index where the current block is to be moved
\r
574 // (right after current end of already-compacted data)
\r
575 // skip blocks that are not used
\r
576 if (m_map_[start >>> SHIFT_] < 0) {
\r
577 // advance start to the next block
\r
578 start += DATA_BLOCK_LENGTH;
\r
579 // leave newStart with the previous block!
\r
582 // search for an identical block
\r
583 if (start >= overlapStart) {
\r
584 i = findSameDataBlock(m_data_, newStart, start,
\r
585 overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
\r
587 // found an identical block, set the other block's index
\r
588 // value for the current block
\r
589 m_map_[start >>> SHIFT_] = i;
\r
590 // advance start to the next block
\r
591 start += DATA_BLOCK_LENGTH;
\r
592 // leave newStart with the previous block!
\r
596 // see if the beginning of this block can be overlapped with the
\r
597 // end of the previous block
\r
598 if(overlap && start>=overlapStart) {
\r
599 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
\r
600 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
\r
601 i>0 && !equal_int(m_data_, newStart-i, start, i);
\r
602 i-=DATA_GRANULARITY_) {}
\r
608 m_map_[start >>> SHIFT_] = newStart - i;
\r
609 // move the non-overlapping indexes to their new positions
\r
611 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
\r
612 m_data_[newStart ++] = m_data_[start ++];
\r
615 else if (newStart < start) {
\r
616 // no overlap, just move the indexes to their new positions
\r
617 m_map_[start >>> SHIFT_] = newStart;
\r
618 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
\r
619 m_data_[newStart ++] = m_data_[start ++];
\r
622 else { // no overlap && newStart==start
\r
623 m_map_[start >>> SHIFT_] = start;
\r
624 newStart += DATA_BLOCK_LENGTH;
\r
628 // now adjust the index (stage 1) table
\r
629 for (i = 0; i < m_indexLength_; ++ i) {
\r
630 m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
\r
632 m_dataLength_ = newStart;
\r
636 * Find the same data block
\r
637 * @param data array
\r
638 * @param dataLength
\r
639 * @param otherBlock
\r
642 private static final int findSameDataBlock(int data[], int dataLength,
\r
643 int otherBlock, int step)
\r
645 // ensure that we do not even partially get past dataLength
\r
646 dataLength -= DATA_BLOCK_LENGTH;
\r
648 for (int block = 0; block <= dataLength; block += step) {
\r
649 if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
\r
657 * Fold the normalization data for supplementary code points into
\r
658 * a compact area on top of the BMP-part of the trie index,
\r
659 * with the lead surrogates indexing this compact area.
\r
661 * Duplicate the index values for lead surrogates:
\r
662 * From inside the BMP area, where some may be overridden with folded values,
\r
663 * to just after the BMP area, where they can be retrieved for
\r
664 * code point lookups.
\r
665 * @param manipulate fold implementation
\r
667 private final void fold(DataManipulate manipulate)
\r
669 int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
\r
670 int index[] = m_index_;
\r
671 // copy the lead surrogate indexes into a temporary array
\r
672 System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
\r
673 SURROGATE_BLOCK_COUNT_);
\r
675 // set all values for lead surrogate code *units* to leadUnitValue
\r
676 // so that by default runtime lookups will find no data for associated
\r
677 // supplementary code points, unless there is data for such code points
\r
678 // which will result in a non-zero folding value below that is set for
\r
679 // the respective lead units
\r
680 // the above saved the indexes for surrogate code *points*
\r
681 // fill the indexes with simplified code from utrie_setRange32()
\r
683 if (m_leadUnitValue_ == m_initialValue_) {
\r
684 // leadUnitValue == initialValue, use all-initial-value block
\r
685 // block = 0; if block here left empty
\r
688 // create and fill the repeatBlock
\r
689 block = allocDataBlock();
\r
691 // data table overflow
\r
692 throw new IllegalStateException("Internal error: Out of memory space");
\r
694 fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
\r
695 // negative block number to indicate that it is a repeat block
\r
698 for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
\r
699 m_index_[c] = block;
\r
702 // Fold significant index values into the area just after the BMP
\r
704 // In case the first lead surrogate has significant data,
\r
705 // its index block must be used first (in which case the folding is a
\r
707 // Later all folded index blocks are moved up one to insert the copied
\r
708 // lead surrogate indexes.
\r
709 int indexLength = BMP_INDEX_LENGTH_;
\r
710 // search for any index (stage 1) entries for supplementary code points
\r
711 for (int c = 0x10000; c < 0x110000;) {
\r
712 if (index[c >> SHIFT_] != 0) {
\r
713 // there is data, treat the full block for a lead surrogate
\r
715 // is there an identical index block?
\r
716 block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
\r
718 // get a folded value for [c..c+0x400[ and,
\r
719 // if different from the value for the lead surrogate code
\r
720 // point, set it for the lead surrogate code unit
\r
722 int value = manipulate.getFoldedValue(c,
\r
723 block + SURROGATE_BLOCK_COUNT_);
\r
724 if (value != getValue(UTF16.getLeadSurrogate(c))) {
\r
725 if (!setValue(UTF16.getLeadSurrogate(c), value)) {
\r
726 // data table overflow
\r
727 throw new ArrayIndexOutOfBoundsException(
\r
728 "Data table overflow");
\r
730 // if we did not find an identical index block...
\r
731 if (block == indexLength) {
\r
732 // move the actual index (stage 1) entries from the
\r
733 // supplementary position to the new one
\r
734 System.arraycopy(index, c >> SHIFT_, index, indexLength,
\r
735 SURROGATE_BLOCK_COUNT_);
\r
736 indexLength += SURROGATE_BLOCK_COUNT_;
\r
742 c += DATA_BLOCK_LENGTH;
\r
746 // index array overflow?
\r
747 // This is to guarantee that a folding offset is of the form
\r
748 // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
\r
749 // If the index is too large, then n>=1024 and more than 10 bits are
\r
751 // In fact, it can only ever become n==1024 with completely unfoldable
\r
752 // data and the additional block of duplicated values for lead
\r
754 if (indexLength >= MAX_INDEX_LENGTH_) {
\r
755 throw new ArrayIndexOutOfBoundsException("Index table overflow");
\r
757 // make space for the lead surrogate index block and insert it between
\r
758 // the BMP indexes and the folded ones
\r
759 System.arraycopy(index, BMP_INDEX_LENGTH_, index,
\r
760 BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
\r
761 indexLength - BMP_INDEX_LENGTH_);
\r
762 System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
\r
763 SURROGATE_BLOCK_COUNT_);
\r
764 indexLength += SURROGATE_BLOCK_COUNT_;
\r
765 m_indexLength_ = indexLength;
\r
771 private void fillBlock(int block, int start, int limit, int value,
\r
772 boolean overwrite)
\r
777 while (block < limit) {
\r
778 m_data_[block ++] = value;
\r
782 while (block < limit) {
\r
783 if (m_data_[block] == m_initialValue_) {
\r
784 m_data_[block] = value;
\r