2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.util;
\r
10 import com.ibm.icu.impl.Utility;
\r
13 * class CompactATypeArray : use only on primitive data types
\r
14 * Provides a compact way to store information that is indexed by Unicode
\r
15 * values, such as character properties, types, keyboard values, etc. This
\r
16 * is very useful when you have a block of Unicode data that contains
\r
17 * significant values while the rest of the Unicode data is unused in the
\r
18 * application or when you have a lot of redundancy, such as where all 21,000
\r
19 * Han ideographs have the same value. However, lookup is much faster than a
\r
21 * A compact array of any primitive data type serves two purposes:
\r
23 * <LI>Fast access of the indexed values.
\r
24 * <LI>Smaller memory footprint.
\r
26 * A compact array is composed of an index array and a value array. The index
\r
27 * array contains the indices of Unicode characters to the value array.
\r
28 * @see CompactByteArray
\r
29 * @author Helena Shih
\r
31 * @deprecated This API is ICU internal only.
\r
33 public final class CompactCharArray implements Cloneable {
\r
36 * The total number of Unicode characters.
\r
38 * @deprecated This API is ICU internal only.
\r
40 public static final int UNICODECOUNT = 65536;
\r
43 * Default constructor for CompactCharArray, the default value of the
\r
44 * compact array is 0.
\r
46 * @deprecated This API is ICU internal only.
\r
48 public CompactCharArray()
\r
54 * Constructor for CompactCharArray.
\r
55 * @param defaultValue the default value of the compact array.
\r
57 * @deprecated This API is ICU internal only.
\r
59 public CompactCharArray(char defaultValue)
\r
62 values = new char[UNICODECOUNT];
\r
63 indices = new char[INDEXCOUNT];
\r
64 hashes = new int[INDEXCOUNT];
\r
65 for (i = 0; i < UNICODECOUNT; ++i) {
\r
66 values[i] = defaultValue;
\r
68 for (i = 0; i < INDEXCOUNT; ++i) {
\r
69 indices[i] = (char)(i<<BLOCKSHIFT);
\r
74 this.defaultValue = defaultValue;
\r
78 * Constructor for CompactCharArray.
\r
79 * @param indexArray the indices of the compact array.
\r
80 * @param newValues the values of the compact array.
\r
81 * @exception IllegalArgumentException If the index is out of range.
\r
83 * @deprecated This API is ICU internal only.
\r
85 public CompactCharArray(char indexArray[],
\r
89 if (indexArray.length != INDEXCOUNT)
\r
90 throw new IllegalArgumentException("Index out of bounds.");
\r
91 for (i = 0; i < INDEXCOUNT; ++i) {
\r
92 char index = indexArray[i];
\r
93 if ((index < 0) || (index >= newValues.length+BLOCKCOUNT))
\r
94 throw new IllegalArgumentException("Index out of bounds.");
\r
96 indices = indexArray;
\r
102 * Constructor for CompactCharArray.
\r
104 * @param indexArray the RLE-encoded indices of the compact array.
\r
105 * @param valueArray the RLE-encoded values of the compact array.
\r
107 * @throws IllegalArgumentException if the index or value array is
\r
110 * @deprecated This API is ICU internal only.
\r
112 public CompactCharArray(String indexArray,
\r
115 this( Utility.RLEStringToCharArray(indexArray),
\r
116 Utility.RLEStringToCharArray(valueArray));
\r
120 * Get the mapped value of a Unicode character.
\r
121 * @param index the character to get the mapped value with
\r
122 * @return the mapped value of the given character
\r
124 * @deprecated This API is ICU internal only.
\r
126 public char elementAt(char index)
\r
128 int ix = (indices[index >> BLOCKSHIFT] & 0xFFFF)
\r
129 + (index & BLOCKMASK);
\r
130 return ix >= values.length ? defaultValue : values[ix];
\r
134 * Set a new value for a Unicode character.
\r
135 * Set automatically expands the array if it is compacted.
\r
136 * @param index the character to set the mapped value with
\r
137 * @param value the new mapped value
\r
139 * @deprecated This API is ICU internal only.
\r
141 public void setElementAt(char index, char value)
\r
145 values[(int)index] = value;
\r
146 touchBlock(index >> BLOCKSHIFT, value);
\r
150 * Set new values for a range of Unicode characters.
\r
152 * @param start the starting offset of the range
\r
153 * @param end the ending offset of the range
\r
154 * @param value the new mapped value
\r
156 * @deprecated This API is ICU internal only.
\r
158 public void setElementAt(char start, char end, char value)
\r
164 for (i = start; i <= end; ++i) {
\r
166 touchBlock(i >> BLOCKSHIFT, value);
\r
170 * Compact the array
\r
172 * @deprecated This API is ICU internal only.
\r
174 public void compact() {
\r
179 * Compact the array.
\r
181 * @deprecated This API is ICU internal only.
\r
183 public void compact(boolean exhaustive)
\r
186 int iBlockStart = 0;
\r
187 char iUntouched = 0xFFFF;
\r
190 char[] target = exhaustive ? new char[UNICODECOUNT] : values;
\r
192 for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
\r
193 indices[i] = 0xFFFF;
\r
194 boolean touched = blockTouched(i);
\r
195 if (!touched && iUntouched != 0xFFFF) {
\r
196 // If no values in this block were set, we can just set its
\r
197 // index to be the same as some other block with no values
\r
198 // set, assuming we've seen one yet.
\r
199 indices[i] = iUntouched;
\r
201 int jBlockStart = 0;
\r
202 // See if we can find a previously compacted block that's identical
\r
203 for (int j = 0; j < i; ++j, jBlockStart += BLOCKCOUNT) {
\r
204 if (hashes[i] == hashes[j] &&
\r
205 arrayRegionMatches(values, iBlockStart,
\r
206 values, jBlockStart, BLOCKCOUNT)) {
\r
207 indices[i] = indices[j];
\r
210 if (indices[i] == 0xFFFF) {
\r
211 int dest; // Where to copy
\r
213 // See if we can find some overlap with another block
\r
214 dest = FindOverlappingPosition(iBlockStart, target,
\r
217 // Just copy to the end; it's quicker
\r
220 int limit = dest + BLOCKCOUNT;
\r
221 if (limit > newSize) {
\r
222 for (int j = newSize; j < limit; ++j) {
\r
223 target[j] = values[iBlockStart + j - dest];
\r
227 indices[i] = (char)dest;
\r
229 // If this is the first untouched block we've seen,
\r
230 // remember its index.
\r
231 iUntouched = (char)jBlockStart;
\r
236 // we are done compacting, so now make the array shorter
\r
237 char[] result = new char[newSize];
\r
238 System.arraycopy(target, 0, result, 0, newSize);
\r
245 private int FindOverlappingPosition(int start, char[] tempValues, int tempCount)
\r
247 for (int i = 0; i < tempCount; i += 1) {
\r
248 int currentCount = BLOCKCOUNT;
\r
249 if (i + BLOCKCOUNT > tempCount) {
\r
250 currentCount = tempCount - i;
\r
252 if (arrayRegionMatches(values, start, tempValues, i, currentCount))
\r
259 * Convenience utility to compare two arrays of chars.
\r
260 * @param len the length to compare.
\r
261 * The start indices and start+len must be valid.
\r
263 final static boolean arrayRegionMatches(char[] source, int sourceStart,
\r
264 char[] target, int targetStart,
\r
267 int sourceEnd = sourceStart + len;
\r
268 int delta = targetStart - sourceStart;
\r
269 for (int i = sourceStart; i < sourceEnd; i++) {
\r
270 if (source[i] != target[i + delta])
\r
277 * Remember that a specified block was "touched", i.e. had a value set.
\r
278 * Untouched blocks can be skipped when compacting the array
\r
280 private final void touchBlock(int i, int value) {
\r
281 hashes[i] = (hashes[i] + (value<<1)) | 1;
\r
285 * Query whether a specified block was "touched", i.e. had a value set.
\r
286 * Untouched blocks can be skipped when compacting the array
\r
288 private final boolean blockTouched(int i) {
\r
289 return hashes[i] != 0;
\r
293 * For internal use only. Do not modify the result; the behavior of
\r
294 * modified results is undefined.
\r
296 * @deprecated This API is ICU internal only.
\r
298 public char[] getIndexArray()
\r
304 * For internal use only. Do not modify the result; the behavior of
\r
305 * modified results is undefined.
\r
307 * @deprecated This API is ICU internal only.
\r
309 public char[] getValueArray()
\r
315 * Overrides Cloneable
\r
317 * @deprecated This API is ICU internal only.
\r
319 public Object clone()
\r
322 CompactCharArray other = (CompactCharArray) super.clone();
\r
323 other.values = values.clone();
\r
324 other.indices = indices.clone();
\r
325 if (hashes != null) other.hashes = hashes.clone();
\r
327 } catch (CloneNotSupportedException e) {
\r
328 throw new IllegalStateException();
\r
333 * Compares the equality of two compact array objects.
\r
334 * @param obj the compact array object to be compared with this.
\r
335 * @return true if the current compact array object is the same
\r
336 * as the compact array object obj; false otherwise.
\r
338 * @deprecated This API is ICU internal only.
\r
340 public boolean equals(Object obj) {
\r
341 if (obj == null) return false;
\r
342 if (this == obj) // quick check
\r
344 if (getClass() != obj.getClass()) // same class?
\r
346 CompactCharArray other = (CompactCharArray) obj;
\r
347 for (int i = 0; i < UNICODECOUNT; i++) {
\r
348 // could be sped up later
\r
349 if (elementAt((char)i) != other.elementAt((char)i))
\r
352 return true; // we made it through the guantlet.
\r
356 * Generates the hash code for the compact array object
\r
358 * @deprecated This API is ICU internal only.
\r
360 public int hashCode() {
\r
362 int increment = Math.min(3, values.length/16);
\r
363 for (int i = 0; i < values.length; i+= increment) {
\r
364 result = result * 37 + values[i];
\r
370 // --------------------------------------------------------------
\r
372 // --------------------------------------------------------------
\r
375 * Expanding takes the array back to a 65536 element array.
\r
377 private void expand()
\r
382 hashes = new int[INDEXCOUNT];
\r
383 tempArray = new char[UNICODECOUNT];
\r
384 for (i = 0; i < UNICODECOUNT; ++i) {
\r
385 tempArray[i] = elementAt((char)i);
\r
387 for (i = 0; i < INDEXCOUNT; ++i) {
\r
388 indices[i] = (char)(i<<BLOCKSHIFT);
\r
391 values = tempArray;
\r
397 * @deprecated This API is ICU internal only.
\r
399 public static final int BLOCKSHIFT = 5; // NormalizerBuilder needs - liu
\r
400 static final int BLOCKCOUNT =(1<<BLOCKSHIFT);
\r
401 static final int INDEXSHIFT =(16-BLOCKSHIFT);
\r
402 static final int INDEXCOUNT =(1<<INDEXSHIFT);
\r
403 static final int BLOCKMASK = BLOCKCOUNT - 1;
\r
405 private char values[];
\r
406 private char indices[];
\r
407 private int[] hashes;
\r
408 private boolean isCompact;
\r