2 ******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 ******************************************************************************
\r
8 package com.ibm.icu.impl;
\r
10 import java.io.DataInputStream;
\r
11 import java.io.IOException;
\r
12 import java.io.InputStream;
\r
13 import java.util.Arrays;
\r
15 import com.ibm.icu.text.UTF16;
\r
18 * Trie implementation which stores data in int, 32 bits.
\r
20 * @see com.ibm.icu.impl.Trie
\r
21 * @since release 2.1, Jan 01 2002
\r
// Concrete Trie subclass whose trie data is held as 32-bit int values.
// NOTE(review): the line that carried the opening brace of the class body is
// missing from this listing (embedded numbers jump from 23 to 25).
23 public class IntTrie extends Trie
\r
25 // public constructors ---------------------------------------------
\r
28 * <p>Creates a new Trie with the settings for the trie data.</p>
\r
29 * <p>Unserialize the 32-bit-aligned input stream and use the data for the
\r
31 * @param inputStream file input stream to an ICU data file, containing
\r
33 * @param dataManipulate object which provides methods to parse the char
\r
35 * @throws IOException thrown when data reading fails
\r
// Stream constructor: passes the input stream and the DataManipulate object
// to the Trie superclass constructor, and reports data that is not an int
// trie with an IllegalArgumentException.
// NOTE(review): this listing dropped several lines here (embedded numbers
// jump 37 -> 40 -> 42 -> 43). The braces and the condition that guards the
// throw below are not visible; as shown the throw would be unconditional.
// Restore the guard from the upstream file before compiling.
37 public IntTrie(InputStream inputStream, DataManipulate dataManipulate)
\r
40 super(inputStream, dataManipulate);
\r
// Rejects input whose header does not describe an int trie (per the message).
42 throw new IllegalArgumentException(
\r
43 "Data given does not belong to a int trie.");
\r
48 * Make a dummy IntTrie.
\r
49 * A dummy trie is an empty runtime trie, used when a real data trie cannot
\r
52 * The trie always returns the initialValue,
\r
53 * or the leadUnitValue for lead surrogate code points.
\r
54 * The Latin-1 part is always set up to be linear.
\r
56 * @param initialValue the initial value that is set for all code points
\r
57 * @param leadUnitValue the value for lead surrogate code _units_ that do not
\r
58 * have associated supplementary data
\r
59 * @param dataManipulate object which provides methods to parse the char data
\r
61 @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
\r
62 public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
\r
63 super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
\r
65 int dataLength, latin1Length, i, limit;
\r
68 /* calculate the actual size of the dummy trie data */
\r
70 /* max(Latin-1, block 0) */
\r
71 dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
\r
72 if(leadUnitValue!=initialValue) {
\r
73 dataLength+=DATA_BLOCK_LENGTH;
\r
75 m_data_=new int[dataLength];
\r
76 m_dataLength_=dataLength;
\r
78 m_initialValue_=initialValue;
\r
80 /* fill the index and data arrays */
\r
82 /* indexes are preset to 0 (block 0) */
\r
85 for(i=0; i<latin1Length; ++i) {
\r
86 m_data_[i]=initialValue;
\r
89 if(leadUnitValue!=initialValue) {
\r
90 /* indexes for lead surrogate code units to the block after Latin-1 */
\r
91 block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
\r
92 i=0xd800>>INDEX_STAGE_1_SHIFT_;
\r
93 limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
\r
94 for(; i<limit; ++i) {
\r
98 /* data for lead surrogate code units */
\r
99 limit=latin1Length+DATA_BLOCK_LENGTH;
\r
100 for(i=latin1Length; i<limit; ++i) {
\r
101 m_data_[i]=leadUnitValue;
\r
106 // public methods --------------------------------------------------
\r
109 * Gets the value associated with the codepoint.
\r
110 * If no value is associated with the codepoint, a default value will be
\r
112 * @param ch codepoint
\r
113 * @return offset to data
\r
115 public final int getCodePointValue(int ch)
\r
119 // fastpath for U+0000..U+D7FF
\r
120 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
\r
121 // copy of getRawOffset()
\r
122 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
\r
123 + (ch & INDEX_STAGE_3_MASK_);
\r
124 return m_data_[offset];
\r
127 // handle U+D800..U+10FFFF
\r
128 offset = getCodePointOffset(ch);
\r
129 return (offset >= 0) ? m_data_[offset] : m_initialValue_;
\r
133 * Gets the value to the data which this lead surrogate character points
\r
135 * Returned data may contain folding offset information for the next
\r
136 * trailing surrogate character.
\r
137 * This method does not guarantee correct results for trail surrogates.
\r
138 * @param ch lead surrogate character
\r
139 * @return data value
\r
141 public final int getLeadValue(char ch)
\r
143 return m_data_[getLeadOffset(ch)];
\r
147 * Get the value associated with the BMP code point.
\r
148 * Lead surrogate code points are treated as normal code points, with
\r
149 * unfolded values that may differ from getLeadValue() results.
\r
150 * @param ch the input BMP code point
\r
151 * @return trie data value associated with the BMP codepoint
\r
153 public final int getBMPValue(char ch)
\r
155 return m_data_[getBMPOffset(ch)];
\r
159 * Get the value associated with a pair of surrogates.
\r
160 * @param lead a lead surrogate
\r
161 * @param trail a trail surrogate
\r
// Looks up the trie value for a supplementary code point given as a surrogate
// pair. Throws IllegalArgumentException unless `lead` passes
// UTF16.isLeadSurrogate and `trail` passes UTF16.isTrailSurrogate.
// NOTE(review): brace-only lines and at least one statement are missing from
// this listing (embedded numbers skip 164, 168, 171, 173, 175, 176).
163 public final int getSurrogateValue(char lead, char trail)
\r
165 if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
\r
166 throw new IllegalArgumentException(
\r
167 "Argument characters do not form a supplementary character");
\r
169 // get fold position for the next trail surrogate
\r
// Delegates the offset computation to getSurrogateOffset(lead, trail).
170 int offset = getSurrogateOffset(lead, trail);
\r
172 // get the real data from the folded lead/trail units
\r
// NOTE(review): two consecutive returns imply a guard on `offset` around the
// return below; its exact condition is not visible here -- confirm upstream.
174 return m_data_[offset];
\r
177 // return m_initialValue_ if there is an error
\r
178 return m_initialValue_;
\r
182 * Get a value from a folding offset (from the value of a lead surrogate)
\r
183 * and a trail surrogate.
\r
184 * @param leadvalue the value of a lead surrogate that contains the
\r
186 * @param trail surrogate
\r
187 * @return trie data value associated with the trail character
\r
// Resolves a trail surrogate's value from the data value of its lead
// surrogate: asks m_dataManipulate_ for the folding offset encoded in
// `leadvalue`, then reads m_data_ at the raw offset for the masked trail unit.
// Throws NullPointerException when m_dataManipulate_ is null.
// NOTE(review): brace-only lines and at least one statement are missing from
// this listing (embedded numbers skip 190, 194, 196, 199).
189 public final int getTrailValue(int leadvalue, char trail)
\r
191 if (m_dataManipulate_ == null) {
\r
192 throw new NullPointerException(
\r
193 "The field DataManipulate in this Trie is null");
\r
195 int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
\r
// NOTE(review): two consecutive returns imply a guard on `offset` around the
// return below; its exact condition is not visible here -- confirm upstream.
197 return m_data_[getRawOffset(offset,
\r
198 (char)(trail & SURROGATE_MASK_))];
\r
// Fallback: the trie's initial value.
200 return m_initialValue_;
\r
204 * <p>Gets the latin 1 fast path value.</p>
\r
205 * <p>Note this only works if latin 1 characters have their own linear
\r
207 * @param ch latin 1 characters
\r
208 * @return value associated with latin character
\r
210 public final int getLatin1LinearValue(char ch)
\r
212 return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
\r
216 * Checks if the argument Trie has the same data as this Trie
\r
217 * @param other Trie to check
\r
218 * @return true if the argument Trie has the same data as this Trie, false
\r
222 public boolean equals(Object other)
\r
224 boolean result = super.equals(other);
\r
225 if (result && other instanceof IntTrie) {
\r
226 IntTrie othertrie = (IntTrie)other;
\r
227 if (m_initialValue_ != othertrie.m_initialValue_
\r
228 || !Arrays.equals(m_data_, othertrie.m_data_)) {
\r
237 // protected methods -----------------------------------------------
\r
240 * <p>Parses the input stream and stores its trie content into a index and
\r
242 * @param inputStream data input stream containing trie data
\r
243 * @exception IOException thrown when data reading fails
\r
245 protected final void unserialize(InputStream inputStream)
\r
248 super.unserialize(inputStream);
\r
249 // one used for initial value
\r
250 m_data_ = new int[m_dataLength_];
\r
251 DataInputStream input = new DataInputStream(inputStream);
\r
252 for (int i = 0; i < m_dataLength_; i ++) {
\r
253 m_data_[i] = input.readInt();
\r
255 m_initialValue_ = m_data_[0];
\r
259 * Gets the offset to the data which the surrogate pair points to.
\r
260 * @param lead lead surrogate
\r
261 * @param trail trailing surrogate
\r
262 * @return offset to data
\r
// Computes the data offset for a surrogate pair: takes the folding offset
// that m_dataManipulate_ extracts from the lead surrogate's value, then the
// raw offset for the masked trail unit.
// Throws NullPointerException when m_dataManipulate_ is null.
// NOTE(review): brace-only lines and at least two statements are missing from
// this listing (embedded numbers skip 265, 269, 272, 274, 276, 277, 280).
264 protected final int getSurrogateOffset(char lead, char trail)
\r
266 if (m_dataManipulate_ == null) {
\r
267 throw new NullPointerException(
\r
268 "The field DataManipulate in this Trie is null");
\r
270 // get fold position for the next trail surrogate
\r
271 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
\r
273 // get the real data from the folded lead/trail units
\r
// NOTE(review): a guard on `offset` presumably surrounds the return below;
// its exact condition is not visible here -- confirm upstream.
275 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
\r
278 // return -1 if there is an error, in this case we return the default
\r
279 // value: m_initialValue_
\r
// NOTE(review): the final return described by the comment above (listing
// line 280) is missing from this listing.
\r
284 * Gets the value at the argument index.
\r
285 * For use internally in TrieIterator
\r
286 * @param index value at index will be retrieved
\r
287 * @return 32 bit value
\r
288 * @see com.ibm.icu.impl.TrieIterator
\r
290 protected final int getValue(int index)
\r
292 return m_data_[index];
\r
296 * Gets the default initial value
\r
297 * @return 32 bit value
\r
299 protected final int getInitialValue()
\r
301 return m_initialValue_;
\r
304 // package private methods -----------------------------------------
\r
/**
 * Internal constructor for builder use
 * @param index the index array to be slotted into this trie
 * @param data the data array to be slotted into this trie
 * @param initialvalue the initial value for this trie
 * @param options trie options to use
 * @param datamanipulate folding implementation
 */
IntTrie(char index[], int data[], int initialvalue, int options,
        DataManipulate datamanipulate)
{
    super(index, options, datamanipulate);
    // NOTE(review): this assignment was missing from the damaged listing
    // (listing line 318); it is required for the m_data_.length read below
    // and matches the documented "slotted into this trie" contract.
    // The array is stored as-is, not copied.
    m_data_ = data;
    m_dataLength_ = m_data_.length;
    m_initialValue_ = initialvalue;
}
\r
323 // private data members --------------------------------------------
\r
328 private int m_initialValue_;
\r
330 * Array of char data
\r
332 private int m_data_[];
\r