/*
 ******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
\r
8 package com.ibm.icu.impl;
\r
10 import java.io.DataInputStream;
\r
11 import java.io.IOException;
\r
12 import java.io.InputStream;
\r
14 import com.ibm.icu.text.UTF16;
\r
/**
 * Trie implementation which stores data in char, 16 bits.
 * @see com.ibm.icu.impl.Trie
 * @since release 2.1, Jan 01 2002
 */
\r
// Note: the block calculations still need to be handled later, since CharTrie
// in ICU4C uses the same index array.
\r
25 public class CharTrie extends Trie
\r
27 // public constructors ---------------------------------------------
\r
30 * <p>Creates a new Trie with the settings for the trie data.</p>
\r
31 * <p>Unserialize the 32-bit-aligned input stream and use the data for the
\r
33 * @param inputStream file input stream to a ICU data file, containing
\r
35 * @param dataManipulate object which provides methods to parse the char
\r
37 * @throws IOException thrown when data reading fails
\r
39 public CharTrie(InputStream inputStream,
\r
40 DataManipulate dataManipulate) throws IOException
\r
42 super(inputStream, dataManipulate);
\r
44 if (!isCharTrie()) {
\r
45 throw new IllegalArgumentException(
\r
46 "Data given does not belong to a char trie.");
\r
48 m_friendAgent_ = new FriendAgent();
\r
52 * Make a dummy CharTrie.
\r
53 * A dummy trie is an empty runtime trie, used when a real data trie cannot
\r
56 * The trie always returns the initialValue,
\r
57 * or the leadUnitValue for lead surrogate code points.
\r
58 * The Latin-1 part is always set up to be linear.
\r
60 * @param initialValue the initial value that is set for all code points
\r
61 * @param leadUnitValue the value for lead surrogate code _units_ that do not
\r
62 * have associated supplementary data
\r
63 * @param dataManipulate object which provides methods to parse the char data
\r
65 @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
\r
66 public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
\r
67 super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
\r
69 int dataLength, latin1Length, i, limit;
\r
72 /* calculate the actual size of the dummy trie data */
\r
74 /* max(Latin-1, block 0) */
\r
75 dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
\r
76 if(leadUnitValue!=initialValue) {
\r
77 dataLength+=DATA_BLOCK_LENGTH;
\r
79 m_data_=new char[dataLength];
\r
80 m_dataLength_=dataLength;
\r
82 m_initialValue_=(char)initialValue;
\r
84 /* fill the index and data arrays */
\r
86 /* indexes are preset to 0 (block 0) */
\r
89 for(i=0; i<latin1Length; ++i) {
\r
90 m_data_[i]=(char)initialValue;
\r
93 if(leadUnitValue!=initialValue) {
\r
94 /* indexes for lead surrogate code units to the block after Latin-1 */
\r
95 block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
\r
96 i=0xd800>>INDEX_STAGE_1_SHIFT_;
\r
97 limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
\r
98 for(; i<limit; ++i) {
\r
102 /* data for lead surrogate code units */
\r
103 limit=latin1Length+DATA_BLOCK_LENGTH;
\r
104 for(i=latin1Length; i<limit; ++i) {
\r
105 m_data_[i]=(char)leadUnitValue;
\r
109 m_friendAgent_ = new FriendAgent();
\r
113 * Java friend implementation
\r
115 public class FriendAgent
\r
118 * Gives out the index array of the trie
\r
119 * @return index array of trie
\r
121 public char[] getPrivateIndex()
\r
126 * Gives out the data array of the trie
\r
127 * @return data array of trie
\r
129 public char[] getPrivateData()
\r
134 * Gives out the data offset in the trie
\r
135 * @return data offset in the trie
\r
137 public int getPrivateInitialValue()
\r
139 return m_initialValue_;
\r
143 // public methods --------------------------------------------------
\r
146 * Java friend implementation
\r
147 * To store the index and data array into the argument.
\r
148 * @param friend java friend UCharacterProperty object to store the array
\r
150 public void putIndexData(UCharacterProperty friend)
\r
152 friend.setIndexData(m_friendAgent_);
\r
156 * Gets the value associated with the codepoint.
\r
157 * If no value is associated with the codepoint, a default value will be
\r
159 * @param ch codepoint
\r
160 * @return offset to data
\r
162 public final char getCodePointValue(int ch)
\r
166 // fastpath for U+0000..U+D7FF
\r
167 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
\r
168 // copy of getRawOffset()
\r
169 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
\r
170 + (ch & INDEX_STAGE_3_MASK_);
\r
171 return m_data_[offset];
\r
174 // handle U+D800..U+10FFFF
\r
175 offset = getCodePointOffset(ch);
\r
177 // return -1 if there is an error, in this case we return the default
\r
178 // value: m_initialValue_
\r
179 return (offset >= 0) ? m_data_[offset] : m_initialValue_;
\r
183 * Gets the value to the data which this lead surrogate character points
\r
185 * Returned data may contain folding offset information for the next
\r
186 * trailing surrogate character.
\r
187 * This method does not guarantee correct results for trail surrogates.
\r
188 * @param ch lead surrogate character
\r
189 * @return data value
\r
191 public final char getLeadValue(char ch)
\r
193 return m_data_[getLeadOffset(ch)];
\r
197 * Get the value associated with the BMP code point.
\r
198 * Lead surrogate code points are treated as normal code points, with
\r
199 * unfolded values that may differ from getLeadValue() results.
\r
200 * @param ch the input BMP code point
\r
201 * @return trie data value associated with the BMP codepoint
\r
203 public final char getBMPValue(char ch)
\r
205 return m_data_[getBMPOffset(ch)];
\r
209 * Get the value associated with a pair of surrogates.
\r
210 * @param lead a lead surrogate
\r
211 * @param trail a trail surrogate
\r
213 public final char getSurrogateValue(char lead, char trail)
\r
215 int offset = getSurrogateOffset(lead, trail);
\r
217 return m_data_[offset];
\r
219 return m_initialValue_;
\r
223 * <p>Get a value from a folding offset (from the value of a lead surrogate)
\r
224 * and a trail surrogate.</p>
\r
226 * @param leadvalue value associated with the lead surrogate which contains
\r
227 * the folding offset
\r
228 * @param trail surrogate
\r
229 * @return trie data value associated with the trail character
\r
231 public final char getTrailValue(int leadvalue, char trail)
\r
233 if (m_dataManipulate_ == null) {
\r
234 throw new NullPointerException(
\r
235 "The field DataManipulate in this Trie is null");
\r
237 int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
\r
239 return m_data_[getRawOffset(offset,
\r
240 (char)(trail & SURROGATE_MASK_))];
\r
242 return m_initialValue_;
\r
246 * <p>Gets the latin 1 fast path value.</p>
\r
247 * <p>Note this only works if latin 1 characters have their own linear
\r
249 * @param ch latin 1 characters
\r
250 * @return value associated with latin character
\r
252 public final char getLatin1LinearValue(char ch)
\r
254 return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
\r
258 * Checks if the argument Trie has the same data as this Trie
\r
259 * @param other Trie to check
\r
260 * @return true if the argument Trie has the same data as this Trie, false
\r
264 public boolean equals(Object other)
\r
266 boolean result = super.equals(other);
\r
267 if (result && other instanceof CharTrie) {
\r
268 CharTrie othertrie = (CharTrie)other;
\r
269 return m_initialValue_ == othertrie.m_initialValue_;
\r
275 // protected methods -----------------------------------------------
\r
278 * <p>Parses the input stream and stores its trie content into a index and
\r
280 * @param inputStream data input stream containing trie data
\r
281 * @exception IOException thrown when data reading fails
\r
283 protected final void unserialize(InputStream inputStream)
\r
286 DataInputStream input = new DataInputStream(inputStream);
\r
287 int indexDataLength = m_dataOffset_ + m_dataLength_;
\r
288 m_index_ = new char[indexDataLength];
\r
289 for (int i = 0; i < indexDataLength; i ++) {
\r
290 m_index_[i] = input.readChar();
\r
292 m_data_ = m_index_;
\r
293 m_initialValue_ = m_data_[m_dataOffset_];
\r
297 * Gets the offset to the data which the surrogate pair points to.
\r
298 * @param lead lead surrogate
\r
299 * @param trail trailing surrogate
\r
300 * @return offset to data
\r
302 protected final int getSurrogateOffset(char lead, char trail)
\r
304 if (m_dataManipulate_ == null) {
\r
305 throw new NullPointerException(
\r
306 "The field DataManipulate in this Trie is null");
\r
309 // get fold position for the next trail surrogate
\r
310 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
\r
312 // get the real data from the folded lead/trail units
\r
314 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
\r
317 // return -1 if there is an error, in this case we return the default
\r
318 // value: m_initialValue_
\r
323 * Gets the value at the argument index.
\r
324 * For use internally in TrieIterator.
\r
325 * @param index value at index will be retrieved
\r
326 * @return 32 bit value
\r
327 * @see com.ibm.icu.impl.TrieIterator
\r
329 protected final int getValue(int index)
\r
331 return m_data_[index];
\r
335 * Gets the default initial value
\r
336 * @return 32 bit value
\r
338 protected final int getInitialValue()
\r
340 return m_initialValue_;
\r
343 // private data members --------------------------------------------
\r
348 private char m_initialValue_;
\r
350 * Array of char data
\r
352 private char m_data_[];
\r
354 * Agent for friends
\r
356 private FriendAgent m_friendAgent_;
\r