2 ******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 ******************************************************************************
\r
9 * Store bits (Unicode character properties) in bit set vectors.
\r
11 * This is a port of the C++ class UPropsVectors from ICU4C
\r
13 * @author Shaopeng Jia
\r
17 package com.ibm.icu.impl;
\r
19 import java.util.Arrays;
\r
20 import java.util.Comparator;
\r
23 * Unicode Properties Vectors associated with code point ranges.
\r
25 * Rows of primitive integers in a contiguous array store the range limits and
\r
26 * the properties vectors.
\r
28 * In each row, row[0] contains the start code point and row[1] contains the
\r
29 * limit code point, which is the start of the next range.
\r
31 * Initially, there is only one range [0..0x110000] with values 0.
\r
33 * It would be possible to store only one range boundary per row, but
\r
34 * self-contained rows allow to later sort them by contents.
\r
36 public class PropsVectors {
\r
38 private int columns; // number of columns, plus two for start
\r
40 private int maxRows;
\r
42 private int prevRow; // search optimization: remember last row seen
\r
43 private boolean isCompacted;
\r
45 // internal function to compare elements in v and target. Return true iff
\r
46 // elements in v starting from index1 to index1 + length - 1
\r
47 // are exactly the same as elements in target
\r
48 // starting from index2 to index2 + length - 1
\r
49 private boolean areElementsSame(int index1, int[] target, int index2,
\r
51 for (int i = 0; i < length; ++i) {
\r
52 if (v[index1 + i] != target[index2 + i]) {
\r
59 // internal function which given rangeStart, returns
\r
60 // index where v[index]<=rangeStart<v[index+1].
\r
61 // The returned index is a multiple of columns, and therefore
\r
62 // points to the start of a row.
\r
63 private int findRow(int rangeStart) {
\r
66 // check the vicinity of the last-seen row (start
\r
67 // searching with an unrolled loop)
\r
69 index = prevRow * columns;
\r
70 if (rangeStart >= v[index]) {
\r
71 if (rangeStart < v[index + 1]) {
\r
72 // same row as last seen
\r
76 if (rangeStart < v[index + 1]) {
\r
81 if (rangeStart < v[index + 1]) {
\r
84 } else if ((rangeStart - v[index + 1]) < 10) {
\r
85 // we are close, continue looping
\r
90 } while (rangeStart >= v[index + 1]);
\r
95 } else if (rangeStart < v[1]) {
\r
96 // the very first row
\r
101 // do a binary search for the start of the range
\r
105 while (start < limit - 1) {
\r
106 mid = (start + limit) / 2;
\r
107 index = columns * mid;
\r
108 if (rangeStart < v[index]) {
\r
110 } else if (rangeStart < v[index + 1]) {
\r
118 // must be found because all ranges together always cover
\r
121 index = start * columns;
\r
126 * Special pseudo code points for storing the initialValue and the
\r
127 * errorValue which are used to initialize a Trie or similar.
\r
129 public final static int FIRST_SPECIAL_CP = 0x110000;
\r
130 public final static int INITIAL_VALUE_CP = 0x110000;
\r
131 public final static int ERROR_VALUE_CP = 0x110001;
\r
132 public final static int MAX_CP = 0x110001;
\r
134 public final static int INITIAL_ROWS = 1 << 12;
\r
135 public final static int MEDIUM_ROWS = 1 << 16;
\r
136 public final static int MAX_ROWS = MAX_CP + 1;
\r
140 * @param numOfColumns Number of value integers (32-bit int) per row.
\r
142 public PropsVectors(int numOfColumns) {
\r
143 if (numOfColumns < 1) {
\r
144 throw new IllegalArgumentException("numOfColumns need to be no "
\r
145 + "less than 1; but it is " + numOfColumns);
\r
147 columns = numOfColumns + 2; // count range start and limit columns
\r
148 v = new int[INITIAL_ROWS * columns];
\r
149 maxRows = INITIAL_ROWS;
\r
150 rows = 2 + (MAX_CP - FIRST_SPECIAL_CP);
\r
152 isCompacted = false;
\r
155 int index = columns;
\r
156 for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) {
\r
158 v[index + 1] = cp + 1;
\r
164 * In rows for code points [start..end], select the column, reset the mask
\r
165 * bits and set the value bits (ANDed with the mask).
\r
167 * @throws IllegalArgumentException
\r
169 * @throws IllegalStateException
\r
171 * @throws IndexOutOfBoundsException
\r
173 public void setValue(int start, int end, int column, int value, int mask) {
\r
174 if (start < 0 || start > end || end > MAX_CP || column < 0
\r
175 || column >= (columns - 2)) {
\r
176 throw new IllegalArgumentException();
\r
179 throw new IllegalStateException("Shouldn't be called after"
\r
183 int firstRow, lastRow;
\r
184 int limit = end + 1;
\r
185 boolean splitFirstRow, splitLastRow;
\r
186 // skip range start and limit columns
\r
190 // find the rows whose ranges overlap with the input range
\r
191 // find the first and last row, always successful
\r
192 firstRow = findRow(start);
\r
193 lastRow = findRow(end);
\r
196 * Rows need to be split if they partially overlap with the input range
\r
197 * (only possible for the first and last rows) and if their value
\r
198 * differs from the input value.
\r
200 splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask));
\r
201 splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask));
\r
203 // split first/last rows if necessary
\r
204 if (splitFirstRow || splitLastRow) {
\r
205 int rowsToExpand = 0;
\r
206 if (splitFirstRow) {
\r
209 if (splitLastRow) {
\r
212 int newMaxRows = 0;
\r
213 if ((rows + rowsToExpand) > maxRows) {
\r
214 if (maxRows < MEDIUM_ROWS) {
\r
215 newMaxRows = MEDIUM_ROWS;
\r
216 } else if (maxRows < MAX_ROWS) {
\r
217 newMaxRows = MAX_ROWS;
\r
219 throw new IndexOutOfBoundsException(
\r
220 "MAX_ROWS exceeded! Increase it to a higher value" +
\r
221 "in the implementation");
\r
223 int[] temp = new int[newMaxRows * columns];
\r
224 System.arraycopy(v, 0, temp, 0, rows * columns);
\r
226 maxRows = newMaxRows;
\r
229 // count the number of row cells to move after the last row,
\r
231 int count = (rows * columns) - (lastRow + columns);
\r
233 System.arraycopy(v, lastRow + columns, v, lastRow
\r
234 + (1 + rowsToExpand) * columns, count);
\r
236 rows += rowsToExpand;
\r
238 // split the first row, and move the firstRow pointer
\r
239 // to the second part
\r
240 if (splitFirstRow) {
\r
241 // copy all affected rows up one and move the lastRow pointer
\r
242 count = lastRow - firstRow + columns;
\r
243 System.arraycopy(v, firstRow, v, firstRow + columns, count);
\r
244 lastRow += columns;
\r
246 // split the range and move the firstRow pointer
\r
247 v[firstRow + 1] = v[firstRow + columns] = start;
\r
248 firstRow += columns;
\r
251 // split the last row
\r
252 if (splitLastRow) {
\r
253 // copy the last row data
\r
254 System.arraycopy(v, lastRow, v, lastRow + columns, columns);
\r
256 // split the range and move the firstRow pointer
\r
257 v[lastRow + 1] = v[lastRow + columns] = limit;
\r
261 // set the "row last seen" to the last row for the range
\r
262 prevRow = lastRow / columns;
\r
264 // set the input value in all remaining rows
\r
265 firstRow += column;
\r
269 v[firstRow] = (v[firstRow] & mask) | value;
\r
270 if (firstRow == lastRow) {
\r
273 firstRow += columns;
\r
278 * Always returns 0 if called after compact().
\r
280 public int getValue(int c, int column) {
\r
281 if (isCompacted || c < 0 || c > MAX_CP || column < 0
\r
282 || column >= (columns - 2)) {
\r
285 int index = findRow(c);
\r
286 return v[index + 2 + column];
\r
290 * Returns an array which contains value elements
\r
291 * in row rowIndex.
\r
293 * @throws IllegalStateException
\r
294 * @throws IllegalArgumentException
\r
296 public int[] getRow(int rowIndex) {
\r
298 throw new IllegalStateException(
\r
299 "Illegal Invocation of the method after compact()");
\r
301 if (rowIndex < 0 || rowIndex > rows) {
\r
302 throw new IllegalArgumentException("rowIndex out of bound!");
\r
304 int[] rowToReturn = new int[columns - 2];
\r
305 System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0,
\r
307 return rowToReturn;
\r
311 * Returns an int which is the start codepoint
\r
314 * @throws IllegalStateException
\r
316 * @throws IllegalArgumentException
\r
318 public int getRowStart(int rowIndex) {
\r
320 throw new IllegalStateException(
\r
321 "Illegal Invocation of the method after compact()");
\r
323 if (rowIndex < 0 || rowIndex > rows) {
\r
324 throw new IllegalArgumentException("rowIndex out of bound!");
\r
326 return v[rowIndex * columns];
\r
330 * Returns an int which is the limit codepoint
\r
331 * minus 1 in row rowIndex.
\r
333 * @throws IllegalStateException
\r
335 * @throws IllegalArgumentException
\r
337 public int getRowEnd(int rowIndex) {
\r
339 throw new IllegalStateException(
\r
340 "Illegal Invocation of the method after compact()");
\r
342 if (rowIndex < 0 || rowIndex > rows) {
\r
343 throw new IllegalArgumentException("rowIndex out of bound!");
\r
345 return v[rowIndex * columns + 1] - 1;
\r
349 * Compact the vectors:
\r
350 * - modify the memory
\r
351 * - keep only unique vectors
\r
352 * - store them contiguously from the beginning of the memory
\r
353 * - for each (non-unique) row, call the respective function in
\r
356 * The handler's rowIndex is the index of the row in the compacted
\r
357 * memory block. Therefore, it starts at 0 increases in increments of the
\r
360 * In a first phase, only special values are delivered (each exactly once).
\r
361 * Then CompactHandler::startRealValues() is called
\r
362 * where rowIndex is the length of the compacted array.
\r
363 * Then, in the second phase, the CompactHandler::setRowIndexForRange() is
\r
364 * called for each row of real values.
\r
366 public void compact(CompactHandler compactor) {
\r
371 // Set the flag now: Sorting and compacting destroys the builder
\r
373 isCompacted = true;
\r
374 int valueColumns = columns - 2; // not counting start & limit
\r
376 // sort the properties vectors to find unique vector values
\r
377 Integer[] indexArray = new Integer[rows];
\r
378 for (int i = 0; i < rows; ++i) {
\r
379 indexArray[i] = new Integer(columns * i);
\r
382 Arrays.sort(indexArray, new Comparator<Integer>() {
\r
383 public int compare(Integer o1, Integer o2) {
\r
384 int indexOfRow1 = o1.intValue();
\r
385 int indexOfRow2 = o2.intValue();
\r
386 int count = columns; // includes start/limit columns
\r
388 // start comparing after start/limit
\r
389 // but wrap around to them
\r
392 if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) {
\r
393 return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1
\r
396 if (++index == columns) {
\r
399 } while (--count > 0);
\r
406 * Find and set the special values. This has to do almost the same work
\r
407 * as the compaction below, to find the indexes where the special-value
\r
410 int count = -valueColumns;
\r
411 for (int i = 0; i < rows; ++i) {
\r
412 int start = v[indexArray[i].intValue()];
\r
414 // count a new values vector if it is different
\r
415 // from the current one
\r
416 if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v,
\r
417 indexArray[i-1].intValue() + 2, valueColumns)) {
\r
418 count += valueColumns;
\r
421 if (start == INITIAL_VALUE_CP) {
\r
422 compactor.setRowIndexForInitialValue(count);
\r
423 } else if (start == ERROR_VALUE_CP) {
\r
424 compactor.setRowIndexForErrorValue(count);
\r
428 // count is at the beginning of the last vector,
\r
429 // add valueColumns to include that last vector
\r
430 count += valueColumns;
\r
432 // Call the handler once more to signal the start of
\r
433 // delivering real values.
\r
434 compactor.startRealValues(count);
\r
437 * Move vector contents up to a contiguous array with only unique
\r
438 * vector values, and call the handler function for each vector.
\r
440 * This destroys the Properties Vector structure and replaces it
\r
441 * with an array of just vector values.
\r
443 int[] temp = new int[count];
\r
444 count = -valueColumns;
\r
445 for (int i = 0; i < rows; ++i) {
\r
446 int start = v[indexArray[i].intValue()];
\r
447 int limit = v[indexArray[i].intValue() + 1];
\r
449 // count a new values vector if it is different
\r
450 // from the current one
\r
451 if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
\r
452 temp, count, valueColumns)) {
\r
453 count += valueColumns;
\r
454 System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
\r
458 if (start < FIRST_SPECIAL_CP) {
\r
459 compactor.setRowIndexForRange(start, limit - 1, count);
\r
464 // count is at the beginning of the last vector,
\r
465 // add one to include that last vector
\r
466 rows = count / valueColumns + 1;
\r
470 * Get the vectors array after calling compact().
\r
472 * @throws IllegalStateException
\r
474 public int[] getCompactedArray() {
\r
475 if (!isCompacted) {
\r
476 throw new IllegalStateException(
\r
477 "Illegal Invocation of the method before compact()");
\r
483 * Get the number of rows for the compacted array.
\r
485 * @throws IllegalStateException
\r
487 public int getCompactedRows() {
\r
488 if (!isCompacted) {
\r
489 throw new IllegalStateException(
\r
490 "Illegal Invocation of the method before compact()");
\r
496 * Get the number of columns for the compacted array.
\r
498 * @throws IllegalStateException
\r
500 public int getCompactedColumns() {
\r
501 if (!isCompacted) {
\r
502 throw new IllegalStateException(
\r
503 "Illegal Invocation of the method before compact()");
\r
505 return columns - 2;
\r
509 * Call compact(), create a IntTrie with indexes into the compacted
\r
512 public IntTrie compactToTrieWithRowIndexes() {
\r
513 PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler();
\r
514 compact(compactor);
\r
515 return compactor.builder.serialize(new DefaultGetFoldedValue(
\r
516 compactor.builder), new DefaultGetFoldingOffset());
\r
519 // inner class implementation of Trie.DataManipulate
\r
520 private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
\r
521 public int getFoldingOffset(int value) {
\r
526 // inner class implementation of TrieBuilder.DataManipulate
\r
527 private static class DefaultGetFoldedValue implements
\r
528 TrieBuilder.DataManipulate {
\r
529 private IntTrieBuilder builder;
\r
531 public DefaultGetFoldedValue(IntTrieBuilder inBuilder) {
\r
532 builder = inBuilder;
\r
535 public int getFoldedValue(int start, int offset) {
\r
536 int initialValue = builder.m_initialValue_;
\r
537 int limit = start + 0x400;
\r
538 while (start < limit) {
\r
539 boolean[] inBlockZero = new boolean[1];
\r
540 int value = builder.getValue(start, inBlockZero);
\r
541 if (inBlockZero[0]) {
\r
542 start += TrieBuilder.DATA_BLOCK_LENGTH;
\r
543 } else if (value != initialValue) {
\r
553 public static interface CompactHandler {
\r
554 public void setRowIndexForRange(int start, int end, int rowIndex);
\r
555 public void setRowIndexForInitialValue(int rowIndex);
\r
556 public void setRowIndexForErrorValue(int rowIndex);
\r
557 public void startRealValues(int rowIndex);
\r