/* ****************************************************************************** * Copyright (C) 1996-2009, International Business Machines Corporation and * * others. All Rights Reserved. * ****************************************************************************** */ /** * Store bits (Unicode character properties) in bit set vectors. * * This is a port of the C++ class UPropsVectors from ICU4C * * @author Shaopeng Jia * @internal */ package com.ibm.icu.impl; import java.util.Arrays; import java.util.Comparator; /** * Unicode Properties Vectors associated with code point ranges. * * Rows of primitive integers in a contiguous array store the range limits and * the properties vectors. * * In each row, row[0] contains the start code point and row[1] contains the * limit code point, which is the start of the next range. * * Initially, there is only one range [0..0x110000] with values 0. * * It would be possible to store only one range boundary per row, but * self-contained rows allow to later sort them by contents. */ public class PropsVectors { private int v[]; private int columns; // number of columns, plus two for start // and limit values private int maxRows; private int rows; private int prevRow; // search optimization: remember last row seen private boolean isCompacted; // internal function to compare elements in v and target. Return true iff // elements in v starting from index1 to index1 + length - 1 // are exactly the same as elements in target // starting from index2 to index2 + length - 1 private boolean areElementsSame(int index1, int[] target, int index2, int length) { for (int i = 0; i < length; ++i) { if (v[index1 + i] != target[index2 + i]) { return false; } } return true; } // internal function which given rangeStart, returns // index where v[index]<=rangeStart= v[index]) { if (rangeStart < v[index + 1]) { // same row as last seen return index; } else { index += columns; if (rangeStart < v[index + 1]) { ++prevRow; return index; } else { index += columns; if (rangeStart < v[index + 1]) { prevRow += 2; return index; } else if ((rangeStart - v[index + 1]) < 10) { // we are close, continue looping prevRow += 2; do { ++prevRow; index += columns; } while (rangeStart >= v[index + 1]); return index; } } } } else if (rangeStart < v[1]) { // the very first row prevRow = 0; return 0; } // do a binary search for the start of the range int start = 0; int mid = 0; int limit = rows; while (start < limit - 1) { mid = (start + limit) / 2; index = columns * mid; if (rangeStart < v[index]) { limit = mid; } else if (rangeStart < v[index + 1]) { prevRow = mid; return index; } else { start = mid; } } // must be found because all ranges together always cover // all of Unicode prevRow = start; index = start * columns; return index; } /* * Special pseudo code points for storing the initialValue and the * errorValue which are used to initialize a Trie or similar. */ public final static int FIRST_SPECIAL_CP = 0x110000; public final static int INITIAL_VALUE_CP = 0x110000; public final static int ERROR_VALUE_CP = 0x110001; public final static int MAX_CP = 0x110001; public final static int INITIAL_ROWS = 1 << 12; public final static int MEDIUM_ROWS = 1 << 16; public final static int MAX_ROWS = MAX_CP + 1; /* * Constructor. * @param numOfColumns Number of value integers (32-bit int) per row. */ public PropsVectors(int numOfColumns) { if (numOfColumns < 1) { throw new IllegalArgumentException("numOfColumns need to be no " + "less than 1; but it is " + numOfColumns); } columns = numOfColumns + 2; // count range start and limit columns v = new int[INITIAL_ROWS * columns]; maxRows = INITIAL_ROWS; rows = 2 + (MAX_CP - FIRST_SPECIAL_CP); prevRow = 0; isCompacted = false; v[0] = 0; v[1] = 0x110000; int index = columns; for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) { v[index] = cp; v[index + 1] = cp + 1; index += columns; } } /* * In rows for code points [start..end], select the column, reset the mask * bits and set the value bits (ANDed with the mask). * * @throws IllegalArgumentException * * @throws IllegalStateException * * @throws IndexOutOfBoundsException */ public void setValue(int start, int end, int column, int value, int mask) { if (start < 0 || start > end || end > MAX_CP || column < 0 || column >= (columns - 2)) { throw new IllegalArgumentException(); } if (isCompacted) { throw new IllegalStateException("Shouldn't be called after" + "compact()!"); } int firstRow, lastRow; int limit = end + 1; boolean splitFirstRow, splitLastRow; // skip range start and limit columns column += 2; value &= mask; // find the rows whose ranges overlap with the input range // find the first and last row, always successful firstRow = findRow(start); lastRow = findRow(end); /* * Rows need to be split if they partially overlap with the input range * (only possible for the first and last rows) and if their value * differs from the input value. */ splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask)); splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask)); // split first/last rows if necessary if (splitFirstRow || splitLastRow) { int rowsToExpand = 0; if (splitFirstRow) { ++rowsToExpand; } if (splitLastRow) { ++rowsToExpand; } int newMaxRows = 0; if ((rows + rowsToExpand) > maxRows) { if (maxRows < MEDIUM_ROWS) { newMaxRows = MEDIUM_ROWS; } else if (maxRows < MAX_ROWS) { newMaxRows = MAX_ROWS; } else { throw new IndexOutOfBoundsException( "MAX_ROWS exceeded! Increase it to a higher value" + "in the implementation"); } int[] temp = new int[newMaxRows * columns]; System.arraycopy(v, 0, temp, 0, rows * columns); v = temp; maxRows = newMaxRows; } // count the number of row cells to move after the last row, // and move them int count = (rows * columns) - (lastRow + columns); if (count > 0) { System.arraycopy(v, lastRow + columns, v, lastRow + (1 + rowsToExpand) * columns, count); } rows += rowsToExpand; // split the first row, and move the firstRow pointer // to the second part if (splitFirstRow) { // copy all affected rows up one and move the lastRow pointer count = lastRow - firstRow + columns; System.arraycopy(v, firstRow, v, firstRow + columns, count); lastRow += columns; // split the range and move the firstRow pointer v[firstRow + 1] = v[firstRow + columns] = start; firstRow += columns; } // split the last row if (splitLastRow) { // copy the last row data System.arraycopy(v, lastRow, v, lastRow + columns, columns); // split the range and move the firstRow pointer v[lastRow + 1] = v[lastRow + columns] = limit; } } // set the "row last seen" to the last row for the range prevRow = lastRow / columns; // set the input value in all remaining rows firstRow += column; lastRow += column; mask = ~mask; for (;;) { v[firstRow] = (v[firstRow] & mask) | value; if (firstRow == lastRow) { break; } firstRow += columns; } } /* * Always returns 0 if called after compact(). */ public int getValue(int c, int column) { if (isCompacted || c < 0 || c > MAX_CP || column < 0 || column >= (columns - 2)) { return 0; } int index = findRow(c); return v[index + 2 + column]; } /* * Returns an array which contains value elements * in row rowIndex. * * @throws IllegalStateException * @throws IllegalArgumentException */ public int[] getRow(int rowIndex) { if (isCompacted) { throw new IllegalStateException( "Illegal Invocation of the method after compact()"); } if (rowIndex < 0 || rowIndex > rows) { throw new IllegalArgumentException("rowIndex out of bound!"); } int[] rowToReturn = new int[columns - 2]; System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0, columns - 2); return rowToReturn; } /* * Returns an int which is the start codepoint * in row rowIndex. * * @throws IllegalStateException * * @throws IllegalArgumentException */ public int getRowStart(int rowIndex) { if (isCompacted) { throw new IllegalStateException( "Illegal Invocation of the method after compact()"); } if (rowIndex < 0 || rowIndex > rows) { throw new IllegalArgumentException("rowIndex out of bound!"); } return v[rowIndex * columns]; } /* * Returns an int which is the limit codepoint * minus 1 in row rowIndex. * * @throws IllegalStateException * * @throws IllegalArgumentException */ public int getRowEnd(int rowIndex) { if (isCompacted) { throw new IllegalStateException( "Illegal Invocation of the method after compact()"); } if (rowIndex < 0 || rowIndex > rows) { throw new IllegalArgumentException("rowIndex out of bound!"); } return v[rowIndex * columns + 1] - 1; } /* * Compact the vectors: * - modify the memory * - keep only unique vectors * - store them contiguously from the beginning of the memory * - for each (non-unique) row, call the respective function in * CompactHandler * * The handler's rowIndex is the index of the row in the compacted * memory block. Therefore, it starts at 0 increases in increments of the * columns value. * * In a first phase, only special values are delivered (each exactly once). * Then CompactHandler::startRealValues() is called * where rowIndex is the length of the compacted array. * Then, in the second phase, the CompactHandler::setRowIndexForRange() is * called for each row of real values. */ public void compact(CompactHandler compactor) { if (isCompacted) { return; } // Set the flag now: Sorting and compacting destroys the builder // data structure. isCompacted = true; int valueColumns = columns - 2; // not counting start & limit // sort the properties vectors to find unique vector values Integer[] indexArray = new Integer[rows]; for (int i = 0; i < rows; ++i) { indexArray[i] = new Integer(columns * i); } Arrays.sort(indexArray, new Comparator() { public int compare(Integer o1, Integer o2) { int indexOfRow1 = o1.intValue(); int indexOfRow2 = o2.intValue(); int count = columns; // includes start/limit columns // start comparing after start/limit // but wrap around to them int index = 2; do { if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) { return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1 : 1; } if (++index == columns) { index = 0; } } while (--count > 0); return 0; } }); /* * Find and set the special values. This has to do almost the same work * as the compaction below, to find the indexes where the special-value * rows will move. */ int count = -valueColumns; for (int i = 0; i < rows; ++i) { int start = v[indexArray[i].intValue()]; // count a new values vector if it is different // from the current one if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v, indexArray[i-1].intValue() + 2, valueColumns)) { count += valueColumns; } if (start == INITIAL_VALUE_CP) { compactor.setRowIndexForInitialValue(count); } else if (start == ERROR_VALUE_CP) { compactor.setRowIndexForErrorValue(count); } } // count is at the beginning of the last vector, // add valueColumns to include that last vector count += valueColumns; // Call the handler once more to signal the start of // delivering real values. compactor.startRealValues(count); /* * Move vector contents up to a contiguous array with only unique * vector values, and call the handler function for each vector. * * This destroys the Properties Vector structure and replaces it * with an array of just vector values. */ int[] temp = new int[count]; count = -valueColumns; for (int i = 0; i < rows; ++i) { int start = v[indexArray[i].intValue()]; int limit = v[indexArray[i].intValue() + 1]; // count a new values vector if it is different // from the current one if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, temp, count, valueColumns)) { count += valueColumns; System.arraycopy(v, indexArray[i].intValue() + 2, temp, count, valueColumns); } if (start < FIRST_SPECIAL_CP) { compactor.setRowIndexForRange(start, limit - 1, count); } } v = temp; // count is at the beginning of the last vector, // add one to include that last vector rows = count / valueColumns + 1; } /* * Get the vectors array after calling compact(). * * @throws IllegalStateException */ public int[] getCompactedArray() { if (!isCompacted) { throw new IllegalStateException( "Illegal Invocation of the method before compact()"); } return v; } /* * Get the number of rows for the compacted array. * * @throws IllegalStateException */ public int getCompactedRows() { if (!isCompacted) { throw new IllegalStateException( "Illegal Invocation of the method before compact()"); } return rows; } /* * Get the number of columns for the compacted array. * * @throws IllegalStateException */ public int getCompactedColumns() { if (!isCompacted) { throw new IllegalStateException( "Illegal Invocation of the method before compact()"); } return columns - 2; } /* * Call compact(), create a IntTrie with indexes into the compacted * vectors array. */ public IntTrie compactToTrieWithRowIndexes() { PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler(); compact(compactor); return compactor.builder.serialize(new DefaultGetFoldedValue( compactor.builder), new DefaultGetFoldingOffset()); } // inner class implementation of Trie.DataManipulate private static class DefaultGetFoldingOffset implements Trie.DataManipulate { public int getFoldingOffset(int value) { return value; } } // inner class implementation of TrieBuilder.DataManipulate private static class DefaultGetFoldedValue implements TrieBuilder.DataManipulate { private IntTrieBuilder builder; public DefaultGetFoldedValue(IntTrieBuilder inBuilder) { builder = inBuilder; } public int getFoldedValue(int start, int offset) { int initialValue = builder.m_initialValue_; int limit = start + 0x400; while (start < limit) { boolean[] inBlockZero = new boolean[1]; int value = builder.getValue(start, inBlockZero); if (inBlockZero[0]) { start += TrieBuilder.DATA_BLOCK_LENGTH; } else if (value != initialValue) { return offset; } else { ++start; } } return 0; } } public static interface CompactHandler { public void setRowIndexForRange(int start, int end, int rowIndex); public void setRowIndexForInitialValue(int rowIndex); public void setRowIndexForErrorValue(int rowIndex); public void startRealValues(int rowIndex); } }