/* ********************************************************************** * Copyright (c) 2002-2010, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu * Created: November 5 2002 * Since: ICU 2.4 ********************************************************************** */ package com.ibm.icu.impl; import java.io.BufferedInputStream; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; import java.util.MissingResourceException; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; /** * Wrapper for the pnames.icu binary data file. This data file is * imported from icu4c. It contains property and property value * aliases from the UCD files PropertyAliases.txt and * PropertyValueAliases.txt. The file is built by the icu4c tool * genpname. It must be built on an ASCII big-endian platform to be * usable in icu4j. * * This class performs two functions. * * (1) It can import the flat binary data into a tree of usable * objects. * * (2) It provides an API to access the tree of objects. * * Needless to say, this class is tightly coupled to the binary format * of icu4c's pnames.icu file. * * Each time a UPropertyAliases is constructed, the pnames.icu file is * read, parsed, and a data tree assembled. Clients should create one * singleton instance and cache it. * * @author Alan Liu * @since ICU 2.4 */ public final class UPropertyAliases implements ICUBinary.Authenticate { //---------------------------------------------------------------- // Runtime data. This is an unflattened representation of the // data in pnames.icu. 
/** * Map from property enum value to nameGroupPool[] index */ private NonContiguousEnumToShort enumToName; /** * Map from property alias to property enum value */ private NameToEnum nameToEnum; /** * Map from property enum value to valueMapArray[] index */ private NonContiguousEnumToShort enumToValue; /** * Each entry represents a binary or enumerated property */ private ValueMap valueMapArray[]; /** * Pool of concatenated integer runs. Each run contains one * or more entries. The last entry of the run is negative. * A zero entry indicates "n/a" in the Property*Aliases.txt. * Each entry is a stringPool[] index. */ private short nameGroupPool[]; /** * Pool of strings. */ private String stringPool[]; //---------------------------------------------------------------- // Constants /** * Debug flag (not really constant) */ private static boolean DEBUG = ICUDebug.enabled("pnames"); /** * File format that this class understands. * See icu4c/src/common/propname.h. */ private static final byte DATA_FORMAT_ID[] = {'p', 'n', 'a', 'm'}; /** * File version that this class understands. * See icu4c/src/common/propname.h. */ private static final byte DATA_FORMAT_VERSION = 1; /** * Name of the datafile */ private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/pnames.icu"; /** * Buffer size of datafile. The whole file is < 16k. */ private static final int DATA_BUFFER_SIZE = 8192; //---------------------------------------------------------------- // Constructor /** * Constructs a UPropertyAliases object. The binary file * DATA_FILE_NAME is read from the jar/classpath and unflattened * into member variables of this object. */ private UPropertyAliases() throws IOException { // Open the .icu file from the jar/classpath InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME); BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE); // Read and discard Unicode version... 
/* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this); DataInputStream d = new DataInputStream(b); // Record the origin position of the file. Keep enough around // to seek back to the start of the header. d.mark(256); short enumToName_offset = d.readShort(); short nameToEnum_offset = d.readShort(); short enumToValue_offset = d.readShort(); short total_size = d.readShort(); short valueMap_offset = d.readShort(); short valueMap_count = d.readShort(); short nameGroupPool_offset = d.readShort(); short nameGroupPool_count = d.readShort(); short stringPool_offset = d.readShort(); short stringPool_count = d.readShort(); if (DEBUG) { System.out.println( "enumToName_offset=" + enumToName_offset + "\n" + "nameToEnum_offset=" + nameToEnum_offset + "\n" + "enumToValue_offset=" + enumToValue_offset + "\n" + "total_size=" + total_size + "\n" + "valueMap_offset=" + valueMap_offset + "\n" + "valueMap_count=" + valueMap_count + "\n" + "nameGroupPool_offset=" + nameGroupPool_offset + "\n" + "nameGroupPool_count=" + nameGroupPool_count + "\n" + "stringPool_offset=" + stringPool_offset + "\n" + "stringPool_count=" + stringPool_count); } // Read it all (less than 32k). Seeking around (using // mark/reset/skipBytes) doesn't work directly on the file, // but it works fine if we read everything into a byte[] array // first. byte raw[] = new byte[total_size]; d.reset(); d.readFully(raw); d.close(); Builder builder = new Builder(raw); stringPool = builder.readStringPool(stringPool_offset, stringPool_count); nameGroupPool = builder.readNameGroupPool(nameGroupPool_offset, nameGroupPool_count); builder.setupValueMap_map(valueMap_offset, valueMap_count); // Some of the following data structures have to be set up // here, _not_ in Builder. That's because they are instances // of non-static inner classes, and they contain implicit // references to this. 
builder.seek(enumToName_offset); enumToName = new NonContiguousEnumToShort(builder); builder.nameGroupOffsetToIndex(enumToName.offsetArray); builder.seek(nameToEnum_offset); nameToEnum = new NameToEnum(builder); builder.seek(enumToValue_offset); enumToValue = new NonContiguousEnumToShort(builder); builder.valueMapOffsetToIndex(enumToValue.offsetArray); valueMapArray = new ValueMap[valueMap_count]; for (int i=0; i= enumLimit) { throw new IllegalIcuArgumentException("Invalid enum. enumStart = " +enumStart + " enumLimit = " + enumLimit + " enumProbe = " + enumProbe ); } return offsetArray[enumProbe - enumStart]; } ContiguousEnumToShort(ICUBinaryStream s) throws IOException { enumStart = s.readInt(); enumLimit = s.readInt(); int count = enumLimit - enumStart; offsetArray = new short[count]; for (int i=0; i enumProbe) break; return offsetArray[i]; } throw new IllegalIcuArgumentException("Invalid enum"); } NonContiguousEnumToShort(ICUBinaryStream s) throws IOException { int i; int count = s.readInt(); enumArray = new int[count]; offsetArray = new short[count]; for (i=0; i 0) continue; if (c < 0) break; return enumArray[i]; } return UProperty.UNDEFINED; } NameToEnum(Builder b) throws IOException { int i; int count = b.readInt(); enumArray = new int[count]; nameArray = new short[count]; for (i=0; i0. The * comparison is that described as "loose" matching in the * Property*Aliases.txt files. */ public static int compare(String stra, String strb) { // Note: This implementation is a literal copy of // uprv_comparePropertyNames. It can probably be improved. int istra=0, istrb=0, rc; int cstra=0, cstrb=0; for (;;) { /* Ignore delimiters '-', '_', and ASCII White_Space */ while (istra 0) { if (nameGroupPool[nameGroupIndex++] < 0) { throw new IllegalIcuArgumentException("Invalid name choice"); } } short a = nameGroupPool[nameGroupIndex]; return stringPool[(a < 0) ? -a : a]; } /** * Return the valueMap[] entry for a given property. 
*/ private ValueMap getValueMap(int property) { int valueMapIndex = enumToValue.getShort(property); return valueMapArray[valueMapIndex]; } //---------------------------------------------------------------- // ICUBinary API /** * Return true if the given data version can be used. */ public boolean isDataVersionAcceptable(byte version[]) { return version[0] == DATA_FORMAT_VERSION; } //---------------------------------------------------------------- // Builder /** * A specialized ICUBinaryStream that can map between offsets and * index values into various arrays (stringPool, nameGroupPool, * and valueMap). It also knows how to read various structures. */ static class Builder extends ICUBinaryStream { // map[i] = offset of object i. We need maps for all of our // arrays. The arrays are indexed by offset in the raw binary // file; we need to translate that to index. private short stringPool_map[]; private short valueMap_map[]; private short nameGroup_map[]; public Builder(byte raw[]) { super(raw); } /** * The valueMap_map[] must be set up in advance. This method * does that. */ public void setupValueMap_map(short offset, short count) { valueMap_map = new short[count]; for (int i=0; iindex * map (nameGroupPool_map[]). */ public short[] readNameGroupPool(short offset, short count) throws IOException { // Read nameGroupPool[]. This contains offsets from start of // header. We translate these into indices into stringPool[] // on the fly. The offset 0, which indicates "no entry", we // translate into index 0, which contains a null String // pointer. seek(offset); short pos = offset; short nameGroupPool[] = new short[count]; nameGroup_map = new short[count]; for (int i=0; i