2 **********************************************************************
3 * Copyright (c) 2002-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Created: November 5 2002
9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
10 **********************************************************************
13 package com.ibm.icu.impl;
15 import java.io.BufferedInputStream;
16 import java.io.DataInputStream;
17 import java.io.IOException;
18 import java.io.InputStream;
19 import java.util.MissingResourceException;
21 import com.ibm.icu.lang.UProperty;
22 import com.ibm.icu.util.BytesTrie;
25 * Wrapper for the pnames.icu binary data file. This data file is
26 * imported from icu4c. It contains property and property value
27 * aliases from the UCD files PropertyAliases.txt and
28 * PropertyValueAliases.txt. The file is built by the icu4c tool
29 * genpname. It must be an ASCII big-endian file to be
32 * This class performs two functions.
34 * (1) It can import the flat binary data into usable objects.
36 * (2) It provides an API to access the tree of objects.
38 * Needless to say, this class is tightly coupled to the binary format
39 * of icu4c's pnames.icu file.
41 * Each time a UPropertyAliases is constructed, the pnames.icu file is
42 * read, parsed, and data structures assembled. Clients should create one
43 * singleton instance and cache it.
48 public final class UPropertyAliases {
49 // Byte offsets from the start of the data, after the generic header.
// Each constant is a position in the int index array read at the start of load();
// the int stored at that position is the byte offset itself.
// load() subtracts consecutive offsets to size the valueMaps, bytesTries and nameGroups sections.
50 private static final int IX_VALUE_MAPS_OFFSET=0;
51 private static final int IX_BYTE_TRIES_OFFSET=1;
52 private static final int IX_NAME_GROUPS_OFFSET=2;
// load() reads this slot as the end of the nameGroups section.
53 private static final int IX_RESERVED3_OFFSET=3;
// The remaining index slots have no active constant; they are listed for reference only.
54 // private static final int IX_RESERVED4_OFFSET=4;
55 // private static final int IX_TOTAL_SIZE=5;
58 // private static final int IX_MAX_NAME_LENGTH=6;
59 // private static final int IX_RESERVED7=7;
60 // private static final int IX_COUNT=8;
62 //----------------------------------------------------------------
63 // Runtime data. This is an unflattened representation of the
64 // data in pnames.icu.
// Section of ints filled by load() with readInt(); findProperty() and
// findPropertyValueNameGroup() walk it by index.
66 private int[] valueMaps;
// Raw bytes of the tries section; passed to BytesTrie together with a start offset.
67 private byte[] bytesTries;
// Name groups section, converted one byte per char by load().
// getName() reads a count char followed by names that each end at a 0 char.
68 private String nameGroups;
// ICUBinary.Authenticate implementation for pnames.icu.
// The shared IS_ACCEPTABLE instance is passed to ICUBinary.readHeader() in load().
70 private static final class IsAcceptable implements ICUBinary.Authenticate {
71 // @Override when we switch to Java 6
// Returns whether data carrying the given version bytes is acceptable to this class.
72 public boolean isDataVersionAcceptable(byte version[]) {
// Single shared instance; passed to ICUBinary.readHeader() in load().
76 private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
// Data format bytes passed to ICUBinary.readHeader(); the four values are ASCII 'p', 'n', 'a', 'm'.
77 private static final byte DATA_FORMAT[]={ 0x70, 0x6E, 0x61, 0x6D }; // "pnam"
// Reads the pnames.icu data from the given stream and fills valueMaps, bytesTries and nameGroups.
// Sections are consumed in stream order: generic header, int indexes, valueMaps ints,
// bytesTries bytes, nameGroups bytes.
// Throws IOException if reading fails or if the data declares fewer than 8 indexes.
79 private void load(InputStream data) throws IOException {
80 BufferedInputStream bis=new BufferedInputStream(data);
81 //dataVersion=ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
// Consume the generic header; presumably ICUBinary checks it against DATA_FORMAT and
// IS_ACCEPTABLE and throws on mismatch -- confirm against ICUBinary.readHeader().
82 ICUBinary.readHeader(bis, DATA_FORMAT, IS_ACCEPTABLE);
// DataInputStream.readInt() reads four bytes in big-endian order.
83 DataInputStream ds=new DataInputStream(bis);
// The first index is the byte offset of the valueMaps. They are read directly after the
// indexes, so that offset divided by 4 is also the number of int indexes.
84 int indexesLength=ds.readInt()/4; // inIndexes[IX_VALUE_MAPS_OFFSET]/4
85 if(indexesLength<8) { // formatVersion 2 initially has 8 indexes
86 throw new IOException("pnames.icu: not enough indexes");
88 int[] inIndexes=new int[indexesLength];
// Slot 0 was already consumed from the stream above; rebuild it as a byte offset.
89 inIndexes[0]=indexesLength*4;
// The remaining indexes are read starting at slot 1.
90 for(int i=1; i<indexesLength; ++i) {
91 inIndexes[i]=ds.readInt();
94 // Read the valueMaps.
95 int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
96 int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
// The section length is in bytes; each valueMaps entry is one 4-byte int.
97 int numInts=(nextOffset-offset)/4;
98 valueMaps=new int[numInts];
99 for(int i=0; i<numInts; ++i) {
100 valueMaps[i]=ds.readInt();
103 // Read the bytesTries.
// NOTE(review): the size computed below is only the bytesTries length if offset has been
// advanced to the start of this section (the previous nextOffset) -- confirm.
105 nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
106 int numBytes=nextOffset-offset;
107 bytesTries=new byte[numBytes];
// readFully() fills the whole array or throws (EOFException on a short stream).
108 ds.readFully(bytesTries);
110 // Read the nameGroups and turn them from ASCII bytes into a Java String.
// NOTE(review): same assumption as above -- offset must equal the start of nameGroups here.
112 nextOffset=inIndexes[IX_RESERVED3_OFFSET];
113 numBytes=nextOffset-offset;
114 StringBuilder sb=new StringBuilder(numBytes);
115 for(int i=0; i<numBytes; ++i) {
// One char per byte. The cast sign-extends, so only bytes 0..0x7f map to the same code.
116 sb.append((char)ds.readByte());
118 nameGroups=sb.toString();
// Private constructor; the visible code creates the only instance for the static INSTANCE field.
// Opens ICUResourceBundle.ICU_BUNDLE+"/pnames.icu" through ICUData.getRequiredStream()
// and parses it with load(). Propagates any IOException from load().
123 private UPropertyAliases() throws IOException {
124 load(ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/pnames.icu"));
// Looks up a property enum in the property ranges at the start of valueMaps.
// Returns the valueMaps index of the property's entry. Callers treat a result of 0 as
// "invalid property enum", read valueMaps[result] as the property's nameGroups index,
// and read valueMaps[result+1] as the index of its value map.
127 private int findProperty(int property) {
128 int i=1; // valueMaps index, initially after numRanges
// valueMaps[0] holds the number of property ranges.
129 for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
130 // Read and skip the start and limit of this range.
131 int start=valueMaps[i];
132 int limit=valueMaps[i+1];
// Each property in a range occupies two ints, hence the factor 2.
138 return i+(property-start)*2;
140 i+=(limit-start)*2; // Skip all entries for this range.
// Finds the name group for one value of a property.
// valueMapIndex is the valueMaps index where the property's value map starts;
// 0 means the property has no value map.
// Returns an index into nameGroups that getName() accepts; callers treat 0 as "no name group".
145 private int findPropertyValueNameGroup(int valueMapIndex, int value) {
146 if(valueMapIndex==0) {
147 return 0; // The property does not have named values.
149 ++valueMapIndex; // Skip the BytesTrie offset.
// The second word of the value map is a count, which also encodes the layout: below it is
// used as a number of ranges, and as (number of listed values + 0x10) in the list form.
150 int numRanges=valueMaps[valueMapIndex++];
// Range form: start and limit, followed by one name group offset per value in the range.
153 for(; numRanges>0; --numRanges) {
154 // Read and skip the start and limit of this range.
155 int start=valueMaps[valueMapIndex];
156 int limit=valueMaps[valueMapIndex+1];
// The offset for this value sits (value-start) entries into the range's offsets.
162 return valueMaps[valueMapIndex+value-start];
164 valueMapIndex+=limit-start; // Skip all entries for this range.
// List form: numRanges-0x10 values are stored first, followed by their name group offsets
// in the same order.
168 int valuesStart=valueMapIndex;
169 int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
171 int v=valueMaps[valueMapIndex];
// The matching offset is at the same position in the offsets list as v is in the values list.
176 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
// Stop once every listed value has been examined.
178 } while(++valueMapIndex<nameGroupOffsetsStart);
// Returns the nameIndex-th name of the name group that starts at nameGroupsIndex in nameGroups,
// or null if that name is empty.
// Group layout as read here: one char holding the number of names, then the names,
// each terminated by a 0 char.
// Throws IllegalIcuArgumentException if nameIndex is negative or not less than the number of names.
183 private String getName(int nameGroupsIndex, int nameIndex) {
// The first char of the group is the name count; the index then moves on to the first name.
184 int numNames=nameGroups.charAt(nameGroupsIndex++);
185 if(nameIndex<0 || numNames<=nameIndex) {
186 throw new IllegalIcuArgumentException("Invalid property (value) name choice");
188 // Skip nameIndex names.
189 for(; nameIndex>0; --nameIndex) {
// The empty loop body is intentional: advance past the name and its 0 terminator.
190 while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
192 // Find the end of this name.
193 int nameStart=nameGroupsIndex;
194 while(0!=nameGroups.charAt(nameGroupsIndex)) {
// A terminator directly at nameStart means the name is empty.
197 if(nameStart==nameGroupsIndex) {
198 return null; // no name (Property[Value]Aliases.txt has "n/a")
// The terminating 0 char is excluded from the result.
200 return nameGroups.substring(nameStart, nameGroupsIndex);
// Maps ASCII 'A'..'Z' to 'a'..'z' by adding 0x20; any other code is returned unchanged.
203 private static int asciiToLowercase(int c) {
204 return 'A'<=c && c<='Z' ? c+0x20 : c;
// Walks the characters of name against the given trie using loose matching:
// '-', '_', ' ' and the codes 0x09..0x0d are ignored, and ASCII letters are lowercased.
// Returns hasValue() of the final trie result.
// When this returns true, getPropertyOrValueEnum() reads the matched value with trie.getValue().
207 private boolean containsName(BytesTrie trie, CharSequence name) {
// Initial result before any character has been fed to the trie.
208 BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
209 for(int i=0; i<name.length(); ++i) {
210 int c=name.charAt(i);
211 // Ignore delimiters '-', '_', and ASCII White_Space.
212 if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
// More name characters remain, but the previous trie result allows no continuation.
215 if(!result.hasNext()) {
// Fold ASCII case so that matching is case-insensitive.
218 c=asciiToLowercase(c);
221 return result.hasValue();
224 //----------------------------------------------------------------
// The shared instance. The field is static final and assigned below, so it is set once
// during class initialization.
227 public static final UPropertyAliases INSTANCE;
// The constructor reads and parses pnames.icu.
231 INSTANCE = new UPropertyAliases();
232 } catch(IOException e) {
// A MissingResourceException is built to describe the load failure; its class name and key
// arguments are empty strings.
234 MissingResourceException mre = new MissingResourceException(
235 "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
243 * Returns a property name given a property enum.
244 * Multiple names may be available for each property;
245 * the nameChoice selects among them.
// Throws IllegalArgumentException if the property enum is not found, and
// IllegalIcuArgumentException (from getName()) if nameChoice is out of range for the property.
// May return null when the selected name is empty.
247 public String getPropertyName(int property, int nameChoice) {
248 int valueMapIndex=findProperty(property);
// A result of 0 from findProperty() is treated as "property not found".
249 if(valueMapIndex==0) {
250 throw new IllegalArgumentException(
251 "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
// The first int of the property's entry is the index of its name group in nameGroups.
253 return getName(valueMaps[valueMapIndex], nameChoice);
257 * Returns a value name given a property enum and a value enum.
258 * Multiple names may be available for each value;
259 * the nameChoice selects among them.
// Throws IllegalArgumentException if the property enum is not found or no name group is found
// for the value, and IllegalIcuArgumentException (from getName()) if nameChoice is out of range.
// May return null when the selected name is empty.
261 public String getPropertyValueName(int property, int value, int nameChoice) {
262 int valueMapIndex=findProperty(property);
// A result of 0 from findProperty() is treated as "property not found".
263 if(valueMapIndex==0) {
264 throw new IllegalArgumentException(
265 "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
// The second int of the property's entry is the index of its value map (0 if it has none).
267 int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
// 0 is returned both when the property has no value map and when the value is not found in it.
268 if(nameGroupOffset==0) {
269 throw new IllegalArgumentException(
270 "Property "+property+" (0x"+Integer.toHexString(property)+
271 ") does not have named values");
273 return getName(nameGroupOffset, nameChoice);
// Looks up alias in the trie that starts at bytesTrieOffset within bytesTries.
// Returns the trie value for the alias, or UProperty.UNDEFINED if containsName() finds no match.
276 private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
// A fresh trie object is created for every lookup.
277 BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
278 if(containsName(trie, alias)) {
// containsName() left the trie positioned on the match, so its value can be read directly.
279 return trie.getValue();
281 return UProperty.UNDEFINED;
286 * Returns a property enum given one of its property names.
287 * If the property name is not known, this method returns
288 * UProperty.UNDEFINED.
290 public int getPropertyEnum(CharSequence alias) {
// Property names are looked up in the trie at offset 0 of bytesTries.
291 return getPropertyOrValueEnum(0, alias);
295 * Returns a value enum given a property enum and one of its value names.
// Returns UProperty.UNDEFINED (via getPropertyOrValueEnum()) if the alias is not found.
// Throws IllegalArgumentException if the property enum is not found or the property has no value map.
297 public int getPropertyValueEnum(int property, CharSequence alias) {
298 int valueMapIndex=findProperty(property);
// A result of 0 from findProperty() is treated as "property not found".
299 if(valueMapIndex==0) {
300 throw new IllegalArgumentException(
301 "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
// Reuse the variable: it now holds the index of the property's value map (0 if it has none).
303 valueMapIndex=valueMaps[valueMapIndex+1];
304 if(valueMapIndex==0) {
305 throw new IllegalArgumentException(
306 "Property "+property+" (0x"+Integer.toHexString(property)+
307 ") does not have named values");
309 // valueMapIndex is the start of the property's valueMap,
310 // where the first word is the BytesTrie offset.
311 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
315 * Compare two property names, returning <0, 0, or >0. The
316 * comparison is that described as "loose" matching in the
317 * Property*Aliases.txt files.
319 public static int compare(String stra, String strb) {
320 // Note: This implementation is a literal copy of
321 // uprv_comparePropertyNames. It can probably be improved.
322 int istra=0, istrb=0, rc;
323 int cstra=0, cstrb=0;
325 /* Ignore delimiters '-', '_', and ASCII White_Space */
326 while (istra<stra.length()) {
327 cstra = stra.charAt(istra);
329 case '-': case '_': case ' ': case '\t':
330 case '\n': case 0xb/*\v*/: case '\f': case '\r':
337 while (istrb<strb.length()) {
338 cstrb = strb.charAt(istrb);
340 case '-': case '_': case ' ': case '\t':
341 case '\n': case 0xb/*\v*/: case '\f': case '\r':
348 /* If we reach the ends of both strings then they match */
349 boolean endstra = istra==stra.length();
350 boolean endstrb = istrb==strb.length();
352 if (endstrb) return 0;
354 } else if (endstrb) {
358 rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);