2 **********************************************************************
\r
3 * Copyright (c) 2002-2009, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 **********************************************************************
\r
7 * Created: November 5 2002
\r
9 **********************************************************************
\r
12 package com.ibm.icu.impl;
\r
16 import com.ibm.icu.lang.*;
\r
19 * Wrapper for the pnames.icu binary data file. This data file is
\r
20 * imported from icu4c. It contains property and property value
\r
21 * aliases from the UCD files PropertyAliases.txt and
\r
22 * PropertyValueAliases.txt. The file is built by the icu4c tool
\r
23 * genpname. It must be built on an ASCII big-endian platform to be
\r
26 * This class performs two functions.
\r
28 * (1) It can import the flat binary data into a tree of usable
\r
31 * (2) It provides an API to access the tree of objects.
\r
33 * Needless to say, this class is tightly coupled to the binary format
\r
34 * of icu4c's pnames.icu file.
\r
36 * Each time a UPropertyAliases is constructed, the pnames.icu file is
\r
37 * read, parsed, and a data tree assembled. Clients should create one
\r
38 * singleton instance and cache it.
\r
43 public final class UPropertyAliases implements ICUBinary.Authenticate {
\r
45 //----------------------------------------------------------------
\r
46 // Runtime data. This is an unflattened representation of the
\r
47 // data in pnames.icu.
\r
50 * Map from property enum value to nameGroupPool[] index
\r
52 private NonContiguousEnumToShort enumToName;
\r
55 * Map from property alias to property enum value
\r
57 private NameToEnum nameToEnum;
\r
60 * Map from property enum value to valueMapArray[] index
\r
62 private NonContiguousEnumToShort enumToValue;
\r
65 * Each entry represents a binary or enumerated property
\r
67 private ValueMap valueMapArray[];
\r
70 * Pool of concatenated integer runs. Each run contains one
\r
71 * or more entries. The last entry of the run is negative.
\r
72 * A zero entry indicates "n/a" in the Property*Aliases.txt.
\r
73 * Each entry is a stringPool[] index.
\r
75 private short nameGroupPool[];
\r
80 private String stringPool[];
\r
82 //----------------------------------------------------------------
\r
86 * Debug flag (not really constant)
\r
88 private static boolean DEBUG = ICUDebug.enabled("pnames");
\r
91 * File format that this class understands.
\r
92 * See icu4c/src/common/propname.h.
\r
94 private static final byte DATA_FORMAT_ID[] = {'p', 'n', 'a', 'm'};
\r
97 * File version that this class understands.
\r
98 * See icu4c/src/common/propname.h.
\r
100 private static final byte DATA_FORMAT_VERSION = 1;
\r
103 * Name of the datafile
\r
105 private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/pnames.icu";
\r
108 * Buffer size of datafile. The whole file is < 16k.
\r
110 private static final int DATA_BUFFER_SIZE = 8192;
\r
112 //----------------------------------------------------------------
\r
116 * Constructs a UPropertyAliases object. The binary file
\r
117 * DATA_FILE_NAME is read from the jar/classpath and unflattened
\r
118 * into member variables of this object.
\r
120 public UPropertyAliases() throws IOException {
\r
122 // Open the .icu file from the jar/classpath
\r
123 InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME);
\r
124 BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE);
\r
125 // Read and discard Unicode version...
\r
126 /* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this);
\r
127 DataInputStream d = new DataInputStream(b);
\r
129 // Record the origin position of the file. Keep enough around
\r
130 // to seek back to the start of the header.
\r
133 short enumToName_offset = d.readShort();
\r
134 short nameToEnum_offset = d.readShort();
\r
135 short enumToValue_offset = d.readShort();
\r
136 short total_size = d.readShort();
\r
137 short valueMap_offset = d.readShort();
\r
138 short valueMap_count = d.readShort();
\r
139 short nameGroupPool_offset = d.readShort();
\r
140 short nameGroupPool_count = d.readShort();
\r
141 short stringPool_offset = d.readShort();
\r
142 short stringPool_count = d.readShort();
\r
145 System.out.println(
\r
146 "enumToName_offset=" + enumToName_offset + "\n" +
\r
147 "nameToEnum_offset=" + nameToEnum_offset + "\n" +
\r
148 "enumToValue_offset=" + enumToValue_offset + "\n" +
\r
149 "total_size=" + total_size + "\n" +
\r
150 "valueMap_offset=" + valueMap_offset + "\n" +
\r
151 "valueMap_count=" + valueMap_count + "\n" +
\r
152 "nameGroupPool_offset=" + nameGroupPool_offset + "\n" +
\r
153 "nameGroupPool_count=" + nameGroupPool_count + "\n" +
\r
154 "stringPool_offset=" + stringPool_offset + "\n" +
\r
155 "stringPool_count=" + stringPool_count);
\r
158 // Read it all (less than 32k). Seeking around (using
\r
159 // mark/reset/skipBytes) doesn't work directly on the file,
\r
160 // but it works fine if we read everything into a byte[] array
\r
162 byte raw[] = new byte[total_size];
\r
167 Builder builder = new Builder(raw);
\r
169 stringPool = builder.readStringPool(stringPool_offset,
\r
172 nameGroupPool = builder.readNameGroupPool(nameGroupPool_offset,
\r
173 nameGroupPool_count);
\r
175 builder.setupValueMap_map(valueMap_offset, valueMap_count);
\r
177 // Some of the following data structures have to be set up
\r
178 // here, _not_ in Builder. That's because they are instances
\r
179 // of non-static inner classes, and they contain implicit
\r
180 // references to this.
\r
182 builder.seek(enumToName_offset);
\r
183 enumToName = new NonContiguousEnumToShort(builder);
\r
184 builder.nameGroupOffsetToIndex(enumToName.offsetArray);
\r
186 builder.seek(nameToEnum_offset);
\r
187 nameToEnum = new NameToEnum(builder);
\r
189 builder.seek(enumToValue_offset);
\r
190 enumToValue = new NonContiguousEnumToShort(builder);
\r
191 builder.valueMapOffsetToIndex(enumToValue.offsetArray);
\r
193 valueMapArray = new ValueMap[valueMap_count];
\r
194 for (int i=0; i<valueMap_count; ++i) {
\r
195 // Must seek to the start of each entry.
\r
196 builder.seek(builder.valueMap_map[i]);
\r
197 valueMapArray[i] = new ValueMap(builder);
\r
203 //----------------------------------------------------------------
\r
207 * Return a property name given a property enum. Multiple
\r
208 * names may be available for each property; the nameChoice
\r
209 * selects among them.
\r
211 public String getPropertyName(int property,
\r
213 short nameGroupIndex = enumToName.getShort(property);
\r
214 return chooseNameInGroup(nameGroupIndex, nameChoice);
\r
218 * Return a property enum given one of its property names.
\r
220 public int getPropertyEnum(String propertyAlias) {
\r
221 return nameToEnum.getEnum(propertyAlias);
\r
225 * Return a value name given a property enum and a value enum.
\r
226 * Multiple names may be available for each value; the nameChoice
\r
227 * selects among them.
\r
229 public String getPropertyValueName(int property,
\r
232 ValueMap vm = getValueMap(property);
\r
233 short nameGroupIndex = vm.enumToName.getShort(value);
\r
234 return chooseNameInGroup(nameGroupIndex, nameChoice);
\r
238 * Return a value enum given one of its value names and the
\r
239 * corresponding property alias.
\r
241 public int getPropertyValueEnum(int property,
\r
242 String valueAlias) {
\r
243 ValueMap vm = getValueMap(property);
\r
244 return vm.nameToEnum.getEnum(valueAlias);
\r
247 //----------------------------------------------------------------
\r
251 * A map for the legal values of a binary or enumerated properties.
\r
253 private class ValueMap {
\r
256 * Maps value enum to index into the nameGroupPool[]
\r
258 EnumToShort enumToName; // polymorphic
\r
261 * Maps value name to value enum.
\r
263 NameToEnum nameToEnum;
\r
265 ValueMap(Builder b) throws IOException {
\r
266 short enumToName_offset = b.readShort();
\r
267 short ncEnumToName_offset = b.readShort();
\r
268 short nameToEnum_offset = b.readShort();
\r
269 if (enumToName_offset != 0) {
\r
270 b.seek(enumToName_offset);
\r
271 ContiguousEnumToShort x = new ContiguousEnumToShort(b);
\r
272 b.nameGroupOffsetToIndex(x.offsetArray);
\r
275 b.seek(ncEnumToName_offset);
\r
276 NonContiguousEnumToShort x = new NonContiguousEnumToShort(b);
\r
277 b.nameGroupOffsetToIndex(x.offsetArray);
\r
280 b.seek(nameToEnum_offset);
\r
281 nameToEnum = new NameToEnum(b);
\r
286 * Abstract map from enum values to integers.
\r
288 private interface EnumToShort {
\r
289 short getShort(int enumProbe);
\r
293 * Generic map from enum values to offsets. Enum values are
\r
296 private static class ContiguousEnumToShort implements EnumToShort {
\r
299 short offsetArray[];
\r
301 public short getShort(int enumProbe) {
\r
302 if (enumProbe < enumStart || enumProbe >= enumLimit) {
\r
303 throw new IllegalIcuArgumentException("Invalid enum. enumStart = " +enumStart +
\r
304 " enumLimit = " + enumLimit +
\r
305 " enumProbe = " + enumProbe );
\r
307 return offsetArray[enumProbe - enumStart];
\r
310 ContiguousEnumToShort(ICUBinaryStream s) throws IOException {
\r
311 enumStart = s.readInt();
\r
312 enumLimit = s.readInt();
\r
313 int count = enumLimit - enumStart;
\r
314 offsetArray = new short[count];
\r
315 for (int i=0; i<count; ++i) {
\r
316 offsetArray[i] = s.readShort();
\r
322 * Generic map from enum values to offsets. Enum values need not
\r
325 private static class NonContiguousEnumToShort implements EnumToShort {
\r
327 short offsetArray[];
\r
329 public short getShort(int enumProbe) {
\r
330 for (int i=0; i<enumArray.length; ++i) {
\r
331 if (enumArray[i] < enumProbe) continue;
\r
332 if (enumArray[i] > enumProbe) break;
\r
333 return offsetArray[i];
\r
335 throw new IllegalIcuArgumentException("Invalid enum");
\r
338 NonContiguousEnumToShort(ICUBinaryStream s) throws IOException {
\r
340 int count = s.readInt();
\r
341 enumArray = new int[count];
\r
342 offsetArray = new short[count];
\r
343 for (i=0; i<count; ++i) {
\r
344 enumArray[i] = s.readInt();
\r
346 for (i=0; i<count; ++i) {
\r
347 offsetArray[i] = s.readShort();
\r
353 * Map from names to enum values.
\r
355 private class NameToEnum {
\r
359 int getEnum(String nameProbe) {
\r
360 for (int i=0; i<nameArray.length; ++i) {
\r
361 int c = UPropertyAliases.compare(nameProbe,
\r
362 stringPool[nameArray[i]]);
\r
363 if (c > 0) continue;
\r
365 return enumArray[i];
\r
367 throw new IllegalIcuArgumentException("Invalid name: " + nameProbe);
\r
370 NameToEnum(Builder b) throws IOException {
\r
372 int count = b.readInt();
\r
373 enumArray = new int[count];
\r
374 nameArray = new short[count];
\r
375 for (i=0; i<count; ++i) {
\r
376 enumArray[i] = b.readInt();
\r
378 for (i=0; i<count; ++i) {
\r
379 nameArray[i] = b.stringOffsetToIndex(b.readShort());
\r
384 //----------------------------------------------------------------
\r
385 // Runtime implementation
\r
388 * Compare two property names, returning <0, 0, or >0. The
\r
389 * comparison is that described as "loose" matching in the
\r
390 * Property*Aliases.txt files.
\r
392 public static int compare(String stra, String strb) {
\r
393 // Note: This implementation is a literal copy of
\r
394 // uprv_comparePropertyNames. It can probably be improved.
\r
395 int istra=0, istrb=0, rc;
\r
396 int cstra=0, cstrb=0;
\r
398 /* Ignore delimiters '-', '_', and ASCII White_Space */
\r
399 while (istra<stra.length()) {
\r
400 cstra = stra.charAt(istra);
\r
402 case '-': case '_': case ' ': case '\t':
\r
403 case '\n': case 0xb/*\v*/: case '\f': case '\r':
\r
410 while (istrb<strb.length()) {
\r
411 cstrb = strb.charAt(istrb);
\r
413 case '-': case '_': case ' ': case '\t':
\r
414 case '\n': case 0xb/*\v*/: case '\f': case '\r':
\r
421 /* If we reach the ends of both strings then they match */
\r
422 boolean endstra = istra==stra.length();
\r
423 boolean endstrb = istrb==strb.length();
\r
425 if (endstrb) return 0;
\r
427 } else if (endstrb) {
\r
431 rc = UCharacter.toLowerCase(cstra) - UCharacter.toLowerCase(cstrb);
\r
442 * Given an index to a run within the nameGroupPool[], and a
\r
443 * nameChoice (0,1,...), select the nameChoice-th entry of the run.
\r
445 private String chooseNameInGroup(short nameGroupIndex, int nameChoice) {
\r
446 if (nameChoice < 0) {
\r
447 throw new IllegalIcuArgumentException("Invalid name choice");
\r
449 while (nameChoice-- > 0) {
\r
450 if (nameGroupPool[nameGroupIndex++] < 0) {
\r
451 throw new IllegalIcuArgumentException("Invalid name choice");
\r
454 short a = nameGroupPool[nameGroupIndex];
\r
455 return stringPool[(a < 0) ? -a : a];
\r
459 * Return the valueMap[] entry for a given property.
\r
461 private ValueMap getValueMap(int property) {
\r
462 int valueMapIndex = enumToValue.getShort(property);
\r
463 return valueMapArray[valueMapIndex];
\r
466 //----------------------------------------------------------------
\r
470 * Return true if the given data version can be used.
\r
472 public boolean isDataVersionAcceptable(byte version[]) {
\r
473 return version[0] == DATA_FORMAT_VERSION;
\r
476 //----------------------------------------------------------------
\r
480 * A specialized ICUBinaryStream that can map between offsets and
\r
481 * index values into various arrays (stringPool, nameGroupPool,
\r
482 * and valueMap). It also knows how to read various structures.
\r
484 static class Builder extends ICUBinaryStream {
\r
486 // map[i] = offset of object i. We need maps for all of our
\r
487 // arrays. The arrays are indexed by offset in the raw binary
\r
488 // file; we need to translate that to index.
\r
490 private short stringPool_map[];
\r
492 private short valueMap_map[];
\r
494 private short nameGroup_map[];
\r
496 public Builder(byte raw[]) {
\r
501 * The valueMap_map[] must be setup in advance. This method
\r
504 public void setupValueMap_map(short offset, short count) {
\r
505 valueMap_map = new short[count];
\r
506 for (int i=0; i<count; ++i) {
\r
507 // Start of each entry. Each entry is 6 bytes long.
\r
508 valueMap_map[i] = (short) (offset + i * 6);
\r
513 * Read stringPool[]. Build up translation table from offsets
\r
514 * to string indices (stringPool_map[]).
\r
516 public String[] readStringPool(short offset, short count)
\r
517 throws IOException {
\r
519 // Allocate one more stringPool entry than needed. Use this
\r
520 // to store a "no string" entry in the pool, at index 0. This
\r
521 // maps to offset 0, so let stringPool_map[0] = 0.
\r
522 String stringPool[] = new String[count + 1];
\r
523 stringPool_map = new short[count + 1];
\r
524 short pos = offset;
\r
525 StringBuffer buf = new StringBuffer();
\r
526 stringPool_map[0] = 0;
\r
527 for (int i=1; i<=count; ++i) {
\r
530 // This works because the name is invariant-ASCII
\r
531 char c = (char) readUnsignedByte();
\r
535 stringPool_map[i] = pos;
\r
536 stringPool[i] = buf.toString();
\r
537 pos += stringPool[i].length() + 1;
\r
540 System.out.println("read stringPool x " + count +
\r
541 ": " + stringPool[1] + ", " +
\r
542 stringPool[2] + ", " +
\r
543 stringPool[3] + ",...");
\r
549 * Read the nameGroupPool[], and build up the offset->index
\r
550 * map (nameGroupPool_map[]).
\r
552 public short[] readNameGroupPool(short offset, short count)
\r
553 throws IOException {
\r
554 // Read nameGroupPool[]. This contains offsets from start of
\r
555 // header. We translate these into indices into stringPool[]
\r
556 // on the fly. The offset 0, which indicates "no entry", we
\r
557 // translate into index 0, which contains a null String
\r
560 short pos = offset;
\r
561 short nameGroupPool[] = new short[count];
\r
562 nameGroup_map = new short[count];
\r
563 for (int i=0; i<count; ++i) {
\r
564 nameGroup_map[i] = pos;
\r
565 nameGroupPool[i] = stringOffsetToIndex(readShort());
\r
569 System.out.println("read nameGroupPool x " + count +
\r
570 ": " + nameGroupPool[0] + ", " +
\r
571 nameGroupPool[1] + ", " +
\r
572 nameGroupPool[2] + ",...");
\r
574 return nameGroupPool;
\r
578 * Convert an offset into the string pool into a stringPool[]
\r
581 private short stringOffsetToIndex(short offset) {
\r
582 int probe = offset;
\r
583 if (probe < 0) probe = -probe;
\r
584 for (int i=0; i<stringPool_map.length; ++i) {
\r
585 if (stringPool_map[i] == probe) {
\r
586 return (short) ((offset < 0) ? -i : i);
\r
589 throw new IllegalStateException("Can't map string pool offset " +
\r
590 offset + " to index");
\r
594 * Convert an array of offsets into the string pool into an
\r
595 * array of stringPool[] indices. MODIFIES THE ARRAY IN
\r
598 /* private void stringOffsetToIndex(short array[]) {
\r
599 for (int i=0; i<array.length; ++i) {
\r
600 array[i] = stringOffsetToIndex(array[i]);
\r
605 * Convert an offset into the value map into a valueMap[]
\r
608 private short valueMapOffsetToIndex(short offset) {
\r
609 for (short i=0; i<valueMap_map.length; ++i) {
\r
610 if (valueMap_map[i] == offset) {
\r
614 throw new IllegalStateException("Can't map value map offset " +
\r
615 offset + " to index");
\r
619 * Convert an array of offsets into the value map array into
\r
620 * an array of valueMap[] indices. MODIFIES THE ARRAY IN
\r
623 private void valueMapOffsetToIndex(short array[]) {
\r
624 for (int i=0; i<array.length; ++i) {
\r
625 array[i] = valueMapOffsetToIndex(array[i]);
\r
630 * Convert an offset into the name group pool into a
\r
631 * nameGroupPool[] index.
\r
633 private short nameGroupOffsetToIndex(short offset) {
\r
634 for (short i=0; i<nameGroup_map.length; ++i) {
\r
635 if (nameGroup_map[i] == offset) {
\r
639 throw new RuntimeException("Can't map name group offset " + offset +
\r
644 * Convert an array of offsets into the name group pool into an
\r
645 * array of nameGroupPool[] indices. MODIFIES THE ARRAY IN
\r
648 private void nameGroupOffsetToIndex(short array[]) {
\r
649 for (int i=0; i<array.length; ++i) {
\r
650 array[i] = nameGroupOffsetToIndex(array[i]);
\r