/*
 **********************************************************************
 * Copyright (c) 2002-2010, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Created: November 5 2002
 **********************************************************************
 */
\r
12 package com.ibm.icu.impl;
\r
14 import java.io.BufferedInputStream;
\r
15 import java.io.DataInputStream;
\r
16 import java.io.IOException;
\r
17 import java.io.InputStream;
\r
18 import java.util.MissingResourceException;
\r
20 import com.ibm.icu.lang.UCharacter;
\r
21 import com.ibm.icu.lang.UProperty;
\r
/**
 * Wrapper for the pnames.icu binary data file.  This data file is
 * imported from icu4c.  It contains property and property value
 * aliases from the UCD files PropertyAliases.txt and
 * PropertyValueAliases.txt.  The file is built by the icu4c tool
 * genpname.  It must be built on an ASCII big-endian platform to be
 * usable by this class.
 *
 * This class performs two functions.
 *
 * (1) It can import the flat binary data into a tree of usable
 * objects.
 *
 * (2) It provides an API to access the tree of objects.
 *
 * Needless to say, this class is tightly coupled to the binary format
 * of icu4c's pnames.icu file.
 *
 * Each time a UPropertyAliases is constructed, the pnames.icu file is
 * read, parsed, and a data tree assembled.  Clients should create one
 * singleton instance and cache it.
 */
\r
48 public final class UPropertyAliases implements ICUBinary.Authenticate {
\r
50 //----------------------------------------------------------------
\r
51 // Runtime data. This is an unflattened representation of the
\r
52 // data in pnames.icu.
\r
55 * Map from property enum value to nameGroupPool[] index
\r
57 private NonContiguousEnumToShort enumToName;
\r
60 * Map from property alias to property enum value
\r
62 private NameToEnum nameToEnum;
\r
65 * Map from property enum value to valueMapArray[] index
\r
67 private NonContiguousEnumToShort enumToValue;
\r
70 * Each entry represents a binary or enumerated property
\r
72 private ValueMap valueMapArray[];
\r
75 * Pool of concatenated integer runs. Each run contains one
\r
76 * or more entries. The last entry of the run is negative.
\r
77 * A zero entry indicates "n/a" in the Property*Aliases.txt.
\r
78 * Each entry is a stringPool[] index.
\r
80 private short nameGroupPool[];
\r
85 private String stringPool[];
\r
87 //----------------------------------------------------------------
\r
91 * Debug flag (not really constant)
\r
93 private static boolean DEBUG = ICUDebug.enabled("pnames");
\r
96 * File format that this class understands.
\r
97 * See icu4c/src/common/propname.h.
\r
99 private static final byte DATA_FORMAT_ID[] = {'p', 'n', 'a', 'm'};
\r
102 * File version that this class understands.
\r
103 * See icu4c/src/common/propname.h.
\r
105 private static final byte DATA_FORMAT_VERSION = 1;
\r
108 * Name of the datafile
\r
110 private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/pnames.icu";
\r
113 * Buffer size of datafile. The whole file is < 16k.
\r
115 private static final int DATA_BUFFER_SIZE = 8192;
\r
117 //----------------------------------------------------------------
\r
121 * Constructs a UPropertyAliases object. The binary file
\r
122 * DATA_FILE_NAME is read from the jar/classpath and unflattened
\r
123 * into member variables of this object.
\r
125 private UPropertyAliases() throws IOException {
\r
127 // Open the .icu file from the jar/classpath
\r
128 InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME);
\r
129 BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE);
\r
130 // Read and discard Unicode version...
\r
131 /* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this);
\r
132 DataInputStream d = new DataInputStream(b);
\r
134 // Record the origin position of the file. Keep enough around
\r
135 // to seek back to the start of the header.
\r
138 short enumToName_offset = d.readShort();
\r
139 short nameToEnum_offset = d.readShort();
\r
140 short enumToValue_offset = d.readShort();
\r
141 short total_size = d.readShort();
\r
142 short valueMap_offset = d.readShort();
\r
143 short valueMap_count = d.readShort();
\r
144 short nameGroupPool_offset = d.readShort();
\r
145 short nameGroupPool_count = d.readShort();
\r
146 short stringPool_offset = d.readShort();
\r
147 short stringPool_count = d.readShort();
\r
150 System.out.println(
\r
151 "enumToName_offset=" + enumToName_offset + "\n" +
\r
152 "nameToEnum_offset=" + nameToEnum_offset + "\n" +
\r
153 "enumToValue_offset=" + enumToValue_offset + "\n" +
\r
154 "total_size=" + total_size + "\n" +
\r
155 "valueMap_offset=" + valueMap_offset + "\n" +
\r
156 "valueMap_count=" + valueMap_count + "\n" +
\r
157 "nameGroupPool_offset=" + nameGroupPool_offset + "\n" +
\r
158 "nameGroupPool_count=" + nameGroupPool_count + "\n" +
\r
159 "stringPool_offset=" + stringPool_offset + "\n" +
\r
160 "stringPool_count=" + stringPool_count);
\r
163 // Read it all (less than 32k). Seeking around (using
\r
164 // mark/reset/skipBytes) doesn't work directly on the file,
\r
165 // but it works fine if we read everything into a byte[] array
\r
167 byte raw[] = new byte[total_size];
\r
172 Builder builder = new Builder(raw);
\r
174 stringPool = builder.readStringPool(stringPool_offset,
\r
177 nameGroupPool = builder.readNameGroupPool(nameGroupPool_offset,
\r
178 nameGroupPool_count);
\r
180 builder.setupValueMap_map(valueMap_offset, valueMap_count);
\r
182 // Some of the following data structures have to be set up
\r
183 // here, _not_ in Builder. That's because they are instances
\r
184 // of non-static inner classes, and they contain implicit
\r
185 // references to this.
\r
187 builder.seek(enumToName_offset);
\r
188 enumToName = new NonContiguousEnumToShort(builder);
\r
189 builder.nameGroupOffsetToIndex(enumToName.offsetArray);
\r
191 builder.seek(nameToEnum_offset);
\r
192 nameToEnum = new NameToEnum(builder);
\r
194 builder.seek(enumToValue_offset);
\r
195 enumToValue = new NonContiguousEnumToShort(builder);
\r
196 builder.valueMapOffsetToIndex(enumToValue.offsetArray);
\r
198 valueMapArray = new ValueMap[valueMap_count];
\r
199 for (int i=0; i<valueMap_count; ++i) {
\r
200 // Must seek to the start of each entry.
\r
201 builder.seek(builder.valueMap_map[i]);
\r
202 valueMapArray[i] = new ValueMap(builder);
\r
//----------------------------------------------------------------
// Public API

/**
 * The shared instance, built once when this class is initialized.
 */
public static final UPropertyAliases INSTANCE;

static {
    try {
        INSTANCE = new UPropertyAliases();
    } catch(IOException e) {
        // MissingResourceException has no cause-taking constructor, so
        // attach the underlying I/O failure explicitly instead of
        // discarding it.
        MissingResourceException mre = new MissingResourceException(
            "Could not construct UPropertyAliases. Missing pnames.icu","","");
        mre.initCause(e);
        throw mre;
    }
}
\r
224 * Return a property name given a property enum. Multiple
\r
225 * names may be available for each property; the nameChoice
\r
226 * selects among them.
\r
228 public String getPropertyName(int property,
\r
230 short nameGroupIndex = enumToName.getShort(property);
\r
231 return chooseNameInGroup(nameGroupIndex, nameChoice);
\r
235 * Return a property enum given one of its property names.
\r
236 * If the property name is not known, this method returns
\r
237 * UProperty.UNDEFINED.
\r
239 public int getPropertyEnum(String propertyAlias) {
\r
240 return nameToEnum.getEnum(propertyAlias);
\r
244 * Return a value name given a property enum and a value enum.
\r
245 * Multiple names may be available for each value; the nameChoice
\r
246 * selects among them.
\r
248 public String getPropertyValueName(int property,
\r
251 ValueMap vm = getValueMap(property);
\r
252 short nameGroupIndex = vm.enumToName.getShort(value);
\r
253 return chooseNameInGroup(nameGroupIndex, nameChoice);
\r
257 * Return a value enum given one of its value names and the
\r
258 * corresponding property alias.
\r
260 public int getPropertyValueEnum(int property,
\r
261 String valueAlias) {
\r
262 ValueMap vm = getValueMap(property);
\r
263 return vm.nameToEnum.getEnum(valueAlias);
\r
266 //----------------------------------------------------------------
\r
270 * A map for the legal values of a binary or enumerated properties.
\r
272 private class ValueMap {
\r
275 * Maps value enum to index into the nameGroupPool[]
\r
277 EnumToShort enumToName; // polymorphic
\r
280 * Maps value name to value enum.
\r
282 NameToEnum nameToEnum;
\r
284 ValueMap(Builder b) throws IOException {
\r
285 short enumToName_offset = b.readShort();
\r
286 short ncEnumToName_offset = b.readShort();
\r
287 short nameToEnum_offset = b.readShort();
\r
288 if (enumToName_offset != 0) {
\r
289 b.seek(enumToName_offset);
\r
290 ContiguousEnumToShort x = new ContiguousEnumToShort(b);
\r
291 b.nameGroupOffsetToIndex(x.offsetArray);
\r
294 b.seek(ncEnumToName_offset);
\r
295 NonContiguousEnumToShort x = new NonContiguousEnumToShort(b);
\r
296 b.nameGroupOffsetToIndex(x.offsetArray);
\r
299 b.seek(nameToEnum_offset);
\r
300 nameToEnum = new NameToEnum(b);
\r
305 * Abstract map from enum values to integers.
\r
307 private interface EnumToShort {
\r
308 short getShort(int enumProbe);
\r
312 * Generic map from enum values to offsets. Enum values are
\r
315 private static class ContiguousEnumToShort implements EnumToShort {
\r
318 short offsetArray[];
\r
320 public short getShort(int enumProbe) {
\r
321 if (enumProbe < enumStart || enumProbe >= enumLimit) {
\r
322 throw new IllegalIcuArgumentException("Invalid enum. enumStart = " +enumStart +
\r
323 " enumLimit = " + enumLimit +
\r
324 " enumProbe = " + enumProbe );
\r
326 return offsetArray[enumProbe - enumStart];
\r
329 ContiguousEnumToShort(ICUBinaryStream s) throws IOException {
\r
330 enumStart = s.readInt();
\r
331 enumLimit = s.readInt();
\r
332 int count = enumLimit - enumStart;
\r
333 offsetArray = new short[count];
\r
334 for (int i=0; i<count; ++i) {
\r
335 offsetArray[i] = s.readShort();
\r
341 * Generic map from enum values to offsets. Enum values need not
\r
344 private static class NonContiguousEnumToShort implements EnumToShort {
\r
346 short offsetArray[];
\r
348 public short getShort(int enumProbe) {
\r
349 for (int i=0; i<enumArray.length; ++i) {
\r
350 if (enumArray[i] < enumProbe) continue;
\r
351 if (enumArray[i] > enumProbe) break;
\r
352 return offsetArray[i];
\r
354 throw new IllegalIcuArgumentException("Invalid enum");
\r
357 NonContiguousEnumToShort(ICUBinaryStream s) throws IOException {
\r
359 int count = s.readInt();
\r
360 enumArray = new int[count];
\r
361 offsetArray = new short[count];
\r
362 for (i=0; i<count; ++i) {
\r
363 enumArray[i] = s.readInt();
\r
365 for (i=0; i<count; ++i) {
\r
366 offsetArray[i] = s.readShort();
\r
372 * Map from names to enum values.
\r
374 private class NameToEnum {
\r
378 int getEnum(String nameProbe) {
\r
379 for (int i=0; i<nameArray.length; ++i) {
\r
380 int c = UPropertyAliases.compare(nameProbe,
\r
381 stringPool[nameArray[i]]);
\r
382 if (c > 0) continue;
\r
384 return enumArray[i];
\r
386 return UProperty.UNDEFINED;
\r
389 NameToEnum(Builder b) throws IOException {
\r
391 int count = b.readInt();
\r
392 enumArray = new int[count];
\r
393 nameArray = new short[count];
\r
394 for (i=0; i<count; ++i) {
\r
395 enumArray[i] = b.readInt();
\r
397 for (i=0; i<count; ++i) {
\r
398 nameArray[i] = b.stringOffsetToIndex(b.readShort());
\r
403 //----------------------------------------------------------------
\r
404 // Runtime implementation
\r
407 * Compare two property names, returning <0, 0, or >0. The
\r
408 * comparison is that described as "loose" matching in the
\r
409 * Property*Aliases.txt files.
\r
411 public static int compare(String stra, String strb) {
\r
412 // Note: This implementation is a literal copy of
\r
413 // uprv_comparePropertyNames. It can probably be improved.
\r
414 int istra=0, istrb=0, rc;
\r
415 int cstra=0, cstrb=0;
\r
417 /* Ignore delimiters '-', '_', and ASCII White_Space */
\r
418 while (istra<stra.length()) {
\r
419 cstra = stra.charAt(istra);
\r
421 case '-': case '_': case ' ': case '\t':
\r
422 case '\n': case 0xb/*\v*/: case '\f': case '\r':
\r
429 while (istrb<strb.length()) {
\r
430 cstrb = strb.charAt(istrb);
\r
432 case '-': case '_': case ' ': case '\t':
\r
433 case '\n': case 0xb/*\v*/: case '\f': case '\r':
\r
440 /* If we reach the ends of both strings then they match */
\r
441 boolean endstra = istra==stra.length();
\r
442 boolean endstrb = istrb==strb.length();
\r
444 if (endstrb) return 0;
\r
446 } else if (endstrb) {
\r
450 rc = UCharacter.toLowerCase(cstra) - UCharacter.toLowerCase(cstrb);
\r
461 * Given an index to a run within the nameGroupPool[], and a
\r
462 * nameChoice (0,1,...), select the nameChoice-th entry of the run.
\r
464 private String chooseNameInGroup(short nameGroupIndex, int nameChoice) {
\r
465 if (nameChoice < 0) {
\r
466 throw new IllegalIcuArgumentException("Invalid name choice");
\r
468 while (nameChoice-- > 0) {
\r
469 if (nameGroupPool[nameGroupIndex++] < 0) {
\r
470 throw new IllegalIcuArgumentException("Invalid name choice");
\r
473 short a = nameGroupPool[nameGroupIndex];
\r
474 return stringPool[(a < 0) ? -a : a];
\r
478 * Return the valueMap[] entry for a given property.
\r
480 private ValueMap getValueMap(int property) {
\r
481 int valueMapIndex = enumToValue.getShort(property);
\r
482 return valueMapArray[valueMapIndex];
\r
485 //----------------------------------------------------------------
\r
489 * Return true if the given data version can be used.
\r
491 public boolean isDataVersionAcceptable(byte version[]) {
\r
492 return version[0] == DATA_FORMAT_VERSION;
\r
495 //----------------------------------------------------------------
\r
499 * A specialized ICUBinaryStream that can map between offsets and
\r
500 * index values into various arrays (stringPool, nameGroupPool,
\r
501 * and valueMap). It also knows how to read various structures.
\r
503 static class Builder extends ICUBinaryStream {
\r
505 // map[i] = offset of object i. We need maps for all of our
\r
506 // arrays. The arrays are indexed by offset in the raw binary
\r
507 // file; we need to translate that to index.
\r
509 private short stringPool_map[];
\r
511 private short valueMap_map[];
\r
513 private short nameGroup_map[];
\r
515 public Builder(byte raw[]) {
\r
520 * The valueMap_map[] must be setup in advance. This method
\r
523 public void setupValueMap_map(short offset, short count) {
\r
524 valueMap_map = new short[count];
\r
525 for (int i=0; i<count; ++i) {
\r
526 // Start of each entry. Each entry is 6 bytes long.
\r
527 valueMap_map[i] = (short) (offset + i * 6);
\r
532 * Read stringPool[]. Build up translation table from offsets
\r
533 * to string indices (stringPool_map[]).
\r
535 public String[] readStringPool(short offset, short count)
\r
536 throws IOException {
\r
538 // Allocate one more stringPool entry than needed. Use this
\r
539 // to store a "no string" entry in the pool, at index 0. This
\r
540 // maps to offset 0, so let stringPool_map[0] = 0.
\r
541 String stringPool[] = new String[count + 1];
\r
542 stringPool_map = new short[count + 1];
\r
543 short pos = offset;
\r
544 StringBuilder buf = new StringBuilder();
\r
545 stringPool_map[0] = 0;
\r
546 for (int i=1; i<=count; ++i) {
\r
549 // This works because the name is invariant-ASCII
\r
550 char c = (char) readUnsignedByte();
\r
554 stringPool_map[i] = pos;
\r
555 stringPool[i] = buf.toString();
\r
556 pos += stringPool[i].length() + 1;
\r
559 System.out.println("read stringPool x " + count +
\r
560 ": " + stringPool[1] + ", " +
\r
561 stringPool[2] + ", " +
\r
562 stringPool[3] + ",...");
\r
568 * Read the nameGroupPool[], and build up the offset->index
\r
569 * map (nameGroupPool_map[]).
\r
571 public short[] readNameGroupPool(short offset, short count)
\r
572 throws IOException {
\r
573 // Read nameGroupPool[]. This contains offsets from start of
\r
574 // header. We translate these into indices into stringPool[]
\r
575 // on the fly. The offset 0, which indicates "no entry", we
\r
576 // translate into index 0, which contains a null String
\r
579 short pos = offset;
\r
580 short nameGroupPool[] = new short[count];
\r
581 nameGroup_map = new short[count];
\r
582 for (int i=0; i<count; ++i) {
\r
583 nameGroup_map[i] = pos;
\r
584 nameGroupPool[i] = stringOffsetToIndex(readShort());
\r
588 System.out.println("read nameGroupPool x " + count +
\r
589 ": " + nameGroupPool[0] + ", " +
\r
590 nameGroupPool[1] + ", " +
\r
591 nameGroupPool[2] + ",...");
\r
593 return nameGroupPool;
\r
597 * Convert an offset into the string pool into a stringPool[]
\r
600 private short stringOffsetToIndex(short offset) {
\r
601 int probe = offset;
\r
602 if (probe < 0) probe = -probe;
\r
603 for (int i=0; i<stringPool_map.length; ++i) {
\r
604 if (stringPool_map[i] == probe) {
\r
605 return (short) ((offset < 0) ? -i : i);
\r
608 throw new IllegalStateException("Can't map string pool offset " +
\r
609 offset + " to index");
\r
613 * Convert an array of offsets into the string pool into an
\r
614 * array of stringPool[] indices. MODIFIES THE ARRAY IN
\r
617 /* private void stringOffsetToIndex(short array[]) {
\r
618 for (int i=0; i<array.length; ++i) {
\r
619 array[i] = stringOffsetToIndex(array[i]);
\r
624 * Convert an offset into the value map into a valueMap[]
\r
627 private short valueMapOffsetToIndex(short offset) {
\r
628 for (short i=0; i<valueMap_map.length; ++i) {
\r
629 if (valueMap_map[i] == offset) {
\r
633 throw new IllegalStateException("Can't map value map offset " +
\r
634 offset + " to index");
\r
638 * Convert an array of offsets into the value map array into
\r
639 * an array of valueMap[] indices. MODIFIES THE ARRAY IN
\r
642 private void valueMapOffsetToIndex(short array[]) {
\r
643 for (int i=0; i<array.length; ++i) {
\r
644 array[i] = valueMapOffsetToIndex(array[i]);
\r
649 * Convert an offset into the name group pool into a
\r
650 * nameGroupPool[] index.
\r
652 private short nameGroupOffsetToIndex(short offset) {
\r
653 for (short i=0; i<nameGroup_map.length; ++i) {
\r
654 if (nameGroup_map[i] == offset) {
\r
658 throw new RuntimeException("Can't map name group offset " + offset +
\r
663 * Convert an array of offsets into the name group pool into an
\r
664 * array of nameGroupPool[] indices. MODIFIES THE ARRAY IN
\r
667 private void nameGroupOffsetToIndex(short array[]) {
\r
668 for (int i=0; i<array.length; ++i) {
\r
669 array[i] = nameGroupOffsetToIndex(array[i]);
\r