2 *******************************************************************************
\r
3 * Copyright (C) 2004-2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.impl;
\r
9 import java.io.BufferedInputStream;
\r
10 import java.io.DataInputStream;
\r
11 import java.io.IOException;
\r
12 import java.io.InputStream;
\r
14 import com.ibm.icu.util.ULocale;
\r
15 import com.ibm.icu.util.VersionInfo;
\r
20 * This class reads the *.res resource bundle format
\r
22 * (For the latest version of the file format documentation see
\r
23 * ICU4C's source/common/uresdata.h file.)
\r
25 * File format for .res resource bundle files (formatVersion=1.2)
\r
27 * An ICU4C resource bundle file (.res) is a binary, memory-mappable file
\r
28 * with nested, hierarchical data structures.
\r
29 * It physically contains the following:
\r
31 * Resource root; -- 32-bit Resource item, root item for this bundle's tree;
\r
32 * currently, the root item must be a table or table32 resource item
\r
33 * int32_t indexes[indexes[0]]; -- array of indexes for friendly
\r
34 * reading and swapping; see URES_INDEX_* below
\r
35 * new in formatVersion 1.1 (ICU 2.8)
\r
36 * char keys[]; -- characters for key strings
\r
37 * (formatVersion 1.0: up to 65k of characters; 1.1: <2G)
\r
38 * (minus the space for root and indexes[]),
\r
39 * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated;
\r
40 * padded to multiple of 4 bytes for 4-alignment of the following data
\r
41 * data; -- data directly and indirectly indexed by the root item;
\r
42 * the structure is determined by walking the tree
\r
44 * Each resource bundle item has a 32-bit Resource handle (see typedef above)
\r
45 * which contains the item type number in its upper 4 bits (31..28) and either
\r
46 * an offset or a direct value in its lower 28 bits (27..0).
\r
47 * The order of items is undefined and only determined by walking the tree.
\r
48 * Leaves of the tree may be stored first or last or anywhere in between,
\r
49 * and it is in theory possible to have unreferenced holes in the file.
\r
52 * - Empty Unicode strings have an offset value of 0 in the Resource handle itself.
\r
53 * - Integer values are 28-bit values stored in the Resource handle itself;
\r
54 * the interpretation of unsigned vs. signed integers is up to the application.
\r
56 * All other types and values use 28-bit offsets to point to the item's data.
\r
57 * The offset is an index to the first 32-bit word of the value, relative to the
\r
58 * start of the resource data (i.e., the root item handle is at offset 0).
\r
59 * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits).
\r
60 * All resource item values are 4-aligned.
\r
62 * The structures (memory layouts) for the values for each item type are listed
\r
63 * in the table below.
\r
65 * Nested, hierarchical structures: -------------
\r
67 * Table items contain key-value pairs where the keys are 16-bit offsets to char * key strings.
\r
68 * Key string offsets are also relative to the start of the resource data (of the root handle),
\r
69 * i.e., the first string has an offset of 4 (after the 4-byte root handle).
\r
71 * The values of these pairs are Resource handles.
\r
73 * Array items are simple vectors of Resource handles.
\r
75 * An alias item is special (and new in ICU 2.4): --------------
\r
77 * Its memory layout is just like for a UnicodeString, but at runtime it resolves to
\r
78 * another resource bundle's item according to the path in the string.
\r
79 * This is used to share items across bundles that are in different lookup/fallback
\r
80 * chains (e.g., large collation data among zh_TW and zh_HK).
\r
81 * This saves space (for large items) and maintenance effort (less duplication of data).
\r
83 * --------------------------------------------------------------------------
\r
87 * Most resources have their values stored at four-byte offsets from the start
\r
88 * of the resource data. These values are at least 4-aligned.
\r
89 * Some resource values are stored directly in the offset field of the Resource itself.
\r
90 * See UResType in unicode/ures.h for enumeration constants for Resource types.
\r
92 * Type Name Memory layout of values
\r
93 * (in parentheses: scalar, non-offset values)
\r
95 * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding)
\r
96 * or (empty string ("") if offset==0)
\r
97 * 1 Binary: int32_t length, uint8_t[length], (padding)
\r
98 * - this value should be 32-aligned -
\r
99 * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count]
\r
100 * 3 Alias: (physically same value layout as string, new in ICU 2.4)
\r
101 * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count]
\r
102 * (new in formatVersion 1.1/ICU 2.8)
\r
104 * 7 Integer: (28-bit offset is integer value)
\r
105 * 8 Array: int32_t count, Resource[count]
\r
107 * 14 Integer Vector: int32_t length, int32_t[length]
\r
108 * 15 Reserved: This value denotes special purpose resources and is for internal use.
\r
110 * Note that there are 3 types with data vector values:
\r
111 * - Vectors of 8-bit bytes stored as type Binary.
\r
112 * - Vectors of 16-bit words stored as type Unicode String
\r
113 * (no value restrictions, all values 0..ffff allowed!).
\r
114 * - Vectors of 32-bit words stored as type Integer Vector.
\r
118 public final class ICUResourceBundleReader implements ICUBinary.Authenticate{
\r
121 * File format version that this class understands.
\r
124 private static final byte DATA_FORMAT_ID[] = {(byte)0x52, (byte)0x65,
\r
125 (byte)0x73, (byte)0x42};
\r
127 private static final String ICU_RESOURCE_SUFFIX = ".res";
\r
129 /* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
\r
130 private static final int URES_INDEX_LENGTH = 0; /* [0] contains URES_INDEX_TOP==the length of indexes[] */
\r
131 //private static final int URES_INDEX_STRINGS_TOP = 1; /* [1] contains the top of the strings, */
\r
132 /* same as the bottom of resources, rounded up */
\r
133 //private static final int URES_INDEX_RESOURCES_TOP = 2; /* [2] contains the top of all resources */
\r
134 private static final int URES_INDEX_BUNDLE_TOP = 3; /* [3] contains the top of the bundle, */
\r
135 /* in case it were ever different from [2] */
\r
136 //private static final int URES_INDEX_MAX_TABLE_LENGTH = 4; /* [4] max. length of any table */
\r
137 private static final int URES_INDEX_ATTRIBUTES = 5; /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
\r
138 //private static final int URES_INDEX_TOP = 6;
\r
140 //private static final int URES_STRINGS_BOTTOM=(1+URES_INDEX_TOP)*4;
\r
143 * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES].
\r
144 * New in formatVersion 1.2 (ICU 3.6).
\r
146 * If set, then this resource bundle is a standalone bundle.
\r
147 * If not set, then the bundle participates in locale fallback, eventually
\r
148 * all the way to the root bundle.
\r
149 * If indexes[] is missing or too short, then the attribute cannot be determined
\r
150 * reliably. Dependency checking should ignore such bundles, and loading should
\r
153 private static final int URES_ATT_NO_FALLBACK = 1;
\r
155 private static final boolean DEBUG = false;
\r
157 private byte[] /* formatVersion, */ dataVersion;
\r
159 private int rootRes;
\r
160 private int[] indexes;
\r
161 private boolean noFallback; /* see URES_ATT_NO_FALLBACK */
\r
163 private byte[] data;
\r
/**
 * Builds a reader from a raw resource bundle stream: reads the ICU data
 * header through ICUBinary.readHeader (using DATA_FORMAT_ID and this object
 * as the version check), then loads the bundle body with readData().
 * The stream is closed before the constructor returns, whether or not
 * reading succeeded.
 *
 * @param stream       the stream supplying the .res data
 * @param resolvedName name of the resource; used only in the error message
 * @throws RuntimeException if an IOException occurs while reading; the
 *         IOException is attached as the cause
 */
private ICUResourceBundleReader(InputStream stream, String resolvedName){

    BufferedInputStream bs = new BufferedInputStream(stream);
    try{
        if(DEBUG) System.out.println("The InputStream class is: " + stream.getClass().getName());
        if(DEBUG) System.out.println("The BufferedInputStream class is: " + bs.getClass().getName());
        if(DEBUG) System.out.println("The bytes avialable in stream before reading the header: " + bs.available());

        dataVersion = ICUBinary.readHeader(bs,DATA_FORMAT_ID,this);

        if(DEBUG) System.out.println("The bytes available in stream after reading the header: " + bs.available());

        readData(bs);
    }catch(IOException ex){
        // keep the original exception as the cause so the stack trace is not lost
        throw new RuntimeException("Data file "+ resolvedName+ " is corrupt - " + ex.getMessage(), ex);
    }finally{
        // closing the buffered wrapper also closes the wrapped stream
        try{
            bs.close();
        }catch(IOException ignored){
            // the data has already been read (or a failure is already being
            // reported); a close failure carries no additional information
        }
    }
}
\r
183 public static ICUResourceBundleReader getReader(String baseName, String localeName, ClassLoader root){
\r
184 String resolvedName = getFullName(baseName, localeName);
\r
185 InputStream stream = ICUData.getStream(root,resolvedName);
\r
190 ICUResourceBundleReader reader = new ICUResourceBundleReader(stream, resolvedName);
\r
194 private static void writeInt(int i, byte[] bytes, int offset) {
\r
195 bytes[offset++]=(byte)(i>>24);
\r
196 bytes[offset++]=(byte)(i>>16);
\r
197 bytes[offset++]=(byte)(i>>8);
\r
198 bytes[offset]=(byte)i;
\r
201 private void readData(InputStream stream)
\r
202 throws IOException{
\r
204 DataInputStream ds = new DataInputStream(stream);
\r
206 if(DEBUG) System.out.println("The DataInputStream class is: " + ds.getClass().getName());
\r
207 if(DEBUG) System.out.println("The available bytes in the stream before reading the data: "+ds.available());
\r
210 * The following will read two integers before ds.mark().
\r
211 * Later, the two integers need to be placed into data[],
\r
212 * then ds.reset(), then ds.readFully(into rest of data[]).
\r
214 * This is necessary because we don't know the readLimit for ds.mark()
\r
215 * until we have read the second integer (indexLength).
\r
217 rootRes = ds.readInt();
\r
219 // read the variable-length indexes[] array
\r
220 int indexLength = ds.readInt();
\r
221 ds.mark((indexLength-1)*4);
\r
223 indexes = new int[indexLength];
\r
224 indexes[URES_INDEX_LENGTH] = indexLength;
\r
226 for(int i=1; i<indexLength; i++){
\r
227 indexes[i] = ds.readInt();
\r
230 // determine if this resource bundle falls back to a parent bundle
\r
231 // along normal locale ID fallback
\r
233 indexLength > URES_INDEX_ATTRIBUTES &&
\r
234 (indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)!=0;
\r
236 // read the entire bundle (after the header) into data[]
\r
237 // put rootRes and indexLength into data[0..7]
\r
238 // and the rest of the data into data[8..length-1]
\r
239 int length = indexes[URES_INDEX_BUNDLE_TOP]*4;
\r
240 if(DEBUG) System.out.println("The number of bytes in the bundle: "+length);
\r
242 data = new byte[length];
\r
243 writeInt(rootRes, data, 0);
\r
244 writeInt(indexLength, data, 4);
\r
246 // now reset to the mark, which was set after reading rootRes and indexLength
\r
248 ds.readFully(data, 8, length-8);
\r
252 * Gets the full name of the resource with suffix.
\r
254 public static String getFullName(String baseName, String localeName){
\r
255 if(baseName==null || baseName.length()==0){
\r
256 if(localeName.length()==0){
\r
257 return ULocale.getDefault().toString()+ICU_RESOURCE_SUFFIX;
\r
259 return localeName+ICU_RESOURCE_SUFFIX;
\r
262 if(baseName.indexOf('.')==-1){
\r
263 if(baseName.charAt(baseName.length()-1)!= '/'){
\r
264 return baseName+"/"+localeName+ICU_RESOURCE_SUFFIX;
\r
266 return baseName+localeName+ICU_RESOURCE_SUFFIX;
\r
269 baseName = baseName.replace('.','/');
\r
270 if(localeName.length()==0){
\r
271 return baseName+ICU_RESOURCE_SUFFIX;
\r
273 return baseName+"_"+localeName+ICU_RESOURCE_SUFFIX;
\r
279 public VersionInfo getVersion(){
\r
280 return VersionInfo.getInstance(dataVersion[0],dataVersion[1],dataVersion[2],dataVersion[3]);
\r
282 public boolean isDataVersionAcceptable(byte version[]){
\r
283 // while ICU4C can read formatVersion 1.0 and up,
\r
284 // ICU4J requires 1.1 as a minimum
\r
285 // formatVersion = version;
\r
286 return version[0] == 1 && version[1] >= 1;
\r
289 public byte[] getData(){
\r
292 public int getRootResource() {
\r
295 public boolean getNoFallback() {
\r