2 *******************************************************************************
\r
3 * Copyright (C) 2006-2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 *******************************************************************************
\r
10 package com.ibm.icu.charset;
\r
12 import java.io.IOException;
\r
13 import java.nio.charset.Charset;
\r
14 import java.nio.charset.UnsupportedCharsetException;
\r
15 import java.nio.charset.spi.CharsetProvider;
\r
16 import java.util.HashMap;
\r
17 import java.util.Iterator;
\r
18 import java.util.Map;
\r
20 import com.ibm.icu.impl.InvalidFormatException;
\r
24 * A concrete subclass of CharsetProvider for loading and providing charset converters
\r
28 public final class CharsetProviderICU extends CharsetProvider{
\r
29 private static String optionsString = null;
\r
30 private static boolean gettingJavaCanonicalName = false;
\r
33 * Default constructor
\r
36 public CharsetProviderICU() {
\r
40 * Constructs a charset for the given charset name.
\r
41 * Implements the abstract method of super class.
\r
42 * @param charsetName charset name
\r
43 * @return charset object for the given charset name, null if unsupported
\r
46 public final Charset charsetForName(String charsetName){
\r
48 // extract the options from the charset name
\r
49 charsetName = processOptions(charsetName);
\r
50 // get the canonical name
\r
51 String icuCanonicalName = getICUCanonicalName(charsetName);
\r
53 // create the converter object and return it
\r
54 if(icuCanonicalName==null || icuCanonicalName.length()==0){
\r
55 // Try the original name, may be something added and not in the alias table.
\r
56 // Will get an unsupported encoding exception if it doesn't work.
\r
57 return getCharset(charsetName);
\r
59 return getCharset(icuCanonicalName);
\r
60 }catch(UnsupportedCharsetException ex){
\r
61 }catch(IOException ex){
\r
67 * Constructs a charset for the given ICU conversion table from the specified class path.
\r
68 * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
\r
69 * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package.
\r
70 * Conversion tables can be made with ICU4C's makeconv tool.
\r
71 * This function allows you to load user defined conversion
\r
72 * tables that are outside of ICU's core data.
\r
73 * @param charsetName The name of the charset conversion table.
\r
74 * @param classPath The class path that contains the conversion table.
\r
75 * @return charset object for the given charset name, null if unsupported
\r
78 public final Charset charsetForName(String charsetName, String classPath) {
\r
79 return charsetForName(charsetName, classPath, null);
\r
83 * Constructs a charset for the given ICU conversion table from the specified class path.
\r
84 * This function is similar to {@link #charsetForName(String, String)}.
\r
85 * @param charsetName The name of the charset conversion table.
\r
86 * @param classPath The class path that contains the conversion table.
\r
87 * @param loader the class loader from which to load the charset conversion table
\r
88 * @return charset object for the given charset name, null if unsupported
\r
91 public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) {
\r
92 CharsetMBCS cs = null;
\r
94 cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader);
\r
95 } catch (InvalidFormatException e) {
\r
102 * Gets the canonical name of the converter as defined by ICU
\r
103 * @param enc converter name
\r
104 * @return canonical name of the converter
\r
105 * @internal ICU 3.6
\r
106 * @deprecated This API is ICU internal only.
\r
108 public static final String getICUCanonicalName(String enc)
\r
109 throws UnsupportedCharsetException{
\r
110 String canonicalName = null;
\r
114 if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){
\r
115 ret = canonicalName;
\r
116 } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){
\r
117 ret = canonicalName;
\r
118 } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){
\r
119 /* we have some aliases in the form x-blah .. match those */
\r
120 ret = canonicalName;
\r
121 }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){
\r
122 ret = canonicalName;
\r
123 }*/else if(enc.indexOf("x-")==0){
\r
124 /* TODO: Match with getJavaCanonicalName method */
\r
126 char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
\r
127 strcpy(temp, encName+2);
\r
129 // Remove the 'x-' and get the ICU canonical name
\r
130 if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) {
\r
131 ret = canonicalName;
\r
137 /* unsupported encoding */
\r
142 }catch(IOException ex){
\r
143 throw new UnsupportedCharsetException(enc);
\r
146 private static final Charset getCharset(String icuCanonicalName) throws IOException{
\r
147 String[] aliases = (String[])getAliases(icuCanonicalName);
\r
148 String canonicalName = getJavaCanonicalName(icuCanonicalName);
\r
150 /* Concat the option string to the icuCanonicalName so that the options can be handled properly
\r
151 * by the actual charset.
\r
152 * Note: getJavaCanonicalName() may eventually call this method so skip the concatenation part
\r
153 * during getJavaCanonicalName() call.
\r
155 if (gettingJavaCanonicalName) {
\r
156 gettingJavaCanonicalName = false;
\r
157 } else if (optionsString != null) {
\r
158 icuCanonicalName = icuCanonicalName.concat(optionsString);
\r
159 optionsString = null;
\r
162 return (CharsetICU.getCharset(icuCanonicalName,canonicalName, aliases));
\r
165 * Gets the canonical name of the converter as defined by Java
\r
166 * @param charsetName converter name
\r
167 * @return canonical name of the converter
\r
168 * @internal ICU 3.6
\r
169 * @deprecated This API is ICU internal only.
\r
171 public static String getJavaCanonicalName(String charsetName){
\r
173 If a charset listed in the IANA Charset Registry is supported by an implementation
\r
174 of the Java platform then its canonical name must be the name listed in the registry.
\r
175 Many charsets are given more than one name in the registry, in which case the registry
\r
176 identifies one of the names as MIME-preferred. If a charset has more than one registry
\r
177 name then its canonical name must be the MIME-preferred name and the other names in
\r
178 the registry must be valid aliases. If a supported charset is not listed in the IANA
\r
179 registry then its canonical name must begin with one of the strings "X-" or "x-".
\r
181 if(charsetName==null ){
\r
185 String cName = null;
\r
186 /* find out the alias with MIME tag */
\r
187 if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){
\r
188 /* find out the alias with IANA tag */
\r
189 }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
\r
192 check to see if an alias already exists with x- prefix, if yes then
\r
193 make that the canonical name
\r
195 int aliasNum = UConverterAlias.countAliases(charsetName);
\r
197 for(int i=0;i<aliasNum;i++){
\r
198 name = UConverterAlias.getAlias(charsetName, i);
\r
199 if(name!=null && name.indexOf("x-")==0){
\r
204 /* last resort: just prepend x- to one of the aliases and
\r
205 make it the canonical name */
\r
206 if((cName==null || cName.length()==0)){
\r
207 name = UConverterAlias.getStandardName(charsetName, "UTR22");
\r
208 if(name==null && charsetName.indexOf(",")!=-1){
\r
209 name = UConverterAlias.getAlias(charsetName, 1);
\r
211 /* if there is no UTR22 canonical name .. then just return itself*/
\r
213 name = charsetName;
\r
215 cName = "x-"+ name;
\r
218 /* After getting the java canonical name from ICU alias table, get the
\r
219 * java canonical name from the current JDK. This is necessary because
\r
220 * different versions of the JVM (Sun and IBM) may have a different
\r
221 * canonical name than the one given by ICU. So the java canonical name
\r
222 * will depend on the current JVM. Since java cannot use the ICU canonical
\r
223 * we have to try to use a java compatible name.
\r
225 if (cName != null) {
\r
226 gettingJavaCanonicalName = true;
\r
227 if (Charset.isSupported(cName)) {
\r
228 cName = Charset.forName(cName).name();
\r
232 }catch (IOException ex){
\r
239 * Gets the aliases associated with the converter name
\r
240 * @param encName converter name
\r
241 * @return converter names as elements in an object array
\r
242 * @internal ICU 3.6
\r
244 private static final String[] getAliases(String encName)throws IOException{
\r
245 String[] ret = null;
\r
249 String aliasArray[/*50*/] = new String[50];
\r
251 if(encName != null){
\r
252 aliasNum = UConverterAlias.countAliases(encName);
\r
253 for(i=0,j=0;i<aliasNum;i++){
\r
254 String name = UConverterAlias.getAlias(encName,i);
\r
255 if(name.indexOf('+')==-1 && name.indexOf(',')==-1){
\r
256 aliasArray[j++]= name;
\r
259 ret = new String[j];
\r
261 ret[j] = aliasArray[j];
\r
269 private static final void putCharsets(Map map){
\r
270 int num = UConverterAlias.countAvailable();
\r
271 for(int i=0;i<num;i++) {
\r
272 String name = UConverterAlias.getAvailableName(i);
\r
274 Charset cs = getCharset(name);
\r
275 map.put(cs, getJavaCanonicalName(name));
\r
276 }catch(UnsupportedCharsetException ex){
\r
277 }catch (IOException e) {
\r
279 // add only charsets that can be created!
\r
284 * Returns an iterator for the available charsets.
\r
285 * Implements the abstract method of super class.
\r
286 * @return Iterator the charset name iterator
\r
289 public final Iterator charsets(){
\r
290 HashMap map = new HashMap();
\r
292 return map.keySet().iterator();
\r
296 * Gets the canonical names of available converters
\r
297 * @return Object[] names as an object array
\r
298 * @internal ICU 3.6
\r
299 * @deprecated This API is ICU internal only.
\r
301 public static final Object[] getAvailableNames(){
\r
302 HashMap map = new HashMap();
\r
304 return map.values().toArray();
\r
308 * Return all names available
\r
309 * @return String[] an array of all available names
\r
310 * @internal ICU 3.6
\r
311 * @deprecated This API is ICU internal only.
\r
313 public static final String[] getAllNames(){
\r
314 int num = UConverterAlias.countAvailable();
\r
315 String[] names = new String[num];
\r
316 for(int i=0;i<num;i++) {
\r
317 names[i] = UConverterAlias.getAvailableName(i);
\r
322 private static final String processOptions(String charsetName) {
\r
323 if (charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {
\r
324 /* Remove and save the swap lfnl option string portion of the charset name. */
\r
325 optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
\r
327 charsetName = charsetName.substring(0, charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
\r
330 return charsetName;
\r