3 *******************************************************************************
\r
4 * Copyright (C) 2002-2009, International Business Machines Corporation and *
\r
5 * others. All Rights Reserved. *
\r
6 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.util;
\r
10 import java.io.BufferedReader;
\r
11 import java.io.IOException;
\r
13 import com.ibm.icu.text.Transliterator;
\r
14 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
15 //##import com.ibm.icu.dev.test.TestUtil;
\r
18 public class TransliteratorUtilities {
\r
19 public static boolean DEBUG = false;
\r
21 public static void registerTransliteratorFromFile(String dir, String id) {
\r
23 String filename = id.replace('-', '_') + ".txt";
\r
24 String rules = getFileContents(dir, filename);
\r
26 int pos = id.indexOf('-');
\r
32 rid = id.substring(pos+1) + "-" + id.substring(0, pos);
\r
34 t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
\r
35 Transliterator.unregister(id);
\r
36 Transliterator.registerInstance(t);
\r
38 /*String test = "\u049A\u0430\u0437\u0430\u049B";
\r
39 System.out.println(t.transliterate(test));
\r
40 t = Transliterator.getInstance(id);
\r
41 System.out.println(t.transliterate(test));
\r
44 t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
\r
45 Transliterator.unregister(rid);
\r
46 Transliterator.registerInstance(t);
\r
47 if (DEBUG) System.out.println("Registered new Transliterator: " + id + ", " + rid);
\r
48 } catch (IOException e) {
\r
49 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
50 //## throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id+" "+ e.getMessage());
\r
52 throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id).initCause(e);
\r
60 public static String getFileContents(String dir, String filename) throws IOException {
\r
61 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
62 //## BufferedReader br = TestUtil.openUTF8Reader(dir, filename);
\r
64 BufferedReader br = BagFormatter.openUTF8Reader(dir, filename);
\r
66 StringBuffer buffer = new StringBuffer();
\r
68 String line = br.readLine();
\r
69 if (line == null) break;
\r
70 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
\r
71 buffer.append(line).append("\r\n");
\r
74 return buffer.toString();
\r
78 private static final String BASE_RULES =
\r
79 ":: (hex-any/xml);" +
\r
80 ":: (hex-any/xml10);" +
\r
82 "'<' < '&'[lL][Tt]';' ;" +
\r
84 "'&' < '&'[aA][mM][pP]';' ;" +
\r
85 "'>' < '&'[gG][tT]';' ;" +
\r
86 "'\"' < '&'[qQ][uU][oO][tT]';' ; " +
\r
87 "'' < '&'[aA][pP][oO][sS]';' ; ";
\r
89 private static final String CONTENT_RULES =
\r
92 private static final String HTML_RULES = BASE_RULES + CONTENT_RULES +
\r
93 "'\"' > '"' ; ";
\r
95 private static final String HTML_RULES_CONTROLS = HTML_RULES +
\r
96 ":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; ";
\r
98 private static final String HTML_RULES_ASCII = HTML_RULES +
\r
99 ":: [[:C:][:^ASCII:]] any-hex/xml ; ";
\r
101 private static final String XML_RULES = HTML_RULES +
\r
106 The ampersand character (&) and the left angle bracket (<) MUST NOT appear
\r
108 in their literal form, except when used as markup delimiters, or within a
\r
110 comment, a processing instruction, or a CDATA section. If they are needed
\r
112 elsewhere, they MUST be escaped using either numeric character references or
\r
114 the strings "&amp;" and "&lt;" respectively. The right angle bracket (>) MAY
\r
116 be represented using the string "&gt;", and MUST, for compatibility, be
\r
118 escaped using either "&gt;" or a character reference when it appears in the string
\r
120 "]]>" in content, when that string is not marking the end of a CDATA section.
\r
122 In the content of elements, character data is any string of characters which does
\r
124 not contain the start-delimiter of any markup and does not include the
\r
126 CDATA-section-close delimiter, "]]>". In a CDATA section, character data is
\r
128 any string of characters not including the CDATA-section-close delimiter,
\r
132 To allow attribute values to contain both single and double quotes, the
\r
134 apostrophe or single-quote character (') MAY be represented as "&apos;", and
\r
136 the double-quote character (") as "&quot;".
\r
141 public static final Transliterator toXML = Transliterator.createFromRules(
\r
142 "any-xml", XML_RULES, Transliterator.FORWARD);
\r
143 public static final Transliterator fromXML = Transliterator.createFromRules(
\r
144 "xml-any", XML_RULES, Transliterator.REVERSE);
\r
145 public static final Transliterator toHTML = Transliterator.createFromRules(
\r
146 "any-html", HTML_RULES, Transliterator.FORWARD);
\r
147 public static final Transliterator toHTMLControl = Transliterator.createFromRules(
\r
148 "any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);
\r
149 public static final Transliterator toHTMLAscii = Transliterator.createFromRules(
\r
150 "any-html", HTML_RULES_ASCII, Transliterator.FORWARD);
\r
151 public static final Transliterator fromHTML = Transliterator.createFromRules(
\r
152 "html-any", HTML_RULES, Transliterator.REVERSE);
\r