jars/icu4j-4_4_2-src/main/tests/translit/src/com/ibm/icu/dev/test/util/TransliteratorUtilities.java

   1 /*\r
   2  *******************************************************************************\r
   3  * Copyright (C) 2002-2009, International Business Machines Corporation and    *\r
   4  * others. All Rights Reserved.                                                *\r
   5  *******************************************************************************\r
   6  */\r
   7 package com.ibm.icu.dev.test.util;\r
   8 \r
   9 import java.io.BufferedReader;\r
  10 import java.io.IOException;\r
  11 \r
  12 import com.ibm.icu.text.Transliterator;\r
  13 \r
  14 public class TransliteratorUtilities {\r
  15     public static boolean DEBUG = false;\r
  16 \r
  17     public static void registerTransliteratorFromFile(String dir, String id) {\r
  18         try {\r
  19             String filename = id.replace('-', '_') +  ".txt";\r
  20             String rules = getFileContents(dir, filename);\r
  21             Transliterator t;\r
  22             int pos = id.indexOf('-');\r
  23             String rid;\r
  24             if (pos < 0) {\r
  25                 rid = id + "-Any";\r
  26                 id = "Any-" + id;\r
  27             } else {\r
  28                 rid = id.substring(pos+1) + "-" + id.substring(0, pos);\r
  29             }\r
  30             t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);\r
  31             Transliterator.unregister(id);\r
  32             Transliterator.registerInstance(t);\r
  33 \r
  34             /*String test = "\u049A\u0430\u0437\u0430\u049B";\r
  35             System.out.println(t.transliterate(test));\r
  36             t = Transliterator.getInstance(id);\r
  37             System.out.println(t.transliterate(test));\r
  38             */\r
  39 \r
  40             t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);\r
  41             Transliterator.unregister(rid);\r
  42             Transliterator.registerInstance(t);\r
  43             if (DEBUG) System.out.println("Registered new Transliterator: " + id + ", " + rid);\r
  44         } catch (IOException e) {\r
  45 //#if defined(FOUNDATION10) || defined(J2SE13)\r
  46 //##        throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id+" "+ e.getMessage());\r
  47 //#else\r
  48             throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id).initCause(e);\r
  49 //#endif\r
  50         }\r
  51     }\r
  52 \r
  53     /**\r
  54      * \r
  55      */\r
  56     public static String getFileContents(String dir, String filename) throws IOException {\r
  57 //#if defined(FOUNDATION10) || defined(J2SE13)\r
  58 //##        BufferedReader br = TestUtil.openUTF8Reader(dir, filename);\r
  59 //#else\r
  60         BufferedReader br = BagFormatter.openUTF8Reader(dir, filename);\r
  61 //#endif \r
  62         StringBuffer buffer = new StringBuffer();\r
  63         while (true) {\r
  64             String line = br.readLine();\r
  65             if (line == null) break;\r
  66             if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);\r
  67             buffer.append(line).append("\r\n");\r
  68         }\r
  69         br.close();\r
  70         return buffer.toString();\r
  71          \r
  72     }\r
  73 \r
  74     private static final String BASE_RULES =\r
  75         ":: (hex-any/xml);" +\r
  76         ":: (hex-any/xml10);" + \r
  77         "'<' > '&lt;' ;" +\r
  78         "'<' < '&'[lL][Tt]';' ;" +\r
  79         "'&' > '&amp;' ;" +\r
  80         "'&' < '&'[aA][mM][pP]';' ;" +\r
  81         "'>' < '&'[gG][tT]';' ;" +\r
  82         "'\"' < '&'[qQ][uU][oO][tT]';' ; " +\r
  83         "'' < '&'[aA][pP][oO][sS]';' ; ";\r
  84 \r
  85     private static final String CONTENT_RULES =\r
  86         "'>' > '&gt;' ;";\r
  87 \r
  88     private static final String HTML_RULES = BASE_RULES + CONTENT_RULES + \r
  89         "'\"' > '&quot;' ; ";\r
  90 \r
  91     private static final String HTML_RULES_CONTROLS = HTML_RULES + \r
  92         ":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; ";\r
  93 \r
  94     private static final String HTML_RULES_ASCII = HTML_RULES + \r
  95         ":: [[:C:][:^ASCII:]] any-hex/xml ; ";\r
  96 \r
  97     private static final String XML_RULES = HTML_RULES +\r
  98         "'' > '&apos;' ; "\r
  99 ;\r
 100     \r
 101     /*\r
 102 The ampersand character (&) and the left angle bracket (<) MUST NOT appear \r
 103 \r
 104 in their literal form, except when used as markup delimiters, or within a \r
 105 \r
 106 comment, a processing instruction, or a CDATA section. If they are needed \r
 107 \r
 108 elsewhere, they MUST be escaped using either numeric character references or \r
 109 \r
 110 the strings "&amp;" and "&lt;" respectively. The right angle bracket (>) MAY \r
 111 \r
 112 be represented using the string "&gt;", and MUST, for compatibility, be \r
 113 \r
 114 escaped using either "&gt;" or a character reference when it appears in the string \r
 115 \r
 116 "]]>" in content, when that string is not marking the end of a CDATA section.\r
 117 \r
 118 In the content of elements, character data is any string of characters which does \r
 119 \r
 120 not contain the start-delimiter of any markup and does not include the \r
 121 \r
 122 CDATA-section-close delimiter, "]]>". In a CDATA section, character data is \r
 123 \r
 124 any string of characters not including the CDATA-section-close delimiter, \r
 125 \r
 126 "]]>".\r
 127 \r
 128 To allow attribute values to contain both single and double quotes, the \r
 129 \r
 130 apostrophe or single-quote character (') MAY be represented as "&apos;", and \r
 131 \r
 132 the double-quote character (") as "&quot;".\r
 133 \r
 134 \r
 135      */\r
 136     \r
 137     public static final Transliterator toXML = Transliterator.createFromRules(\r
 138             "any-xml", XML_RULES, Transliterator.FORWARD);\r
 139     public static final Transliterator fromXML = Transliterator.createFromRules(\r
 140             "xml-any", XML_RULES, Transliterator.REVERSE);\r
 141     public static final Transliterator toHTML = Transliterator.createFromRules(\r
 142             "any-html", HTML_RULES, Transliterator.FORWARD);\r
 143     public static final Transliterator toHTMLControl = Transliterator.createFromRules(\r
 144             "any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);\r
 145     public static final Transliterator toHTMLAscii = Transliterator.createFromRules(\r
 146             "any-html", HTML_RULES_ASCII, Transliterator.FORWARD);\r
 147     public static final Transliterator fromHTML = Transliterator.createFromRules(\r
 148             "html-any", HTML_RULES, Transliterator.REVERSE);\r
 149 }\r