/*
 *******************************************************************************
 * Copyright (C) 2001-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.dev.demo.translit;
\r
8 import java.util.Enumeration;
\r
9 import java.util.HashMap;
\r
10 import java.util.HashSet;
\r
11 import java.util.Iterator;
\r
12 import java.util.Set;
\r
13 import java.util.TreeSet;
\r
15 import com.ibm.icu.lang.UScript;
\r
16 import com.ibm.icu.text.Replaceable;
\r
17 import com.ibm.icu.text.Transliterator;
\r
18 import com.ibm.icu.text.UTF16;
\r
19 import com.ibm.icu.text.UnicodeFilter;
\r
21 public class AnyTransliterator extends Transliterator {
\r
23 static final boolean DEBUG = false;
\r
24 private String targetName;
\r
25 private RunIterator it;
\r
26 private Position run;
\r
/**
 * Creates a transliterator whose ID is {@code "Any-" + targetName} and which
 * uses the supplied iterator to split text into runs.
 *
 * @param targetName target name used both in this transliterator's ID and in the
 *                   IDs of the per-run transliterators that are looked up later
 * @param filter     filter passed unchanged to the {@code Transliterator} constructor
 * @param it         iterator that partitions the text into runs
 */
public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it) {
    super("Any-" + targetName, filter);
    this.targetName = targetName;
    // The iterator must be stored: handleTransliterate dereferences this.it on entry.
    this.it = it;
    run = new Position();
}
\r
/**
 * Creates a transliterator whose ID is {@code "Any-" + targetName} and which
 * splits text with a new {@link ScriptRunIterator}.
 *
 * @param targetName target name, as for the three-argument constructor
 * @param filter     filter passed on to the three-argument constructor
 */
public AnyTransliterator(String targetName, UnicodeFilter filter) {
    this(targetName, filter, new ScriptRunIterator());
}
\r
40 static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
\r
42 protected void handleTransliterate(Replaceable text,
\r
43 Position offsets, boolean isIncremental) {
\r
45 System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
\r
46 + ", " + toString(offsets));
\r
48 it.reset(text, offsets);
\r
50 while (it.next(run)) {
\r
51 if (targetName.equalsIgnoreCase(it.getName())) {
\r
52 if (DEBUG) System.out.println("Skipping identical: " + targetName);
\r
53 run.start = run.limit; // show we processed
\r
54 continue; // skip if same
\r
58 String id = it.getName() + '-' + targetName;
\r
60 t = Transliterator.getInstance(id);
\r
61 } catch (IllegalArgumentException ex) {
\r
62 if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
\r
63 id = it.getName() + "-Latin; Latin-" + targetName;
\r
65 t = Transliterator.getInstance(id);
\r
66 } catch (IllegalArgumentException ex2) {
\r
67 if (DEBUG) System.out.println("Couldn't find: " + id);
\r
71 // TODO catch error later!!
\r
74 System.out.println(t.getID());
\r
75 System.out.println("input: " + hex.transliterate(text.toString())
\r
76 + ", " + toString(run));
\r
79 if (isIncremental && it.atEnd()) {
\r
80 t.transliterate(text, run);
\r
82 t.finishTransliteration(text, run);
\r
84 // adjust the offsets in line with the changes
\r
85 it.adjust(run.limit);
\r
88 System.out.println("output: " + hex.transliterate(text.toString())
\r
89 + ", " + toString(run));
\r
93 // show how far we got!
\r
94 it.getExpanse(offsets);
\r
95 if (run.start == run.limit) offsets.start = offsets.limit;
\r
96 else offsets.start = run.start;
\r
98 System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
\r
99 System.out.println();
\r
103 // should be method on Position
\r
104 public static String toString(Position offsets) {
\r
105 return "[cs: " + offsets.contextStart
\r
106 + ", s: " + offsets.start
\r
107 + ", l: " + offsets.limit
\r
108 + ", cl: " + offsets.contextLimit
\r
112 public interface RunIterator {
\r
113 public void reset(Replaceable text, Position expanse);
\r
114 public void getExpanse(Position run);
\r
115 public void reset();
\r
116 public boolean next(Position run);
\r
117 public void getCurrent(Position run);
\r
118 public String getName();
\r
119 public void adjust(int newCurrentLimit);
\r
120 public boolean atEnd();
\r
124 * Returns a series of ranges corresponding to scripts. They will be of the form:
\r
125 * ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second
\r
128 * That is, the runs will overlap. The reason for this is so that a transliterator can
\r
129 * consider common characters both before and after the scripts.
\r
130 * The only time that contextStart != start is for the first run
\r
131 * (the context is the start context of the entire expanse)
\r
132 * The only time that contextLimit != limit is for the last run
\r
133 * (the context is the end context of the entire expanse)
\r
135 public static class ScriptRunIterator implements RunIterator {
\r
136 private Replaceable text;
\r
137 private Position expanse = new Position();
\r
138 private Position current = new Position();
\r
139 private int script;
\r
140 private boolean done = true;
\r
143 public void reset(Replaceable repText, Position expansePos) {
\r
144 set(this.expanse, expansePos);
\r
145 this.text = repText;
\r
149 public void reset() {
\r
151 //this.expanse = expanse;
\r
152 script = UScript.INVALID_CODE;
\r
153 // set up first range to be empty, at beginning
\r
154 current.contextStart = expanse.contextStart;
\r
155 current.start = current.limit = current.contextLimit = expanse.start;
\r
158 public boolean next(Position run) {
\r
159 if (done) return false;
\r
161 System.out.println("+cs: " + current.contextStart
\r
162 + ", s: " + current.start
\r
163 + ", l: " + current.limit
\r
164 + ", cl: " + current.contextLimit);
\r
166 // reset start context run to the last end
\r
167 current.start = current.limit;
\r
169 // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
\r
171 int limit = expanse.start;
\r
172 for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
\r
173 cp = text.char32At(i);
\r
174 int scrpt = UScript.getScript(cp);
\r
175 if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
\r
178 current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
\r
180 // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
\r
181 int lastScript = UScript.COMMON;
\r
182 //int veryLastScript = UScript.COMMON;
\r
183 limit = expanse.limit;
\r
184 for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
\r
185 cp = text.char32At(i);
\r
186 int scrpt = UScript.getScript(cp);
\r
187 if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
\r
188 if (scrpt != UScript.COMMON) {
\r
189 // if we find a real script:
\r
190 // if we already had a script, bail
\r
191 // otherwise set our script
\r
192 if (lastScript == UScript.COMMON) lastScript = scrpt;
\r
193 else if (lastScript != scrpt) break;
\r
197 current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
\r
198 done = (i == limit);
\r
199 script = lastScript;
\r
202 System.out.println("-cs: " + current.contextStart
\r
203 + ", s: " + current.start
\r
204 + ", l: " + current.limit
\r
205 + ", cl: " + current.contextLimit);
\r
212 // SHOULD BE METHOD ON POSITION
\r
213 public static void set(Position run, Position current) {
\r
214 run.contextStart = current.contextStart;
\r
215 run.start = current.start;
\r
216 run.limit = current.limit;
\r
217 run.contextLimit = current.contextLimit;
\r
220 public boolean atEnd() {
\r
221 return current.limit == expanse.limit;
\r
224 public void getCurrent(Position run) {
\r
228 public void getExpanse(Position run) {
\r
232 public String getName() {
\r
233 return UScript.getName(script);
\r
236 public void adjust(int newCurrentLimit) {
\r
237 if (expanse == null) {
\r
238 throw new IllegalArgumentException("Must reset() before calling");
\r
240 int delta = newCurrentLimit - current.limit;
\r
241 current.limit += delta;
\r
242 current.contextLimit += delta;
\r
243 expanse.limit += delta;
\r
244 expanse.contextLimit += delta;
\r
247 // register Any-Script for every script.
\r
249 private static Set scriptList = new HashSet();
\r
251 public static void registerAnyToScript() {
\r
252 synchronized (scriptList) {
\r
253 Enumeration sources = Transliterator.getAvailableSources();
\r
254 while(sources.hasMoreElements()) {
\r
255 String source = (String) sources.nextElement();
\r
256 if (source.equals("Any")) continue; // to keep from looping
\r
258 Enumeration targets = Transliterator.getAvailableTargets(source);
\r
259 while(targets.hasMoreElements()) {
\r
260 String target = (String) targets.nextElement();
\r
261 if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
\r
262 if (scriptList.contains(target)) continue; // already encountered
\r
263 scriptList.add(target); // otherwise add for later testing
\r
265 Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
\r
266 if (variantSet.size() < 2) {
\r
267 AnyTransliterator at = new AnyTransliterator(target, null);
\r
268 DummyFactory.add(at.getID(), at);
\r
270 Iterator variants = variantSet.iterator();
\r
271 while(variants.hasNext()) {
\r
272 String variant = (String) variants.next();
\r
273 AnyTransliterator at = new AnyTransliterator(
\r
274 (variant.length() > 0) ? target + "/" + variant : target, null);
\r
275 DummyFactory.add(at.getID(), at);
\r
283 static class DummyFactory implements Transliterator.Factory {
\r
284 static DummyFactory singleton = new DummyFactory();
\r
285 static HashMap m = new HashMap();
\r
287 // Since Transliterators are immutable, we don't have to clone on set & get
\r
288 static void add(String ID, Transliterator t) {
\r
290 System.out.println("Registering: " + ID + ", " + t.toRules(true));
\r
291 Transliterator.registerFactory(ID, singleton);
\r
293 public Transliterator getInstance(String ID) {
\r
294 return (Transliterator) m.get(ID);
\r
298 // Nice little Utility for converting Enumeration to collection
\r
299 static Set add(Set s, Enumeration enumeration) {
\r
300 while(enumeration.hasMoreElements()) {
\r
301 s.add(enumeration.nextElement());
\r