2 *******************************************************************************
3 * Copyright (C) 2001-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.demo.translit;
8 import java.util.Enumeration;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
13 import java.util.TreeSet;
15 import com.ibm.icu.lang.UScript;
16 import com.ibm.icu.text.Replaceable;
17 import com.ibm.icu.text.Transliterator;
18 import com.ibm.icu.text.UTF16;
19 import com.ibm.icu.text.UnicodeFilter;
// Transliterator whose ID is "Any-" + targetName (see the super(...) call in the constructor).
// handleTransliterate walks the text run by run using a RunIterator and looks up a
// "<run name>-<targetName>" transliterator for each run.
21 public class AnyTransliterator extends Transliterator {
// Guards the "if (DEBUG)" diagnostic System.out output in this class.
23 static final boolean DEBUG = false;
// Target name: compared (ignoring case) with each run's name and appended to the IDs that are looked up.
24 private String targetName;
// Source of the runs that handleTransliterate iterates over.
25 private RunIterator it;
/**
 * Creates a transliterator with the ID "Any-" + targetName.
 *
 * @param targetName name of the target; stored for the per-run ID lookups
 * @param filter passed straight through to the Transliterator superclass constructor
 * @param it run iterator for this instance
 *           NOTE(review): the assignment of this parameter to the field is not visible
 *           in this listing; confirm it is stored.
 */
29 public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){
30 super("Any-" + targetName, filter);
31 this.targetName = targetName;
/**
 * Convenience constructor: delegates to the three-argument constructor with a
 * newly created ScriptRunIterator.
 *
 * @param targetName name of the target
 * @param filter forwarded unchanged to the three-argument constructor
 */
36 public AnyTransliterator(String targetName, UnicodeFilter filter){
37 this(targetName, filter, new ScriptRunIterator());
// Shared transliterator obtained once at class initialisation from the ID string below.
// In the visible code it is only used to render text inside println diagnostics.
40 static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
/**
 * Transliterates the text one run at a time, using the runs reported by the RunIterator.
 * A run whose name equals targetName (ignoring case) is marked as processed and skipped.
 * Otherwise a transliterator with the ID "<run name>-<targetName>" is requested; the
 * catch clause for IllegalArgumentException switches the ID to
 * "<run name>-Latin; Latin-<targetName>" and requests again.
 * After the loop, offsets is refreshed from the iterator and offsets.start is set to show
 * how far processing got.
 *
 * @param text the text being transliterated
 * @param offsets the positions to work within; updated before returning
 * @param isIncremental tested together with it.atEnd() before the call to t.transliterate
 */
42 protected void handleTransliterate(Replaceable text,
43 Position offsets, boolean isIncremental) {
45 System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
46 + ", " + toString(offsets));
// Point the run iterator at this text and range.
48 it.reset(text, offsets);
// NOTE(review): the declarations of 'run' and 't' are not visible in this listing.
50 while (it.next(run)) {
// The run already has the target's name: nothing to convert.
51 if (targetName.equalsIgnoreCase(it.getName())) {
52 if (DEBUG) System.out.println("Skipping identical: " + targetName);
53 run.start = run.limit; // show we processed
54 continue; // skip if same
// First choice: a direct "<run name>-<target>" transliterator.
58 String id = it.getName() + '-' + targetName;
60 t = Transliterator.getInstance(id);
61 } catch (IllegalArgumentException ex) {
62 if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
// Fallback: a compound ID that pivots through Latin.
63 id = it.getName() + "-Latin; Latin-" + targetName;
65 t = Transliterator.getInstance(id);
66 } catch (IllegalArgumentException ex2) {
67 if (DEBUG) System.out.println("Couldn't find: " + id);
// NOTE(review): what happens after this second failure is not visible in this listing.
71 // TODO catch error later!!
74 System.out.println(t.getID());
75 System.out.println("input: " + hex.transliterate(text.toString())
76 + ", " + toString(run));
// Two calls are visible below: transliterate() under this condition, then finishTransliteration().
// NOTE(review): the line separating the branches is not visible; presumably an else. Confirm.
79 if (isIncremental && it.atEnd()) {
80 t.transliterate(text, run);
82 t.finishTransliteration(text, run);
84 // adjust the offsets in line with the changes
88 System.out.println("output: " + hex.transliterate(text.toString())
89 + ", " + toString(run));
93 // show how far we got!
94 it.getExpanse(offsets);
// run.start == run.limit is the "processed" marker (set explicitly for skipped runs above);
// in that case start jumps to limit, otherwise it resumes from where the run stopped.
95 if (run.start == run.limit) offsets.start = offsets.limit;
96 else offsets.start = run.start;
98 System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
103 // should be method on Position
/**
 * Formats a Position for the diagnostic output in this class.
 *
 * @param offsets the position to format
 * @return a string starting with "[cs: " that lists contextStart, start, limit and
 *         contextLimit under the labels cs, s, l and cl
 */
104 public static String toString(Position offsets) {
105 return "[cs: " + offsets.contextStart
106 + ", s: " + offsets.start
107 + ", l: " + offsets.limit
108 + ", cl: " + offsets.contextLimit
// Abstraction over "runs" of text that AnyTransliterator processes one at a time.
// The notes below are drawn from how AnyTransliterator and ScriptRunIterator use each method.
112 public interface RunIterator {
// Starts a new iteration over text within the given expanse.
113 public void reset(Replaceable text, Position expanse);
// Called by handleTransliterate after the loop to refresh the caller's offsets.
// NOTE(review): presumably copies the iterator's expanse into run; confirm against an implementation.
114 public void getExpanse(Position run);
// Advances to the next run; the caller loops while this returns true.
116 public boolean next(Position run);
// NOTE(review): presumably copies the current run into run; no implementation body is visible here.
117 public void getCurrent(Position run);
// Name of the current run; compared with the target name and used to build transliterator IDs.
118 public String getName();
// Tells the iterator the new limit of the current run.
119 public void adjust(int newCurrentLimit);
// Whether the current run ends at the end of the expanse (see ScriptRunIterator.atEnd).
120 public boolean atEnd();
124 * Returns a series of ranges corresponding to scripts. They will be of the form:
125 * ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second
128 * That is, the runs will overlap. The reason for this is so that a transliterator can
129 * consider common characters both before and after the scripts.
130 * The only time that contextStart != start is for the first run
131 * (the context is the start context of the entire expanse)
132 * The only time that contextLimit != limit is for the last run
133 * (the context is the end context of the entire expanse)
// RunIterator implementation that classifies code points with UScript.getScript.
135 public static class ScriptRunIterator implements RunIterator {
// Text being scanned; read with char32At in next().
136 private Replaceable text;
// Overall range being iterated; filled from the caller's Position in reset(Replaceable, Position).
137 private Position expanse = new Position();
// Bounds of the current run.
138 private Position current = new Position();
// While true, next() returns false immediately.
140 private boolean done = true;
/**
 * Begins iteration over the given range by copying expansePos into this iterator's own
 * expanse object (field by field, via set).
 *
 * @param repText the text to iterate over
 *                NOTE(review): its use is not visible in this listing; confirm it is stored.
 * @param expansePos the range to iterate over; copied, not retained
 */
143 public void reset(Replaceable repText, Position expansePos) {
144 set(this.expanse, expansePos);
/**
 * Rewinds the iterator to the beginning of the current expanse: the script is set to
 * UScript.INVALID_CODE and the current run becomes empty, positioned at expanse.start.
 */
149 public void reset() {
151 //this.expanse = expanse;
152 script = UScript.INVALID_CODE;
153 // set up first range to be empty, at beginning
154 current.contextStart = expanse.contextStart;
// start, limit and contextLimit all collapse onto expanse.start.
155 current.start = current.limit = current.contextLimit = expanse.start;
/**
 * Advances to the next run.
 * The new run starts where the previous one ended. Phase 1 scans backwards from that start
 * to choose contextStart; Phase 2 scans forwards to choose contextLimit, treating INHERITED
 * as COMMON and stopping at the first code point whose script differs from the first real
 * script seen.
 *
 * @param run position object supplied by the caller
 *            NOTE(review): the statement that fills it is not visible in this listing.
 * @return false immediately if the iterator is done
 *         NOTE(review): the remaining return statements are not visible in this listing.
 */
158 public boolean next(Position run) {
159 if (done) return false;
161 System.out.println("+cs: " + current.contextStart
162 + ", s: " + current.start
163 + ", l: " + current.limit
164 + ", cl: " + current.contextLimit);
166 // reset start context run to the last end
167 current.start = current.limit;
169 // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
171 int limit = expanse.start;
// NOTE(review): this backward scan reads the code point AT i (the first code point of the
// new run) rather than the one before i, and steps back by that code point's length.
// Confirm this is intended; it looks like the scan may stop before backing up at all.
172 for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
173 cp = text.char32At(i);
174 int scrpt = UScript.getScript(cp);
175 if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
// Reaching the expanse start means the run inherits the expanse's own start context.
178 current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
180 // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
181 int lastScript = UScript.COMMON;
182 //int veryLastScript = UScript.COMMON;
183 limit = expanse.limit;
184 for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
185 cp = text.char32At(i);
186 int scrpt = UScript.getScript(cp);
// INHERITED is folded into COMMON so it never starts or ends a run.
187 if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
188 if (scrpt != UScript.COMMON) {
189 // if we find a real script:
190 // if we already had a script, bail
191 // otherwise set our script
192 if (lastScript == UScript.COMMON) lastScript = scrpt;
193 else if (lastScript != scrpt) break;
// Reaching the expanse limit means the run inherits the expanse's own end context.
197 current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
202 System.out.println("-cs: " + current.contextStart
203 + ", s: " + current.start
204 + ", l: " + current.limit
205 + ", cl: " + current.contextLimit);
212 // SHOULD BE METHOD ON POSITION
/**
 * Copies contextStart, start, limit and contextLimit from one Position into another.
 *
 * @param run destination; all four fields are overwritten
 * @param current source; not modified
 */
213 public static void set(Position run, Position current) {
214 run.contextStart = current.contextStart;
215 run.start = current.start;
216 run.limit = current.limit;
217 run.contextLimit = current.contextLimit;
/**
 * @return true when the current run's limit equals the limit of the expanse
 */
220 public boolean atEnd() {
221 return current.limit == expanse.limit;
// NOTE(review): the body is not visible in this listing; presumably copies the current run into run.
224 public void getCurrent(Position run) {
// NOTE(review): the body is not visible in this listing; presumably copies the expanse into run.
228 public void getExpanse(Position run) {
/**
 * @return the result of UScript.getName for this iterator's current script code
 */
232 public String getName() {
233 return UScript.getName(script);
/**
 * Moves the current run's limit to newCurrentLimit and shifts the current context limit,
 * the expanse limit and the expanse context limit by the same amount.
 *
 * @param newCurrentLimit the new value for the current run's limit
 * @throws IllegalArgumentException if expanse is null
 */
236 public void adjust(int newCurrentLimit) {
// NOTE(review): expanse is initialised at its declaration and no reassignment is visible
// in this listing, so this guard looks unreachable. Confirm what "not reset" should test.
237 if (expanse == null) {
238 throw new IllegalArgumentException("Must reset() before calling");
// delta is the change in length of the current run.
240 int delta = newCurrentLimit - current.limit;
241 current.limit += delta;
242 current.contextLimit += delta;
243 expanse.limit += delta;
244 expanse.contextLimit += delta;
247 // register Any-Script for every script.
// Targets already handled by registerAnyToScript; that method also synchronizes on this object.
249 private static Set scriptList = new HashSet();
/**
 * Creates AnyTransliterator instances for the targets reported by Transliterator and hands
 * them to DummyFactory.add.
 * Every available source except "Any" is visited, and for each one every available target.
 * A target is skipped when UScript.getCode returns null for it or when it is already in
 * scriptList. For the remaining targets the variants are collected into a sorted set:
 * with fewer than two variants a single instance named after the target is added; the
 * variant loop adds one instance per variant, named "<target>/<variant>" when the variant
 * is non-empty.
 * All of this runs while holding the monitor of scriptList.
 */
251 public static void registerAnyToScript() {
252 synchronized (scriptList) {
253 Enumeration sources = Transliterator.getAvailableSources();
254 while(sources.hasMoreElements()) {
255 String source = (String) sources.nextElement();
256 if (source.equals("Any")) continue; // to keep from looping
258 Enumeration targets = Transliterator.getAvailableTargets(source);
259 while(targets.hasMoreElements()) {
260 String target = (String) targets.nextElement();
261 if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
262 if (scriptList.contains(target)) continue; // already encountered
263 scriptList.add(target); // otherwise add for later testing
// A TreeSet gives the variants a deterministic, sorted order.
265 Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
266 if (variantSet.size() < 2) {
267 AnyTransliterator at = new AnyTransliterator(target, null);
268 DummyFactory.add(at.getID(), at);
// NOTE(review): the line separating the two cases is not visible here; presumably an else.
270 Iterator variants = variantSet.iterator();
271 while(variants.hasNext()) {
272 String variant = (String) variants.next();
273 AnyTransliterator at = new AnyTransliterator(
274 (variant.length() > 0) ? target + "/" + variant : target, null);
275 DummyFactory.add(at.getID(), at);
// Transliterator.Factory whose getInstance returns whatever the static map m holds for an ID.
283 static class DummyFactory implements Transliterator.Factory {
// The single instance passed to Transliterator.registerFactory for every ID.
284 static DummyFactory singleton = new DummyFactory();
// ID -> Transliterator lookup table read by getInstance.
285 static HashMap m = new HashMap();
287 // Since Transliterators are immutable, we don't have to clone on set & get
// Registers the singleton factory under ID.
// NOTE(review): the statement that stores t in m is not visible in this listing; confirm it exists.
288 static void add(String ID, Transliterator t) {
290 System.out.println("Registering: " + ID + ", " + t.toRules(true));
291 Transliterator.registerFactory(ID, singleton);
// Returns the map entry for ID, which is null when nothing is stored under that ID.
293 public Transliterator getInstance(String ID) {
294 return (Transliterator) m.get(ID);
298 // Nice little Utility for converting Enumeration to collection
// Adds every remaining element of the enumeration to s.
// NOTE(review): the end of this method (including its return statement) is not visible in this listing.
299 static Set add(Set s, Enumeration enumeration) {
300 while(enumeration.hasMoreElements()) {
301 s.add(enumeration.nextElement());