/*
 *******************************************************************************
 * Copyright (C) 2001-2008, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.demo.translit;

import com.ibm.icu.lang.*;
import com.ibm.icu.text.*;

import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

12 public class AnyTransliterator extends Transliterator {
\r
14 static final boolean DEBUG = false;
\r
15 private String targetName;
\r
16 private RunIterator it;
\r
17 private Position run;
\r
20 public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){
\r
21 super("Any-" + targetName, filter);
\r
22 this.targetName = targetName;
\r
24 run = new Position();
\r
27 public AnyTransliterator(String targetName, UnicodeFilter filter){
\r
28 this(targetName, filter, new ScriptRunIterator());
\r
31 static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
\r
33 protected void handleTransliterate(Replaceable text,
\r
34 Position offsets, boolean isIncremental) {
\r
36 System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
\r
37 + ", " + toString(offsets));
\r
39 it.reset(text, offsets);
\r
41 while (it.next(run)) {
\r
42 if (targetName.equalsIgnoreCase(it.getName())) {
\r
43 if (DEBUG) System.out.println("Skipping identical: " + targetName);
\r
44 run.start = run.limit; // show we processed
\r
45 continue; // skip if same
\r
49 String id = it.getName() + '-' + targetName;
\r
51 t = Transliterator.getInstance(id);
\r
52 } catch (IllegalArgumentException ex) {
\r
53 if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
\r
54 id = it.getName() + "-Latin; Latin-" + targetName;
\r
56 t = Transliterator.getInstance(id);
\r
57 } catch (IllegalArgumentException ex2) {
\r
58 if (DEBUG) System.out.println("Couldn't find: " + id);
\r
62 // TODO catch error later!!
\r
65 System.out.println(t.getID());
\r
66 System.out.println("input: " + hex.transliterate(text.toString())
\r
67 + ", " + toString(run));
\r
70 if (isIncremental && it.atEnd()) {
\r
71 t.transliterate(text, run);
\r
73 t.finishTransliteration(text, run);
\r
75 // adjust the offsets in line with the changes
\r
76 it.adjust(run.limit);
\r
79 System.out.println("output: " + hex.transliterate(text.toString())
\r
80 + ", " + toString(run));
\r
84 // show how far we got!
\r
85 it.getExpanse(offsets);
\r
86 if (run.start == run.limit) offsets.start = offsets.limit;
\r
87 else offsets.start = run.start;
\r
89 System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
\r
90 System.out.println();
\r
94 // should be method on Position
\r
95 public static String toString(Position offsets) {
\r
96 return "[cs: " + offsets.contextStart
\r
97 + ", s: " + offsets.start
\r
98 + ", l: " + offsets.limit
\r
99 + ", cl: " + offsets.contextLimit
\r
103 public interface RunIterator {
\r
104 public void reset(Replaceable text, Position expanse);
\r
105 public void getExpanse(Position run);
\r
106 public void reset();
\r
107 public boolean next(Position run);
\r
108 public void getCurrent(Position run);
\r
109 public String getName();
\r
110 public void adjust(int newCurrentLimit);
\r
111 public boolean atEnd();
\r
115 * Returns a series of ranges corresponding to scripts. They will be of the form:
\r
116 * ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second
\r
119 * That is, the runs will overlap. The reason for this is so that a transliterator can
\r
120 * consider common characters both before and after the scripts.
\r
121 * The only time that contextStart != start is for the first run
\r
122 * (the context is the start context of the entire expanse)
\r
123 * The only time that contextLimit != limit is for the last run
\r
124 * (the context is the end context of the entire expanse)
\r
126 public static class ScriptRunIterator implements RunIterator {
\r
127 private Replaceable text;
\r
128 private Position expanse = new Position();
\r
129 private Position current = new Position();
\r
130 private int script;
\r
131 private boolean done = true;
\r
134 public void reset(Replaceable repText, Position expansePos) {
\r
135 set(this.expanse, expansePos);
\r
136 this.text = repText;
\r
140 public void reset() {
\r
142 //this.expanse = expanse;
\r
143 script = UScript.INVALID_CODE;
\r
144 // set up first range to be empty, at beginning
\r
145 current.contextStart = expanse.contextStart;
\r
146 current.start = current.limit = current.contextLimit = expanse.start;
\r
149 public boolean next(Position run) {
\r
150 if (done) return false;
\r
152 System.out.println("+cs: " + current.contextStart
\r
153 + ", s: " + current.start
\r
154 + ", l: " + current.limit
\r
155 + ", cl: " + current.contextLimit);
\r
157 // reset start context run to the last end
\r
158 current.start = current.limit;
\r
160 // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
\r
162 int limit = expanse.start;
\r
163 for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
\r
164 cp = text.char32At(i);
\r
165 int scrpt = UScript.getScript(cp);
\r
166 if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
\r
169 current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
\r
171 // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
\r
172 int lastScript = UScript.COMMON;
\r
173 //int veryLastScript = UScript.COMMON;
\r
174 limit = expanse.limit;
\r
175 for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
\r
176 cp = text.char32At(i);
\r
177 int scrpt = UScript.getScript(cp);
\r
178 if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
\r
179 if (scrpt != UScript.COMMON) {
\r
180 // if we find a real script:
\r
181 // if we already had a script, bail
\r
182 // otherwise set our script
\r
183 if (lastScript == UScript.COMMON) lastScript = scrpt;
\r
184 else if (lastScript != scrpt) break;
\r
188 current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
\r
189 done = (i == limit);
\r
190 script = lastScript;
\r
193 System.out.println("-cs: " + current.contextStart
\r
194 + ", s: " + current.start
\r
195 + ", l: " + current.limit
\r
196 + ", cl: " + current.contextLimit);
\r
203 // SHOULD BE METHOD ON POSITION
\r
204 public static void set(Position run, Position current) {
\r
205 run.contextStart = current.contextStart;
\r
206 run.start = current.start;
\r
207 run.limit = current.limit;
\r
208 run.contextLimit = current.contextLimit;
\r
211 public boolean atEnd() {
\r
212 return current.limit == expanse.limit;
\r
215 public void getCurrent(Position run) {
\r
219 public void getExpanse(Position run) {
\r
223 public String getName() {
\r
224 return UScript.getName(script);
\r
227 public void adjust(int newCurrentLimit) {
\r
228 if (expanse == null) {
\r
229 throw new IllegalArgumentException("Must reset() before calling");
\r
231 int delta = newCurrentLimit - current.limit;
\r
232 current.limit += delta;
\r
233 current.contextLimit += delta;
\r
234 expanse.limit += delta;
\r
235 expanse.contextLimit += delta;
\r
238 // register Any-Script for every script.
\r
240 private static Set scriptList = new HashSet();
\r
242 public static void registerAnyToScript() {
\r
243 synchronized (scriptList) {
\r
244 Enumeration sources = Transliterator.getAvailableSources();
\r
245 while(sources.hasMoreElements()) {
\r
246 String source = (String) sources.nextElement();
\r
247 if (source.equals("Any")) continue; // to keep from looping
\r
249 Enumeration targets = Transliterator.getAvailableTargets(source);
\r
250 while(targets.hasMoreElements()) {
\r
251 String target = (String) targets.nextElement();
\r
252 if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
\r
253 if (scriptList.contains(target)) continue; // already encountered
\r
254 scriptList.add(target); // otherwise add for later testing
\r
256 Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
\r
257 if (variantSet.size() < 2) {
\r
258 AnyTransliterator at = new AnyTransliterator(target, null);
\r
259 DummyFactory.add(at.getID(), at);
\r
261 Iterator variants = variantSet.iterator();
\r
262 while(variants.hasNext()) {
\r
263 String variant = (String) variants.next();
\r
264 AnyTransliterator at = new AnyTransliterator(
\r
265 (variant.length() > 0) ? target + "/" + variant : target, null);
\r
266 DummyFactory.add(at.getID(), at);
\r
274 static class DummyFactory implements Transliterator.Factory {
\r
275 static DummyFactory singleton = new DummyFactory();
\r
276 static HashMap m = new HashMap();
\r
278 // Since Transliterators are immutable, we don't have to clone on set & get
\r
279 static void add(String ID, Transliterator t) {
\r
281 System.out.println("Registering: " + ID + ", " + t.toRules(true));
\r
282 Transliterator.registerFactory(ID, singleton);
\r
284 public Transliterator getInstance(String ID) {
\r
285 return (Transliterator) m.get(ID);
\r
289 // Nice little Utility for converting Enumeration to collection
\r
290 static Set add(Set s, Enumeration enumeration) {
\r
291 while(enumeration.hasMoreElements()) {
\r
292 s.add(enumeration.nextElement());
\r