]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_8_1_1/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java
Added flags.
[Dictionary.git] / jars / icu4j-4_8_1_1 / demos / src / com / ibm / icu / dev / demo / translit / AnyTransliterator.java
1 /**
2  *******************************************************************************
3  * Copyright (C) 2001-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.demo.translit;
8 import java.util.Enumeration;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.Set;
13 import java.util.TreeSet;
14
15 import com.ibm.icu.lang.UScript;
16 import com.ibm.icu.text.Replaceable;
17 import com.ibm.icu.text.Transliterator;
18 import com.ibm.icu.text.UTF16;
19 import com.ibm.icu.text.UnicodeFilter;
20
21 public class AnyTransliterator extends Transliterator {
22     
23     static final boolean DEBUG = false;
24     private String targetName;
25     private RunIterator it;
26     private Position run;
27     
28     
29     public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){
30         super("Any-" + targetName, filter);
31         this.targetName = targetName;
32         this.it = it;
33         run = new Position();
34     }
35     
36     public AnyTransliterator(String targetName, UnicodeFilter filter){
37         this(targetName, filter, new ScriptRunIterator());
38     }
39     
40     static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
41     
42     protected void handleTransliterate(Replaceable text,
43                                        Position offsets, boolean isIncremental) {
44         if (DEBUG) {
45             System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
46                 + ", " + toString(offsets));
47         }
48         it.reset(text, offsets);
49         
50         while (it.next(run)) {
51             if (targetName.equalsIgnoreCase(it.getName())) {
52                 if (DEBUG) System.out.println("Skipping identical: " + targetName);
53                 run.start = run.limit; // show we processed
54                 continue; // skip if same
55             }
56             
57             Transliterator t;
58             String id = it.getName() + '-' + targetName;
59             try {
60                 t = Transliterator.getInstance(id);
61             } catch (IllegalArgumentException ex) {
62                 if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
63                 id = it.getName() + "-Latin; Latin-" + targetName;
64                 try {
65                     t = Transliterator.getInstance(id);
66                 } catch (IllegalArgumentException ex2) {
67                     if (DEBUG) System.out.println("Couldn't find: " + id);
68                     continue;
69                 }
70             }
71             // TODO catch error later!!
72                 
73             if (DEBUG) {
74                 System.out.println(t.getID());
75                 System.out.println("input: " + hex.transliterate(text.toString())
76                  + ", " + toString(run));
77             }
78             
79             if (isIncremental && it.atEnd()) {
80                 t.transliterate(text, run);
81             } else {
82                 t.finishTransliteration(text, run);
83             }
84             // adjust the offsets in line with the changes
85             it.adjust(run.limit);
86             
87             if (DEBUG) {
88                 System.out.println("output: " + hex.transliterate(text.toString())
89                  + ", " + toString(run));
90             }
91         }
92
93         // show how far we got!
94         it.getExpanse(offsets);
95         if (run.start == run.limit) offsets.start = offsets.limit;
96         else offsets.start = run.start;
97         if (DEBUG) {
98             System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
99             System.out.println();
100         }
101     }
102     
103     // should be method on Position
104     public static String toString(Position offsets) {
105         return "[cs: " + offsets.contextStart
106                 + ", s: " + offsets.start
107                 + ", l: " + offsets.limit
108                 + ", cl: " + offsets.contextLimit
109                 + "]";
110     }
111     
112     public interface RunIterator {
113         public void reset(Replaceable text, Position expanse);
114         public void getExpanse(Position run);
115         public void reset();
116         public boolean next(Position run);
117         public void getCurrent(Position run);
118         public String getName();
119         public void adjust(int newCurrentLimit);
120         public boolean atEnd();
121     }
122     
123     /**
124      * Returns a series of ranges corresponding to scripts. They will be of the form:
125      * ccccSScSSccccTTcTcccc    - where c is common, S is the first script and T is the second
126      *|            |            - first run
127      *         |            |    - second run
128      * That is, the runs will overlap. The reason for this is so that a transliterator can
129      * consider common characters both before and after the scripts.
130      * The only time that contextStart != start is for the first run 
131      *    (the context is the start context of the entire expanse)
132      * The only time that contextLimit != limit is for the last run 
133      *    (the context is the end context of the entire expanse)
134      */
135     public static class ScriptRunIterator implements RunIterator {
136         private Replaceable text;
137         private Position expanse = new Position();
138         private Position current = new Position();
139         private int script;
140         private boolean done = true;
141         
142
143         public void reset(Replaceable repText, Position expansePos) {
144             set(this.expanse, expansePos);
145             this.text = repText;
146             reset();
147         }
148             
149         public void reset() {
150             done = false;
151             //this.expanse = expanse;
152             script = UScript.INVALID_CODE;
153             // set up first range to be empty, at beginning
154             current.contextStart = expanse.contextStart;
155             current.start = current.limit = current.contextLimit = expanse.start;            
156         }
157             
158         public boolean next(Position run) {
159             if (done) return false;
160             if (DEBUG) {
161                 System.out.println("+cs: " + current.contextStart
162                     + ", s: " + current.start
163                     + ", l: " + current.limit
164                     + ", cl: " + current.contextLimit);
165             }
166             // reset start context run to the last end
167             current.start = current.limit;
168             
169             // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
170             int i, cp;
171             int limit = expanse.start;
172             for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
173                 cp = text.char32At(i);
174                 int scrpt = UScript.getScript(cp);
175                 if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
176             }
177             current.start = i;
178             current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
179             
180             // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
181             int lastScript = UScript.COMMON;
182             //int veryLastScript = UScript.COMMON;
183             limit = expanse.limit; 
184             for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
185                 cp = text.char32At(i);
186                 int scrpt = UScript.getScript(cp);
187                 if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
188                 if (scrpt != UScript.COMMON) {
189                     // if we find a real script:
190                     //   if we already had a script, bail
191                     //   otherwise set our script
192                     if (lastScript == UScript.COMMON) lastScript = scrpt;
193                     else if (lastScript != scrpt) break;
194                 }
195             }
196             current.limit = i;
197             current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
198             done = (i == limit);
199             script = lastScript;
200             
201             if (DEBUG) {
202                 System.out.println("-cs: " + current.contextStart
203                     + ", s: " + current.start
204                     + ", l: " + current.limit
205                     + ", cl: " + current.contextLimit);
206             }
207             
208             set(run, current);
209             return true;
210         }
211         
212         // SHOULD BE METHOD ON POSITION
213         public static void set(Position run, Position current) {
214             run.contextStart = current.contextStart;
215             run.start = current.start;
216             run.limit = current.limit;
217             run.contextLimit = current.contextLimit;
218         }
219         
220         public boolean atEnd() {
221             return current.limit == expanse.limit;
222         }
223         
224         public void getCurrent(Position run) {
225             set(run, current);
226         }
227         
228         public void getExpanse(Position run) {
229             set(run, expanse);
230         }
231         
232         public String getName() {
233             return UScript.getName(script);
234         }
235         
236         public void adjust(int newCurrentLimit) {
237             if (expanse == null) {
238                 throw new IllegalArgumentException("Must reset() before calling");
239             }
240             int delta = newCurrentLimit - current.limit;
241             current.limit += delta;
242             current.contextLimit += delta;
243             expanse.limit += delta;
244             expanse.contextLimit += delta;
245         }
246         
247         // register Any-Script for every script.
248         
249         private static Set scriptList = new HashSet();
250         
251         public static void registerAnyToScript() {
252             synchronized (scriptList) {
253                 Enumeration sources = Transliterator.getAvailableSources();
254                 while(sources.hasMoreElements()) {
255                     String source = (String) sources.nextElement();
256                     if (source.equals("Any")) continue; // to keep from looping
257                     
258                     Enumeration targets = Transliterator.getAvailableTargets(source);
259                     while(targets.hasMoreElements()) {
260                         String target = (String) targets.nextElement();
261                         if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
262                         if (scriptList.contains(target)) continue; // already encountered
263                         scriptList.add(target); // otherwise add for later testing
264                         
265                         Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
266                         if (variantSet.size() < 2) {
267                             AnyTransliterator at = new AnyTransliterator(target, null);
268                             DummyFactory.add(at.getID(), at);
269                         } else {
270                             Iterator variants = variantSet.iterator();
271                             while(variants.hasNext()) {
272                                 String variant = (String) variants.next();
273                                 AnyTransliterator at = new AnyTransliterator(
274                                     (variant.length() > 0) ? target + "/" + variant : target, null);
275                                 DummyFactory.add(at.getID(), at);
276                             }
277                         }
278                     }
279                 }
280             }
281         }
282         
283         static class DummyFactory implements Transliterator.Factory {
284             static DummyFactory singleton = new DummyFactory();
285             static HashMap m = new HashMap();
286
287             // Since Transliterators are immutable, we don't have to clone on set & get
288             static void add(String ID, Transliterator t) {
289                 m.put(ID, t);
290                 System.out.println("Registering: " + ID + ", " + t.toRules(true));
291                 Transliterator.registerFactory(ID, singleton);
292             }
293             public Transliterator getInstance(String ID) {
294                 return (Transliterator) m.get(ID);
295             }
296         }
297         
298         // Nice little Utility for converting Enumeration to collection
299         static Set add(Set s, Enumeration enumeration) {
300             while(enumeration.hasMoreElements()) {
301                 s.add(enumeration.nextElement());
302             }
303             return s;
304         }
305         
306         
307     }
308 }