2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.demo.translit;
9 import java.awt.Button;
10 import java.awt.CheckboxMenuItem;
11 import java.awt.FileDialog;
13 import java.awt.Frame;
14 import java.awt.GraphicsEnvironment;
15 import java.awt.Label;
17 import java.awt.MenuBar;
18 import java.awt.MenuItem;
19 import java.awt.MenuShortcut;
20 import java.awt.TextField;
21 import java.awt.event.ActionEvent;
22 import java.awt.event.ActionListener;
23 import java.awt.event.ItemEvent;
24 import java.awt.event.ItemListener;
25 import java.awt.event.KeyEvent;
26 import java.awt.event.WindowAdapter;
27 import java.awt.event.WindowEvent;
28 import java.io.BufferedReader;
29 import java.io.BufferedWriter;
31 import java.io.FileInputStream;
32 import java.io.FileOutputStream;
33 import java.io.InputStreamReader;
34 import java.io.OutputStreamWriter;
35 import java.io.PrintWriter;
36 import java.text.CharacterIterator;
37 import java.util.Comparator;
38 import java.util.Enumeration;
39 import java.util.HashMap;
40 import java.util.Iterator;
43 import java.util.TreeSet;
45 import com.ibm.icu.impl.Differ;
46 import com.ibm.icu.lang.UCharacter;
47 import com.ibm.icu.text.BreakIterator;
48 import com.ibm.icu.text.CanonicalIterator;
49 import com.ibm.icu.text.Normalizer;
50 import com.ibm.icu.text.ReplaceableString;
51 import com.ibm.icu.text.Transliterator;
52 import com.ibm.icu.text.UTF16;
53 import com.ibm.icu.text.UnicodeSet;
54 import com.ibm.icu.text.UnicodeSetIterator;
57 * A frame that allows the user to experiment with keyboard
58 * transliteration. This class has a main() method so it can be run
59 * as an application. The frame contains an editable text component
60 * and uses keyboard transliteration to process keyboard events.
62 * <p>Copyright (c) IBM Corporation 1999. All rights reserved.
66 public class Demo extends Frame {
71 private static final long serialVersionUID = 1L;
72 static final boolean DEBUG = false;
73 static final String START_TEXT = "(cut,\u03BA\u03C5\u03C4,\u05D0,\u30AF\u30C8,\u4E80,\u091A\u0941\u0924\u094D)";
75 Transliterator translit = null;
76 String fontName = "Arial Unicode MS";
82 boolean compound = false;
83 Transliterator[] compoundTranslit = new Transliterator[MAX_COMPOUND];
84 static final int MAX_COMPOUND = 128;
85 int compoundCount = 0;
88 TransliteratingTextComponent text = null;
91 CheckboxMenuItem translitItem;
92 CheckboxMenuItem noTranslitItem;
94 static final String NO_TRANSLITERATOR = "None";
96 //private static final String COPYRIGHT =
97 // "\u00A9 IBM Corporation 1999. All rights reserved.";
99 public static void main(String[] args) {
100 Frame f = new Demo(600, 200);
101 f.addWindowListener(new WindowAdapter() {
102 public void windowClosing(WindowEvent e) {
103 com.ibm.icu.dev.demo.impl.DemoApplet.demoFrameClosed();
108 com.ibm.icu.dev.demo.impl.DemoApplet.demoFrameOpened();
111 public Demo(int width, int height) {
112 super("Transliteration Demo");
116 addWindowListener(new WindowAdapter() {
117 public void windowClosing(WindowEvent e) {
122 text = new TransliteratingTextComponent();
123 Font font = new Font(fontName, Font.PLAIN, fontSize);
125 text.setSize(width, height);
126 text.setVisible(true);
127 text.setText(START_TEXT);
130 setSize(width, height);
131 setTransliterator("Latin-Greek", null);
134 private void initMenus() {
138 //CheckboxMenuItem citem;
140 setMenuBar(mbar = new MenuBar());
141 mbar.add(menu = new Menu("File"));
142 menu.add(mitem = new MenuItem("Quit"));
143 mitem.addActionListener(new ActionListener() {
144 public void actionPerformed(ActionEvent e) {
149 final ItemListener setTransliteratorListener = new ItemListener() {
150 public void itemStateChanged(ItemEvent e) {
151 CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
152 if (e.getStateChange() == ItemEvent.DESELECTED) {
153 // Don't let the current transliterator be deselected.
156 } else if (compound) {
157 // Adding an item to a compound transliterator
158 handleAddToCompound(item.getLabel());
159 } else if (item != translitItem) {
160 // Deselect previous choice. Don't need to call
161 // setState(true) on new choice.
162 translitItem.setState(false);
164 handleSetTransliterator(item.getLabel());
170 translitMenu.add(translitItem = noTranslitItem =
171 new CheckboxMenuItem(NO_TRANSLITERATOR, true));
172 noTranslitItem.addItemListener(new ItemListener() {
173 public void itemStateChanged(ItemEvent e) {
174 // Can't uncheck None -- any action here sets None to true
175 setNoTransliterator();
179 translitMenu.addSeparator();
183 translitMenu.add(citem = new CheckboxMenuItem("Compound"));
184 citem.addItemListener(new ItemListener() {
185 public void itemStateChanged(ItemEvent e) {
186 CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
187 if (e.getStateChange() == ItemEvent.DESELECTED) {
188 // If compound gets deselected, then select NONE
189 setNoTransliterator();
190 } else if (!compound) {
191 // Switching from non-compound to compound
192 translitItem.setState(false);
197 for (int i=0; i<MAX_COMPOUND; ++i) {
198 compoundTranslit[i] = null;
204 translitMenu.addSeparator();
208 for (Enumeration e=getSystemTransliteratorNames().elements();
209 e.hasMoreElements(); ) {
210 String s = (String) e.nextElement();
211 translitMenu.add(citem = new CheckboxMenuItem(s));
212 citem.addItemListener(setTransliteratorListener);
216 Menu fontMenu = new Menu("Font");
217 String[] fonts = GraphicsEnvironment.getLocalGraphicsEnvironment().getAvailableFontFamilyNames();
218 for (int i = 0; i < fonts.length; ++i) {
219 MenuItem mItem = new MenuItem(fonts[i]);
220 mItem.addActionListener(new FontActionListener(fonts[i]));
225 Menu sizeMenu = new Menu("Size");
226 int[] sizes = {9, 10, 12, 14, 18, 24, 36, 48, 72};
227 for (int i = 0; i < sizes.length; ++i) {
228 MenuItem mItem = new MenuItem("" + sizes[i]);
229 mItem.addActionListener(new SizeActionListener(sizes[i]));
236 mbar.add(translitMenu = new Menu("Transliterator"));
238 translitMenu.add(convertSelectionItem = new MenuItem("Transliterate",
239 new MenuShortcut(KeyEvent.VK_K)));
240 convertSelectionItem.addActionListener(new ActionListener() {
241 public void actionPerformed(ActionEvent e) {
242 handleBatchTransliterate(translit);
246 translitMenu.add(swapSelectionItem = new MenuItem("Reverse",
247 new MenuShortcut(KeyEvent.VK_S)));
248 swapSelectionItem.addActionListener(new ActionListener() {
249 public void actionPerformed(ActionEvent e) {
252 inv = translit.getInverse();
253 } catch (Exception x) {
254 inv = Transliterator.getInstance("null");
256 setTransliterator(inv.getID(), null);
260 translitMenu.add(convertTypingItem = new MenuItem("No Typing Conversion",
261 new MenuShortcut(KeyEvent.VK_T)));
262 convertTypingItem.addActionListener(new ActionListener() {
263 public void actionPerformed(ActionEvent e) {
264 if (!transliterateTyping) {
265 text.setTransliterator(translit);
266 convertTypingItem.setLabel("No Typing Conversion");
269 text.setTransliterator(null);
270 convertTypingItem.setLabel("Convert Typing");
272 transliterateTyping = !transliterateTyping;
276 translitMenu.add(historyMenu = new Menu("Recent"));
278 helpDialog = new InfoDialog(this, "Simple Demo", "Instructions",
279 "CTL A, X, C, V have customary meanings.\n"
280 + "Arrow keys, delete and backspace work.\n"
281 + "To get a character from its control point, type the hex, then hit CTL Q"
283 helpDialog.getArea().setEditable(false);
287 mbar.add(helpMenu = new Menu("Extras"));
288 helpMenu.add(mitem = new MenuItem("Help"));
289 mitem.addActionListener(new ActionListener() {
290 public void actionPerformed(ActionEvent e) {
295 hexDialog = new InfoDialog(this, "Hex Entry", "Use U+..., \\u..., \\x{...}, or &#x...;",
298 Button button = new Button("Insert");
299 button.addActionListener(new ActionListener() {
300 public void actionPerformed(ActionEvent e) {
301 String hexValue = hexDialog.getArea().getText();
302 text.insertText(fromHex.transliterate(hexValue));
305 hexDialog.getBottom().add(button);
307 helpMenu.add(mitem = new MenuItem("Hex...",
308 new MenuShortcut(KeyEvent.VK_H)));
309 mitem.addActionListener(new ActionListener() {
310 public void actionPerformed(ActionEvent e) {
315 // Compound Transliterator
317 compoundDialog = new InfoDialog(this, "Compound Transliterator", "",
318 "[^\\u0000-\\u00FF] hex"
320 button = new Button("Set");
321 button.addActionListener(new ActionListener() {
322 public void actionPerformed(ActionEvent e) {
323 String compound = "";
325 compound = compoundDialog.getArea().getText();
326 setTransliterator(compound, null);
327 } catch (RuntimeException ex) {
328 compoundDialog.getArea().setText(compound + "\n" + ex.getMessage());
332 compoundDialog.getBottom().add(button);
334 translitMenu.add(mitem = new MenuItem("Multiple...",
335 new MenuShortcut(KeyEvent.VK_M)));
336 mitem.addActionListener(new ActionListener() {
337 public void actionPerformed(ActionEvent e) {
338 compoundDialog.show();
342 // RuleBased Transliterator
344 rulesDialog = new InfoDialog(this, "Rule-Based Transliterator", "",
345 "([A-Z]) > &Hex($1) &Name($1);\r\n"
346 + "&Hex-Any($1) < ('\\' [uU] [a-fA-F0-9]*);\r\n"
347 + "&Name-Any($1) < ('{' [^\\}]* '}');"
349 button = new Button("Set");
350 button.addActionListener(new ActionListener() {
351 public void actionPerformed(ActionEvent e) {
352 String compound = "";
354 compound = rulesDialog.getArea().getText();
355 String id = ruleId.getText();
356 setTransliterator(compound, id);
357 } catch (RuntimeException ex) {
358 rulesDialog.getArea().setText(compound + "\n#" + ex.getMessage());
362 rulesDialog.getBottom().add(button);
363 ruleId = new TextField("test1", 20);
364 Label temp = new Label(" Name:");
365 rulesDialog.getBottom().add(temp);
366 rulesDialog.getBottom().add(ruleId);
369 translitMenu.add(mitem = new MenuItem("From Rules...",
370 new MenuShortcut(KeyEvent.VK_R)));
371 mitem.addActionListener(new ActionListener() {
372 public void actionPerformed(ActionEvent e) {
378 translitMenu.add(mitem = new MenuItem("From File...",
379 new MenuShortcut(KeyEvent.VK_F)));
380 mitem.addActionListener(new FileListener(this, RULE_FILE));
382 translitMenu.add(mitem = new MenuItem("Test File..."));
383 mitem.addActionListener(new FileListener(this, TEST_FILE));
385 // Flesh out the menu with the installed transliterators
387 translitMenu.addSeparator();
389 Iterator sources = add(new TreeSet(), Transliterator.getAvailableSources()).iterator();
390 while(sources.hasNext()) {
391 String source = (String) sources.next();
392 Iterator targets = add(new TreeSet(), Transliterator.getAvailableTargets(source)).iterator();
393 Menu targetMenu = new Menu(source);
394 while(targets.hasNext()) {
395 String target = (String) targets.next();
396 Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
397 if (variantSet.size() < 2) {
398 mitem = new MenuItem(target);
399 mitem.addActionListener(new TransliterationListener(source + "-" + target));
400 targetMenu.add(mitem);
402 Iterator variants = variantSet.iterator();
403 Menu variantMenu = new Menu(target);
404 while(variants.hasNext()) {
405 String variant = (String) variants.next();
406 String menuName = variant.length() == 0 ? "<default>" : variant;
407 //System.out.println("<" + source + "-" + target + "/" + variant + ">, <" + menuName + ">");
408 mitem = new MenuItem(menuName);
409 mitem.addActionListener(new TransliterationListener(source + "-" + target + "/" + variant));
410 variantMenu.add(mitem);
412 targetMenu.add(variantMenu);
415 translitMenu.add(targetMenu);
421 static final int RULE_FILE = 0, TEST_FILE = 1;
423 static class FileListener implements ActionListener {
427 FileListener(Demo frame, int choice) {
429 this.choice = choice;
432 public void actionPerformed(ActionEvent e) {
433 String id = frame.translit.getID();
434 int slashPos = id.indexOf('/');
437 variant = "_" + id.substring(slashPos+1);
438 id = id.substring(0, slashPos);
441 FileDialog fileDialog = new FileDialog(frame, "Input File");
442 fileDialog.setFile("Test_" + id + ".txt");
444 String fileName = fileDialog.getFile();
445 String fileDirectory = fileDialog.getDirectory();
446 if (fileName != null) {
448 File f = new File(fileDirectory, fileName);
449 if (choice == RULE_FILE) {
451 // read stuff into buffer
453 StringBuffer buffer = new StringBuffer();
454 FileInputStream fis = new FileInputStream(f);
455 InputStreamReader isr = new InputStreamReader(fis, "UTF8");
456 BufferedReader br = new BufferedReader(isr, 32*1024);
458 String line = br.readLine();
459 if (line == null) break;
460 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); // strip BOM
466 // Transform file name into id
467 if (fileName.startsWith("Transliterator_")) {
468 fileName = fileName.substring("Transliterator_".length());
470 int pos = fileName.indexOf('_');
474 id = fileName.substring(0, pos) + "-";
475 int pos2 = fileName.indexOf('_', pos+1);
477 id += fileName.substring(pos+1);
479 id += fileName.substring(pos+1, pos2) + "/" + fileName.substring(pos2 + 1);
482 pos = id.lastIndexOf('.');
483 if (pos >= 0) id = id.substring(0, pos);
487 frame.setTransliterator(buffer.toString(), id);
488 } else if (choice == TEST_FILE) {
489 genTestFile(f, frame.translit, variant);
491 } catch (Exception e2) {
492 e2.printStackTrace();
493 System.out.println("Problem opening/reading: " + fileDirectory + ", " + fileName);
496 fileDialog.dispose();
501 boolean transliterateTyping = true;
502 Transliterator fromHex = Transliterator.getInstance("Hex-Any");
503 InfoDialog helpDialog;
504 InfoDialog hexDialog;
505 InfoDialog compoundDialog;
506 InfoDialog rulesDialog;
508 MenuItem convertSelectionItem = null;
509 MenuItem swapSelectionItem = null;
510 MenuItem convertTypingItem = null;
512 Map historyMap = new HashMap();
513 Set historySet = new TreeSet(new Comparator() {
514 public int compare(Object a, Object b) {
515 MenuItem aa = (MenuItem)a;
516 MenuItem bb = (MenuItem)b;
517 return aa.getLabel().compareTo(bb.getLabel());
521 // ADD Factory since otherwise getInverse blows out
522 static class DummyFactory implements Transliterator.Factory {
523 static DummyFactory singleton = new DummyFactory();
524 static HashMap m = new HashMap();
526 // Since Transliterators are immutable, we don't have to clone on set & get
527 static void add(String ID, Transliterator t) {
529 System.out.println("Registering: " + ID + ", " + t.toRules(true));
530 Transliterator.registerFactory(ID, singleton);
532 public Transliterator getInstance(String ID) {
533 return (Transliterator) m.get(ID);
537 static void printBreaks(int num, String testSource, BreakIterator brkItr) {
541 int pos = brkItr.next();
542 if (pos == BreakIterator.DONE) break;
543 result += testSource.substring(lastPos, pos) + "&";
545 System.out.println(pos);
547 System.out.println("Test" + num + ": " + result);
550 static void printIteration(int num, String testSource, CharacterIterator ci) {
554 if (ch == CharacterIterator.DONE) break;
555 result += ch + "(" + ci.getIndex() + ")";
557 System.out.println("Test" + num + ": " + result);
560 static void printSources() {
561 String[] list = {"Latin-ThaiLogical", "ThaiLogical-Latin", "Thai-ThaiLogical", "ThaiLogical-Thai"};
562 UnicodeSet all = new UnicodeSet();
563 for (int i = 0; i < list.length; ++i) {
564 Transliterator tr = Transliterator.getInstance(list[i]);
565 UnicodeSet src = tr.getSourceSet();
566 System.out.println(list[i] + ": " + src.toPattern(true));
569 System.out.println("All: " + all.toPattern(true));
570 UnicodeSet rem = new UnicodeSet("[[:latin:][:thai:]]");
571 System.out.println("missing from [:latin:][:thai:]: " + all.removeAll(rem).toPattern(true));
574 // 200E;LEFT-TO-RIGHT MARK;Cf;0;L;;;;;N;;;;;
576 static Transliterator title = Transliterator.getInstance("title");
577 static String hexAndNameRules = " ([:c:]) > \\u200E &hex/unicode($1) ' ( ) ' &name($1) \\u200E ' ';"
578 + "([:mark:]) > \\u200E &hex/unicode($1) ' ( ' \\u200E \u25CC $1 \\u200E ' ) ' &name($1) \\u200E ' ';"
579 + "(.) > \\u200E &hex/unicode($1) ' ( ' \\u200E $1 \\u200E ' ) ' &name($1) ' ' \\u200E;";
581 static Transliterator hexAndName = Transliterator.createFromRules("any-hexAndName",
582 hexAndNameRules, Transliterator.FORWARD);
586 //static Transliterator upper = Transliterator.getInstance("upper");
588 static final byte NONE = 0, TITLEWORD = 1, TITLELINE = 2;
590 static void genTestFile(File sourceFile, Transliterator translit, String variant) {
593 System.out.println("Reading: " + sourceFile.getCanonicalPath());
594 BufferedReader in = new BufferedReader(
595 new InputStreamReader(
596 new FileInputStream(sourceFile), "UTF-8"));
597 String targetFile = sourceFile.getCanonicalPath();
598 int dotPos = targetFile.lastIndexOf('.');
599 if (dotPos >= 0) targetFile = targetFile.substring(0,dotPos);
600 targetFile += variant;
602 File outFile = new File(targetFile + ".html");
603 System.out.println("Writing: " + outFile.getCanonicalPath());
605 PrintWriter out = new PrintWriter(
607 new OutputStreamWriter(
608 new FileOutputStream(outFile), "UTF-8")));
610 String direction = "";
611 String id = translit.getID();
612 if (id.indexOf("Arabic") >= 0 || id.indexOf("Hebrew") >= 0) {
613 direction = " direction: rtl;";
615 boolean testRoundTrip = true;
616 boolean generateSets = true;
617 if (id.startsWith("Han-") || id.startsWith("ja-")) {
618 testRoundTrip = false;
619 generateSets = false;
621 out.println("<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
622 out.println("<style><!--");
623 out.println("td, th { vertical-align: top; border: 1px solid black }");
624 out.println("td.s { background-color: #EEEEEE;" + direction + " }");
625 out.println("td.r { background-color: #CCCCCC;" + direction + " }");
626 out.println("td.n { background-color: #FFFFCC; }");
627 out.println("td.title { border: 0px solid black}");
628 out.println("span.d { background-color: #FF6666 }");
629 out.println("span.r { background-color: #66FF66 }");
631 out.println("body { font-family: 'Arial Unicode MS', 'Lucida Sans Unicode', Arial, sans-serif; margin: 5 }");
632 out.println("--></style>");
633 out.println("<title>" + id + " Transliteration Check</title></head>");
634 out.println("<body bgcolor='#FFFFFF'><p>See <a href='Test_Instructions.html'>Test_Instructions.html</a> for details.</p>");
635 out.println("<table>");
637 //out.println("<tr><th width='33%'>Thai</th><th width='33%'>Latin</th><th width='33%'>Thai</th></tr>");
639 Transliterator tl = translit;
640 Transliterator lt = tl.getInverse();
642 Transliterator ltFilter = tl.getInverse();
643 ltFilter.setFilter(new UnicodeSet("[:^Lu:]"));
644 Transliterator tlFilter = lt.getInverse();
645 tlFilter.setFilter(new UnicodeSet("[:^Lu:]"));
647 //Transliterator.getInstance("[:^Lu:]" + lt.getID());
649 BreakIterator sentenceBreak = BreakIterator.getSentenceInstance();
651 byte titleSetting = TITLELINE;
652 //boolean upperfilter = false;
653 boolean first = true;
655 String line = in.readLine();
656 if (line == null) break;
658 if (line.length() == 0) continue;
659 if (line.charAt(0) == '\uFEFF') line = line.substring(1); // remove BOM
661 if (line.charAt(0) == '#') continue; // comments
663 if (line.equals("@TITLECASE@")) {
664 titleSetting = TITLEWORD;
665 out.println("<tr><td colspan='2' class='title'><b>Names</b></td></tr>");
667 } else if (line.equals("@UPPERFILTER@")) {
668 //upperfilter = true;
670 } else if (line.startsWith("@SET")) {
671 UnicodeSet s = new UnicodeSet(line.substring(4).trim());
672 out.println("<tr><td colspan='2' class='title'><b>Characters</b></td></tr>");
673 UnicodeSetIterator it = new UnicodeSetIterator(s);
675 addSentenceToTable(out, it.codepoint != UnicodeSetIterator.IS_STRING
676 ? UTF16.valueOf(it.codepoint)
678 NONE, true, testRoundTrip, first, tl, lt);
683 sentenceBreak.setText(line);
686 int end = sentenceBreak.next();
687 if (end == BreakIterator.DONE) break;
688 String coreSentence = line.substring(start, end);
689 //System.out.println("Core: " + hex.transliterate(coreSentence));
693 while (oldPos < coreSentence.length()) {
694 // hack, because sentence doesn't seem to be working right
695 int pos = coreSentence.indexOf(". ", oldPos);
696 if (pos < 0) pos = coreSentence.length(); else pos = pos+2;
697 int pos2 = coreSentence.indexOf('\u3002', oldPos);
698 if (pos2 < 0) pos2 = coreSentence.length(); else pos2 = pos2 + 1;
699 if (pos > pos2) pos = pos2;
700 String sentence = coreSentence.substring(oldPos, pos).trim();
701 //System.out.println("Sentence: " + hex.transliterate(coreSentence));
704 addSentenceToTable(out, sentence,
705 titleSetting, false, testRoundTrip, first, tl, lt);
711 out.println("</table></body>");
714 // Now write the source/target sets
716 outFile = new File(targetFile + "_Sets.html");
717 System.out.println("Writing: " + outFile.getCanonicalPath());
719 out = new PrintWriter(
721 new OutputStreamWriter(
722 new FileOutputStream(outFile), "UTF-8")));
723 out.println("<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
724 out.println("<style><!--");
725 out.println("body { font-family: 'Arial Unicode MS', 'Lucida Sans Unicode', Arial, sans-serif; margin: 5 }");
726 out.println("--></style>");
727 out.println("<title>" + id + " Transliteration Sets</title></head>");
728 out.println("<body bgcolor='#FFFFFF'>");
730 int dashPos = id.indexOf('-');
731 int slashPos = id.indexOf('/');
732 if (slashPos < 0) slashPos = id.length();
733 UnicodeSet sourceSuper = null;
735 String temp = id.substring(0,dashPos);
736 if (temp.equals("ja")) sourceSuper = new UnicodeSet("[[:Han:][:hiragana:][:katakana:]]");
737 else sourceSuper = new UnicodeSet("[[:" + temp + ":][:Mn:][:Me:]]");
738 } catch (Exception e) {}
740 UnicodeSet targetSuper = null;
742 targetSuper = new UnicodeSet("[[:" + id.substring(dashPos+1, slashPos) + ":][:Mn:][:Me:]]");
743 } catch (Exception e) {}
745 int nfdStyle = CLOSE_CASE | CLOSE_FLATTEN | CLOSE_CANONICAL;
746 int nfkdStyle = nfdStyle | CLOSE_COMPATIBILITY;
748 out.println("<p><b>None</b></p>");
749 showSets(out, translit, lt, null, null, 0);
750 out.println("<p><b>NFD</b></p>");
751 showSets(out, translit, lt, sourceSuper, targetSuper, nfdStyle);
752 out.println("<p><b>NFKD</b></p>");
753 showSets(out, translit, lt, sourceSuper, targetSuper, nfkdStyle);
754 out.println("</ul></body>");
757 System.out.println("Done Writing");
758 } catch (Exception e) {
763 static void addSentenceToTable(PrintWriter out, String sentence,
764 byte titleSetting, boolean addName, boolean testRoundTrip, boolean first,
765 Transliterator tl, Transliterator lt) {
766 if (sentence.length() == 0) return; // skip empty lines
768 String originalShow = sentence;
770 latin = tl.transliterate(saveAscii.transliterate(sentence));
772 String latinShow = latin;
773 if (titleSetting == TITLEWORD) {
774 latinShow = title.transliterate(latin);
775 } else if (titleSetting == TITLELINE) {
776 latinShow = titlecaseFirstWord(latinShow);
778 latinShow = restoreAscii.transliterate(latinShow);
781 reverse = restoreAscii.transliterate(lt.transliterate(latin));
783 String NFKDSentence = Normalizer.normalize(sentence, Normalizer.NFKD);
784 String NFKDLatin = Normalizer.normalize(latin, Normalizer.NFKD);
785 String NFKDReverse = Normalizer.normalize(reverse, Normalizer.NFKD);
787 if (latinShow.length() == 0) {
788 latinShow = "<i>empty</i>";
789 } else if (NFKDSentence.equals(NFKDLatin)) {
790 latinShow = "<span class='r'>" + latinShow + "</span>";
792 String reverseShow = reverse;
794 if (testRoundTrip && !NFKDReverse.equals(NFKDSentence)) {
795 int minLen = reverse.length();
796 if (minLen > sentence.length()) minLen = sentence.length();
798 for (i = 0; i < minLen; ++i) {
799 if (reverse.charAt(i) != sentence.charAt(i)) break;
801 //originalShow = sentence.substring(0,i) + "<span class='d'>" + sentence.substring(i) + "</span>";
802 reverseShow = reverseShow.length() == 0
804 //: reverse.substring(0,i) + "<span class='d'>" + reverse.substring(i) + "</span>";
805 : showDifference(sentence, reverse);
806 out.println("<tr><td class='s'" + (first ? " width='50%'>" : ">") + originalShow
807 + "</td><td rowSpan='2'>" + latinShow
808 + "</td></tr><tr><td class='r'>" + reverseShow
811 out.println("<tr><td class='s'" + (first ? " width='50%'>" : ">") + originalShow
812 + "</td><td>" + latinShow
816 latinShow = hexAndName.transliterate(latin);
817 if (latinShow.length() == 0) latinShow = "<i>empty</i>";
818 originalShow = hexAndName.transliterate(sentence);
819 if (originalShow.length() == 0) originalShow = "<i>empty</i>";
821 out.println("<tr><td class='n'>" + originalShow
822 + "</td><td class='n'>" + latinShow
825 out.println("<tr><td></td></tr>");
829 static String showDifference(String as, String bs) {
830 Differ differ = new Differ(300, 3);
831 StringBuffer out = new StringBuffer();
832 int max = as.length();
833 if (max < bs.length()) max = bs.length();
834 for (int j = 0; j <= max; ++j) {
835 if (j < as.length()) differ.addA(as.substring(j, j+1));
836 if (j < bs.length()) differ.addB(bs.substring(j, j+1));
837 differ.checkMatch(j == max);
839 if (differ.getACount() != 0 || differ.getBCount() != 0) {
841 if (differ.getACount() != 0) {
842 out.append("<span class='r'>");
843 for (int i = 0; i < differ.getACount(); ++i) {
844 out.append(differ.getA(i));
846 out.append("</span>");
848 if (differ.getBCount() != 0) {
849 out.append("<span class='d'>");
850 for (int i = 0; i < differ.getBCount(); ++i) {
851 out.append(differ.getB(i));
853 out.append("</span>");
858 return out.toString();
861 static void showSets(PrintWriter out, Transliterator translit, Transliterator inverse,
862 UnicodeSet sourceSuper, UnicodeSet targetSuper, int options) {
863 out.println("<li>Source Set:<ul><li>" + toPattern(closeUnicodeSet(translit.getSourceSet(), options), sourceSuper) + "</li></ul></li>");
864 out.println("<li>Reverse Target Set:<ul><li>" + toPattern(closeUnicodeSet(inverse.getTargetSet(), options), sourceSuper) + "</li></ul></li>");
865 out.println("<li>Target Set:<ul><li>" + toPattern(closeUnicodeSet(translit.getTargetSet(), options), targetSuper) + "</li></ul></li>");
866 out.println("<li>Reverse Source Set:<ul><li>" + toPattern(closeUnicodeSet(inverse.getSourceSet(), options), targetSuper) + "</li></ul></li>");
869 static final int CLOSE_CASE = 1, CLOSE_FLATTEN = 2, CLOSE_CANONICAL = 4, CLOSE_COMPATIBILITY = 8;
871 static UnicodeSet closeUnicodeSet(UnicodeSet source, int options) {
872 if (options == 0) return source;
874 UnicodeSetIterator it = new UnicodeSetIterator(source);
875 UnicodeSet additions = new UnicodeSet(); // to avoid messing up iterator
876 UnicodeSet removals = new UnicodeSet(); // to avoid messing up iterator
880 // Add all case equivalents
881 if ((options & CLOSE_CASE) != 0) {
884 if (cp == UnicodeSetIterator.IS_STRING) continue;
885 int type = UCharacter.getType(cp);
886 if (type == Character.UPPERCASE_LETTER || type == Character.LOWERCASE_LETTER || type == Character.TITLECASE_LETTER) {
887 additions.add(UCharacter.toLowerCase(UTF16.valueOf(cp)));
888 additions.add(UCharacter.toUpperCase(UTF16.valueOf(cp)));
891 source.addAll(additions);
894 // Add the canonical closure of all strings and characters in source
895 if ((options & CLOSE_CANONICAL) != 0) {
898 CanonicalIterator ci = new CanonicalIterator(".");
900 if (it.codepoint == UnicodeSetIterator.IS_STRING) base = it.string;
901 else base = UTF16.valueOf(it.codepoint);
904 String trial = ci.next();
905 if (trial == null) break;
906 if (trial.equals(base)) continue;
907 additions.add(trial);
910 source.addAll(additions);
914 if ((options & CLOSE_FLATTEN) != 0) {
918 if (it.codepoint != UnicodeSetIterator.IS_STRING) continue;
919 additions.addAll(it.string);
920 removals.add(it.string);
921 //System.out.println("flattening '" + hex.transliterate(it.string) + "'");
923 source.addAll(additions);
924 source.removeAll(removals);
927 // Now add decompositions of characters in source
928 if ((options & CLOSE_COMPATIBILITY) != 0) {
932 if (it.codepoint == UnicodeSetIterator.IS_STRING) base = it.string;
933 else base = UTF16.valueOf(it.codepoint);
934 if (Normalizer.isNormalized(base, Normalizer.NFKD,0)) continue;
935 String decomp = Normalizer.normalize(base, Normalizer.NFKD);
936 additions.add(decomp);
938 source.addAll(additions);
940 // Now add any other character that decomposes to a character in source
941 for (cp = 0; cp < 0x10FFFF; ++cp) {
942 if (!UCharacter.isDefined(cp)) continue;
943 if (Normalizer.isNormalized(cp, Normalizer.NFKD,0)) continue;
944 if (source.contains(cp)) continue;
946 String decomp = Normalizer.normalize(cp, Normalizer.NFKD);
947 if (source.containsAll(decomp)) {
948 // System.out.println("Adding: " + Integer.toString(cp,16) + " " + UCharacter.getName(cp));
957 static String toPattern(UnicodeSet source, UnicodeSet superset) {
958 if (superset != null) {
959 source.removeAll(superset);
960 return "[" + superset.toPattern(true) + " " + source.toPattern(true) + "]";
962 return source.toPattern(true);
965 static BreakIterator bi = BreakIterator.getWordInstance();
967 static String titlecaseFirstWord(String line) {
968 // search for first word with letters. If the first letter is lower, then titlecase it.
973 if (end == BreakIterator.DONE) break;
974 int firstLetterType = getFirstLetterType(line, start, end);
975 if (firstLetterType != Character.UNASSIGNED) {
976 if (firstLetterType != Character.LOWERCASE_LETTER) break;
977 line = line.substring(0, start)
978 + UCharacter.toTitleCase(line.substring(start, end), bi)
979 + line.substring(end);
987 static final int LETTER_MASK =
988 (1<<Character.UPPERCASE_LETTER)
989 | (1<<Character.LOWERCASE_LETTER)
990 | (1<<Character.TITLECASE_LETTER)
991 | (1<<Character.MODIFIER_LETTER)
992 | (1<<Character.OTHER_LETTER)
995 static int getFirstLetterType(String line, int start, int end) {
997 for (int i = start; i < end; i += UTF16.getCharCount(cp)) {
998 cp = UTF16.charAt(line, i);
999 int type = UCharacter.getType(cp);
1000 if (((1<<type) & LETTER_MASK) != 0) return type;
1002 return Character.UNASSIGNED;
1005 static void printNames(UnicodeSet s, String targetFile) {
1007 File outFile = new File(targetFile);
1008 System.out.println("Writing: " + outFile.getCanonicalPath());
1010 PrintWriter out = new PrintWriter(
1012 new OutputStreamWriter(
1013 new FileOutputStream(outFile), "UTF-8")));
1014 UnicodeSet main = new UnicodeSet();
1016 UnicodeSet others = new UnicodeSet();
1017 UnicodeSetIterator it = new UnicodeSetIterator(s);
1019 if (!UCharacter.isDefined(it.codepoint)) continue;
1020 if (!Normalizer.isNormalized(it.codepoint, Normalizer.NFD,0)) {
1021 String decomp = Normalizer.normalize(it.codepoint, Normalizer.NFD);
1022 others.addAll(decomp);
1025 out.println(" " + UTF16.valueOf(it.codepoint) + " <> XXX # " + UCharacter.getName(it.codepoint));
1026 main.add(it.codepoint);
1029 if (others.size() != 0) {
1030 out.println("Decomposed characters found above: ");
1031 others.removeAll(main);
1034 out.println(" " + UTF16.valueOf(it.codepoint) + " <> XXX # " + UCharacter.getName(it.codepoint));
1039 System.out.println("Done Writing");
1040 } catch (Exception e) {
1041 e.printStackTrace();
// Transliterator looked up by the compound ID below; elsewhere in this file it is applied to
// strings before they are printed to System.out.
// NOTE(review): presumably it hex-escapes everything outside printable ASCII - confirm.
1045 static Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
// Two-way rules pairing each ASCII letter (A-Z, a-z) with the code point 0xEA00 + its ASCII value.
1046 static final String saveRules =
1047 "A <> \uEA41; B <> \uEA42; C <> \uEA43; D <> \uEA44; E <> \uEA45; F <> \uEA46; G <> \uEA47; H <> \uEA48; I <> \uEA49; "
1048 + "J <> \uEA4A; K <> \uEA4B; L <> \uEA4C; M <> \uEA4D; N <> \uEA4E; O <> \uEA4F; P <> \uEA50; Q <> \uEA51; R <> \uEA52; "
1049 + "S <> \uEA53; T <> \uEA54; U <> \uEA55; V <> \uEA56; W <> \uEA57; X <> \uEA58; Y <> \uEA59; Z <> \uEA5A; "
1050 + "a <> \uEA61; b <> \uEA62; c <> \uEA63; d <> \uEA64; e <> \uEA65; f <> \uEA66; g <> \uEA67; h <> \uEA68; i <> \uEA69; "
1051 + "j <> \uEA6A; k <> \uEA6B; l <> \uEA6C; m <> \uEA6D; n <> \uEA6E; o <> \uEA6F; p <> \uEA70; q <> \uEA71; r <> \uEA72; "
1052 + "s <> \uEA73; t <> \uEA74; u <> \uEA75; v <> \uEA76; w <> \uEA77; x <> \uEA78; y <> \uEA79; z <> \uEA7A;";
// Forward and reverse transliterators compiled from saveRules.
// Both are created under the same ID, "ascii-saved".
1054 static Transliterator saveAscii = Transliterator.createFromRules("ascii-saved", saveRules, Transliterator.FORWARD);
1055 static Transliterator restoreAscii = Transliterator.createFromRules("ascii-saved", saveRules, Transliterator.REVERSE);
// NOTE(review): the line that opens the construct containing the statements below is not part of
// this excerpt, and the embedded numbering has gaps. Whether some of these statements are disabled
// in the full file (commented out or guarded) cannot be determined from here.
// For every char from 'A' through 'z' (this range also covers the punctuation between 'Z' and 'a'),
// print "<char> <> <hex form of the char at 0xEA00 + char>; ".
1061 for (char i = 'A'; i <= 'z'; ++i) {
1062 System.out.print(i + " <> " + hex.transliterate(String.valueOf((char)(0xEA00 + i))) + "; ");
// Set built from the pattern below; its pattern string is printed with the "Test: " prefix.
1065 UnicodeSet x = new UnicodeSet("[[:^ccc=0:]&[:^ccc=230:]]");
1068 System.out.println("Test: " + x.toPattern(true));
// Rule-based transliterator applied to each test string; input and result are printed through hex.
1070 Transliterator y = Transliterator.createFromRules("xxx", "$notAbove = [[:^ccc=0:]&[:^ccc=230:]]; u ($notAbove*) \u0308 > XXX | $1; ", Transliterator.FORWARD);
1072 String[] testList = {"u\u0308", "u\u0316\u0308", "u\u0308\u0316", "u\u0301\u0308", "u\u0308\u0301"};
1073 for (int i = 0; i < testList.length; ++i) {
1074 String yy = y.transliterate(testList[i]);
1075 System.out.println(hex.transliterate(testList[i]) + " => " + hex.transliterate(yy));
1078 //printNames(new UnicodeSet("[\u0600-\u06FF]"), "Arabic-Latin.txt");
// BreakTransliterator experiment: register the class, create an instance, print the breaks and
// the character iteration for a sample sentence, print the transliterated sentence, and add the
// instance to DummyFactory under its own ID.
1082 BreakTransliterator.register();
1084 BreakTransliterator testTrans = new BreakTransliterator("Any-XXX", null, null, "$");
1085 String testSource = "The Quick: Brown fox--jumped.";
1086 BreakIterator bi = testTrans.getBreakIterator();
1087 bi.setText(new StringCharacterIterator(testSource));
1088 printBreaks(0, testSource, bi);
1089 //bi.setText(UCharacterIterator.getInstance(testSource));
1090 //printBreaks(1, testSource, bi);
1092 printIteration(2, testSource, new StringCharacterIterator(testSource));
1093 //printIteration(3, testSource, UCharacterIterator.getInstance(testSource));
1097 String test = testTrans.transliterate(testSource);
1098 System.out.println("Test3: " + test);
1099 DummyFactory.add(testTrans.getID(), testTrans);
1102 // AnyTransliterator.ScriptRunIterator.registerAnyToScript();
// Create AnyTransliterators constructed with "Greek", "Devanagari" and "Latin"; run each once on
// the same sample string (the result is discarded) and add each to DummyFactory under its own ID.
1104 AnyTransliterator at = new AnyTransliterator("Greek", null);
1105 at.transliterate("(cat,\u03b1,\u0915)");
1106 DummyFactory.add(at.getID(), at);
1108 at = new AnyTransliterator("Devanagari", null);
1109 at.transliterate("(cat,\u03b1,\u0915)");
1110 DummyFactory.add(at.getID(), at);
1112 at = new AnyTransliterator("Latin", null);
1113 at.transliterate("(cat,\u03b1,\u0915)");
1114 DummyFactory.add(at.getID(), at);
// Rule-based "Any-gif" and "Any-RemoveCurly" entries; each reverse ID is mapped to the
// transliterator obtained for "Any-Null".
1116 DummyFactory.add("Any-gif", Transliterator.createFromRules("gif", "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", Transliterator.FORWARD));
1117 DummyFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
1119 DummyFactory.add("Any-RemoveCurly", Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ;", Transliterator.FORWARD));
1120 DummyFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
// Rule sets whose right-hand side invokes another transliterator with the &name(...) form,
// added to DummyFactory as "Any-hex2" and "Any-gif2". Progress is echoed to System.out.
1122 System.out.println("Trying &hex");
1123 Transliterator t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
1124 System.out.println("Registering");
1125 DummyFactory.add("Any-hex2", t);
1127 System.out.println("Trying &gif");
1128 t = Transliterator.createFromRules("gif2", "(.) > &any-gif($1);", Transliterator.FORWARD);
1129 System.out.println("Registering");
1130 DummyFactory.add("Any-gif2", t);
// Selects the active transliterator, installs it on the text component and refreshes the
// related menu state (conversion item label, history, swap item).
//   name - passed to Transliterator.getInstance(...) on one path, and used as the rule text for
//          Transliterator.createFromRules(...) on the other
//   id   - ID for the rule-built transliterator; a reverse ID is derived from it
// NOTE(review): the conditions that choose between these paths sit on lines missing from this
// excerpt. Callers visible in this file pass id == null, so presumably a null id selects the
// getInstance path - confirm against the full file.
1135 void setTransliterator(String name, String id) {
1136 if (DEBUG) System.out.println("Got: " + name);
1138 translit = Transliterator.getInstance(name);
// Derive reverseId from id.
1140 String reverseId = "";
1141 int pos = id.indexOf('-');
// NOTE(review): presumably taken when id contains no '-' - confirm.
1143 reverseId = id + "-Any";
1146 int pos2 = id.indexOf("/", pos);
// "Source-Target" becomes "Target-Source".
1148 reverseId = id.substring(pos+1) + "-" + id.substring(0,pos);
// "Source-Target/Variant" becomes "Target-Source/Variant".
1150 reverseId = id.substring(pos+1, pos2) + "-" + id.substring(0,pos) + id.substring(pos2);
// Build the forward transliterator from the rule text and dump diagnostics to System.out.
1155 translit = Transliterator.createFromRules(id, name, Transliterator.FORWARD);
1157 System.out.println("***Forward Rules");
1158 System.out.println(translit.toRules(true));
1159 System.out.println("***Source Set");
1160 System.out.println(translit.getSourceSet().toPattern(true));
1162 System.out.println("***Target Set");
1163 UnicodeSet target = translit.getTargetSet();
1164 System.out.println(target.toPattern(true));
// Lower-case ASCII letters that are absent from the target set.
1165 UnicodeSet rest = new UnicodeSet("[a-z]").removeAll(target);
1166 System.out.println("***ASCII - Target Set");
1167 System.out.println(rest.toPattern(true));
1169 DummyFactory.add(id, translit);
// Build the reverse direction from the same rule text and add it under reverseId.
1171 Transliterator translit2 = Transliterator.createFromRules(reverseId, name, Transliterator.REVERSE);
1173 System.out.println("***Backward Rules");
1174 System.out.println(translit2.toRules(true));
1176 DummyFactory.add(reverseId, translit2);
1178 Transliterator rev = translit.getInverse();
1179 if (DEBUG) System.out.println("***Inverse Rules");
1180 if (DEBUG) System.out.println(rev.toRules(true));
// Apply the chosen transliterator to the UI.
1184 text.setTransliterator(translit);
1185 convertSelectionItem.setLabel(Transliterator.getDisplayName(translit.getID()));
1187 addHistory(translit);
// Look up the inverse; an exception from getInverse() is caught here.
// NOTE(review): the handler body and the condition guarding the two setEnabled calls below are
// not visible; presumably the swap item is enabled only when an inverse was obtained - confirm.
1191 inv = translit.getInverse();
1192 } catch (Exception ex) {
1197 swapSelectionItem.setEnabled(true);
1199 swapSelectionItem.setEnabled(false);
1201 System.out.println("Set transliterator: " + translit.getID()
1202 + (inv != null ? " and " + inv.getID() : ""));
1205 void addHistory(Transliterator trans) {
1206 String name = trans.getID();
1207 MenuItem cmi = (MenuItem) historyMap.get(name);
1209 cmi = new MenuItem(Transliterator.getDisplayName(name));
1210 cmi.addActionListener(new TransliterationListener(name));
1211 historyMap.put(name, cmi);
1212 historySet.add(cmi);
1213 historyMenu.removeAll();
1214 Iterator it = historySet.iterator();
1215 while (it.hasNext()) {
1216 historyMenu.add((MenuItem)it.next());
1221 class TransliterationListener implements ActionListener, ItemListener {
1223 public TransliterationListener(String name) {
1226 public void actionPerformed(ActionEvent e) {
1227 setTransliterator(name, null);
1229 public void itemStateChanged(ItemEvent e) {
1230 if (e.getStateChange() == ItemEvent.SELECTED) {
1231 setTransliterator(name, null);
1233 setTransliterator("Any-Null", null);
// Action listener created with a font name. On activation it logs the name when DEBUG is set
// and re-applies the text font from fontName and fontSize (neither is declared in this block).
// NOTE(review): the field declaration, the constructor body and one statement of actionPerformed
// are missing from this excerpt; presumably that statement stores name into fontName - confirm.
1238 class FontActionListener implements ActionListener {
1240 public FontActionListener(String name) {
1243 public void actionPerformed(ActionEvent e) {
1244 if (DEBUG) System.out.println("Font: " + name);
1246 text.setFont(new Font(fontName, Font.PLAIN, fontSize));
// Action listener created with a font size. On activation it logs the size when DEBUG is set
// and re-applies the text font from fontName and fontSize (neither is declared in this block).
// NOTE(review): the field declaration, the constructor body and one statement of actionPerformed
// are missing from this excerpt; presumably that statement stores size into fontSize - confirm.
1250 class SizeActionListener implements ActionListener {
1252 public SizeActionListener(int size) {
1255 public void actionPerformed(ActionEvent e) {
1256 if (DEBUG) System.out.println("Size: " + size);
1258 text.setFont(new Font(fontName, Font.PLAIN, fontSize));
1262 Set add(Set s, Enumeration enumeration) {
1263 while(enumeration.hasMoreElements()) {
1264 s.add(enumeration.nextElement());
1270 * Get a sorted list of the system transliterators.
1273 private static Vector getSystemTransliteratorNames() {
1274 Vector v = new Vector();
1275 for (Enumeration e=Transliterator.getAvailableIDs();
1276 e.hasMoreElements(); ) {
1277 v.addElement(e.nextElement());
1279 // Insertion sort, O(n^2) acceptable for small n
1280 for (int i=0; i<(v.size()-1); ++i) {
1281 String a = (String) v.elementAt(i);
1282 for (int j=i+1; j<v.size(); ++j) {
1283 String b = (String) v.elementAt(j);
1284 if (a.compareTo(b) > 0) {
1285 v.setElementAt(b, i);
1286 v.setElementAt(a, j);
// Makes the "no transliterator" menu entry the current one: records it in translitItem, checks
// it, and passes its label to handleSetTransliterator.
1296 private void setNoTransliterator() {
1297 translitItem = noTranslitItem;
1298 noTranslitItem.setState(true);
1299 handleSetTransliterator(noTranslitItem.getLabel());
// Uncheck every other CheckboxMenuItem in translitMenu.
1301 for (int i=0; i<translitMenu.getItemCount(); ++i) {
1302 MenuItem it = translitMenu.getItem(i);
1303 if (it != noTranslitItem && it instanceof CheckboxMenuItem) {
1304 ((CheckboxMenuItem) it).setState(false);
// While fewer than MAX_COMPOUND entries are stored, places the transliterator decoded from name
// into compoundTranslit, builds a CompoundTransliterator from the first compoundCount entries
// and installs it on the text component.
// NOTE(review): one line after the array store is missing from this excerpt; presumably it
// increments compoundCount - confirm.
// NOTE(review): decodeTranslitItem returns null when name equals NO_TRANSLITERATOR, and that
// null would be stored here unchanged; verify callers never pass that label.
1310 private void handleAddToCompound(String name) {
1311 if (compoundCount < MAX_COMPOUND) {
1312 compoundTranslit[compoundCount] = decodeTranslitItem(name);
// Copy only the filled prefix of the fixed-size array.
1314 Transliterator t[] = new Transliterator[compoundCount];
1315 System.arraycopy(compoundTranslit, 0, t, 0, compoundCount);
1316 translit = new CompoundTransliterator(t);
1317 text.setTransliterator(translit);
// Sets translit to the transliterator decoded from name (null when name equals
// NO_TRANSLITERATOR) and hands it to the text component.
// NOTE(review): the lines following the last statement are missing from this excerpt.
1322 private void handleSetTransliterator(String name) {
1323 translit = decodeTranslitItem(name);
1324 text.setTransliterator(translit);
1329 * Decode a menu item that looks like <translit name>.
1332 private static Transliterator decodeTranslitItem(String name) {
1333 return (name.equals(NO_TRANSLITERATOR))
1334 ? null : Transliterator.getInstance(name);
// Runs trans over the current selection of the text component, replaces the selection with the
// result and selects the replaced text.
// NOTE(review): the body of the null check and the guards around the logging statements are
// missing from this excerpt; presumably a null trans returns early and logging is conditional - confirm.
1338 private void handleBatchTransliterate(Transliterator trans) {
1339 if (trans == null) {
// Copy the selected range into a ReplaceableString for transliterate(...).
1343 int start = text.getSelectionStart();
1344 int end = text.getSelectionEnd();
1345 ReplaceableString s =
1346 new ReplaceableString(text.getText().substring(start, end));
// log starts as null; the statements below allocate it and record the input with its range.
1348 StringBuffer log = null;
1350 log = new StringBuffer();
1351 log.append('"' + s.toString() + "\" (start " + start +
1352 ", end " + end + ") -> \"");
1355 trans.transliterate(s);
1356 String str = s.toString();
// Complete the log entry with the output and print it.
1359 log.append(str + "\"");
1360 System.out.println("Batch " + trans.getID() + ": " + log.toString());
// Write the result back and select exactly the replaced text.
1363 text.replaceRange(str, start, end);
1364 text.select(start, start + str.length());
// Disposes the help dialog.
// NOTE(review): the remainder of this method is not visible in this excerpt.
1367 private void handleClose() {
1368 helpDialog.dispose();
1373 class InfoDialog extends Dialog {
1374 protected Button button;
1375 protected TextArea area;
1376 protected Dialog me;
1377 protected Panel bottom;
1379 public TextArea getArea() {
1383 public Panel getBottom() {
1387 InfoDialog(Frame parent, String title, String label, String message) {
1388 super(parent, title, false);
1390 this.setLayout(new BorderLayout());
1391 if (label.length() != 0) {
1392 this.add("North", new Label(label));
1395 area = new TextArea(message, 8, 80, TextArea.SCROLLBARS_VERTICAL_ONLY);
1396 this.add("Center", area);
1398 button = new Button("Hide");
1399 button.addActionListener(new ActionListener() {
1400 public void actionPerformed(ActionEvent e) {
1404 bottom = new Panel();
1405 bottom.setLayout(new FlowLayout(FlowLayout.CENTER, 0, 0));
1407 this.add("South", bottom);
1409 addWindowListener(new WindowAdapter() {
1410 public void windowClosing(WindowEvent e) {