2 **********************************************************************
3 * Copyright (c) 2001-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/29/2001 aliu Creation.
8 * 06/26/2002 aliu Moved to com.ibm.icu.dev.tool.translit
9 **********************************************************************
11 package com.ibm.icu.dev.tool.translit;
13 import java.io.FileOutputStream;
14 import java.io.IOException;
15 import java.io.OutputStreamWriter;
16 import java.io.PrintWriter;
17 import java.util.Enumeration;
19 import com.ibm.icu.text.Normalizer;
20 import com.ibm.icu.text.Transliterator;
21 import com.ibm.icu.text.UnicodeSet;
24 * Class that generates source set information for a transliterator.
28 * java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower
30 * Output is produced in the command console, and a file with more detail is also written.
32 * To see if it works, use:
34 * java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress
38 * java com.ibm.icu.dev.demo.translit.Demo
40 public class SourceSet {
/**
 * Command-line entry point.
 *
 * With no arguments, calls showSourceSet for every ID enumerated by
 * Transliterator.getAvailableIDs(). Otherwise args[0] is passed to
 * showSourceSet as the transliterator ID, args[1] is compared
 * (case-insensitively) with "NFD" / "NFKD", and args[2] with "lower".
 *
 * NOTE(review): several statements of this method are not visible in this
 * excerpt (the bodies of the NFD / NFKD / lower branches and of the
 * "too many arguments" check), so their effect is described below only
 * as an assumption.
 *
 * @param args optional: ID [ NFD|NFKD [lower] ]
 * @throws IOException declared because showSourceSet declares it
 */
42 public static void main(String[] args) throws IOException {
43 if (args.length == 0) {
44 // No arguments: show the source set of every available transliterator ID,
// using Normalizer.NONE and lowerFirst == false for each one.
46 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
47 String ID = (String) e.nextElement();
48 showSourceSet(ID, Normalizer.NONE, false);
51 // Usage: ID [NFKD | NFD] [lower]
// Defaults used when the optional arguments are absent.
52 Normalizer.Mode m = Normalizer.NONE;
53 boolean lowerFirst = false;
// Optional second argument: normalization mode name, matched ignoring case.
// Presumably each branch assigns the matching mode to m -- confirm against the full file.
54 if (args.length >= 2) {
55 if (args[1].equalsIgnoreCase("NFD")) {
57 } else if (args[1].equalsIgnoreCase("NFKD")) {
// Optional third argument: the literal "lower", matched ignoring case.
// Presumably sets lowerFirst = true -- confirm against the full file.
63 if (args.length >= 3) {
64 if (args[2].equalsIgnoreCase("lower")) {
// More than three arguments: handling is not visible here (presumably reports usage) -- confirm.
70 if (args.length > 3) {
// Process the single requested ID with the selected options.
73 showSourceSet(args[0], m, lowerFirst);
/**
 * Opens "UnicodeSetClosure.txt" for writing as UTF-8, stores the writer in the
 * static field out, and reports the source set of the transliterator
 * with the given ID and of its inverse via showSourceSetAux.
 *
 * NOTE(review): FileOutputStream is constructed without an append flag, so
 * every call starts the file from scratch; when main loops over all IDs each
 * call replaces what the previous one wrote -- confirm this is intended.
 * NOTE(review): no close or flush of out is visible in this excerpt --
 * confirm the writer is closed before the method returns.
 *
 * @param ID         transliterator ID passed to Transliterator.getInstance
 * @param m          normalization mode forwarded to showSourceSetAux
 * @param lowerFirst flag forwarded to showSourceSetAux
 * @throws IOException declared by this method; the canonical-path lookup and
 *                     the stream/writer construction below can raise it
 */
77 static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException {
// The relative file name is turned into a canonical path; that path is both opened and echoed below.
78 File f = new File("UnicodeSetClosure.txt");
79 String filename = f.getCanonicalFile().toString();
80 out = new PrintWriter(
81 new OutputStreamWriter(
82 new FileOutputStream(filename), "UTF-8"));
83 out.print('\uFEFF'); // BOM: U+FEFF written as the first character of the file
85 System.out.println("Writing " + filename);
86 Transliterator t = Transliterator.getInstance(ID);
// Forward direction first, then the inverse transliterator (flagged as not forward).
87 showSourceSetAux(t, m, lowerFirst, true);
88 showSourceSetAux(t.getInverse(), m, lowerFirst, false);
// Shared writer for the detail file: assigned in showSourceSet and written to by showSourceSetAux.
92 static PrintWriter out;
/**
 * Reports the source set of one transliterator: a one-line summary on
 * System.out and a commented entry in the detail file behind the static
 * writer out.
 *
 * The static field out must already have been assigned (showSourceSet does
 * this) before this method is called.
 *
 * NOTE(review): the first and last lines of the middle out.println(...)
 * expression are not visible in this excerpt; only its operands are shown.
 *
 * @param t          transliterator whose source set is reported
 * @param m          normalization mode passed to UnicodeSetClosure.close
 * @param lowerFirst flag passed to UnicodeSetClosure.close
 * @param forward    false for the inverse direction: the header gets the
 *                   suffix " REVERSE" and the pattern is wrapped in "( " and " )"
 */
94 static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) {
95 UnicodeSet sourceSet = t.getSourceSet();
// The closure step is skipped when neither normalization nor lowerFirst was requested.
96 if (m != Normalizer.NONE || lowerFirst) {
97 UnicodeSetClosure.close(sourceSet, m, lowerFirst);
// Console summary: "<ID>: <pattern>".
99 System.out.println(t.getID() + ": " +
100 sourceSet.toPattern(true));
// Detail file: header comment naming the transliterator and, if applicable, the reverse direction.
101 out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID() + (forward ? "" : " REVERSE"));
// Pattern text; for the inverse direction it is enclosed in "( " ... " )".
103 + (forward ? "" : "( ")
104 + sourceSet.toPattern(true)
105 + (forward ? "" : " )")
// Same set again via toPattern(false) -- presumably the form without escapes; confirm against UnicodeSet.toPattern.
107 out.println("# Unicode: " + sourceSet.toPattern(false));
/**
 * Prints the command-line usage summary to System.err.
 * NOTE(review): the remainder of this method is outside the visible excerpt.
 */
111 static void usage() {
112 System.err.println("Usage: ID [ NFD|NFKD [lower] ]");