2 **********************************************************************
\r
3 * Copyright (c) 2001-2007, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 **********************************************************************
\r
6 * Date Name Description
\r
7 * 11/29/2001 aliu Creation.
\r
8 * 06/26/2002 aliu Moved to com.ibm.icu.dev.tool.translit
\r
9 **********************************************************************
\r
11 package com.ibm.icu.dev.tool.translit;
\r
13 import com.ibm.icu.dev.tool.translit.UnicodeSetClosure;
\r
15 import com.ibm.icu.text.*;
\r
/**
 * Class that generates source set information for a transliterator.
 *
 * To run, use:
 *
 *   java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower
 *
 * Output is produced in the command console, and a file with more detail is also written.
 *
 * To see if it works, use:
 *
 *   java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress
 *
 * and
 *
 *   java com.ibm.icu.dev.demo.translit.Demo
 */
\r
34 public class SourceSet {
\r
36 public static void main(String[] args) throws IOException {
\r
37 if (args.length == 0) {
\r
38 // Compute and display the source sets for all system
\r
40 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
\r
41 String ID = (String) e.nextElement();
\r
42 showSourceSet(ID, Normalizer.NONE, false);
\r
45 // Usage: ID [NFKD | NFD] [lower]
\r
46 Normalizer.Mode m = Normalizer.NONE;
\r
47 boolean lowerFirst = false;
\r
48 if (args.length >= 2) {
\r
49 if (args[1].equalsIgnoreCase("NFD")) {
\r
51 } else if (args[1].equalsIgnoreCase("NFKD")) {
\r
52 m = Normalizer.NFKD;
\r
57 if (args.length >= 3) {
\r
58 if (args[2].equalsIgnoreCase("lower")) {
\r
64 if (args.length > 3) {
\r
67 showSourceSet(args[0], m, lowerFirst);
\r
71 static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException {
\r
72 File f = new File("UnicodeSetClosure.txt");
\r
73 String filename = f.getCanonicalFile().toString();
\r
74 out = new PrintWriter(
\r
75 new OutputStreamWriter(
\r
76 new FileOutputStream(filename), "UTF-8"));
\r
77 out.print('\uFEFF'); // BOM
\r
78 System.out.println();
\r
79 System.out.println("Writing " + filename);
\r
80 Transliterator t = Transliterator.getInstance(ID);
\r
81 showSourceSetAux(t, m, lowerFirst, true);
\r
82 showSourceSetAux(t.getInverse(), m, lowerFirst, false);
\r
86 static PrintWriter out;
\r
88 static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) {
\r
89 UnicodeSet sourceSet = t.getSourceSet();
\r
90 if (m != Normalizer.NONE || lowerFirst) {
\r
91 UnicodeSetClosure.close(sourceSet, m, lowerFirst);
\r
93 System.out.println(t.getID() + ": " +
\r
94 sourceSet.toPattern(true));
\r
95 out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID() + (forward ? "" : " REVERSE"));
\r
97 + (forward ? "" : "( ")
\r
98 + sourceSet.toPattern(true)
\r
99 + (forward ? "" : " )")
\r
101 out.println("# Unicode: " + sourceSet.toPattern(false));
\r
105 static void usage() {
\r
106 System.err.println("Usage: ID [ NFD|NFKD [lower] ]");
\r