gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/tools/misc/src/com/ibm/icu/dev/tool/layout/CanonicalCharacterData.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / tools / misc / src / com / ibm / icu / dev / tool / layout / CanonicalCharacterData.java
1 /**
2  *******************************************************************************
3  * Copyright (C) 2002-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7
8 package com.ibm.icu.dev.tool.layout;
9
10 import java.util.Vector;
11
12 import com.ibm.icu.impl.Utility;
13 import com.ibm.icu.lang.UCharacter;
14 import com.ibm.icu.lang.UScript;
15 import com.ibm.icu.text.CanonicalIterator;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
18
19 public class CanonicalCharacterData
20 {
21     private static int THRESHOLD = 4;
22     
23     public class Record
24     {
25         // TODO: might want to save arrays of Char32's rather than UTF16 strings...
26         Record(int character, int script)
27         {
28             String char32 = UCharacter.toString(character);
29             CanonicalIterator iterator = new CanonicalIterator(char32);
30             Vector equivs = new Vector();
31             
32             composed = character;
33             
34             for (String equiv = iterator.next(); equiv != null; equiv = iterator.next()) {
35                 // Skip all equivalents of length 1; it's either the original
36                 // characeter or something like Angstrom for A-Ring, which we don't care about
37                 if (UTF16.countCodePoint(equiv) > 1) {
38                     equivs.add(equiv);
39                 }
40             }
41             
42             int nEquivalents = equivs.size();
43             
44             if (nEquivalents > maxEquivalents[script]) {
45                 maxEquivalents[script] = nEquivalents;
46             }
47             
48             if (nEquivalents > 0) {
49                 equivalents = new String[nEquivalents];
50                 
51                 if (nEquivalents > THRESHOLD) {
52                     dumpEquivalents(character, equivs);
53                 }
54                 
55                 sortEquivalents(equivalents, equivs);
56             }
57         }
58         
59         public int getComposedCharacter()
60         {
61             return composed;
62         }
63         
64         public int countEquivalents()
65         {
66             if (equivalents == null) {
67                 return 0;
68             }
69             
70             return equivalents.length;
71         }
72         
73         public String[] getEquivalents()
74         {
75             return equivalents;
76         }
77         
78         public String getEquivalent(int index)
79         {
80             if (equivalents == null || index < 0 || index >= equivalents.length) {
81                 return null;
82             }
83             
84             return equivalents[index];
85         }
86         
87         private void dumpEquivalents(int character, Vector equivs)
88         {
89             int count = equivs.size();
90             
91             System.out.println(Utility.hex(character, 6) + " - " + count + ":");
92             
93             for (int i = 0; i < count; i += 1) {
94                 String equiv = (String) equivs.elementAt(i);
95                 int codePoints = UTF16.countCodePoint(equiv);
96                 
97                 for (int c = 0; c < codePoints; c += 1) {
98                     if (c > 0) {
99                         System.out.print(" ");
100                     }
101                     
102                     System.out.print(Utility.hex(UTF16.charAt(equiv, c), 6));
103                 }
104                 
105                 System.out.println();
106             }
107             
108             System.out.println();
109         }
110         
111         private int composed;
112         private String[] equivalents = null;
113     }
114     
115     public CanonicalCharacterData()
116     {
117         // nothing to do...
118     }
119     
120     public void add(int character)
121     {
122         int script = UScript.getScript(character);
123         Vector recordVector = recordVectors[script];
124         
125         if (recordVector == null) {
126             recordVector = recordVectors[script] = new Vector();
127         }
128         
129         recordVector.add(new Record(character, script));
130     }
131     
132     public int getMaxEquivalents(int script)
133     {
134         if (script < 0 || script >= UScript.CODE_LIMIT) {
135             return 0;
136         }
137         
138         return maxEquivalents[script];
139     }
140     
141     public Record getRecord(int script, int index)
142     {
143         if (script < 0 || script >= UScript.CODE_LIMIT) {
144             return null;
145         }
146         
147         Vector recordVector = recordVectors[script];
148         
149         if (recordVector == null || index < 0 || index >= recordVector.size()) {
150             return null;
151         }
152         
153         return (Record) recordVector.elementAt(index);
154     }
155     
156     public int countRecords(int script)
157     {
158         if (script < 0 || script >= UScript.CODE_LIMIT ||
159             recordVectors[script] == null) {
160             return 0;
161         }
162         
163         return recordVectors[script].size();
164     }
165  
166     public static CanonicalCharacterData factory(UnicodeSet characterSet)
167     {
168         int charCount = characterSet.size();
169         CanonicalCharacterData data = new CanonicalCharacterData();
170         
171         System.out.println("There are " + charCount + " characters with a canonical decomposition.");
172         
173         for (int i = 0; i < charCount; i += 1) {
174             data.add(characterSet.charAt(i));
175         }
176         
177         return data;
178     }
179
180     private static int compareEquivalents(String a, String b)
181     {
182         int result = UTF16.countCodePoint(a) - UTF16.countCodePoint(b);
183             
184         if (result == 0) {
185             return a.compareTo(b);
186         }
187             
188         return result;
189     }
190         
191     //
192     // Straight insertion sort from Knuth vol. III, pg. 81
193     //
194     private static void sortEquivalents(String[] equivalents, Vector unsorted)
195     {
196         int nEquivalents = equivalents.length;
197         
198         for (int e = 0; e < nEquivalents; e += 1) {
199             String v = (String) unsorted.elementAt(e);
200             int i;
201             
202             for (i = e - 1; i >= 0; i -= 1) {
203                 if (compareEquivalents(v, equivalents[i]) >= 0) {
204                   break;
205                 }
206
207                 equivalents[i + 1] = equivalents[i];
208             }
209
210             equivalents[i + 1] = v;
211        }
212     }
213             
214     private Vector recordVectors[] = new Vector[UScript.CODE_LIMIT];
215     private int maxEquivalents[] = new int[UScript.CODE_LIMIT];
216
217 }