/*
 *******************************************************************************
 * Copyright (C) 2002-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package com.ibm.icu.dev.tool.layout;

import java.util.Vector;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.CanonicalIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

19 public class CanonicalCharacterData
\r
21 private static int THRESHOLD = 4;
\r
25 // TODO: might want to save arrays of Char32's rather than UTF16 strings...
\r
26 Record(int character, int script)
\r
28 String char32 = UCharacter.toString(character);
\r
29 CanonicalIterator iterator = new CanonicalIterator(char32);
\r
30 Vector equivs = new Vector();
\r
32 composed = character;
\r
34 for (String equiv = iterator.next(); equiv != null; equiv = iterator.next()) {
\r
35 // Skip all equivalents of length 1; it's either the original
\r
36 // characeter or something like Angstrom for A-Ring, which we don't care about
\r
37 if (UTF16.countCodePoint(equiv) > 1) {
\r
42 int nEquivalents = equivs.size();
\r
44 if (nEquivalents > maxEquivalents[script]) {
\r
45 maxEquivalents[script] = nEquivalents;
\r
48 if (nEquivalents > 0) {
\r
49 equivalents = new String[nEquivalents];
\r
51 if (nEquivalents > THRESHOLD) {
\r
52 dumpEquivalents(character, equivs);
\r
55 sortEquivalents(equivalents, equivs);
\r
59 public int getComposedCharacter()
\r
64 public int countEquivalents()
\r
66 if (equivalents == null) {
\r
70 return equivalents.length;
\r
73 public String[] getEquivalents()
\r
78 public String getEquivalent(int index)
\r
80 if (equivalents == null || index < 0 || index >= equivalents.length) {
\r
84 return equivalents[index];
\r
87 private void dumpEquivalents(int character, Vector equivs)
\r
89 int count = equivs.size();
\r
91 System.out.println(Utility.hex(character, 6) + " - " + count + ":");
\r
93 for (int i = 0; i < count; i += 1) {
\r
94 String equiv = (String) equivs.elementAt(i);
\r
95 int codePoints = UTF16.countCodePoint(equiv);
\r
97 for (int c = 0; c < codePoints; c += 1) {
\r
99 System.out.print(" ");
\r
102 System.out.print(Utility.hex(UTF16.charAt(equiv, c), 6));
\r
105 System.out.println();
\r
108 System.out.println();
\r
111 private int composed;
\r
112 private String[] equivalents = null;
\r
115 public CanonicalCharacterData()
\r
117 // nothing to do...
\r
120 public void add(int character)
\r
122 int script = UScript.getScript(character);
\r
123 Vector recordVector = recordVectors[script];
\r
125 if (recordVector == null) {
\r
126 recordVector = recordVectors[script] = new Vector();
\r
129 recordVector.add(new Record(character, script));
\r
132 public int getMaxEquivalents(int script)
\r
134 if (script < 0 || script >= UScript.CODE_LIMIT) {
\r
138 return maxEquivalents[script];
\r
141 public Record getRecord(int script, int index)
\r
143 if (script < 0 || script >= UScript.CODE_LIMIT) {
\r
147 Vector recordVector = recordVectors[script];
\r
149 if (recordVector == null || index < 0 || index >= recordVector.size()) {
\r
153 return (Record) recordVector.elementAt(index);
\r
156 public int countRecords(int script)
\r
158 if (script < 0 || script >= UScript.CODE_LIMIT ||
\r
159 recordVectors[script] == null) {
\r
163 return recordVectors[script].size();
\r
166 public static CanonicalCharacterData factory(UnicodeSet characterSet)
\r
168 int charCount = characterSet.size();
\r
169 CanonicalCharacterData data = new CanonicalCharacterData();
\r
171 System.out.println("There are " + charCount + " characters with a canonical decomposition.");
\r
173 for (int i = 0; i < charCount; i += 1) {
\r
174 data.add(characterSet.charAt(i));
\r
180 private static int compareEquivalents(String a, String b)
\r
182 int result = UTF16.countCodePoint(a) - UTF16.countCodePoint(b);
\r
185 return a.compareTo(b);
\r
192 // Straight insertion sort from Knuth vol. III, pg. 81
\r
194 private static void sortEquivalents(String[] equivalents, Vector unsorted)
\r
196 int nEquivalents = equivalents.length;
\r
198 for (int e = 0; e < nEquivalents; e += 1) {
\r
199 String v = (String) unsorted.elementAt(e);
\r
202 for (i = e - 1; i >= 0; i -= 1) {
\r
203 if (compareEquivalents(v, equivalents[i]) >= 0) {
\r
207 equivalents[i + 1] = equivalents[i];
\r
210 equivalents[i + 1] = v;
\r
214 private Vector recordVectors[] = new Vector[UScript.CODE_LIMIT];
\r
215 private int maxEquivalents[] = new int[UScript.CODE_LIMIT];
\r