2 *******************************************************************************
\r
3 * Copyright (C) 2002-2005, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.tool.layout;
\r
10 import com.ibm.icu.impl.Utility;
\r
11 import com.ibm.icu.lang.UCharacter;
\r
12 import com.ibm.icu.lang.UScript;
\r
13 import com.ibm.icu.text.UnicodeSet;
\r
14 import com.ibm.icu.text.CanonicalIterator;
\r
15 import com.ibm.icu.text.UTF16;
\r
16 import java.util.Vector;
\r
18 public class CanonicalCharacterData
\r
20 private static int THRESHOLD = 4;
\r
24 // TODO: might want to save arrays of Char32's rather than UTF16 strings...
\r
25 Record(int character, int script)
\r
27 String char32 = UCharacter.toString(character);
\r
28 CanonicalIterator iterator = new CanonicalIterator(char32);
\r
29 Vector equivs = new Vector();
\r
31 composed = character;
\r
33 for (String equiv = iterator.next(); equiv != null; equiv = iterator.next()) {
\r
34 // Skip all equivalents of length 1; it's either the original
\r
35 // character or something like Angstrom for A-Ring, which we don't care about
\r
36 if (UTF16.countCodePoint(equiv) > 1) {
\r
41 int nEquivalents = equivs.size();
\r
43 if (nEquivalents > maxEquivalents[script]) {
\r
44 maxEquivalents[script] = nEquivalents;
\r
47 if (nEquivalents > 0) {
\r
48 equivalents = new String[nEquivalents];
\r
50 if (nEquivalents > THRESHOLD) {
\r
51 dumpEquivalents(character, equivs);
\r
54 sortEquivalents(equivalents, equivs);
\r
58 public int getComposedCharacter()
\r
63 public int countEquivalents()
\r
65 if (equivalents == null) {
\r
69 return equivalents.length;
\r
72 public String[] getEquivalents()
\r
77 public String getEquivalent(int index)
\r
79 if (equivalents == null || index < 0 || index >= equivalents.length) {
\r
83 return equivalents[index];
\r
86 private void dumpEquivalents(int character, Vector equivs)
\r
88 int count = equivs.size();
\r
90 System.out.println(Utility.hex(character, 6) + " - " + count + ":");
\r
92 for (int i = 0; i < count; i += 1) {
\r
93 String equiv = (String) equivs.elementAt(i);
\r
94 int codePoints = UTF16.countCodePoint(equiv);
\r
96 for (int c = 0; c < codePoints; c += 1) {
\r
98 System.out.print(" ");
\r
101 System.out.print(Utility.hex(UTF16.charAt(equiv, c), 6));
\r
104 System.out.println();
\r
107 System.out.println();
\r
110 private int composed;
\r
111 private String[] equivalents = null;
\r
114 public CanonicalCharacterData()
\r
116 // nothing to do...
\r
119 public void add(int character)
\r
121 int script = UScript.getScript(character);
\r
122 Vector recordVector = recordVectors[script];
\r
124 if (recordVector == null) {
\r
125 recordVector = recordVectors[script] = new Vector();
\r
128 recordVector.add(new Record(character, script));
\r
131 public int getMaxEquivalents(int script)
\r
133 if (script < 0 || script >= UScript.CODE_LIMIT) {
\r
137 return maxEquivalents[script];
\r
140 public Record getRecord(int script, int index)
\r
142 if (script < 0 || script >= UScript.CODE_LIMIT) {
\r
146 Vector recordVector = recordVectors[script];
\r
148 if (recordVector == null || index < 0 || index >= recordVector.size()) {
\r
152 return (Record) recordVector.elementAt(index);
\r
155 public int countRecords(int script)
\r
157 if (script < 0 || script >= UScript.CODE_LIMIT ||
\r
158 recordVectors[script] == null) {
\r
162 return recordVectors[script].size();
\r
165 public static CanonicalCharacterData factory(UnicodeSet characterSet)
\r
167 int charCount = characterSet.size();
\r
168 CanonicalCharacterData data = new CanonicalCharacterData();
\r
170 System.out.println("There are " + charCount + " characters with a canonical decomposition.");
\r
172 for (int i = 0; i < charCount; i += 1) {
\r
173 data.add(characterSet.charAt(i));
\r
179 private static int compareEquivalents(String a, String b)
\r
181 int result = UTF16.countCodePoint(a) - UTF16.countCodePoint(b);
\r
184 return a.compareTo(b);
\r
191 // Straight insertion sort from Knuth vol. III, pg. 81
\r
193 private static void sortEquivalents(String[] equivalents, Vector unsorted)
\r
195 int nEquivalents = equivalents.length;
\r
197 for (int e = 0; e < nEquivalents; e += 1) {
\r
198 String v = (String) unsorted.elementAt(e);
\r
201 for (i = e - 1; i >= 0; i -= 1) {
\r
202 if (compareEquivalents(v, equivalents[i]) >= 0) {
\r
206 equivalents[i + 1] = equivalents[i];
\r
209 equivalents[i + 1] = v;
\r
213 private Vector recordVectors[] = new Vector[UScript.CODE_LIMIT];
\r
214 private int maxEquivalents[] = new int[UScript.CODE_LIMIT];
\r