/*
 *******************************************************************************
 * Copyright (C) 2002-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
8 package com.ibm.icu.dev.tool.layout;
10 import java.util.Vector;
12 import com.ibm.icu.impl.Utility;
13 import com.ibm.icu.lang.UCharacter;
14 import com.ibm.icu.lang.UScript;
15 import com.ibm.icu.text.CanonicalIterator;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
19 public class CanonicalCharacterData
21 private static int THRESHOLD = 4;
25 // TODO: might want to save arrays of Char32's rather than UTF16 strings...
26 Record(int character, int script)
28 String char32 = UCharacter.toString(character);
29 CanonicalIterator iterator = new CanonicalIterator(char32);
30 Vector equivs = new Vector();
34 for (String equiv = iterator.next(); equiv != null; equiv = iterator.next()) {
35 // Skip all equivalents of length 1; it's either the original
36 // characeter or something like Angstrom for A-Ring, which we don't care about
37 if (UTF16.countCodePoint(equiv) > 1) {
42 int nEquivalents = equivs.size();
44 if (nEquivalents > maxEquivalents[script]) {
45 maxEquivalents[script] = nEquivalents;
48 if (nEquivalents > 0) {
49 equivalents = new String[nEquivalents];
51 if (nEquivalents > THRESHOLD) {
52 dumpEquivalents(character, equivs);
55 sortEquivalents(equivalents, equivs);
59 public int getComposedCharacter()
64 public int countEquivalents()
66 if (equivalents == null) {
70 return equivalents.length;
73 public String[] getEquivalents()
78 public String getEquivalent(int index)
80 if (equivalents == null || index < 0 || index >= equivalents.length) {
84 return equivalents[index];
87 private void dumpEquivalents(int character, Vector equivs)
89 int count = equivs.size();
91 System.out.println(Utility.hex(character, 6) + " - " + count + ":");
93 for (int i = 0; i < count; i += 1) {
94 String equiv = (String) equivs.elementAt(i);
95 int codePoints = UTF16.countCodePoint(equiv);
97 for (int c = 0; c < codePoints; c += 1) {
99 System.out.print(" ");
102 System.out.print(Utility.hex(UTF16.charAt(equiv, c), 6));
105 System.out.println();
108 System.out.println();
111 private int composed;
112 private String[] equivalents = null;
115 public CanonicalCharacterData()
120 public void add(int character)
122 int script = UScript.getScript(character);
123 Vector recordVector = recordVectors[script];
125 if (recordVector == null) {
126 recordVector = recordVectors[script] = new Vector();
129 recordVector.add(new Record(character, script));
132 public int getMaxEquivalents(int script)
134 if (script < 0 || script >= UScript.CODE_LIMIT) {
138 return maxEquivalents[script];
141 public Record getRecord(int script, int index)
143 if (script < 0 || script >= UScript.CODE_LIMIT) {
147 Vector recordVector = recordVectors[script];
149 if (recordVector == null || index < 0 || index >= recordVector.size()) {
153 return (Record) recordVector.elementAt(index);
156 public int countRecords(int script)
158 if (script < 0 || script >= UScript.CODE_LIMIT ||
159 recordVectors[script] == null) {
163 return recordVectors[script].size();
166 public static CanonicalCharacterData factory(UnicodeSet characterSet)
168 int charCount = characterSet.size();
169 CanonicalCharacterData data = new CanonicalCharacterData();
171 System.out.println("There are " + charCount + " characters with a canonical decomposition.");
173 for (int i = 0; i < charCount; i += 1) {
174 data.add(characterSet.charAt(i));
180 private static int compareEquivalents(String a, String b)
182 int result = UTF16.countCodePoint(a) - UTF16.countCodePoint(b);
185 return a.compareTo(b);
192 // Straight insertion sort from Knuth vol. III, pg. 81
194 private static void sortEquivalents(String[] equivalents, Vector unsorted)
196 int nEquivalents = equivalents.length;
198 for (int e = 0; e < nEquivalents; e += 1) {
199 String v = (String) unsorted.elementAt(e);
202 for (i = e - 1; i >= 0; i -= 1) {
203 if (compareEquivalents(v, equivalents[i]) >= 0) {
207 equivalents[i + 1] = equivalents[i];
210 equivalents[i + 1] = v;
214 private Vector recordVectors[] = new Vector[UScript.CODE_LIMIT];
215 private int maxEquivalents[] = new int[UScript.CODE_LIMIT];