2 *******************************************************************************
3 * Copyright (C) 2010-2011, Google, International Business Machines *
4 * Corporation and others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.util.HashSet;
12 import com.ibm.icu.lang.CharSequences;
15 * Simple internal utility class for helping with getSource/TargetSet
17 class SourceTargetUtility {
// Transform under analysis; assigned once by the two-argument constructor.
18 final Transform<String, String> transform;
// Code-point set built by the two-argument constructor and frozen there via freeze().
19 final UnicodeSet sourceCache;
// String set created as an empty HashSet by the two-argument constructor.
20 final Set<String> sourceStrings;
// Frozen set built from the pattern [:^ccc=0:] (canonical combining class other than 0).
21 static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();
// Shared NFC normalizer; the two-argument constructor calls getDecomposition on it.
// NOTE(review): unlike NON_STARTERS this static is not final. The only other
// assignment visible in this listing is inside commented-out code, so it can
// probably be made final; confirm nothing else in the package assigns it.
22 static Normalizer2 NFC = Normalizer2.getNFCInstance();
// Disabled field; referenced only by the commented-out code in the constructor.
23 //static final UnicodeSet TRAILING_COMBINING = new UnicodeSet();
/**
 * Creates a utility for the given transform without a normalizer.
 * Delegates to the two-argument constructor, passing {@code null} as the normalizer.
 *
 * @param transform the transform to analyse
 */
25 public SourceTargetUtility(Transform<String, String> transform) {
26 this(transform, null);
/**
 * Creates a utility for the given transform, initialising {@code sourceCache} and
 * {@code sourceStrings} and then probing every code point from 0 to 0x10FFFF
 * with the transform.
 *
 * NOTE(review): this listing skips lines (the embedded line numbers jump), so
 * several branch bodies, {@code else} lines and closing braces are not visible.
 * The comments below describe only the statements that are shown.
 *
 * @param transform the transform applied to each code point and to its NFC decomposition
 * @param normalizer optional normalizer; {@code null} is passed by the one-argument constructor
 */
29 public SourceTargetUtility(Transform<String, String> transform, Normalizer2 normalizer) {
30 this.transform = transform;
// With a normalizer, the cache starts from the [:^ccc=0:] set; the empty-set
// assignment further down is presumably the else branch (its `else` line is
// not visible in this listing).
31 if (normalizer != null) {
// Disabled code: it would have filled TRAILING_COMBINING from NFC decompositions.
32 // synchronized (SourceTargetUtility.class) {
34 // NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
35 // for (int i = 0; i <= 0x10FFFF; ++i) {
36 // String d = NFC.getDecomposition(i);
40 // String s = NFC.normalize(d);
41 // if (!CharSequences.equals(i, s)) {
45 // boolean first = false;
46 // for (int trailing : CharSequences.codePoints(d)) {
50 // TRAILING_COMBINING.add(trailing);
56 sourceCache = new UnicodeSet("[:^ccc=0:]");
58 sourceCache = new UnicodeSet();
60 sourceStrings = new HashSet<String>();
// Probe every Unicode code point.
61 for (int i = 0; i <= 0x10FFFF; ++i) {
// Transform the string that holds just this code point.
62 String s = transform.transform(UTF16.valueOf(i));
// Flag starts false; the statements that set or read it are not visible here.
63 boolean added = false;
// Taken when CharSequences.equals reports that the output differs from the
// code point itself; the branch body is not visible in this listing.
64 if (!CharSequences.equals(i, s)) {
// No-normalizer case; the branch body is not visible in this listing.
68 if (normalizer == null) {
// Fetch the NFC decomposition of the code point and transform that as well.
71 String d = NFC.getDecomposition(i);
75 s = transform.transform(d);
// Taken when the supplied normalizer does not report the code point as inert;
// the branch body is not visible in this listing.
82 if (!normalizer.isInert(i)) {
86 // see if any of the non-starters change s; if so, add i
87 // for (String ns : TRAILING_COMBINING) {
88 // String s2 = transform.transform(s + ns);
89 // if (!s2.startsWith(s)) {
90 // sourceCache.add(i);
95 // int endOfFirst = CharSequences.onCharacterBoundary(d, 1) ? 1 : 2;
96 // if (endOfFirst >= d.length()) {
99 // // now add all initial substrings
100 // for (int j = 1; j < d.length(); ++j) {
101 // if (!CharSequences.onCharacterBoundary(d, j)) {
104 // String dd = d.substring(0,j);
105 // s = transform.transform(dd);
106 // if (!dd.equals(s)) {
107 // sourceStrings.add(dd);
// Freeze sourceCache; this is the last visible statement of the constructor.
111 sourceCache.freeze();
114 public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
115 UnicodeSet targetSet) {
116 UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter);
117 UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter);
118 sourceSet.addAll(affectedCharacters);
119 for (String s : affectedCharacters) {
120 targetSet.addAll(transform.transform(s));
122 for (String s : sourceStrings) {
123 if (myFilter.containsAll(s)) {
124 String t = transform.transform(s);