gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/classes/translit/src/com/ibm/icu/text/SourceTargetUtility.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / classes / translit / src / com / ibm / icu / text / SourceTargetUtility.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 2010-2011, Google, International Business Machines            *
4  * Corporation and others. All Rights Reserved.                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.text;
8
9 import java.util.HashSet;
10 import java.util.Set;
11
12 import com.ibm.icu.lang.CharSequences;
13
14 /**
15  * Simple internal utility class for helping with getSource/TargetSet
16  */
17 class SourceTargetUtility {
18     final Transform<String, String> transform;
19     final UnicodeSet sourceCache;
20     final Set<String> sourceStrings;
21     static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();
22     static Normalizer2 NFC = Normalizer2.getNFCInstance();
23     //static final UnicodeSet TRAILING_COMBINING = new UnicodeSet();
24
25     public SourceTargetUtility(Transform<String, String> transform) {
26         this(transform, null);
27     }
28
29     public SourceTargetUtility(Transform<String, String> transform, Normalizer2 normalizer) {
30         this.transform = transform;
31         if (normalizer != null) {
32 //            synchronized (SourceTargetUtility.class) {
33 //                if (NFC == null) {
34 //                    NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
35 //                    for (int i = 0; i <= 0x10FFFF; ++i) {
36 //                        String d = NFC.getDecomposition(i);
37 //                        if (d == null) {
38 //                            continue;
39 //                        }
40 //                        String s = NFC.normalize(d);
41 //                        if (!CharSequences.equals(i, s)) {
42 //                            continue;
43 //                        }
44 //                        // composes
45 //                        boolean first = false;
46 //                        for (int trailing : CharSequences.codePoints(d)) {
47 //                            if (first) {
48 //                                first = false;
49 //                            } else {
50 //                                TRAILING_COMBINING.add(trailing);
51 //                            }
52 //                        }
53 //                    }
54 //                }
55 //            }
56             sourceCache = new UnicodeSet("[:^ccc=0:]");
57         } else {
58             sourceCache = new UnicodeSet();
59         }
60         sourceStrings = new HashSet<String>();
61         for (int i = 0; i <= 0x10FFFF; ++i) {
62             String s = transform.transform(UTF16.valueOf(i));
63             boolean added = false;
64             if (!CharSequences.equals(i, s)) {
65                 sourceCache.add(i);
66                 added = true;
67             }
68             if (normalizer == null) {
69                 continue;
70             }
71             String d = NFC.getDecomposition(i);
72             if (d == null) {
73                 continue;
74             }
75             s = transform.transform(d);
76             if (!d.equals(s)) {
77                 sourceStrings.add(d);
78             }
79             if (added) {
80                 continue;
81             }
82             if (!normalizer.isInert(i)) {
83                 sourceCache.add(i);
84                 continue;
85             }
86             // see if any of the non-starters change s; if so, add i
87 //            for (String ns : TRAILING_COMBINING) {
88 //                String s2 = transform.transform(s + ns);
89 //                if (!s2.startsWith(s)) {
90 //                    sourceCache.add(i);
91 //                    break;
92 //                }
93 //            }
94
95             // int endOfFirst = CharSequences.onCharacterBoundary(d, 1) ? 1 : 2;
96             // if (endOfFirst >= d.length()) {
97             // continue;
98             // }
99             // // now add all initial substrings
100             // for (int j = 1; j < d.length(); ++j) {
101             // if (!CharSequences.onCharacterBoundary(d, j)) {
102             // continue;
103             // }
104             // String dd = d.substring(0,j);
105             // s = transform.transform(dd);
106             // if (!dd.equals(s)) {
107             // sourceStrings.add(dd);
108             // }
109             // }
110         }
111         sourceCache.freeze();
112     }
113
114     public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
115             UnicodeSet targetSet) {
116         UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter);
117         UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter);
118         sourceSet.addAll(affectedCharacters);
119         for (String s : affectedCharacters) {
120             targetSet.addAll(transform.transform(s));
121         }
122         for (String s : sourceStrings) {
123             if (myFilter.containsAll(s)) {
124                 String t = transform.transform(s);
125                 if (!s.equals(t)) {
126                     targetSet.addAll(t);
127                     sourceSet.addAll(s);
128                 }
129             }
130         }
131     }
132 }