]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java
Added flags.
[Dictionary.git] / jars / icu4j-52_1 / main / classes / translit / src / com / ibm / icu / text / NormalizationTransliterator.java
1 /*
2  **********************************************************************
3  *   Copyright (C) 2001-2010, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  *   Date        Name        Description
7  *   06/08/01    aliu        Creation.
8  **********************************************************************
9  */
10
11 package com.ibm.icu.text;
12 import java.util.HashMap;
13 import java.util.Map;
14
15 import com.ibm.icu.impl.Norm2AllModes;
16 import com.ibm.icu.impl.Normalizer2Impl;
17
18 /**
19  * @author Alan Liu, Markus Scherer
20  */
21 final class NormalizationTransliterator extends Transliterator {
22     private final Normalizer2 norm2;
23
24     /**
25      * System registration hook.
26      */
27     static void register() {
28         Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
29             public Transliterator getInstance(String ID) {
30                 return new NormalizationTransliterator(
31                         "NFC", Norm2AllModes.getNFCInstance().comp);
32             }
33         });
34         Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
35             public Transliterator getInstance(String ID) {
36                 return new NormalizationTransliterator(
37                         "NFD", Norm2AllModes.getNFCInstance().decomp);
38             }
39         });
40         Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
41             public Transliterator getInstance(String ID) {
42                 return new NormalizationTransliterator(
43                         "NFKC", Norm2AllModes.getNFKCInstance().comp);
44             }
45         });
46         Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
47             public Transliterator getInstance(String ID) {
48                 return new NormalizationTransliterator(
49                         "NFKD", Norm2AllModes.getNFKCInstance().decomp);
50             }
51         });
52         Transliterator.registerFactory("Any-FCD", new Transliterator.Factory() {
53             public Transliterator getInstance(String ID) {
54                 return new NormalizationTransliterator(
55                         "FCD", Norm2AllModes.getFCDNormalizer2());
56             }
57         });
58         Transliterator.registerFactory("Any-FCC", new Transliterator.Factory() {
59             public Transliterator getInstance(String ID) {
60                 return new NormalizationTransliterator(
61                         "FCC", Norm2AllModes.getNFCInstance().fcc);
62             }
63         });
64         Transliterator.registerSpecialInverse("NFC", "NFD", true);
65         Transliterator.registerSpecialInverse("NFKC", "NFKD", true);
66         Transliterator.registerSpecialInverse("FCC", "NFD", false);
67         Transliterator.registerSpecialInverse("FCD", "FCD", false);
68     }
69
70     /**
71      * Constructs a transliterator.
72      */
73     private NormalizationTransliterator(String id, Normalizer2 n2) {
74         super(id, null);
75         norm2 = n2;
76     }
77
78     /**
79      * Implements {@link Transliterator#handleTransliterate}.
80      */
81     protected void handleTransliterate(Replaceable text,
82             Position offsets, boolean isIncremental) {
83         // start and limit of the input range
84         int start = offsets.start;
85         int limit = offsets.limit;
86         if(start >= limit) {
87             return;
88         }
89
90         /*
91          * Normalize as short chunks at a time as possible even in
92          * bulk mode, so that styled text is minimally disrupted.
93          * In incremental mode, a chunk that ends with offsets.limit
94          * must not be normalized.
95          *
96          * If it was known that the input text is not styled, then
97          * a bulk mode normalization could be used.
98          * (For details, see the comment in the C++ version.)
99          */
100         StringBuilder segment = new StringBuilder();
101         StringBuilder normalized = new StringBuilder();
102         int c = text.char32At(start);
103         do {
104             int prev = start;
105             // Skip at least one character so we make progress.
106             // c holds the character at start.
107             segment.setLength(0);
108             do {
109                 segment.appendCodePoint(c);
110                 start += Character.charCount(c);
111             } while(start < limit && !norm2.hasBoundaryBefore(c = text.char32At(start)));
112             if(start == limit && isIncremental && !norm2.hasBoundaryAfter(c)) {
113                 // stop in incremental mode when we reach the input limit
114                 // in case there are additional characters that could change the
115                 // normalization result
116                 start=prev;
117                 break;
118             }
119             norm2.normalize(segment, normalized);
120             if(!Normalizer2Impl.UTF16Plus.equal(segment, normalized)) {
121                 // replace the input chunk with its normalized form
122                 text.replace(prev, start, normalized.toString());
123
124                 // update all necessary indexes accordingly
125                 int delta = normalized.length() - (start - prev);
126                 start += delta;
127                 limit += delta;
128             }
129         } while(start < limit);
130
131         offsets.start = start;
132         offsets.contextLimit += limit - offsets.limit;
133         offsets.limit = limit;
134     }
135
136     static final Map<Normalizer2, SourceTargetUtility> SOURCE_CACHE = new HashMap<Normalizer2, SourceTargetUtility>();
137     
138     // TODO Get rid of this if Normalizer2 becomes a Transform
139     static class NormalizingTransform implements Transform<String,String> {
140         final Normalizer2 norm2;
141         public NormalizingTransform(Normalizer2 norm2) {
142             this.norm2 = norm2;
143         }
144         public String transform(String source) {
145             return norm2.normalize(source);
146         }   
147     }
148
149     /* (non-Javadoc)
150      * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
151      */
152     @Override
153     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
154         SourceTargetUtility cache;
155         synchronized (SOURCE_CACHE) {
156             //String id = getID();
157             cache = SOURCE_CACHE.get(norm2);
158             if (cache == null) {
159                 cache = new SourceTargetUtility(new NormalizingTransform(norm2), norm2);
160                 SOURCE_CACHE.put(norm2, cache);
161             }
162         }
163         cache.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);
164     }
165 }