2 **********************************************************************
3 * Copyright (C) 2001-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 06/08/01 aliu Creation.
8 **********************************************************************
11 package com.ibm.icu.text;
import java.util.HashMap;
import java.util.Map;

import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;
19 * @author Alan Liu, Markus Scherer
21 final class NormalizationTransliterator extends Transliterator {
22 private final Normalizer2 norm2;
25 * System registration hook.
27 static void register() {
28 Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
29 public Transliterator getInstance(String ID) {
30 return new NormalizationTransliterator(
31 "NFC", Norm2AllModes.getNFCInstance().comp);
34 Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
35 public Transliterator getInstance(String ID) {
36 return new NormalizationTransliterator(
37 "NFD", Norm2AllModes.getNFCInstance().decomp);
40 Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
41 public Transliterator getInstance(String ID) {
42 return new NormalizationTransliterator(
43 "NFKC", Norm2AllModes.getNFKCInstance().comp);
46 Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
47 public Transliterator getInstance(String ID) {
48 return new NormalizationTransliterator(
49 "NFKD", Norm2AllModes.getNFKCInstance().decomp);
52 Transliterator.registerFactory("Any-FCD", new Transliterator.Factory() {
53 public Transliterator getInstance(String ID) {
54 return new NormalizationTransliterator(
55 "FCD", Norm2AllModes.getFCDNormalizer2());
58 Transliterator.registerFactory("Any-FCC", new Transliterator.Factory() {
59 public Transliterator getInstance(String ID) {
60 return new NormalizationTransliterator(
61 "FCC", Norm2AllModes.getNFCInstance().fcc);
64 Transliterator.registerSpecialInverse("NFC", "NFD", true);
65 Transliterator.registerSpecialInverse("NFKC", "NFKD", true);
66 Transliterator.registerSpecialInverse("FCC", "NFD", false);
67 Transliterator.registerSpecialInverse("FCD", "FCD", false);
/**
 * Constructs a transliterator.
 *
 * @param id the ID passed on to the {@link Transliterator} base class
 * @param n2 the normalizer this transliterator applies to each segment
 */
private NormalizationTransliterator(String id, Normalizer2 n2) {
    // NOTE(review): assumes the Transliterator(String, UnicodeFilter)
    // base constructor; null means no filter -- confirm.
    super(id, null);
    norm2 = n2;
}
79 * Implements {@link Transliterator#handleTransliterate}.
81 protected void handleTransliterate(Replaceable text,
82 Position offsets, boolean isIncremental) {
83 // start and limit of the input range
84 int start = offsets.start;
85 int limit = offsets.limit;
91 * Normalize as short chunks at a time as possible even in
92 * bulk mode, so that styled text is minimally disrupted.
93 * In incremental mode, a chunk that ends with offsets.limit
94 * must not be normalized.
96 * If it was known that the input text is not styled, then
97 * a bulk mode normalization could be used.
98 * (For details, see the comment in the C++ version.)
100 StringBuilder segment = new StringBuilder();
101 StringBuilder normalized = new StringBuilder();
102 int c = text.char32At(start);
105 // Skip at least one character so we make progress.
106 // c holds the character at start.
107 segment.setLength(0);
109 segment.appendCodePoint(c);
110 start += Character.charCount(c);
111 } while(start < limit && !norm2.hasBoundaryBefore(c = text.char32At(start)));
112 if(start == limit && isIncremental && !norm2.hasBoundaryAfter(c)) {
113 // stop in incremental mode when we reach the input limit
114 // in case there are additional characters that could change the
115 // normalization result
119 norm2.normalize(segment, normalized);
120 if(!Normalizer2Impl.UTF16Plus.equal(segment, normalized)) {
121 // replace the input chunk with its normalized form
122 text.replace(prev, start, normalized.toString());
124 // update all necessary indexes accordingly
125 int delta = normalized.length() - (start - prev);
129 } while(start < limit);
131 offsets.start = start;
132 offsets.contextLimit += limit - offsets.limit;
133 offsets.limit = limit;
136 static final Map<Normalizer2, SourceTargetUtility> SOURCE_CACHE = new HashMap<Normalizer2, SourceTargetUtility>();
138 // TODO Get rid of this if Normalizer2 becomes a Transform
139 static class NormalizingTransform implements Transform<String,String> {
140 final Normalizer2 norm2;
141 public NormalizingTransform(Normalizer2 norm2) {
144 public String transform(String source) {
145 return norm2.normalize(source);
150 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
153 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
154 SourceTargetUtility cache;
155 synchronized (SOURCE_CACHE) {
156 //String id = getID();
157 cache = SOURCE_CACHE.get(norm2);
159 cache = new SourceTargetUtility(new NormalizingTransform(norm2), norm2);
160 SOURCE_CACHE.put(norm2, cache);
163 cache.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);