]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / classes / translit / src / com / ibm / icu / text / NormalizationTransliterator.java
1 /*\r
2 **********************************************************************\r
3 *   Copyright (C) 2001-2010, International Business Machines\r
4 *   Corporation and others.  All Rights Reserved.\r
5 **********************************************************************\r
6 *   Date        Name        Description\r
7 *   06/08/01    aliu        Creation.\r
8 **********************************************************************\r
9 */\r
10 \r
11 package com.ibm.icu.text;\r
12 import com.ibm.icu.impl.Norm2AllModes;\r
13 import com.ibm.icu.impl.Normalizer2Impl;\r
14 \r
15 /**\r
16  * @author Alan Liu, Markus Scherer\r
17  */\r
18 final class NormalizationTransliterator extends Transliterator {\r
19     private final Normalizer2 norm2;\r
20 \r
21     /**\r
22      * System registration hook.\r
23      */\r
24     static void register() {\r
25         Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {\r
26             public Transliterator getInstance(String ID) {\r
27                 return new NormalizationTransliterator(\r
28                         "NFC", Norm2AllModes.getNFCInstance().comp);\r
29             }\r
30         });\r
31         Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {\r
32             public Transliterator getInstance(String ID) {\r
33                 return new NormalizationTransliterator(\r
34                         "NFD", Norm2AllModes.getNFCInstance().decomp);\r
35             }\r
36         });\r
37         Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {\r
38             public Transliterator getInstance(String ID) {\r
39                 return new NormalizationTransliterator(\r
40                         "NFKC", Norm2AllModes.getNFKCInstance().comp);\r
41             }\r
42         });\r
43         Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {\r
44             public Transliterator getInstance(String ID) {\r
45                 return new NormalizationTransliterator(\r
46                         "NFKD", Norm2AllModes.getNFKCInstance().decomp);\r
47             }\r
48         });\r
49         Transliterator.registerFactory("Any-FCD", new Transliterator.Factory() {\r
50             public Transliterator getInstance(String ID) {\r
51                 return new NormalizationTransliterator(\r
52                         "FCD", Norm2AllModes.getFCDNormalizer2());\r
53             }\r
54         });\r
55         Transliterator.registerFactory("Any-FCC", new Transliterator.Factory() {\r
56             public Transliterator getInstance(String ID) {\r
57                 return new NormalizationTransliterator(\r
58                         "FCC", Norm2AllModes.getNFCInstance().fcc);\r
59             }\r
60         });\r
61         Transliterator.registerSpecialInverse("NFC", "NFD", true);\r
62         Transliterator.registerSpecialInverse("NFKC", "NFKD", true);\r
63         Transliterator.registerSpecialInverse("FCC", "NFD", false);\r
64         Transliterator.registerSpecialInverse("FCD", "FCD", false);\r
65     }\r
66 \r
67     /**\r
68      * Constructs a transliterator.\r
69      */\r
70     private NormalizationTransliterator(String id, Normalizer2 n2) {\r
71         super(id, null);\r
72         norm2 = n2;\r
73     }\r
74 \r
75     /**\r
76      * Implements {@link Transliterator#handleTransliterate}.\r
77      */\r
78     protected void handleTransliterate(Replaceable text,\r
79                                        Position offsets, boolean isIncremental) {\r
80         // start and limit of the input range\r
81         int start = offsets.start;\r
82         int limit = offsets.limit;\r
83         if(start >= limit) {\r
84             return;\r
85         }\r
86 \r
87         /*\r
88          * Normalize as short chunks at a time as possible even in\r
89          * bulk mode, so that styled text is minimally disrupted.\r
90          * In incremental mode, a chunk that ends with offsets.limit\r
91          * must not be normalized.\r
92          *\r
93          * If it was known that the input text is not styled, then\r
94          * a bulk mode normalization could be used.\r
95          * (For details, see the comment in the C++ version.)\r
96          */\r
97         StringBuilder segment = new StringBuilder();\r
98         StringBuilder normalized = new StringBuilder();\r
99         int c = text.char32At(start);\r
100         do {\r
101             int prev = start;\r
102             // Skip at least one character so we make progress.\r
103             // c holds the character at start.\r
104             segment.setLength(0);\r
105             do {\r
106                 segment.appendCodePoint(c);\r
107                 start += Character.charCount(c);\r
108             } while(start < limit && !norm2.hasBoundaryBefore(c = text.char32At(start)));\r
109             if(start == limit && isIncremental && !norm2.hasBoundaryAfter(c)) {\r
110                 // stop in incremental mode when we reach the input limit\r
111                 // in case there are additional characters that could change the\r
112                 // normalization result\r
113                 start=prev;\r
114                 break;\r
115             }\r
116             norm2.normalize(segment, normalized);\r
117             if(!Normalizer2Impl.UTF16Plus.equal(segment, normalized)) {\r
118                 // replace the input chunk with its normalized form\r
119                 text.replace(prev, start, normalized.toString());\r
120 \r
121                 // update all necessary indexes accordingly\r
122                 int delta = normalized.length() - (start - prev);\r
123                 start += delta;\r
124                 limit += delta;\r
125             }\r
126         } while(start < limit);\r
127 \r
128         offsets.start = start;\r
129         offsets.contextLimit += limit - offsets.limit;\r
130         offsets.limit = limit;\r
131     }\r
132 }\r