]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / classes / translit / src / com / ibm / icu / text / BreakTransliterator.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.text;\r
8 \r
9 import java.text.CharacterIterator;\r
10 \r
11 import com.ibm.icu.lang.UCharacter;\r
12 import com.ibm.icu.util.ULocale;\r
13 \r
14 \r
15 /**\r
16  * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.\r
17  * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.\r
18  */\r
19 final class BreakTransliterator extends Transliterator {\r
20     private BreakIterator bi;\r
21     private String insertion;\r
22     private int[] boundaries = new int[50];\r
23     private int boundaryCount = 0;\r
24 \r
25     public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {\r
26         super(ID, filter);\r
27         this.bi = bi;\r
28         this.insertion = insertion;\r
29     }\r
30 \r
31     public BreakTransliterator(String ID, UnicodeFilter filter) {\r
32         this(ID, filter, null, " ");\r
33     }\r
34 \r
35     ///CLOVER:OFF\r
36     // The following method is not called by anything and can't be reached\r
37     public String getInsertion() {\r
38         return insertion;\r
39     }\r
40     ///CLOVER:ON\r
41 \r
42     ///CLOVER:OFF\r
43     // The following method is not called by anything and can't be reached\r
44     public void setInsertion(String insertion) {\r
45         this.insertion = insertion;\r
46     }\r
47     ///CLOVER:ON\r
48 \r
49     public BreakIterator getBreakIterator() {\r
50         // Defer initialization of BreakIterator because it is slow,\r
51         // typically over 2000 ms.\r
52         if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));\r
53         return bi;\r
54     }\r
55 \r
56     ///CLOVER:OFF\r
57     // The following method is not called by anything and can't be reached\r
58     public void setBreakIterator(BreakIterator bi) {\r
59         this.bi = bi;\r
60     }\r
61     ///CLOVER:ON\r
62 \r
63     static final int LETTER_OR_MARK_MASK =\r
64           (1<<Character.UPPERCASE_LETTER)\r
65         | (1<<Character.LOWERCASE_LETTER)\r
66         | (1<<Character.TITLECASE_LETTER)\r
67         | (1<<Character.MODIFIER_LETTER)\r
68         | (1<<Character.OTHER_LETTER)\r
69         | (1<<Character.COMBINING_SPACING_MARK)\r
70         | (1<<Character.NON_SPACING_MARK)\r
71         | (1<<Character.ENCLOSING_MARK)\r
72         ;\r
73     protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) {\r
74         boundaryCount = 0;\r
75         int boundary = 0;\r
76         getBreakIterator(); // Lazy-create it if necessary\r
77         bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));\r
78         // TODO: fix clumsy workaround used below.\r
79         /*\r
80         char[] tempBuffer = new char[text.length()];\r
81         text.getChars(0, text.length(), tempBuffer, 0);\r
82         bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));\r
83         */\r
84         // end debugging\r
85 \r
86         // To make things much easier, we will stack the boundaries, and then insert at the end.\r
87         // generally, we won't need too many, since we will be filtered.\r
88 \r
89         for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {\r
90             if (boundary == 0) continue;\r
91             // HACK: Check to see that preceeding item was a letter\r
92 \r
93             int cp = UTF16.charAt(text, boundary-1);\r
94             int type = UCharacter.getType(cp);\r
95             //System.out.println(Integer.toString(cp,16) + " (before): " + type);\r
96             if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;\r
97 \r
98             cp = UTF16.charAt(text, boundary);\r
99             type = UCharacter.getType(cp);\r
100             //System.out.println(Integer.toString(cp,16) + " (after): " + type);\r
101             if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;\r
102 \r
103             if (boundaryCount >= boundaries.length) {       // realloc if necessary\r
104                 int[] temp = new int[boundaries.length * 2];\r
105                 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);\r
106                 boundaries = temp;\r
107             }\r
108 \r
109             boundaries[boundaryCount++] = boundary;\r
110             //System.out.println(boundary);\r
111         }\r
112 \r
113         int delta = 0;\r
114         int lastBoundary = 0;\r
115 \r
116         if (boundaryCount != 0) { // if we found something, adjust\r
117             delta = boundaryCount * insertion.length();\r
118             lastBoundary = boundaries[boundaryCount-1];\r
119 \r
120             // we do this from the end backwards, so that we don't have to keep updating.\r
121 \r
122             while (boundaryCount > 0) {\r
123                 boundary = boundaries[--boundaryCount];\r
124                 text.replace(boundary, boundary, insertion);\r
125             }\r
126         }\r
127 \r
128         // Now fix up the return values\r
129         pos.contextLimit += delta;\r
130         pos.limit += delta;\r
131         pos.start = incremental ? lastBoundary + delta : pos.limit;\r
132     }\r
133 \r
134 \r
135     /**\r
136      * Registers standard variants with the system.  Called by\r
137      * Transliterator during initialization.\r
138      */\r
139     static void register() {\r
140         // false means that it is invisible\r
141         Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);\r
142         Transliterator.registerInstance(trans, false);\r
143         /*\r
144         Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {\r
145             public Transliterator getInstance(String ID) {\r
146                 return new BreakTransliterator("Any-Break", null);\r
147             }\r
148         });\r
149         */\r
150     }\r
151 \r
152     // Hack, just to get a real character iterator.\r
153     static final class ReplaceableCharacterIterator implements CharacterIterator\r
154     {\r
155         private Replaceable text;\r
156         private int begin;\r
157         private int end;\r
158         // invariant: begin <= pos <= end\r
159         private int pos;\r
160 \r
161         /**\r
162         * Constructs an iterator with an initial index of 0.\r
163         */\r
164         /*public ReplaceableCharacterIterator(Replaceable text)\r
165         {\r
166             this(text, 0);\r
167         }*/\r
168 \r
169         /**\r
170         * Constructs an iterator with the specified initial index.\r
171         *\r
172         * @param  text   The String to be iterated over\r
173         * @param  pos    Initial iterator position\r
174         */\r
175         /*public ReplaceableCharacterIterator(Replaceable text, int pos)\r
176         {\r
177             this(text, 0, text.length(), pos);\r
178         }*/\r
179 \r
180         /**\r
181         * Constructs an iterator over the given range of the given string, with the\r
182         * index set at the specified position.\r
183         *\r
184         * @param  text   The String to be iterated over\r
185         * @param  begin  Index of the first character\r
186         * @param  end    Index of the character following the last character\r
187         * @param  pos    Initial iterator position\r
188         */\r
189         public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {\r
190             if (text == null) {\r
191                 throw new NullPointerException();\r
192             }\r
193             this.text = text;\r
194 \r
195             if (begin < 0 || begin > end || end > text.length()) {\r
196                 throw new IllegalArgumentException("Invalid substring range");\r
197             }\r
198 \r
199             if (pos < begin || pos > end) {\r
200                 throw new IllegalArgumentException("Invalid position");\r
201             }\r
202 \r
203             this.begin = begin;\r
204             this.end = end;\r
205             this.pos = pos;\r
206         }\r
207 \r
208         /**\r
209         * Reset this iterator to point to a new string.  This package-visible\r
210         * method is used by other java.text classes that want to avoid allocating\r
211         * new ReplaceableCharacterIterator objects every time their setText method\r
212         * is called.\r
213         *\r
214         * @param  text   The String to be iterated over\r
215         */\r
216         public void setText(Replaceable text) {\r
217             if (text == null) {\r
218                 throw new NullPointerException();\r
219             }\r
220             this.text = text;\r
221             this.begin = 0;\r
222             this.end = text.length();\r
223             this.pos = 0;\r
224         }\r
225 \r
226         /**\r
227         * Implements CharacterIterator.first() for String.\r
228         * @see CharacterIterator#first\r
229         */\r
230         public char first()\r
231         {\r
232             pos = begin;\r
233             return current();\r
234         }\r
235 \r
236         /**\r
237         * Implements CharacterIterator.last() for String.\r
238         * @see CharacterIterator#last\r
239         */\r
240         public char last()\r
241         {\r
242             if (end != begin) {\r
243                 pos = end - 1;\r
244             } else {\r
245                 pos = end;\r
246             }\r
247             return current();\r
248         }\r
249 \r
250         /**\r
251         * Implements CharacterIterator.setIndex() for String.\r
252         * @see CharacterIterator#setIndex\r
253         */\r
254         public char setIndex(int p)\r
255         {\r
256         if (p < begin || p > end) {\r
257                 throw new IllegalArgumentException("Invalid index");\r
258         }\r
259             pos = p;\r
260             return current();\r
261         }\r
262 \r
263         /**\r
264         * Implements CharacterIterator.current() for String.\r
265         * @see CharacterIterator#current\r
266         */\r
267         public char current()\r
268         {\r
269             if (pos >= begin && pos < end) {\r
270                 return text.charAt(pos);\r
271             }\r
272             else {\r
273                 return DONE;\r
274             }\r
275         }\r
276 \r
277         /**\r
278         * Implements CharacterIterator.next() for String.\r
279         * @see CharacterIterator#next\r
280         */\r
281         public char next()\r
282         {\r
283             if (pos < end - 1) {\r
284                 pos++;\r
285                 return text.charAt(pos);\r
286             }\r
287             else {\r
288                 pos = end;\r
289                 return DONE;\r
290             }\r
291         }\r
292 \r
293         /**\r
294         * Implements CharacterIterator.previous() for String.\r
295         * @see CharacterIterator#previous\r
296         */\r
297         public char previous()\r
298         {\r
299             if (pos > begin) {\r
300                 pos--;\r
301                 return text.charAt(pos);\r
302             }\r
303             else {\r
304                 return DONE;\r
305             }\r
306         }\r
307 \r
308         /**\r
309         * Implements CharacterIterator.getBeginIndex() for String.\r
310         * @see CharacterIterator#getBeginIndex\r
311         */\r
312         public int getBeginIndex()\r
313         {\r
314             return begin;\r
315         }\r
316 \r
317         /**\r
318         * Implements CharacterIterator.getEndIndex() for String.\r
319         * @see CharacterIterator#getEndIndex\r
320         */\r
321         public int getEndIndex()\r
322         {\r
323             return end;\r
324         }\r
325 \r
326         /**\r
327         * Implements CharacterIterator.getIndex() for String.\r
328         * @see CharacterIterator#getIndex\r
329         */\r
330         public int getIndex()\r
331         {\r
332             return pos;\r
333         }\r
334 \r
335         /**\r
336         * Compares the equality of two ReplaceableCharacterIterator objects.\r
337         * @param obj the ReplaceableCharacterIterator object to be compared with.\r
338         * @return true if the given obj is the same as this\r
339         * ReplaceableCharacterIterator object; false otherwise.\r
340         */\r
341         public boolean equals(Object obj)\r
342         {\r
343             if (this == obj) {\r
344                 return true;\r
345             }\r
346             if (!(obj instanceof ReplaceableCharacterIterator)) {\r
347                 return false;\r
348             }\r
349 \r
350             ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;\r
351 \r
352             if (hashCode() != that.hashCode()) {\r
353                 return false;\r
354             }\r
355             if (!text.equals(that.text)) {\r
356                 return false;\r
357             }\r
358             if (pos != that.pos || begin != that.begin || end != that.end) {\r
359                 return false;\r
360             }\r
361             return true;\r
362         }\r
363 \r
364         /**\r
365         * Computes a hashcode for this iterator.\r
366         * @return A hash code\r
367         */\r
368         public int hashCode()\r
369         {\r
370             return text.hashCode() ^ pos ^ begin ^ end;\r
371         }\r
372 \r
373         /**\r
374         * Creates a copy of this iterator.\r
375         * @return A copy of this\r
376         */\r
377         public Object clone()\r
378         {\r
379             try {\r
380                 ReplaceableCharacterIterator other\r
381                 = (ReplaceableCharacterIterator) super.clone();\r
382                 return other;\r
383             }\r
384             catch (CloneNotSupportedException e) {\r
385                 throw new IllegalStateException();\r
386             }\r
387         }\r
388 \r
389     }\r
390 \r
391 }\r