]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/text/BreakTransliterator.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / text / BreakTransliterator.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2007, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.text;\r
8 \r
9 import com.ibm.icu.lang.UCharacter;\r
10 import com.ibm.icu.util.ULocale;\r
11 \r
12 import java.text.CharacterIterator;\r
13 \r
14 \r
15 /**\r
16  * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.\r
17  * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.\r
18  */\r
19 final class BreakTransliterator extends Transliterator {\r
20     private BreakIterator bi;\r
21     private String insertion;\r
22     private int[] boundaries = new int[50];\r
23     private int boundaryCount = 0;\r
24 \r
25     public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {\r
26         super(ID, filter);\r
27         this.bi = bi;\r
28         this.insertion = insertion;\r
29     }\r
30 \r
31     public BreakTransliterator(String ID, UnicodeFilter filter) {\r
32         this(ID, filter, null, " ");\r
33     }\r
34 \r
35     public String getInsertion() {\r
36         return insertion;\r
37     }\r
38 \r
39     public void setInsertion(String insertion) {\r
40         this.insertion = insertion;\r
41     }\r
42 \r
43     public BreakIterator getBreakIterator() {\r
44         // Defer initialization of BreakIterator because it is slow,\r
45         // typically over 2000 ms.\r
46         if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));\r
47         return bi;\r
48     }\r
49 \r
50     public void setBreakIterator(BreakIterator bi) {\r
51         this.bi = bi;\r
52     }\r
53 \r
54     static final int LETTER_OR_MARK_MASK =\r
55           (1<<Character.UPPERCASE_LETTER)\r
56         | (1<<Character.LOWERCASE_LETTER)\r
57         | (1<<Character.TITLECASE_LETTER)\r
58         | (1<<Character.MODIFIER_LETTER)\r
59         | (1<<Character.OTHER_LETTER)\r
60         | (1<<Character.COMBINING_SPACING_MARK)\r
61         | (1<<Character.NON_SPACING_MARK)\r
62         | (1<<Character.ENCLOSING_MARK)\r
63         ;\r
64     protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) {\r
65         boundaryCount = 0;\r
66         int boundary = 0;\r
67         getBreakIterator(); // Lazy-create it if necessary\r
68         bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));\r
69         // TODO: fix clumsy workaround used below.\r
70         /*\r
71         char[] tempBuffer = new char[text.length()];\r
72         text.getChars(0, text.length(), tempBuffer, 0);\r
73         bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));\r
74         */\r
75         // end debugging\r
76 \r
77         // To make things much easier, we will stack the boundaries, and then insert at the end.\r
78         // generally, we won't need too many, since we will be filtered.\r
79 \r
80         for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {\r
81             if (boundary == 0) continue;\r
82             // HACK: Check to see that preceeding item was a letter\r
83 \r
84             int cp = UTF16.charAt(text, boundary-1);\r
85             int type = UCharacter.getType(cp);\r
86             //System.out.println(Integer.toString(cp,16) + " (before): " + type);\r
87             if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;\r
88 \r
89             cp = UTF16.charAt(text, boundary);\r
90             type = UCharacter.getType(cp);\r
91             //System.out.println(Integer.toString(cp,16) + " (after): " + type);\r
92             if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;\r
93 \r
94             if (boundaryCount >= boundaries.length) {       // realloc if necessary\r
95                 int[] temp = new int[boundaries.length * 2];\r
96                 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);\r
97                 boundaries = temp;\r
98             }\r
99 \r
100             boundaries[boundaryCount++] = boundary;\r
101             //System.out.println(boundary);\r
102         }\r
103 \r
104         int delta = 0;\r
105         int lastBoundary = 0;\r
106 \r
107         if (boundaryCount != 0) { // if we found something, adjust\r
108             delta = boundaryCount * insertion.length();\r
109             lastBoundary = boundaries[boundaryCount-1];\r
110 \r
111             // we do this from the end backwards, so that we don't have to keep updating.\r
112 \r
113             while (boundaryCount > 0) {\r
114                 boundary = boundaries[--boundaryCount];\r
115                 text.replace(boundary, boundary, insertion);\r
116             }\r
117         }\r
118 \r
119         // Now fix up the return values\r
120         pos.contextLimit += delta;\r
121         pos.limit += delta;\r
122         pos.start = incremental ? lastBoundary + delta : pos.limit;\r
123     }\r
124 \r
125 \r
126     /**\r
127      * Registers standard variants with the system.  Called by\r
128      * Transliterator during initialization.\r
129      */\r
130     static void register() {\r
131         // false means that it is invisible\r
132         Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);\r
133         Transliterator.registerInstance(trans, false);\r
134         /*\r
135         Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {\r
136             public Transliterator getInstance(String ID) {\r
137                 return new BreakTransliterator("Any-Break", null);\r
138             }\r
139         });\r
140         */\r
141     }\r
142 \r
143     // Hack, just to get a real character iterator.\r
144     static final class ReplaceableCharacterIterator implements CharacterIterator\r
145     {\r
146         private Replaceable text;\r
147         private int begin;\r
148         private int end;\r
149         // invariant: begin <= pos <= end\r
150         private int pos;\r
151 \r
152         /**\r
153         * Constructs an iterator with an initial index of 0.\r
154         */\r
155         /*public ReplaceableCharacterIterator(Replaceable text)\r
156         {\r
157             this(text, 0);\r
158         }*/\r
159 \r
160         /**\r
161         * Constructs an iterator with the specified initial index.\r
162         *\r
163         * @param  text   The String to be iterated over\r
164         * @param  pos    Initial iterator position\r
165         */\r
166         /*public ReplaceableCharacterIterator(Replaceable text, int pos)\r
167         {\r
168             this(text, 0, text.length(), pos);\r
169         }*/\r
170 \r
171         /**\r
172         * Constructs an iterator over the given range of the given string, with the\r
173         * index set at the specified position.\r
174         *\r
175         * @param  text   The String to be iterated over\r
176         * @param  begin  Index of the first character\r
177         * @param  end    Index of the character following the last character\r
178         * @param  pos    Initial iterator position\r
179         */\r
180         public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {\r
181             if (text == null) {\r
182                 throw new NullPointerException();\r
183             }\r
184             this.text = text;\r
185 \r
186             if (begin < 0 || begin > end || end > text.length()) {\r
187                 throw new IllegalArgumentException("Invalid substring range");\r
188             }\r
189 \r
190             if (pos < begin || pos > end) {\r
191                 throw new IllegalArgumentException("Invalid position");\r
192             }\r
193 \r
194             this.begin = begin;\r
195             this.end = end;\r
196             this.pos = pos;\r
197         }\r
198 \r
199         /**\r
200         * Reset this iterator to point to a new string.  This package-visible\r
201         * method is used by other java.text classes that want to avoid allocating\r
202         * new ReplaceableCharacterIterator objects every time their setText method\r
203         * is called.\r
204         *\r
205         * @param  text   The String to be iterated over\r
206         */\r
207         public void setText(Replaceable text) {\r
208             if (text == null) {\r
209                 throw new NullPointerException();\r
210             }\r
211             this.text = text;\r
212             this.begin = 0;\r
213             this.end = text.length();\r
214             this.pos = 0;\r
215         }\r
216 \r
217         /**\r
218         * Implements CharacterIterator.first() for String.\r
219         * @see CharacterIterator#first\r
220         */\r
221         public char first()\r
222         {\r
223             pos = begin;\r
224             return current();\r
225         }\r
226 \r
227         /**\r
228         * Implements CharacterIterator.last() for String.\r
229         * @see CharacterIterator#last\r
230         */\r
231         public char last()\r
232         {\r
233             if (end != begin) {\r
234                 pos = end - 1;\r
235             } else {\r
236                 pos = end;\r
237             }\r
238             return current();\r
239         }\r
240 \r
241         /**\r
242         * Implements CharacterIterator.setIndex() for String.\r
243         * @see CharacterIterator#setIndex\r
244         */\r
245         public char setIndex(int p)\r
246         {\r
247         if (p < begin || p > end) {\r
248                 throw new IllegalArgumentException("Invalid index");\r
249         }\r
250             pos = p;\r
251             return current();\r
252         }\r
253 \r
254         /**\r
255         * Implements CharacterIterator.current() for String.\r
256         * @see CharacterIterator#current\r
257         */\r
258         public char current()\r
259         {\r
260             if (pos >= begin && pos < end) {\r
261                 return text.charAt(pos);\r
262             }\r
263             else {\r
264                 return DONE;\r
265             }\r
266         }\r
267 \r
268         /**\r
269         * Implements CharacterIterator.next() for String.\r
270         * @see CharacterIterator#next\r
271         */\r
272         public char next()\r
273         {\r
274             if (pos < end - 1) {\r
275                 pos++;\r
276                 return text.charAt(pos);\r
277             }\r
278             else {\r
279                 pos = end;\r
280                 return DONE;\r
281             }\r
282         }\r
283 \r
284         /**\r
285         * Implements CharacterIterator.previous() for String.\r
286         * @see CharacterIterator#previous\r
287         */\r
288         public char previous()\r
289         {\r
290             if (pos > begin) {\r
291                 pos--;\r
292                 return text.charAt(pos);\r
293             }\r
294             else {\r
295                 return DONE;\r
296             }\r
297         }\r
298 \r
299         /**\r
300         * Implements CharacterIterator.getBeginIndex() for String.\r
301         * @see CharacterIterator#getBeginIndex\r
302         */\r
303         public int getBeginIndex()\r
304         {\r
305             return begin;\r
306         }\r
307 \r
308         /**\r
309         * Implements CharacterIterator.getEndIndex() for String.\r
310         * @see CharacterIterator#getEndIndex\r
311         */\r
312         public int getEndIndex()\r
313         {\r
314             return end;\r
315         }\r
316 \r
317         /**\r
318         * Implements CharacterIterator.getIndex() for String.\r
319         * @see CharacterIterator#getIndex\r
320         */\r
321         public int getIndex()\r
322         {\r
323             return pos;\r
324         }\r
325 \r
326         /**\r
327         * Compares the equality of two ReplaceableCharacterIterator objects.\r
328         * @param obj the ReplaceableCharacterIterator object to be compared with.\r
329         * @return true if the given obj is the same as this\r
330         * ReplaceableCharacterIterator object; false otherwise.\r
331         */\r
332         public boolean equals(Object obj)\r
333         {\r
334             if (this == obj) {\r
335                 return true;\r
336             }\r
337             if (!(obj instanceof ReplaceableCharacterIterator)) {\r
338                 return false;\r
339             }\r
340 \r
341             ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;\r
342 \r
343             if (hashCode() != that.hashCode()) {\r
344                 return false;\r
345             }\r
346             if (!text.equals(that.text)) {\r
347                 return false;\r
348             }\r
349             if (pos != that.pos || begin != that.begin || end != that.end) {\r
350                 return false;\r
351             }\r
352             return true;\r
353         }\r
354 \r
355         /**\r
356         * Computes a hashcode for this iterator.\r
357         * @return A hash code\r
358         */\r
359         public int hashCode()\r
360         {\r
361             return text.hashCode() ^ pos ^ begin ^ end;\r
362         }\r
363 \r
364         /**\r
365         * Creates a copy of this iterator.\r
366         * @return A copy of this\r
367         */\r
368         public Object clone()\r
369         {\r
370             try {\r
371                 ReplaceableCharacterIterator other\r
372                 = (ReplaceableCharacterIterator) super.clone();\r
373                 return other;\r
374             }\r
375             catch (CloneNotSupportedException e) {\r
376                 throw new IllegalStateException();\r
377             }\r
378         }\r
379 \r
380     }\r
381 \r
382 }\r