]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / classes / translit / src / com / ibm / icu / text / BreakTransliterator.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.text;
8
9 import java.text.CharacterIterator;
10
11 import com.ibm.icu.lang.UCharacter;
12 import com.ibm.icu.util.ULocale;
13
14
15 /**
16  * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
17  * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
18  */
19 final class BreakTransliterator extends Transliterator {
20     private BreakIterator bi;
21     private String insertion;
22     private int[] boundaries = new int[50];
23     private int boundaryCount = 0;
24
25     public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {
26         super(ID, filter);
27         this.bi = bi;
28         this.insertion = insertion;
29     }
30
31     public BreakTransliterator(String ID, UnicodeFilter filter) {
32         this(ID, filter, null, " ");
33     }
34
35     ///CLOVER:OFF
36     // The following method is not called by anything and can't be reached
37     public String getInsertion() {
38         return insertion;
39     }
40     ///CLOVER:ON
41
42     ///CLOVER:OFF
43     // The following method is not called by anything and can't be reached
44     public void setInsertion(String insertion) {
45         this.insertion = insertion;
46     }
47     ///CLOVER:ON
48
49     public BreakIterator getBreakIterator() {
50         // Defer initialization of BreakIterator because it is slow,
51         // typically over 2000 ms.
52         if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
53         return bi;
54     }
55
56     ///CLOVER:OFF
57     // The following method is not called by anything and can't be reached
58     public void setBreakIterator(BreakIterator bi) {
59         this.bi = bi;
60     }
61     ///CLOVER:ON
62
63     static final int LETTER_OR_MARK_MASK =
64           (1<<Character.UPPERCASE_LETTER)
65         | (1<<Character.LOWERCASE_LETTER)
66         | (1<<Character.TITLECASE_LETTER)
67         | (1<<Character.MODIFIER_LETTER)
68         | (1<<Character.OTHER_LETTER)
69         | (1<<Character.COMBINING_SPACING_MARK)
70         | (1<<Character.NON_SPACING_MARK)
71         | (1<<Character.ENCLOSING_MARK)
72         ;
73     protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
74         boundaryCount = 0;
75         int boundary = 0;
76         getBreakIterator(); // Lazy-create it if necessary
77         bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
78         // TODO: fix clumsy workaround used below.
79         /*
80         char[] tempBuffer = new char[text.length()];
81         text.getChars(0, text.length(), tempBuffer, 0);
82         bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
83         */
84         // end debugging
85
86         // To make things much easier, we will stack the boundaries, and then insert at the end.
87         // generally, we won't need too many, since we will be filtered.
88
89         for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
90             if (boundary == 0) continue;
91             // HACK: Check to see that preceeding item was a letter
92
93             int cp = UTF16.charAt(text, boundary-1);
94             int type = UCharacter.getType(cp);
95             //System.out.println(Integer.toString(cp,16) + " (before): " + type);
96             if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
97
98             cp = UTF16.charAt(text, boundary);
99             type = UCharacter.getType(cp);
100             //System.out.println(Integer.toString(cp,16) + " (after): " + type);
101             if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
102
103             if (boundaryCount >= boundaries.length) {       // realloc if necessary
104                 int[] temp = new int[boundaries.length * 2];
105                 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
106                 boundaries = temp;
107             }
108
109             boundaries[boundaryCount++] = boundary;
110             //System.out.println(boundary);
111         }
112
113         int delta = 0;
114         int lastBoundary = 0;
115
116         if (boundaryCount != 0) { // if we found something, adjust
117             delta = boundaryCount * insertion.length();
118             lastBoundary = boundaries[boundaryCount-1];
119
120             // we do this from the end backwards, so that we don't have to keep updating.
121
122             while (boundaryCount > 0) {
123                 boundary = boundaries[--boundaryCount];
124                 text.replace(boundary, boundary, insertion);
125             }
126         }
127
128         // Now fix up the return values
129         pos.contextLimit += delta;
130         pos.limit += delta;
131         pos.start = incremental ? lastBoundary + delta : pos.limit;
132     }
133
134
135     /**
136      * Registers standard variants with the system.  Called by
137      * Transliterator during initialization.
138      */
139     static void register() {
140         // false means that it is invisible
141         Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);
142         Transliterator.registerInstance(trans, false);
143         /*
144         Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
145             public Transliterator getInstance(String ID) {
146                 return new BreakTransliterator("Any-Break", null);
147             }
148         });
149         */
150     }
151
152     // Hack, just to get a real character iterator.
153     static final class ReplaceableCharacterIterator implements CharacterIterator
154     {
155         private Replaceable text;
156         private int begin;
157         private int end;
158         // invariant: begin <= pos <= end
159         private int pos;
160
161         /**
162         * Constructs an iterator with an initial index of 0.
163         */
164         /*public ReplaceableCharacterIterator(Replaceable text)
165         {
166             this(text, 0);
167         }*/
168
169         /**
170         * Constructs an iterator with the specified initial index.
171         *
172         * @param  text   The String to be iterated over
173         * @param  pos    Initial iterator position
174         */
175         /*public ReplaceableCharacterIterator(Replaceable text, int pos)
176         {
177             this(text, 0, text.length(), pos);
178         }*/
179
180         /**
181         * Constructs an iterator over the given range of the given string, with the
182         * index set at the specified position.
183         *
184         * @param  text   The String to be iterated over
185         * @param  begin  Index of the first character
186         * @param  end    Index of the character following the last character
187         * @param  pos    Initial iterator position
188         */
189         public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {
190             if (text == null) {
191                 throw new NullPointerException();
192             }
193             this.text = text;
194
195             if (begin < 0 || begin > end || end > text.length()) {
196                 throw new IllegalArgumentException("Invalid substring range");
197             }
198
199             if (pos < begin || pos > end) {
200                 throw new IllegalArgumentException("Invalid position");
201             }
202
203             this.begin = begin;
204             this.end = end;
205             this.pos = pos;
206         }
207
208         /**
209         * Reset this iterator to point to a new string.  This package-visible
210         * method is used by other java.text classes that want to avoid allocating
211         * new ReplaceableCharacterIterator objects every time their setText method
212         * is called.
213         *
214         * @param  text   The String to be iterated over
215         */
216         public void setText(Replaceable text) {
217             if (text == null) {
218                 throw new NullPointerException();
219             }
220             this.text = text;
221             this.begin = 0;
222             this.end = text.length();
223             this.pos = 0;
224         }
225
226         /**
227         * Implements CharacterIterator.first() for String.
228         * @see CharacterIterator#first
229         */
230         public char first()
231         {
232             pos = begin;
233             return current();
234         }
235
236         /**
237         * Implements CharacterIterator.last() for String.
238         * @see CharacterIterator#last
239         */
240         public char last()
241         {
242             if (end != begin) {
243                 pos = end - 1;
244             } else {
245                 pos = end;
246             }
247             return current();
248         }
249
250         /**
251         * Implements CharacterIterator.setIndex() for String.
252         * @see CharacterIterator#setIndex
253         */
254         public char setIndex(int p)
255         {
256         if (p < begin || p > end) {
257                 throw new IllegalArgumentException("Invalid index");
258         }
259             pos = p;
260             return current();
261         }
262
263         /**
264         * Implements CharacterIterator.current() for String.
265         * @see CharacterIterator#current
266         */
267         public char current()
268         {
269             if (pos >= begin && pos < end) {
270                 return text.charAt(pos);
271             }
272             else {
273                 return DONE;
274             }
275         }
276
277         /**
278         * Implements CharacterIterator.next() for String.
279         * @see CharacterIterator#next
280         */
281         public char next()
282         {
283             if (pos < end - 1) {
284                 pos++;
285                 return text.charAt(pos);
286             }
287             else {
288                 pos = end;
289                 return DONE;
290             }
291         }
292
293         /**
294         * Implements CharacterIterator.previous() for String.
295         * @see CharacterIterator#previous
296         */
297         public char previous()
298         {
299             if (pos > begin) {
300                 pos--;
301                 return text.charAt(pos);
302             }
303             else {
304                 return DONE;
305             }
306         }
307
308         /**
309         * Implements CharacterIterator.getBeginIndex() for String.
310         * @see CharacterIterator#getBeginIndex
311         */
312         public int getBeginIndex()
313         {
314             return begin;
315         }
316
317         /**
318         * Implements CharacterIterator.getEndIndex() for String.
319         * @see CharacterIterator#getEndIndex
320         */
321         public int getEndIndex()
322         {
323             return end;
324         }
325
326         /**
327         * Implements CharacterIterator.getIndex() for String.
328         * @see CharacterIterator#getIndex
329         */
330         public int getIndex()
331         {
332             return pos;
333         }
334
335         /**
336         * Compares the equality of two ReplaceableCharacterIterator objects.
337         * @param obj the ReplaceableCharacterIterator object to be compared with.
338         * @return true if the given obj is the same as this
339         * ReplaceableCharacterIterator object; false otherwise.
340         */
341         public boolean equals(Object obj)
342         {
343             if (this == obj) {
344                 return true;
345             }
346             if (!(obj instanceof ReplaceableCharacterIterator)) {
347                 return false;
348             }
349
350             ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;
351
352             if (hashCode() != that.hashCode()) {
353                 return false;
354             }
355             if (!text.equals(that.text)) {
356                 return false;
357             }
358             if (pos != that.pos || begin != that.begin || end != that.end) {
359                 return false;
360             }
361             return true;
362         }
363
364         /**
365         * Computes a hashcode for this iterator.
366         * @return A hash code
367         */
368         public int hashCode()
369         {
370             return text.hashCode() ^ pos ^ begin ^ end;
371         }
372
373         /**
374         * Creates a copy of this iterator.
375         * @return A copy of this
376         */
377         public Object clone()
378         {
379             try {
380                 ReplaceableCharacterIterator other
381                 = (ReplaceableCharacterIterator) super.clone();
382                 return other;
383             }
384             catch (CloneNotSupportedException e) {
385                 throw new IllegalStateException();
386             }
387         }
388
389     }
390     /* (non-Javadoc)
391      * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
392      */
393     @Override
394     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
395         UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
396         // Doesn't actually modify the source characters, so leave them alone.
397         // add the characters inserted
398         if (myFilter.size() != 0) {
399             targetSet.addAll(insertion);
400         }
401     }
402
403 }