2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.text.CharacterIterator;
11 import com.ibm.icu.lang.UCharacter;
12 import com.ibm.icu.util.ULocale;
16 * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
17 * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
19 final class BreakTransliterator extends Transliterator {
20 private BreakIterator bi;
21 private String insertion;
22 private int[] boundaries = new int[50];
23 private int boundaryCount = 0;
/**
 * Creates a transliterator that inserts {@code insertion} at break positions.
 *
 * @param ID the transliterator ID, handed to the superclass constructor
 * @param filter the filter, handed to the superclass constructor
 * @param bi the break iterator to use; when null, getBreakIterator() creates one on first use
 * @param insertion the string to insert at each accepted break position
 */
public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {
    super(ID, filter);
    this.insertion = insertion;
    this.bi = bi;
}
/**
 * Creates a transliterator with no preset break iterator that inserts a single space.
 *
 * @param ID the transliterator ID
 * @param filter the filter to apply
 */
public BreakTransliterator(String ID, UnicodeFilter filter) {
    // Delegate with a null iterator (created lazily later) and " " as the inserted string.
    this(ID, filter, null, " ");
}
36 // The following method is not called by anything and can't be reached
37 public String getInsertion() {
43 // The following method is not called by anything and can't be reached
44 public void setInsertion(String insertion) {
45 this.insertion = insertion;
49 public BreakIterator getBreakIterator() {
50 // Defer initialization of BreakIterator because it is slow,
51 // typically over 2000 ms.
52 if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
57 // The following method is not called by anything and can't be reached
58 public void setBreakIterator(BreakIterator bi) {
63 static final int LETTER_OR_MARK_MASK =
64 (1<<Character.UPPERCASE_LETTER)
65 | (1<<Character.LOWERCASE_LETTER)
66 | (1<<Character.TITLECASE_LETTER)
67 | (1<<Character.MODIFIER_LETTER)
68 | (1<<Character.OTHER_LETTER)
69 | (1<<Character.COMBINING_SPACING_MARK)
70 | (1<<Character.NON_SPACING_MARK)
71 | (1<<Character.ENCLOSING_MARK)
73 protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
76 getBreakIterator(); // Lazy-create it if necessary
77 bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
78 // TODO: fix clumsy workaround used below.
80 char[] tempBuffer = new char[text.length()];
81 text.getChars(0, text.length(), tempBuffer, 0);
82 bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
86 // To make things much easier, we will stack the boundaries, and then insert at the end.
87 // generally, we won't need too many, since we will be filtered.
89 for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
90 if (boundary == 0) continue;
91 // HACK: Check to see that preceeding item was a letter
93 int cp = UTF16.charAt(text, boundary-1);
94 int type = UCharacter.getType(cp);
95 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
96 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
98 cp = UTF16.charAt(text, boundary);
99 type = UCharacter.getType(cp);
100 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
101 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
103 if (boundaryCount >= boundaries.length) { // realloc if necessary
104 int[] temp = new int[boundaries.length * 2];
105 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
109 boundaries[boundaryCount++] = boundary;
110 //System.out.println(boundary);
114 int lastBoundary = 0;
116 if (boundaryCount != 0) { // if we found something, adjust
117 delta = boundaryCount * insertion.length();
118 lastBoundary = boundaries[boundaryCount-1];
120 // we do this from the end backwards, so that we don't have to keep updating.
122 while (boundaryCount > 0) {
123 boundary = boundaries[--boundaryCount];
124 text.replace(boundary, boundary, insertion);
128 // Now fix up the return values
129 pos.contextLimit += delta;
131 pos.start = incremental ? lastBoundary + delta : pos.limit;
136 * Registers standard variants with the system. Called by
137 * Transliterator during initialization.
// Builds a BreakTransliterator with the ID "Any-BreakInternal" and a null filter and passes it
// to Transliterator.registerInstance.
139 static void register() {
140 // false means that it is invisible
141 Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);
142 Transliterator.registerInstance(trans, false);
// NOTE(review): the lines surrounding the registerFactory call below are not visible in this
// listing, so it cannot be told from here whether this "Any-Break" factory registration is
// live code or sits inside a comment block -- confirm against the full file.
144 Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
// Each call to the factory builds a fresh BreakTransliterator with the ID "Any-Break" and a null filter.
145 public Transliterator getInstance(String ID) {
146 return new BreakTransliterator("Any-Break", null);
152 // Hack, just to get a real character iterator.
153 static final class ReplaceableCharacterIterator implements CharacterIterator
155 private Replaceable text;
158 // invariant: begin <= pos <= end
162 * Constructs an iterator with an initial index of 0.
164 /*public ReplaceableCharacterIterator(Replaceable text)
170 * Constructs an iterator with the specified initial index.
172 * @param text The String to be iterated over
173 * @param pos Initial iterator position
175 /*public ReplaceableCharacterIterator(Replaceable text, int pos)
177 this(text, 0, text.length(), pos);
181 * Constructs an iterator over the given range of the given string, with the
182 * index set at the specified position.
184 * @param text The String to be iterated over
185 * @param begin Index of the first character
186 * @param end Index of the character following the last character
187 * @param pos Initial iterator position
189 public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {
191 throw new NullPointerException();
195 if (begin < 0 || begin > end || end > text.length()) {
196 throw new IllegalArgumentException("Invalid substring range");
199 if (pos < begin || pos > end) {
200 throw new IllegalArgumentException("Invalid position");
209 * Reset this iterator to point to a new string. This package-visible
210 * method is used by other java.text classes that want to avoid allocating
211 * new ReplaceableCharacterIterator objects every time their setText method
214 * @param text The String to be iterated over
216 public void setText(Replaceable text) {
218 throw new NullPointerException();
222 this.end = text.length();
227 * Implements CharacterIterator.first() for String.
228 * @see CharacterIterator#first
237 * Implements CharacterIterator.last() for String.
238 * @see CharacterIterator#last
251 * Implements CharacterIterator.setIndex() for String.
252 * @see CharacterIterator#setIndex
254 public char setIndex(int p)
256 if (p < begin || p > end) {
257 throw new IllegalArgumentException("Invalid index");
264 * Implements CharacterIterator.current() for String.
265 * @see CharacterIterator#current
267 public char current()
269 if (pos >= begin && pos < end) {
270 return text.charAt(pos);
278 * Implements CharacterIterator.next() for String.
279 * @see CharacterIterator#next
285 return text.charAt(pos);
294 * Implements CharacterIterator.previous() for String.
295 * @see CharacterIterator#previous
297 public char previous()
301 return text.charAt(pos);
309 * Implements CharacterIterator.getBeginIndex() for String.
310 * @see CharacterIterator#getBeginIndex
312 public int getBeginIndex()
318 * Implements CharacterIterator.getEndIndex() for String.
319 * @see CharacterIterator#getEndIndex
321 public int getEndIndex()
327 * Implements CharacterIterator.getIndex() for String.
328 * @see CharacterIterator#getIndex
330 public int getIndex()
336 * Compares the equality of two ReplaceableCharacterIterator objects.
337 * @param obj the ReplaceableCharacterIterator object to be compared with.
338 * @return true if the given obj is the same as this
339 * ReplaceableCharacterIterator object; false otherwise.
341 public boolean equals(Object obj)
346 if (!(obj instanceof ReplaceableCharacterIterator)) {
350 ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;
352 if (hashCode() != that.hashCode()) {
355 if (!text.equals(that.text)) {
358 if (pos != that.pos || begin != that.begin || end != that.end) {
365 * Computes a hashcode for this iterator.
366 * @return A hash code
368 public int hashCode()
370 return text.hashCode() ^ pos ^ begin ^ end;
374 * Creates a copy of this iterator.
375 * @return A copy of this
377 public Object clone()
380 ReplaceableCharacterIterator other
381 = (ReplaceableCharacterIterator) super.clone();
384 catch (CloneNotSupportedException e) {
385 throw new IllegalStateException();
391 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
394 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
395 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
396 // Doesn't actually modify the source characters, so leave them alone.
397 // add the characters inserted
398 if (myFilter.size() != 0) {
399 targetSet.addAll(insertion);