2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import java.text.CharacterIterator;
\r
11 import com.ibm.icu.lang.UCharacter;
\r
12 import com.ibm.icu.util.ULocale;
\r
16 * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
\r
17 * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
\r
19 final class BreakTransliterator extends Transliterator {
\r
20 private BreakIterator bi;
\r
21 private String insertion;
\r
22 private int[] boundaries = new int[50];
\r
23 private int boundaryCount = 0;
\r
25 public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {
\r
28 this.insertion = insertion;
\r
31 public BreakTransliterator(String ID, UnicodeFilter filter) {
\r
32 this(ID, filter, null, " ");
\r
36 // The following method is not called by anything and can't be reached
\r
37 public String getInsertion() {
\r
43 // The following method is not called by anything and can't be reached
\r
44 public void setInsertion(String insertion) {
\r
45 this.insertion = insertion;
\r
49 public BreakIterator getBreakIterator() {
\r
50 // Defer initialization of BreakIterator because it is slow,
\r
51 // typically over 2000 ms.
\r
52 if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
\r
57 // The following method is not called by anything and can't be reached
\r
58 public void setBreakIterator(BreakIterator bi) {
\r
63 static final int LETTER_OR_MARK_MASK =
\r
64 (1<<Character.UPPERCASE_LETTER)
\r
65 | (1<<Character.LOWERCASE_LETTER)
\r
66 | (1<<Character.TITLECASE_LETTER)
\r
67 | (1<<Character.MODIFIER_LETTER)
\r
68 | (1<<Character.OTHER_LETTER)
\r
69 | (1<<Character.COMBINING_SPACING_MARK)
\r
70 | (1<<Character.NON_SPACING_MARK)
\r
71 | (1<<Character.ENCLOSING_MARK)
\r
73 protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
\r
76 getBreakIterator(); // Lazy-create it if necessary
\r
77 bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
\r
78 // TODO: fix clumsy workaround used below.
\r
80 char[] tempBuffer = new char[text.length()];
\r
81 text.getChars(0, text.length(), tempBuffer, 0);
\r
82 bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
\r
86 // To make things much easier, we will stack the boundaries, and then insert at the end.
\r
87 // Generally, we won't need too many, since we will be filtered.
\r
89 for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
\r
90 if (boundary == 0) continue;
\r
91 // HACK: Check to see that preceding item was a letter
\r
93 int cp = UTF16.charAt(text, boundary-1);
\r
94 int type = UCharacter.getType(cp);
\r
95 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
\r
96 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
\r
98 cp = UTF16.charAt(text, boundary);
\r
99 type = UCharacter.getType(cp);
\r
100 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
\r
101 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
\r
103 if (boundaryCount >= boundaries.length) { // realloc if necessary
\r
104 int[] temp = new int[boundaries.length * 2];
\r
105 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
\r
109 boundaries[boundaryCount++] = boundary;
\r
110 //System.out.println(boundary);
\r
114 int lastBoundary = 0;
\r
116 if (boundaryCount != 0) { // if we found something, adjust
\r
117 delta = boundaryCount * insertion.length();
\r
118 lastBoundary = boundaries[boundaryCount-1];
\r
120 // we do this from the end backwards, so that we don't have to keep updating.
\r
122 while (boundaryCount > 0) {
\r
123 boundary = boundaries[--boundaryCount];
\r
124 text.replace(boundary, boundary, insertion);
\r
128 // Now fix up the return values
\r
129 pos.contextLimit += delta;
\r
130 pos.limit += delta;
\r
131 pos.start = incremental ? lastBoundary + delta : pos.limit;
\r
136 * Registers standard variants with the system. Called by
\r
137 * Transliterator during initialization.
\r
139 static void register() {
\r
140 // false means that it is invisible
\r
141 Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);
\r
142 Transliterator.registerInstance(trans, false);
\r
144 Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
\r
145 public Transliterator getInstance(String ID) {
\r
146 return new BreakTransliterator("Any-Break", null);
\r
152 // Hack, just to get a real character iterator.
\r
153 static final class ReplaceableCharacterIterator implements CharacterIterator
\r
155 private Replaceable text;
\r
158 // invariant: begin <= pos <= end
\r
162 * Constructs an iterator with an initial index of 0.
\r
164 /*public ReplaceableCharacterIterator(Replaceable text)
\r
170 * Constructs an iterator with the specified initial index.
\r
172 * @param text The Replaceable text to be iterated over
\r
173 * @param pos Initial iterator position
\r
175 /*public ReplaceableCharacterIterator(Replaceable text, int pos)
\r
177 this(text, 0, text.length(), pos);
\r
181 * Constructs an iterator over the given range of the given string, with the
\r
182 * index set at the specified position.
\r
184 * @param text The Replaceable text to be iterated over
\r
185 * @param begin Index of the first character
\r
186 * @param end Index of the character following the last character
\r
187 * @param pos Initial iterator position
\r
189 public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {
\r
190 if (text == null) {
\r
191 throw new NullPointerException();
\r
195 if (begin < 0 || begin > end || end > text.length()) {
\r
196 throw new IllegalArgumentException("Invalid substring range");
\r
199 if (pos < begin || pos > end) {
\r
200 throw new IllegalArgumentException("Invalid position");
\r
203 this.begin = begin;
\r
209 * Reset this iterator to point to a new string. This package-visible
\r
210 * method is used by other java.text classes that want to avoid allocating
\r
211 * new ReplaceableCharacterIterator objects every time their setText method
\r
214 * @param text The Replaceable text to be iterated over
\r
216 public void setText(Replaceable text) {
\r
217 if (text == null) {
\r
218 throw new NullPointerException();
\r
222 this.end = text.length();
\r
227 * Implements CharacterIterator.first() for Replaceable.
\r
228 * @see CharacterIterator#first
\r
230 public char first()
\r
237 * Implements CharacterIterator.last() for Replaceable.
\r
238 * @see CharacterIterator#last
\r
242 if (end != begin) {
\r
251 * Implements CharacterIterator.setIndex() for Replaceable.
\r
252 * @see CharacterIterator#setIndex
\r
254 public char setIndex(int p)
\r
256 if (p < begin || p > end) {
\r
257 throw new IllegalArgumentException("Invalid index");
\r
264 * Implements CharacterIterator.current() for Replaceable.
\r
265 * @see CharacterIterator#current
\r
267 public char current()
\r
269 if (pos >= begin && pos < end) {
\r
270 return text.charAt(pos);
\r
278 * Implements CharacterIterator.next() for Replaceable.
\r
279 * @see CharacterIterator#next
\r
283 if (pos < end - 1) {
\r
285 return text.charAt(pos);
\r
294 * Implements CharacterIterator.previous() for Replaceable.
\r
295 * @see CharacterIterator#previous
\r
297 public char previous()
\r
301 return text.charAt(pos);
\r
309 * Implements CharacterIterator.getBeginIndex() for Replaceable.
\r
310 * @see CharacterIterator#getBeginIndex
\r
312 public int getBeginIndex()
\r
318 * Implements CharacterIterator.getEndIndex() for Replaceable.
\r
319 * @see CharacterIterator#getEndIndex
\r
321 public int getEndIndex()
\r
327 * Implements CharacterIterator.getIndex() for Replaceable.
\r
328 * @see CharacterIterator#getIndex
\r
330 public int getIndex()
\r
336 * Compares the equality of two ReplaceableCharacterIterator objects.
\r
337 * @param obj the ReplaceableCharacterIterator object to be compared with.
\r
338 * @return true if the given obj is the same as this
\r
339 * ReplaceableCharacterIterator object; false otherwise.
\r
341 public boolean equals(Object obj)
\r
346 if (!(obj instanceof ReplaceableCharacterIterator)) {
\r
350 ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;
\r
352 if (hashCode() != that.hashCode()) {
\r
355 if (!text.equals(that.text)) {
\r
358 if (pos != that.pos || begin != that.begin || end != that.end) {
\r
365 * Computes a hashcode for this iterator.
\r
366 * @return A hash code
\r
368 public int hashCode()
\r
370 return text.hashCode() ^ pos ^ begin ^ end;
\r
374 * Creates a copy of this iterator.
\r
375 * @return A copy of this
\r
377 public Object clone()
\r
380 ReplaceableCharacterIterator other
\r
381 = (ReplaceableCharacterIterator) super.clone();
\r
384 catch (CloneNotSupportedException e) {
\r
385 throw new IllegalStateException();
\r