2 *******************************************************************************
\r
3 * Copyright (C) 1996-2007, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import com.ibm.icu.lang.UCharacter;
\r
10 import com.ibm.icu.util.ULocale;
\r
12 import java.text.CharacterIterator;
\r
16 * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
\r
17 * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
\r
19 final class BreakTransliterator extends Transliterator {
\r
20 private BreakIterator bi;
\r
21 private String insertion;
\r
22 private int[] boundaries = new int[50];
\r
23 private int boundaryCount = 0;
\r
25 public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {
\r
28 this.insertion = insertion;
\r
31 public BreakTransliterator(String ID, UnicodeFilter filter) {
\r
32 this(ID, filter, null, " ");
\r
35 public String getInsertion() {
\r
39 public void setInsertion(String insertion) {
\r
40 this.insertion = insertion;
\r
43 public BreakIterator getBreakIterator() {
\r
44 // Defer initialization of BreakIterator because it is slow,
\r
45 // typically over 2000 ms.
\r
46 if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
\r
50 public void setBreakIterator(BreakIterator bi) {
\r
54 static final int LETTER_OR_MARK_MASK =
\r
55 (1<<Character.UPPERCASE_LETTER)
\r
56 | (1<<Character.LOWERCASE_LETTER)
\r
57 | (1<<Character.TITLECASE_LETTER)
\r
58 | (1<<Character.MODIFIER_LETTER)
\r
59 | (1<<Character.OTHER_LETTER)
\r
60 | (1<<Character.COMBINING_SPACING_MARK)
\r
61 | (1<<Character.NON_SPACING_MARK)
\r
62 | (1<<Character.ENCLOSING_MARK)
\r
64 protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
\r
67 getBreakIterator(); // Lazy-create it if necessary
\r
68 bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
\r
69 // TODO: fix clumsy workaround used below.
\r
71 char[] tempBuffer = new char[text.length()];
\r
72 text.getChars(0, text.length(), tempBuffer, 0);
\r
73 bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
\r
77 // To make things much easier, we will stack the boundaries, and then insert at the end.
\r
78 // generally, we won't need too many, since we will be filtered.
\r
80 for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
\r
81 if (boundary == 0) continue;
\r
82 // HACK: Check to see that preceding item was a letter
\r
84 int cp = UTF16.charAt(text, boundary-1);
\r
85 int type = UCharacter.getType(cp);
\r
86 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
\r
87 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
\r
89 cp = UTF16.charAt(text, boundary);
\r
90 type = UCharacter.getType(cp);
\r
91 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
\r
92 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
\r
94 if (boundaryCount >= boundaries.length) { // realloc if necessary
\r
95 int[] temp = new int[boundaries.length * 2];
\r
96 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
\r
100 boundaries[boundaryCount++] = boundary;
\r
101 //System.out.println(boundary);
\r
105 int lastBoundary = 0;
\r
107 if (boundaryCount != 0) { // if we found something, adjust
\r
108 delta = boundaryCount * insertion.length();
\r
109 lastBoundary = boundaries[boundaryCount-1];
\r
111 // we do this from the end backwards, so that we don't have to keep updating.
\r
113 while (boundaryCount > 0) {
\r
114 boundary = boundaries[--boundaryCount];
\r
115 text.replace(boundary, boundary, insertion);
\r
119 // Now fix up the return values
\r
120 pos.contextLimit += delta;
\r
121 pos.limit += delta;
\r
122 pos.start = incremental ? lastBoundary + delta : pos.limit;
\r
127 * Registers standard variants with the system. Called by
\r
128 * Transliterator during initialization.
\r
130 static void register() {
\r
131 // false means that it is invisible
\r
132 Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);
\r
133 Transliterator.registerInstance(trans, false);
\r
135 Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
\r
136 public Transliterator getInstance(String ID) {
\r
137 return new BreakTransliterator("Any-Break", null);
\r
143 // Hack, just to get a real character iterator.
\r
144 static final class ReplaceableCharacterIterator implements CharacterIterator
\r
146 private Replaceable text;
\r
149 // invariant: begin <= pos <= end
\r
153 * Constructs an iterator with an initial index of 0.
\r
155 /*public ReplaceableCharacterIterator(Replaceable text)
\r
161 * Constructs an iterator with the specified initial index.
\r
163 * @param text The Replaceable to be iterated over
\r
164 * @param pos Initial iterator position
\r
166 /*public ReplaceableCharacterIterator(Replaceable text, int pos)
\r
168 this(text, 0, text.length(), pos);
\r
172 * Constructs an iterator over the given range of the given string, with the
\r
173 * index set at the specified position.
\r
175 * @param text The Replaceable to be iterated over
\r
176 * @param begin Index of the first character
\r
177 * @param end Index of the character following the last character
\r
178 * @param pos Initial iterator position
\r
180 public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {
\r
181 if (text == null) {
\r
182 throw new NullPointerException();
\r
186 if (begin < 0 || begin > end || end > text.length()) {
\r
187 throw new IllegalArgumentException("Invalid substring range");
\r
190 if (pos < begin || pos > end) {
\r
191 throw new IllegalArgumentException("Invalid position");
\r
194 this.begin = begin;
\r
200 * Reset this iterator to point to a new string. This package-visible
\r
201 * method is used by other java.text classes that want to avoid allocating
\r
202 * new ReplaceableCharacterIterator objects every time their setText method
\r
205 * @param text The Replaceable to be iterated over
\r
207 public void setText(Replaceable text) {
\r
208 if (text == null) {
\r
209 throw new NullPointerException();
\r
213 this.end = text.length();
\r
218 * Implements CharacterIterator.first() for String.
\r
219 * @see CharacterIterator#first
\r
221 public char first()
\r
228 * Implements CharacterIterator.last() for String.
\r
229 * @see CharacterIterator#last
\r
233 if (end != begin) {
\r
242 * Implements CharacterIterator.setIndex() for String.
\r
243 * @see CharacterIterator#setIndex
\r
245 public char setIndex(int p)
\r
247 if (p < begin || p > end) {
\r
248 throw new IllegalArgumentException("Invalid index");
\r
255 * Implements CharacterIterator.current() for String.
\r
256 * @see CharacterIterator#current
\r
258 public char current()
\r
260 if (pos >= begin && pos < end) {
\r
261 return text.charAt(pos);
\r
269 * Implements CharacterIterator.next() for String.
\r
270 * @see CharacterIterator#next
\r
274 if (pos < end - 1) {
\r
276 return text.charAt(pos);
\r
285 * Implements CharacterIterator.previous() for String.
\r
286 * @see CharacterIterator#previous
\r
288 public char previous()
\r
292 return text.charAt(pos);
\r
300 * Implements CharacterIterator.getBeginIndex() for String.
\r
301 * @see CharacterIterator#getBeginIndex
\r
303 public int getBeginIndex()
\r
309 * Implements CharacterIterator.getEndIndex() for String.
\r
310 * @see CharacterIterator#getEndIndex
\r
312 public int getEndIndex()
\r
318 * Implements CharacterIterator.getIndex() for String.
\r
319 * @see CharacterIterator#getIndex
\r
321 public int getIndex()
\r
327 * Compares the equality of two ReplaceableCharacterIterator objects.
\r
328 * @param obj the ReplaceableCharacterIterator object to be compared with.
\r
329 * @return true if the given obj is the same as this
\r
330 * ReplaceableCharacterIterator object; false otherwise.
\r
332 public boolean equals(Object obj)
\r
337 if (!(obj instanceof ReplaceableCharacterIterator)) {
\r
341 ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;
\r
343 if (hashCode() != that.hashCode()) {
\r
346 if (!text.equals(that.text)) {
\r
349 if (pos != that.pos || begin != that.begin || end != that.end) {
\r
356 * Computes a hashcode for this iterator.
\r
357 * @return A hash code
\r
359 public int hashCode()
\r
361 return text.hashCode() ^ pos ^ begin ^ end;
\r
365 * Creates a copy of this iterator.
\r
366 * @return A copy of this
\r
368 public Object clone()
\r
371 ReplaceableCharacterIterator other
\r
372 = (ReplaceableCharacterIterator) super.clone();
\r
375 catch (CloneNotSupportedException e) {
\r
376 throw new IllegalStateException();
\r