2 *******************************************************************************
\r
3 * Copyright (C) 1996-2008, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.text;
\r
10 import java.lang.ref.SoftReference;
\r
11 import java.text.CharacterIterator;
\r
12 import java.text.StringCharacterIterator;
\r
13 import java.util.Locale;
\r
14 import java.util.MissingResourceException;
\r
16 import com.ibm.icu.impl.ICUDebug;
\r
17 import com.ibm.icu.util.ULocale;
\r
20 * A class that locates boundaries in text. This class defines a protocol for
\r
21 * objects that break up a piece of natural-language text according to a set
\r
22 * of criteria. Instances or subclasses of BreakIterator can be provided, for
\r
23 * example, to break a piece of text into words, sentences, or logical characters
\r
24 * according to the conventions of some language or group of languages.
\r
26 * We provide five built-in types of BreakIterator:
\r
27 * <ul><li>getTitleInstance() returns a BreakIterator that locates boundaries
\r
28 * between title breaks.
\r
29 * <li>getSentenceInstance() returns a BreakIterator that locates boundaries
\r
30 * between sentences. This is useful for triple-click selection, for example.
\r
31 * <li>getWordInstance() returns a BreakIterator that locates boundaries between
\r
32 * words. This is useful for double-click selection or "find whole words" searches.
\r
33 * This type of BreakIterator makes sure there is a boundary position at the
\r
34 * beginning and end of each legal word. (Numbers count as words, too.) Whitespace
\r
35 * and punctuation are kept separate from real words.
\r
36 * <li>getLineInstance() returns a BreakIterator that locates positions where it is
\r
37 * legal for a text editor to wrap lines. This is similar to word breaking, but
\r
38 * not the same: punctuation and whitespace are generally kept with words (you don't
\r
39 * want a line to start with whitespace, for example), and some special characters
\r
40 * can force a position to be considered a line-break position or prevent a position
\r
41 * from being a line-break position.
\r
42 * <li>getCharacterInstance() returns a BreakIterator that locates boundaries between
\r
43 * logical characters. Because of the structure of the Unicode encoding, a logical
\r
44 * character may be stored internally as more than one Unicode code point. (A with an
\r
45 * umlaut may be stored as an a followed by a separate combining umlaut character,
\r
46 * for example, but the user still thinks of it as one character.) This iterator allows
\r
47 * various processes (especially text editors) to treat as characters the units of text
\r
48 * that a user would think of as characters, rather than the units of text that the
\r
49 * computer sees as "characters".</ul>
\r
51 * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
\r
52 * has a concept of a "current position" and methods like first(), last(), next(),
\r
53 * and previous() that update the current position. All BreakIterators uphold the
\r
54 * following invariants:
\r
55 * <ul><li>The beginning and end of the text are always treated as boundary positions.
\r
56 * <li>The current position of the iterator is always a boundary position (random-
\r
57 * access methods move the iterator to the nearest boundary position before or
\r
58 * after the specified position, not _to_ the specified position).
\r
59 * <li>DONE is used as a flag to indicate when iteration has stopped. DONE is only
\r
60 * returned when the current position is the end of the text and the user calls next(),
\r
61 * or when the current position is the beginning of the text and the user calls
\r
63 * <li>Break positions are numbered by the positions of the characters that follow
\r
64 * them. Thus, under normal circumstances, the position before the first character
\r
65 * is 0, the position after the first character is 1, and the position after the
\r
66 * last character is 1 plus the length of the string.
\r
67 * <li>The client can change the position of an iterator, or the text it analyzes,
\r
68 * at will, but cannot change the behavior. If the user wants different behavior, he
\r
69 * must instantiate a new iterator.</ul>
\r
71 * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
\r
72 * it possible to use BreakIterator to analyze text in any text-storage vehicle that
\r
73 * provides a CharacterIterator interface.
\r
75 * <b>NOTE:</b> Some types of BreakIterator can take a long time to create, and
\r
76 * instances of BreakIterator are not currently cached by the system. For
\r
77 * optimal performance, keep instances of BreakIterator around as long as makes
\r
78 * sense. For example, when word-wrapping a document, don't create and destroy a
\r
79 * new BreakIterator for each line. Create one break iterator for the whole document
\r
80 * (or whatever stretch of text you're wrapping) and use it to do the whole job of
\r
81 * wrapping the text.
\r
84 * <strong>Examples</strong>:<P>
\r
85 * Creating and using text boundaries
\r
88 * public static void main(String args[]) {
\r
89 * if (args.length == 1) {
\r
90 * String stringToExamine = args[0];
\r
91 * //print each word in order
\r
92 * BreakIterator boundary = BreakIterator.getWordInstance();
\r
93 * boundary.setText(stringToExamine);
\r
94 * printEachForward(boundary, stringToExamine);
\r
95 * //print each sentence in reverse order
\r
96 * boundary = BreakIterator.getSentenceInstance(Locale.US);
\r
97 * boundary.setText(stringToExamine);
\r
98 * printEachBackward(boundary, stringToExamine);
\r
99 * printFirst(boundary, stringToExamine);
\r
100 * printLast(boundary, stringToExamine);
\r
106 * Print each element in order
\r
109 * public static void printEachForward(BreakIterator boundary, String source) {
\r
110 * int start = boundary.first();
\r
111 * for (int end = boundary.next();
\r
112 * end != BreakIterator.DONE;
\r
113 * start = end, end = boundary.next()) {
\r
114 * System.out.println(source.substring(start,end));
\r
120 * Print each element in reverse order
\r
123 * public static void printEachBackward(BreakIterator boundary, String source) {
\r
124 * int end = boundary.last();
\r
125 * for (int start = boundary.previous();
\r
126 * start != BreakIterator.DONE;
\r
127 * end = start, start = boundary.previous()) {
\r
128 * System.out.println(source.substring(start,end));
\r
134 * Print first element
\r
137 * public static void printFirst(BreakIterator boundary, String source) {
\r
138 * int start = boundary.first();
\r
139 * int end = boundary.next();
\r
140 * System.out.println(source.substring(start,end));
\r
145 * Print last element
\r
148 * public static void printLast(BreakIterator boundary, String source) {
\r
149 * int end = boundary.last();
\r
150 * int start = boundary.previous();
\r
151 * System.out.println(source.substring(start,end));
\r
156 * Print the element at a specified position
\r
159 * public static void printAt(BreakIterator boundary, int pos, String source) {
\r
160 * int end = boundary.following(pos);
\r
161 * int start = boundary.previous();
\r
162 * System.out.println(source.substring(start,end));
\r
167 * Find the next word
\r
170 * public static int nextWordStartAfter(int pos, String text) {
\r
171 * BreakIterator wb = BreakIterator.getWordInstance();
\r
172 * wb.setText(text);
\r
173 * int last = wb.following(pos);
\r
174 * int current = wb.next();
\r
175 * while (current != BreakIterator.DONE) {
\r
176 * for (int p = last; p < current; p++) {
\r
177 * if (Character.isLetter(text.charAt(p)))
\r
181 * current = wb.next();
\r
183 * return BreakIterator.DONE;
\r
186 * (The iterator returned by BreakIterator.getWordInstance() is unique in that
\r
187 * the break positions it returns don't represent both the start and end of the
\r
188 * thing being iterated over. That is, a sentence-break iterator returns breaks
\r
189 * that each represent the end of one sentence and the beginning of the next.
\r
190 * With the word-break iterator, the characters between two boundaries might be a
\r
191 * word, or they might be the punctuation or whitespace between two words. The
\r
192 * above code uses a simple heuristic to determine which boundary is the beginning
\r
193 * of a word: If the characters between this boundary and the next boundary
\r
194 * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
\r
195 * a Hangul syllable, a Kana character, etc.), then the text between this boundary
\r
196 * and the next is a word; otherwise, it's the material between words.)
\r
199 * @see CharacterIterator
\r
204 public abstract class BreakIterator implements Cloneable
\r
207 private static final boolean DEBUG = ICUDebug.enabled("breakiterator");
\r
210 * Default constructor. There is no state that is carried by this abstract
\r
214 protected BreakIterator()
\r
219 * Clone method. Creates another BreakIterator with the same behavior and
\r
220 * current state as this one.
\r
221 * @return The clone.
\r
224 public Object clone()
\r
227 return super.clone();
\r
229 catch (CloneNotSupportedException e) {
\r
231 throw new IllegalStateException();
\r
237 * DONE is returned by previous() and next() after all valid
\r
238 * boundaries have been returned.
\r
241 public static final int DONE = -1;
\r
244 * Return the first boundary position. This is always the beginning
\r
245 * index of the text this iterator iterates over. For example, if
\r
246 * the iterator iterates over a whole string, this function will
\r
247 * always return 0. This function also updates the iteration position
\r
248 * to point to the beginning of the text.
\r
249 * @return The character offset of the beginning of the stretch of text
\r
253 public abstract int first();
\r
256 * Return the last boundary position. This is always the "past-the-end"
\r
257 * index of the text this iterator iterates over. For example, if the
\r
258 * iterator iterates over a whole string (call it "text"), this function
\r
259 * will always return text.length(). This function also updates the
\r
260 * iteration position to point to the end of the text.
\r
261 * @return The character offset of the end of the stretch of text
\r
265 public abstract int last();
\r
268 * Advances the specified number of steps forward in the text (a negative
\r
269 * number, therefore, advances backwards). If this causes the iterator
\r
270 * to advance off either end of the text, this function returns DONE;
\r
271 * otherwise, this function returns the position of the appropriate
\r
272 * boundary. Calling this function is equivalent to calling next() or
\r
273 * previous() n times.
\r
274 * @param n The number of boundaries to advance over (if positive, moves
\r
275 * forward; if negative, moves backwards).
\r
276 * @return The position of the boundary n boundaries from the current
\r
277 * iteration position, or DONE if moving n boundaries causes the iterator
\r
278 * to advance off either end of the text.
\r
281 public abstract int next(int n);
\r
284 * Advances the iterator forward one boundary. The current iteration
\r
285 * position is updated to point to the next boundary position after the
\r
286 * current position, and this is also the value that is returned. If
\r
287 * the current position is equal to the value returned by last(), or to
\r
288 * DONE, this function returns DONE and sets the current position to
\r
290 * @return The position of the first boundary position following the
\r
291 * iteration position.
\r
294 public abstract int next();
\r
297 * Advances the iterator backward one boundary. The current iteration
\r
298 * position is updated to point to the last boundary position before
\r
299 * the current position, and this is also the value that is returned. If
\r
300 * the current position is equal to the value returned by first(), or to
\r
301 * DONE, this function returns DONE and sets the current position to
\r
303 * @return The position of the last boundary position preceding the
\r
304 * iteration position.
\r
307 public abstract int previous();
\r
310 * Sets the iterator's current iteration position to be the first
\r
311 * boundary position following the specified position. (Whether the
\r
312 * specified position is itself a boundary position or not doesn't
\r
313 * matter-- this function always moves the iteration position to the
\r
314 * first boundary after the specified position.) If the specified
\r
315 * position is the past-the-end position, returns DONE.
\r
316 * @param offset The character position to start searching from.
\r
317 * @return The position of the first boundary position following
\r
318 * "offset" (whether or not "offset" itself is a boundary position),
\r
319 * or DONE if "offset" is the past-the-end offset.
\r
322 public abstract int following(int offset);
\r
325 * Sets the iterator's current iteration position to be the last
\r
326 * boundary position preceding the specified position. (Whether the
\r
327 * specified position is itself a boundary position or not doesn't
\r
328 * matter-- this function always moves the iteration position to the
\r
329 * last boundary before the specified position.) If the specified
\r
330 * position is the starting position, returns DONE.
\r
331 * @param offset The character position to start searching from.
\r
332 * @return The position of the last boundary position preceding
\r
333 * "offset" (whether or not "offset" itself is a boundary position),
\r
334 * or DONE if "offset" is the starting offset of the iterator.
\r
337 public int preceding(int offset) {
\r
338 // NOTE: This implementation is here solely because we can't add new
\r
339 // abstract methods to an existing class. There is almost ALWAYS a
\r
340 // better, faster way to do this.
\r
341 int pos = following(offset);
\r
342 while (pos >= offset && pos != DONE)
\r
348 * Return true if the specified position is a boundary position. If the
\r
349 * function returns true, the current iteration position is set to the
\r
350 * specified position; if the function returns false, the current
\r
351 * iteration position is set as though following() had been called.
\r
352 * @param offset the offset to check.
\r
353 * @return True if "offset" is a boundary position.
\r
356 public boolean isBoundary(int offset) {
\r
357 // Again, this is the default implementation, which is provided solely because
\r
358 // we couldn't add a new abstract method to an existing class. The real
\r
359 // implementations will usually need to do a little more work.
\r
364 return following(offset - 1) == offset;
\r
368 * Return the iterator's current position.
\r
369 * @return The iterator's current position.
\r
372 public abstract int current();
\r
375 * Returns a CharacterIterator over the text being analyzed.
\r
376 * For at least some subclasses of BreakIterator, this is a reference
\r
377 * to the <b>actual iterator being used</b> by the BreakIterator,
\r
378 * and therefore, this function's return value should be treated as
\r
379 * <tt>const</tt>. No guarantees are made about the current position
\r
380 * of this iterator when it is returned. If you need to move that
\r
381 * position to examine the text, clone this function's return value first.
\r
382 * @return A CharacterIterator over the text being analyzed.
\r
385 public abstract CharacterIterator getText();
\r
388 * Sets the iterator to analyze a new piece of text. The new
\r
389 * piece of text is passed in as a String, and the current
\r
390 * iteration position is reset to the beginning of the string.
\r
391 * (The old text is dropped.)
\r
392 * @param newText A String containing the text to analyze with
\r
393 * this BreakIterator.
\r
396 public void setText(String newText)
\r
398 setText(new StringCharacterIterator(newText));
\r
402 * Sets the iterator to analyze a new piece of text. The
\r
403 * BreakIterator is passed a CharacterIterator through which
\r
404 * it will access the text itself. The current iteration
\r
405 * position is reset to the CharacterIterator's start index.
\r
406 * (The old iterator is dropped.)
\r
407 * @param newText A CharacterIterator referring to the text
\r
408 * to analyze with this BreakIterator (the iterator's current
\r
409 * position is ignored, but its other state is significant).
\r
412 public abstract void setText(CharacterIterator newText);
\r
414 /** @stable ICU 2.4 */
\r
415 public static final int KIND_CHARACTER = 0;
\r
416 /** @stable ICU 2.4 */
\r
417 public static final int KIND_WORD = 1;
\r
418 /** @stable ICU 2.4 */
\r
419 public static final int KIND_LINE = 2;
\r
420 /** @stable ICU 2.4 */
\r
421 public static final int KIND_SENTENCE = 3;
\r
422 /** @stable ICU 2.4 */
\r
423 public static final int KIND_TITLE = 4;
\r
425 /** @since ICU 2.8 */
\r
426 private static final int KIND_COUNT = 5;
\r
429 private static final SoftReference[] iterCache = new SoftReference[5];
\r
432 * Returns a new instance of BreakIterator that locates word boundaries.
\r
433 * This function assumes that the text being analyzed is in the default
\r
434 * locale's language.
\r
435 * @return An instance of BreakIterator that locates word boundaries.
\r
438 public static BreakIterator getWordInstance()
\r
440 return getWordInstance(ULocale.getDefault());
\r
444 * Returns a new instance of BreakIterator that locates word boundaries.
\r
445 * @param where A locale specifying the language of the text to be
\r
447 * @return An instance of BreakIterator that locates word boundaries.
\r
450 public static BreakIterator getWordInstance(Locale where)
\r
452 return getBreakInstance(ULocale.forLocale(where), KIND_WORD);
\r
456 * Returns a new instance of BreakIterator that locates word boundaries.
\r
457 * @param where A locale specifying the language of the text to be
\r
459 * @return An instance of BreakIterator that locates word boundaries.
\r
462 public static BreakIterator getWordInstance(ULocale where)
\r
464 return getBreakInstance(where, KIND_WORD);
\r
468 * Returns a new instance of BreakIterator that locates legal line-
\r
469 * wrapping positions. This function assumes the text being broken
\r
470 * is in the default locale's language.
\r
471 * @return A new instance of BreakIterator that locates legal
\r
472 * line-wrapping positions.
\r
475 public static BreakIterator getLineInstance()
\r
477 return getLineInstance(ULocale.getDefault());
\r
481 * Returns a new instance of BreakIterator that locates legal line-
\r
482 * wrapping positions.
\r
483 * @param where A Locale specifying the language of the text being broken.
\r
484 * @return A new instance of BreakIterator that locates legal
\r
485 * line-wrapping positions.
\r
488 public static BreakIterator getLineInstance(Locale where)
\r
490 return getBreakInstance(ULocale.forLocale(where), KIND_LINE);
\r
494 * Returns a new instance of BreakIterator that locates legal line-
\r
495 * wrapping positions.
\r
496 * @param where A ULocale specifying the language of the text being broken.
\r
497 * @return A new instance of BreakIterator that locates legal
\r
498 * line-wrapping positions.
\r
501 public static BreakIterator getLineInstance(ULocale where)
\r
503 return getBreakInstance(where, KIND_LINE);
\r
507 * Returns a new instance of BreakIterator that locates logical-character
\r
508 * boundaries. This function assumes that the text being analyzed is
\r
509 * in the default locale's language.
\r
510 * @return A new instance of BreakIterator that locates logical-character
\r
514 public static BreakIterator getCharacterInstance()
\r
516 return getCharacterInstance(ULocale.getDefault());
\r
520 * Returns a new instance of BreakIterator that locates logical-character
\r
522 * @param where A Locale specifying the language of the text being analyzed.
\r
523 * @return A new instance of BreakIterator that locates logical-character
\r
527 public static BreakIterator getCharacterInstance(Locale where)
\r
529 return getBreakInstance(ULocale.forLocale(where), KIND_CHARACTER);
\r
533 * Returns a new instance of BreakIterator that locates logical-character
\r
535 * @param where A ULocale specifying the language of the text being analyzed.
\r
536 * @return A new instance of BreakIterator that locates logical-character
\r
540 public static BreakIterator getCharacterInstance(ULocale where)
\r
542 return getBreakInstance(where, KIND_CHARACTER);
\r
546 * Returns a new instance of BreakIterator that locates sentence boundaries.
\r
547 * This function assumes the text being analyzed is in the default locale's
\r
549 * @return A new instance of BreakIterator that locates sentence boundaries.
\r
552 public static BreakIterator getSentenceInstance()
\r
554 return getSentenceInstance(ULocale.getDefault());
\r
558 * Returns a new instance of BreakIterator that locates sentence boundaries.
\r
559 * @param where A Locale specifying the language of the text being analyzed.
\r
560 * @return A new instance of BreakIterator that locates sentence boundaries.
\r
563 public static BreakIterator getSentenceInstance(Locale where)
\r
565 return getBreakInstance(ULocale.forLocale(where), KIND_SENTENCE);
\r
569 * Returns a new instance of BreakIterator that locates sentence boundaries.
\r
570 * @param where A ULocale specifying the language of the text being analyzed.
\r
571 * @return A new instance of BreakIterator that locates sentence boundaries.
\r
574 public static BreakIterator getSentenceInstance(ULocale where)
\r
576 return getBreakInstance(where, KIND_SENTENCE);
\r
580 * Returns a new instance of BreakIterator that locates title boundaries.
\r
581 * This function assumes the text being analyzed is in the default locale's
\r
582 * language. The iterator returned locates title boundaries as described for
\r
583 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
\r
584 * please use a word boundary iterator. {@link #getWordInstance}
\r
585 * @return A new instance of BreakIterator that locates title boundaries.
\r
588 public static BreakIterator getTitleInstance()
\r
590 return getTitleInstance(ULocale.getDefault());
\r
594 * Returns a new instance of BreakIterator that locates title boundaries.
\r
595 * The iterator returned locates title boundaries as described for
\r
596 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
\r
597 * please use a word boundary iterator. {@link #getWordInstance}
\r
598 * @param where A Locale specifying the language of the text being analyzed.
\r
599 * @return A new instance of BreakIterator that locates title boundaries.
\r
602 public static BreakIterator getTitleInstance(Locale where)
\r
604 return getBreakInstance(ULocale.forLocale(where), KIND_TITLE);
\r
608 * Returns a new instance of BreakIterator that locates title boundaries.
\r
609 * The iterator returned locates title boundaries as described for
\r
610 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
\r
611 * please use a word boundary iterator. {@link #getWordInstance}
\r
612 * @param where A ULocale specifying the language of the text being analyzed.
\r
613 * @return A new instance of BreakIterator that locates title boundaries.
\r
616 public static BreakIterator getTitleInstance(ULocale where)
\r
618 return getBreakInstance(where, KIND_TITLE);
\r
622 * Register a new break iterator of the indicated kind, to use in the given locale.
\r
623 * Clones of the iterator will be returned
\r
624 * if a request for a break iterator of the given kind matches or falls back to
\r
626 * @param iter the BreakIterator instance to adopt.
\r
627 * @param locale the Locale for which this instance is to be registered
\r
628 * @param kind the type of iterator for which this instance is to be registered
\r
629 * @return a registry key that can be used to unregister this instance
\r
632 public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
\r
633 return registerInstance(iter, ULocale.forLocale(locale), kind);
\r
637 * Register a new break iterator of the indicated kind, to use in the given locale.
\r
638 * Clones of the iterator will be returned
\r
639 * if a request for a break iterator of the given kind matches or falls back to
\r
641 * @param iter the BreakIterator instance to adopt.
\r
642 * @param locale the ULocale for which this instance is to be registered
\r
643 * @param kind the type of iterator for which this instance is to be registered
\r
644 * @return a registry key that can be used to unregister this instance
\r
647 public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
\r
648 // If the registered object matches the one in the cache, then
\r
649 // flush the cached object.
\r
650 if (iterCache[kind] != null) {
\r
651 BreakIteratorCache cache = (BreakIteratorCache) iterCache[kind].get();
\r
652 if (cache != null) {
\r
653 if (cache.getLocale().equals(locale)) {
\r
654 iterCache[kind] = null;
\r
658 return getShim().registerInstance(iter, locale, kind);
\r
662 * Unregister a previously-registered BreakIterator using the key returned from the
\r
663 * register call. Key becomes invalid after this call and should not be used again.
\r
664 * @param key the registry key returned by a previous call to registerInstance
\r
665 * @return true if the iterator for the key was successfully unregistered
\r
668 public static boolean unregister(Object key) {
\r
670 throw new IllegalArgumentException("registry key must not be null");
\r
672 // TODO: we don't do code coverage for the following lines
\r
673 // because in getBreakInstance we always instantiate the shim,
\r
674 // and test execution is such that we always instantiate a
\r
675 // breakiterator before we get to the break iterator tests.
\r
676 // this is for modularization, and we could remove the
\r
677 // dependencies in getBreakInstance by rewriting part of the
\r
678 // LocaleData code, or perhaps by accepting it into the
\r
681 if (shim != null) {
\r
682 // Unfortunately, we don't know what is being unregistered
\r
683 // -- what `kind' and what locale -- so we flush all
\r
684 // caches. This is safe but inefficient if people are
\r
685 // actively registering and unregistering.
\r
686 for (int kind=0; kind<KIND_COUNT; ++kind) {
\r
687 iterCache[kind] = null;
\r
689 return shim.unregister(key);
\r
695 // end of registration
\r
698 * Get a particular kind of BreakIterator for a locale.
\r
699 * Avoids writing a switch statement with getXYZInstance(where) calls.
\r
701 * @deprecated This API is ICU internal only.
\r
703 public static BreakIterator getBreakInstance(ULocale where, int kind) {
\r
705 if (iterCache[kind] != null) {
\r
706 BreakIteratorCache cache = (BreakIteratorCache) iterCache[kind].get();
\r
707 if (cache != null) {
\r
708 if (cache.getLocale().equals(where)) {
\r
709 return cache.createBreakInstance();
\r
714 // sigh, all to avoid linking in ICULocaleData...
\r
715 BreakIterator result = getShim().createBreakIterator(where, kind);
\r
717 BreakIteratorCache cache = new BreakIteratorCache(where, result);
\r
718 iterCache[kind] = new SoftReference(cache);
\r
724 * Returns a list of locales for which BreakIterators can be used.
\r
725 * @return An array of Locales. All of the locales in the array can
\r
726 * be used when creating a BreakIterator.
\r
729 public static synchronized Locale[] getAvailableLocales()
\r
731 // to avoid linking ICULocaleData
\r
732 return getShim().getAvailableLocales();
\r
736 * Returns a list of locales for which BreakIterators can be used.
\r
737 * @return An array of Locales. All of the locales in the array can
\r
738 * be used when creating a BreakIterator.
\r
739 * @draft ICU 3.2 (retain)
\r
740 * @provisional This API might change or be removed in a future release.
\r
742 public static synchronized ULocale[] getAvailableULocales()
\r
744 // to avoid linking ICULocaleData
\r
745 return getShim().getAvailableULocales();
\r
748 private static final class BreakIteratorCache {
\r
750 private BreakIterator iter;
\r
751 private ULocale where;
\r
753 BreakIteratorCache(ULocale where, BreakIterator iter) {
\r
754 this.where = where;
\r
755 this.iter = (BreakIterator) iter.clone();
\r
758 ULocale getLocale() {
\r
762 BreakIterator createBreakInstance() {
\r
763 return (BreakIterator) iter.clone();
\r
767 static abstract class BreakIteratorServiceShim {
\r
768 public abstract Object registerInstance(BreakIterator iter, ULocale l, int k);
\r
769 public abstract boolean unregister(Object key);
\r
770 public abstract Locale[] getAvailableLocales();
\r
771 public abstract ULocale[] getAvailableULocales();
\r
772 public abstract BreakIterator createBreakIterator(ULocale l, int k);
\r
775 private static BreakIteratorServiceShim shim;
\r
776 private static BreakIteratorServiceShim getShim() {
\r
777 // Note: this instantiation is safe on loose-memory-model configurations
\r
778 // despite lack of synchronization, since the shim instance has no state--
\r
779 // it's all in the class init. The worst problem is we might instantiate
\r
780 // two shim instances, but they'll share the same state so that's ok.
\r
781 if (shim == null) {
\r
783 Class cls = Class.forName("com.ibm.icu.text.BreakIteratorFactory");
\r
784 shim = (BreakIteratorServiceShim)cls.newInstance();
\r
786 catch (MissingResourceException e)
\r
790 catch (Exception e) {
\r
793 e.printStackTrace();
\r
795 throw new RuntimeException(e.getMessage());
\r
802 // -------- BEGIN ULocale boilerplate --------
\r
805 * Return the locale that was used to create this object, or null.
\r
806 * This may differ from the locale requested at the time of
\r
807 * this object's creation. For example, if an object is created
\r
808 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
\r
809 * drawn from <tt>en</tt> (the <i>actual</i> locale), and
\r
810 * <tt>en_US</tt> may be the most specific locale that exists (the
\r
811 * <i>valid</i> locale).
\r
813 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
\r
814 * contains a partial preview implementation. The <i>actual</i>
\r
815 * locale is returned correctly, but the <i>valid</i> locale is
\r
816 * not, in most cases.
\r
817 * @param type type of information requested, either {@link
\r
818 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
\r
819 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
\r
820 * @return the information specified by <i>type</i>, or null if
\r
821 * this object was not constructed from locale data.
\r
822 * @see com.ibm.icu.util.ULocale
\r
823 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
824 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
825 * @draft ICU 2.8 (retain)
\r
826 * @provisional This API might change or be removed in a future release.
\r
828 public final ULocale getLocale(ULocale.Type type) {
\r
829 return type == ULocale.ACTUAL_LOCALE ?
\r
830 this.actualLocale : this.validLocale;
\r
834 * Set information about the locales that were used to create this
\r
835 * object. If the object was not constructed from locale data,
\r
836 * both arguments should be set to null. Otherwise, neither
\r
837 * should be null. The actual locale must be at the same level or
\r
838 * less specific than the valid locale. This method is intended
\r
839 * for use by factories or other entities that create objects of
\r
841 * @param valid the most specific locale containing any resource
\r
843 * @param actual the locale containing data used to construct this
\r
845 * @see com.ibm.icu.util.ULocale
\r
846 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
847 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
850 final void setLocale(ULocale valid, ULocale actual) {
\r
851 // Change the following to an assertion later
\r
852 if ((valid == null) != (actual == null)) {
\r
854 throw new IllegalArgumentException();
\r
857 // Another check we could do is that the actual locale is at
\r
858 // the same level or less specific than the valid locale.
\r
859 this.validLocale = valid;
\r
860 this.actualLocale = actual;
\r
864 * The most specific locale containing any resource data, or null.
\r
865 * @see com.ibm.icu.util.ULocale
\r
868 private ULocale validLocale;
\r
871 * The locale containing data used to construct this object, or
\r
873 * @see com.ibm.icu.util.ULocale
\r
876 private ULocale actualLocale;
\r
878 // -------- END ULocale boilerplate --------
\r