2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import java.util.Comparator;
\r
10 import java.util.Enumeration;
\r
11 import java.util.Iterator;
\r
12 import java.util.LinkedList;
\r
13 import java.util.Locale;
\r
14 import java.util.MissingResourceException;
\r
15 import java.util.Set;
\r
17 import com.ibm.icu.impl.ICUDebug;
\r
18 import com.ibm.icu.impl.ICUResourceBundle;
\r
19 import com.ibm.icu.util.ULocale;
\r
20 import com.ibm.icu.util.UResourceBundle;
\r
21 import com.ibm.icu.util.VersionInfo;
\r
24 * <p>Collator performs locale-sensitive string comparison. A concrete
\r
25 * subclass, RuleBasedCollator, allows customization of the collation
\r
26 * ordering by the use of rule sets.</p>
\r
28 * <p>Following the <a href="http://www.unicode.org">Unicode
\r
29 * Consortium</a>'s specifications for the
\r
30 * <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
\r
31 * Algorithm (UCA)</a>, there are 5 different levels of strength used
\r
35 * <li>PRIMARY strength: Typically, this is used to denote differences between
\r
36 * base characters (for example, "a" < "b").
\r
37 * It is the strongest difference. For example, dictionaries are divided
\r
38 * into different sections by base character.
\r
39 * <li>SECONDARY strength: Accents in the characters are considered secondary
\r
40 * differences (for example, "as" < "às" < "at"). Other
\r
42 * between letters can also be considered secondary differences, depending
\r
43 * on the language. A secondary difference is ignored when there is a
\r
44 * primary difference anywhere in the strings.
\r
45 * <li>TERTIARY strength: Upper and lower case differences in characters are
\r
46 * distinguished at tertiary strength (for example, "ao" < "Ao" <
\r
47 * "aò"). In addition, a variant of a letter differs from the base
\r
48 * form on the tertiary strength (such as "A" and "Ⓐ"). Another
\r
50 * difference between large and small Kana. A tertiary difference is ignored
\r
51 * when there is a primary or secondary difference anywhere in the strings.
\r
52 * <li>QUATERNARY strength: When punctuation is ignored
\r
53 * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
\r
54 * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
\r
55 * strength, an additional strength level can
\r
56 * be used to distinguish words with and without punctuation (for example,
\r
57 * "ab" < "a-b" < "aB").
\r
58 * This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
\r
59 * difference. The QUATERNARY strength should only be used if ignoring
\r
60 * punctuation is required.
\r
61 * <li>IDENTICAL strength:
\r
62 * When all other strengths are equal, the IDENTICAL strength is used as a
\r
63 * tiebreaker. The Unicode code point values of the NFD form of each string
\r
64 * are compared, just in case there is no difference.
\r
65 * For example, Hebrew cantillation marks are only distinguished at this
\r
66 * strength. This strength should be used sparingly, as only code point
\r
67 * value differences between two strings is an extremely rare occurrence.
\r
68 * Using this strength substantially decreases the performance for both
\r
69 * comparison and collation key generation APIs. This strength also
\r
70 * increases the size of the collation key.
\r
73 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
\r
74 * the canonical decomposition mode and one that does not use any decomposition.
\r
75 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
\r
76 * is not supported here. If the canonical
\r
77 * decomposition mode is set, the Collator handles un-normalized text properly,
\r
78 * producing the same results as if the text were normalized in NFD. If
\r
79 * canonical decomposition is turned off, it is the user's responsibility to
\r
80 * ensure that all text is already in the appropriate form before performing
\r
81 * a comparison or before getting a CollationKey.</p>
\r
83 * <p>For more information about the collation service see the
\r
84 * <a href="http://www.icu-project.org/userguide/Collate_Intro.html">users
\r
87 * <p>Examples of use
\r
89 * // Get the Collator for US English and set its strength to PRIMARY
\r
90 * Collator usCollator = Collator.getInstance(Locale.US);
\r
91 * usCollator.setStrength(Collator.PRIMARY);
\r
92 * if (usCollator.compare("abc", "ABC") == 0) {
\r
93 * System.out.println("Strings are equivalent");
\r
96 * The following example shows how to compare two strings using the
\r
97 * Collator for the default locale.
\r
99 * // Compare two strings in the default locale
\r
100 * Collator myCollator = Collator.getInstance();
\r
101 * myCollator.setDecomposition(NO_DECOMPOSITION);
\r
102 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
\r
103 * System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
\r
104 * myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
\r
105 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
\r
106 * System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
\r
109 * System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
\r
113 * System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
\r
117 * @see RuleBasedCollator
\r
118 * @see CollationKey
\r
119 * @author Syn Wee Quek
\r
122 public abstract class Collator implements Comparator, Cloneable
\r
124 // public data members ---------------------------------------------------
\r
127 * Strongest collator strength value. Typically used to denote differences
\r
128 * between base characters. See class documentation for more explanation.
\r
129 * @see #setStrength
\r
130 * @see #getStrength
\r
133 public final static int PRIMARY = 0;
\r
136 * Second level collator strength value.
\r
137 * Accents in the characters are considered secondary differences.
\r
138 * Other differences between letters can also be considered secondary
\r
139 * differences, depending on the language.
\r
140 * See class documentation for more explanation.
\r
141 * @see #setStrength
\r
142 * @see #getStrength
\r
145 public final static int SECONDARY = 1;
\r
148 * Third level collator strength value.
\r
149 * Upper and lower case differences in characters are distinguished at this
\r
150 * strength level. In addition, a variant of a letter differs from the base
\r
151 * form on the tertiary level.
\r
152 * See class documentation for more explanation.
\r
153 * @see #setStrength
\r
154 * @see #getStrength
\r
157 public final static int TERTIARY = 2;
\r
160 * Fourth level collator strength value.
\r
161 * When punctuation is ignored
\r
162 * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
\r
163 * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
\r
164 * strength, an additional strength level can
\r
165 * be used to distinguish words with and without punctuation.
\r
166 * See class documentation for more explanation.
\r
167 * @see #setStrength
\r
168 * @see #getStrength
\r
171 public final static int QUATERNARY = 3;
\r
175 * Smallest Collator strength value. When all other strengths are equal,
\r
176 * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
\r
177 * values of the NFD form of each string are compared, just in case there
\r
178 * is no difference.
\r
179 * See class documentation for more explanation.
\r
182 * Note this value is different from JDK's
\r
186 public final static int IDENTICAL = 15;
\r
189 * This is for backwards compatibility with Java APIs only. It
\r
190 * should not be used, IDENTICAL should be used instead. ICU's
\r
191 * collation does not support Java's FULL_DECOMPOSITION mode.
\r
194 public final static int FULL_DECOMPOSITION = IDENTICAL;
\r
197 * <p>Decomposition mode value. With NO_DECOMPOSITION set, Strings
\r
198 * will not be decomposed for collation. This is the default
\r
199 * decomposition setting unless otherwise specified by the locale
\r
200 * used to create the Collator.</p>
\r
202 * <p><strong>Note</strong> this value is different from the JDK's.</p>
\r
203 * @see #CANONICAL_DECOMPOSITION
\r
204 * @see #getDecomposition
\r
205 * @see #setDecomposition
\r
208 public final static int NO_DECOMPOSITION = 16;
\r
211 * <p>Decomposition mode value. With CANONICAL_DECOMPOSITION set,
\r
212 * characters that are canonical variants according to the Unicode standard
\r
213 * will be decomposed for collation.</p>
\r
215 * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
\r
216 * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
\r
217 * Unicode Technical Report #15</a>.
\r
219 * @see #NO_DECOMPOSITION
\r
220 * @see #getDecomposition
\r
221 * @see #setDecomposition
\r
224 public final static int CANONICAL_DECOMPOSITION = 17;
\r
226 // public methods --------------------------------------------------------
\r
228 // public setters --------------------------------------------------------
\r
231 * <p>Sets this Collator's strength property. The strength property
\r
232 * determines the minimum level of difference considered significant
\r
233 * during comparison.</p>
\r
235 * <p>The default strength for the Collator is TERTIARY, unless specified
\r
236 * otherwise by the locale used to create the Collator.</p>
\r
238 * <p>See the Collator class description for an example of use.</p>
\r
239 * @param newStrength the new strength value.
\r
240 * @see #getStrength
\r
246 * @exception IllegalArgumentException if the new strength value is not one
\r
247 * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
\r
250 public void setStrength(int newStrength)
\r
252 if ((newStrength != PRIMARY) &&
\r
253 (newStrength != SECONDARY) &&
\r
254 (newStrength != TERTIARY) &&
\r
255 (newStrength != QUATERNARY) &&
\r
256 (newStrength != IDENTICAL)) {
\r
257 throw new IllegalArgumentException("Incorrect comparison level.");
\r
259 m_strength_ = newStrength;
\r
263 * <p>Set the decomposition mode of this Collator. Setting this
\r
264 * decomposition property with CANONICAL_DECOMPOSITION allows the
\r
265 * Collator to handle un-normalized text properly, producing the
\r
266 * same results as if the text were normalized. If
\r
267 * NO_DECOMPOSITION is set, it is the user's responsibility to
\r
268 * ensure that all text is already in the appropriate form before
\r
269 * a comparison or before getting a CollationKey. Adjusting
\r
270 * decomposition mode allows the user to select between faster and
\r
271 * more complete collation behavior.</p>
\r
273 * <p>Since a great many of the world's languages do not require
\r
274 * text normalization, most locales set NO_DECOMPOSITION as the
\r
275 * default decomposition mode.</p>
\r
277 * <p>The default decomposition mode for the Collator is
\r
278 * NO_DECOMPOSITION, unless specified otherwise by the locale used
\r
279 * to create the Collator.</p>
\r
281 * <p>See getDecomposition for a description of decomposition
\r
284 * @param decomposition the new decomposition mode
\r
285 * @see #getDecomposition
\r
286 * @see #NO_DECOMPOSITION
\r
287 * @see #CANONICAL_DECOMPOSITION
\r
288 * @exception IllegalArgumentException If the given value is not a valid
\r
289 * decomposition mode.
\r
292 public void setDecomposition(int decomposition)
\r
294 if ((decomposition != NO_DECOMPOSITION) &&
\r
295 (decomposition != CANONICAL_DECOMPOSITION)) {
\r
296 throw new IllegalArgumentException("Wrong decomposition mode.");
\r
298 m_decomposition_ = decomposition;
\r
301 // public getters --------------------------------------------------------
\r
304 * Gets the Collator for the current default locale.
\r
305 * The default locale is determined by java.util.Locale.getDefault().
\r
306 * @return the Collator for the default locale (for example, en_US) if it
\r
307 * is created successfully. Otherwise if there is no Collator
\r
308 * associated with the current locale, the default UCA collator
\r
309 * will be returned.
\r
310 * @see java.util.Locale#getDefault()
\r
311 * @see #getInstance(Locale)
\r
314 public static final Collator getInstance()
\r
316 return getInstance(ULocale.getDefault());
\r
320 * Clone the collator.
\r
322 * @return a clone of this collator.
\r
324 public Object clone() throws CloneNotSupportedException {
\r
325 return super.clone();
\r
328 // begin registry stuff
\r
331 * A factory used with registerFactory to register multiple collators and provide
\r
332 * display names for them. If standard locale display names are sufficient,
\r
333 * Collator instances may be registered instead.
\r
334 * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
\r
335 * ULocale instead of Locale. Instead of overriding createCollator(Locale),
\r
336 * new implementations should override createCollator(ULocale). Note that
\r
337 * one of these two methods <b>MUST</b> be overridden or else an infinite
\r
341 public static abstract class CollatorFactory {
\r
343 * Return true if this factory will be visible. Default is true.
\r
344 * If not visible, the locales supported by this factory will not
\r
345 * be listed by getAvailableLocales.
\r
347 * @return true if this factory is visible
\r
350 public boolean visible() {
\r
355 * Return an instance of the appropriate collator. If the locale
\r
356 * is not supported, return null.
\r
357 * <b>Note:</b> as of ICU4J 3.2, implementations should override
\r
358 * this method instead of createCollator(Locale).
\r
359 * @param loc the locale for which this collator is to be created.
\r
360 * @return the newly created collator.
\r
363 public Collator createCollator(ULocale loc) {
\r
364 return createCollator(loc.toLocale());
\r
368 * Return an instance of the appropriate collator. If the locale
\r
369 * is not supported, return null.
\r
370 * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
\r
371 * createCollator(ULocale) instead of this method, and inherit this
\r
372 * method's implementation. This method is no longer abstract
\r
373 * and instead delegates to createCollator(ULocale).
\r
374 * @param loc the locale for which this collator is to be created.
\r
375 * @return the newly created collator.
\r
378 public Collator createCollator(Locale loc) {
\r
379 return createCollator(ULocale.forLocale(loc));
\r
383 * Return the name of the collator for the objectLocale, localized for the displayLocale.
\r
384 * If objectLocale is not visible or not defined by the factory, return null.
\r
385 * @param objectLocale the locale identifying the collator
\r
386 * @param displayLocale the locale for which the display name of the collator should be localized
\r
387 * @return the display name
\r
390 public String getDisplayName(Locale objectLocale, Locale displayLocale) {
\r
391 return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
\r
395 * Return the name of the collator for the objectLocale, localized for the displayLocale.
\r
396 * If objectLocale is not visible or not defined by the factory, return null.
\r
397 * @param objectLocale the locale identifying the collator
\r
398 * @param displayLocale the locale for which the display name of the collator should be localized
\r
399 * @return the display name
\r
402 public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
\r
404 Set supported = getSupportedLocaleIDs();
\r
405 String name = objectLocale.getBaseName();
\r
406 if (supported.contains(name)) {
\r
407 return objectLocale.getDisplayName(displayLocale);
\r
414 * Return an unmodifiable collection of the locale names directly
\r
415 * supported by this factory.
\r
417 * @return the set of supported locale IDs.
\r
420 public abstract Set getSupportedLocaleIDs();
\r
423 * Empty default constructor.
\r
426 protected CollatorFactory() {
\r
430 static abstract class ServiceShim {
\r
431 abstract Collator getInstance(ULocale l);
\r
432 abstract Object registerInstance(Collator c, ULocale l);
\r
433 abstract Object registerFactory(CollatorFactory f);
\r
434 abstract boolean unregister(Object k);
\r
435 abstract Locale[] getAvailableLocales(); // TODO remove
\r
436 abstract ULocale[] getAvailableULocales();
\r
437 abstract String getDisplayName(ULocale ol, ULocale dl);
\r
440 private static ServiceShim shim;
\r
441 private static ServiceShim getShim() {
\r
442 // Note: this instantiation is safe on loose-memory-model configurations
\r
443 // despite lack of synchronization, since the shim instance has no state--
\r
444 // it's all in the class init. The worst problem is we might instantiate
\r
445 // two shim instances, but they'll share the same state so that's ok.
\r
446 if (shim == null) {
\r
448 Class cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
\r
449 shim = (ServiceShim)cls.newInstance();
\r
451 catch (MissingResourceException e)
\r
455 catch (Exception e) {
\r
458 e.printStackTrace();
\r
460 throw new RuntimeException(e.getMessage());
\r
468 * Gets the Collator for the desired locale.
\r
469 * @param locale the desired locale.
\r
470 * @return Collator for the desired locale if it is created successfully.
\r
471 * Otherwise if there is no Collator
\r
472 * associated with the current locale, a default UCA collator will
\r
474 * @see java.util.Locale
\r
475 * @see java.util.ResourceBundle
\r
476 * @see #getInstance(Locale)
\r
477 * @see #getInstance()
\r
480 public static final Collator getInstance(ULocale locale) {
\r
481 // fetching from service cache is faster than instantiation
\r
482 return getShim().getInstance(locale);
\r
486 * Gets the Collator for the desired locale.
\r
487 * @param locale the desired locale.
\r
488 * @return Collator for the desired locale if it is created successfully.
\r
489 * Otherwise if there is no Collator
\r
490 * associated with the current locale, a default UCA collator will
\r
492 * @see java.util.Locale
\r
493 * @see java.util.ResourceBundle
\r
494 * @see #getInstance(ULocale)
\r
495 * @see #getInstance()
\r
498 public static final Collator getInstance(Locale locale) {
\r
499 return getInstance(ULocale.forLocale(locale));
\r
503 * Register a collator as the default collator for the provided locale. The
\r
504 * collator should not be modified after it is registered.
\r
506 * @param collator the collator to register
\r
507 * @param locale the locale for which this is the default collator
\r
508 * @return an object that can be used to unregister the registered collator.
\r
512 public static final Object registerInstance(Collator collator, ULocale locale) {
\r
513 return getShim().registerInstance(collator, locale);
\r
517 * Register a collator factory.
\r
519 * @param factory the factory to register
\r
520 * @return an object that can be used to unregister the registered factory.
\r
524 public static final Object registerFactory(CollatorFactory factory) {
\r
525 return getShim().registerFactory(factory);
\r
529 * Unregister a collator previously registered using registerInstance.
\r
530 * @param registryKey the object previously returned by registerInstance.
\r
531 * @return true if the collator was successfully unregistered.
\r
534 public static final boolean unregister(Object registryKey) {
\r
535 if (shim == null) {
\r
538 return shim.unregister(registryKey);
\r
542 * Get the set of locales, as Locale objects, for which collators
\r
543 * are installed. Note that Locale objects do not support RFC 3066.
\r
544 * @return the list of locales in which collators are installed.
\r
545 * This list includes any that have been registered, in addition to
\r
546 * those that are installed with ICU4J.
\r
549 public static Locale[] getAvailableLocales() {
\r
550 // TODO make this wrap getAvailableULocales later
\r
551 if (shim == null) {
\r
552 return ICUResourceBundle.getAvailableLocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME);
\r
554 return shim.getAvailableLocales();
\r
558 * Get the set of locales, as ULocale objects, for which collators
\r
559 * are installed. ULocale objects support RFC 3066.
\r
560 * @return the list of locales in which collators are installed.
\r
561 * This list includes any that have been registered, in addition to
\r
562 * those that are installed with ICU4J.
\r
565 public static final ULocale[] getAvailableULocales() {
\r
566 if (shim == null) {
\r
567 return ICUResourceBundle.getAvailableULocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME);
\r
569 return shim.getAvailableULocales();
\r
573 * The list of keywords for this service. This must be kept in sync with
\r
574 * the resource data.
\r
577 private static final String[] KEYWORDS = { "collation" };
\r
580 * The resource name for this service. Note that this is not the same as
\r
581 * the keyword for this service.
\r
584 private static final String RESOURCE = "collations";
\r
587 * The resource bundle base name for this service.
\r
590 private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
\r
593 * Return an array of all possible keywords that are relevant to
\r
594 * collation. At this point, the only recognized keyword for this
\r
595 * service is "collation".
\r
596 * @return an array of valid collation keywords.
\r
597 * @see #getKeywordValues
\r
600 public static final String[] getKeywords() {
\r
605 * Given a keyword, return an array of all values for
\r
606 * that keyword that are currently in use.
\r
607 * @param keyword one of the keywords returned by getKeywords.
\r
608 * @see #getKeywords
\r
611 public static final String[] getKeywordValues(String keyword) {
\r
612 if (!keyword.equals(KEYWORDS[0])) {
\r
613 throw new IllegalArgumentException("Invalid keyword: " + keyword);
\r
615 return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
\r
619 * Given a key and a locale, returns an array of string values in a preferred
\r
620 * order that would make a difference. These are all and only those values where
\r
621 * the open (creation) of the service with the locale formed from the input locale
\r
622 * plus input keyword and that value has different behavior than creation with the
\r
623 * input locale alone.
\r
624 * @param key one of the keys supported by this service. For now, only
\r
625 * "collation" is supported.
\r
626 * @param locale the locale
\r
627 * @param commonlyUsed if set to true it will return only commonly used values
\r
628 * with the given locale in preferred order. Otherwise,
\r
629 * it will return all the available values for the locale.
\r
630 * @return an array of string values for the given key and the locale.
\r
632 * @provisional This API might change or be removed in a future release.
\r
634 public static final String[] getKeywordValuesForLocale(String key, ULocale locale, boolean commonlyUsed) {
\r
635 // Note: The parameter commonlyUsed is actually not used.
\r
636 // The switch is in the method signature for consistency
\r
637 // with other locale services.
\r
639 // Read available collation values from collation bundles
\r
640 String baseLoc = locale.getBaseName();
\r
641 LinkedList values = new LinkedList();
\r
643 UResourceBundle bundle = UResourceBundle.getBundleInstance(
\r
644 ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc);
\r
646 String defcoll = null;
\r
647 while (bundle != null) {
\r
648 UResourceBundle collations = bundle.get("collations");
\r
649 Enumeration collEnum = collations.getKeys();
\r
650 while (collEnum.hasMoreElements()) {
\r
651 String collkey = (String)collEnum.nextElement();
\r
652 if (collkey.equals("default")) {
\r
653 if (defcoll == null) {
\r
654 // Keep the default
\r
655 defcoll = collations.getString("default");
\r
657 } else if (!values.contains(collkey)) {
\r
658 values.add(collkey);
\r
661 bundle = ((ICUResourceBundle)bundle).getParent();
\r
664 Iterator itr = values.iterator();
\r
665 String[] result = new String[values.size()];
\r
666 result[0] = defcoll;
\r
668 while (itr.hasNext()) {
\r
669 String collKey = (String)itr.next();
\r
670 if (!collKey.equals(defcoll)) {
\r
671 result[idx++] = collKey;
\r
677 * Return the functionally equivalent locale for the given
\r
678 * requested locale, with respect to given keyword, for the
\r
679 * collation service. If two locales return the same result, then
\r
680 * collators instantiated for these locales will behave
\r
681 * equivalently. The converse is not always true; two collators
\r
682 * may in fact be equivalent, but return different results, due to
\r
683 * internal details. The return result has no other meaning than
\r
684 * that stated above, and implies nothing as to the relationship
\r
685 * between the two locales. This is intended for use by
\r
686 * applications who wish to cache collators, or otherwise reuse
\r
687 * collators when possible. The functional equivalent may change
\r
688 * over time. For more information, please see the <a
\r
689 * href="http://www.icu-project.org/userguide/locale.html#services">
\r
690 * Locales and Services</a> section of the ICU User Guide.
\r
691 * @param keyword a particular keyword as enumerated by
\r
693 * @param locID The requested locale
\r
694 * @param isAvailable If non-null, isAvailable[0] will receive an
\r
695 * output boolean that indicates whether the requested locale was
\r
696 * 'available' to the collation service. If non-null, isAvailable
\r
697 * must have length >= 1.
\r
698 * @return the locale
\r
701 public static final ULocale getFunctionalEquivalent(String keyword,
\r
703 boolean isAvailable[]) {
\r
704 return ICUResourceBundle.getFunctionalEquivalent(
\r
705 BASE, RESOURCE, keyword, locID, isAvailable, true);
\r
709 * Return the functionally equivalent locale for the given
\r
710 * requested locale, with respect to given keyword, for the
\r
711 * collation service.
\r
712 * @param keyword a particular keyword as enumerated by
\r
714 * @param locID The requested locale
\r
715 * @return the locale
\r
716 * @see #getFunctionalEquivalent(String,ULocale,boolean[])
\r
719 public static final ULocale getFunctionalEquivalent(String keyword,
\r
721 return getFunctionalEquivalent(keyword, locID, null);
\r
725 * Get the name of the collator for the objectLocale, localized for the displayLocale.
\r
726 * @param objectLocale the locale of the collator
\r
727 * @param displayLocale the locale for the collator's display name
\r
728 * @return the display name
\r
731 static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
\r
732 return getShim().getDisplayName(ULocale.forLocale(objectLocale),
\r
733 ULocale.forLocale(displayLocale));
\r
737 * Get the name of the collator for the objectLocale, localized for the displayLocale.
\r
738 * @param objectLocale the locale of the collator
\r
739 * @param displayLocale the locale for the collator's display name
\r
740 * @return the display name
\r
743 static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
\r
744 return getShim().getDisplayName(objectLocale, displayLocale);
\r
748 * Get the name of the collator for the objectLocale, localized for the current locale.
\r
749 * @param objectLocale the locale of the collator
\r
750 * @return the display name
\r
753 static public String getDisplayName(Locale objectLocale) {
\r
754 return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault());
\r
758 * Get the name of the collator for the objectLocale, localized for the current locale.
\r
759 * @param objectLocale the locale of the collator
\r
760 * @return the display name
\r
763 static public String getDisplayName(ULocale objectLocale) {
\r
764 return getShim().getDisplayName(objectLocale, ULocale.getDefault());
\r
768 * <p>Returns this Collator's strength property. The strength property
\r
769 * determines the minimum level of difference considered significant.
\r
772 * See the Collator class description for more details.
\r
774 * @return this Collator's current strength property.
\r
775 * @see #setStrength
\r
783 public int getStrength()
\r
785 return m_strength_;
\r
790 * Get the decomposition mode of this Collator. Decomposition mode
\r
791 * determines how Unicode composed characters are handled.
\r
794 * See the Collator class description for more details.
\r
796 * @return the decomposition mode
\r
797 * @see #setDecomposition
\r
798 * @see #NO_DECOMPOSITION
\r
799 * @see #CANONICAL_DECOMPOSITION
\r
802 public int getDecomposition()
\r
804 return m_decomposition_;
\r
809 * Compares the source text String to the target text String according to
\r
810 * this Collator's rules, strength and decomposition mode.
\r
811 * Returns an integer less than,
\r
812 * equal to or greater than zero depending on whether the source String is
\r
813 * less than, equal to or greater than the target String. See the Collator
\r
814 * class description for an example of use.
\r
816 * @param source the source String.
\r
817 * @param target the target String.
\r
818 * @return Returns an integer value. Value is less than zero if source is
\r
819 * less than target, value is zero if source and target are equal,
\r
820 * value is greater than zero if source is greater than target.
\r
821 * @see CollationKey
\r
822 * @see #getCollationKey
\r
823 * @exception NullPointerException thrown if either arguments is null.
\r
824 * IllegalArgumentException thrown if either source or target is
\r
825 * not of the class String.
\r
828 public int compare(Object source, Object target)
\r
830 if (!(source instanceof String) || !(target instanceof String)) {
\r
831 throw new IllegalArgumentException("Arguments have to be of type String");
\r
833 return compare((String)source, (String)target);
\r
836 // public other methods -------------------------------------------------
\r
839 * Convenience method for comparing the equality of two text Strings using
\r
840 * this Collator's rules, strength and decomposition mode.
\r
841 * @param source the source string to be compared.
\r
842 * @param target the target string to be compared.
\r
843 * @return true if the strings are equal according to the collation
\r
844 * rules, otherwise false.
\r
846 * @exception NullPointerException thrown if either arguments is null.
\r
849 public boolean equals(String source, String target)
\r
851 return (compare(source, target) == 0);
\r
855 * Get an UnicodeSet that contains all the characters and sequences
\r
856 * tailored in this collator.
\r
857 * @return a pointer to a UnicodeSet object containing all the
\r
858 * code points and sequences that may sort differently than
\r
862 public UnicodeSet getTailoredSet()
\r
864 return new UnicodeSet(0, 0x10FFFF);
\r
869 * Compares the source text String to the target text String according to
\r
870 * this Collator's rules, strength and decomposition mode.
\r
871 * Returns an integer less than,
\r
872 * equal to or greater than zero depending on whether the source String is
\r
873 * less than, equal to or greater than the target String. See the Collator
\r
874 * class description for an example of use.
\r
876 * @param source the source String.
\r
877 * @param target the target String.
\r
878 * @return Returns an integer value. Value is less than zero if source is
\r
879 * less than target, value is zero if source and target are equal,
\r
880 * value is greater than zero if source is greater than target.
\r
881 * @see CollationKey
\r
882 * @see #getCollationKey
\r
883 * @exception NullPointerException thrown if either arguments is null.
\r
886 public abstract int compare(String source, String target);
\r
890 * Transforms the String into a CollationKey suitable for efficient
\r
891 * repeated comparison. The resulting key depends on the collator's
\r
892 * rules, strength and decomposition mode.
\r
894 * <p>See the CollationKey class documentation for more information.</p>
\r
895 * @param source the string to be transformed into a CollationKey.
\r
896 * @return the CollationKey for the given String based on this Collator's
\r
897 * collation rules. If the source String is null, a null
\r
898 * CollationKey is returned.
\r
899 * @see CollationKey
\r
900 * @see #compare(String, String)
\r
901 * @see #getRawCollationKey
\r
904 public abstract CollationKey getCollationKey(String source);
\r
907 * Gets the simpler form of a CollationKey for the String source following
\r
908 * the rules of this Collator and stores the result into the user provided
\r
910 * If key has a internal byte array of length that's too small for the
\r
911 * result, the internal byte array will be grown to the exact required
\r
913 * @param source the text String to be transformed into a RawCollationKey
\r
914 * @return If key is null, a new instance of RawCollationKey will be
\r
915 * created and returned, otherwise the user provided key will be
\r
917 * @see #compare(String, String)
\r
918 * @see #getCollationKey
\r
919 * @see RawCollationKey
\r
922 public abstract RawCollationKey getRawCollationKey(String source,
\r
923 RawCollationKey key);
\r
927 * Variable top is a two byte primary value which causes all the codepoints
\r
928 * with primary values that are less or equal than the variable top to be
\r
929 * shifted when alternate handling is set to SHIFTED.
\r
932 * Sets the variable top to a collation element value of a string supplied.
\r
934 * @param varTop one or more (if contraction) characters to which the
\r
935 * variable top should be set
\r
936 * @return a int value containing the value of the variable top in upper 16
\r
937 * bits. Lower 16 bits are undefined.
\r
938 * @exception IllegalArgumentException is thrown if varTop argument is not
\r
939 * a valid variable top element. A variable top element is
\r
940 * invalid when it is a contraction that does not exist in the
\r
941 * Collation order or when the PRIMARY strength collation
\r
942 * element for the variable top has more than two bytes
\r
943 * @see #getVariableTop
\r
944 * @see RuleBasedCollator#setAlternateHandlingShifted
\r
947 public abstract int setVariableTop(String varTop);
\r
950 * Gets the variable top value of a Collator.
\r
951 * Lower 16 bits are undefined and should be ignored.
\r
952 * @return the variable top value of a Collator.
\r
953 * @see #setVariableTop
\r
956 public abstract int getVariableTop();
\r
959 * Sets the variable top to a collation element value supplied.
\r
960 * Variable top is set to the upper 16 bits.
\r
961 * Lower 16 bits are ignored.
\r
962 * @param varTop Collation element value, as returned by setVariableTop or
\r
964 * @see #getVariableTop
\r
965 * @see #setVariableTop
\r
968 public abstract void setVariableTop(int varTop);
\r
971 * Get the version of this collator object.
\r
972 * @return the version object associated with this collator
\r
975 public abstract VersionInfo getVersion();
\r
978 * Get the UCA version of this collator object.
\r
979 * @return the version object associated with this collator
\r
982 public abstract VersionInfo getUCAVersion();
\r
984 // protected constructor -------------------------------------------------
\r
987 * Empty default constructor to make javadocs happy
\r
990 protected Collator()
\r
994 // package private methods -----------------------------------------------
\r
996 // private data members --------------------------------------------------
\r
999 * Collation strength
\r
1001 private int m_strength_ = TERTIARY;
\r
1004 * Decomposition mode
\r
1006 private int m_decomposition_ = CANONICAL_DECOMPOSITION;
\r
1008 private static final boolean DEBUG = ICUDebug.enabled("collator");
\r
1010 // private methods -------------------------------------------------------
\r
1012 // end registry stuff
\r
1014 // -------- BEGIN ULocale boilerplate --------
\r
1017 * Return the locale that was used to create this object, or null.
\r
1018 * This may may differ from the locale requested at the time of
\r
1019 * this object's creation. For example, if an object is created
\r
1020 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
\r
1021 * drawn from <tt>en</tt> (the <i>actual</i> locale), and
\r
1022 * <tt>en_US</tt> may be the most specific locale that exists (the
\r
1023 * <i>valid</i> locale).
\r
1025 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
\r
1026 * contains a partial preview implementation. The * <i>actual</i>
\r
1027 * locale is returned correctly, but the <i>valid</i> locale is
\r
1028 * not, in most cases.
\r
1029 * @param type type of information requested, either {@link
\r
1030 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
\r
1031 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
\r
1032 * @return the information specified by <i>type</i>, or null if
\r
1033 * this object was not constructed from locale data.
\r
1034 * @see com.ibm.icu.util.ULocale
\r
1035 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
1036 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
1037 * @draft ICU 2.8 (retain)
\r
1038 * @provisional This API might change or be removed in a future release.
\r
1040 public final ULocale getLocale(ULocale.Type type) {
\r
1041 return type == ULocale.ACTUAL_LOCALE ?
\r
1042 this.actualLocale : this.validLocale;
\r
1046 * Set information about the locales that were used to create this
\r
1047 * object. If the object was not constructed from locale data,
\r
1048 * both arguments should be set to null. Otherwise, neither
\r
1049 * should be null. The actual locale must be at the same level or
\r
1050 * less specific than the valid locale. This method is intended
\r
1051 * for use by factories or other entities that create objects of
\r
1053 * @param valid the most specific locale containing any resource
\r
1055 * @param actual the locale containing data used to construct this
\r
1057 * @see com.ibm.icu.util.ULocale
\r
1058 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
1059 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
1062 final void setLocale(ULocale valid, ULocale actual) {
\r
1063 // Change the following to an assertion later
\r
1064 if ((valid == null) != (actual == null)) {
\r
1066 throw new IllegalArgumentException();
\r
1069 // Another check we could do is that the actual locale is at
\r
1070 // the same level or less specific than the valid locale.
\r
1071 this.validLocale = valid;
\r
1072 this.actualLocale = actual;
\r
1076 * The most specific locale containing any resource data, or null.
\r
1077 * @see com.ibm.icu.util.ULocale
\r
1080 private ULocale validLocale;
\r
1083 * The locale containing data used to construct this object, or
\r
1085 * @see com.ibm.icu.util.ULocale
\r
1088 private ULocale actualLocale;
\r
1090 // -------- END ULocale boilerplate --------
\r