2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.util.Comparator;
10 import java.util.Enumeration;
11 import java.util.Iterator;
12 import java.util.LinkedList;
13 import java.util.Locale;
14 import java.util.MissingResourceException;
17 import com.ibm.icu.impl.ICUDebug;
18 import com.ibm.icu.impl.ICUResourceBundle;
19 import com.ibm.icu.impl.Norm2AllModes;
20 import com.ibm.icu.lang.UScript;
21 import com.ibm.icu.util.Freezable;
22 import com.ibm.icu.util.ULocale;
23 import com.ibm.icu.util.ULocale.Category;
24 import com.ibm.icu.util.UResourceBundle;
25 import com.ibm.icu.util.VersionInfo;
28 * {@icuenhanced java.text.Collator}.{@icu _usage_}
30 * <p>Collator performs locale-sensitive string comparison. A concrete
31 * subclass, RuleBasedCollator, allows customization of the collation
32 * ordering by the use of rule sets.</p>
34 * <p>Following the <a href="http://www.unicode.org">Unicode
35 * Consortium</a>'s specifications for the
36 * <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
37 * Algorithm (UCA)</a>, there are 5 different levels of strength used
41 * <li>PRIMARY strength: Typically, this is used to denote differences between
42 * base characters (for example, "a" &lt; "b").
43 * It is the strongest difference. For example, dictionaries are divided
44 * into different sections by base character.
45 * <li>SECONDARY strength: Accents in the characters are considered secondary
46 * differences (for example, "as" &lt; "às" &lt; "at"). Other
48 * between letters can also be considered secondary differences, depending
49 * on the language. A secondary difference is ignored when there is a
50 * primary difference anywhere in the strings.
51 * <li>TERTIARY strength: Upper and lower case differences in characters are
52 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
53 * "aò"). In addition, a variant of a letter differs from the base
54 * form on the tertiary strength (such as "A" and "Ⓐ"). Another
56 * difference between large and small Kana. A tertiary difference is ignored
57 * when there is a primary or secondary difference anywhere in the strings.
58 * <li>QUATERNARY strength: When punctuation is ignored
59 * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
60 * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
61 * strength, an additional strength level can
62 * be used to distinguish words with and without punctuation (for example,
63 * "ab" &lt; "a-b" &lt; "aB").
64 * This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
65 * difference. The QUATERNARY strength should only be used if ignoring
66 * punctuation is required.
67 * <li>IDENTICAL strength:
68 * When all other strengths are equal, the IDENTICAL strength is used as a
69 * tiebreaker. The Unicode code point values of the NFD form of each string
70 * are compared, just in case there is no difference.
71 * For example, Hebrew cantillation marks are only distinguished at this
72 * strength. This strength should be used sparingly, because strings that
73 * differ only in their code point values are an extremely rare occurrence.
74 * Using this strength substantially decreases the performance for both
75 * comparison and collation key generation APIs. This strength also
76 * increases the size of the collation key.
79 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
80 * the canonical decomposition mode and one that does not use any decomposition.
81 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
82 * is not supported here. If the canonical
83 * decomposition mode is set, the Collator handles un-normalized text properly,
84 * producing the same results as if the text were normalized in NFD. If
85 * canonical decomposition is turned off, it is the user's responsibility to
86 * ensure that all text is already in the appropriate form before performing
87 * a comparison or before getting a CollationKey.</p>
89 * <p>For more information about the collation service see the
90 * <a href="http://www.icu-project.org/userguide/Collate_Intro.html">users
95 * // Get the Collator for US English and set its strength to PRIMARY
96 * Collator usCollator = Collator.getInstance(Locale.US);
97 * usCollator.setStrength(Collator.PRIMARY);
98 * if (usCollator.compare("abc", "ABC") == 0) {
99 * System.out.println("Strings are equivalent");
102 * The following example shows how to compare two strings using the
103 * Collator for the default locale.
105 * // Compare two strings in the default locale
106 * Collator myCollator = Collator.getInstance();
107 * myCollator.setDecomposition(NO_DECOMPOSITION);
108 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
109 * System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
110 * myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
111 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
112 * System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
115 * System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
119 * System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
123 * @see RuleBasedCollator
125 * @author Syn Wee Quek
128 public abstract class Collator implements Comparator<Object>, Freezable<Collator>
130 // public data members ---------------------------------------------------
133 * Strongest collator strength value. Typically used to denote differences
134 * between base characters. See class documentation for more explanation.
139 public final static int PRIMARY = 0;
142 * Second level collator strength value.
143 * Accents in the characters are considered secondary differences.
144 * Other differences between letters can also be considered secondary
145 * differences, depending on the language.
146 * See class documentation for more explanation.
151 public final static int SECONDARY = 1;
154 * Third level collator strength value.
155 * Upper and lower case differences in characters are distinguished at this
156 * strength level. In addition, a variant of a letter differs from the base
157 * form on the tertiary level.
158 * See class documentation for more explanation.
163 public final static int TERTIARY = 2;
166 * {@icu} Fourth level collator strength value.
167 * When punctuation is ignored
168 * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
169 * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
170 * strength, an additional strength level can
171 * be used to distinguish words with and without punctuation.
172 * See class documentation for more explanation.
177 public final static int QUATERNARY = 3;
180 * Smallest Collator strength value. When all other strengths are equal,
181 * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
182 * values of the NFD form of each string are compared, just in case there
184 * See class documentation for more explanation.
187 * Note this value is different from JDK's
191 public final static int IDENTICAL = 15;
194 * {@icunote} This is for backwards compatibility with Java APIs only. It
195 * should not be used, IDENTICAL should be used instead. ICU's
196 * collation does not support Java's FULL_DECOMPOSITION mode.
199 public final static int FULL_DECOMPOSITION = IDENTICAL;
202 * Decomposition mode value. With NO_DECOMPOSITION set, Strings
203 * will not be decomposed for collation. This is the default
204 * decomposition setting unless otherwise specified by the locale
205 * used to create the Collator.</p>
207 * <p><strong>Note</strong> this value is different from the JDK's.</p>
208 * @see #CANONICAL_DECOMPOSITION
209 * @see #getDecomposition
210 * @see #setDecomposition
213 public final static int NO_DECOMPOSITION = 16;
216 * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
217 * characters that are canonical variants according to the Unicode standard
218 * will be decomposed for collation.</p>
220 * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
221 * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
222 * Unicode Technical Report #15</a>.
224 * @see #NO_DECOMPOSITION
225 * @see #getDecomposition
226 * @see #setDecomposition
229 public final static int CANONICAL_DECOMPOSITION = 17;
232 * Reordering codes for non-script groups that can be reordered under collation.
234 * @see #getReorderCodes
235 * @see #setReorderCodes
236 * @see #getEquivalentReorderCodes
// Holder for the integer reorder codes of the non-script groups.
// As declared below: DEFAULT is -1; NONE and OTHERS are both UScript.UNKNOWN, so the two
// cannot be told apart by value; the group codes run from SPACE (0x1000, aliased by FIRST)
// through DIGIT (0x1004), and LIMIT (0x1005) is one past the last group code.
239 public static interface ReorderCodes {
241 * A special reordering code that is used to specify the default reordering codes for a locale.
244 public final static int DEFAULT = -1; // == UScript.INVALID_CODE
246 * A special reordering code that is used to specify no reordering codes.
249 public final static int NONE = UScript.UNKNOWN;
251 * A special reordering code that is used to specify all other codes used for reordering except
252 * for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
// NOTE(review): same value as NONE above -- confirm this aliasing is intended.
255 public final static int OTHERS = UScript.UNKNOWN;
257 * Characters with the space property.
258 * This is equivalent to the rule value "space".
261 public final static int SPACE = 0x1000;
263 * The first entry in the enumeration of reordering groups. This is intended for use in
264 * range checking and enumeration of the reorder codes.
267 public final static int FIRST = SPACE;
269 * Characters with the punctuation property.
270 * This is equivalent to the rule value "punct".
273 public final static int PUNCTUATION = 0x1001;
275 * Characters with the symbol property.
276 * This is equivalent to the rule value "symbol".
279 public final static int SYMBOL = 0x1002;
281 * Characters with the currency property.
282 * This is equivalent to the rule value "currency".
285 public final static int CURRENCY = 0x1003;
287 * Characters with the digit property.
288 * This is equivalent to the rule value "digit".
291 public final static int DIGIT = 0x1004;
293 * The limit of the reorder codes. This is intended for use in range checking
294 * and enumeration of the reorder codes.
// Exclusive upper bound: DIGIT + 1.
297 public final static int LIMIT = 0x1005;
300 // public methods --------------------------------------------------------
302 // public setters --------------------------------------------------------
305 * Sets this Collator's strength property. The strength property
306 * determines the minimum level of difference considered significant
307 * during comparison.</p>
309 * <p>The default strength for the Collator is TERTIARY, unless specified
310 * otherwise by the locale used to create the Collator.</p>
312 * <p>See the Collator class description for an example of use.</p>
313 * @param newStrength the new strength value.
320 * @throws IllegalArgumentException if the new strength value is not one
321 * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
// Validates newStrength against the five strength constants and stores it in m_strength_.
// NOTE(review): the condition guarding the UnsupportedOperationException below is not visible
// in this excerpt; presumably a frozen-state check -- confirm against the full file.
324 public void setStrength(int newStrength)
327 throw new UnsupportedOperationException("Attempt to modify frozen object");
// Any value other than PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL is rejected.
330 if ((newStrength != PRIMARY) &&
331 (newStrength != SECONDARY) &&
332 (newStrength != TERTIARY) &&
333 (newStrength != QUATERNARY) &&
334 (newStrength != IDENTICAL)) {
335 throw new IllegalArgumentException("Incorrect comparison level.");
// Only reached with a validated value.
337 m_strength_ = newStrength;
342 * @deprecated This API is ICU internal only.
// Variant of setStrength that declares a Collator return type; the visible body delegates to
// setStrength(newStrength), so the same validation applies.
// NOTE(review): the return statement is not visible in this excerpt; presumably returns this
// collator to allow call chaining -- confirm.
344 public Collator setStrength2(int newStrength)
346 setStrength(newStrength);
351 * Sets the decomposition mode of this Collator. Setting this
352 * decomposition property with CANONICAL_DECOMPOSITION allows the
353 * Collator to handle un-normalized text properly, producing the
354 * same results as if the text were normalized. If
355 * NO_DECOMPOSITION is set, it is the user's responsibility to
356 * ensure that all text is already in the appropriate form before
357 * a comparison or before getting a CollationKey. Adjusting
358 * decomposition mode allows the user to select between faster and
359 * more complete collation behavior.</p>
361 * <p>Since a great many of the world's languages do not require
362 * text normalization, most locales set NO_DECOMPOSITION as the
363 * default decomposition mode.</p>
365 * The default decomposition mode for the Collator is
366 * NO_DECOMPOSITION, unless specified otherwise by the locale used
367 * to create the Collator.</p>
369 * <p>See getDecomposition for a description of decomposition
372 * @param decomposition the new decomposition mode
373 * @see #getDecomposition
374 * @see #NO_DECOMPOSITION
375 * @see #CANONICAL_DECOMPOSITION
376 * @throws IllegalArgumentException If the given value is not a valid
377 * decomposition mode.
// Public setter: hands the mode to internalSetDecomposition, which validates and stores it.
// NOTE(review): the condition guarding the UnsupportedOperationException below is not visible
// in this excerpt; presumably a frozen-state check -- confirm against the full file.
380 public void setDecomposition(int decomposition)
383 throw new UnsupportedOperationException("Attempt to modify frozen object");
385 internalSetDecomposition(decomposition);
389 * Internal set decomposition call to work around frozen state because of self-modification
390 * in the RuleBasedCollator. This method <b>must</b> only be called by code that has
391 * passed the frozen check already <b>and</b> has the lock if the Collator is frozen.
392 * Better still this method should go away and RuleBasedCollator.getSortKeyBytes()
393 * should be fixed to not self-modify.
394 * @param decomposition
// Accepts only NO_DECOMPOSITION or CANONICAL_DECOMPOSITION, stores the mode in
// m_decomposition_, and requests the FCD normalizer when decomposition is enabled.
// No frozen-state check is visible in this method.
397 protected void internalSetDecomposition(int decomposition)
399 if ((decomposition != NO_DECOMPOSITION) &&
400 (decomposition != CANONICAL_DECOMPOSITION)) {
401 throw new IllegalArgumentException("Wrong decomposition mode.");
403 m_decomposition_ = decomposition;
// The normalizer returned below is discarded; the call is made only for its
// initialization effect.
404 if (decomposition != NO_DECOMPOSITION) {
405 // ensure the FCD data is initialized
406 Norm2AllModes.getFCDNormalizer2();
411 * Sets the reordering codes for this collator.
412 * <p>Collation reordering allows scripts and some other defined blocks of characters
413 * to be moved relative to each other as a block. This reordering is done on top of
414 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
415 * at the start and/or the end of the collation order. These groups are specified using
416 * UScript codes and Collator.ReorderCodes entries.
417 * <p>By default, reordering codes specified for the start of the order are placed in the
418 * order given after a group of "special" non-script blocks. These special groups of characters
419 * are space, punctuation, symbol, currency, and digit. These special groups are represented with
420 * Collator.ReorderCodes entries. Script groups can be intermingled with
421 * these special non-script blocks if those special blocks are explicitly specified in the reordering.
422 * <p>The special code OTHERS stands for any script that is not explicitly
423 * mentioned in the list of reordering codes given. Anything that is after OTHERS
424 * will go at the very end of the reordering in the order given.
425 * <p>The special reorder code DEFAULT will reset the reordering for this collator
426 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
427 * was specified when this collator was created from resource data or from rules. The
428 * DEFAULT code <b>must</b> be the sole code supplied when it is used. If not,
429 * that will result in a U_ILLEGAL_ARGUMENT_ERROR being set.
430 * <p>The special reorder code NONE will remove any reordering for this collator.
431 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
432 * NONE code <b>must</b> be the sole code supplied when it is used.
433 * @param order the reordering codes to apply to this collator; if this is null or an empty array
434 * then this clears any existing reordering
435 * @see #getReorderCodes
436 * @see #getEquivalentReorderCodes
437 * @see Collator.ReorderCodes
// The only statement visible in this base implementation throws
// UnsupportedOperationException (without a message); the order argument is not read here.
441 public void setReorderCodes(int... order)
443 throw new UnsupportedOperationException();
446 // public getters --------------------------------------------------------
449 * Returns the Collator for the current default locale.
450 * The default locale is determined by java.util.Locale.getDefault().
451 * @return the Collator for the default locale (for example, en_US) if it
452 * is created successfully. Otherwise if there is no Collator
453 * associated with the current locale, the default UCA collator
455 * @see java.util.Locale#getDefault()
456 * @see #getInstance(Locale)
// Convenience overload: delegates to getInstance(ULocale).
// The default locale is obtained from ULocale.getDefault().
459 public static final Collator getInstance()
461 return getInstance(ULocale.getDefault());
465 * Clones the collator.
467 * @return a clone of this collator.
// Defers entirely to super.clone(); no field is copied explicitly in this method.
469 public Object clone() throws CloneNotSupportedException {
470 return super.clone();
473 // begin registry stuff
476 * A factory used with registerFactory to register multiple collators and provide
477 * display names for them. If standard locale display names are sufficient,
478 * Collator instances may be registered instead.
479 * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
480 * ULocale instead of Locale. Instead of overriding createCollator(Locale),
481 * new implementations should override createCollator(ULocale). Note that
482 * one of these two methods <b>MUST</b> be overridden or else an infinite
// Base class for collator factories. Subclasses must implement getSupportedLocaleIDs() and
// must override at least one of the two createCollator overloads, because as written below
// each overload simply calls the other.
486 public static abstract class CollatorFactory {
488 * Return true if this factory will be visible. Default is true.
489 * If not visible, the locales supported by this factory will not
490 * be listed by getAvailableLocales.
492 * @return true if this factory is visible
// NOTE(review): the body of visible() is not visible in this excerpt.
495 public boolean visible() {
500 * Return an instance of the appropriate collator. If the locale
501 * is not supported, return null.
502 * <b>Note:</b> as of ICU4J 3.2, implementations should override
503 * this method instead of createCollator(Locale).
504 * @param loc the locale for which this collator is to be created.
505 * @return the newly created collator.
// Default: converts to java.util.Locale and calls the Locale overload, which in turn calls
// back into this method unless one of the two is overridden.
508 public Collator createCollator(ULocale loc) {
509 return createCollator(loc.toLocale());
513 * Return an instance of the appropriate collator. If the locale
514 * is not supported, return null.
515 * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
516 * createCollator(ULocale) instead of this method, and inherit this
517 * method's implementation. This method is no longer abstract
518 * and instead delegates to createCollator(ULocale).
519 * @param loc the locale for which this collator is to be created.
520 * @return the newly created collator.
// Default: converts to ULocale and calls the ULocale overload (see the note on that overload
// about the mutual delegation).
523 public Collator createCollator(Locale loc) {
524 return createCollator(ULocale.forLocale(loc));
528 * Return the name of the collator for the objectLocale, localized for the displayLocale.
529 * If objectLocale is not visible or not defined by the factory, return null.
530 * @param objectLocale the locale identifying the collator
531 * @param displayLocale the locale for which the display name of the collator should be localized
532 * @return the display name
// Converts both arguments with ULocale.forLocale and delegates to the ULocale overload.
535 public String getDisplayName(Locale objectLocale, Locale displayLocale) {
536 return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
540 * Return the name of the collator for the objectLocale, localized for the displayLocale.
541 * If objectLocale is not visible or not defined by the factory, return null.
542 * @param objectLocale the locale identifying the collator
543 * @param displayLocale the locale for which the display name of the collator should be localized
544 * @return the display name
// Looks up the base name of objectLocale in getSupportedLocaleIDs(); on a match the display
// name comes from objectLocale itself.
// NOTE(review): the statements around this lookup (including the no-match return) are not
// visible in this excerpt.
547 public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
549 Set<String> supported = getSupportedLocaleIDs();
550 String name = objectLocale.getBaseName();
551 if (supported.contains(name)) {
552 return objectLocale.getDisplayName(displayLocale);
559 * Return an unmodifiable collection of the locale names directly
560 * supported by this factory.
562 * @return the set of supported locale IDs.
// The only abstract member of this class.
565 public abstract Set<String> getSupportedLocaleIDs();
568 * Empty default constructor.
571 protected CollatorFactory() {
// Package-private abstraction of the collator service. The static factory, registration and
// display-name methods of Collator visible in this file forward to an instance of this class.
575 static abstract class ServiceShim {
576 abstract Collator getInstance(ULocale l);
// Both register methods return an opaque key object; unregister takes such a key.
577 abstract Object registerInstance(Collator c, ULocale l);
578 abstract Object registerFactory(CollatorFactory f);
579 abstract boolean unregister(Object k);
580 abstract Locale[] getAvailableLocales(); // TODO remove
581 abstract ULocale[] getAvailableULocales();
// ol = object locale, dl = display locale (named after the arguments callers in this file pass).
582 abstract String getDisplayName(ULocale ol, ULocale dl);
// Holder for the service implementation; assigned inside getShim().
585 private static ServiceShim shim;
// Loads com.ibm.icu.text.CollatorServiceShim reflectively by class name and stores the new
// instance in the static shim field.
// NOTE(review): the try statement, most of both catch bodies and the final return are not
// visible in this excerpt.
586 private static ServiceShim getShim() {
587 // Note: this instantiation is safe on loose-memory-model configurations
588 // despite lack of synchronization, since the shim instance has no state--
589 // it's all in the class init. The worst problem is we might instantiate
590 // two shim instances, but they'll share the same state so that's ok.
593 Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
594 shim = (ServiceShim)cls.newInstance();
596 catch (MissingResourceException e)
602 catch (Exception e) {
// NOTE(review): only e.getMessage() is carried over, so the original exception type, cause
// and stack trace are lost; consider passing e as the cause.
607 throw new RuntimeException(e.getMessage());
615 * {@icu} Returns the Collator for the desired locale.
616 * @param locale the desired locale.
617 * @return Collator for the desired locale if it is created successfully.
618 * Otherwise if there is no Collator
619 * associated with the current locale, a default UCA collator will
621 * @see java.util.Locale
622 * @see java.util.ResourceBundle
623 * @see #getInstance(Locale)
624 * @see #getInstance()
// Forwards the lookup to the service shim returned by getShim().
627 public static final Collator getInstance(ULocale locale) {
628 // fetching from service cache is faster than instantiation
629 return getShim().getInstance(locale);
633 * Returns the Collator for the desired locale.
634 * @param locale the desired locale.
635 * @return Collator for the desired locale if it is created successfully.
636 * Otherwise if there is no Collator
637 * associated with the current locale, a default UCA collator will
639 * @see java.util.Locale
640 * @see java.util.ResourceBundle
641 * @see #getInstance(ULocale)
642 * @see #getInstance()
// Converts the java.util.Locale with ULocale.forLocale and delegates to getInstance(ULocale).
645 public static final Collator getInstance(Locale locale) {
646 return getInstance(ULocale.forLocale(locale));
650 * {@icu} Registers a collator as the default collator for the provided locale. The
651 * collator should not be modified after it is registered.
653 * @param collator the collator to register
654 * @param locale the locale for which this is the default collator
655 * @return an object that can be used to unregister the registered collator.
// Forwards to the service shim (created on demand by getShim()) and returns the key object
// the shim hands back.
659 public static final Object registerInstance(Collator collator, ULocale locale) {
660 return getShim().registerInstance(collator, locale);
664 * {@icu} Registers a collator factory.
666 * @param factory the factory to register
667 * @return an object that can be used to unregister the registered factory.
// Forwards to the service shim (created on demand by getShim()) and returns the key object
// the shim hands back.
671 public static final Object registerFactory(CollatorFactory factory) {
672 return getShim().registerFactory(factory);
676 * {@icu} Unregisters a collator previously registered using registerInstance.
677 * @param registryKey the object previously returned by registerInstance.
678 * @return true if the collator was successfully unregistered.
// Unlike the register methods, the visible return reads the static shim field directly
// instead of calling getShim().
// NOTE(review): the lines between the signature and this return are not visible in this
// excerpt; presumably a null check on shim precedes it -- confirm.
681 public static final boolean unregister(Object registryKey) {
685 return shim.unregister(registryKey);
689 * Returns the set of locales, as Locale objects, for which collators
690 * are installed. Note that Locale objects do not support RFC 3066.
691 * @return the list of locales in which collators are installed.
692 * This list includes any that have been registered, in addition to
693 * those that are installed with ICU4J.
// Two return paths are visible: one reads the locales straight from the ICU collation
// resource bundles, the other asks the already-assigned shim field (not getShim()).
// NOTE(review): the condition selecting between them is not visible in this excerpt;
// presumably a null check on shim -- confirm.
696 public static Locale[] getAvailableLocales() {
697 // TODO make this wrap getAvailableULocales later
699 return ICUResourceBundle.getAvailableLocales(
700 ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
702 return shim.getAvailableLocales();
706 * {@icu} Returns the set of locales, as ULocale objects, for which collators
707 * are installed. ULocale objects support RFC 3066.
708 * @return the list of locales in which collators are installed.
709 * This list includes any that have been registered, in addition to
710 * those that are installed with ICU4J.
// Two return paths are visible: one reads the locales straight from the ICU collation
// resource bundles, the other asks the already-assigned shim field (not getShim()).
// NOTE(review): the condition selecting between them is not visible in this excerpt;
// presumably a null check on shim -- confirm.
713 public static final ULocale[] getAvailableULocales() {
715 return ICUResourceBundle.getAvailableULocales(
716 ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
718 return shim.getAvailableULocales();
722 * The list of keywords for this service. This must be kept in sync with
726 private static final String[] KEYWORDS = { "collation" };
729 * The resource name for this service. Note that this is not the same as
730 * the keyword for this service.
733 private static final String RESOURCE = "collations";
736 * The resource bundle base name for this service.
740 private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
743 * {@icu} Returns an array of all possible keywords that are relevant to
744 * collation. At this point, the only recognized keyword for this
745 * service is "collation".
746 * @return an array of valid collation keywords.
747 * @see #getKeywordValues
// NOTE(review): the body is not visible in this excerpt; presumably returns the KEYWORDS
// array declared in this class -- confirm, and check whether callers could mutate it.
750 public static final String[] getKeywords() {
755 * {@icu} Given a keyword, returns an array of all values for
756 * that keyword that are currently in use.
757 * @param keyword one of the keywords returned by getKeywords.
// Only KEYWORDS[0] is accepted; any other keyword is rejected with IllegalArgumentException.
// A null keyword fails on the equals call itself with a NullPointerException.
761 public static final String[] getKeywordValues(String keyword) {
762 if (!keyword.equals(KEYWORDS[0])) {
763 throw new IllegalArgumentException("Invalid keyword: " + keyword);
// The values are read from resource data using this service's BASE and RESOURCE names.
765 return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
769 * {@icu} Given a key and a locale, returns an array of string values in a preferred
770 * order that would make a difference. These are all and only those values where
771 * the open (creation) of the service with the locale formed from the input locale
772 * plus input keyword and that value has different behavior than creation with the
773 * input locale alone.
774 * @param key one of the keys supported by this service. For now, only
775 * "collation" is supported.
776 * @param locale the locale
777 * @param commonlyUsed if set to true it will return only commonly used values
778 * with the given locale in preferred order. Otherwise,
779 * it will return all the available values for the locale.
780 * @return an array of string values for the given key and the locale.
// Collects the keys found under "collations" in the collation bundle for the locale's base
// name and in each of its parents, tracking the "default" entry separately.
// Neither key nor commonlyUsed is read in the lines visible here.
783 public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
784 boolean commonlyUsed) {
785 // Note: The parameter commonlyUsed is actually not used.
786 // The switch is in the method signature for consistency
787 // with other locale services.
789 // Read available collation values from collation bundles
790 String baseLoc = locale.getBaseName();
791 LinkedList<String> values = new LinkedList<String>();
793 UResourceBundle bundle = UResourceBundle.getBundleInstance(
794 ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc);
796 String defcoll = null;
// Walk from the requested bundle up through its parents until getParent() yields null.
797 while (bundle != null) {
798 UResourceBundle collations = bundle.get("collations");
799 Enumeration<String> collEnum = collations.getKeys();
800 while (collEnum.hasMoreElements()) {
801 String collkey = collEnum.nextElement();
802 if (collkey.equals("default")) {
// Only the first "default" encountered is kept, i.e. the one from the bundle nearest
// to the requested locale.
803 if (defcoll == null) {
805 defcoll = collations.getString("default");
// values.contains is a linear scan of the LinkedList; it keeps each key listed once.
// NOTE(review): the statement that adds collkey to values is not visible in this excerpt.
807 } else if (!values.contains(collkey)) {
811 bundle = ((ICUResourceBundle)bundle).getParent();
814 Iterator<String> itr = values.iterator();
815 String[] result = new String[values.size()];
// Entries equal to defcoll are skipped in this copy loop.
// NOTE(review): the declaration of idx, any placement of defcoll in result, and the return
// statement are not visible in this excerpt.
818 while (itr.hasNext()) {
819 String collKey = itr.next();
820 if (!collKey.equals(defcoll)) {
821 result[idx++] = collKey;
828 * {@icu} Returns the functionally equivalent locale for the given
829 * requested locale, with respect to given keyword, for the
830 * collation service. If two locales return the same result, then
831 * collators instantiated for these locales will behave
832 * equivalently. The converse is not always true; two collators
833 * may in fact be equivalent, but return different results, due to
834 * internal details. The return result has no other meaning than
835 * that stated above, and implies nothing as to the relationship
836 * between the two locales. This is intended for use by
837 * applications who wish to cache collators, or otherwise reuse
838 * collators when possible. The functional equivalent may change
839 * over time. For more information, please see the <a
840 * href="http://www.icu-project.org/userguide/locale.html#services">
841 * Locales and Services</a> section of the ICU User Guide.
842 * @param keyword a particular keyword as enumerated by
844 * @param locID The requested locale
845 * @param isAvailable If non-null, isAvailable[0] will receive an
846 * output boolean that indicates whether the requested locale was
847 * 'available' to the collation service. If non-null, isAvailable
848 * must have length >= 1.
// Forwards to ICUResourceBundle.getFunctionalEquivalent using this service's BASE and
// RESOURCE names and the ICU data class loader; isAvailable is passed through unchanged.
// NOTE(review): the locID parameter line is not visible in this excerpt, and the meaning of
// the trailing `true` argument cannot be determined from here -- confirm against
// ICUResourceBundle.
852 public static final ULocale getFunctionalEquivalent(String keyword,
854 boolean isAvailable[]) {
855 return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE,
856 keyword, locID, isAvailable, true);
860 * {@icu} Returns the functionally equivalent locale for the given
861 * requested locale, with respect to given keyword, for the
863 * @param keyword a particular keyword as enumerated by
865 * @param locID The requested locale
867 * @see #getFunctionalEquivalent(String,ULocale,boolean[])
// Convenience overload: calls the three-argument form with null for isAvailable, so no
// availability flag is reported back.
870 public static final ULocale getFunctionalEquivalent(String keyword,
872 return getFunctionalEquivalent(keyword, locID, null);
876 * {@icu} Returns the name of the collator for the objectLocale, localized for the
878 * @param objectLocale the locale of the collator
879 * @param displayLocale the locale for the collator's display name
880 * @return the display name
// Converts both java.util.Locale arguments with ULocale.forLocale and forwards to the
// service shim.
883 static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
884 return getShim().getDisplayName(ULocale.forLocale(objectLocale),
885 ULocale.forLocale(displayLocale));
889 * {@icu} Returns the name of the collator for the objectLocale, localized for the
891 * @param objectLocale the locale of the collator
892 * @param displayLocale the locale for the collator's display name
893 * @return the display name
// Forwards both locales unchanged to the service shim.
896 static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
897 return getShim().getDisplayName(objectLocale, displayLocale);
901 * {@icu} Returns the name of the collator for the objectLocale, localized for the
902 * default <code>DISPLAY</code> locale.
903 * @param objectLocale the locale of the collator
904 * @return the display name
905 * @see com.ibm.icu.util.ULocale.Category#DISPLAY
// Converts objectLocale with ULocale.forLocale and uses ULocale.getDefault(Category.DISPLAY)
// as the display locale.
908 static public String getDisplayName(Locale objectLocale) {
909 return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY));
913 * {@icu} Returns the name of the collator for the objectLocale, localized for the
914 * default <code>DISPLAY</code> locale.
915 * @param objectLocale the locale of the collator
916 * @return the display name
917 * @see com.ibm.icu.util.ULocale.Category#DISPLAY
// Uses ULocale.getDefault(Category.DISPLAY) as the display locale and forwards to the
// service shim.
920 static public String getDisplayName(ULocale objectLocale) {
921 return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY));
925 * Returns this Collator's strength property. The strength property
926 * determines the minimum level of difference considered significant.
928 * {@icunote} This can return QUATERNARY strength, which is not supported by the
931 * See the Collator class description for more details.
933 * @return this Collator's current strength property.
// NOTE(review): the body is not visible in this excerpt; presumably returns m_strength_,
// the field written by setStrength -- confirm.
942 public int getStrength()
948 * Returns the decomposition mode of this Collator. The decomposition mode
949 * determines how Unicode composed characters are handled.
952 * See the Collator class description for more details.
954 * @return the decomposition mode
955 * @see #setDecomposition
956 * @see #NO_DECOMPOSITION
957 * @see #CANONICAL_DECOMPOSITION
960 public int getDecomposition()
962 return m_decomposition_;
965 // public other methods -------------------------------------------------
968 * Compares the equality of two text Strings using
969 * this Collator's rules, strength and decomposition mode. Convenience method.
970 * @param source the source string to be compared.
971 * @param target the target string to be compared.
972 * @return true if the strings are equal according to the collation
973 * rules, otherwise false.
975 * @throws NullPointerException thrown if either arguments is null.
978 public boolean equals(String source, String target)
980 return (compare(source, target) == 0);
984 * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
986 * @return a pointer to a UnicodeSet object containing all the
987 * code points and sequences that may sort differently than
991 public UnicodeSet getTailoredSet()
993 return new UnicodeSet(0, 0x10FFFF);
997 * Compares the source text String to the target text String according to
998 * this Collator's rules, strength and decomposition mode.
999 * Returns an integer less than,
1000 * equal to or greater than zero depending on whether the source String is
1001 * less than, equal to or greater than the target String. See the Collator
1002 * class description for an example of use.
1004 * @param source the source String.
1005 * @param target the target String.
1006 * @return Returns an integer value. Value is less than zero if source is
1007 * less than target, value is zero if source and target are equal,
1008 * value is greater than zero if source is greater than target.
1010 * @see #getCollationKey
1011 * @throws NullPointerException thrown if either argument is null.
1014 public abstract int compare(String source, String target);
1017 * Compares the source Object to the target Object.
1019 * @param source the source Object.
1020 * @param target the target Object.
1021 * @return Returns an integer value. Value is less than zero if source is
1022 * less than target, value is zero if source and target are equal,
1023 * value is greater than zero if source is greater than target.
1024 * @throws ClassCastException thrown if either arguments cannot be cast to String.
1027 public int compare(Object source, Object target) {
1028 return compare((String)source, (String)target);
1033 * Transforms the String into a CollationKey suitable for efficient
1034 * repeated comparison. The resulting key depends on the collator's
1035 * rules, strength and decomposition mode.
1037 * <p>See the CollationKey class documentation for more information.</p>
1038 * @param source the string to be transformed into a CollationKey.
1039 * @return the CollationKey for the given String based on this Collator's
1040 * collation rules. If the source String is null, a null
1041 * CollationKey is returned.
1043 * @see #compare(String, String)
1044 * @see #getRawCollationKey
1047 public abstract CollationKey getCollationKey(String source);
1050 * {@icu} Returns the simpler form of a CollationKey for the String source following
1051 * the rules of this Collator and stores the result into the user provided argument
1052 * key. If key has a internal byte array of length that's too small for the result,
1053 * the internal byte array will be grown to the exact required size.
1054 * @param source the text String to be transformed into a RawCollationKey
1055 * @return If key is null, a new instance of RawCollationKey will be
1056 * created and returned, otherwise the user provided key will be
1058 * @see #compare(String, String)
1059 * @see #getCollationKey
1060 * @see RawCollationKey
1063 public abstract RawCollationKey getRawCollationKey(String source,
1064 RawCollationKey key);
1067 * {@icu} Variable top is a two byte primary value which causes all the codepoints
1068 * with primary values that are less or equal than the variable top to be
1069 * shifted when alternate handling is set to SHIFTED.
1072 * Sets the variable top to a collation element value of a string supplied.
1074 * @param varTop one or more (if contraction) characters to which the
1075 * variable top should be set
1076 * @return a int value containing the value of the variable top in upper 16
1077 * bits. Lower 16 bits are undefined.
1078 * @throws IllegalArgumentException is thrown if varTop argument is not
1079 * a valid variable top element. A variable top element is
1080 * invalid when it is a contraction that does not exist in the
1081 * Collation order or when the PRIMARY strength collation
1082 * element for the variable top has more than two bytes
1083 * @see #getVariableTop
1084 * @see RuleBasedCollator#setAlternateHandlingShifted
1087 public abstract int setVariableTop(String varTop);
1090 * {@icu} Returns the variable top value of a Collator.
1091 * Lower 16 bits are undefined and should be ignored.
1092 * @return the variable top value of a Collator.
1093 * @see #setVariableTop
1096 public abstract int getVariableTop();
1099 * {@icu} Sets the variable top to a collation element value supplied.
1100 * Variable top is set to the upper 16 bits.
1101 * Lower 16 bits are ignored.
1102 * @param varTop Collation element value, as returned by setVariableTop or
1104 * @see #getVariableTop
1105 * @see #setVariableTop
1108 public abstract void setVariableTop(int varTop);
1111 * {@icu} Returns the version of this collator object.
1112 * @return the version object associated with this collator
1115 public abstract VersionInfo getVersion();
1118 * {@icu} Returns the UCA version of this collator object.
1119 * @return the version object associated with this collator
1122 public abstract VersionInfo getUCAVersion();
1125 * Retrieves the reordering codes for this collator.
1126 * These reordering codes are a combination of UScript codes and ReorderCodes.
1127 * @return a copy of the reordering codes for this collator;
1128 * if none are set then returns an empty array
1129 * @see #setReorderCodes
1130 * @see #getEquivalentReorderCodes
1131 * @see Collator.ReorderCodes
1135 public int[] getReorderCodes()
1137 throw new UnsupportedOperationException();
1141 * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
1142 * codes are grouped and must reorder together.
1144 * @param reorderCode code for which equivalents to be retrieved
1145 * @return the set of all reorder codes in the same group as the given reorder code.
1146 * @see #setReorderCodes
1147 * @see #getReorderCodes
1148 * @see Collator.ReorderCodes
1152 public static int[] getEquivalentReorderCodes(int reorderCode)
1154 throw new UnsupportedOperationException();
1158 // Freezable interface implementation -------------------------------------------------
1161 * Determines whether the object has been frozen or not.
1164 public boolean isFrozen() {
1169 * Freezes the collaotr.
1170 * @return the collator itself.
1173 public Collator freeze() {
1174 throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1178 * Provides for the clone operation. Any clone is initially unfrozen.
1181 public Collator cloneAsThawed() {
1182 throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1185 // protected constructor -------------------------------------------------
/**
 * Empty default constructor to make javadocs happy
 */
protected Collator()
{
    // NOTE(review): the body was absent from the visible source; restored as
    // the empty body that the comment above describes.
}
1195 // package private methods -----------------------------------------------
1197 // private data members --------------------------------------------------
1200 * Collation strength
1202 private int m_strength_ = TERTIARY;
1205 * Decomposition mode
1207 private int m_decomposition_ = CANONICAL_DECOMPOSITION;
1209 private static final boolean DEBUG = ICUDebug.enabled("collator");
1211 // private methods -------------------------------------------------------
1213 // end registry stuff
1215 // -------- BEGIN ULocale boilerplate --------
1218 * {@icu} Returns the locale that was used to create this object, or null.
1219 * This may may differ from the locale requested at the time of
1220 * this object's creation. For example, if an object is created
1221 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
1222 * drawn from <tt>en</tt> (the <i>actual</i> locale), and
1223 * <tt>en_US</tt> may be the most specific locale that exists (the
1224 * <i>valid</i> locale).
1226 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
1227 * contains a partial preview implementation. The * <i>actual</i>
1228 * locale is returned correctly, but the <i>valid</i> locale is
1229 * not, in most cases.
1230 * @param type type of information requested, either {@link
1231 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
1232 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
1233 * @return the information specified by <i>type</i>, or null if
1234 * this object was not constructed from locale data.
1235 * @see com.ibm.icu.util.ULocale
1236 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1237 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1238 * @draft ICU 2.8 (retain)
1239 * @provisional This API might change or be removed in a future release.
1241 public final ULocale getLocale(ULocale.Type type) {
1242 return type == ULocale.ACTUAL_LOCALE ?
1243 this.actualLocale : this.validLocale;
1247 * Set information about the locales that were used to create this
1248 * object. If the object was not constructed from locale data,
1249 * both arguments should be set to null. Otherwise, neither
1250 * should be null. The actual locale must be at the same level or
1251 * less specific than the valid locale. This method is intended
1252 * for use by factories or other entities that create objects of
1254 * @param valid the most specific locale containing any resource
1256 * @param actual the locale containing data used to construct this
1258 * @see com.ibm.icu.util.ULocale
1259 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1260 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1262 final void setLocale(ULocale valid, ULocale actual) {
1263 // Change the following to an assertion later
1265 // The following would not happen since the method is called
1266 // by other protected functions that checks and makes sure that
1267 // valid and actual are not null before passing
1268 if ((valid == null) != (actual == null)) {
1269 throw new IllegalArgumentException();
1272 // Another check we could do is that the actual locale is at
1273 // the same level or less specific than the valid locale.
1274 this.validLocale = valid;
1275 this.actualLocale = actual;
1279 * The most specific locale containing any resource data, or null.
1280 * @see com.ibm.icu.util.ULocale
1282 private ULocale validLocale;
1285 * The locale containing data used to construct this object, or
1287 * @see com.ibm.icu.util.ULocale
1289 private ULocale actualLocale;
1291 // -------- END ULocale boilerplate --------