2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import java.util.Comparator;
\r
10 import java.util.Enumeration;
\r
11 import java.util.Iterator;
\r
12 import java.util.LinkedList;
\r
13 import java.util.Locale;
\r
14 import java.util.MissingResourceException;
\r
15 import java.util.Set;
\r
17 import com.ibm.icu.impl.ICUDebug;
\r
18 import com.ibm.icu.impl.ICUResourceBundle;
\r
19 import com.ibm.icu.util.ULocale;
\r
20 import com.ibm.icu.util.UResourceBundle;
\r
21 import com.ibm.icu.util.VersionInfo;
\r
24 * {@icuenhanced java.text.Collator}.{@icu _usage_}
\r
26 * <p>Collator performs locale-sensitive string comparison. A concrete
\r
27 * subclass, RuleBasedCollator, allows customization of the collation
\r
28 * ordering by the use of rule sets.</p>
\r
30 * <p>Following the <a href="http://www.unicode.org">Unicode
\r
31 * Consortium</a>'s specifications for the
\r
32 * <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
\r
33 * Algorithm (UCA)</a>, there are 5 different levels of strength used
\r
37 * <li>PRIMARY strength: Typically, this is used to denote differences between
\r
38 * base characters (for example, "a" < "b").
\r
39 * It is the strongest difference. For example, dictionaries are divided
\r
40 * into different sections by base character.
\r
41 * <li>SECONDARY strength: Accents in the characters are considered secondary
\r
42 * differences (for example, "as" < "às" < "at"). Other
\r
44 * between letters can also be considered secondary differences, depending
\r
45 * on the language. A secondary difference is ignored when there is a
\r
46 * primary difference anywhere in the strings.
\r
47 * <li>TERTIARY strength: Upper and lower case differences in characters are
\r
48 * distinguished at tertiary strength (for example, "ao" < "Ao" <
\r
49 * "aò"). In addition, a variant of a letter differs from the base
\r
50 * form on the tertiary strength (such as "A" and "Ⓐ"). Another
\r
52 * difference between large and small Kana. A tertiary difference is ignored
\r
53 * when there is a primary or secondary difference anywhere in the strings.
\r
54 * <li>QUATERNARY strength: When punctuation is ignored
\r
55 * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
\r
56 * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
\r
57 * strength, an additional strength level can
\r
58 * be used to distinguish words with and without punctuation (for example,
\r
59 * "ab" < "a-b" < "aB").
\r
60 * This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
\r
61 * difference. The QUATERNARY strength should only be used if ignoring
\r
62 * punctuation is required.
\r
63 * <li>IDENTICAL strength:
\r
64 * When all other strengths are equal, the IDENTICAL strength is used as a
\r
65 * tiebreaker. The Unicode code point values of the NFD form of each string
\r
66 * are compared, just in case there is no difference.
\r
67 * For example, Hebrew cantillation marks are only distinguished at this
\r
68 * strength. This strength should be used sparingly, as only code point
\r
69 * value differences between two strings is an extremely rare occurrence.
\r
70 * Using this strength substantially decreases the performance for both
\r
71 * comparison and collation key generation APIs. This strength also
\r
72 * increases the size of the collation key.
\r
75 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
\r
76 * the canonical decomposition mode and one that does not use any decomposition.
\r
77 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
\r
78 * is not supported here. If the canonical
\r
79 * decomposition mode is set, the Collator handles un-normalized text properly,
\r
80 * producing the same results as if the text were normalized in NFD. If
\r
81 * canonical decomposition is turned off, it is the user's responsibility to
\r
82 * ensure that all text is already in the appropriate form before performing
\r
83 * a comparison or before getting a CollationKey.</p>
\r
85 * <p>For more information about the collation service see the
\r
86 * <a href="http://www.icu-project.org/userguide/Collate_Intro.html">users
\r
89 * <p>Examples of use
\r
91 * // Get the Collator for US English and set its strength to PRIMARY
\r
92 * Collator usCollator = Collator.getInstance(Locale.US);
\r
93 * usCollator.setStrength(Collator.PRIMARY);
\r
94 * if (usCollator.compare("abc", "ABC") == 0) {
\r
95 * System.out.println("Strings are equivalent");
\r
98 * The following example shows how to compare two strings using the
\r
99 * Collator for the default locale.
\r
101 * // Compare two strings in the default locale
\r
102 * Collator myCollator = Collator.getInstance();
\r
103 * myCollator.setDecomposition(NO_DECOMPOSITION);
\r
104 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
\r
105 * System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
\r
106 * myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
\r
107 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
\r
108 * System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
\r
111 * System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
\r
115 * System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
\r
119 * @see RuleBasedCollator
\r
120 * @see CollationKey
\r
121 * @author Syn Wee Quek
\r
124 public abstract class Collator implements Comparator<Object>, Cloneable
\r
126 // public data members ---------------------------------------------------
\r
129 * Strongest collator strength value. Typically used to denote differences
\r
130 * between base characters. See class documentation for more explanation.
\r
131 * @see #setStrength
\r
132 * @see #getStrength
\r
135 public final static int PRIMARY = 0;
\r
138 * Second level collator strength value.
\r
139 * Accents in the characters are considered secondary differences.
\r
140 * Other differences between letters can also be considered secondary
\r
141 * differences, depending on the language.
\r
142 * See class documentation for more explanation.
\r
143 * @see #setStrength
\r
144 * @see #getStrength
\r
147 public final static int SECONDARY = 1;
\r
150 * Third level collator strength value.
\r
151 * Upper and lower case differences in characters are distinguished at this
\r
152 * strength level. In addition, a variant of a letter differs from the base
\r
153 * form on the tertiary level.
\r
154 * See class documentation for more explanation.
\r
155 * @see #setStrength
\r
156 * @see #getStrength
\r
159 public final static int TERTIARY = 2;
\r
162 * {@icu} Fourth level collator strength value.
\r
163 * When punctuation is ignored
\r
164 * <a href="http://www.icu-project.org/userguide/Collate_Concepts.html#Ignoring_Punctuation">
\r
165 * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
\r
166 * strength, an additional strength level can
\r
167 * be used to distinguish words with and without punctuation.
\r
168 * See class documentation for more explanation.
\r
169 * @see #setStrength
\r
170 * @see #getStrength
\r
173 public final static int QUATERNARY = 3;
\r
176 * Smallest Collator strength value. When all other strengths are equal,
\r
177 * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
\r
178 * values of the NFD form of each string are compared, just in case there
\r
179 * is no difference.
\r
180 * See class documentation for more explanation.
\r
183 * Note this value is different from JDK's
\r
187 public final static int IDENTICAL = 15;
\r
190 * {@icunote} This is for backwards compatibility with Java APIs only. It
\r
191 * should not be used, IDENTICAL should be used instead. ICU's
\r
192 * collation does not support Java's FULL_DECOMPOSITION mode.
\r
195 public final static int FULL_DECOMPOSITION = IDENTICAL;
\r
198 * Decomposition mode value. With NO_DECOMPOSITION set, Strings
\r
199 * will not be decomposed for collation. This is the default
\r
200 * decomposition setting unless otherwise specified by the locale
\r
201 * used to create the Collator.</p>
\r
203 * <p><strong>Note</strong> this value is different from the JDK's.</p>
\r
204 * @see #CANONICAL_DECOMPOSITION
\r
205 * @see #getDecomposition
\r
206 * @see #setDecomposition
\r
209 public final static int NO_DECOMPOSITION = 16;
\r
212 * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
\r
213 * characters that are canonical variants according to the Unicode standard
\r
214 * will be decomposed for collation.</p>
\r
216 * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
\r
217 * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
\r
218 * Unicode Technical Report #15</a>.
\r
220 * @see #NO_DECOMPOSITION
\r
221 * @see #getDecomposition
\r
222 * @see #setDecomposition
\r
225 public final static int CANONICAL_DECOMPOSITION = 17;
\r
227 // public methods --------------------------------------------------------
\r
229 // public setters --------------------------------------------------------
\r
232 * Sets this Collator's strength property. The strength property
\r
233 * determines the minimum level of difference considered significant
\r
234 * during comparison.</p>
\r
236 * <p>The default strength for the Collator is TERTIARY, unless specified
\r
237 * otherwise by the locale used to create the Collator.</p>
\r
239 * <p>See the Collator class description for an example of use.</p>
\r
240 * @param newStrength the new strength value.
\r
241 * @see #getStrength
\r
247 * @throws IllegalArgumentException if the new strength value is not one
\r
248 * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
\r
251 public void setStrength(int newStrength)
\r
253 if ((newStrength != PRIMARY) &&
\r
254 (newStrength != SECONDARY) &&
\r
255 (newStrength != TERTIARY) &&
\r
256 (newStrength != QUATERNARY) &&
\r
257 (newStrength != IDENTICAL)) {
\r
258 throw new IllegalArgumentException("Incorrect comparison level.");
\r
260 m_strength_ = newStrength;
\r
265 * @deprecated This API is ICU internal only.
\r
267 public Collator setStrength2(int newStrength)
\r
269 setStrength(newStrength);
\r
274 * Sets the decomposition mode of this Collator. Setting this
\r
275 * decomposition property with CANONICAL_DECOMPOSITION allows the
\r
276 * Collator to handle un-normalized text properly, producing the
\r
277 * same results as if the text were normalized. If
\r
278 * NO_DECOMPOSITION is set, it is the user's responsibility to
\r
279 * ensure that all text is already in the appropriate form before
\r
280 * a comparison or before getting a CollationKey. Adjusting
\r
281 * decomposition mode allows the user to select between faster and
\r
282 * more complete collation behavior.</p>
\r
284 * <p>Since a great many of the world's languages do not require
\r
285 * text normalization, most locales set NO_DECOMPOSITION as the
\r
286 * default decomposition mode.</p>
\r
288 * The default decomposition mode for the Collator is
\r
289 * NO_DECOMPOSITION, unless specified otherwise by the locale used
\r
290 * to create the Collator.</p>
\r
292 * <p>See getDecomposition for a description of decomposition
\r
295 * @param decomposition the new decomposition mode
\r
296 * @see #getDecomposition
\r
297 * @see #NO_DECOMPOSITION
\r
298 * @see #CANONICAL_DECOMPOSITION
\r
299 * @throws IllegalArgumentException If the given value is not a valid
\r
300 * decomposition mode.
\r
303 public void setDecomposition(int decomposition)
\r
305 if ((decomposition != NO_DECOMPOSITION) &&
\r
306 (decomposition != CANONICAL_DECOMPOSITION)) {
\r
307 throw new IllegalArgumentException("Wrong decomposition mode.");
\r
309 m_decomposition_ = decomposition;
\r
312 // public getters --------------------------------------------------------
\r
315 * Returns the Collator for the current default locale.
\r
316 * The default locale is determined by java.util.Locale.getDefault().
\r
317 * @return the Collator for the default locale (for example, en_US) if it
\r
318 * is created successfully. Otherwise if there is no Collator
\r
319 * associated with the current locale, the default UCA collator
\r
320 * will be returned.
\r
321 * @see java.util.Locale#getDefault()
\r
322 * @see #getInstance(Locale)
\r
325 public static final Collator getInstance()
\r
327 return getInstance(ULocale.getDefault());
\r
331 * Clones the collator.
\r
333 * @return a clone of this collator.
\r
335 public Object clone() throws CloneNotSupportedException {
\r
336 return super.clone();
\r
339 // begin registry stuff
\r
342 * A factory used with registerFactory to register multiple collators and provide
\r
343 * display names for them. If standard locale display names are sufficient,
\r
344 * Collator instances may be registered instead.
\r
345 * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
\r
346 * ULocale instead of Locale. Instead of overriding createCollator(Locale),
\r
347 * new implementations should override createCollator(ULocale). Note that
\r
348 * one of these two methods <b>MUST</b> be overridden or else an infinite
\r
352 public static abstract class CollatorFactory {
\r
354 * Return true if this factory will be visible. Default is true.
\r
355 * If not visible, the locales supported by this factory will not
\r
356 * be listed by getAvailableLocales.
\r
358 * @return true if this factory is visible
\r
361 public boolean visible() {
\r
366 * Return an instance of the appropriate collator. If the locale
\r
367 * is not supported, return null.
\r
368 * <b>Note:</b> as of ICU4J 3.2, implementations should override
\r
369 * this method instead of createCollator(Locale).
\r
370 * @param loc the locale for which this collator is to be created.
\r
371 * @return the newly created collator.
\r
374 public Collator createCollator(ULocale loc) {
\r
375 return createCollator(loc.toLocale());
\r
379 * Return an instance of the appropriate collator. If the locale
\r
380 * is not supported, return null.
\r
381 * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
\r
382 * createCollator(ULocale) instead of this method, and inherit this
\r
383 * method's implementation. This method is no longer abstract
\r
384 * and instead delegates to createCollator(ULocale).
\r
385 * @param loc the locale for which this collator is to be created.
\r
386 * @return the newly created collator.
\r
389 public Collator createCollator(Locale loc) {
\r
390 return createCollator(ULocale.forLocale(loc));
\r
394 * Return the name of the collator for the objectLocale, localized for the displayLocale.
\r
395 * If objectLocale is not visible or not defined by the factory, return null.
\r
396 * @param objectLocale the locale identifying the collator
\r
397 * @param displayLocale the locale for which the display name of the collator should be localized
\r
398 * @return the display name
\r
401 public String getDisplayName(Locale objectLocale, Locale displayLocale) {
\r
402 return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
\r
406 * Return the name of the collator for the objectLocale, localized for the displayLocale.
\r
407 * If objectLocale is not visible or not defined by the factory, return null.
\r
408 * @param objectLocale the locale identifying the collator
\r
409 * @param displayLocale the locale for which the display name of the collator should be localized
\r
410 * @return the display name
\r
413 public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
\r
415 Set<String> supported = getSupportedLocaleIDs();
\r
416 String name = objectLocale.getBaseName();
\r
417 if (supported.contains(name)) {
\r
418 return objectLocale.getDisplayName(displayLocale);
\r
425 * Return an unmodifiable collection of the locale names directly
\r
426 * supported by this factory.
\r
428 * @return the set of supported locale IDs.
\r
431 public abstract Set<String> getSupportedLocaleIDs();
\r
434 * Empty default constructor.
\r
437 protected CollatorFactory() {
\r
441 static abstract class ServiceShim {
\r
442 abstract Collator getInstance(ULocale l);
\r
443 abstract Object registerInstance(Collator c, ULocale l);
\r
444 abstract Object registerFactory(CollatorFactory f);
\r
445 abstract boolean unregister(Object k);
\r
446 abstract Locale[] getAvailableLocales(); // TODO remove
\r
447 abstract ULocale[] getAvailableULocales();
\r
448 abstract String getDisplayName(ULocale ol, ULocale dl);
\r
451 private static ServiceShim shim;
\r
452 private static ServiceShim getShim() {
\r
453 // Note: this instantiation is safe on loose-memory-model configurations
\r
454 // despite lack of synchronization, since the shim instance has no state--
\r
455 // it's all in the class init. The worst problem is we might instantiate
\r
456 // two shim instances, but they'll share the same state so that's ok.
\r
457 if (shim == null) {
\r
459 Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
\r
460 shim = (ServiceShim)cls.newInstance();
\r
462 catch (MissingResourceException e)
\r
468 catch (Exception e) {
\r
471 e.printStackTrace();
\r
473 throw new RuntimeException(e.getMessage());
\r
481 * {@icu} Returns the Collator for the desired locale.
\r
482 * @param locale the desired locale.
\r
483 * @return Collator for the desired locale if it is created successfully.
\r
484 * Otherwise if there is no Collator
\r
485 * associated with the current locale, a default UCA collator will
\r
487 * @see java.util.Locale
\r
488 * @see java.util.ResourceBundle
\r
489 * @see #getInstance(Locale)
\r
490 * @see #getInstance()
\r
493 public static final Collator getInstance(ULocale locale) {
\r
494 // fetching from service cache is faster than instantiation
\r
495 return getShim().getInstance(locale);
\r
499 * Returns the Collator for the desired locale.
\r
500 * @param locale the desired locale.
\r
501 * @return Collator for the desired locale if it is created successfully.
\r
502 * Otherwise if there is no Collator
\r
503 * associated with the current locale, a default UCA collator will
\r
505 * @see java.util.Locale
\r
506 * @see java.util.ResourceBundle
\r
507 * @see #getInstance(ULocale)
\r
508 * @see #getInstance()
\r
511 public static final Collator getInstance(Locale locale) {
\r
512 return getInstance(ULocale.forLocale(locale));
\r
516 * {@icu} Registers a collator as the default collator for the provided locale. The
\r
517 * collator should not be modified after it is registered.
\r
519 * @param collator the collator to register
\r
520 * @param locale the locale for which this is the default collator
\r
521 * @return an object that can be used to unregister the registered collator.
\r
525 public static final Object registerInstance(Collator collator, ULocale locale) {
\r
526 return getShim().registerInstance(collator, locale);
\r
530 * {@icu} Registers a collator factory.
\r
532 * @param factory the factory to register
\r
533 * @return an object that can be used to unregister the registered factory.
\r
537 public static final Object registerFactory(CollatorFactory factory) {
\r
538 return getShim().registerFactory(factory);
\r
542 * {@icu} Unregisters a collator previously registered using registerInstance.
\r
543 * @param registryKey the object previously returned by registerInstance.
\r
544 * @return true if the collator was successfully unregistered.
\r
547 public static final boolean unregister(Object registryKey) {
\r
548 if (shim == null) {
\r
551 return shim.unregister(registryKey);
\r
555 * Returns the set of locales, as Locale objects, for which collators
\r
556 * are installed. Note that Locale objects do not support RFC 3066.
\r
557 * @return the list of locales in which collators are installed.
\r
558 * This list includes any that have been registered, in addition to
\r
559 * those that are installed with ICU4J.
\r
562 public static Locale[] getAvailableLocales() {
\r
563 // TODO make this wrap getAvailableULocales later
\r
564 if (shim == null) {
\r
565 ClassLoader cl = Collator.class.getClassLoader();
\r
566 return ICUResourceBundle.getAvailableLocales(
\r
567 ICUResourceBundle.ICU_COLLATION_BASE_NAME, cl);
\r
569 return shim.getAvailableLocales();
\r
573 * {@icu} Returns the set of locales, as ULocale objects, for which collators
\r
574 * are installed. ULocale objects support RFC 3066.
\r
575 * @return the list of locales in which collators are installed.
\r
576 * This list includes any that have been registered, in addition to
\r
577 * those that are installed with ICU4J.
\r
580 public static final ULocale[] getAvailableULocales() {
\r
581 if (shim == null) {
\r
582 ClassLoader cl = Collator.class.getClassLoader();
\r
583 return ICUResourceBundle.getAvailableULocales(
\r
584 ICUResourceBundle.ICU_COLLATION_BASE_NAME, cl);
\r
586 return shim.getAvailableULocales();
\r
590 * The list of keywords for this service. This must be kept in sync with
\r
591 * the resource data.
\r
594 private static final String[] KEYWORDS = { "collation" };
\r
597 * The resource name for this service. Note that this is not the same as
\r
598 * the keyword for this service.
\r
601 private static final String RESOURCE = "collations";
\r
604 * The resource bundle base name for this service.
\r
607 private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
\r
610 * {@icu} Returns an array of all possible keywords that are relevant to
\r
611 * collation. At this point, the only recognized keyword for this
\r
612 * service is "collation".
\r
613 * @return an array of valid collation keywords.
\r
614 * @see #getKeywordValues
\r
617 public static final String[] getKeywords() {
\r
622 * {@icu} Given a keyword, returns an array of all values for
\r
623 * that keyword that are currently in use.
\r
624 * @param keyword one of the keywords returned by getKeywords.
\r
625 * @see #getKeywords
\r
628 public static final String[] getKeywordValues(String keyword) {
\r
629 if (!keyword.equals(KEYWORDS[0])) {
\r
630 throw new IllegalArgumentException("Invalid keyword: " + keyword);
\r
632 return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
\r
636 * {@icu} Given a key and a locale, returns an array of string values in a preferred
\r
637 * order that would make a difference. These are all and only those values where
\r
638 * the open (creation) of the service with the locale formed from the input locale
\r
639 * plus input keyword and that value has different behavior than creation with the
\r
640 * input locale alone.
\r
641 * @param key one of the keys supported by this service. For now, only
\r
642 * "collation" is supported.
\r
643 * @param locale the locale
\r
644 * @param commonlyUsed if set to true it will return only commonly used values
\r
645 * with the given locale in preferred order. Otherwise,
\r
646 * it will return all the available values for the locale.
\r
647 * @return an array of string values for the given key and the locale.
\r
650 public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
\r
651 boolean commonlyUsed) {
\r
652 // Note: The parameter commonlyUsed is actually not used.
\r
653 // The switch is in the method signature for consistency
\r
654 // with other locale services.
\r
656 // Read available collation values from collation bundles
\r
657 String baseLoc = locale.getBaseName();
\r
658 LinkedList<String> values = new LinkedList<String>();
\r
660 UResourceBundle bundle = UResourceBundle.getBundleInstance(
\r
661 ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc);
\r
663 String defcoll = null;
\r
664 while (bundle != null) {
\r
665 UResourceBundle collations = bundle.get("collations");
\r
666 Enumeration<String> collEnum = collations.getKeys();
\r
667 while (collEnum.hasMoreElements()) {
\r
668 String collkey = collEnum.nextElement();
\r
669 if (collkey.equals("default")) {
\r
670 if (defcoll == null) {
\r
671 // Keep the default
\r
672 defcoll = collations.getString("default");
\r
674 } else if (!values.contains(collkey)) {
\r
675 values.add(collkey);
\r
678 bundle = ((ICUResourceBundle)bundle).getParent();
\r
681 Iterator<String> itr = values.iterator();
\r
682 String[] result = new String[values.size()];
\r
683 result[0] = defcoll;
\r
685 while (itr.hasNext()) {
\r
686 String collKey = itr.next();
\r
687 if (!collKey.equals(defcoll)) {
\r
688 result[idx++] = collKey;
\r
695 * {@icu} Returns the functionally equivalent locale for the given
\r
696 * requested locale, with respect to given keyword, for the
\r
697 * collation service. If two locales return the same result, then
\r
698 * collators instantiated for these locales will behave
\r
699 * equivalently. The converse is not always true; two collators
\r
700 * may in fact be equivalent, but return different results, due to
\r
701 * internal details. The return result has no other meaning than
\r
702 * that stated above, and implies nothing as to the relationship
\r
703 * between the two locales. This is intended for use by
\r
704 * applications who wish to cache collators, or otherwise reuse
\r
705 * collators when possible. The functional equivalent may change
\r
706 * over time. For more information, please see the <a
\r
707 * href="http://www.icu-project.org/userguide/locale.html#services">
\r
708 * Locales and Services</a> section of the ICU User Guide.
\r
709 * @param keyword a particular keyword as enumerated by
\r
711 * @param locID The requested locale
\r
712 * @param isAvailable If non-null, isAvailable[0] will receive an
\r
713 * output boolean that indicates whether the requested locale was
\r
714 * 'available' to the collation service. If non-null, isAvailable
\r
715 * must have length >= 1.
\r
716 * @return the locale
\r
719 public static final ULocale getFunctionalEquivalent(String keyword,
\r
721 boolean isAvailable[]) {
\r
722 ClassLoader cl = Collator.class.getClassLoader();
\r
723 return ICUResourceBundle.getFunctionalEquivalent(BASE, cl, RESOURCE,
\r
724 keyword, locID, isAvailable, true);
\r
728 * {@icu} Returns the functionally equivalent locale for the given
\r
729 * requested locale, with respect to given keyword, for the
\r
730 * collation service.
\r
731 * @param keyword a particular keyword as enumerated by
\r
733 * @param locID The requested locale
\r
734 * @return the locale
\r
735 * @see #getFunctionalEquivalent(String,ULocale,boolean[])
\r
738 public static final ULocale getFunctionalEquivalent(String keyword,
\r
740 return getFunctionalEquivalent(keyword, locID, null);
\r
744 * {@icu} Returns the name of the collator for the objectLocale, localized for the
\r
746 * @param objectLocale the locale of the collator
\r
747 * @param displayLocale the locale for the collator's display name
\r
748 * @return the display name
\r
751 static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
\r
752 return getShim().getDisplayName(ULocale.forLocale(objectLocale),
\r
753 ULocale.forLocale(displayLocale));
\r
757 * {@icu} Returns the name of the collator for the objectLocale, localized for the
\r
759 * @param objectLocale the locale of the collator
\r
760 * @param displayLocale the locale for the collator's display name
\r
761 * @return the display name
\r
764 static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
\r
765 return getShim().getDisplayName(objectLocale, displayLocale);
\r
769 * {@icu} Returns the name of the collator for the objectLocale, localized for the
\r
771 * @param objectLocale the locale of the collator
\r
772 * @return the display name
\r
775 static public String getDisplayName(Locale objectLocale) {
\r
776 return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault());
\r
780 * {@icu} Returns the name of the collator for the objectLocale, localized for the
\r
782 * @param objectLocale the locale of the collator
\r
783 * @return the display name
\r
786 static public String getDisplayName(ULocale objectLocale) {
\r
787 return getShim().getDisplayName(objectLocale, ULocale.getDefault());
\r
791 * Returns this Collator's strength property. The strength property
\r
792 * determines the minimum level of difference considered significant.
\r
794 * {@icunote} This can return QUATERNARY strength, which is not supported by the
\r
797 * See the Collator class description for more details.
\r
799 * @return this Collator's current strength property.
\r
800 * @see #setStrength
\r
808 public int getStrength()
\r
810 return m_strength_;
\r
814 * Returns the decomposition mode of this Collator. The decomposition mode
\r
815 * determines how Unicode composed characters are handled.
\r
818 * See the Collator class description for more details.
\r
820 * @return the decomposition mode
\r
821 * @see #setDecomposition
\r
822 * @see #NO_DECOMPOSITION
\r
823 * @see #CANONICAL_DECOMPOSITION
\r
826 public int getDecomposition()
\r
828 return m_decomposition_;
\r
831 // public other methods -------------------------------------------------
\r
834 * Compares the equality of two text Strings using
\r
835 * this Collator's rules, strength and decomposition mode. Convenience method.
\r
836 * @param source the source string to be compared.
\r
837 * @param target the target string to be compared.
\r
838 * @return true if the strings are equal according to the collation
\r
839 * rules, otherwise false.
\r
841 * @throws NullPointerException thrown if either argument is null.
\r
844 public boolean equals(String source, String target)
\r
846 return (compare(source, target) == 0);
\r
850 * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
\r
851 * in this collator.
\r
852 * @return a pointer to a UnicodeSet object containing all the
\r
853 * code points and sequences that may sort differently than
\r
857 public UnicodeSet getTailoredSet()
\r
859 return new UnicodeSet(0, 0x10FFFF);
\r
863 * Compares the source text String to the target text String according to
\r
864 * this Collator's rules, strength and decomposition mode.
\r
865 * Returns an integer less than,
\r
866 * equal to or greater than zero depending on whether the source String is
\r
867 * less than, equal to or greater than the target String. See the Collator
\r
868 * class description for an example of use.
\r
870 * @param source the source String.
\r
871 * @param target the target String.
\r
872 * @return Returns an integer value. Value is less than zero if source is
\r
873 * less than target, value is zero if source and target are equal,
\r
874 * value is greater than zero if source is greater than target.
\r
875 * @see CollationKey
\r
876 * @see #getCollationKey
\r
877 * @throws NullPointerException thrown if either argument is null.
\r
880 public abstract int compare(String source, String target);
\r
883 * Compares the source Object to the target Object.
\r
885 * @param source the source Object.
\r
886 * @param target the target Object.
\r
887 * @return Returns an integer value. Value is less than zero if source is
\r
888 * less than target, value is zero if source and target are equal,
\r
889 * value is greater than zero if source is greater than target.
\r
890 * @throws ClassCastException thrown if either argument cannot be cast to String.
\r
893 public int compare(Object source, Object target) {
\r
894 return compare((String)source, (String)target);
\r
/**
 * Transforms the String into a CollationKey suitable for efficient
 * repeated comparison. The resulting key depends on the collator's
 * rules, strength and decomposition mode.
 *
 * <p>See the CollationKey class documentation for more information.</p>
 *
 * @param source the string to be transformed into a CollationKey.
 * @return the CollationKey for the given String based on this Collator's
 *         collation rules. If the source String is null, a null
 *         CollationKey is returned.
 * @see CollationKey
 * @see #compare(String, String)
 * @see #getRawCollationKey
 */
public abstract CollationKey getCollationKey(String source);
\r
/**
 * {@icu} Returns the simpler form of a CollationKey for the String source following
 * the rules of this Collator and stores the result into the user provided argument
 * key. If key has an internal byte array of length that's too small for the result,
 * the internal byte array will be grown to the exact required size.
 *
 * @param source the text String to be transformed into a RawCollationKey
 * @param key the user provided RawCollationKey that receives the result;
 *            may be null, in which case a new instance is created
 *            (NOTE(review): wording inferred from the return description
 *            below - confirm against the concrete subclass).
 * @return If key is null, a new instance of RawCollationKey will be
 *         created and returned, otherwise the user provided key will be
 *         returned.
 * @see #compare(String, String)
 * @see #getCollationKey
 * @see RawCollationKey
 */
public abstract RawCollationKey getRawCollationKey(String source,
                                                   RawCollationKey key);
\r
/**
 * {@icu} Variable top is a two byte primary value which causes all the codepoints
 * with primary values that are less than or equal to the variable top to be
 * shifted when alternate handling is set to SHIFTED.
 *
 * <p>Sets the variable top to a collation element value of a string supplied.</p>
 *
 * @param varTop one or more (if contraction) characters to which the
 *               variable top should be set
 * @return an int value containing the value of the variable top in upper 16
 *         bits. Lower 16 bits are undefined.
 * @throws IllegalArgumentException is thrown if varTop argument is not
 *         a valid variable top element. A variable top element is
 *         invalid when it is a contraction that does not exist in the
 *         Collation order or when the PRIMARY strength collation
 *         element for the variable top has more than two bytes
 * @see #getVariableTop
 * @see RuleBasedCollator#setAlternateHandlingShifted
 */
public abstract int setVariableTop(String varTop);
\r
/**
 * {@icu} Returns the variable top value of a Collator.
 * Lower 16 bits are undefined and should be ignored.
 *
 * @return the variable top value of a Collator.
 * @see #setVariableTop
 */
public abstract int getVariableTop();
\r
/**
 * {@icu} Sets the variable top to a collation element value supplied.
 * Variable top is set to the upper 16 bits.
 * Lower 16 bits are ignored.
 *
 * @param varTop Collation element value, as returned by setVariableTop or
 *               getVariableTop
 * @see #getVariableTop
 * @see #setVariableTop
 */
public abstract void setVariableTop(int varTop);
\r
/**
 * {@icu} Returns the version of this collator object.
 *
 * @return the version object associated with this collator
 */
public abstract VersionInfo getVersion();
\r
/**
 * {@icu} Returns the UCA version of this collator object.
 *
 * @return the UCA version object associated with this collator
 */
public abstract VersionInfo getUCAVersion();
\r
990 // protected constructor -------------------------------------------------
\r
993 * Empty default constructor to make javadocs happy
\r
996 protected Collator()
\r
1000 // package private methods -----------------------------------------------
\r
1002 // private data members --------------------------------------------------
\r
/**
 * Collation strength; initialized to TERTIARY.
 */
private int m_strength_ = TERTIARY;

/**
 * Decomposition mode; initialized to CANONICAL_DECOMPOSITION.
 */
private int m_decomposition_ = CANONICAL_DECOMPOSITION;

/**
 * Debug switch, taken from ICUDebug.enabled("collator").
 */
private static final boolean DEBUG = ICUDebug.enabled("collator");
\r
1016 // private methods -------------------------------------------------------
\r
1018 // end registry stuff
\r
1020 // -------- BEGIN ULocale boilerplate --------
\r
1023 * {@icu} Returns the locale that was used to create this object, or null.
\r
1024 * This may may differ from the locale requested at the time of
\r
1025 * this object's creation. For example, if an object is created
\r
1026 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
\r
1027 * drawn from <tt>en</tt> (the <i>actual</i> locale), and
\r
1028 * <tt>en_US</tt> may be the most specific locale that exists (the
\r
1029 * <i>valid</i> locale).
\r
1031 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
\r
1032 * contains a partial preview implementation. The * <i>actual</i>
\r
1033 * locale is returned correctly, but the <i>valid</i> locale is
\r
1034 * not, in most cases.
\r
1035 * @param type type of information requested, either {@link
\r
1036 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
\r
1037 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
\r
1038 * @return the information specified by <i>type</i>, or null if
\r
1039 * this object was not constructed from locale data.
\r
1040 * @see com.ibm.icu.util.ULocale
\r
1041 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
1042 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
1043 * @draft ICU 2.8 (retain)
\r
1044 * @provisional This API might change or be removed in a future release.
\r
1046 public final ULocale getLocale(ULocale.Type type) {
\r
1047 return type == ULocale.ACTUAL_LOCALE ?
\r
1048 this.actualLocale : this.validLocale;
\r
1052 * Set information about the locales that were used to create this
\r
1053 * object. If the object was not constructed from locale data,
\r
1054 * both arguments should be set to null. Otherwise, neither
\r
1055 * should be null. The actual locale must be at the same level or
\r
1056 * less specific than the valid locale. This method is intended
\r
1057 * for use by factories or other entities that create objects of
\r
1059 * @param valid the most specific locale containing any resource
\r
1061 * @param actual the locale containing data used to construct this
\r
1063 * @see com.ibm.icu.util.ULocale
\r
1064 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
1065 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
1067 final void setLocale(ULocale valid, ULocale actual) {
\r
1068 // Change the following to an assertion later
\r
1070 // The following would not happen since the method is called
\r
1071 // by other protected functions that checks and makes sure that
\r
1072 // valid and actual are not null before passing
\r
1073 if ((valid == null) != (actual == null)) {
\r
1074 throw new IllegalArgumentException();
\r
1077 // Another check we could do is that the actual locale is at
\r
1078 // the same level or less specific than the valid locale.
\r
1079 this.validLocale = valid;
\r
1080 this.actualLocale = actual;
\r
/**
 * The most specific locale containing any resource data, or null.
 * @see com.ibm.icu.util.ULocale
 */
private ULocale validLocale;

/**
 * The locale containing data used to construct this object, or
 * null.
 * @see com.ibm.icu.util.ULocale
 */
private ULocale actualLocale;
\r
1096 // -------- END ULocale boilerplate --------
\r