-//##header\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//#else\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 2008-2009, Google Inc, International Business Machines Corporation\r
- * and others. All Rights Reserved.\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.text;\r
-\r
-import java.util.ArrayList;\r
-import java.util.Collection;\r
-import java.util.Collections;\r
-import java.util.Comparator;\r
-import java.util.Iterator;\r
-import java.util.LinkedHashMap;\r
-import java.util.LinkedHashSet;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Set;\r
-import java.util.TreeSet;\r
-\r
-import com.ibm.icu.lang.UCharacter;\r
-import com.ibm.icu.util.LocaleData;\r
-import com.ibm.icu.util.ULocale;\r
-import com.ibm.icu.impl.CollectionUtilities.MultiComparator;\r
-\r
-/**\r
- * A set of characters for use as a UI "index", that is, a\r
- * list of clickable characters (or character sequences) that allow the user to\r
- * see a segment of a larger "target" list. That is, each character corresponds\r
- * to a bucket in the target list, where everything in the bucket is greater\r
- * than or equal to the character (according to the locale's collation). The\r
- * intention is to have two main functions; one that produces an index list that\r
- * is relatively static, and the other is a list that produces roughly\r
- * equally-sized buckets. Only the first is currently provided.\r
- * <p>\r
- * The static list would be presented as something like\r
- * \r
- * <pre>\r
- * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z\r
- * </pre>\r
- * \r
- * In the UI, an index character could be omitted if its bucket is empty. For\r
- * example, if there is nothing in the bucket for Q, then Q could be omitted.\r
- * <p>\r
- * <b>Important Notes:</b>\r
- * <ul>\r
- * <li>Although we say "character" above, the index character could be a\r
- * sequence, like "CH".</li>\r
- * <li>There could be items in a target list that are less than the first or\r
- * (much) greater than the last; examples include words from other scripts. The\r
- * UI could bucket them with the first or last respectively, or have some symbol\r
- * for those categories.</li>\r
- * <li>The use of the list requires that the target list be sorted according to\r
- * the locale that is used to create that list.</li>\r
- * <li>For languages without widely accepted sorting methods (eg Chinese/Japanese)\r
- * the results may appear arbitrary, and it may be best not to use these methods.</li>\r
- * <li>In the initial version, an arbitrary limit of 100 is placed on these lists.</li>\r
- * </ul>\r
- * \r
- * @author markdavis\r
- * @draft ICU 4.2\r
- * @provisional This API might change or be removed in a future release.\r
- */\r
-//TODO(markdavis) return an additional character that is the "least greater" character than\r
-//the last character.\r
-public class IndexCharacters {\r
- public static final char CGJ = '\u034F';\r
- private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]");\r
- private static final UnicodeSet HANGUL = new UnicodeSet("[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]");\r
- private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");\r
- private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]");\r
-\r
- private ULocale locale;\r
- private Collator comparator;\r
- private Set indexCharacters;\r
- private LinkedHashMap alreadyIn = new LinkedHashMap();\r
- private List noDistinctSorting = new ArrayList();\r
- private List notAlphabetic = new ArrayList();\r
-\r
- /**\r
- * Create the index object.\r
- * @param locale\r
- * @draft ICU 4.2\r
- * @provisional This API might change or be removed in a future release.\r
- */\r
- public IndexCharacters(ULocale locale) {\r
- this.locale = locale;\r
- comparator = Collator.getInstance(locale);\r
- comparator.setStrength(Collator.PRIMARY);\r
-\r
- // get the exemplars, and handle special cases\r
-\r
- UnicodeSet exemplars = LocaleData.getExemplarSet(locale, LocaleData.ES_STANDARD);\r
- // question: should we add auxiliary exemplars?\r
- if (exemplars.containsSome(CORE_LATIN)) {\r
- exemplars.addAll(CORE_LATIN);\r
- }\r
- if (exemplars.containsSome(HANGUL)) {\r
- // cut down to small list\r
- exemplars.removeAll(new UnicodeSet("[:block=hangul_syllables:]")).addAll(HANGUL);\r
- }\r
- if (exemplars.containsSome(ETHIOPIC)) {\r
- // cut down to small list\r
- // make use of the fact that Ethiopic is allocated in 8's, where\r
- // the base is 0 mod 8.\r
- for (UnicodeSetIterator it = new UnicodeSetIterator(ETHIOPIC); it.next();) {\r
- if ((it.codepoint & 0x7) != 0) {\r
- exemplars.remove(it.codepoint);\r
- }\r
- }\r
- }\r
-\r
- // first sort them, with an "best" ordering among items that are the same according\r
- // to the collator\r
-\r
- Set preferenceSorting = new TreeSet(new MultiComparator(new Comparator[]{\r
- comparator, new PreferenceComparator(Collator.getInstance(locale))}));\r
- for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {\r
- preferenceSorting.add(it.getString());\r
- }\r
-\r
- indexCharacters = new TreeSet(comparator);\r
-\r
- // We nw make a sorted array of elements, uppercased\r
- // Some of the input may, however, be redundant.\r
- // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"\r
- // So we make a pass through, filtering out those cases.\r
-\r
- for (Iterator it = preferenceSorting.iterator(); it.hasNext();) {\r
- String item = (String) it.next();\r
- item = UCharacter.toUpperCase(locale, item);\r
- if (indexCharacters.contains(item)) {\r
- for (Iterator it2 = indexCharacters.iterator(); it2.hasNext();) {\r
- Object itemAlreadyIn = it2.next();\r
- if (comparator.compare(item, itemAlreadyIn) == 0) {\r
- Set targets = (Set) alreadyIn.get(itemAlreadyIn);\r
- if (targets == null) {\r
- alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet());\r
- }\r
- targets.add(item);\r
- break;\r
- }\r
- }\r
- } else if (UTF16.countCodePoint(item) > 1 && comparator.compare(item, separated(item)) == 0){\r
- noDistinctSorting.add(item);\r
- } else if (!ALPHABETIC.containsSome(item)) {\r
- notAlphabetic.add(item);\r
- } else {\r
- indexCharacters.add(item);\r
- }\r
- }\r
-\r
- // if the result is still too large, cut down to 100 elements\r
-\r
- final int size = indexCharacters.size() - 1;\r
- if (size > 99) {\r
- int count = 0;\r
- int old = -1;\r
- for (Iterator it = indexCharacters.iterator(); it.hasNext();) {\r
- ++ count;\r
- it.next();\r
- final int bump = count * 99 / size;\r
- if (bump == old) {\r
- it.remove();\r
- } else {\r
- old = bump;\r
- } \r
- }\r
- }\r
- indexCharacters = Collections.unmodifiableSet(indexCharacters);\r
- }\r
-\r
- /*\r
- * Return the string with interspersed CGJs. Input must have more than 2 codepoints.\r
- */\r
- private String separated(String item) {\r
- StringBuffer result = new StringBuffer();\r
- // add a CGJ except within surrogates\r
- char last = item.charAt(0);\r
- result.append(last);\r
- for (int i = 1; i < item.length(); ++i) {\r
- char ch = item.charAt(i);\r
- if (!UCharacter.isHighSurrogate(last) || !UCharacter.isLowSurrogate(ch)) {\r
- result.append(CGJ);\r
- }\r
- result.append(ch);\r
- last = ch;\r
- }\r
- return result.toString();\r
- }\r
-\r
- /**\r
- * Get the index characters.\r
- * @return A collection including the index characters\r
- * @draft ICU 4.2\r
- * @provisional This API might change or be removed in a future release.\r
- */\r
- public Collection getIndexCharacters() {\r
- return indexCharacters;\r
- }\r
-\r
- /**\r
- * Get the locale\r
- * @return The locale.\r
- * @draft ICU 4.2\r
- * @provisional This API might change or be removed in a future release.\r
- */\r
- public ULocale getLocale() {\r
- return locale;\r
- }\r
-\r
- /**\r
- * As the index is built, items may be discarded from the exemplars.\r
- * This contains some of the discards, and is intended for debugging.\r
- * @internal\r
- */\r
- public Map getAlreadyIn() {\r
- return alreadyIn;\r
- }\r
-\r
- /**\r
- * As the index is built, items may be discarded from the exemplars.\r
- * This contains some of the discards, and is intended for debugging.\r
- * @internal\r
- */\r
- public List getNoDistinctSorting() {\r
- return noDistinctSorting;\r
- }\r
-\r
- /**\r
- * As the index is built, items may be discarded from the exemplars.\r
- * This contains some of the discards, and is intended for debugging.\r
- * @internal\r
- */\r
- public List getNotAlphabetic() {\r
- return notAlphabetic;\r
- }\r
-\r
- /*\r
- * Comparator that returns "better" items first, where shorter NFKD is better,\r
- * and otherwise NFKD binary order is better, and otherwise binary order is better.\r
- */\r
- private static class PreferenceComparator implements Comparator {\r
- static final Comparator binary = new UTF16.StringComparator(true,false,0);\r
- final Collator collator;\r
-\r
- public PreferenceComparator(Collator collator) {\r
- this.collator = collator;\r
- }\r
- \r
- public int compare(Object o1, Object o2) {\r
- if (o1 == o2) {\r
- return 0;\r
- }\r
- String s1 = (String) o1;\r
- String s2 = (String) o2;\r
- String n1 = Normalizer.decompose(s1, true);\r
- String n2 = Normalizer.decompose(s2, true);\r
- int result = n1.length() - n2.length();\r
- if (result != 0) {\r
- return result;\r
- }\r
- result = collator.compare(n1, n2);\r
- if (result != 0) {\r
- return result;\r
- }\r
- return binary.compare(s1, s2);\r
- }\r
- }\r
-}\r
-//#endif\r
+//##header J2SE15
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//#else
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2009, Google Inc, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.util.LocaleData;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.impl.CollectionUtilities.MultiComparator;
+
+/**
+ * A set of characters for use as a UI "index", that is, a
+ * list of clickable characters (or character sequences) that allow the user to
+ * see a segment of a larger "target" list. That is, each character corresponds
+ * to a bucket in the target list, where everything in the bucket is greater
+ * than or equal to the character (according to the locale's collation). The
+ * intention is to have two main functions; one that produces an index list that
+ * is relatively static, and the other is a list that produces roughly
+ * equally-sized buckets. Only the first is currently provided.
+ * <p>
+ * The static list would be presented as something like
+ *
+ * <pre>
+ * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+ * </pre>
+ *
+ * In the UI, an index character could be omitted if its bucket is empty. For
+ * example, if there is nothing in the bucket for Q, then Q could be omitted.
+ * <p>
+ * <b>Important Notes:</b>
+ * <ul>
+ * <li>Although we say "character" above, the index character could be a
+ * sequence, like "CH".</li>
+ * <li>There could be items in a target list that are less than the first or
+ * (much) greater than the last; examples include words from other scripts. The
+ * UI could bucket them with the first or last respectively, or have some symbol
+ * for those categories.</li>
+ * <li>The use of the list requires that the target list be sorted according to
+ * the locale that is used to create that list.</li>
+ * <li>For languages without widely accepted sorting methods (eg Chinese/Japanese)
+ * the results may appear arbitrary, and it may be best not to use these methods.</li>
+ * <li>In the initial version, an arbitrary limit of 100 is placed on these lists.</li>
+ * </ul>
+ *
+ * @author markdavis
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+//TODO(markdavis) return an additional character that is the "least greater" character than
+//the last character.
+public class IndexCharacters {
+ public static final char CGJ = '\u034F'; // U+034F COMBINING GRAPHEME JOINER, inserted by separated()
+ private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]"); // alphabetic characters that are not marks
+ private static final UnicodeSet HANGUL = new UnicodeSet("[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]"); // short list substituted for the whole Hangul syllables block
+ private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
+ private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]"); // added in full as soon as the exemplars contain any of it
+
+ private ULocale locale;
+ private Collator comparator; // collator for the locale, set to PRIMARY strength in the constructor
+ private Set indexCharacters; // the result; wrapped as unmodifiable at the end of the constructor
+ private LinkedHashMap alreadyIn = new LinkedHashMap(); // retained index string -> Set of items dropped because they compare equal to it
+ private List noDistinctSorting = new ArrayList(); // multi-code-point items that compare equal to their CGJ-separated form
+ private List notAlphabetic = new ArrayList(); // items containing no character from ALPHABETIC
+
+ /**
+ * Create the index object. The index is computed eagerly here: the locale's
+ * standard exemplar set is fetched, adjusted for Latin, Hangul and Ethiopic,
+ * uppercased, filtered, and finally limited to at most 100 entries.
+ * Items that are filtered out are recorded in the debugging collections
+ * returned by getAlreadyIn(), getNoDistinctSorting() and getNotAlphabetic().
+ * @param locale the locale whose exemplar characters and collation order are used
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public IndexCharacters(ULocale locale) {
+ this.locale = locale;
+ comparator = Collator.getInstance(locale);
+ comparator.setStrength(Collator.PRIMARY);
+
+ // get the exemplars, and handle special cases
+
+ UnicodeSet exemplars = LocaleData.getExemplarSet(locale, LocaleData.ES_STANDARD);
+ // question: should we add auxiliary exemplars?
+ if (exemplars.containsSome(CORE_LATIN)) {
+ exemplars.addAll(CORE_LATIN);
+ }
+ if (exemplars.containsSome(HANGUL)) {
+ // cut down to small list: drop every Hangul syllable, then add back only the HANGUL set
+ exemplars.removeAll(new UnicodeSet("[:block=hangul_syllables:]")).addAll(HANGUL);
+ }
+ if (exemplars.containsSome(ETHIOPIC)) {
+ // cut down to small list
+ // make use of the fact that Ethiopic is allocated in 8's, where
+ // the base is 0 mod 8; every code point that is not 0 mod 8 is removed.
+ for (UnicodeSetIterator it = new UnicodeSetIterator(ETHIOPIC); it.next();) {
+ if ((it.codepoint & 0x7) != 0) {
+ exemplars.remove(it.codepoint);
+ }
+ }
+ }
+
+ // first sort them, with a "best" ordering among items that are the same according
+ // to the primary-strength collator; the tie-breaker gets its own, separate Collator instance
+
+ Set preferenceSorting = new TreeSet(new MultiComparator(new Comparator[]{
+ comparator, new PreferenceComparator(Collator.getInstance(locale))}));
+ for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
+ preferenceSorting.add(it.getString());
+ }
+
+ indexCharacters = new TreeSet(comparator);
+
+ // We now make a sorted set of elements, uppercased.
+ // Some of the input may, however, be redundant.
+ // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
+ // So we make a pass through, filtering out those cases. The order of the checks below matters.
+
+ for (Iterator it = preferenceSorting.iterator(); it.hasNext();) {
+ String item = (String) it.next();
+ item = UCharacter.toUpperCase(locale, item);
+ if (indexCharacters.contains(item)) { // an item that compares equal is already in the set
+ for (Iterator it2 = indexCharacters.iterator(); it2.hasNext();) {
+ Object itemAlreadyIn = it2.next();
+ if (comparator.compare(item, itemAlreadyIn) == 0) {
+ Set targets = (Set) alreadyIn.get(itemAlreadyIn);
+ if (targets == null) {
+ alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet());
+ }
+ targets.add(item);
+ break;
+ }
+ }
+ } else if (UTF16.countCodePoint(item) > 1 && comparator.compare(item, separated(item)) == 0){
+ noDistinctSorting.add(item);
+ } else if (!ALPHABETIC.containsSome(item)) {
+ notAlphabetic.add(item);
+ } else {
+ indexCharacters.add(item);
+ }
+ }
+
+ // if the result is still too large (more than 100 elements), cut down to at most 100 elements
+
+ final int size = indexCharacters.size() - 1;
+ if (size > 99) {
+ int count = 0;
+ int old = -1;
+ for (Iterator it = indexCharacters.iterator(); it.hasNext();) {
+ ++ count;
+ it.next();
+ final int bump = count * 99 / size; // slot 0..99 for this 1-based position; only the first element of each slot is kept
+ if (bump == old) {
+ it.remove();
+ } else {
+ old = bump;
+ }
+ }
+ }
+ indexCharacters = Collections.unmodifiableSet(indexCharacters);
+ }
+
+ /*
+ * Return the string with interspersed CGJs. Input must not be empty, because
+ * the first char is read unconditionally; the only caller passes strings
+ * with more than one code point.
+ */
+ private String separated(String item) {
+ StringBuffer result = new StringBuffer();
+ // add a CGJ between adjacent chars, except between the two halves of a surrogate pair
+ char last = item.charAt(0);
+ result.append(last);
+ for (int i = 1; i < item.length(); ++i) {
+ char ch = item.charAt(i);
+ if (!UCharacter.isHighSurrogate(last) || !UCharacter.isLowSurrogate(ch)) {
+ result.append(CGJ);
+ }
+ result.append(ch);
+ last = ch;
+ }
+ return result.toString();
+ }
+
+ /**
+ * Get the index characters.
+ * @return An unmodifiable collection of the index characters (strings), ordered by
+ * the locale's collator at primary strength.
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Collection getIndexCharacters() {
+ return indexCharacters;
+ }
+
+ /**
+ * Get the locale
+ * @return The locale that was passed to the constructor.
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale getLocale() {
+ return locale;
+ }
+
+ /**
+ * As the index is built, items may be discarded from the exemplars.
+ * This contains some of the discards, and is intended for debugging.
+ * Each key is an index string that was kept; its value is the Set of uppercased
+ * items that were discarded because they compare equal to that key.
+ * The internal map itself is returned, not a copy.
+ * @internal
+ */
+ public Map getAlreadyIn() {
+ return alreadyIn;
+ }
+
+ /**
+ * As the index is built, items may be discarded from the exemplars.
+ * This contains some of the discards, and is intended for debugging.
+ * These are the items with more than one code point that compare equal to the
+ * same string with CGJs inserted between its characters.
+ * The internal list itself is returned, not a copy.
+ * @internal
+ */
+ public List getNoDistinctSorting() {
+ return noDistinctSorting;
+ }
+
+ /**
+ * As the index is built, items may be discarded from the exemplars.
+ * This contains some of the discards, and is intended for debugging.
+ * These are the items that contain no alphabetic (non-mark) character.
+ * The internal list itself is returned, not a copy.
+ * @internal
+ */
+ public List getNotAlphabetic() {
+ return notAlphabetic;
+ }
+
+ /*
+ * Comparator that returns "better" items first, where shorter NFKD is better,
+ * and otherwise the supplied collator's order of the NFKD forms is better,
+ * and otherwise binary order of the original strings is better.
+ * Length is counted in UTF-16 code units (String.length()).
+ */
+ private static class PreferenceComparator implements Comparator {
+ static final Comparator binary = new UTF16.StringComparator(true,false,0); // final tie-breaker, applied to the original strings
+ final Collator collator;
+
+ public PreferenceComparator(Collator collator) {
+ this.collator = collator;
+ }
+
+ public int compare(Object o1, Object o2) {
+ if (o1 == o2) {
+ return 0;
+ }
+ String s1 = (String) o1;
+ String s2 = (String) o2;
+ String n1 = Normalizer.decompose(s1, true); // true = compatibility decomposition (NFKD)
+ String n2 = Normalizer.decompose(s2, true);
+ int result = n1.length() - n2.length();
+ if (result != 0) {
+ return result;
+ }
+ result = collator.compare(n1, n2);
+ if (result != 0) {
+ return result;
+ }
+ return binary.compare(s1, s2);
+ }
+ }
+}
+//#endif