/**
*******************************************************************************
- * Copyright (C) 1996-2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 1996-2013, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
+import com.ibm.icu.util.Output;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
/**
* Determines whether the object has been frozen or not.
- * @draft ICU 4.8
+ * @stable ICU 4.8
*/
public boolean isFrozen() {
return frozenLock != null;
/**
* Freezes the collator.
* @return the collator itself.
- * @draft ICU 4.8
+ * @stable ICU 4.8
*/
public Collator freeze() {
if (!isFrozen()) {
/**
* Provides for the clone operation. Any clone is initially unfrozen.
- * @draft ICU 4.8
+ * @stable ICU 4.8
*/
public RuleBasedCollator cloneAsThawed() {
RuleBasedCollator clone = null;
* Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
* positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
* correct JIS collation order, distinguishing between Katakana and Hiragana characters.
+ *
+ * <p>This attribute is an implementation detail of the CLDR Japanese tailoring.
+ * The implementation might change to use a different mechanism
+ * to achieve the same Japanese sort order.
+ * Since ICU 50, this attribute can no longer be set via API functions.
*
* @param flag
* true if Hiragana Quaternary mode is to be on, false otherwise
* @see #setHiraganaQuaternaryDefault
* @see #isHiraganaQuaternary
- * @stable ICU 2.8
+ * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation.
*/
public void setHiraganaQuaternary(boolean flag) {
if (isFrozen()) {
throw new UnsupportedOperationException("Attempt to modify frozen object");
}
-
- m_isHiragana4_ = flag;
- updateInternalState();
}
/**
* Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See
* setHiraganaQuaternary(boolean) for more details.
+ *
+ * <p>This attribute is an implementation detail of the CLDR Japanese tailoring.
+ * The implementation might change to use a different mechanism
+ * to achieve the same Japanese sort order.
+ * Since ICU 50, this attribute can no longer be set via API functions.
*
* @see #setHiraganaQuaternary(boolean)
* @see #isHiraganaQuaternary
- * @stable ICU 2.8
+ * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation.
*/
public void setHiraganaQuaternaryDefault() {
if (isFrozen()) {
throw new UnsupportedOperationException("Attempt to modify frozen object");
}
-
- m_isHiragana4_ = m_defaultIsHiragana4_;
- updateInternalState();
}
/**
* @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
* @see #getReorderCodes
* @see #getEquivalentReorderCodes
- * @draft ICU 4.8
+ * @stable ICU 4.8
*/
public void setReorderCodes(int... order) {
if (isFrozen()) {
// public getters --------------------------------------------------------
/**
- * Gets the collation rules for this RuleBasedCollator. Equivalent to String getRules(RuleOption.FULL_RULES).
+ * Gets the collation tailoring rules for this RuleBasedCollator.
+ * Equivalent to {@code getRules(false)}.
*
- * @return returns the collation rules
+ * @return the collation tailoring rules
* @see #getRules(boolean)
* @stable ICU 2.8
*/
* Returns current rules. The argument defines whether full rules (UCA + tailored) rules are returned or just the
* tailoring.
*
+ * <p>The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
+ * They are almost never used or useful at runtime and can be removed from the data.
+ * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide:
+ * Collation Customization, Building on Existing Locales</a>.
+ *
+ * <p>{@link #getRules()} should normally be used instead.
* @param fullrules
* true if the rules that defines the full set of collation order is required, otherwise false for
* returning only the tailored rules
}
}
- private class contContext {
+ private static class contContext {
RuleBasedCollator coll;
UnicodeSet contractions;
UnicodeSet expansions;
/**
* Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details.
+ *
+ * <p>This attribute is an implementation detail of the CLDR Japanese tailoring.
+ * The implementation might change to use a different mechanism
+ * to achieve the same Japanese sort order.
+ * Since ICU 50, this attribute can no longer be set via API functions.
*
* @return flag true if Hiragana Quaternary mode is on, false otherwise
* @see #setHiraganaQuaternaryDefault
* @see #setHiraganaQuaternary(boolean)
- * @stable ICU 2.8
+ * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation.
*/
public boolean isHiraganaQuaternary() {
return m_isHiragana4_;
* if none are set then returns an empty array
* @see #setReorderCodes
* @see #getEquivalentReorderCodes
- * @draft ICU 4.8
+ * @stable ICU 4.8
*/
public int[] getReorderCodes() {
if (m_reorderCodes_ != null) {
* @return the set of all reorder codes in the same group as the given reorder code.
* @see #setReorderCodes
* @see #getReorderCodes
- * @draft ICU 4.8
+ * @stable ICU 4.8
*/
public static int[] getEquivalentReorderCodes(int reorderCode) {
Set<Integer> equivalentCodesSet = new HashSet<Integer>();
* @stable ICU 2.8
*/
public int compare(String source, String target) {
- if (source == target) {
+ if (source.equals(target)) {
return 0;
}
CollationBuffer buffer = null;
* Table for UCA and builder use
*/
static final char UCA_CONTRACTIONS_[];
+ static final int MAX_UCA_CONTRACTION_LENGTH;
private static boolean UCA_INIT_COMPLETE;
UCAConstants iUCA_CONSTANTS_ = null;
LeadByteConstants iLEADBYTE_CONSTANTS = null;
char iUCA_CONTRACTIONS_[] = null;
+ Output<Integer> maxUCAContractionLength = new Output<Integer>();
ImplicitCEGenerator iimpCEGen_ = null;
try {
// !!! note what's going on here...
iUCA_ = new RuleBasedCollator();
iUCA_CONSTANTS_ = new UCAConstants();
iLEADBYTE_CONSTANTS = new LeadByteConstants();
- iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_, iLEADBYTE_CONSTANTS);
+ iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_, iLEADBYTE_CONSTANTS, maxUCAContractionLength);
// called before doing canonical closure for the UCA.
iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary);
UCA_CONSTANTS_ = iUCA_CONSTANTS_;
LEADBYTE_CONSTANTS_ = iLEADBYTE_CONSTANTS;
UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_;
+ MAX_UCA_CONTRACTION_LENGTH = maxUCAContractionLength.value;
impCEGen_ = iimpCEGen_;
UCA_INIT_COMPLETE = true;
}
/**
- * Constructors a RuleBasedCollator from the argument locale. If no resource bundle is associated with the locale,
- * UCA is used instead.
+ * Constructs a RuleBasedCollator from the argument locale.
+ * If no resource bundle is associated with the locale, UCA is used instead.
*
* @param locale
*/
RuleBasedCollator(ULocale locale) {
checkUCA();
- ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
- ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
- if (rb != null) {
- try {
+ try {
+ ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+ ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
+ if (rb != null) {
+ ICUResourceBundle elements = null;
+
// Use keywords, if supplied for lookup
String collkey = locale.getKeywordValue("collation");
- if (collkey == null) {
+ if (collkey != null) {
+ try {
+ elements = rb.getWithFallback("collations/" + collkey);
+ } catch (MissingResourceException e) {
+ // fall through
+ }
+ }
+ if (elements == null) {
+ // either collation keyword was not supplied or
+ // the keyword was invalid - use default collation for the locale
+
+ // collations/default should always give a string back:
+ // the keyword that names the real collation data
collkey = rb.getStringWithFallback("collations/default");
+ elements = rb.getWithFallback("collations/" + collkey);
}
- // collations/default will always give a string back
- // keyword for the real collation data
- // if "collations/collkey" will return null if collkey == null
- ICUResourceBundle elements = rb.getWithFallback("collations/" + collkey);
- if (elements != null) {
- // TODO: Determine actual & valid locale correctly
- ULocale uloc = rb.getULocale();
- setLocale(uloc, uloc);
-
- m_rules_ = elements.getString("Sequence");
- ByteBuffer buf = elements.get("%%CollationBin").getBinary();
- // %%CollationBin
- if (buf != null) {
- // m_rules_ = (String)rules[1][1];
- CollatorReader.initRBC(this, buf);
- /*
- * BufferedInputStream input = new BufferedInputStream( new ByteArrayInputStream(map)); /*
- * CollatorReader reader = new CollatorReader(input, false); if (map.length >
- * MIN_BINARY_DATA_SIZE_) { reader.read(this, null); } else { reader.readHeader(this);
- * reader.readOptions(this); // duplicating UCA_'s data setWithUCATables(); }
- */
- // at this point, we have read in the collator
- // now we need to check whether the binary image has
- // the right UCA and other versions
- if (!m_UCA_version_.equals(UCA_.m_UCA_version_) || !m_UCD_version_.equals(UCA_.m_UCD_version_)) {
- init(m_rules_);
- return;
- }
- try {
- UResourceBundle reorderRes = elements.get("%%ReorderCodes");
- if (reorderRes != null) {
- int[] reorderCodes = reorderRes.getIntVector();
- setReorderCodes(reorderCodes);
- m_defaultReorderCodes_ = reorderCodes.clone();
- }
- } catch (MissingResourceException e) {
- // ignore
- }
- init();
- return;
- } else {
+ // TODO: Determine actual & valid locale correctly
+ ULocale uloc = rb.getULocale();
+ setLocale(uloc, uloc);
+
+ m_rules_ = elements.getString("Sequence");
+ ByteBuffer buf = elements.get("%%CollationBin").getBinary();
+ // %%CollationBin
+ if (buf != null) {
+ // m_rules_ = (String)rules[1][1];
+ CollatorReader.initRBC(this, buf);
+ /*
+ * BufferedInputStream input = new BufferedInputStream( new ByteArrayInputStream(map)); /*
+ * CollatorReader reader = new CollatorReader(input, false); if (map.length >
+ * MIN_BINARY_DATA_SIZE_) { reader.read(this, null); } else { reader.readHeader(this);
+ * reader.readOptions(this); // duplicating UCA_'s data setWithUCATables(); }
+ */
+ // at this point, we have read in the collator
+ // now we need to check whether the binary image has
+ // the right UCA and other versions
+ if (!m_UCA_version_.equals(UCA_.m_UCA_version_) || !m_UCD_version_.equals(UCA_.m_UCD_version_)) {
init(m_rules_);
return;
}
+ init();
+ try {
+ UResourceBundle reorderRes = elements.get("%%ReorderCodes");
+ if (reorderRes != null) {
+ int[] reorderCodes = reorderRes.getIntVector();
+ setReorderCodes(reorderCodes);
+ m_defaultReorderCodes_ = reorderCodes.clone();
+ }
+ } catch (MissingResourceException e) {
+ // ignore
+ }
+ return;
+ } else {
+ init(m_rules_);
+ return;
}
- } catch (Exception e) {
- e.printStackTrace();
- // if failed use UCA.
}
+ } catch (Exception e) {
+ // loading the locale's collation data failed - fall through and use the UCA data below
}
setWithUCAData();
}
* @param buffer collation buffer temporary state
*/
private final void doSecondaryBytes(int ce, boolean notIsContinuation, boolean doFrench, CollationBuffer buffer) {
- int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison
+ int s = (ce >> 8) & LAST_BYTE_MASK_; // int for comparison
if (s != 0) {
if (!doFrench) {
// This is compression code.
int hiraganaresult = 0;
while (true) {
int sorder = 0;
+ int sPrimary;
// We fetch CEs until we hit a non ignorable primary or end.
do {
sorder = buffer.m_srcUtilColEIter_.next();
buffer.m_srcUtilCEBuffer_ = append(buffer.m_srcUtilCEBuffer_, buffer.m_srcUtilCEBufferSize_, sorder);
buffer.m_srcUtilCEBufferSize_++;
- sorder &= CE_PRIMARY_MASK_;
- } while (sorder == CollationElementIterator.IGNORABLE);
+ sPrimary = sorder & CE_PRIMARY_MASK_;
+ } while (sPrimary == CollationElementIterator.IGNORABLE);
int torder = 0;
+ int tPrimary;
do {
torder = buffer.m_tgtUtilColEIter_.next();
buffer.m_tgtUtilCEBuffer_ = append(buffer.m_tgtUtilCEBuffer_, buffer.m_tgtUtilCEBufferSize_, torder);
buffer.m_tgtUtilCEBufferSize_++;
- torder &= CE_PRIMARY_MASK_;
- } while (torder == CollationElementIterator.IGNORABLE);
-
- if (!isContinuation(sorder) && m_leadBytePermutationTable_ != null) {
- sorder = (m_leadBytePermutationTable_[((sorder >> 24) + 256) % 256] << 24) | (sorder & 0x00FFFFFF);
- torder = (m_leadBytePermutationTable_[((torder >> 24) + 256) % 256] << 24) | (torder & 0x00FFFFFF);
- }
+ tPrimary = torder & CE_PRIMARY_MASK_;
+ } while (tPrimary == CollationElementIterator.IGNORABLE);
// if both primaries are the same
- if (sorder == torder) {
+ if (sPrimary == tPrimary) {
// and there are no more CEs, we advance to the next level
// see if we are at the end of either string
if (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
}
}
} else {
+ if (!isContinuation(sorder) && m_leadBytePermutationTable_ != null) {
+ sPrimary = (m_leadBytePermutationTable_[sPrimary >>> 24] << 24) | (sPrimary & 0x00FFFFFF);
+ tPrimary = (m_leadBytePermutationTable_[tPrimary >>> 24] << 24) | (tPrimary & 0x00FFFFFF);
+ }
// if two primaries are different, we are done
- return endPrimaryCompare(sorder, torder, buffer);
+ return endPrimaryCompare(sPrimary, tPrimary, buffer);
}
}
// no primary difference... do the rest from the buffers
int sorder = CollationElementIterator.IGNORABLE;
int torder = CollationElementIterator.IGNORABLE;
while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
- sorder = buffer.m_srcUtilCEBuffer_[soffset++] & m_mask3_;
+ sorder = buffer.m_srcUtilCEBuffer_[soffset++];
if (!isContinuation(sorder)) {
- sorder ^= m_caseSwitch_;
+ sorder = (sorder & m_mask3_) ^ m_caseSwitch_;
} else {
- sorder &= CE_REMOVE_CASE_;
+ sorder = (sorder & m_mask3_) & CE_REMOVE_CASE_;
}
}
while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
- torder = buffer.m_tgtUtilCEBuffer_[toffset++] & m_mask3_;
+ torder = buffer.m_tgtUtilCEBuffer_[toffset++];
if (!isContinuation(torder)) {
- torder ^= m_caseSwitch_;
+ torder = (torder & m_mask3_) ^ m_caseSwitch_;
} else {
- torder &= CE_REMOVE_CASE_;
+ torder = (torder & m_mask3_) & CE_REMOVE_CASE_;
}
}
if (m_reorderCodes_[0] == ReorderCodes.DEFAULT) {
if (m_reorderCodes_.length != 1) {
- throw new IllegalArgumentException("Illegal collation reorder codes - default reorder code must be the only code in the list.");
+ throw new IllegalArgumentException("Illegal collation reorder codes - default reorder code must be the only code in the list.");
}
// swap the reorder codes for those at build of the rules
if (m_defaultReorderCodes_ == null || m_defaultReorderCodes_.length == 0) {
- m_leadBytePermutationTable_ = null;
+ m_leadBytePermutationTable_ = null;
+ return;
}
m_reorderCodes_ = m_defaultReorderCodes_.clone();
}
* Generate latin-1 tables
*/
- private class shiftValues {
+ private static class shiftValues {
int primShift = 24;
int secShift = 24;
int terShift = 24;
return true;
}
- private class ContractionInfo {
+ private static class ContractionInfo {
int index;
}