]> gitweb.fperrin.net Git - Dictionary.git/blobdiff - jars/icu4j-4_2_1-src/src/com/ibm/icu/text/RuleBasedCollator.java
go
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / text / RuleBasedCollator.java
old mode 100755 (executable)
new mode 100644 (file)
index 880f27f..c052691
-//##header\r
-/**\r
-*******************************************************************************\r
-* Copyright (C) 1996-2009, International Business Machines Corporation and    *\r
-* others. All Rights Reserved.                                                *\r
-*******************************************************************************\r
-*/\r
-package com.ibm.icu.text;\r
-\r
-import java.io.IOException;\r
-import java.text.CharacterIterator;\r
-import java.text.ParseException;\r
-import java.util.Arrays;\r
-import java.util.MissingResourceException;\r
-\r
-//#if defined(FOUNDATION10) || defined(J2SE13) || defined(ECLIPSE_FRAGMENT)\r
-//##import com.ibm.icu.impl.ByteBuffer;\r
-//#else\r
-import java.nio.ByteBuffer;\r
-//#endif\r
-\r
-import com.ibm.icu.impl.BOCU;\r
-import com.ibm.icu.impl.ICUDebug;\r
-import com.ibm.icu.impl.ICUResourceBundle;\r
-import com.ibm.icu.impl.ImplicitCEGenerator;\r
-import com.ibm.icu.impl.IntTrie;\r
-import com.ibm.icu.impl.StringUCharacterIterator;\r
-import com.ibm.icu.impl.Trie;\r
-import com.ibm.icu.impl.TrieIterator;\r
-import com.ibm.icu.impl.Utility;\r
-import com.ibm.icu.lang.UCharacter;\r
-import com.ibm.icu.util.RangeValueIterator;\r
-import com.ibm.icu.util.ULocale;\r
-import com.ibm.icu.util.UResourceBundle;\r
-import com.ibm.icu.util.VersionInfo;\r
-\r
-/**\r
- * <p>RuleBasedCollator is a concrete subclass of Collator. It allows\r
- * customization of the Collator via user-specified rule sets.\r
- * RuleBasedCollator is designed to be fully compliant to the <a\r
- * href="http://www.unicode.org/unicode/reports/tr10/">Unicode\r
- * Collation Algorithm (UCA)</a> and conforms to ISO 14651.</p>\r
- *\r
- * <p>Users are strongly encouraged to read <a\r
- * href="http://www.icu-project.org/userguide/Collate_Intro.html">\r
- * the users guide</a> for more information about the collation\r
- * service before using this class.</p>\r
- *\r
- * <p>Create a RuleBasedCollator from a locale by calling the\r
- * getInstance(Locale) factory method in the base class Collator.\r
- * Collator.getInstance(Locale) creates a RuleBasedCollator object\r
- * based on the collation rules defined by the argument locale.  If a\r
- * customized collation ordering ar attributes is required, use the\r
- * RuleBasedCollator(String) constructor with the appropriate\r
- * rules. The customized RuleBasedCollator will base its ordering on\r
- * UCA, while re-adjusting the attributes and orders of the characters\r
- * in the specified rule accordingly.</p>\r
- *\r
- * <p>RuleBasedCollator provides correct collation orders for most\r
- * locales supported in ICU. If specific data for a locale is not\r
- * available, the orders eventually falls back to the <a\r
- * href="http://www.unicode.org/unicode/reports/tr10/">UCA collation\r
- * order </a>.</p>\r
- *\r
- * <p>For information about the collation rule syntax and details\r
- * about customization, please refer to the\r
- * <a href="http://www.icu-project.org/userguide/Collate_Customization.html">\r
- * Collation customization</a> section of the user's guide.</p>\r
- *\r
- * <p><strong>Note</strong> that there are some differences between\r
- * the Collation rule syntax used in Java and ICU4J:\r
- *\r
- * <ul>\r
- * <li>According to the JDK documentation:\r
- * <i>\r
- * <p>\r
- * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule\r
- * is in force when a Thai vowel of the range &#92;U0E40-&#92;U0E44 precedes a\r
- * Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the\r
- * range &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range\r
- * &#92;U0E81-&#92;U0EAE then the\r
- * vowel is placed after the consonant for collation purposes.\r
- * </p>\r
- * <p>\r
- * If a rule is without the modifier '!', the Thai/Lao vowel-consonant\r
- * swapping is not turned on.\r
- * </p>\r
- * </i>\r
- * <p>\r
- * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao\r
- * vowel-consonant swapping, since the UCA clearly states that it has to be\r
- * supported to ensure a correct sorting order. If a '!' is encountered, it is\r
- * ignored.\r
- * </p>\r
- * <li>As mentioned in the documentation of the base class Collator,\r
- *     compatibility decomposition mode is not supported.\r
- * </ul>\r
- * <p>\r
- * <strong>Examples</strong>\r
- * </p>\r
- * <p>\r
- * Creating Customized RuleBasedCollators:\r
- * <blockquote>\r
- * <pre>\r
- * String simple = "&amp; a &lt; b &lt; c &lt; d";\r
- * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);\r
- *\r
- * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "\r
- *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "\r
- *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "\r
- *                    + "o , O &lt; p , P &lt; q , Q &lt r , R &lt s , S &lt; "\r
- *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "\r
- *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "\r
- *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "\r
- *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";\r
- * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);\r
- * </pre>\r
- * </blockquote>\r
- *\r
- * Concatenating rules to combine <code>Collator</code>s:\r
- * <blockquote>\r
- * <pre>\r
- * // Create an en_US Collator object\r
- * RuleBasedCollator en_USCollator = (RuleBasedCollator)\r
- *     Collator.getInstance(new Locale("en", "US", ""));\r
- * // Create a da_DK Collator object\r
- * RuleBasedCollator da_DKCollator = (RuleBasedCollator)\r
- *     Collator.getInstance(new Locale("da", "DK", ""));\r
- * // Combine the two\r
- * // First, get the collation rules from en_USCollator\r
- * String en_USRules = en_USCollator.getRules();\r
- * // Second, get the collation rules from da_DKCollator\r
- * String da_DKRules = da_DKCollator.getRules();\r
- * RuleBasedCollator newCollator =\r
- *                             new RuleBasedCollator(en_USRules + da_DKRules);\r
- * // newCollator has the combined rules\r
- * </pre>\r
- * </blockquote>\r
- *\r
- * Making changes to an existing RuleBasedCollator to create a new\r
- * <code>Collator</code> object, by appending changes to the existing rule:\r
- * <blockquote>\r
- * <pre>\r
- * // Create a new Collator object with additional rules\r
- * String addRules = "&amp; C &lt; ch, cH, Ch, CH";\r
- * RuleBasedCollator myCollator =\r
- *     new RuleBasedCollator(en_USCollator.getRules() + addRules);\r
- * // myCollator contains the new rules\r
- * </pre>\r
- * </blockquote>\r
- *\r
- * How to change the order of non-spacing accents:\r
- * <blockquote>\r
- * <pre>\r
- * // old rule with main accents\r
- * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "\r
- *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "\r
- *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "\r
- *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "\r
- *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "\r
- *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "\r
- *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";\r
- * // change the order of accent characters\r
- * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";\r
- * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);\r
- * </pre>\r
- * </blockquote>\r
- *\r
- * Putting in a new primary ordering before the default setting,\r
- * e.g. sort English characters before or after Japanese characters in the Japanese\r
- * <code>Collator</code>:\r
- * <blockquote>\r
- * <pre>\r
- * // get en_US Collator rules\r
- * RuleBasedCollator en_USCollator\r
- *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);\r
- * // add a few Japanese characters to sort before English characters\r
- * // suppose the last character before the first base letter 'a' in\r
- * // the English collation rule is &#92;u2212\r
- * String jaString = "& &#92;u2212 &lt &#92;u3041, &#92;u3042 &lt &#92;u3043, "\r
- *                   + "&#92;u3044";\r
- * RuleBasedCollator myJapaneseCollator\r
- *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);\r
- * </pre>\r
- * </blockquote>\r
- * </p>\r
- * <p>\r
- * This class is not subclassable\r
- * </p>\r
- * @author Syn Wee Quek\r
- * @stable ICU 2.8\r
- */\r
-public final class RuleBasedCollator extends Collator\r
-{   \r
-    // public constructors ---------------------------------------------------\r
-\r
-    /**\r
-     * <p>\r
-     * Constructor that takes the argument rules for\r
-     * customization. The collator will be based on UCA,\r
-     * with the attributes and re-ordering of the characters specified in the\r
-     * argument rules.\r
-     * </p>\r
-     * <p>See the user guide's section on\r
-     * <a href="http://www.icu-project.org/userguide/Collate_Customization.html">\r
-     * Collation Customization</a> for details on the rule syntax.\r
-     * </p>\r
-     * @param rules the collation rules to build the collation table from.\r
-     * @exception ParseException and IOException thrown. ParseException thrown\r
-     *            when argument rules have an invalid syntax. IOException\r
-     *            thrown when an error occured while reading internal data.\r
-     * @stable ICU 2.8\r
-     */\r
-    public RuleBasedCollator(String rules) throws Exception\r
-    {\r
-        checkUCA();\r
-        if (rules == null) {\r
-            throw new IllegalArgumentException(\r
-                                            "Collation rules can not be null");\r
-        }\r
-        init(rules);\r
-    }\r
-\r
-    // public methods --------------------------------------------------------\r
-\r
-    /**\r
-     * Clones the RuleBasedCollator\r
-     * @return a new instance of this RuleBasedCollator object\r
-     * @stable ICU 2.8\r
-     */\r
-    public Object clone() throws CloneNotSupportedException\r
-    {\r
-        RuleBasedCollator result = (RuleBasedCollator)super.clone();\r
-        if (latinOneCEs_ != null) {\r
-            result.m_reallocLatinOneCEs_ = true;\r
-            result.m_ContInfo_ = new ContractionInfo();\r
-        }\r
-\r
-        // since all collation data in the RuleBasedCollator do not change\r
-        // we can safely assign the result.fields to this collator\r
-        result.initUtility(false);  // let the new clone have their own util\r
-                                    // iterators\r
-        return result;\r
-    }\r
-\r
-    /**\r
-     * Return a CollationElementIterator for the given String.\r
-     * @see CollationElementIterator\r
-     * @stable ICU 2.8\r
-     */\r
-    public CollationElementIterator getCollationElementIterator(String source)\r
-    {\r
-        return new CollationElementIterator(source, this);\r
-    }\r
-\r
-    /**\r
-     * Return a CollationElementIterator for the given CharacterIterator.\r
-     * The source iterator's integrity will be preserved since a new copy\r
-     * will be created for use.\r
-     * @see CollationElementIterator\r
-     * @stable ICU 2.8\r
-     */\r
-    public CollationElementIterator getCollationElementIterator(\r
-                                                CharacterIterator source)\r
-    {\r
-        CharacterIterator newsource = (CharacterIterator)source.clone();\r
-        return new CollationElementIterator(newsource, this);\r
-    }\r
-    \r
-    /**\r
-     * Return a CollationElementIterator for the given UCharacterIterator.\r
-     * The source iterator's integrity will be preserved since a new copy\r
-     * will be created for use.\r
-     * @see CollationElementIterator\r
-     * @stable ICU 2.8\r
-     */\r
-    public CollationElementIterator getCollationElementIterator(\r
-                                                UCharacterIterator source)\r
-    {\r
-        return new CollationElementIterator(source, this);\r
-    }\r
-\r
-    // public setters --------------------------------------------------------\r
-\r
-    /**\r
-     * Sets the Hiragana Quaternary mode to be on or off.\r
-     * When the Hiragana Quaternary mode is turned on, the collator\r
-     * positions Hiragana characters before all non-ignorable characters in\r
-     * QUATERNARY strength. This is to produce a correct JIS collation order,\r
-     * distinguishing between Katakana  and Hiragana characters.\r
-     * @param flag true if Hiragana Quaternary mode is to be on, false\r
-     *        otherwise\r
-     * @see #setHiraganaQuaternaryDefault\r
-     * @see #isHiraganaQuaternary\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setHiraganaQuaternary(boolean flag)\r
-    {\r
-        m_isHiragana4_ = flag;\r
-        updateInternalState();        \r
-    }\r
-\r
-    /**\r
-     * Sets the Hiragana Quaternary mode to the initial mode set during\r
-     * construction of the RuleBasedCollator.\r
-     * See setHiraganaQuaternary(boolean) for more details.\r
-     * @see #setHiraganaQuaternary(boolean)\r
-     * @see #isHiraganaQuaternary\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setHiraganaQuaternaryDefault()\r
-    {\r
-        m_isHiragana4_ = m_defaultIsHiragana4_;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets whether uppercase characters sort before lowercase\r
-     * characters or vice versa, in strength TERTIARY. The default\r
-     * mode is false, and so lowercase characters sort before uppercase\r
-     * characters.\r
-     * If true, sort upper case characters first.\r
-     * @param upperfirst true to sort uppercase characters before\r
-     *                   lowercase characters, false to sort lowercase\r
-     *                   characters before uppercase characters\r
-     * @see #isLowerCaseFirst\r
-     * @see #isUpperCaseFirst\r
-     * @see #setLowerCaseFirst\r
-     * @see #setCaseFirstDefault\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setUpperCaseFirst(boolean upperfirst)\r
-    {\r
-        if (upperfirst) {\r
-            if(m_caseFirst_ != AttributeValue.UPPER_FIRST_) {\r
-                latinOneRegenTable_ = true;\r
-            }\r
-            m_caseFirst_ = AttributeValue.UPPER_FIRST_;\r
-        }\r
-        else {\r
-            if(m_caseFirst_ != AttributeValue.OFF_) {\r
-                latinOneRegenTable_ = true;\r
-            }\r
-            m_caseFirst_ = AttributeValue.OFF_;\r
-        }\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the orders of lower cased characters to sort before upper cased\r
-     * characters, in strength TERTIARY. The default\r
-     * mode is false.\r
-     * If true is set, the RuleBasedCollator will sort lower cased characters\r
-     * before the upper cased ones.\r
-     * Otherwise, if false is set, the RuleBasedCollator will ignore case\r
-     * preferences.\r
-     * @param lowerfirst true for sorting lower cased characters before\r
-     *                   upper cased characters, false to ignore case\r
-     *                   preferences.\r
-     * @see #isLowerCaseFirst\r
-     * @see #isUpperCaseFirst\r
-     * @see #setUpperCaseFirst\r
-     * @see #setCaseFirstDefault\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setLowerCaseFirst(boolean lowerfirst)\r
-    {\r
-        if (lowerfirst) {\r
-                if(m_caseFirst_ != AttributeValue.LOWER_FIRST_) {\r
-                    latinOneRegenTable_ = true;\r
-                }\r
-                m_caseFirst_ = AttributeValue.LOWER_FIRST_;\r
-        }\r
-        else {\r
-                if(m_caseFirst_ != AttributeValue.OFF_) {\r
-                    latinOneRegenTable_ = true;\r
-                }\r
-            m_caseFirst_ = AttributeValue.OFF_;\r
-            }\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the case first mode to the initial mode set during\r
-     * construction of the RuleBasedCollator.\r
-     * See setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more\r
-     * details.\r
-     * @see #isLowerCaseFirst\r
-     * @see #isUpperCaseFirst\r
-     * @see #setLowerCaseFirst(boolean)\r
-     * @see #setUpperCaseFirst(boolean)\r
-     * @stable ICU 2.8\r
-     */\r
-    public final void setCaseFirstDefault()\r
-    {\r
-        if(m_caseFirst_ != m_defaultCaseFirst_) {\r
-            latinOneRegenTable_ = true;\r
-        }\r
-        m_caseFirst_ = m_defaultCaseFirst_;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the alternate handling mode to the initial mode set during\r
-     * construction of the RuleBasedCollator.\r
-     * See setAlternateHandling(boolean) for more details.\r
-     * @see #setAlternateHandlingShifted(boolean)\r
-     * @see #isAlternateHandlingShifted()\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setAlternateHandlingDefault()\r
-    {\r
-        m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the case level mode to the initial mode set during\r
-     * construction of the RuleBasedCollator.\r
-     * See setCaseLevel(boolean) for more details.\r
-     * @see #setCaseLevel(boolean)\r
-     * @see #isCaseLevel\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setCaseLevelDefault()\r
-    {\r
-        m_isCaseLevel_ = m_defaultIsCaseLevel_;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the decomposition mode to the initial mode set during construction\r
-     * of the RuleBasedCollator.\r
-     * See setDecomposition(int) for more details.\r
-     * @see #getDecomposition\r
-     * @see #setDecomposition(int)\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setDecompositionDefault()\r
-    {\r
-        setDecomposition(m_defaultDecomposition_);\r
-        updateInternalState();        \r
-    }\r
-\r
-    /**\r
-     * Sets the French collation mode to the initial mode set during\r
-     * construction of the RuleBasedCollator.\r
-     * See setFrenchCollation(boolean) for more details.\r
-     * @see #isFrenchCollation\r
-     * @see #setFrenchCollation(boolean)\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setFrenchCollationDefault()\r
-    {\r
-        if(m_isFrenchCollation_ != m_defaultIsFrenchCollation_) {\r
-            latinOneRegenTable_ = true;\r
-        }\r
-        m_isFrenchCollation_ = m_defaultIsFrenchCollation_;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the collation strength to the initial mode set during the\r
-     * construction of the RuleBasedCollator.\r
-     * See setStrength(int) for more details.\r
-     * @see #setStrength(int)\r
-     * @see #getStrength\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setStrengthDefault()\r
-    {\r
-        setStrength(m_defaultStrength_);\r
-        updateInternalState();        \r
-    }\r
-    \r
-    /**\r
-     * Method to set numeric collation to its default value.\r
-     * When numeric collation is turned on, this Collator generates a collation \r
-     * key for the numeric value of substrings of digits. This is a way to get \r
-     * '100' to sort AFTER '2'\r
-     * @see #getNumericCollation\r
-     * @see #setNumericCollation\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setNumericCollationDefault()\r
-    {\r
-        setNumericCollation(m_defaultIsNumericCollation_);\r
-        updateInternalState();        \r
-    }\r
-\r
-    /**\r
-     * Sets the mode for the direction of SECONDARY weights to be used in\r
-     * French collation.\r
-     * The default value is false, which treats SECONDARY weights in the order\r
-     * they appear.\r
-     * If set to true, the SECONDARY weights will be sorted backwards.\r
-     * See the section on\r
-     * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">\r
-     * French collation</a> for more information.\r
-     * @param flag true to set the French collation on, false to set it off\r
-     * @stable ICU 2.8\r
-     * @see #isFrenchCollation\r
-     * @see #setFrenchCollationDefault\r
-     */\r
-    public void setFrenchCollation(boolean flag)\r
-    {\r
-        if(m_isFrenchCollation_ != flag) {\r
-            latinOneRegenTable_ = true;\r
-        }\r
-        m_isFrenchCollation_ = flag;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * Sets the alternate handling for QUATERNARY strength to be either\r
-     * shifted or non-ignorable.\r
-     * See the UCA definition on\r
-     * <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">\r
-     * Alternate Weighting</a>.\r
-     * This attribute will only be effective when QUATERNARY strength is set.\r
-     * The default value for this mode is false, corresponding to the\r
-     * NON_IGNORABLE mode in UCA. In the NON-IGNORABLE mode, the\r
-     * RuleBasedCollator will treats all the codepoints with non-ignorable\r
-     * primary weights in the same way.\r
-     * If the mode is set to true, the behaviour corresponds to SHIFTED defined\r
-     * in UCA, this causes codepoints with PRIMARY orders that are equal or\r
-     * below the variable top value to be ignored in PRIMARY order and\r
-     * moved to the QUATERNARY order.\r
-     * @param shifted true if SHIFTED behaviour for alternate handling is\r
-     *        desired, false for the NON_IGNORABLE behaviour.\r
-     * @see #isAlternateHandlingShifted\r
-     * @see #setAlternateHandlingDefault\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setAlternateHandlingShifted(boolean shifted)\r
-    {\r
-        m_isAlternateHandlingShifted_ = shifted;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * <p>\r
-     * When case level is set to true, an additional weight is formed\r
-     * between the SECONDARY and TERTIARY weight, known as the case level.\r
-     * The case level is used to distinguish large and small Japanese Kana\r
-     * characters. Case level could also be used in other situations.\r
-     * For example to distinguish certain Pinyin characters.\r
-     * The default value is false, which means the case level is not generated.\r
-     * The contents of the case level are affected by the case first\r
-     * mode. A simple way to ignore accent differences in a string is to set\r
-     * the strength to PRIMARY and enable case level.\r
-     * </p>\r
-     * <p>\r
-     * See the section on\r
-     * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">\r
-     * case level</a> for more information.\r
-     * </p>\r
-     * @param flag true if case level sorting is required, false otherwise\r
-     * @stable ICU 2.8\r
-     * @see #setCaseLevelDefault\r
-     * @see #isCaseLevel\r
-     */\r
-    public void setCaseLevel(boolean flag)\r
-    {\r
-        m_isCaseLevel_ = flag;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     * <p>\r
-     * Sets this Collator's strength property. The strength property\r
-     * determines the minimum level of difference considered significant\r
-     * during comparison.\r
-     * </p>\r
-     * <p>See the Collator class description for an example of use.</p>\r
-     * @param newStrength the new strength value.\r
-     * @see #getStrength\r
-     * @see #setStrengthDefault\r
-     * @see #PRIMARY\r
-     * @see #SECONDARY\r
-     * @see #TERTIARY\r
-     * @see #QUATERNARY\r
-     * @see #IDENTICAL\r
-     * @exception IllegalArgumentException If the new strength value is not one\r
-     *              of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setStrength(int newStrength)\r
-    {\r
-        super.setStrength(newStrength);\r
-        updateInternalState();\r
-    }\r
-    \r
-    /** \r
-     * <p>\r
-     * Variable top is a two byte primary value which causes all the codepoints \r
-     * with primary values that are less or equal than the variable top to be \r
-     * shifted when alternate handling is set to SHIFTED.\r
-     * </p>\r
-     * <p>\r
-     * Sets the variable top to a collation element value of a string supplied.\r
-     * </p> \r
-     * @param varTop one or more (if contraction) characters to which the \r
-     *               variable top should be set\r
-     * @return a int value containing the value of the variable top in upper 16\r
-     *         bits. Lower 16 bits are undefined.\r
-     * @exception IllegalArgumentException is thrown if varTop argument is not \r
-     *            a valid variable top element. A variable top element is \r
-     *            invalid when \r
-     *            <ul>\r
-     *            <li>it is a contraction that does not exist in the\r
-     *                Collation order\r
-     *            <li>when the PRIMARY strength collation element for the \r
-     *                variable top has more than two bytes\r
-     *            <li>when the varTop argument is null or zero in length.\r
-     *            </ul>\r
-     * @see #getVariableTop\r
-     * @see RuleBasedCollator#setAlternateHandlingShifted\r
-     * @stable ICU 2.6\r
-     */\r
-    public int setVariableTop(String varTop)\r
-    {\r
-        if (varTop == null || varTop.length() == 0) {\r
-            throw new IllegalArgumentException(\r
-            "Variable top argument string can not be null or zero in length.");\r
-        }\r
-        if (m_srcUtilIter_ == null) {\r
-            initUtility(true);\r
-        }\r
-\r
-        m_srcUtilColEIter_.setText(varTop);\r
-        int ce = m_srcUtilColEIter_.next();\r
-        \r
-        // here we check if we have consumed all characters \r
-        // you can put in either one character or a contraction\r
-        // you shouldn't put more... \r
-        if (m_srcUtilColEIter_.getOffset() != varTop.length() \r
-            || ce == CollationElementIterator.NULLORDER) {\r
-            throw new IllegalArgumentException(\r
-            "Variable top argument string is a contraction that does not exist "\r
-            + "in the Collation order");\r
-        }\r
-        \r
-        int nextCE = m_srcUtilColEIter_.next();\r
-        \r
-        if ((nextCE != CollationElementIterator.NULLORDER) \r
-            && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) {\r
-                throw new IllegalArgumentException(\r
-                "Variable top argument string can only have a single collation "\r
-                + "element that has less than or equal to two PRIMARY strength "\r
-                + "bytes");\r
-        }\r
-        \r
-        m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16;\r
-        \r
-        return ce & CE_PRIMARY_MASK_;\r
-    }\r
-    \r
-    /** \r
-     * Sets the variable top to a collation element value supplied.\r
-     * Variable top is set to the upper 16 bits. \r
-     * Lower 16 bits are ignored.\r
-     * @param varTop Collation element value, as returned by setVariableTop or \r
-     *               getVariableTop\r
-     * @see #getVariableTop\r
-     * @see #setVariableTop(String)\r
-     * @stable ICU 2.6\r
-     */\r
-    public void setVariableTop(int varTop)\r
-    {\r
-        m_variableTopValue_ = (varTop & CE_PRIMARY_MASK_) >> 16;\r
-    }\r
-    \r
-    /**\r
-     * When numeric collation is turned on, this Collator generates a collation \r
-     * key for the numeric value of substrings of digits. This is a way to get \r
-     * '100' to sort AFTER '2'\r
-     * @param flag true to turn numeric collation on and false to turn it off\r
-     * @see #getNumericCollation\r
-     * @see #setNumericCollationDefault\r
-     * @stable ICU 2.8\r
-     */\r
-    public void setNumericCollation(boolean flag)\r
-    {\r
-        // sort substrings of digits as numbers\r
-        m_isNumericCollation_ = flag;\r
-        updateInternalState();\r
-    }\r
-\r
-    // public getters --------------------------------------------------------\r
-\r
-    /**\r
-     * Gets the collation rules for this RuleBasedCollator.\r
-     * Equivalent to String getRules(RuleOption.FULL_RULES).\r
-     * @return returns the collation rules\r
-     * @see #getRules(boolean)\r
-     * @stable ICU 2.8\r
-     */\r
-    public String getRules()\r
-    {\r
-        return m_rules_;\r
-    }\r
-    \r
-    /**\r
-     * Returns current rules. The argument defines whether full rules \r
-     * (UCA + tailored) rules are returned or just the tailoring. \r
-     * @param fullrules true if the rules that defines the full set of \r
-     *        collation order is required, otherwise false for returning only \r
-     *        the tailored rules\r
-     * @return the current rules that defines this Collator.\r
-     * @see #getRules()\r
-     * @stable ICU 2.6\r
-     */\r
-    public String getRules(boolean fullrules)\r
-    {\r
-        if (!fullrules) {\r
-            return m_rules_;\r
-        }\r
-        // take the UCA rules and append real rules at the end \r
-        return UCA_.m_rules_.concat(m_rules_);\r
-    }\r
-\r
-    /**\r
-     * Get an UnicodeSet that contains all the characters and sequences\r
-     * tailored in this collator.\r
-     * @return a pointer to a UnicodeSet object containing all the\r
-     *         code points and sequences that may sort differently than\r
-     *         in the UCA.\r
-     * @exception ParseException thrown when argument rules have an\r
-     *            invalid syntax. IOException\r
-     * @stable ICU 2.4\r
-     */\r
-    public UnicodeSet getTailoredSet()\r
-    {\r
-        try {\r
-           CollationRuleParser src = new CollationRuleParser(getRules());\r
-           return src.getTailoredSet();\r
-        } catch(Exception e) {\r
-            throw new IllegalStateException("A tailoring rule should not " +\r
-                "have errors. Something is quite wrong!");\r
-        }\r
-    }\r
-\r
-    private class contContext {\r
-        RuleBasedCollator coll;\r
-        UnicodeSet contractions;\r
-        UnicodeSet expansions;\r
-        UnicodeSet removedContractions;\r
-        boolean addPrefixes;       \r
-        contContext(RuleBasedCollator coll, UnicodeSet contractions, UnicodeSet expansions, \r
-                UnicodeSet removedContractions, boolean addPrefixes) {\r
-            this.coll = coll;\r
-            this.contractions = contractions;\r
-            this.expansions = expansions;\r
-            this.removedContractions = removedContractions;\r
-            this.addPrefixes = addPrefixes;\r
-        }\r
-    }\r
-    \r
-    private void\r
-    addSpecial(contContext c, StringBuffer buffer, int CE)\r
-    {\r
-        StringBuffer b = new StringBuffer();\r
-        int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_;\r
-        int newCE = c.coll.m_contractionCE_[offset];\r
-        // we might have a contraction that ends from previous level\r
-        if(newCE != CollationElementIterator.CE_NOT_FOUND_) {\r
-            if(isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_ \r
-                    && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_ \r
-                    && c.addPrefixes) {\r
-                addSpecial(c, buffer, newCE);\r
-            }\r
-            if(buffer.length() > 1) {\r
-                if(c.contractions != null) {\r
-                    c.contractions.add(buffer.toString());\r
-                }\r
-                if(c.expansions != null && isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {\r
-                    c.expansions.add(buffer.toString());\r
-                }\r
-            }\r
-        }    \r
-        \r
-        offset++;\r
-        // check whether we're doing contraction or prefix\r
-        if(getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) {\r
-            while(c.coll.m_contractionIndex_[offset] != 0xFFFF) {\r
-                b.delete(0, b.length());\r
-                b.append(buffer);\r
-                newCE = c.coll.m_contractionCE_[offset];\r
-                b.insert(0, c.coll.m_contractionIndex_[offset]);\r
-                if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {\r
-                    addSpecial(c, b, newCE);\r
-                } else {\r
-                    if(c.contractions != null) {\r
-                        c.contractions.add(b.toString());\r
-                    }\r
-                    if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {\r
-                        c.expansions.add(b.toString());\r
-                    }\r
-                }\r
-                offset++;\r
-            }\r
-        } else if(getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) {\r
-            while(c.coll.m_contractionIndex_[offset] != 0xFFFF) {\r
-                b.delete(0, b.length());\r
-                b.append(buffer);\r
-                newCE = c.coll.m_contractionCE_[offset];\r
-                b.append(c.coll.m_contractionIndex_[offset]);\r
-                if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {\r
-                    addSpecial(c, b, newCE);\r
-                } else {\r
-                    if(c.contractions != null) {\r
-                        c.contractions.add(b.toString());\r
-                    }\r
-                    if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {\r
-                        c.expansions.add(b.toString());\r
-                    }\r
-                }\r
-                offset++;\r
-            }\r
-        }\r
-    }\r
-    \r
-    private\r
-    void processSpecials(contContext c) \r
-    {\r
-        int internalBufferSize = 512;\r
-        TrieIterator trieiterator \r
-        = new TrieIterator(c.coll.m_trie_);\r
-        RangeValueIterator.Element element = new RangeValueIterator.Element();\r
-        while (trieiterator.next(element)) {\r
-            int start = element.start;\r
-            int limit = element.limit;\r
-            int CE = element.value;\r
-            StringBuffer contraction = new StringBuffer(internalBufferSize);\r
-            \r
-            if(isSpecial(CE)) {\r
-                if(((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) {\r
-                    while(start < limit) {\r
-                        // if there are suppressed contractions, we don't \r
-                        // want to add them.\r
-                        if(c.removedContractions != null && c.removedContractions.contains(start)) {\r
-                            start++;\r
-                            continue;\r
-                        }\r
-                        // we start our contraction from middle, since we don't know if it\r
-                        // will grow toward right or left\r
-                        contraction.append((char) start);\r
-                        addSpecial(c, contraction, CE);\r
-                        start++;\r
-                    }\r
-                } else if(c.expansions != null && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {\r
-                    while(start < limit) {\r
-                        c.expansions.add(start++);\r
-                    }\r
-                }\r
-            }\r
-        }\r
-    }\r
-    \r
-    /**\r
-     * Gets unicode sets containing contractions and/or expansions of a collator\r
-     * @param contractions if not null, set to contain contractions\r
-     * @param expansions if not null, set to contain expansions\r
-     * @param addPrefixes add the prefix contextual elements to contractions\r
-     * @throws Exception \r
-     * @stable ICU 3.4\r
-     */\r
-    public void\r
-    getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions,\r
-            boolean addPrefixes) throws Exception {\r
-        if(contractions != null) {\r
-            contractions.clear();\r
-        }\r
-        if(expansions != null) {\r
-            expansions.clear();\r
-        }\r
-        String rules = getRules();\r
-        try {\r
-            CollationRuleParser src = new CollationRuleParser(rules);\r
-            contContext c = new contContext(RuleBasedCollator.UCA_, \r
-                    contractions, expansions, src.m_removeSet_, addPrefixes);\r
-            \r
-            // Add the UCA contractions\r
-            processSpecials(c);\r
-            // This is collator specific. Add contractions from a collator\r
-            c.coll = this;\r
-            c.removedContractions =  null;\r
-            processSpecials(c);\r
-        } catch (Exception e) {\r
-            throw e;\r
-        }\r
-    }\r
-    \r
-    /**\r
-     * <p>\r
-     * Get a Collation key for the argument String source from this\r
-     * RuleBasedCollator.\r
-     * </p>\r
-     * <p>\r
-     * General recommendation: <br>\r
-     * If comparison are to be done to the same String multiple times, it would\r
-     * be more efficient to generate CollationKeys for the Strings and use\r
-     * CollationKey.compareTo(CollationKey) for the comparisons.\r
-     * If the each Strings are compared to only once, using the method\r
-     * RuleBasedCollator.compare(String, String) will have a better performance.\r
-     * </p>\r
-     * <p>\r
-     * See the class documentation for an explanation about CollationKeys.\r
-     * </p>\r
-     * @param source the text String to be transformed into a collation key.\r
-     * @return the CollationKey for the given String based on this\r
-     *         RuleBasedCollator's collation rules. If the source String is\r
-     *         null, a null CollationKey is returned.\r
-     * @see CollationKey\r
-     * @see #compare(String, String)\r
-     * @see #getRawCollationKey\r
-     * @stable ICU 2.8\r
-     */\r
-    public CollationKey getCollationKey(String source) {\r
-        if (source == null) {\r
-            return null;\r
-        }\r
-        m_utilRawCollationKey_ = getRawCollationKey(source, \r
-                                                    m_utilRawCollationKey_);\r
-        return new CollationKey(source, m_utilRawCollationKey_);\r
-    }\r
-    \r
-    /**\r
-     * Gets the simpler form of a CollationKey for the String source following\r
-     * the rules of this Collator and stores the result into the user provided \r
-     * argument key. \r
-     * If key has a internal byte array of length that's too small for the \r
-     * result, the internal byte array will be grown to the exact required \r
-     * size.\r
-     * @param source the text String to be transformed into a RawCollationKey  \r
-     * @param key output RawCollationKey to store results\r
-     * @return If key is null, a new instance of RawCollationKey will be \r
-     *         created and returned, otherwise the user provided key will be \r
-     *         returned.\r
-     * @see #getCollationKey \r
-     * @see #compare(String, String)\r
-     * @see RawCollationKey\r
-     * @stable ICU 2.8\r
-     */\r
-    public RawCollationKey getRawCollationKey(String source, \r
-                                              RawCollationKey key)\r
-    {\r
-        if (source == null) {\r
-            return null;\r
-        }\r
-        int strength = getStrength();\r
-        m_utilCompare0_ = m_isCaseLevel_;\r
-        //m_utilCompare1_ = true;\r
-        m_utilCompare2_ = strength >= SECONDARY;\r
-        m_utilCompare3_ = strength >= TERTIARY;\r
-        m_utilCompare4_ = strength >= QUATERNARY;\r
-        m_utilCompare5_ = strength == IDENTICAL;\r
-\r
-        m_utilBytesCount0_ = 0;\r
-        m_utilBytesCount1_ = 0;\r
-        m_utilBytesCount2_ = 0;\r
-        m_utilBytesCount3_ = 0;\r
-        m_utilBytesCount4_ = 0;\r
-        //m_utilBytesCount5_ = 0;\r
-        //m_utilCount0_ = 0;\r
-        //m_utilCount1_ = 0;\r
-        m_utilCount2_ = 0;\r
-        m_utilCount3_ = 0;\r
-        m_utilCount4_ = 0;\r
-        //m_utilCount5_ = 0;\r
-        boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_;\r
-        // TODO: UCOL_COMMON_BOT4 should be a function of qShifted.\r
-        // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so\r
-        // high.\r
-        int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1) & LAST_BYTE_MASK_;\r
-        byte hiragana4 = 0;\r
-        if (m_isHiragana4_ && m_utilCompare4_) {\r
-            // allocate one more space for hiragana, value for hiragana\r
-            hiragana4 = (byte)commonBottom4;\r
-            commonBottom4 ++;\r
-        }\r
-\r
-        int bottomCount4 = 0xFF - commonBottom4;\r
-        // If we need to normalize, we'll do it all at once at the beginning!\r
-        if (m_utilCompare5_ && Normalizer.quickCheck(source, Normalizer.NFD,0)\r
-                                                    != Normalizer.YES) {\r
-            // if it is identical strength, we have to normalize the string to\r
-            // NFD so that it will be appended correctly to the end of the sort\r
-            // key\r
-            source = Normalizer.decompose(source, false);\r
-        }\r
-        else if (getDecomposition() != NO_DECOMPOSITION\r
-            && Normalizer.quickCheck(source, Normalizer.FCD,0)\r
-                                                    != Normalizer.YES) {\r
-            // for the rest of the strength, if decomposition is on, FCD is\r
-            // enough for us to work on.\r
-            source = Normalizer.normalize(source,Normalizer.FCD);\r
-        }\r
-        getSortKeyBytes(source, doFrench, hiragana4, commonBottom4,\r
-                        bottomCount4);\r
-        if (key == null) {\r
-            key = new RawCollationKey();\r
-        }\r
-        getSortKey(source, doFrench, commonBottom4, bottomCount4, key);\r
-        return key;\r
-    }\r
-\r
-    /**\r
-     * Return true if an uppercase character is sorted before the corresponding lowercase character.\r
-     * See setCaseFirst(boolean) for details.\r
-     * @see #setUpperCaseFirst\r
-     * @see #setLowerCaseFirst\r
-     * @see #isLowerCaseFirst\r
-     * @see #setCaseFirstDefault\r
-     * @return true if upper cased characters are sorted before lower cased\r
-     *         characters, false otherwise\r
-     * @stable ICU 2.8\r
-     */\r
-     public boolean isUpperCaseFirst()\r
-     {\r
-        return (m_caseFirst_ == AttributeValue.UPPER_FIRST_);\r
-     }\r
-     \r
-    /**\r
-     * Return true if a lowercase character is sorted before the corresponding uppercase character.\r
-     * See setCaseFirst(boolean) for details.\r
-     * @see #setUpperCaseFirst\r
-     * @see #setLowerCaseFirst\r
-     * @see #isUpperCaseFirst\r
-     * @see #setCaseFirstDefault\r
-     * @return true lower cased characters are sorted before upper cased\r
-     *         characters, false otherwise\r
-     * @stable ICU 2.8\r
-     */\r
-    public boolean isLowerCaseFirst()\r
-    {\r
-        return (m_caseFirst_ == AttributeValue.LOWER_FIRST_);\r
-    }\r
-\r
-    /**\r
-     * Checks if the alternate handling behaviour is the UCA defined SHIFTED or\r
-     * NON_IGNORABLE.\r
-     * If return value is true, then the alternate handling attribute for the\r
-     * Collator is SHIFTED. Otherwise if return value is false, then the\r
-     * alternate handling attribute for the Collator is NON_IGNORABLE\r
-     * See setAlternateHandlingShifted(boolean) for more details.\r
-     * @return true or false\r
-     * @see #setAlternateHandlingShifted(boolean)\r
-     * @see #setAlternateHandlingDefault\r
-     * @stable ICU 2.8\r
-     */\r
-    public boolean isAlternateHandlingShifted()\r
-    {\r
-        return m_isAlternateHandlingShifted_;\r
-    }\r
-\r
-    /**\r
-     * Checks if case level is set to true.\r
-     * See setCaseLevel(boolean) for details.\r
-     * @return the case level mode\r
-     * @see #setCaseLevelDefault\r
-     * @see #isCaseLevel\r
-     * @see #setCaseLevel(boolean)\r
-     * @stable ICU 2.8\r
-     */\r
-    public boolean isCaseLevel()\r
-    {\r
-        return m_isCaseLevel_;\r
-    }\r
-\r
-    /**\r
-     * Checks if French Collation is set to true.\r
-     * See setFrenchCollation(boolean) for details.\r
-     * @return true if French Collation is set to true, false otherwise\r
-     * @see #setFrenchCollation(boolean)\r
-     * @see #setFrenchCollationDefault\r
-     * @stable ICU 2.8\r
-     */\r
-     public boolean isFrenchCollation()\r
-     {\r
-         return m_isFrenchCollation_;\r
-     }\r
-\r
-    /**\r
-     * Checks if the Hiragana Quaternary mode is set on.\r
-     * See setHiraganaQuaternary(boolean) for more details.\r
-     * @return flag true if Hiragana Quaternary mode is on, false otherwise\r
-     * @see #setHiraganaQuaternaryDefault\r
-     * @see #setHiraganaQuaternary(boolean)\r
-     * @stable ICU 2.8\r
-     */\r
-    public boolean isHiraganaQuaternary()\r
-    {\r
-        return m_isHiragana4_;\r
-    }\r
-\r
-    /** \r
-     * Gets the variable top value of a Collator. \r
-     * Lower 16 bits are undefined and should be ignored.\r
-     * @return the variable top value of a Collator.\r
-     * @see #setVariableTop\r
-     * @stable ICU 2.6\r
-     */\r
-    public int getVariableTop()\r
-    {\r
-          return m_variableTopValue_ << 16;\r
-    }\r
-    \r
-    /** \r
-     * Method to retrieve the numeric collation value.\r
-     * When numeric collation is turned on, this Collator generates a collation \r
-     * key for the numeric value of substrings of digits. This is a way to get \r
-     * '100' to sort AFTER '2'\r
-     * @see #setNumericCollation\r
-     * @see #setNumericCollationDefault\r
-     * @return true if numeric collation is turned on, false otherwise\r
-     * @stable ICU 2.8\r
-     */\r
-    public boolean getNumericCollation()\r
-    {\r
-        return m_isNumericCollation_;\r
-    }\r
-    \r
-    // public other methods -------------------------------------------------\r
-\r
-    /**\r
-     * Compares the equality of two RuleBasedCollator objects.\r
-     * RuleBasedCollator objects are equal if they have the same collation\r
-     * rules and the same attributes.\r
-     * @param obj the RuleBasedCollator to be compared to.\r
-     * @return true if this RuleBasedCollator has exactly the same\r
-     *         collation behaviour as obj, false otherwise.\r
-     * @stable ICU 2.8\r
-     */\r
-    public boolean equals(Object obj)\r
-    {\r
-        if (obj == null) {\r
-            return false;  // super does class check\r
-        }\r
-        if (this == obj) {\r
-            return true;\r
-        }\r
-        if (getClass() != obj.getClass()) {\r
-            return false;\r
-        }\r
-        RuleBasedCollator other = (RuleBasedCollator)obj;\r
-        // all other non-transient information is also contained in rules.\r
-        if (getStrength() != other.getStrength()\r
-               || getDecomposition() != other.getDecomposition()\r
-               || other.m_caseFirst_ != m_caseFirst_\r
-               || other.m_caseSwitch_ != m_caseSwitch_\r
-               || other.m_isAlternateHandlingShifted_\r
-                                             != m_isAlternateHandlingShifted_\r
-               || other.m_isCaseLevel_ != m_isCaseLevel_\r
-               || other.m_isFrenchCollation_ != m_isFrenchCollation_\r
-               || other.m_isHiragana4_ != m_isHiragana4_) {\r
-            return false;\r
-        }\r
-        boolean rules = m_rules_ == other.m_rules_;\r
-        if (!rules && (m_rules_ != null && other.m_rules_ != null)) {\r
-            rules = m_rules_.equals(other.m_rules_);\r
-        }\r
-        if (!rules || !ICUDebug.enabled("collation")) {\r
-            return rules;\r
-        }\r
-        if (m_addition3_ != other.m_addition3_\r
-                  || m_bottom3_ != other.m_bottom3_\r
-                  || m_bottomCount3_ != other.m_bottomCount3_\r
-                  || m_common3_ != other.m_common3_\r
-                  || m_isSimple3_ != other.m_isSimple3_\r
-                  || m_mask3_ != other.m_mask3_\r
-                  || m_minContractionEnd_ != other.m_minContractionEnd_\r
-                  || m_minUnsafe_ != other.m_minUnsafe_\r
-                  || m_top3_ != other.m_top3_\r
-                  || m_topCount3_ != other.m_topCount3_\r
-                  || !Arrays.equals(m_unsafe_, other.m_unsafe_)) {\r
-            return false;\r
-        }\r
-        if (!m_trie_.equals(other.m_trie_)) {\r
-            // we should use the trie iterator here, but then this part is\r
-            // only used in the test.\r
-            for (int i = UCharacter.MAX_VALUE; i >= UCharacter.MIN_VALUE; i --)\r
-            {\r
-                int v = m_trie_.getCodePointValue(i);\r
-                int otherv = other.m_trie_.getCodePointValue(i);\r
-                if (v != otherv) {\r
-                    int mask = v & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_);\r
-                    if (mask == (otherv & 0xff000000)) {\r
-                        v &= 0xffffff;\r
-                        otherv &= 0xffffff;\r
-                        if (mask == 0xf1000000) {\r
-                            v -= (m_expansionOffset_ << 4);\r
-                            otherv -= (other.m_expansionOffset_ << 4);\r
-                        }\r
-                        else if (mask == 0xf2000000) {\r
-                            v -= m_contractionOffset_;\r
-                            otherv -= other.m_contractionOffset_;\r
-                        }\r
-                        if (v == otherv) {\r
-                            continue;\r
-                        }\r
-                    }\r
-                    return false;\r
-                }\r
-            }\r
-        }\r
-        if (Arrays.equals(m_contractionCE_, other.m_contractionCE_)\r
-            && Arrays.equals(m_contractionEnd_, other.m_contractionEnd_)\r
-            && Arrays.equals(m_contractionIndex_, other.m_contractionIndex_)\r
-            && Arrays.equals(m_expansion_, other.m_expansion_)\r
-            && Arrays.equals(m_expansionEndCE_, other.m_expansionEndCE_)) {\r
-            // not comparing paddings\r
-            for (int i = 0; i < m_expansionEndCE_.length; i ++) {\r
-                 if (m_expansionEndCEMaxSize_[i]\r
-                     != other.m_expansionEndCEMaxSize_[i]) {\r
-                     return false;\r
-                 }\r
-                 return true;\r
-            }\r
-        }\r
-        return false;\r
-    }\r
-\r
-    /**\r
-     * Generates a unique hash code for this RuleBasedCollator.\r
-     * @return the unique hash code for this Collator\r
-     * @stable ICU 2.8\r
-     */\r
-    public int hashCode()\r
-    {\r
-        String rules = getRules();\r
-        if (rules == null) {\r
-            rules = "";\r
-        }\r
-        return rules.hashCode();\r
-    }\r
-\r
-    /**\r
-     * Compares the source text String to the target text String according to\r
-     * the collation rules, strength and decomposition mode for this\r
-     * RuleBasedCollator.\r
-     * Returns an integer less than,\r
-     * equal to or greater than zero depending on whether the source String is\r
-     * less than, equal to or greater than the target String. See the Collator\r
-     * class description for an example of use.\r
-     * </p>\r
-     * <p>\r
-     * General recommendation: <br>\r
-     * If comparison are to be done to the same String multiple times, it would\r
-     * be more efficient to generate CollationKeys for the Strings and use\r
-     * CollationKey.compareTo(CollationKey) for the comparisons.\r
-     * If speed performance is critical and object instantiation is to be \r
-     * reduced, further optimization may be achieved by generating a simpler \r
-     * key of the form RawCollationKey and reusing this RawCollationKey \r
-     * object with the method RuleBasedCollator.getRawCollationKey. Internal \r
-     * byte representation can be directly accessed via RawCollationKey and\r
-     * stored for future use. Like CollationKey, RawCollationKey provides a\r
-     * method RawCollationKey.compareTo for key comparisons.\r
-     * If the each Strings are compared to only once, using the method\r
-     * RuleBasedCollator.compare(String, String) will have a better performance.\r
-     * </p>\r
-     * @param source the source text String.\r
-     * @param target the target text String.\r
-     * @return Returns an integer value. Value is less than zero if source is\r
-     *         less than target, value is zero if source and target are equal,\r
-     *         value is greater than zero if source is greater than target.\r
-     * @see CollationKey\r
-     * @see #getCollationKey\r
-     * @stable ICU 2.8\r
-     */\r
-    public int compare(String source, String target)\r
-    {\r
-        if (source == target) {\r
-            return 0;\r
-        }\r
-\r
-        // Find the length of any leading portion that is equal\r
-        int offset = getFirstUnmatchedOffset(source, target);\r
-        //return compareRegular(source, target, offset);\r
-        if(latinOneUse_) {\r
-          if ((offset < source.length() \r
-               && source.charAt(offset) > ENDOFLATINONERANGE_) \r
-              || (offset < target.length() \r
-                  && target.charAt(offset) > ENDOFLATINONERANGE_)) { \r
-              // source or target start with non-latin-1\r
-            return compareRegular(source, target, offset);\r
-          } else {\r
-            return compareUseLatin1(source, target, offset);\r
-          }\r
-        } else {\r
-          return compareRegular(source, target, offset);\r
-        }\r
-    }\r
-    \r
-    // package private inner interfaces --------------------------------------\r
-\r
-    /**\r
-     * Attribute values to be used when setting the Collator options\r
-     */\r
-    static interface AttributeValue\r
-    {\r
-        /**\r
-         * Indicates that the default attribute value will be used.\r
-         * See individual attribute for details on its default value.\r
-         */\r
-        static final int DEFAULT_ = -1;\r
-        /**\r
-         * Primary collation strength\r
-         */\r
-        static final int PRIMARY_ = Collator.PRIMARY;\r
-        /**\r
-         * Secondary collation strength\r
-         */\r
-        static final int SECONDARY_ = Collator.SECONDARY;\r
-        /**\r
-         * Tertiary collation strength\r
-         */\r
-        static final int TERTIARY_ = Collator.TERTIARY;\r
-        /**\r
-         * Default collation strength\r
-         */\r
-        static final int DEFAULT_STRENGTH_ = Collator.TERTIARY;\r
-        /**\r
-         * Internal use for strength checks in Collation elements\r
-         */\r
-        static final int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1;\r
-        /**\r
-         * Quaternary collation strength\r
-         */\r
-        static final int QUATERNARY_ = 3;\r
-        /**\r
-         * Identical collation strength\r
-         */\r
-        static final int IDENTICAL_ = Collator.IDENTICAL;\r
-        /**\r
-         * Internal use for strength checks\r
-         */\r
-        static final int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1;\r
-        /**\r
-         * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL,\r
-         * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE\r
-         */\r
-        static final int OFF_ = 16;\r
-        /**\r
-         * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL,\r
-         * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE\r
-         */\r
-        static final int ON_ = 17;\r
-        /**\r
-         * Valid for ALTERNATE_HANDLING. Alternate handling will be shifted\r
-         */\r
-        static final int SHIFTED_ = 20;\r
-        /**\r
-         * Valid for ALTERNATE_HANDLING. Alternate handling will be non\r
-         * ignorable\r
-         */\r
-        static final int NON_IGNORABLE_ = 21;\r
-        /**\r
-         * Valid for CASE_FIRST - lower case sorts before upper case\r
-         */\r
-        static final int LOWER_FIRST_ = 24;\r
-        /**\r
-         * Upper case sorts before lower case\r
-         */\r
-        static final int UPPER_FIRST_ = 25;\r
-        /**\r
-         * Number of attribute values\r
-         */\r
-        static final int LIMIT_ = 29;\r
-    }\r
-\r
-    /**\r
-     * Attributes that collation service understands. All the attributes can\r
-     * take DEFAULT value, as well as the values specific to each one.\r
-     */\r
-    static interface Attribute\r
-    {\r
-        /**\r
-         * Attribute for direction of secondary weights - used in French.\r
-         * Acceptable values are ON, which results in secondary weights being\r
-         * considered backwards and OFF which treats secondary weights in the\r
-         * order they appear.\r
-         */\r
-        static final int FRENCH_COLLATION_ = 0;\r
-        /**\r
-         * Attribute for handling variable elements. Acceptable values are\r
-         * NON_IGNORABLE (default) which treats all the codepoints with\r
-         * non-ignorable primary weights in the same way, and SHIFTED which\r
-         * causes codepoints with primary weights that are equal or below the\r
-         * variable top value to be ignored on primary level and moved to the\r
-         * quaternary level.\r
-         */\r
-        static final int ALTERNATE_HANDLING_ = 1;\r
-        /**\r
-         * Controls the ordering of upper and lower case letters. Acceptable\r
-         * values are OFF (default), which orders upper and lower case letters\r
-         * in accordance to their tertiary weights, UPPER_FIRST which forces\r
-         * upper case letters to sort before lower case letters, and\r
-         * LOWER_FIRST which does the opposite.\r
-         */\r
-        static final int CASE_FIRST_ = 2;\r
-        /**\r
-         * Controls whether an extra case level (positioned before the third\r
-         * level) is generated or not. Acceptable values are OFF (default),\r
-         * when case level is not generated, and ON which causes the case\r
-         * level to be generated. Contents of the case level are affected by\r
-         * the value of CASE_FIRST attribute. A simple way to ignore accent\r
-         * differences in a string is to set the strength to PRIMARY and\r
-         * enable case level.\r
-         */\r
-        static final int CASE_LEVEL_ = 3;\r
-        /**\r
-         * Controls whether the normalization check and necessary\r
-         * normalizations are performed. When set to OFF (default) no\r
-         * normalization check is performed. The correctness of the result is\r
-         * guaranteed only if the input data is in so-called FCD form (see\r
-         * users manual for more info). When set to ON, an incremental check\r
-         * is performed to see whether the input data is in the FCD form. If\r
-         * the data is not in the FCD form, incremental NFD normalization is\r
-         * performed.\r
-         */\r
-        static final int NORMALIZATION_MODE_ = 4;\r
-        /**\r
-         * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY,\r
-         * QUATERNARY or IDENTICAL. The usual strength for most locales\r
-         * (except Japanese) is tertiary. Quaternary strength is useful when\r
-         * combined with shifted setting for alternate handling attribute and\r
-         * for JIS x 4061 collation, when it is used to distinguish between\r
-         * Katakana  and Hiragana (this is achieved by setting the\r
-         * HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level is\r
-         * affected only by the number of non ignorable code points in the\r
-         * string. Identical strength is rarely useful, as it amounts to\r
-         * codepoints of the NFD form of the string.\r
-         */\r
-        static final int STRENGTH_ = 5;\r
-        /**\r
-         * When turned on, this attribute positions Hiragana before all\r
-         * non-ignorables on quaternary level. This is a sneaky way to produce\r
-         * JIS sort order.\r
-         */\r
-        static final int HIRAGANA_QUATERNARY_MODE_ = 6;\r
-        /**\r
-         * Attribute count\r
-         */\r
-        static final int LIMIT_ = 7;\r
-    }\r
-\r
-    /**\r
-     * DataManipulate singleton\r
-     */\r
-    static class DataManipulate implements Trie.DataManipulate\r
-    {\r
-        // public methods ----------------------------------------------------\r
-\r
-        /**\r
-         * Internal method called to parse a lead surrogate's ce for the offset\r
-         * to the next trail surrogate data.\r
-         * @param ce collation element of the lead surrogate\r
-         * @return data offset or 0 for the next trail surrogate\r
-         * @stable ICU 2.8\r
-         */\r
-        public final int getFoldingOffset(int ce)\r
-        {\r
-            if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) {\r
-                return (ce & 0xFFFFFF);\r
-            }\r
-            return 0;\r
-        }\r
-\r
-        /**\r
-         * Get singleton object\r
-         */\r
-        public static final DataManipulate getInstance()\r
-        {\r
-            if (m_instance_ == null) {\r
-                m_instance_ =  new DataManipulate();\r
-            }\r
-            return m_instance_;\r
-        }\r
-\r
-        // private data member ----------------------------------------------\r
-\r
-        /**\r
-         * Singleton instance\r
-         */\r
-        private static DataManipulate m_instance_;\r
-\r
-        // private constructor ----------------------------------------------\r
-\r
-        /**\r
-         * private to prevent initialization\r
-         */\r
-        private DataManipulate()\r
-        {\r
-        }\r
-    }\r
-\r
-    /**\r
-     * UCAConstants\r
-     */\r
-    static final class UCAConstants\r
-    {\r
-         int FIRST_TERTIARY_IGNORABLE_[] = new int[2];       // 0x00000000\r
-         int LAST_TERTIARY_IGNORABLE_[] = new int[2];        // 0x00000000\r
-         int FIRST_PRIMARY_IGNORABLE_[] = new int[2];        // 0x00008705\r
-         int FIRST_SECONDARY_IGNORABLE_[] = new int[2];      // 0x00000000\r
-         int LAST_SECONDARY_IGNORABLE_[] = new int[2];       // 0x00000500\r
-         int LAST_PRIMARY_IGNORABLE_[] = new int[2];         // 0x0000DD05\r
-         int FIRST_VARIABLE_[] = new int[2];                 // 0x05070505\r
-         int LAST_VARIABLE_[] = new int[2];                  // 0x13CF0505\r
-         int FIRST_NON_VARIABLE_[] = new int[2];             // 0x16200505\r
-         int LAST_NON_VARIABLE_[] = new int[2];              // 0x767C0505\r
-         int RESET_TOP_VALUE_[] = new int[2];                // 0x9F000303\r
-         int FIRST_IMPLICIT_[] = new int[2];\r
-         int LAST_IMPLICIT_[] = new int[2];\r
-         int FIRST_TRAILING_[] = new int[2];\r
-         int LAST_TRAILING_[] = new int[2];\r
-         int PRIMARY_TOP_MIN_;\r
-         int PRIMARY_IMPLICIT_MIN_; // 0xE8000000\r
-         int PRIMARY_IMPLICIT_MAX_; // 0xF0000000\r
-         int PRIMARY_TRAILING_MIN_; // 0xE8000000\r
-         int PRIMARY_TRAILING_MAX_; // 0xF0000000\r
-         int PRIMARY_SPECIAL_MIN_; // 0xE8000000\r
-         int PRIMARY_SPECIAL_MAX_; // 0xF0000000\r
-    }\r
-\r
-    // package private data member -------------------------------------------\r
-\r
-    static final byte BYTE_FIRST_TAILORED_ = (byte)0x04;\r
-    static final byte BYTE_COMMON_ = (byte)0x05;\r
-    static final int COMMON_TOP_2_ = 0x86; // int for unsigness\r
-    static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;\r
-    static final int COMMON_BOTTOM_3 = 0x05;\r
-    /**\r
-     * Case strength mask\r
-     */\r
-    static final int CE_CASE_BIT_MASK_ = 0xC0;\r
-    static final int CE_TAG_SHIFT_ = 24;\r
-    static final int CE_TAG_MASK_ = 0x0F000000;\r
-\r
-    static final int CE_SPECIAL_FLAG_ = 0xF0000000;\r
-    /**\r
-     * Lead surrogate that is tailored and doesn't start a contraction\r
-     */\r
-    static final int CE_SURROGATE_TAG_ = 5;\r
-    /**\r
-     * Mask to get the primary strength of the collation element\r
-     */\r
-    static final int CE_PRIMARY_MASK_ = 0xFFFF0000;\r
-    /**\r
-     * Mask to get the secondary strength of the collation element\r
-     */\r
-    static final int CE_SECONDARY_MASK_ = 0xFF00;\r
-    /**\r
-     * Mask to get the tertiary strength of the collation element\r
-     */\r
-    static final int CE_TERTIARY_MASK_ = 0xFF;\r
-    /**\r
-     * Primary strength shift\r
-     */\r
-    static final int CE_PRIMARY_SHIFT_ = 16;\r
-    /**\r
-     * Secondary strength shift\r
-     */\r
-    static final int CE_SECONDARY_SHIFT_ = 8;\r
-    /**\r
-     * Continuation marker\r
-     */\r
-    static final int CE_CONTINUATION_MARKER_ = 0xC0;\r
-\r
-    /**\r
-     * Size of collator raw data headers and options before the expansion\r
-     * data. This is used when expansion ces are to be retrieved. ICU4C uses\r
-     * the expansion offset starting from UCollator.UColHeader, hence ICU4J\r
-     * will have to minus that off to get the right expansion ce offset. In\r
-     * number of ints.\r
-     */\r
-    int m_expansionOffset_;\r
-    /**\r
-     * Size of collator raw data headers, options and expansions before\r
-     * contraction data. This is used when contraction ces are to be retrieved.\r
-     * ICU4C uses contraction offset starting from UCollator.UColHeader, hence\r
-     * ICU4J will have to minus that off to get the right contraction ce\r
-     * offset. In number of chars.\r
-     */\r
-    int m_contractionOffset_;\r
-    /**\r
-     * Flag indicator if Jamo is special\r
-     */\r
-    boolean m_isJamoSpecial_;\r
-\r
-    // Collator options ------------------------------------------------------\r
-    \r
-    int m_defaultVariableTopValue_;\r
-    boolean m_defaultIsFrenchCollation_;\r
-    boolean m_defaultIsAlternateHandlingShifted_;\r
-    int m_defaultCaseFirst_;\r
-    boolean m_defaultIsCaseLevel_;\r
-    int m_defaultDecomposition_;\r
-    int m_defaultStrength_;\r
-    boolean m_defaultIsHiragana4_;\r
-    boolean m_defaultIsNumericCollation_;\r
-    \r
-    /**\r
-     * Value of the variable top\r
-     */\r
-    int m_variableTopValue_;\r
-    /**\r
-     * Attribute for special Hiragana\r
-     */\r
-    boolean m_isHiragana4_;\r
-    /**\r
-     * Case sorting customization\r
-     */\r
-    int m_caseFirst_;\r
-    /**\r
-     * Numeric collation option\r
-     */\r
-    boolean m_isNumericCollation_;\r
-\r
-    // end Collator options --------------------------------------------------\r
-\r
-    /**\r
-     * Expansion table\r
-     */\r
-    int m_expansion_[];\r
-    /**\r
-     * Contraction index table\r
-     */\r
-    char m_contractionIndex_[];\r
-    /**\r
-     * Contraction CE table\r
-     */\r
-    int m_contractionCE_[];\r
-    /**\r
-     * Data trie\r
-     */\r
-    IntTrie m_trie_;\r
-    /**\r
-     * Table to store all collation elements that are the last element of an\r
-     * expansion. This is for use in StringSearch.\r
-     */\r
-    int m_expansionEndCE_[];\r
-    /**\r
-     * Table to store the maximum size of any expansions that end with the\r
-     * corresponding collation element in m_expansionEndCE_. For use in\r
-     * StringSearch too\r
-     */\r
-    byte m_expansionEndCEMaxSize_[];\r
-    /**\r
-     * Heuristic table to store information on whether a char character is\r
-     * considered "unsafe". "Unsafe" character are combining marks or those\r
-     * belonging to some contraction sequence from the offset 1 onwards.\r
-     * E.g. if "ABC" is the only contraction, then 'B' and 'C' are considered\r
-     * unsafe. If we have another contraction "ZA" with the one above, then\r
-     * 'A', 'B', 'C' are "unsafe" but 'Z' is not.\r
-     */\r
-    byte m_unsafe_[];\r
-    /**\r
-     * Table to store information on whether a codepoint can occur as the last\r
-     * character in a contraction\r
-     */\r
-    byte m_contractionEnd_[];\r
-    /**\r
-     * Original collation rules\r
-     */\r
-    String m_rules_;\r
-    /**\r
-     * The smallest "unsafe" codepoint\r
-     */\r
-    char m_minUnsafe_;\r
-    /**\r
-     * The smallest codepoint that could be the end of a contraction\r
-     */\r
-    char m_minContractionEnd_;\r
-    /**\r
-     * General version of the collator\r
-     */\r
-    VersionInfo m_version_;\r
-    /**\r
-     * UCA version\r
-     */\r
-    VersionInfo m_UCA_version_;\r
-    /**\r
-     * UCD version\r
-     */\r
-    VersionInfo m_UCD_version_;\r
-\r
-    /**\r
-     * UnicodeData.txt property object\r
-     */\r
-    static final RuleBasedCollator UCA_;\r
-    /**\r
-     * UCA Constants\r
-     */\r
-    static final UCAConstants UCA_CONSTANTS_;\r
-    /**\r
-     * Table for UCA and builder use\r
-     */\r
-    static final char UCA_CONTRACTIONS_[];\r
-\r
-    private static boolean UCA_INIT_COMPLETE;\r
-\r
-    /**\r
-     * Implicit generator\r
-     */\r
-    static final ImplicitCEGenerator impCEGen_;\r
-//    /**\r
-//     * Implicit constants\r
-//     */\r
-//    static final int IMPLICIT_BASE_BYTE_;\r
-//    static final int IMPLICIT_LIMIT_BYTE_;\r
-//    static final int IMPLICIT_4BYTE_BOUNDARY_;\r
-//    static final int LAST_MULTIPLIER_;\r
-//    static final int LAST2_MULTIPLIER_;\r
-//    static final int IMPLICIT_BASE_3BYTE_;\r
-//    static final int IMPLICIT_BASE_4BYTE_;\r
-//    static final int BYTES_TO_AVOID_ = 3;\r
-//    static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;\r
-//    static final int LAST_COUNT_ = OTHER_COUNT_ / 2;\r
-//    /**\r
-//     * Room for intervening, without expanding to 5 bytes\r
-//     */\r
-//    static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;\r
-//    static final int IMPLICIT_3BYTE_COUNT_ = 1;\r
-//    \r
-    static final byte SORT_LEVEL_TERMINATOR_ = 1;\r
-\r
-//  These are values from UCA required for\r
-//  implicit generation and supressing sort key compression\r
-//  they should regularly be in the UCA, but if one\r
-//  is running without UCA, it could be a problem\r
-     static final int maxRegularPrimary  = 0xA0;\r
-     static final int minImplicitPrimary = 0xE0;\r
-     static final int maxImplicitPrimary = 0xE4;\r
-\r
-\r
-    // block to initialise character property database\r
-    static\r
-    {\r
-        // take pains to let static class init succeed, otherwise the class itself won't exist and\r
-        // clients will get a NoClassDefFoundException.  Instead, make the constructors fail if\r
-        // we can't load the UCA data.\r
-\r
-        RuleBasedCollator iUCA_ = null;\r
-        UCAConstants iUCA_CONSTANTS_ = null;\r
-        char iUCA_CONTRACTIONS_[] = null;\r
-        ImplicitCEGenerator iimpCEGen_ = null;\r
-        try\r
-        {\r
-            // !!! note what's going on here...\r
-            // even though the static init of the class is not yet complete, we\r
-            // instantiate an instance of the class.  So we'd better be sure that\r
-            // instantiation doesn't rely on the static initialization that's\r
-            // not complete yet!\r
-            iUCA_ = new RuleBasedCollator();\r
-            iUCA_CONSTANTS_ = new UCAConstants();\r
-            iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_);\r
-\r
-            // called before doing canonical closure for the UCA.\r
-            iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary);\r
-            //iimpCEGen_ = new ImplicitCEGenerator(iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_, iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);\r
-            iUCA_.init();\r
-            ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH);\r
-            iUCA_.m_rules_ = (String)rb.getObject("UCARules");\r
-        }\r
-        catch (MissingResourceException ex)\r
-        {\r
-//             throw ex;\r
-        }\r
-        catch (IOException e)\r
-        {\r
-           // e.printStackTrace();\r
-//             throw new MissingResourceException(e.getMessage(),"","");\r
-        }\r
-\r
-        UCA_ = iUCA_;\r
-        UCA_CONSTANTS_ = iUCA_CONSTANTS_;\r
-        UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_;\r
-        impCEGen_ = iimpCEGen_;\r
-\r
-        UCA_INIT_COMPLETE = true;\r
-    }\r
-\r
-\r
-    private static void checkUCA() throws MissingResourceException {\r
-        if (UCA_INIT_COMPLETE && UCA_ == null) {\r
-            throw new MissingResourceException("Collator UCA data unavailable", "", "");\r
-        }\r
-    }\r
-        \r
-    // package private constructors ------------------------------------------\r
-\r
-    /**\r
-    * <p>Private contructor for use by subclasses.\r
-    * Public access to creating Collators is handled by the API\r
-    * Collator.getInstance() or RuleBasedCollator(String rules).\r
-    * </p>\r
-    * <p>\r
-    * This constructor constructs the UCA collator internally\r
-    * </p>\r
-    */\r
-    RuleBasedCollator()\r
-    {\r
-        checkUCA();\r
-        initUtility(false);\r
-    }\r
-\r
-    /**\r
-     * Constructors a RuleBasedCollator from the argument locale.\r
-     * If no resource bundle is associated with the locale, UCA is used\r
-     * instead.\r
-     * @param locale\r
-     */\r
-    RuleBasedCollator(ULocale locale)\r
-    {\r
-        checkUCA();\r
-        ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);\r
-        initUtility(false);\r
-        if (rb != null) {\r
-            try {\r
-                // Use keywords, if supplied for lookup\r
-                String collkey = locale.getKeywordValue("collation");\r
-                  if(collkey == null) {\r
-                      collkey = rb.getStringWithFallback("collations/default");\r
-                }\r
-                       \r
-                // collations/default will always give a string back\r
-                // keyword for the real collation data\r
-                // if "collations/collkey" will return null if collkey == null \r
-                ICUResourceBundle elements = rb.getWithFallback("collations/" + collkey);\r
-                if (elements != null) {\r
-                    // TODO: Determine actual & valid locale correctly\r
-                    ULocale uloc = rb.getULocale();\r
-                    setLocale(uloc, uloc);\r
-\r
-                    m_rules_ = elements.getString("Sequence");\r
-                    ByteBuffer buf = elements.get("%%CollationBin").getBinary();\r
-                    // %%CollationBin\r
-                    if(buf!=null){\r
-                    //     m_rules_ = (String)rules[1][1];\r
-                        byte map[] = buf.array();\r
-                        CollatorReader.initRBC(this, map);\r
-                        /*\r
-                        BufferedInputStream input =\r
-                                                 new BufferedInputStream(\r
-                                                    new ByteArrayInputStream(map));\r
-                        /*\r
-                        CollatorReader reader = new CollatorReader(input, false);\r
-                        if (map.length > MIN_BINARY_DATA_SIZE_) {\r
-                            reader.read(this, null);\r
-                        }\r
-                        else {\r
-                            reader.readHeader(this);\r
-                            reader.readOptions(this);\r
-                            // duplicating UCA_'s data\r
-                            setWithUCATables();\r
-                        }\r
-                        */\r
-                        // at this point, we have read in the collator\r
-                        // now we need to check whether the binary image has\r
-                        // the right UCA and other versions\r
-                        if(!m_UCA_version_.equals(UCA_.m_UCA_version_) ||\r
-                        !m_UCD_version_.equals(UCA_.m_UCD_version_)) {\r
-                            init(m_rules_);\r
-                            return;\r
-                        }\r
-                        init();\r
-                        return;\r
-                    }\r
-                    else {                        \r
-                        init(m_rules_);\r
-                        return;\r
-                    }\r
-                }\r
-            }\r
-            catch (Exception e) {\r
-                // e.printStackTrace();\r
-                // if failed use UCA.\r
-            }\r
-        }\r
-        setWithUCAData();\r
-    }\r
-\r
-    // package private methods -----------------------------------------------\r
-\r
-    /**\r
-     * Sets this collator to use the tables in UCA. Note options not taken\r
-     * care of here.\r
-     */\r
-    final void setWithUCATables()\r
-    {\r
-        m_contractionOffset_ = UCA_.m_contractionOffset_;\r
-        m_expansionOffset_ = UCA_.m_expansionOffset_;\r
-        m_expansion_ = UCA_.m_expansion_;\r
-        m_contractionIndex_ = UCA_.m_contractionIndex_;\r
-        m_contractionCE_ = UCA_.m_contractionCE_;\r
-        m_trie_ = UCA_.m_trie_;\r
-        m_expansionEndCE_ = UCA_.m_expansionEndCE_;\r
-        m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_;\r
-        m_unsafe_ = UCA_.m_unsafe_;\r
-        m_contractionEnd_ = UCA_.m_contractionEnd_;\r
-        m_minUnsafe_ = UCA_.m_minUnsafe_;\r
-        m_minContractionEnd_ = UCA_.m_minContractionEnd_;\r
-    }\r
-\r
-    /**\r
-     * Sets this collator to use the all options and tables in UCA.\r
-     */\r
-    final void setWithUCAData()\r
-    {\r
-        latinOneFailed_ = true;\r
-\r
-        m_addition3_ = UCA_.m_addition3_;\r
-        m_bottom3_ = UCA_.m_bottom3_;\r
-        m_bottomCount3_ = UCA_.m_bottomCount3_;\r
-        m_caseFirst_ = UCA_.m_caseFirst_;\r
-        m_caseSwitch_ = UCA_.m_caseSwitch_;\r
-        m_common3_ = UCA_.m_common3_;\r
-        m_contractionOffset_ = UCA_.m_contractionOffset_;\r
-        setDecomposition(UCA_.getDecomposition());\r
-        m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_;\r
-        m_defaultDecomposition_ = UCA_.m_defaultDecomposition_;\r
-        m_defaultIsAlternateHandlingShifted_\r
-                                   = UCA_.m_defaultIsAlternateHandlingShifted_;\r
-        m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_;\r
-        m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_;\r
-        m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_;\r
-        m_defaultStrength_ = UCA_.m_defaultStrength_;\r
-        m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_;\r
-        m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_;\r
-        m_expansionOffset_ = UCA_.m_expansionOffset_;\r
-        m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_;\r
-        m_isCaseLevel_ = UCA_.m_isCaseLevel_;\r
-        m_isFrenchCollation_ = UCA_.m_isFrenchCollation_;\r
-        m_isHiragana4_ = UCA_.m_isHiragana4_;\r
-        m_isJamoSpecial_ = UCA_.m_isJamoSpecial_;\r
-        m_isSimple3_ = UCA_.m_isSimple3_;\r
-        m_mask3_ = UCA_.m_mask3_;\r
-        m_minContractionEnd_ = UCA_.m_minContractionEnd_;\r
-        m_minUnsafe_ = UCA_.m_minUnsafe_;\r
-        m_rules_ = UCA_.m_rules_;\r
-        setStrength(UCA_.getStrength());\r
-        m_top3_ = UCA_.m_top3_;\r
-        m_topCount3_ = UCA_.m_topCount3_;\r
-        m_variableTopValue_ = UCA_.m_variableTopValue_;\r
-        m_isNumericCollation_ = UCA_.m_isNumericCollation_;\r
-        setWithUCATables();\r
-        latinOneFailed_ = false;\r
-    }\r
-\r
-    /**\r
-     * Test whether a char character is potentially "unsafe" for use as a\r
-     * collation starting point. "Unsafe" characters are combining marks or\r
-     * those belonging to some contraction sequence from the offset 1 onwards.\r
-     * E.g. if "ABC" is the only contraction, then 'B' and\r
-     * 'C' are considered unsafe. If we have another contraction "ZA" with\r
-     * the one above, then 'A', 'B', 'C' are "unsafe" but 'Z' is not.\r
-     * @param ch character to determin\r
-     * @return true if ch is unsafe, false otherwise\r
-     */\r
-    final boolean isUnsafe(char ch)\r
-    {\r
-        if (ch < m_minUnsafe_) {\r
-            return false;\r
-        }\r
-        \r
-        if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {\r
-            if (UTF16.isLeadSurrogate(ch) \r
-                || UTF16.isTrailSurrogate(ch)) {\r
-                //  Trail surrogate are always considered unsafe.\r
-                return true;\r
-            }\r
-            ch &= HEURISTIC_OVERFLOW_MASK_;\r
-            ch += HEURISTIC_OVERFLOW_OFFSET_;\r
-        }\r
-        int value = m_unsafe_[ch >> HEURISTIC_SHIFT_];\r
-        return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;\r
-    }\r
-\r
-    /**\r
-     * Approximate determination if a char character is at a contraction end.\r
-     * Guaranteed to be true if a character is at the end of a contraction,\r
-     * otherwise it is not deterministic.\r
-     * @param ch character to be determined\r
-     */\r
-    final boolean isContractionEnd(char ch)\r
-    {\r
-        if (UTF16.isTrailSurrogate(ch)) {\r
-            return true;\r
-        }\r
-\r
-        if (ch < m_minContractionEnd_) {\r
-            return false;\r
-        }\r
-\r
-        if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {\r
-            ch &= HEURISTIC_OVERFLOW_MASK_;\r
-            ch += HEURISTIC_OVERFLOW_OFFSET_;\r
-        }\r
-        int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_];\r
-        return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;\r
-    }\r
-\r
-    /**\r
-     * Retrieve the tag of a special ce\r
-     * @param ce ce to test\r
-     * @return tag of ce\r
-     */\r
-    static int getTag(int ce)\r
-    {\r
-        return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_;\r
-    }\r
-\r
-    /**\r
-     * Checking if ce is special\r
-     * @param ce to check\r
-     * @return true if ce is special\r
-     */\r
-    static boolean isSpecial(int ce)\r
-    {\r
-        return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_;\r
-    }\r
-\r
-    /**\r
-     * Checks if the argument ce is a continuation\r
-     * @param ce collation element to test\r
-     * @return true if ce is a continuation\r
-     */\r
-    static final boolean isContinuation(int ce)\r
-    {\r
-        return ce != CollationElementIterator.NULLORDER\r
-                       && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_;\r
-    }\r
-\r
-    // private inner classes ------------------------------------------------\r
-\r
-    // private variables -----------------------------------------------------\r
-\r
-    /**\r
-     * The smallest natural unsafe or contraction end char character before\r
-     * tailoring.\r
-     * This is a combining mark.\r
-     */\r
-    private static final int DEFAULT_MIN_HEURISTIC_ = 0x300;\r
-    /**\r
-     * Heuristic table table size. Size is 32 bytes, 1 bit for each\r
-     * latin 1 char, and some power of two for hashing the rest of the chars.\r
-     * Size in bytes.\r
-     */\r
-    private static final char HEURISTIC_SIZE_ = 1056;\r
-    /**\r
-     * Mask value down to "some power of two" - 1,\r
-     * number of bits, not num of bytes.\r
-     */\r
-    private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff;\r
-    /**\r
-     * Unsafe character shift\r
-     */\r
-    private static final int HEURISTIC_SHIFT_ = 3;\r
-    /**\r
-     * Unsafe character addition for character too large, it has to be folded\r
-     * then incremented.\r
-     */\r
-    private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256;\r
-    /**\r
-     * Mask value to get offset in heuristic table.\r
-     */\r
-    private static final char HEURISTIC_MASK_ = 7;\r
-\r
-    private int m_caseSwitch_;\r
-    private int m_common3_;\r
-    private int m_mask3_;\r
-    /**\r
-     * When switching case, we need to add or subtract different values.\r
-     */\r
-    private int m_addition3_;\r
-    /**\r
-     * Upper range when compressing\r
-     */\r
-    private int m_top3_;\r
-    /**\r
-     * Upper range when compressing\r
-     */\r
-    private int m_bottom3_;\r
-    private int m_topCount3_;\r
-    private int m_bottomCount3_;\r
-    /**\r
-     * Case first constants\r
-     */\r
-    private static final int CASE_SWITCH_ = 0xC0;\r
-    private static final int NO_CASE_SWITCH_ = 0;\r
-    /**\r
-     * Case level constants\r
-     */\r
-    private static final int CE_REMOVE_CASE_ = 0x3F;\r
-    private static final int CE_KEEP_CASE_ = 0xFF;\r
-    /**\r
-     * Case strength mask\r
-     */\r
-    private static final int CE_CASE_MASK_3_ = 0xFF;\r
-    /**\r
-     * Sortkey size factor. Values can be changed.\r
-     */\r
-    private static final double PROPORTION_2_ = 0.5;\r
-    private static final double PROPORTION_3_ = 0.667;\r
-\r
-    // These values come from the UCA ----------------------------------------\r
-\r
-    /**\r
-     * This is an enum that lists magic special byte values from the\r
-     * fractional UCA\r
-     */\r
-    //private static final byte BYTE_ZERO_ = 0x0;\r
-    //private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01;\r
-    //private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02;\r
-    private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03;\r
-    /*private*/ static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;\r
-    //private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;\r
-    static final byte CODAN_PLACEHOLDER = 0x27;\r
-    //private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C;\r
-    private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D;\r
-    private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF;\r
-    private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1;\r
-    private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;\r
-    private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40;\r
-    private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85;\r
-    private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45;\r
-    private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5;\r
-    private static final int COMMON_BOTTOM_3_ = 0x05;\r
-    private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86;\r
-    private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ =\r
-                                                              COMMON_BOTTOM_3_;\r
-    private static final int TOP_COUNT_2_ = (int)(PROPORTION_2_ * TOTAL_2_);\r
-    private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_;\r
-    private static final int COMMON_2_ = COMMON_BOTTOM_2_;\r
-    private static final int COMMON_UPPER_FIRST_3_ = 0xC5;\r
-    private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_;\r
-    //private static final int COMMON_4_ = (byte)0xFF;\r
-\r
-\r
-\r
-    /*\r
-     * Minimum size required for the binary collation data in bytes.\r
-     * Size of UCA header + size of options to 4 bytes\r
-     */\r
-    //private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;\r
-\r
-    /**\r
-     * If this collator is to generate only simple tertiaries for fast path\r
-     */\r
-    private boolean m_isSimple3_;\r
-\r
-    /**\r
-     * French collation sorting flag\r
-     */\r
-    private boolean m_isFrenchCollation_;\r
-    /**\r
-     * Flag indicating if shifted is requested for Quaternary alternate\r
-     * handling. If this is not true, the default for alternate handling will\r
-     * be non-ignorable.\r
-     */\r
-    private boolean m_isAlternateHandlingShifted_;\r
-    /**\r
-     * Extra case level for sorting\r
-     */\r
-    private boolean m_isCaseLevel_;\r
-\r
-    private static final int SORT_BUFFER_INIT_SIZE_ = 128;\r
-    private static final int SORT_BUFFER_INIT_SIZE_1_ =\r
-                                                    SORT_BUFFER_INIT_SIZE_ << 3;\r
-    private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_;\r
-    private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_;\r
-    private static final int SORT_BUFFER_INIT_SIZE_CASE_ =\r
-                                                SORT_BUFFER_INIT_SIZE_ >> 2;\r
-    private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_;\r
-\r
-    private static final int CE_CONTINUATION_TAG_ = 0xC0;\r
-    private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F;\r
-\r
-    private static final int LAST_BYTE_MASK_ = 0xFF;\r
-\r
-    //private static final int CE_RESET_TOP_VALUE_ = 0x9F000303;\r
-    //private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303;\r
-\r
-    private static final byte SORT_CASE_BYTE_START_ = (byte)0x80;\r
-    private static final byte SORT_CASE_SHIFT_START_ = (byte)7;\r
-\r
-    /**\r
-     * CE buffer size\r
-     */\r
-    private static final int CE_BUFFER_SIZE_ = 512;\r
-\r
-    // variables for Latin-1 processing\r
-    boolean latinOneUse_        = false;\r
-    boolean latinOneRegenTable_ = false;\r
-    boolean latinOneFailed_     = false;\r
-\r
-    int latinOneTableLen_ = 0;\r
-    int latinOneCEs_[] = null;\r
-    /**\r
-     * Bunch of utility iterators\r
-     */\r
-    private StringUCharacterIterator m_srcUtilIter_;\r
-    private CollationElementIterator m_srcUtilColEIter_;\r
-    private StringUCharacterIterator m_tgtUtilIter_;\r
-    private CollationElementIterator m_tgtUtilColEIter_;\r
-    /**\r
-     * Utility comparison flags\r
-     */\r
-    private boolean m_utilCompare0_;\r
-    //private boolean m_utilCompare1_;\r
-    private boolean m_utilCompare2_;\r
-    private boolean m_utilCompare3_;\r
-    private boolean m_utilCompare4_;\r
-    private boolean m_utilCompare5_;\r
-    /**\r
-     * Utility byte buffer\r
-     */\r
-    private byte m_utilBytes0_[];\r
-    private byte m_utilBytes1_[];\r
-    private byte m_utilBytes2_[];\r
-    private byte m_utilBytes3_[];\r
-    private byte m_utilBytes4_[];\r
-    //private byte m_utilBytes5_[];\r
-    private RawCollationKey m_utilRawCollationKey_;\r
-\r
-    private int m_utilBytesCount0_;\r
-    private int m_utilBytesCount1_;\r
-    private int m_utilBytesCount2_;\r
-    private int m_utilBytesCount3_;\r
-    private int m_utilBytesCount4_;\r
-    //private int m_utilBytesCount5_;\r
-    //private int m_utilCount0_;\r
-    //private int m_utilCount1_;\r
-    private int m_utilCount2_;\r
-    private int m_utilCount3_;\r
-    private int m_utilCount4_;\r
-    //private int m_utilCount5_;\r
-\r
-    private int m_utilFrenchStart_;\r
-    private int m_utilFrenchEnd_;\r
-\r
-    /**\r
-     * Preparing the CE buffers. will be filled during the primary phase\r
-     */\r
-    private int m_srcUtilCEBuffer_[];\r
-    private int m_tgtUtilCEBuffer_[];\r
-    private int m_srcUtilCEBufferSize_;\r
-    private int m_tgtUtilCEBufferSize_;\r
-\r
-    private int m_srcUtilContOffset_;\r
-    private int m_tgtUtilContOffset_;\r
-\r
-    private int m_srcUtilOffset_;\r
-    private int m_tgtUtilOffset_;\r
-\r
-    // private methods -------------------------------------------------------\r
-\r
-    private void init(String rules) throws Exception\r
-    {\r
-        setWithUCAData();\r
-        CollationParsedRuleBuilder builder\r
-                                       = new CollationParsedRuleBuilder(rules);\r
-        builder.setRules(this);\r
-        m_rules_ = rules;\r
-        init();\r
-        initUtility(false);\r
-    }\r
-    \r
-    private final int compareRegular(String source, String target, int offset) {\r
-        if (m_srcUtilIter_ == null) {\r
-            initUtility(true);\r
-        }\r
-        int strength = getStrength();\r
-        // setting up the collator parameters\r
-        m_utilCompare0_ = m_isCaseLevel_;\r
-        //m_utilCompare1_ = true;\r
-        m_utilCompare2_ = strength >= SECONDARY;\r
-        m_utilCompare3_ = strength >= TERTIARY;\r
-        m_utilCompare4_ = strength >= QUATERNARY;\r
-        m_utilCompare5_ = strength == IDENTICAL;\r
-        boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_;\r
-        boolean doShift4 = m_isAlternateHandlingShifted_ && m_utilCompare4_;\r
-        boolean doHiragana4 = m_isHiragana4_ && m_utilCompare4_;\r
-\r
-        if (doHiragana4 && doShift4) {\r
-            String sourcesub = source.substring(offset);\r
-            String targetsub = target.substring(offset);\r
-            return compareBySortKeys(sourcesub, targetsub);\r
-        }\r
-\r
-        // This is the lowest primary value that will not be ignored if shifted\r
-        int lowestpvalue = m_isAlternateHandlingShifted_\r
-                                            ? m_variableTopValue_ << 16 : 0;\r
-        m_srcUtilCEBufferSize_ = 0;\r
-        m_tgtUtilCEBufferSize_ = 0;\r
-        int result = doPrimaryCompare(doHiragana4, lowestpvalue, source,\r
-                                      target, offset);\r
-        if (m_srcUtilCEBufferSize_ == -1\r
-            && m_tgtUtilCEBufferSize_ == -1) {\r
-            // since the cebuffer is cleared when we have determined that\r
-            // either source is greater than target or vice versa, the return\r
-            // result is the comparison result and not the hiragana result\r
-            return result;\r
-        }\r
-\r
-        int hiraganaresult = result;\r
-\r
-        if (m_utilCompare2_) {\r
-            result = doSecondaryCompare(doFrench);\r
-            if (result != 0) {\r
-                return result;\r
-            }\r
-        }\r
-        // doing the case bit\r
-        if (m_utilCompare0_) {\r
-            result = doCaseCompare();\r
-            if (result != 0) {\r
-                return result;\r
-            }\r
-        }\r
-        // Tertiary level\r
-        if (m_utilCompare3_) {\r
-            result = doTertiaryCompare();\r
-            if (result != 0) {\r
-                return result;\r
-            }\r
-        }\r
-\r
-        if (doShift4) {  // checkQuad\r
-            result = doQuaternaryCompare(lowestpvalue);\r
-            if (result != 0) {\r
-                return result;\r
-            }\r
-        }\r
-        else if (doHiragana4 && hiraganaresult != 0) {\r
-            // If we're fine on quaternaries, we might be different\r
-            // on Hiragana. This, however, might fail us in shifted.\r
-            return hiraganaresult;\r
-        }\r
-\r
-        // For IDENTICAL comparisons, we use a bitwise character comparison\r
-        // as a tiebreaker if all else is equal.\r
-        // Getting here  should be quite rare - strings are not identical -\r
-        // that is checked first, but compared == through all other checks.\r
-        if (m_utilCompare5_) {\r
-            return doIdenticalCompare(source, target, offset, true);\r
-        }\r
-        return 0;\r
-    }\r
-\r
-    /**\r
-     * Gets the 2 bytes of primary order and adds it to the primary byte array\r
-     * @param ce current ce\r
-     * @param notIsContinuation flag indicating if the current bytes belong to\r
-     *          a continuation ce\r
-     * @param doShift flag indicating if ce is to be shifted\r
-     * @param leadPrimary lead primary used for compression\r
-     * @param commonBottom4 common byte value for Quaternary\r
-     * @param bottomCount4 smallest byte value for Quaternary\r
-     * @return the new lead primary for compression\r
-     */\r
-    private final int doPrimaryBytes(int ce, boolean notIsContinuation,\r
-                                  boolean doShift, int leadPrimary,\r
-                                  int commonBottom4, int bottomCount4)\r
-    {\r
-\r
-        int p2 = (ce >>>= 16) & LAST_BYTE_MASK_; // in ints for unsigned\r
-        int p1 = ce >>> 8;  // comparison\r
-        if (doShift) {\r
-            if (m_utilCount4_ > 0) {\r
-                while (m_utilCount4_ > bottomCount4) {\r
-                    m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                         (byte)(commonBottom4 + bottomCount4));\r
-                    m_utilBytesCount4_ ++;\r
-                    m_utilCount4_ -= bottomCount4;\r
-                }\r
-                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                       (byte)(commonBottom4\r
-                                              + (m_utilCount4_ - 1)));\r
-                m_utilBytesCount4_ ++;\r
-                m_utilCount4_ = 0;\r
-            }\r
-            // dealing with a variable and we're treating them as shifted\r
-            // This is a shifted ignorable\r
-            if (p1 != 0) {\r
-                // we need to check this since we could be in continuation\r
-                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                       (byte)p1);\r
-                m_utilBytesCount4_ ++;\r
-            }\r
-            if (p2 != 0) {\r
-                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                       (byte)p2);\r
-                m_utilBytesCount4_ ++;\r
-            }\r
-        }\r
-        else {\r
-            // Note: This code assumes that the table is well built\r
-            // i.e. not having 0 bytes where they are not supposed to be.\r
-            // Usually, we'll have non-zero primary1 & primary2, except\r
-            // in cases of LatinOne and friends, when primary2 will be\r
-            // regular and simple sortkey calc\r
-            if (p1 != CollationElementIterator.IGNORABLE) {\r
-                if (notIsContinuation) {\r
-                    if (leadPrimary == p1) {\r
-                        m_utilBytes1_ = append(m_utilBytes1_,\r
-                                               m_utilBytesCount1_, (byte)p2);\r
-                        m_utilBytesCount1_ ++;\r
-                    }\r
-                    else {\r
-                        if (leadPrimary != 0) {\r
-                            m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                   m_utilBytesCount1_,\r
-                                    ((p1 > leadPrimary)\r
-                                            ? BYTE_UNSHIFTED_MAX_\r
-                                            : BYTE_UNSHIFTED_MIN_)); \r
-                            m_utilBytesCount1_ ++;\r
-                        }\r
-                        if (p2 == CollationElementIterator.IGNORABLE) {\r
-                            // one byter, not compressed\r
-                            m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                   m_utilBytesCount1_,\r
-                                                   (byte)p1);\r
-                            m_utilBytesCount1_ ++;\r
-                            leadPrimary = 0;\r
-                        }\r
-                        else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_\r
-                              || (p1 > maxRegularPrimary\r
-                    //> (RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_[0]\r
-                    //                                              >>> 24)\r
-                                && p1 < minImplicitPrimary\r
-                    //< (RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_[0]\r
-                    //                                              >>> 24)\r
-                    )) {\r
-                                // not compressible\r
-                                leadPrimary = 0;\r
-                                m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                       m_utilBytesCount1_,\r
-                                                       (byte)p1);\r
-                                m_utilBytesCount1_ ++;\r
-                                m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                       m_utilBytesCount1_,\r
-                                                       (byte)p2);\r
-                                m_utilBytesCount1_ ++;\r
-                        }\r
-                        else { // compress\r
-                            leadPrimary = p1;\r
-                            m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                   m_utilBytesCount1_,\r
-                                                   (byte)p1);\r
-                            m_utilBytesCount1_ ++;\r
-                            m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                  m_utilBytesCount1_, (byte)p2);\r
-                            m_utilBytesCount1_ ++;\r
-                        }\r
-                    }\r
-                }\r
-                else {\r
-                    // continuation, add primary to the key, no compression\r
-                    m_utilBytes1_ = append(m_utilBytes1_,\r
-                                           m_utilBytesCount1_, (byte)p1);\r
-                    m_utilBytesCount1_ ++;\r
-                    if (p2 != CollationElementIterator.IGNORABLE) {\r
-                        m_utilBytes1_ = append(m_utilBytes1_,\r
-                                           m_utilBytesCount1_, (byte)p2);\r
-                        // second part\r
-                        m_utilBytesCount1_ ++;\r
-                    }\r
-                }\r
-            }\r
-        }\r
-        return leadPrimary;\r
-    }\r
-\r
-    /**\r
-     * Gets the secondary byte and adds it to the secondary byte array\r
-     * @param ce current ce\r
-     * @param notIsContinuation flag indicating if the current bytes belong to\r
-     *          a continuation ce\r
-     * @param doFrench flag indicator if french sort is to be performed\r
-     */\r
-    private final void doSecondaryBytes(int ce, boolean notIsContinuation,\r
-                                        boolean doFrench)\r
-    {\r
-        int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison\r
-        if (s != 0) {\r
-            if (!doFrench) {\r
-                // This is compression code.\r
-                if (s == COMMON_2_ && notIsContinuation) {\r
-                   m_utilCount2_ ++;\r
-                }\r
-                else {\r
-                    if (m_utilCount2_ > 0) {\r
-                        if (s > COMMON_2_) { // not necessary for 4th level.\r
-                            while (m_utilCount2_ > TOP_COUNT_2_) {\r
-                                m_utilBytes2_ = append(m_utilBytes2_,\r
-                                        m_utilBytesCount2_,\r
-                                        (byte)(COMMON_TOP_2_ - TOP_COUNT_2_));\r
-                                m_utilBytesCount2_ ++;\r
-                                m_utilCount2_ -= TOP_COUNT_2_;\r
-                            }\r
-                            m_utilBytes2_ = append(m_utilBytes2_,\r
-                                                   m_utilBytesCount2_,\r
-                                                   (byte)(COMMON_TOP_2_\r
-                                                       - (m_utilCount2_ - 1)));\r
-                            m_utilBytesCount2_ ++;\r
-                        }\r
-                        else {\r
-                            while (m_utilCount2_ > BOTTOM_COUNT_2_) {\r
-                                m_utilBytes2_ = append(m_utilBytes2_,\r
-                                                       m_utilBytesCount2_,\r
-                                    (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));\r
-                                m_utilBytesCount2_ ++;\r
-                                m_utilCount2_ -= BOTTOM_COUNT_2_;\r
-                            }\r
-                            m_utilBytes2_ = append(m_utilBytes2_,\r
-                                                   m_utilBytesCount2_,\r
-                                                   (byte)(COMMON_BOTTOM_2_\r
-                                                       + (m_utilCount2_ - 1)));\r
-                            m_utilBytesCount2_ ++;\r
-                        }\r
-                        m_utilCount2_ = 0;\r
-                    }\r
-                    m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,\r
-                                           (byte)s);\r
-                    m_utilBytesCount2_ ++;\r
-                }\r
-            }\r
-            else {\r
-                  m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,\r
-                                         (byte)s);\r
-                  m_utilBytesCount2_ ++;\r
-                  // Do the special handling for French secondaries\r
-                  // We need to get continuation elements and do intermediate\r
-                  // restore\r
-                  // abc1c2c3de with french secondaries need to be edc1c2c3ba\r
-                  // NOT edc3c2c1ba\r
-                  if (notIsContinuation) {\r
-                        if (m_utilFrenchStart_ != -1) {\r
-                            // reverse secondaries from frenchStartPtr up to\r
-                            // frenchEndPtr\r
-                            reverseBuffer(m_utilBytes2_);\r
-                            m_utilFrenchStart_ = -1;\r
-                        }\r
-                  }\r
-                  else {\r
-                        if (m_utilFrenchStart_ == -1) {\r
-                            m_utilFrenchStart_  = m_utilBytesCount2_ - 2;\r
-                        }\r
-                        m_utilFrenchEnd_ = m_utilBytesCount2_ - 1;\r
-                  }\r
-            }\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Reverse the argument buffer\r
-     * @param buffer to reverse\r
-     */\r
-    private void reverseBuffer(byte buffer[])\r
-    {\r
-        int start = m_utilFrenchStart_;\r
-        int end = m_utilFrenchEnd_;\r
-        while (start < end) {\r
-            byte b = buffer[start];\r
-            buffer[start ++] = buffer[end];\r
-            buffer[end --] = b;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Insert the case shifting byte if required\r
-     * @param caseshift value\r
-     * @return new caseshift value\r
-     */\r
-    private final int doCaseShift(int caseshift)\r
-    {\r
-        if (caseshift  == 0) {\r
-            m_utilBytes0_ = append(m_utilBytes0_, m_utilBytesCount0_,\r
-                                   SORT_CASE_BYTE_START_);\r
-            m_utilBytesCount0_ ++;\r
-            caseshift = SORT_CASE_SHIFT_START_;\r
-        }\r
-        return caseshift;\r
-    }\r
-\r
-    /**\r
-     * Performs the casing sort\r
-     * @param tertiary byte in ints for easy comparison\r
-     * @param notIsContinuation flag indicating if the current bytes belong to\r
-     *          a continuation ce\r
-     * @param caseshift\r
-     * @return the new value of case shift\r
-     */\r
-    private final int doCaseBytes(int tertiary, boolean notIsContinuation,\r
-                                  int caseshift)\r
-    {\r
-        caseshift = doCaseShift(caseshift);\r
-\r
-        if (notIsContinuation && tertiary != 0) {\r
-            byte casebits = (byte)(tertiary & 0xC0);\r
-            if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {\r
-                if (casebits == 0) {\r
-                    m_utilBytes0_[m_utilBytesCount0_ - 1]\r
-                                                      |= (1 << (-- caseshift));\r
-                }\r
-                else {\r
-                     // second bit\r
-                     caseshift = doCaseShift(caseshift - 1);\r
-                     m_utilBytes0_[m_utilBytesCount0_ - 1]\r
-                                    |= ((casebits >> 6) & 1) << (-- caseshift);\r
-                }\r
-            }\r
-            else {\r
-                if (casebits != 0) {\r
-                    m_utilBytes0_[m_utilBytesCount0_ - 1]\r
-                                                        |= 1 << (-- caseshift);\r
-                    // second bit\r
-                    caseshift = doCaseShift(caseshift);\r
-                    m_utilBytes0_[m_utilBytesCount0_ - 1]\r
-                                  |= ((casebits >> 7) & 1) << (-- caseshift);\r
-                }\r
-                else {\r
-                    caseshift --;\r
-                }\r
-            }\r
-        }\r
-\r
-        return caseshift;\r
-    }\r
-\r
-    /**\r
-     * Gets the tertiary byte and adds it to the tertiary byte array\r
-     * @param tertiary byte in int for easy comparison\r
-     * @param notIsContinuation flag indicating if the current bytes belong to\r
-     *          a continuation ce\r
-     */\r
-    private final void doTertiaryBytes(int tertiary, boolean notIsContinuation)\r
-    {\r
-        if (tertiary != 0) {\r
-            // This is compression code.\r
-            // sequence size check is included in the if clause\r
-            if (tertiary == m_common3_ && notIsContinuation) {\r
-                 m_utilCount3_ ++;\r
-            }\r
-            else {\r
-                int common3 = m_common3_ & LAST_BYTE_MASK_;\r
-                if (tertiary > common3 && m_common3_ == COMMON_NORMAL_3_) {\r
-                    tertiary += m_addition3_;\r
-                }\r
-                else if (tertiary <= common3\r
-                         && m_common3_ == COMMON_UPPER_FIRST_3_) {\r
-                    tertiary -= m_addition3_;\r
-                }\r
-                if (m_utilCount3_ > 0) {\r
-                    if (tertiary > common3) {\r
-                        while (m_utilCount3_ > m_topCount3_) {\r
-                            m_utilBytes3_ = append(m_utilBytes3_,\r
-                                                   m_utilBytesCount3_,\r
-                                            (byte)(m_top3_ - m_topCount3_));\r
-                            m_utilBytesCount3_ ++;\r
-                            m_utilCount3_ -= m_topCount3_;\r
-                        }\r
-                        m_utilBytes3_ = append(m_utilBytes3_,\r
-                                               m_utilBytesCount3_,\r
-                                               (byte)(m_top3_\r
-                                                      - (m_utilCount3_ - 1)));\r
-                        m_utilBytesCount3_ ++;\r
-                    }\r
-                    else {\r
-                        while (m_utilCount3_ > m_bottomCount3_) {\r
-                            m_utilBytes3_ = append(m_utilBytes3_,\r
-                                                   m_utilBytesCount3_,\r
-                                         (byte)(m_bottom3_ + m_bottomCount3_));\r
-                            m_utilBytesCount3_ ++;\r
-                            m_utilCount3_ -= m_bottomCount3_;\r
-                        }\r
-                        m_utilBytes3_ = append(m_utilBytes3_,\r
-                                               m_utilBytesCount3_,\r
-                                               (byte)(m_bottom3_\r
-                                                      + (m_utilCount3_ - 1)));\r
-                        m_utilBytesCount3_ ++;\r
-                    }\r
-                    m_utilCount3_ = 0;\r
-                }\r
-                m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,\r
-                                       (byte)tertiary);\r
-                m_utilBytesCount3_ ++;\r
-            }\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Gets the Quaternary byte and adds it to the Quaternary byte array\r
-     * @param isCodePointHiragana flag indicator if the previous codepoint\r
-     *          we dealt with was Hiragana\r
-     * @param commonBottom4 smallest common Quaternary byte\r
-     * @param bottomCount4 smallest Quaternary byte\r
-     * @param hiragana4 hiragana Quaternary byte\r
-     */\r
-    private final void doQuaternaryBytes(boolean isCodePointHiragana,\r
-                                      int commonBottom4, int bottomCount4,\r
-                                      byte hiragana4)\r
-    {\r
-        if (isCodePointHiragana) { // This was Hiragana, need to note it\r
-            if (m_utilCount4_ > 0) { // Close this part\r
-                while (m_utilCount4_ > bottomCount4) {\r
-                    m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                           (byte)(commonBottom4\r
-                                                        + bottomCount4));\r
-                    m_utilBytesCount4_ ++;\r
-                    m_utilCount4_ -= bottomCount4;\r
-                }\r
-                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                      (byte)(commonBottom4\r
-                                             + (m_utilCount4_ - 1)));\r
-                m_utilBytesCount4_ ++;\r
-                m_utilCount4_ = 0;\r
-            }\r
-            m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                   hiragana4); // Add the Hiragana\r
-            m_utilBytesCount4_ ++;\r
-        }\r
-        else { // This wasn't Hiragana, so we can continue adding stuff\r
-            m_utilCount4_ ++;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Iterates through the argument string for all ces.\r
-     * Split the ces into their relevant primaries, secondaries etc.\r
-     * @param source normalized string\r
-     * @param doFrench flag indicator if special handling of French has to be\r
-     *                  done\r
-     * @param hiragana4 offset for Hiragana quaternary\r
-     * @param commonBottom4 smallest common quaternary byte\r
-     * @param bottomCount4 smallest quaternary byte\r
-     */\r
-    private final void getSortKeyBytes(String source, boolean doFrench,\r
-                                       byte hiragana4, int commonBottom4,\r
-                                       int bottomCount4)\r
-\r
-    {\r
-        if (m_srcUtilIter_ == null) {\r
-            initUtility(true);\r
-        }\r
-        int backupDecomposition = getDecomposition();\r
-        setDecomposition(NO_DECOMPOSITION); // have to revert to backup later\r
-        m_srcUtilIter_.setText(source);\r
-        m_srcUtilColEIter_.setText(m_srcUtilIter_);\r
-        m_utilFrenchStart_ = -1;\r
-        m_utilFrenchEnd_ = -1;\r
-\r
-        // scriptorder not implemented yet\r
-        // const uint8_t *scriptOrder = coll->scriptOrder;\r
-\r
-        boolean doShift = false;\r
-        boolean notIsContinuation = false;\r
-\r
-        int leadPrimary = 0; // int for easier comparison\r
-        int caseShift = 0;\r
-\r
-        while (true) {\r
-            int ce = m_srcUtilColEIter_.next();\r
-            if (ce == CollationElementIterator.NULLORDER) {\r
-                break;\r
-            }\r
-\r
-            if (ce == CollationElementIterator.IGNORABLE) {\r
-                continue;\r
-            }\r
-\r
-            notIsContinuation = !isContinuation(ce);\r
-\r
-            /*\r
-             * if (notIsContinuation) {\r
-                    if (scriptOrder != NULL) {\r
-                        primary1 = scriptOrder[primary1];\r
-                    }\r
-                }*/\r
-            boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0;\r
-            // actually we can just check that the first byte is 0\r
-            // generation stuffs the order left first\r
-            boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_)\r
-                                               <= m_variableTopValue_;\r
-            doShift = (m_isAlternateHandlingShifted_\r
-                        && ((notIsContinuation && isSmallerThanVariableTop\r
-                            && !isPrimaryByteIgnorable) // primary byte not 0\r
-                        || (!notIsContinuation && doShift))\r
-                        || (doShift && isPrimaryByteIgnorable));\r
-            if (doShift && isPrimaryByteIgnorable) {\r
-                // amendment to the UCA says that primary ignorables and other\r
-                // ignorables should be removed if following a shifted code\r
-                // point\r
-                // if we were shifted and we got an ignorable code point\r
-                // we should just completely ignore it\r
-                continue;\r
-            }\r
-            leadPrimary = doPrimaryBytes(ce, notIsContinuation, doShift,\r
-                                         leadPrimary, commonBottom4,\r
-                                         bottomCount4);\r
-            if (doShift) {\r
-                continue;\r
-            }\r
-            if (m_utilCompare2_) {\r
-                doSecondaryBytes(ce, notIsContinuation, doFrench);\r
-            }\r
-\r
-            int t = ce & LAST_BYTE_MASK_;\r
-            if (!notIsContinuation) {\r
-                t = ce & CE_REMOVE_CONTINUATION_MASK_;\r
-            }\r
-\r
-            if (m_utilCompare0_ && (!isPrimaryByteIgnorable || m_utilCompare2_)) {\r
-                // do the case level if we need to do it. We don't want to calculate\r
-                // case level for primary ignorables if we have only primary strength and case level\r
-                // otherwise we would break well formedness of CEs \r
-                caseShift = doCaseBytes(t, notIsContinuation, caseShift);\r
-            }\r
-            else if (notIsContinuation) {\r
-                 t ^= m_caseSwitch_;\r
-            }\r
-\r
-            t &= m_mask3_;\r
-\r
-            if (m_utilCompare3_) {\r
-                doTertiaryBytes(t, notIsContinuation);\r
-            }\r
-\r
-            if (m_utilCompare4_ && notIsContinuation) { // compare quad\r
-                doQuaternaryBytes(m_srcUtilColEIter_.m_isCodePointHiragana_,\r
-                                  commonBottom4, bottomCount4, hiragana4);\r
-            }\r
-        }\r
-        setDecomposition(backupDecomposition); // reverts to original\r
-        if (m_utilFrenchStart_ != -1) {\r
-            // one last round of checks\r
-            reverseBuffer(m_utilBytes2_);\r
-        }\r
-    }\r
-\r
-    /**\r
-     * From the individual strength byte results the final compact sortkey\r
-     * will be calculated.\r
-     * @param source text string\r
-     * @param doFrench flag indicating that special handling of French has to\r
-     *                  be done\r
-     * @param commonBottom4 smallest common quaternary byte\r
-     * @param bottomCount4 smallest quaternary byte\r
-     * @param key output RawCollationKey to store results, key cannot be null\r
-     */\r
-    private final void getSortKey(String source, boolean doFrench,\r
-                                             int commonBottom4, \r
-                                             int bottomCount4,\r
-                                             RawCollationKey key)\r
-    {\r
-        // we have done all the CE's, now let's put them together to form\r
-        // a key\r
-        if (m_utilCompare2_) {\r
-            doSecondary(doFrench);\r
-        }\r
-        // adding case level should be independent of secondary level\r
-        if (m_utilCompare0_) {\r
-            doCase();\r
-        }\r
-        if (m_utilCompare3_) {\r
-            doTertiary();\r
-            if (m_utilCompare4_) {\r
-                doQuaternary(commonBottom4, bottomCount4);\r
-                if (m_utilCompare5_) {\r
-                    doIdentical(source);\r
-                }\r
-\r
-            }\r
-        }\r
-        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte)0);\r
-        m_utilBytesCount1_ ++;\r
-\r
-        key.set(m_utilBytes1_, 0, m_utilBytesCount1_);\r
-    }\r
-\r
-    /**\r
-     * Packs the French bytes\r
-     */\r
-    private final void doFrench()\r
-    {\r
-        for (int i = 0; i < m_utilBytesCount2_; i ++) {\r
-            byte s = m_utilBytes2_[m_utilBytesCount2_ - i - 1];\r
-            // This is compression code.\r
-            if (s == COMMON_2_) {\r
-                ++ m_utilCount2_;\r
-            }\r
-            else {\r
-                if (m_utilCount2_ > 0) {\r
-                    // getting the unsigned value\r
-                    if ((s & LAST_BYTE_MASK_) > COMMON_2_) {\r
-                        // not necessary for 4th level.\r
-                        while (m_utilCount2_ > TOP_COUNT_2_) {\r
-                            m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                   m_utilBytesCount1_,\r
-                                        (byte)(COMMON_TOP_2_ - TOP_COUNT_2_));\r
-                            m_utilBytesCount1_ ++;\r
-                            m_utilCount2_ -= TOP_COUNT_2_;\r
-                        }\r
-                        m_utilBytes1_ = append(m_utilBytes1_,\r
-                                               m_utilBytesCount1_,\r
-                                               (byte)(COMMON_TOP_2_\r
-                                                      - (m_utilCount2_ - 1)));\r
-                        m_utilBytesCount1_ ++;\r
-                    }\r
-                    else {\r
-                        while (m_utilCount2_ > BOTTOM_COUNT_2_) {\r
-                            m_utilBytes1_ = append(m_utilBytes1_,\r
-                                                   m_utilBytesCount1_,\r
-                                (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));\r
-                            m_utilBytesCount1_ ++;\r
-                            m_utilCount2_ -= BOTTOM_COUNT_2_;\r
-                        }\r
-                        m_utilBytes1_ = append(m_utilBytes1_,\r
-                                               m_utilBytesCount1_,\r
-                                               (byte)(COMMON_BOTTOM_2_\r
-                                                      + (m_utilCount2_ - 1)));\r
-                        m_utilBytesCount1_ ++;\r
-                    }\r
-                    m_utilCount2_ = 0;\r
-                }\r
-                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, s);\r
-                m_utilBytesCount1_ ++;\r
-            }\r
-        }\r
-        if (m_utilCount2_ > 0) {\r
-            while (m_utilCount2_ > BOTTOM_COUNT_2_) {\r
-                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                                       (byte)(COMMON_BOTTOM_2_\r
-                                                    + BOTTOM_COUNT_2_));\r
-                m_utilBytesCount1_ ++;\r
-                m_utilCount2_ -= BOTTOM_COUNT_2_;\r
-            }\r
-            m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                                   (byte)(COMMON_BOTTOM_2_\r
-                                                    + (m_utilCount2_ - 1)));\r
-            m_utilBytesCount1_ ++;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Compacts the secondary bytes and stores them into the primary array\r
-     * @param doFrench flag indicator that French has to be handled specially\r
-     */\r
-    private final void doSecondary(boolean doFrench)\r
-    {\r
-        if (m_utilCount2_ > 0) {\r
-            while (m_utilCount2_ > BOTTOM_COUNT_2_) {\r
-                m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,\r
-                                       (byte)(COMMON_BOTTOM_2_\r
-                                                        + BOTTOM_COUNT_2_));\r
-                m_utilBytesCount2_ ++;\r
-                m_utilCount2_ -= BOTTOM_COUNT_2_;\r
-            }\r
-            m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,\r
-                                   (byte)(COMMON_BOTTOM_2_ +\r
-                                                    (m_utilCount2_ - 1)));\r
-            m_utilBytesCount2_ ++;\r
-        }\r
-\r
-        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                               SORT_LEVEL_TERMINATOR_);\r
-        m_utilBytesCount1_ ++;\r
-\r
-        if (doFrench) { // do the reverse copy\r
-            doFrench();\r
-        }\r
-        else {\r
-            if (m_utilBytes1_.length <= m_utilBytesCount1_\r
-                                        + m_utilBytesCount2_) {\r
-                m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,\r
-                                         m_utilBytesCount2_);\r
-            }\r
-            System.arraycopy(m_utilBytes2_, 0, m_utilBytes1_,\r
-                             m_utilBytesCount1_, m_utilBytesCount2_);\r
-            m_utilBytesCount1_ += m_utilBytesCount2_;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Increase buffer size\r
-     * @param buffer array of bytes\r
-     * @param size of the byte array\r
-     * @param incrementsize size to increase\r
-     * @return the new buffer\r
-     */\r
-    private static final byte[] increase(byte buffer[], int size,\r
-                                         int incrementsize)\r
-    {\r
-        byte result[] = new byte[buffer.length + incrementsize];\r
-        System.arraycopy(buffer, 0, result, 0, size);\r
-        return result;\r
-    }\r
-\r
-    /**\r
-     * Increase buffer size\r
-     * @param buffer array of ints\r
-     * @param size of the byte array\r
-     * @param incrementsize size to increase\r
-     * @return the new buffer\r
-     */\r
-    private static final int[] increase(int buffer[], int size,\r
-                                        int incrementsize)\r
-    {\r
-        int result[] = new int[buffer.length + incrementsize];\r
-        System.arraycopy(buffer, 0, result, 0, size);\r
-        return result;\r
-    }\r
-\r
-    /**\r
-     * Compacts the case bytes and stores them into the primary array\r
-     */\r
-    private final void doCase()\r
-    {\r
-        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                               SORT_LEVEL_TERMINATOR_);\r
-        m_utilBytesCount1_ ++;\r
-        if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount0_) {\r
-            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,\r
-                                     m_utilBytesCount0_);\r
-        }\r
-        System.arraycopy(m_utilBytes0_, 0, m_utilBytes1_, m_utilBytesCount1_,\r
-                         m_utilBytesCount0_);\r
-        m_utilBytesCount1_ += m_utilBytesCount0_;\r
-    }\r
-\r
-    /**\r
-     * Compacts the tertiary bytes and stores them into the primary array\r
-     */\r
-    private final void doTertiary()\r
-    {\r
-        if (m_utilCount3_ > 0) {\r
-            if (m_common3_ != COMMON_BOTTOM_3_) {\r
-                while (m_utilCount3_ >= m_topCount3_) {\r
-                    m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,\r
-                                           (byte)(m_top3_ - m_topCount3_));\r
-                    m_utilBytesCount3_ ++;\r
-                    m_utilCount3_ -= m_topCount3_;\r
-                }\r
-                m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,\r
-                                       (byte)(m_top3_ - m_utilCount3_));\r
-                m_utilBytesCount3_ ++;\r
-            }\r
-            else {\r
-                while (m_utilCount3_ > m_bottomCount3_) {\r
-                    m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,\r
-                                           (byte)(m_bottom3_\r
-                                                        + m_bottomCount3_));\r
-                    m_utilBytesCount3_ ++;\r
-                    m_utilCount3_ -= m_bottomCount3_;\r
-                }\r
-                m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,\r
-                                       (byte)(m_bottom3_\r
-                                              + (m_utilCount3_ - 1)));\r
-                m_utilBytesCount3_ ++;\r
-            }\r
-        }\r
-        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                               SORT_LEVEL_TERMINATOR_);\r
-        m_utilBytesCount1_ ++;\r
-        if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount3_) {\r
-            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,\r
-                                     m_utilBytesCount3_);\r
-        }\r
-        System.arraycopy(m_utilBytes3_, 0, m_utilBytes1_, m_utilBytesCount1_,\r
-                         m_utilBytesCount3_);\r
-        m_utilBytesCount1_ += m_utilBytesCount3_;\r
-    }\r
-\r
-    /**\r
-     * Compacts the quaternary bytes and stores them into the primary array\r
-     */\r
-    private final void doQuaternary(int commonbottom4, int bottomcount4)\r
-    {\r
-        if (m_utilCount4_ > 0) {\r
-            while (m_utilCount4_ > bottomcount4) {\r
-                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                       (byte)(commonbottom4 + bottomcount4));\r
-                m_utilBytesCount4_ ++;\r
-                m_utilCount4_ -= bottomcount4;\r
-            }\r
-            m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,\r
-                                   (byte)(commonbottom4\r
-                                                + (m_utilCount4_ - 1)));\r
-            m_utilBytesCount4_ ++;\r
-        }\r
-        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                               SORT_LEVEL_TERMINATOR_);\r
-        m_utilBytesCount1_ ++;\r
-        if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount4_) {\r
-            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,\r
-                                     m_utilBytesCount4_);\r
-        }\r
-        System.arraycopy(m_utilBytes4_, 0, m_utilBytes1_, m_utilBytesCount1_,\r
-                         m_utilBytesCount4_);\r
-        m_utilBytesCount1_ += m_utilBytesCount4_;\r
-    }\r
-\r
-    /**\r
-     * Deals with the identical sort.\r
-     * Appends the BOCSU version of the source string to the ends of the\r
-     * byte buffer.\r
-     * @param source text string\r
-     */\r
-    private final void doIdentical(String source)\r
-    {\r
-        int isize = BOCU.getCompressionLength(source);\r
-        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,\r
-                               SORT_LEVEL_TERMINATOR_);\r
-        m_utilBytesCount1_ ++;\r
-        if (m_utilBytes1_.length <= m_utilBytesCount1_ + isize) {\r
-            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,\r
-                                     1 + isize);\r
-        }\r
-        m_utilBytesCount1_ = BOCU.compress(source, m_utilBytes1_,\r
-                                           m_utilBytesCount1_);\r
-    }\r
-\r
-    /**\r
-     * Gets the offset of the first unmatched characters in source and target.\r
-     * This method returns the offset of the start of a contraction or a\r
-     * combining sequence, if the first difference is in the middle of such a\r
-     * sequence.\r
-     * @param source string\r
-     * @param target string\r
-     * @return offset of the first unmatched characters in source and target.\r
-     */\r
-    private final int getFirstUnmatchedOffset(String source, String target)\r
-    {\r
-        int result = 0;\r
-        int slength = source.length();\r
-        int tlength = target.length();\r
-        int minlength = slength;\r
-        if (minlength > tlength) {\r
-            minlength = tlength;\r
-        }\r
-        while (result < minlength\r
-                && source.charAt(result) == target.charAt(result)) {\r
-            result ++;\r
-        }\r
-        if (result > 0) {\r
-            // There is an identical portion at the beginning of the two\r
-            // strings. If the identical portion ends within a contraction or a\r
-            // combining character sequence, back up to the start of that\r
-            // sequence.\r
-            char schar = 0;\r
-            char tchar = 0;\r
-            if (result < minlength) {\r
-                schar = source.charAt(result); // first differing chars\r
-                tchar = target.charAt(result);\r
-            }\r
-            else {\r
-                schar = source.charAt(minlength - 1);\r
-                if (isUnsafe(schar)) {\r
-                    tchar = schar;\r
-                }\r
-                else if (slength == tlength) {\r
-                        return result;\r
-                }\r
-                else if (slength < tlength) {\r
-                    tchar = target.charAt(result);\r
-                }\r
-                else {\r
-                    schar = source.charAt(result);\r
-                }\r
-            }\r
-            if (isUnsafe(schar) || isUnsafe(tchar))\r
-            {\r
-                // We are stopped in the middle of a contraction or combining\r
-                // sequence.\r
-                // Look backwards for the part of the string for the start of\r
-                // the sequence\r
-                // It doesn't matter which string we scan, since they are the\r
-                // same in this region.\r
-                do {\r
-                    result --;\r
-                }\r
-                while (result > 0 && isUnsafe(source.charAt(result)));\r
-            }\r
-        }\r
-        return result;\r
-    }\r
-\r
-    /**\r
-     * Appending an byte to an array of bytes and increases it if we run out of\r
-     * space\r
-     * @param array of byte arrays\r
-     * @param appendindex index in the byte array to append\r
-     * @param value to append\r
-     * @return array if array size can accomodate the new value, otherwise\r
-     *         a bigger array will be created and returned\r
-     */\r
-    private static final byte[] append(byte array[], int appendindex,\r
-                                       byte value)\r
-    {\r
-        try {\r
-            array[appendindex] = value;\r
-        }\r
-        catch (ArrayIndexOutOfBoundsException e) {\r
-            array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);\r
-            array[appendindex] = value;\r
-        }\r
-        return array;\r
-    }\r
-\r
-    /**\r
-     * This is a trick string compare function that goes in and uses sortkeys\r
-     * to compare. It is used when compare gets in trouble and needs to bail\r
-     * out.\r
-     * @param source text string\r
-     * @param target text string\r
-     */\r
-    private final int compareBySortKeys(String source, String target)\r
-\r
-    {\r
-        m_utilRawCollationKey_ = getRawCollationKey(source, \r
-                                                    m_utilRawCollationKey_);\r
-        // this method is very seldom called\r
-        RawCollationKey targetkey = getRawCollationKey(target, null);\r
-        return m_utilRawCollationKey_.compareTo(targetkey);\r
-    }\r
-\r
-    /**\r
-     * Performs the primary comparisons, and fills up the CE buffer at the\r
-     * same time.\r
-     * The return value toggles between the comparison result and the hiragana\r
-     * result. If either the source is greater than target or vice versa, the\r
-     * return result is the comparison result, ie 1 or -1, furthermore the\r
-     * cebuffers will be cleared when that happens. If the primary comparisons\r
-     * are equal, we'll have to continue with secondary comparison. In this case\r
-     * the cebuffer will not be cleared and the return result will be the\r
-     * hiragana result.\r
-     * @param doHiragana4 flag indicator that Hiragana Quaternary has to be\r
-     *                  observed\r
-     * @param lowestpvalue the lowest primary value that will not be ignored if\r
-     *                      alternate handling is shifted\r
-     * @param source text string\r
-     * @param target text string\r
-     * @param textoffset offset in text to start the comparison\r
-     * @return comparion result if a primary difference is found, otherwise\r
-     *                      hiragana result\r
-     */\r
-    private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue,\r
-                                        String source, String target,\r
-                                        int textoffset)\r
-\r
-    {\r
-        // Preparing the context objects for iterating over strings\r
-        m_srcUtilIter_.setText(source);\r
-        m_srcUtilColEIter_.setText(m_srcUtilIter_, textoffset);\r
-        m_tgtUtilIter_.setText(target);\r
-        m_tgtUtilColEIter_.setText(m_tgtUtilIter_, textoffset);\r
-\r
-        // Non shifted primary processing is quite simple\r
-        if (!m_isAlternateHandlingShifted_) {\r
-            int hiraganaresult = 0;\r
-            while (true) {\r
-                int sorder = 0;\r
-                // We fetch CEs until we hit a non ignorable primary or end.\r
-                do {\r
-                    sorder = m_srcUtilColEIter_.next();\r
-                    m_srcUtilCEBuffer_ = append(m_srcUtilCEBuffer_,\r
-                                                m_srcUtilCEBufferSize_, sorder);\r
-                    m_srcUtilCEBufferSize_ ++;\r
-                    sorder &= CE_PRIMARY_MASK_;\r
-                } while (sorder == CollationElementIterator.IGNORABLE);\r
-\r
-                int torder = 0;\r
-                do {\r
-                    torder = m_tgtUtilColEIter_.next();\r
-                    m_tgtUtilCEBuffer_ = append(m_tgtUtilCEBuffer_,\r
-                                                m_tgtUtilCEBufferSize_, torder);\r
-                    m_tgtUtilCEBufferSize_ ++;\r
-                    torder &= CE_PRIMARY_MASK_;\r
-                } while (torder == CollationElementIterator.IGNORABLE);\r
-\r
-                // if both primaries are the same\r
-                if (sorder == torder) {\r
-                    // and there are no more CEs, we advance to the next level\r
-                    // see if we are at the end of either string\r
-                    if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1]\r
-                                        == CollationElementIterator.NULLORDER) {\r
-                        if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] \r
-                            != CollationElementIterator.NULLORDER) {\r
-                            return -1;\r
-                        }\r
-                        break;\r
-                    }\r
-                    else if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1]\r
-                             == CollationElementIterator.NULLORDER) {\r
-                        return 1;\r
-                    }\r
-                    if (doHiragana4 && hiraganaresult == 0\r
-                        && m_srcUtilColEIter_.m_isCodePointHiragana_ !=\r
-                                        m_tgtUtilColEIter_.m_isCodePointHiragana_) {\r
-                        if (m_srcUtilColEIter_.m_isCodePointHiragana_) {\r
-                            hiraganaresult = -1;\r
-                        }\r
-                        else {\r
-                            hiraganaresult = 1;\r
-                        }\r
-                    }\r
-                }\r
-                else {\r
-                    // if two primaries are different, we are done\r
-                    return endPrimaryCompare(sorder, torder);\r
-                }\r
-            }\r
-            // no primary difference... do the rest from the buffers\r
-            return hiraganaresult;\r
-        }\r
-        else { // shifted - do a slightly more complicated processing :)\r
-            while (true) {\r
-                int sorder = getPrimaryShiftedCompareCE(m_srcUtilColEIter_,\r
-                                                        lowestpvalue, true);\r
-                int torder = getPrimaryShiftedCompareCE(m_tgtUtilColEIter_,\r
-                                                        lowestpvalue, false);\r
-                if (sorder == torder) {\r
-                    if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1]\r
-                            == CollationElementIterator.NULLORDER) {\r
-                        break;\r
-                    }\r
-                    else {\r
-                        continue;\r
-                    }\r
-                }\r
-                else {\r
-                    return endPrimaryCompare(sorder, torder);\r
-                }\r
-            } // no primary difference... do the rest from the buffers\r
-        }\r
-        return 0;\r
-    }\r
-\r
-    /**\r
-     * This is used only for primary strength when we know that sorder is\r
-     * already different from torder.\r
-     * Compares sorder and torder, returns -1 if sorder is less than torder.\r
-     * Clears the cebuffer at the same time.\r
-     * @param sorder source strength order\r
-     * @param torder target strength order\r
-     * @return the comparison result of sorder and torder\r
-     */\r
-    private final int endPrimaryCompare(int sorder, int torder)\r
-    {\r
-        // if we reach here, the ce offset accessed is the last ce\r
-        // appended to the buffer\r
-        boolean isSourceNullOrder = (m_srcUtilCEBuffer_[\r
-                                                    m_srcUtilCEBufferSize_ - 1]\r
-                                        == CollationElementIterator.NULLORDER);\r
-        boolean isTargetNullOrder = (m_tgtUtilCEBuffer_[\r
-                                                    m_tgtUtilCEBufferSize_ - 1]\r
-                                        == CollationElementIterator.NULLORDER);\r
-        m_srcUtilCEBufferSize_ = -1;\r
-        m_tgtUtilCEBufferSize_ = -1;\r
-        if (isSourceNullOrder) {\r
-            return -1;\r
-        }\r
-        if (isTargetNullOrder) {\r
-            return 1;\r
-        }\r
-        // getting rid of the sign\r
-        sorder >>>= CE_PRIMARY_SHIFT_;\r
-        torder >>>= CE_PRIMARY_SHIFT_;\r
-        if (sorder < torder) {\r
-            return -1;\r
-        }\r
-        return 1;\r
-    }\r
-\r
-    /**\r
-     * Calculates the next primary shifted value and fills up cebuffer with the\r
-     * next non-ignorable ce.\r
-     * @param coleiter collation element iterator\r
-     * @param doHiragana4 flag indicator if hiragana quaternary is to be\r
-     *                      handled\r
-     * @param lowestpvalue lowest primary shifted value that will not be\r
-     *                      ignored\r
-     * @return result next modified ce\r
-     */\r
-    private final int getPrimaryShiftedCompareCE(\r
-                                        CollationElementIterator coleiter,\r
-                                        int lowestpvalue, boolean isSrc)\r
-\r
-    {\r
-        boolean shifted = false;\r
-        int result = CollationElementIterator.IGNORABLE;\r
-        int cebuffer[] = m_srcUtilCEBuffer_;\r
-        int cebuffersize = m_srcUtilCEBufferSize_;\r
-        if (!isSrc) {\r
-            cebuffer = m_tgtUtilCEBuffer_;\r
-            cebuffersize = m_tgtUtilCEBufferSize_;\r
-        }\r
-        while (true) {\r
-            result = coleiter.next();\r
-            if (result == CollationElementIterator.NULLORDER) {\r
-                cebuffer = append(cebuffer, cebuffersize, result);\r
-                cebuffersize ++;\r
-                break;\r
-            }\r
-            else if (result == CollationElementIterator.IGNORABLE\r
-                     || (shifted\r
-                         && (result & CE_PRIMARY_MASK_)\r
-                                      == CollationElementIterator.IGNORABLE)) {\r
-                // UCA amendment - ignore ignorables that follow shifted code\r
-                // points\r
-                continue;\r
-            }\r
-            else if (isContinuation(result)) {\r
-                if ((result & CE_PRIMARY_MASK_)\r
-                                    != CollationElementIterator.IGNORABLE) {\r
-                    // There is primary value\r
-                    if (shifted) {\r
-                        result = (result & CE_PRIMARY_MASK_)\r
-                                            | CE_CONTINUATION_MARKER_;\r
-                        // preserve interesting continuation\r
-                        cebuffer = append(cebuffer, cebuffersize, result);\r
-                        cebuffersize ++;\r
-                        continue;\r
-                    }\r
-                    else {\r
-                        cebuffer = append(cebuffer, cebuffersize, result);\r
-                        cebuffersize ++;\r
-                        break;\r
-                    }\r
-                }\r
-                else { // Just lower level values\r
-                    if (!shifted) {\r
-                        cebuffer = append(cebuffer, cebuffersize, result);\r
-                        cebuffersize ++;\r
-                    }\r
-                }\r
-            }\r
-            else { // regular\r
-                if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_,\r
-                                            lowestpvalue) > 0) {\r
-                    cebuffer = append(cebuffer, cebuffersize, result);\r
-                    cebuffersize ++;\r
-                    break;\r
-                }\r
-                else {\r
-                    if ((result & CE_PRIMARY_MASK_) != 0) {\r
-                        shifted = true;\r
-                        result &= CE_PRIMARY_MASK_;\r
-                        cebuffer = append(cebuffer, cebuffersize, result);\r
-                        cebuffersize ++;\r
-                        continue;\r
-                    }\r
-                    else {\r
-                        cebuffer = append(cebuffer, cebuffersize, result);\r
-                        cebuffersize ++;\r
-                        shifted = false;\r
-                        continue;\r
-                    }\r
-                }\r
-            }\r
-        }\r
-        if (isSrc) {\r
-            m_srcUtilCEBuffer_ = cebuffer;\r
-            m_srcUtilCEBufferSize_ = cebuffersize;\r
-        }\r
-        else {\r
-            m_tgtUtilCEBuffer_ = cebuffer;\r
-            m_tgtUtilCEBufferSize_ = cebuffersize;\r
-        }\r
-        result &= CE_PRIMARY_MASK_;\r
-        return result;\r
-    }\r
-\r
-    /**\r
-     * Appending an int to an array of ints and increases it if we run out of\r
-     * space\r
-     * @param array of int arrays\r
-     * @param appendindex index at which value will be appended\r
-     * @param value to append\r
-     * @return array if size is not increased, otherwise a new array will be\r
-     *         returned\r
-     */\r
-    private static final int[] append(int array[], int appendindex, int value)\r
-    {\r
-        if (appendindex + 1 >= array.length) {\r
-            array = increase(array, appendindex, CE_BUFFER_SIZE_);\r
-        }\r
-        array[appendindex] = value;\r
-        return array;\r
-    }\r
-\r
-    /**\r
-     * Does secondary strength comparison based on the collected ces.\r
-     * @param doFrench flag indicates if French ordering is to be done\r
-     * @return the secondary strength comparison result\r
-     */\r
-    private final int doSecondaryCompare(boolean doFrench)\r
-    {\r
-        // now, we're gonna reexamine collected CEs\r
-        if (!doFrench) { // normal\r
-            int soffset = 0;\r
-            int toffset = 0;\r
-            while (true) {\r
-                int sorder = CollationElementIterator.IGNORABLE;\r
-                while (sorder == CollationElementIterator.IGNORABLE) {\r
-                    sorder = m_srcUtilCEBuffer_[soffset ++]\r
-                             & CE_SECONDARY_MASK_;\r
-                }\r
-                int torder = CollationElementIterator.IGNORABLE;\r
-                while (torder == CollationElementIterator.IGNORABLE) {\r
-                    torder = m_tgtUtilCEBuffer_[toffset ++]\r
-                             & CE_SECONDARY_MASK_;\r
-                }\r
-\r
-                if (sorder == torder) {\r
-                    if (m_srcUtilCEBuffer_[soffset - 1]\r
-                                    == CollationElementIterator.NULLORDER) {\r
-                        if (m_tgtUtilCEBuffer_[toffset - 1] \r
-                            != CollationElementIterator.NULLORDER) {\r
-                            return -1;\r
-                        }\r
-                        break;\r
-                    }\r
-                    else if (m_tgtUtilCEBuffer_[toffset - 1]\r
-                             == CollationElementIterator.NULLORDER) {\r
-                        return 1;\r
-                    }\r
-                }\r
-                else {\r
-                    if (m_srcUtilCEBuffer_[soffset - 1] ==\r
-                            CollationElementIterator.NULLORDER) {\r
-                        return -1;\r
-                    }\r
-                    if (m_tgtUtilCEBuffer_[toffset - 1] ==\r
-                            CollationElementIterator.NULLORDER) {\r
-                        return 1;\r
-                    }\r
-                    return (sorder < torder) ? -1 : 1;\r
-                }\r
-            }\r
-        }\r
-        else { // do the French\r
-            m_srcUtilContOffset_ = 0;\r
-            m_tgtUtilContOffset_ = 0;\r
-            m_srcUtilOffset_ = m_srcUtilCEBufferSize_ - 2;\r
-            m_tgtUtilOffset_ = m_tgtUtilCEBufferSize_ - 2;\r
-            while (true) {\r
-                int sorder = getSecondaryFrenchCE(true);\r
-                int torder = getSecondaryFrenchCE(false);\r
-                if (sorder == torder) {\r
-                    if ((m_srcUtilOffset_ < 0 && m_tgtUtilOffset_ < 0)\r
-                        || (m_srcUtilOffset_ >= 0 \r
-                            && m_srcUtilCEBuffer_[m_srcUtilOffset_]\r
-                                    == CollationElementIterator.NULLORDER)) {\r
-                        break;\r
-                    }\r
-                }\r
-                else {\r
-                    return (sorder < torder) ? -1 : 1;\r
-                }\r
-            }\r
-        }\r
-        return 0;\r
-    }\r
-\r
-    /**\r
-     * Calculates the next secondary french CE.\r
-     * @param isSrc flag indicator if we are calculating the src ces\r
-     * @return result next modified ce\r
-     */\r
-    private final int getSecondaryFrenchCE(boolean isSrc)\r
-    {\r
-        int result = CollationElementIterator.IGNORABLE;\r
-        int offset = m_srcUtilOffset_;\r
-        int continuationoffset = m_srcUtilContOffset_;\r
-        int cebuffer[] = m_srcUtilCEBuffer_;\r
-        if (!isSrc) {\r
-            offset = m_tgtUtilOffset_;\r
-            continuationoffset = m_tgtUtilContOffset_;\r
-            cebuffer = m_tgtUtilCEBuffer_;\r
-        }\r
-\r
-        while (result == CollationElementIterator.IGNORABLE\r
-                && offset >= 0) {\r
-            if (continuationoffset == 0) {\r
-                result = cebuffer[offset];\r
-                while (isContinuation(cebuffer[offset --])){\r
-                }\r
-                // after this, sorder is at the start of continuation,\r
-                // and offset points before that\r
-                if (isContinuation(cebuffer[offset + 1])) {\r
-                    // save offset for later\r
-                    continuationoffset = offset;\r
-                    offset += 2;\r
-                }\r
-            }\r
-            else {\r
-                result = cebuffer[offset ++];\r
-                if (!isContinuation(result)) {\r
-                    // we have finished with this continuation\r
-                    offset = continuationoffset;\r
-                    // reset the pointer to before continuation\r
-                    continuationoffset = 0;\r
-                    continue;\r
-                }\r
-            }\r
-            result &= CE_SECONDARY_MASK_; // remove continuation bit\r
-        }\r
-        if (isSrc) {\r
-            m_srcUtilOffset_ = offset;\r
-            m_srcUtilContOffset_ = continuationoffset;\r
-        }\r
-        else {\r
-            m_tgtUtilOffset_ = offset;\r
-            m_tgtUtilContOffset_ = continuationoffset;\r
-        }\r
-        return result;\r
-    }\r
-\r
-    /**\r
-     * Does case strength comparison based on the collected ces.\r
-     * @return the case strength comparison result\r
-     */\r
-    private final int doCaseCompare()\r
-    {\r
-        int soffset = 0;\r
-        int toffset = 0;\r
-        while (true) {\r
-            int sorder = CollationElementIterator.IGNORABLE;\r
-            int torder = CollationElementIterator.IGNORABLE;\r
-            while ((sorder & CE_REMOVE_CASE_)\r
-                                    == CollationElementIterator.IGNORABLE) {\r
-                sorder = m_srcUtilCEBuffer_[soffset ++];\r
-                if (!isContinuation(sorder) && ((sorder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) {\r
-                    // primary ignorables should not be considered on the case level when the strength is primary\r
-                    // otherwise, the CEs stop being well-formed\r
-                    sorder &= CE_CASE_MASK_3_;\r
-                    sorder ^= m_caseSwitch_;\r
-                }\r
-                else {\r
-                    sorder = CollationElementIterator.IGNORABLE;\r
-                }\r
-            }\r
-\r
-            while ((torder & CE_REMOVE_CASE_)\r
-                                    == CollationElementIterator.IGNORABLE) {\r
-                torder = m_tgtUtilCEBuffer_[toffset ++];\r
-                if (!isContinuation(torder) && ((torder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) {\r
-                    // primary ignorables should not be considered on the case level when the strength is primary\r
-                    // otherwise, the CEs stop being well-formed\r
-                    torder &= CE_CASE_MASK_3_;\r
-                    torder ^= m_caseSwitch_;\r
-                }\r
-                else {\r
-                    torder = CollationElementIterator.IGNORABLE;\r
-                }\r
-            }\r
-\r
-            sorder &= CE_CASE_BIT_MASK_;\r
-            torder &= CE_CASE_BIT_MASK_;\r
-            if (sorder == torder) {\r
-                // checking end of strings\r
-                if (m_srcUtilCEBuffer_[soffset - 1]\r
-                                        == CollationElementIterator.NULLORDER) {\r
-                    if (m_tgtUtilCEBuffer_[toffset - 1] \r
-                        != CollationElementIterator.NULLORDER) {\r
-                        return -1;\r
-                    }\r
-                    break;\r
-                }\r
-                else if (m_tgtUtilCEBuffer_[toffset - 1]\r
-                            == CollationElementIterator.NULLORDER) {\r
-                    return 1;\r
-                }\r
-            }\r
-            else {\r
-                if (m_srcUtilCEBuffer_[soffset - 1]\r
-                                    == CollationElementIterator.NULLORDER) {\r
-                    return -1;\r
-                }\r
-                if (m_tgtUtilCEBuffer_[soffset - 1]\r
-                                    == CollationElementIterator.NULLORDER) {\r
-                    return 1;\r
-                }\r
-                return (sorder < torder) ? -1 : 1;\r
-            }\r
-        }\r
-        return 0;\r
-    }\r
-\r
-    /**\r
-     * Does tertiary strength comparison based on the collected ces.\r
-     * @return the tertiary strength comparison result\r
-     */\r
-    private final int doTertiaryCompare()\r
-    {\r
-        int soffset = 0;\r
-        int toffset = 0;\r
-        while (true) {\r
-            int sorder = CollationElementIterator.IGNORABLE;\r
-            int torder = CollationElementIterator.IGNORABLE;\r
-            while ((sorder & CE_REMOVE_CASE_)\r
-                                == CollationElementIterator.IGNORABLE) {\r
-                sorder = m_srcUtilCEBuffer_[soffset ++] & m_mask3_;\r
-                if (!isContinuation(sorder)) {\r
-                    sorder ^= m_caseSwitch_;\r
-                }\r
-                else {\r
-                    sorder &= CE_REMOVE_CASE_;\r
-                }\r
-            }\r
-\r
-            while ((torder & CE_REMOVE_CASE_)\r
-                                == CollationElementIterator.IGNORABLE) {\r
-                torder = m_tgtUtilCEBuffer_[toffset ++] & m_mask3_;\r
-                if (!isContinuation(torder)) {\r
-                    torder ^= m_caseSwitch_;\r
-                }\r
-                else {\r
-                    torder &= CE_REMOVE_CASE_;\r
-                }\r
-            }\r
-\r
-            if (sorder == torder) {\r
-                if (m_srcUtilCEBuffer_[soffset - 1]\r
-                                    == CollationElementIterator.NULLORDER) {\r
-                    if (m_tgtUtilCEBuffer_[toffset - 1]\r
-                        != CollationElementIterator.NULLORDER) {\r
-                        return -1;\r
-                    }\r
-                    break;\r
-                }\r
-                else if (m_tgtUtilCEBuffer_[toffset - 1]\r
-                            == CollationElementIterator.NULLORDER) {\r
-                    return 1;\r
-                }\r
-            }\r
-            else {\r
-                if (m_srcUtilCEBuffer_[soffset - 1] ==\r
-                                        CollationElementIterator.NULLORDER) {\r
-                    return -1;\r
-                }\r
-                if (m_tgtUtilCEBuffer_[toffset - 1] ==\r
-                            CollationElementIterator.NULLORDER) {\r
-                    return 1;\r
-                }\r
-                return (sorder < torder) ? -1 : 1;\r
-            }\r
-        }\r
-        return 0;\r
-    }\r
-\r
-    /**\r
-     * Does quaternary strength comparison based on the collected ces.\r
-     * @param lowestpvalue the lowest primary value that will not be ignored if\r
-     *                      alternate handling is shifted\r
-     * @return the quaternary strength comparison result\r
-     */\r
-    private final int doQuaternaryCompare(int lowestpvalue)\r
-    {\r
-        boolean sShifted = true;\r
-        boolean tShifted = true;\r
-        int soffset = 0;\r
-        int toffset = 0;\r
-        while (true) {\r
-            int sorder = CollationElementIterator.IGNORABLE;\r
-            int torder = CollationElementIterator.IGNORABLE;\r
-            while (sorder == CollationElementIterator.IGNORABLE\r
-                    || (isContinuation(sorder) && !sShifted)) {\r
-                sorder = m_srcUtilCEBuffer_[soffset ++];\r
-                if (isContinuation(sorder)) {\r
-                    if (!sShifted) {\r
-                        continue;\r
-                    }\r
-                }\r
-                else if (Utility.compareUnsigned(sorder, lowestpvalue) > 0\r
-                            || (sorder & CE_PRIMARY_MASK_)\r
-                                    == CollationElementIterator.IGNORABLE) {\r
-                    // non continuation\r
-                    sorder = CE_PRIMARY_MASK_;\r
-                    sShifted = false;\r
-                }\r
-                else {\r
-                    sShifted = true;\r
-                }\r
-            }\r
-            sorder >>>= CE_PRIMARY_SHIFT_;\r
-            while (torder == CollationElementIterator.IGNORABLE\r
-                    || (isContinuation(torder) && !tShifted)) {\r
-                torder = m_tgtUtilCEBuffer_[toffset ++];\r
-                if (isContinuation(torder)) {\r
-                    if (!tShifted) {\r
-                        continue;\r
-                    }\r
-                }\r
-                else if (Utility.compareUnsigned(torder, lowestpvalue) > 0\r
-                            || (torder & CE_PRIMARY_MASK_)\r
-                                    == CollationElementIterator.IGNORABLE) {\r
-                    // non continuation\r
-                    torder = CE_PRIMARY_MASK_;\r
-                    tShifted = false;\r
-                }\r
-                else {\r
-                    tShifted = true;\r
-                }\r
-            }\r
-            torder >>>= CE_PRIMARY_SHIFT_;\r
-\r
-            if (sorder == torder) {\r
-                if (m_srcUtilCEBuffer_[soffset - 1]\r
-                    == CollationElementIterator.NULLORDER) {\r
-                    if (m_tgtUtilCEBuffer_[toffset - 1]\r
-                        != CollationElementIterator.NULLORDER) {\r
-                        return -1;\r
-                    }\r
-                    break;\r
-                }\r
-                else if (m_tgtUtilCEBuffer_[toffset - 1]\r
-                            == CollationElementIterator.NULLORDER) {\r
-                    return 1;\r
-                }\r
-            }\r
-            else {\r
-                if (m_srcUtilCEBuffer_[soffset - 1] ==\r
-                    CollationElementIterator.NULLORDER) {\r
-                    return -1;\r
-                }\r
-                if (m_tgtUtilCEBuffer_[toffset - 1] ==\r
-                    CollationElementIterator.NULLORDER) {\r
-                    return 1;\r
-                }\r
-                return (sorder < torder) ? -1 : 1;\r
-            }\r
-        }\r
-        return 0;\r
-    }\r
-\r
-    /**\r
-     * Internal function. Does byte level string compare. Used by strcoll if\r
-     * strength == identical and strings are otherwise equal. This is a rare\r
-     * case. Comparison must be done on NFD normalized strings. FCD is not good\r
-     * enough.\r
-     * @param source text\r
-     * @param target text\r
-     * @param offset of the first difference in the text strings\r
-     * @param normalize flag indicating if we are to normalize the text before\r
-     *              comparison\r
-     * @return 1 if source is greater than target, -1 less than and 0 if equals\r
-     */\r
-    private static final int doIdenticalCompare(String source, String target,\r
-                                                int offset, boolean normalize)\r
-\r
-    {\r
-        if (normalize) {\r
-            if (Normalizer.quickCheck(source, Normalizer.NFD,0)\r
-                                                    != Normalizer.YES) {\r
-                source = Normalizer.decompose(source, false);\r
-            }\r
-\r
-            if (Normalizer.quickCheck(target, Normalizer.NFD,0)\r
-                                                        != Normalizer.YES) {\r
-                target = Normalizer.decompose(target, false);\r
-            }\r
-            offset = 0;\r
-        }\r
-\r
-        return doStringCompare(source, target, offset);\r
-    }\r
-\r
-    /**\r
-     * Compares string for their codepoint order.\r
-     * This comparison handles surrogate characters and place them after the\r
-     * all non surrogate characters.\r
-     * @param source text\r
-     * @param target text\r
-     * @param offset start offset for comparison\r
-     * @return 1 if source is greater than target, -1 less than and 0 if equals\r
-     */\r
-    private static final int doStringCompare(String source,\r
-                                             String target,\r
-                                             int offset)\r
-    {\r
-        // compare identical prefixes - they do not need to be fixed up\r
-        char schar = 0;\r
-        char tchar = 0;\r
-        int slength = source.length();\r
-        int tlength = target.length();\r
-        int minlength = Math.min(slength, tlength);\r
-        while (offset < minlength) {\r
-            schar = source.charAt(offset);\r
-            tchar = target.charAt(offset ++);\r
-            if (schar != tchar) {\r
-                break;\r
-            }\r
-        }\r
-\r
-        if (schar == tchar && offset == minlength) {\r
-            if (slength > minlength) {\r
-                return 1;\r
-            }\r
-            if (tlength > minlength) {\r
-                return -1;\r
-            }\r
-            return 0;\r
-        }\r
-\r
-        //  if both values are in or above the surrogate range, Fix them up.\r
-        if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE\r
-            && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) {\r
-            schar = fixupUTF16(schar);\r
-            tchar = fixupUTF16(tchar);\r
-        }\r
-\r
-        // now c1 and c2 are in UTF-32-compatible order\r
-        return (schar < tchar) ? -1 : 1; // schar and tchar has to be different\r
-    }\r
-\r
-    /**\r
-     * Rotate surrogates to the top to get code point order\r
-     */\r
-    private static final char fixupUTF16(char ch)\r
-    {\r
-        if (ch >= 0xe000) {\r
-            ch -= 0x800;\r
-        }\r
-        else {\r
-            ch += 0x2000;\r
-        }\r
-        return ch;\r
-    }\r
-\r
-    /**\r
-     * Resets the internal case data members and compression values.\r
-     */\r
-    private void updateInternalState()\r
-    {\r
-        if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {\r
-            m_caseSwitch_ = CASE_SWITCH_;\r
-        }\r
-        else {\r
-            m_caseSwitch_ = NO_CASE_SWITCH_;\r
-        }\r
-\r
-        if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) {\r
-            m_mask3_ = CE_REMOVE_CASE_;\r
-            m_common3_ = COMMON_NORMAL_3_;\r
-            m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_;\r
-            m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_;\r
-            m_bottom3_ = COMMON_BOTTOM_3_;\r
-        }\r
-        else {\r
-            m_mask3_ = CE_KEEP_CASE_;\r
-            m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;\r
-            if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {\r
-                m_common3_ = COMMON_UPPER_FIRST_3_;\r
-                m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_;\r
-                m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_;\r
-            } else {\r
-                m_common3_ = COMMON_NORMAL_3_;\r
-                m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_;\r
-                m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_;\r
-            }\r
-        }\r
-\r
-        // Set the compression values\r
-        int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1;\r
-        // we multilply double with int, but need only int\r
-        m_topCount3_ = (int)(PROPORTION_3_ * total3);\r
-        m_bottomCount3_ = total3 - m_topCount3_;\r
-\r
-        if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_\r
-            && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) {\r
-            m_isSimple3_ = true;\r
-        }\r
-        else {\r
-            m_isSimple3_ = false;\r
-        }\r
-        if(!m_isCaseLevel_ && getStrength() <= AttributeValue.TERTIARY_ && !m_isNumericCollation_\r
-          && !m_isAlternateHandlingShifted_ && !latinOneFailed_) {\r
-          if(latinOneCEs_ == null || latinOneRegenTable_) {\r
-            if(setUpLatinOne()) { // if we succeed in building latin1 table, we'll use it\r
-              latinOneUse_ = true;\r
-            } else {\r
-              latinOneUse_ = false;\r
-              latinOneFailed_ = true;\r
-            }\r
-            latinOneRegenTable_ = false;\r
-          } else { // latin1Table exists and it doesn't need to be regenerated, just use it\r
-            latinOneUse_ = true;\r
-          }\r
-        } else {\r
-          latinOneUse_ = false;\r
-        }\r
-\r
-    }\r
-\r
-    /**\r
-     * Initializes the RuleBasedCollator\r
-     */\r
-    private final void init()\r
-    {\r
-        for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_;\r
-             m_minUnsafe_ ++) {\r
-            // Find the smallest unsafe char.\r
-            if (isUnsafe(m_minUnsafe_)) {\r
-                break;\r
-            }\r
-        }\r
-\r
-        for (m_minContractionEnd_ = 0;\r
-             m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_;\r
-             m_minContractionEnd_ ++) {\r
-            // Find the smallest contraction-ending char.\r
-            if (isContractionEnd(m_minContractionEnd_)) {\r
-                break;\r
-            }\r
-        }\r
-        latinOneFailed_ = true;\r
-        setStrength(m_defaultStrength_);\r
-        setDecomposition(m_defaultDecomposition_);\r
-        m_variableTopValue_ = m_defaultVariableTopValue_;\r
-        m_isFrenchCollation_ = m_defaultIsFrenchCollation_;\r
-        m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;\r
-        m_isCaseLevel_ = m_defaultIsCaseLevel_;\r
-        m_caseFirst_ = m_defaultCaseFirst_;\r
-        m_isHiragana4_ = m_defaultIsHiragana4_;\r
-        m_isNumericCollation_ = m_defaultIsNumericCollation_;\r
-        latinOneFailed_ = false;\r
-        updateInternalState();\r
-    }\r
-\r
-    /**\r
-     *  Initializes utility iterators and byte buffer used by compare\r
-     */\r
-    private final void initUtility(boolean allocate) {\r
-        if (allocate) {\r
-            if (m_srcUtilIter_ == null) {\r
-                m_srcUtilIter_ = new StringUCharacterIterator();\r
-                m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this);\r
-                m_tgtUtilIter_ = new StringUCharacterIterator();\r
-                m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this);\r
-                m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case\r
-                m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary\r
-                m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; // secondary\r
-                m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; // tertiary\r
-                m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_];  // Quaternary\r
-                m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];\r
-                m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];\r
-            }\r
-        } else {\r
-            m_srcUtilIter_ = null;\r
-            m_srcUtilColEIter_ = null;\r
-            m_tgtUtilIter_ = null;\r
-            m_tgtUtilColEIter_ = null;\r
-            m_utilBytes0_ = null;\r
-            m_utilBytes1_ = null;\r
-            m_utilBytes2_ = null;\r
-            m_utilBytes3_ = null;\r
-            m_utilBytes4_ = null;\r
-            m_srcUtilCEBuffer_ = null;\r
-            m_tgtUtilCEBuffer_ = null;\r
-        }\r
-    }\r
-\r
-    // Consts for Latin-1 special processing\r
-    private static final int ENDOFLATINONERANGE_ = 0xFF;\r
-    private static final int LATINONETABLELEN_   = (ENDOFLATINONERANGE_+50);\r
-    private static final int BAIL_OUT_CE_        = 0xFF000000;\r
-\r
-     /**\r
-     * Generate latin-1 tables\r
-     */\r
-\r
-    private class shiftValues {\r
-        int primShift = 24;\r
-        int secShift = 24;\r
-        int terShift = 24;\r
-    }\r
-\r
-    private final void\r
-    addLatinOneEntry(char ch, int CE, shiftValues sh) {\r
-      int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;\r
-      boolean reverseSecondary = false;\r
-      if(!isContinuation(CE)) {\r
-        tertiary = ((CE & m_mask3_));\r
-        tertiary ^= m_caseSwitch_;\r
-        reverseSecondary = true;\r
-      } else {\r
-        tertiary = (byte)((CE & CE_REMOVE_CONTINUATION_MASK_));\r
-        tertiary &= CE_REMOVE_CASE_;\r
-        reverseSecondary = false;\r
-      }\r
-\r
-      secondary = ((CE >>>= 8) & LAST_BYTE_MASK_);\r
-      primary2 =  ((CE >>>= 8) & LAST_BYTE_MASK_);\r
-      primary1 =  (CE >>> 8);\r
-\r
-      if(primary1 != 0) {\r
-        latinOneCEs_[ch] |= (primary1 << sh.primShift);\r
-        sh.primShift -= 8;\r
-      }\r
-      if(primary2 != 0) {\r
-        if(sh.primShift < 0) {\r
-          latinOneCEs_[ch] = BAIL_OUT_CE_;\r
-          latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_;\r
-          latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_;\r
-          return;\r
-        }\r
-        latinOneCEs_[ch] |= (primary2 << sh.primShift);\r
-        sh.primShift -= 8;\r
-      }\r
-      if(secondary != 0) {\r
-        if(reverseSecondary && m_isFrenchCollation_) { // reverse secondary\r
-          latinOneCEs_[latinOneTableLen_+ch] >>>= 8; // make space for secondary\r
-          latinOneCEs_[latinOneTableLen_+ch] |= (secondary << 24);\r
-        } else { // normal case\r
-          latinOneCEs_[latinOneTableLen_+ch] |= (secondary << sh.secShift);\r
-        }\r
-        sh.secShift -= 8;\r
-      }\r
-      if(tertiary != 0) {\r
-        latinOneCEs_[2*latinOneTableLen_+ch] |= (tertiary << sh.terShift);\r
-        sh.terShift -= 8;\r
-      }\r
-    }\r
-\r
-    private final void\r
-    resizeLatinOneTable(int newSize) {\r
-        int newTable[] = new int[3*newSize];\r
-        int sizeToCopy = ((newSize<latinOneTableLen_)?newSize:latinOneTableLen_);\r
-        //uprv_memset(newTable, 0, newSize*sizeof(uint32_t)*3); // automatically cleared.\r
-        System.arraycopy(latinOneCEs_, 0, newTable, 0, sizeToCopy);\r
-        System.arraycopy(latinOneCEs_, latinOneTableLen_, newTable, newSize, sizeToCopy);\r
-        System.arraycopy(latinOneCEs_, 2*latinOneTableLen_, newTable, 2*newSize, sizeToCopy);\r
-        latinOneTableLen_ = newSize;\r
-        latinOneCEs_ = newTable;\r
-    }\r
-\r
-    private final boolean setUpLatinOne() {\r
-      if(latinOneCEs_ == null || m_reallocLatinOneCEs_) {\r
-        latinOneCEs_ = new int[3*LATINONETABLELEN_];\r
-        latinOneTableLen_ = LATINONETABLELEN_;\r
-        m_reallocLatinOneCEs_ = false;\r
-      } else {\r
-        Arrays.fill(latinOneCEs_, 0);\r
-      }\r
-      if(m_ContInfo_ == null) {\r
-        m_ContInfo_ = new ContractionInfo();\r
-      }\r
-      char ch = 0;\r
-      //StringBuffer sCh = new StringBuffer();\r
-      //CollationElementIterator it = getCollationElementIterator(sCh.toString());\r
-      CollationElementIterator it = getCollationElementIterator("");\r
-\r
-      shiftValues s = new shiftValues();\r
-      int CE = 0;\r
-      char contractionOffset = ENDOFLATINONERANGE_+1;\r
-\r
-      for(ch = 0; ch <= ENDOFLATINONERANGE_; ch++) {\r
-        s.primShift = 24; s.secShift = 24; s.terShift = 24;\r
-        if(ch < 0x100) {\r
-          CE = m_trie_.getLatin1LinearValue(ch);\r
-        } else {\r
-          CE = m_trie_.getLeadValue(ch);\r
-          if(CE == CollationElementIterator.CE_NOT_FOUND_) {\r
-            CE = UCA_.m_trie_.getLeadValue(ch);\r
-          }\r
-        }\r
-        if(!isSpecial(CE)) {\r
-          addLatinOneEntry(ch, CE, s);\r
-        } else {\r
-          switch (RuleBasedCollator.getTag(CE)) {\r
-          case CollationElementIterator.CE_EXPANSION_TAG_:\r
-          case CollationElementIterator.CE_DIGIT_TAG_:\r
-            //sCh.delete(0, sCh.length());\r
-            //sCh.append(ch);\r
-            //it.setText(sCh.toString());\r
-            it.setText(UCharacter.toString(ch));\r
-            while((CE = it.next()) != CollationElementIterator.NULLORDER) {\r
-              if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {\r
-                latinOneCEs_[ch] = BAIL_OUT_CE_;\r
-                latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_;\r
-                latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_;\r
-                break;\r
-              }\r
-              addLatinOneEntry(ch, CE, s);\r
-            }\r
-            break;\r
-          case CollationElementIterator.CE_CONTRACTION_TAG_:\r
-            // here is the trick\r
-            // F2 is contraction. We do something very similar to contractions\r
-            // but have two indices, one in the real contraction table and the\r
-            // other to where we stuffed things. This hopes that we don't have\r
-            // many contractions (this should work for latin-1 tables).\r
-            {\r
-              if((CE & 0x00FFF000) != 0) {\r
-                latinOneFailed_ = true;\r
-                return false;\r
-              }\r
-\r
-              int UCharOffset = (CE & 0xFFFFFF) - m_contractionOffset_; //getContractionOffset(CE)]\r
-\r
-              CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table\r
-\r
-              latinOneCEs_[ch] = CE;\r
-              latinOneCEs_[latinOneTableLen_+ch] = CE;\r
-              latinOneCEs_[2*latinOneTableLen_+ch] = CE;\r
-\r
-              // We're going to jump into contraction table, pick the elements\r
-              // and use them\r
-              do {\r
-                  //CE = *(contractionCEs + (UCharOffset - contractionIndex));\r
-                  CE = m_contractionCE_[UCharOffset];\r
-                  if(isSpecial(CE) \r
-                     && getTag(CE) \r
-                               == CollationElementIterator.CE_EXPANSION_TAG_) {\r
-                    int i;    /* general counter */\r
-                    //uint32_t *CEOffset = (uint32_t *)image+getExpansionOffset(CE); /* find the offset to expansion table */\r
-                    int offset = ((CE & 0xFFFFF0) >> 4) - m_expansionOffset_; //it.getExpansionOffset(this, CE);\r
-                    int size = CE & 0xF; // getExpansionCount(CE);\r
-                    //CE = *CEOffset++;\r
-                    if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */\r
-                      for(i = 0; i<size; i++) {\r
-                        if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {\r
-                          latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;\r
-                          latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;\r
-                          latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;\r
-                          break;\r
-                        }\r
-                        addLatinOneEntry(contractionOffset, m_expansion_[offset+i], s);\r
-                      }\r
-                    } else { /* else, we do */\r
-                      while(m_expansion_[offset] != 0) {\r
-                        if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {\r
-                          latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;\r
-                          latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;\r
-                          latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;\r
-                          break;\r
-                        }\r
-                        addLatinOneEntry(contractionOffset, m_expansion_[offset++], s);\r
-                      }\r
-                    }\r
-                    contractionOffset++;\r
-                  } else if(!isSpecial(CE)) {\r
-                    addLatinOneEntry(contractionOffset++, CE, s);\r
-                  } else {\r
-                      latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;\r
-                      latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;\r
-                      latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;\r
-                      contractionOffset++;\r
-                  }\r
-                  UCharOffset++;\r
-                  s.primShift = 24; s.secShift = 24; s.terShift = 24;\r
-                  if(contractionOffset == latinOneTableLen_) { // we need to reallocate\r
-                   resizeLatinOneTable(2*latinOneTableLen_);\r
-                  }\r
-              } while(m_contractionIndex_[UCharOffset] != 0xFFFF);\r
-            }\r
-            break;\r
-          case CollationElementIterator.CE_SPEC_PROC_TAG_:\r
-            {\r
-              // 0xB7 is a precontext character defined in UCA5.1, a special\r
-              // handle is implemeted in order to save LatinOne table for\r
-              // most locales.\r
-              if (ch == 0xb7) {\r
-                  addLatinOneEntry(ch, CE, s);\r
-              }\r
-              else {\r
-                  latinOneFailed_ = true;\r
-                  return false;\r
-              }\r
-            }\r
-            break;\r
-          default:\r
-            latinOneFailed_ = true;\r
-            return false;\r
-          }\r
-        }\r
-      }\r
-      // compact table\r
-      if(contractionOffset < latinOneTableLen_) {\r
-        resizeLatinOneTable(contractionOffset);\r
-      }\r
-      return true;\r
-    }\r
-\r
-    private class ContractionInfo {\r
-        int index;\r
-    }\r
-\r
-    ContractionInfo m_ContInfo_;\r
-\r
-    private int\r
-    getLatinOneContraction(int strength, int CE, String s) {\r
-    //int strength, int CE, String s, Integer ind) {\r
-      int len = s.length();\r
-      //const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);\r
-      int UCharOffset = (CE & 0xFFF) - m_contractionOffset_;\r
-      int offset = 1;\r
-      int latinOneOffset = (CE & 0x00FFF000) >>> 12;\r
-      char schar = 0, tchar = 0;\r
-\r
-      for(;;) {\r
-        /*\r
-        if(len == -1) {\r
-          if(s[*index] == 0) { // end of string\r
-            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);\r
-          } else {\r
-            schar = s[*index];\r
-          }\r
-        } else {\r
-        */\r
-          if(m_ContInfo_.index == len) {\r
-            return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]);\r
-          } else {\r
-            schar = s.charAt(m_ContInfo_.index);\r
-          }\r
-        //}\r
-\r
-        while(schar > (tchar = m_contractionIndex_[UCharOffset+offset]/**(UCharOffset+offset)*/)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */\r
-          offset++;\r
-        }\r
-\r
-        if (schar == tchar) {\r
-          m_ContInfo_.index++;\r
-          return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset+offset]);\r
-        }\r
-        else\r
-        {\r
-          if(schar  > ENDOFLATINONERANGE_ /*& 0xFF00*/) {\r
-            return BAIL_OUT_CE_;\r
-          }\r
-          // skip completely ignorables\r
-          int isZeroCE = m_trie_.getLeadValue(schar); //UTRIE_GET32_FROM_LEAD(coll->mapping, schar);\r
-          if(isZeroCE == 0) { // we have to ignore completely ignorables\r
-            m_ContInfo_.index++;\r
-            continue;\r
-          }\r
-\r
-          return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]);\r
-        }\r
-      }\r
-    }\r
-\r
-\r
-    /**\r
-     * This is a fast strcoll, geared towards text in Latin-1.\r
-     * It supports contractions of size two, French secondaries\r
-     * and case switching. You can use it with strengths primary\r
-     * to tertiary. It does not support shifted and case level.\r
-     * It relies on the table build by setupLatin1Table. If it\r
-     * doesn't understand something, it will go to the regular\r
-     * strcoll.\r
-     */\r
-    private final int\r
-    compareUseLatin1(String source, String target, int startOffset)\r
-    {\r
-        int sLen = source.length();\r
-        int tLen = target.length();\r
-\r
-        int strength = getStrength();\r
-\r
-        int sIndex = startOffset, tIndex = startOffset;\r
-        char sChar = 0, tChar = 0;\r
-        int sOrder=0, tOrder=0;\r
-\r
-        boolean endOfSource = false;\r
-\r
-        //uint32_t *elements = coll->latinOneCEs;\r
-\r
-        boolean haveContractions = false; // if we have contractions in our string\r
-                                        // we cannot do French secondary\r
-\r
-        int offset = latinOneTableLen_;\r
-\r
-        // Do the primary level\r
-    primLoop:\r
-        for(;;) {\r
-          while(sOrder==0) { // this loop skips primary ignorables\r
-            // sOrder=getNextlatinOneCE(source);\r
-              if(sIndex==sLen) {\r
-                endOfSource = true;\r
-                break;\r
-              }\r
-              sChar=source.charAt(sIndex++); //[sIndex++];\r
-            //}\r
-            if(sChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out\r
-              //fprintf(stderr, "R");\r
-              return compareRegular(source, target, startOffset);\r
-            }\r
-            sOrder = latinOneCEs_[sChar];\r
-            if(isSpecial(sOrder)) { // if we got a special\r
-              // specials can basically be either contractions or bail-out signs. If we get anything\r
-              // else, we'll bail out anywasy\r
-              if(getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {\r
-                m_ContInfo_.index = sIndex;\r
-                sOrder = getLatinOneContraction(0, sOrder, source);\r
-                sIndex = m_ContInfo_.index;\r
-                haveContractions = true; // if there are contractions, we cannot do French secondary\r
-                // However, if there are contractions in the table, but we always use just one char,\r
-                // we might be able to do French. This should be checked out.\r
-              }\r
-              if(isSpecial(sOrder) /*== UCOL_BAIL_OUT_CE*/) {\r
-                //fprintf(stderr, "S");\r
-                return compareRegular(source, target, startOffset);\r
-              }\r
-            }\r
-          }\r
-\r
-          while(tOrder==0) {  // this loop skips primary ignorables\r
-            // tOrder=getNextlatinOneCE(target);\r
-            if(tIndex==tLen) {\r
-              if(endOfSource) {\r
-                break primLoop;\r
-              } else {\r
-                return 1;\r
-              }\r
-            }\r
-            tChar=target.charAt(tIndex++); //[tIndex++];\r
-            if(tChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out\r
-              //fprintf(stderr, "R");\r
-              return compareRegular(source, target, startOffset);\r
-            }\r
-            tOrder = latinOneCEs_[tChar];\r
-            if(isSpecial(tOrder)) {\r
-              // Handling specials, see the comments for source\r
-              if(getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {\r
-                m_ContInfo_.index = tIndex;\r
-                tOrder = getLatinOneContraction(0, tOrder, target);\r
-                tIndex = m_ContInfo_.index;\r
-                haveContractions = true;\r
-              }\r
-              if(isSpecial(tOrder)/*== UCOL_BAIL_OUT_CE*/) {\r
-                //fprintf(stderr, "S");\r
-                return compareRegular(source, target, startOffset);\r
-              }\r
-            }\r
-          }\r
-          if(endOfSource) { // source is finished, but target is not, say the result.\r
-              return -1;\r
-          }\r
-\r
-          if(sOrder == tOrder) { // if we have same CEs, we continue the loop\r
-            sOrder = 0; tOrder = 0;\r
-            continue;\r
-          } else {\r
-            // compare current top bytes\r
-            if(((sOrder^tOrder)&0xFF000000)!=0) {\r
-              // top bytes differ, return difference\r
-              if(sOrder >>> 8 < tOrder >>> 8) {\r
-                return -1;\r
-              } else {\r
-                return 1;\r
-              }\r
-              // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);\r
-              // since we must return enum value\r
-            }\r
-\r
-            // top bytes match, continue with following bytes\r
-            sOrder<<=8;\r
-            tOrder<<=8;\r
-          }\r
-        }\r
-\r
-        // after primary loop, we definitely know the sizes of strings,\r
-        // so we set it and use simpler loop for secondaries and tertiaries\r
-        //sLen = sIndex; tLen = tIndex;\r
-        if(strength >= SECONDARY) {\r
-          // adjust the table beggining\r
-          //latinOneCEs_ += coll->latinOneTableLen;\r
-          endOfSource = false;\r
-\r
-          if(!m_isFrenchCollation_) { // non French\r
-            // This loop is a simplified copy of primary loop\r
-            // at this point we know that whole strings are latin-1, so we don't\r
-            // check for that. We also know that we only have contractions as\r
-            // specials.\r
-            //sIndex = 0; tIndex = 0;\r
-            sIndex = startOffset; tIndex = startOffset;\r
-    secLoop:\r
-            for(;;) {\r
-              while(sOrder==0) {\r
-                if(sIndex==sLen) {\r
-                  endOfSource = true;\r
-                  break;\r
-                }\r
-                sChar=source.charAt(sIndex++); //[sIndex++];\r
-                sOrder = latinOneCEs_[offset+sChar];\r
-                if(isSpecial(sOrder)) {\r
-                    m_ContInfo_.index = sIndex;\r
-                    sOrder = getLatinOneContraction(1, sOrder, source);\r
-                    sIndex = m_ContInfo_.index;\r
-                }\r
-              }\r
-\r
-              while(tOrder==0) {\r
-                if(tIndex==tLen) {\r
-                  if(endOfSource) {\r
-                    break secLoop;\r
-                  } else {\r
-                    return 1;\r
-                  }\r
-                }\r
-                tChar=target.charAt(tIndex++); //[tIndex++];\r
-                tOrder = latinOneCEs_[offset+tChar];\r
-                if(isSpecial(tOrder)) {\r
-                    m_ContInfo_.index = tIndex;\r
-                    tOrder = getLatinOneContraction(1, tOrder, target);\r
-                    tIndex = m_ContInfo_.index;\r
-                }\r
-              }\r
-              if(endOfSource) {\r
-                  return -1;\r
-              }\r
-\r
-              if(sOrder == tOrder) {\r
-                sOrder = 0; tOrder = 0;\r
-                continue;\r
-              } else {\r
-                // see primary loop for comments on this\r
-                if(((sOrder^tOrder)&0xFF000000)!=0) {\r
-                  if(sOrder >>> 8 < tOrder >>> 8) {\r
-                    return -1;\r
-                  } else {\r
-                    return 1;\r
-                  }\r
-                }\r
-                sOrder<<=8;\r
-                tOrder<<=8;\r
-              }\r
-            }\r
-          } else { // French\r
-            if(haveContractions) { // if we have contractions, we have to bail out\r
-              // since we don't really know how to handle them here\r
-              return compareRegular(source, target, startOffset);\r
-            }\r
-            // For French, we go backwards\r
-            sIndex = sLen; tIndex = tLen;\r
-    secFLoop:\r
-            for(;;) {\r
-              while(sOrder==0) {\r
-                if(sIndex==startOffset) {\r
-                  endOfSource = true;\r
-                  break;\r
-                }\r
-                sChar=source.charAt(--sIndex); //[--sIndex];\r
-                sOrder = latinOneCEs_[offset+sChar];\r
-                // don't even look for contractions\r
-              }\r
-\r
-              while(tOrder==0) {\r
-                if(tIndex==startOffset) {\r
-                  if(endOfSource) {\r
-                    break secFLoop;\r
-                  } else {\r
-                    return 1;\r
-                  }\r
-                }\r
-                tChar=target.charAt(--tIndex); //[--tIndex];\r
-                tOrder = latinOneCEs_[offset+tChar];\r
-                // don't even look for contractions\r
-              }\r
-              if(endOfSource) {\r
-                  return -1;\r
-              }\r
-\r
-              if(sOrder == tOrder) {\r
-                sOrder = 0; tOrder = 0;\r
-                continue;\r
-              } else {\r
-                // see the primary loop for comments\r
-                if(((sOrder^tOrder)&0xFF000000)!=0) {\r
-                  if(sOrder >>> 8 < tOrder >>> 8) {\r
-                    return -1;\r
-                  } else {\r
-                    return 1;\r
-                  }\r
-                }\r
-                sOrder<<=8;\r
-                tOrder<<=8;\r
-              }\r
-            }\r
-          }\r
-        }\r
-\r
-        if(strength >= TERTIARY) {\r
-          // tertiary loop is the same as secondary (except no French)\r
-          offset += latinOneTableLen_;\r
-          //sIndex = 0; tIndex = 0;\r
-          sIndex = startOffset; tIndex = startOffset;\r
-          endOfSource = false;\r
-          for(;;) {\r
-            while(sOrder==0) {\r
-              if(sIndex==sLen) {\r
-                endOfSource = true;\r
-                break;\r
-              }\r
-              sChar=source.charAt(sIndex++); //[sIndex++];\r
-              sOrder = latinOneCEs_[offset+sChar];\r
-              if(isSpecial(sOrder)) {\r
-                m_ContInfo_.index = sIndex;\r
-                sOrder = getLatinOneContraction(2, sOrder, source);\r
-                sIndex = m_ContInfo_.index;\r
-              }\r
-            }\r
-            while(tOrder==0) {\r
-              if(tIndex==tLen) {\r
-                if(endOfSource) {\r
-                  return 0; // if both strings are at the end, they are equal\r
-                } else {\r
-                  return 1;\r
-                }\r
-              }\r
-              tChar=target.charAt(tIndex++); //[tIndex++];\r
-              tOrder = latinOneCEs_[offset+tChar];\r
-              if(isSpecial(tOrder)) {\r
-                m_ContInfo_.index = tIndex;\r
-                tOrder = getLatinOneContraction(2, tOrder, target);\r
-                tIndex = m_ContInfo_.index;\r
-              }\r
-            }\r
-            if(endOfSource) {\r
-                return -1;\r
-            }\r
-            if(sOrder == tOrder) {\r
-              sOrder = 0; tOrder = 0;\r
-              continue;\r
-            } else {\r
-              if(((sOrder^tOrder)&0xff000000)!=0) {\r
-                if(sOrder >>> 8 < tOrder >>> 8) {\r
-                  return -1;\r
-                } else {\r
-                  return 1;\r
-                }\r
-              }\r
-              sOrder<<=8;\r
-              tOrder<<=8;\r
-            }\r
-          }\r
-        }\r
-        return 0;\r
-    }\r
-    /** \r
-     * Get the version of this collator object.\r
-     * @return the version object associated with this collator\r
-     * @stable ICU 2.8\r
-     */\r
-    public VersionInfo getVersion() {\r
-        /* RunTime version  */\r
-        int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();\r
-        /* Builder version*/\r
-        int bdVersion = m_version_.getMajor();\r
-\r
-        /* Charset Version. Need to get the version from cnv files\r
-         * makeconv should populate cnv files with version and\r
-         * an api has to be provided in ucnv.h to obtain this version\r
-         */\r
-        int csVersion = 0;\r
-\r
-        /* combine the version info */\r
-        int cmbVersion = ((rtVersion<<11) | (bdVersion<<6) | (csVersion)) & 0xFFFF;\r
-        \r
-        /* Tailoring rules */\r
-        return VersionInfo.getInstance(cmbVersion>>8, \r
-                cmbVersion & 0xFF, \r
-                m_version_.getMinor(), \r
-                UCA_.m_UCA_version_.getMajor());\r
-\r
-//        versionInfo[0] = (uint8_t)(cmbVersion>>8);\r
-//        versionInfo[1] = (uint8_t)cmbVersion;\r
-//        versionInfo[2] = coll->image->version[1];\r
-//        versionInfo[3] = coll->UCA->image->UCAVersion[0];\r
-    }\r
-    \r
-    /** \r
-     * Get the UCA version of this collator object.\r
-     * @return the version object associated with this collator\r
-     * @stable ICU 2.8\r
-     */\r
-    public VersionInfo getUCAVersion() {\r
-        return UCA_.m_UCA_version_;\r
-    }\r
-\r
-    private transient boolean m_reallocLatinOneCEs_;\r
-}\r
+//##header J2SE15
+/**
+*******************************************************************************
+* Copyright (C) 1996-2009, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.io.IOException;
+import java.text.CharacterIterator;
+import java.text.ParseException;
+import java.util.Arrays;
+import java.util.MissingResourceException;
+
+//#if defined(FOUNDATION10) || defined(J2SE13) || defined(ECLIPSE_FRAGMENT)
+//##import com.ibm.icu.impl.ByteBuffer;
+//#else
+import java.nio.ByteBuffer;
+//#endif
+
+import com.ibm.icu.impl.BOCU;
+import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.ImplicitCEGenerator;
+import com.ibm.icu.impl.IntTrie;
+import com.ibm.icu.impl.StringUCharacterIterator;
+import com.ibm.icu.impl.Trie;
+import com.ibm.icu.impl.TrieIterator;
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ * <p>RuleBasedCollator is a concrete subclass of Collator. It allows
+ * customization of the Collator via user-specified rule sets.
+ * RuleBasedCollator is designed to be fully compliant to the <a
+ * href="http://www.unicode.org/unicode/reports/tr10/">Unicode
+ * Collation Algorithm (UCA)</a> and conforms to ISO 14651.</p>
+ *
+ * <p>Users are strongly encouraged to read <a
+ * href="http://www.icu-project.org/userguide/Collate_Intro.html">
+ * the users guide</a> for more information about the collation
+ * service before using this class.</p>
+ *
+ * <p>Create a RuleBasedCollator from a locale by calling the
+ * getInstance(Locale) factory method in the base class Collator.
+ * Collator.getInstance(Locale) creates a RuleBasedCollator object
+ * based on the collation rules defined by the argument locale.  If a
+ * customized collation ordering ar attributes is required, use the
+ * RuleBasedCollator(String) constructor with the appropriate
+ * rules. The customized RuleBasedCollator will base its ordering on
+ * UCA, while re-adjusting the attributes and orders of the characters
+ * in the specified rule accordingly.</p>
+ *
+ * <p>RuleBasedCollator provides correct collation orders for most
+ * locales supported in ICU. If specific data for a locale is not
+ * available, the orders eventually falls back to the <a
+ * href="http://www.unicode.org/unicode/reports/tr10/">UCA collation
+ * order </a>.</p>
+ *
+ * <p>For information about the collation rule syntax and details
+ * about customization, please refer to the
+ * <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
+ * Collation customization</a> section of the user's guide.</p>
+ *
+ * <p><strong>Note</strong> that there are some differences between
+ * the Collation rule syntax used in Java and ICU4J:
+ *
+ * <ul>
+ * <li>According to the JDK documentation:
+ * <i>
+ * <p>
+ * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule
+ * is in force when a Thai vowel of the range &#92;U0E40-&#92;U0E44 precedes a
+ * Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the
+ * range &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range
+ * &#92;U0E81-&#92;U0EAE then the
+ * vowel is placed after the consonant for collation purposes.
+ * </p>
+ * <p>
+ * If a rule is without the modifier '!', the Thai/Lao vowel-consonant
+ * swapping is not turned on.
+ * </p>
+ * </i>
+ * <p>
+ * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao
+ * vowel-consonant swapping, since the UCA clearly states that it has to be
+ * supported to ensure a correct sorting order. If a '!' is encountered, it is
+ * ignored.
+ * </p>
+ * <li>As mentioned in the documentation of the base class Collator,
+ *     compatibility decomposition mode is not supported.
+ * </ul>
+ * <p>
+ * <strong>Examples</strong>
+ * </p>
+ * <p>
+ * Creating Customized RuleBasedCollators:
+ * <blockquote>
+ * <pre>
+ * String simple = "&amp; a &lt; b &lt; c &lt; d";
+ * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
+ *
+ * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "
+ *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "
+ *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "
+ *                    + "o , O &lt; p , P &lt; q , Q &lt r , R &lt s , S &lt; "
+ *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "
+ *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "
+ *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "
+ *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";
+ * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
+ * </pre>
+ * </blockquote>
+ *
+ * Concatenating rules to combine <code>Collator</code>s:
+ * <blockquote>
+ * <pre>
+ * // Create an en_US Collator object
+ * RuleBasedCollator en_USCollator = (RuleBasedCollator)
+ *     Collator.getInstance(new Locale("en", "US", ""));
+ * // Create a da_DK Collator object
+ * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
+ *     Collator.getInstance(new Locale("da", "DK", ""));
+ * // Combine the two
+ * // First, get the collation rules from en_USCollator
+ * String en_USRules = en_USCollator.getRules();
+ * // Second, get the collation rules from da_DKCollator
+ * String da_DKRules = da_DKCollator.getRules();
+ * RuleBasedCollator newCollator =
+ *                             new RuleBasedCollator(en_USRules + da_DKRules);
+ * // newCollator has the combined rules
+ * </pre>
+ * </blockquote>
+ *
+ * Making changes to an existing RuleBasedCollator to create a new
+ * <code>Collator</code> object, by appending changes to the existing rule:
+ * <blockquote>
+ * <pre>
+ * // Create a new Collator object with additional rules
+ * String addRules = "&amp; C &lt; ch, cH, Ch, CH";
+ * RuleBasedCollator myCollator =
+ *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
+ * // myCollator contains the new rules
+ * </pre>
+ * </blockquote>
+ *
+ * How to change the order of non-spacing accents:
+ * <blockquote>
+ * <pre>
+ * // old rule with main accents
+ * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "
+ *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "
+ *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "
+ *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "
+ *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "
+ *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "
+ *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";
+ * // change the order of accent characters
+ * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";
+ * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
+ * </pre>
+ * </blockquote>
+ *
+ * Putting in a new primary ordering before the default setting,
+ * e.g. sort English characters before or after Japanese characters in the Japanese
+ * <code>Collator</code>:
+ * <blockquote>
+ * <pre>
+ * // get en_US Collator rules
+ * RuleBasedCollator en_USCollator
+ *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
+ * // add a few Japanese characters to sort before English characters
+ * // suppose the last character before the first base letter 'a' in
+ * // the English collation rule is &#92;u2212
+ * String jaString = "& &#92;u2212 &lt &#92;u3041, &#92;u3042 &lt &#92;u3043, "
+ *                   + "&#92;u3044";
+ * RuleBasedCollator myJapaneseCollator
+ *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
+ * </pre>
+ * </blockquote>
+ * </p>
+ * <p>
+ * This class is not subclassable
+ * </p>
+ * @author Syn Wee Quek
+ * @stable ICU 2.8
+ */
+public final class RuleBasedCollator extends Collator
+{   
+    // public constructors ---------------------------------------------------
+
+    /**
+     * <p>
+     * Constructor that takes the argument rules for
+     * customization. The collator will be based on UCA,
+     * with the attributes and re-ordering of the characters specified in the
+     * argument rules.
+     * </p>
+     * <p>See the user guide's section on
+     * <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
+     * Collation Customization</a> for details on the rule syntax.
+     * </p>
+     * @param rules the collation rules to build the collation table from.
+     * @exception ParseException and IOException thrown. ParseException thrown
+     *            when argument rules have an invalid syntax. IOException
+     *            thrown when an error occured while reading internal data.
+     * @stable ICU 2.8
+     */
+    public RuleBasedCollator(String rules) throws Exception
+    {
+        checkUCA();
+        if (rules == null) {
+            throw new IllegalArgumentException(
+                                            "Collation rules can not be null");
+        }
+        init(rules);
+    }
+
+    // public methods --------------------------------------------------------
+
+    /**
+     * Clones the RuleBasedCollator
+     * @return a new instance of this RuleBasedCollator object
+     * @stable ICU 2.8
+     */
+    public Object clone() throws CloneNotSupportedException
+    {
+        RuleBasedCollator result = (RuleBasedCollator)super.clone();
+        if (latinOneCEs_ != null) {
+            result.m_reallocLatinOneCEs_ = true;
+            result.m_ContInfo_ = new ContractionInfo();
+        }
+
+        // since all collation data in the RuleBasedCollator do not change
+        // we can safely assign the result.fields to this collator
+        result.initUtility(false);  // let the new clone have their own util
+                                    // iterators
+        return result;
+    }
+
+    /**
+     * Return a CollationElementIterator for the given String.
+     * @see CollationElementIterator
+     * @stable ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(String source)
+    {
+        return new CollationElementIterator(source, this);
+    }
+
+    /**
+     * Return a CollationElementIterator for the given CharacterIterator.
+     * The source iterator's integrity will be preserved since a new copy
+     * will be created for use.
+     * @see CollationElementIterator
+     * @stable ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(
+                                                CharacterIterator source)
+    {
+        CharacterIterator newsource = (CharacterIterator)source.clone();
+        return new CollationElementIterator(newsource, this);
+    }
+    
+    /**
+     * Return a CollationElementIterator for the given UCharacterIterator.
+     * The source iterator's integrity will be preserved since a new copy
+     * will be created for use.
+     * @see CollationElementIterator
+     * @stable ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(
+                                                UCharacterIterator source)
+    {
+        return new CollationElementIterator(source, this);
+    }
+
+    // public setters --------------------------------------------------------
+
+    /**
+     * Sets the Hiragana Quaternary mode to be on or off.
+     * When the Hiragana Quaternary mode is turned on, the collator
+     * positions Hiragana characters before all non-ignorable characters in
+     * QUATERNARY strength. This is to produce a correct JIS collation order,
+     * distinguishing between Katakana  and Hiragana characters.
+     * @param flag true if Hiragana Quaternary mode is to be on, false
+     *        otherwise
+     * @see #setHiraganaQuaternaryDefault
+     * @see #isHiraganaQuaternary
+     * @stable ICU 2.8
+     */
+    public void setHiraganaQuaternary(boolean flag)
+    {
+        m_isHiragana4_ = flag;
+        updateInternalState();        
+    }
+
+    /**
+     * Sets the Hiragana Quaternary mode to the initial mode set during
+     * construction of the RuleBasedCollator.
+     * See setHiraganaQuaternary(boolean) for more details.
+     * @see #setHiraganaQuaternary(boolean)
+     * @see #isHiraganaQuaternary
+     * @stable ICU 2.8
+     */
+    public void setHiraganaQuaternaryDefault()
+    {
+        m_isHiragana4_ = m_defaultIsHiragana4_;
+        updateInternalState();
+    }
+
+    /**
+     * Sets whether uppercase characters sort before lowercase
+     * characters or vice versa, in strength TERTIARY. The default
+     * mode is false, and so lowercase characters sort before uppercase
+     * characters.
+     * If true, sort upper case characters first.
+     * @param upperfirst true to sort uppercase characters before
+     *                   lowercase characters, false to sort lowercase
+     *                   characters before uppercase characters
+     * @see #isLowerCaseFirst
+     * @see #isUpperCaseFirst
+     * @see #setLowerCaseFirst
+     * @see #setCaseFirstDefault
+     * @stable ICU 2.8
+     */
+    public void setUpperCaseFirst(boolean upperfirst)
+    {
+        if (upperfirst) {
+            if(m_caseFirst_ != AttributeValue.UPPER_FIRST_) {
+                latinOneRegenTable_ = true;
+            }
+            m_caseFirst_ = AttributeValue.UPPER_FIRST_;
+        }
+        else {
+            if(m_caseFirst_ != AttributeValue.OFF_) {
+                latinOneRegenTable_ = true;
+            }
+            m_caseFirst_ = AttributeValue.OFF_;
+        }
+        updateInternalState();
+    }
+
+    /**
+     * Sets the orders of lower cased characters to sort before upper cased
+     * characters, in strength TERTIARY. The default
+     * mode is false.
+     * If true is set, the RuleBasedCollator will sort lower cased characters
+     * before the upper cased ones.
+     * Otherwise, if false is set, the RuleBasedCollator will ignore case
+     * preferences.
+     * @param lowerfirst true for sorting lower cased characters before
+     *                   upper cased characters, false to ignore case
+     *                   preferences.
+     * @see #isLowerCaseFirst
+     * @see #isUpperCaseFirst
+     * @see #setUpperCaseFirst
+     * @see #setCaseFirstDefault
+     * @stable ICU 2.8
+     */
+    public void setLowerCaseFirst(boolean lowerfirst)
+    {
+        if (lowerfirst) {
+                if(m_caseFirst_ != AttributeValue.LOWER_FIRST_) {
+                    latinOneRegenTable_ = true;
+                }
+                m_caseFirst_ = AttributeValue.LOWER_FIRST_;
+        }
+        else {
+                if(m_caseFirst_ != AttributeValue.OFF_) {
+                    latinOneRegenTable_ = true;
+                }
+            m_caseFirst_ = AttributeValue.OFF_;
+            }
+        updateInternalState();
+    }
+
+    /**
+     * Sets the case first mode to the initial mode set during
+     * construction of the RuleBasedCollator.
+     * See setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more
+     * details.
+     * @see #isLowerCaseFirst
+     * @see #isUpperCaseFirst
+     * @see #setLowerCaseFirst(boolean)
+     * @see #setUpperCaseFirst(boolean)
+     * @stable ICU 2.8
+     */
+    public final void setCaseFirstDefault()
+    {
+        if(m_caseFirst_ != m_defaultCaseFirst_) {
+            latinOneRegenTable_ = true;
+        }
+        m_caseFirst_ = m_defaultCaseFirst_;
+        updateInternalState();
+    }
+
+    /**
+     * Sets the alternate handling mode to the initial mode set during
+     * construction of the RuleBasedCollator.
+     * See setAlternateHandling(boolean) for more details.
+     * @see #setAlternateHandlingShifted(boolean)
+     * @see #isAlternateHandlingShifted()
+     * @stable ICU 2.8
+     */
+    public void setAlternateHandlingDefault()
+    {
+        m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
+        updateInternalState();
+    }
+
+    /**
+     * Sets the case level mode to the initial mode set during
+     * construction of the RuleBasedCollator.
+     * See setCaseLevel(boolean) for more details.
+     * @see #setCaseLevel(boolean)
+     * @see #isCaseLevel
+     * @stable ICU 2.8
+     */
+    public void setCaseLevelDefault()
+    {
+        m_isCaseLevel_ = m_defaultIsCaseLevel_;
+        updateInternalState();
+    }
+
+    /**
+     * Sets the decomposition mode to the initial mode set during construction
+     * of the RuleBasedCollator.
+     * See setDecomposition(int) for more details.
+     * @see #getDecomposition
+     * @see #setDecomposition(int)
+     * @stable ICU 2.8
+     */
+    public void setDecompositionDefault()
+    {
+        setDecomposition(m_defaultDecomposition_);
+        updateInternalState();        
+    }
+
+    /**
+     * Sets the French collation mode to the initial mode set during
+     * construction of the RuleBasedCollator.
+     * See setFrenchCollation(boolean) for more details.
+     * @see #isFrenchCollation
+     * @see #setFrenchCollation(boolean)
+     * @stable ICU 2.8
+     */
+    public void setFrenchCollationDefault()
+    {
+        if(m_isFrenchCollation_ != m_defaultIsFrenchCollation_) {
+            latinOneRegenTable_ = true;
+        }
+        m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
+        updateInternalState();
+    }
+
+    /**
+     * Sets the collation strength to the initial mode set during the
+     * construction of the RuleBasedCollator.
+     * See setStrength(int) for more details.
+     * @see #setStrength(int)
+     * @see #getStrength
+     * @stable ICU 2.8
+     */
+    public void setStrengthDefault()
+    {
+        setStrength(m_defaultStrength_);
+        updateInternalState();        
+    }
+    
+    /**
+     * Method to set numeric collation to its default value.
+     * When numeric collation is turned on, this Collator generates a collation 
+     * key for the numeric value of substrings of digits. This is a way to get 
+     * '100' to sort AFTER '2'
+     * @see #getNumericCollation
+     * @see #setNumericCollation
+     * @stable ICU 2.8
+     */
+    public void setNumericCollationDefault()
+    {
+        setNumericCollation(m_defaultIsNumericCollation_);
+        updateInternalState();        
+    }
+
+    /**
+     * Sets the mode for the direction of SECONDARY weights to be used in
+     * French collation.
+     * The default value is false, which treats SECONDARY weights in the order
+     * they appear.
+     * If set to true, the SECONDARY weights will be sorted backwards.
+     * See the section on
+     * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">
+     * French collation</a> for more information.
+     * @param flag true to set the French collation on, false to set it off
+     * @stable ICU 2.8
+     * @see #isFrenchCollation
+     * @see #setFrenchCollationDefault
+     */
+    public void setFrenchCollation(boolean flag)
+    {
+        if(m_isFrenchCollation_ != flag) {
+            latinOneRegenTable_ = true;
+        }
+        m_isFrenchCollation_ = flag;
+        updateInternalState();
+    }
+
+    /**
+     * Sets the alternate handling for QUATERNARY strength to be either
+     * shifted or non-ignorable.
+     * See the UCA definition on
+     * <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">
+     * Alternate Weighting</a>.
+     * This attribute will only be effective when QUATERNARY strength is set.
+     * The default value for this mode is false, corresponding to the
+     * NON_IGNORABLE mode in UCA. In the NON-IGNORABLE mode, the
+     * RuleBasedCollator will treats all the codepoints with non-ignorable
+     * primary weights in the same way.
+     * If the mode is set to true, the behaviour corresponds to SHIFTED defined
+     * in UCA, this causes codepoints with PRIMARY orders that are equal or
+     * below the variable top value to be ignored in PRIMARY order and
+     * moved to the QUATERNARY order.
+     * @param shifted true if SHIFTED behaviour for alternate handling is
+     *        desired, false for the NON_IGNORABLE behaviour.
+     * @see #isAlternateHandlingShifted
+     * @see #setAlternateHandlingDefault
+     * @stable ICU 2.8
+     */
+    public void setAlternateHandlingShifted(boolean shifted)
+    {
+        m_isAlternateHandlingShifted_ = shifted;
+        updateInternalState();
+    }
+
+    /**
+     * <p>
+     * When case level is set to true, an additional weight is formed
+     * between the SECONDARY and TERTIARY weight, known as the case level.
+     * The case level is used to distinguish large and small Japanese Kana
+     * characters. Case level could also be used in other situations.
+     * For example to distinguish certain Pinyin characters.
+     * The default value is false, which means the case level is not generated.
+     * The contents of the case level are affected by the case first
+     * mode. A simple way to ignore accent differences in a string is to set
+     * the strength to PRIMARY and enable case level.
+     * </p>
+     * <p>
+     * See the section on
+     * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">
+     * case level</a> for more information.
+     * </p>
+     * @param flag true if case level sorting is required, false otherwise
+     * @stable ICU 2.8
+     * @see #setCaseLevelDefault
+     * @see #isCaseLevel
+     */
+    public void setCaseLevel(boolean flag)
+    {
+        m_isCaseLevel_ = flag;
+        updateInternalState();
+    }
+
+    /**
+     * <p>
+     * Sets this Collator's strength property. The strength property
+     * determines the minimum level of difference considered significant
+     * during comparison.
+     * </p>
+     * <p>See the Collator class description for an example of use.</p>
+     * @param newStrength the new strength value.
+     * @see #getStrength
+     * @see #setStrengthDefault
+     * @see #PRIMARY
+     * @see #SECONDARY
+     * @see #TERTIARY
+     * @see #QUATERNARY
+     * @see #IDENTICAL
+     * @exception IllegalArgumentException If the new strength value is not one
+     *              of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
+     * @stable ICU 2.8
+     */
+    public void setStrength(int newStrength)
+    {
+        super.setStrength(newStrength);
+        updateInternalState();
+    }
+    
+    /** 
+     * <p>
+     * Variable top is a two byte primary value which causes all the codepoints 
+     * with primary values that are less or equal than the variable top to be 
+     * shifted when alternate handling is set to SHIFTED.
+     * </p>
+     * <p>
+     * Sets the variable top to a collation element value of a string supplied.
+     * </p> 
+     * @param varTop one or more (if contraction) characters to which the 
+     *               variable top should be set
+     * @return a int value containing the value of the variable top in upper 16
+     *         bits. Lower 16 bits are undefined.
+     * @exception IllegalArgumentException is thrown if varTop argument is not 
+     *            a valid variable top element. A variable top element is 
+     *            invalid when 
+     *            <ul>
+     *            <li>it is a contraction that does not exist in the
+     *                Collation order
+     *            <li>when the PRIMARY strength collation element for the 
+     *                variable top has more than two bytes
+     *            <li>when the varTop argument is null or zero in length.
+     *            </ul>
+     * @see #getVariableTop
+     * @see RuleBasedCollator#setAlternateHandlingShifted
+     * @stable ICU 2.6
+     */
+    public int setVariableTop(String varTop)
+    {
+        if (varTop == null || varTop.length() == 0) {
+            throw new IllegalArgumentException(
+            "Variable top argument string can not be null or zero in length.");
+        }
+        if (m_srcUtilIter_ == null) {
+            initUtility(true);
+        }
+
+        m_srcUtilColEIter_.setText(varTop);
+        int ce = m_srcUtilColEIter_.next();
+        
+        // here we check if we have consumed all characters 
+        // you can put in either one character or a contraction
+        // you shouldn't put more... 
+        if (m_srcUtilColEIter_.getOffset() != varTop.length() 
+            || ce == CollationElementIterator.NULLORDER) {
+            throw new IllegalArgumentException(
+            "Variable top argument string is a contraction that does not exist "
+            + "in the Collation order");
+        }
+        
+        int nextCE = m_srcUtilColEIter_.next();
+        
+        if ((nextCE != CollationElementIterator.NULLORDER) 
+            && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) {
+                throw new IllegalArgumentException(
+                "Variable top argument string can only have a single collation "
+                + "element that has less than or equal to two PRIMARY strength "
+                + "bytes");
+        }
+        
+        m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16;
+        
+        return ce & CE_PRIMARY_MASK_;
+    }
+    
+    /** 
+     * Sets the variable top to a collation element value supplied.
+     * Variable top is set to the upper 16 bits. 
+     * Lower 16 bits are ignored.
+     * @param varTop Collation element value, as returned by setVariableTop or 
+     *               getVariableTop
+     * @see #getVariableTop
+     * @see #setVariableTop(String)
+     * @stable ICU 2.6
+     */
+    public void setVariableTop(int varTop)
+    {
+        m_variableTopValue_ = (varTop & CE_PRIMARY_MASK_) >> 16;
+    }
+    
+    /**
+     * When numeric collation is turned on, this Collator generates a collation 
+     * key for the numeric value of substrings of digits. This is a way to get 
+     * '100' to sort AFTER '2'
+     * @param flag true to turn numeric collation on and false to turn it off
+     * @see #getNumericCollation
+     * @see #setNumericCollationDefault
+     * @stable ICU 2.8
+     */
+    public void setNumericCollation(boolean flag)
+    {
+        // sort substrings of digits as numbers
+        m_isNumericCollation_ = flag;
+        updateInternalState();
+    }
+
+    // public getters --------------------------------------------------------
+
+    /**
+     * Gets the collation rules for this RuleBasedCollator.
+     * Equivalent to String getRules(RuleOption.FULL_RULES).
+     * @return returns the collation rules
+     * @see #getRules(boolean)
+     * @stable ICU 2.8
+     */
+    public String getRules()
+    {
+        return m_rules_;
+    }
+    
+    /**
+     * Returns current rules. The argument defines whether full rules 
+     * (UCA + tailored) rules are returned or just the tailoring. 
+     * @param fullrules true if the rules that defines the full set of 
+     *        collation order is required, otherwise false for returning only 
+     *        the tailored rules
+     * @return the current rules that defines this Collator.
+     * @see #getRules()
+     * @stable ICU 2.6
+     */
+    public String getRules(boolean fullrules)
+    {
+        if (!fullrules) {
+            return m_rules_;
+        }
+        // take the UCA rules and append real rules at the end 
+        return UCA_.m_rules_.concat(m_rules_);
+    }
+
+    /**
+     * Get an UnicodeSet that contains all the characters and sequences
+     * tailored in this collator.
+     * @return a pointer to a UnicodeSet object containing all the
+     *         code points and sequences that may sort differently than
+     *         in the UCA.
+     * @exception ParseException thrown when argument rules have an
+     *            invalid syntax. IOException
+     * @stable ICU 2.4
+     */
+    public UnicodeSet getTailoredSet()
+    {
+        try {
+           CollationRuleParser src = new CollationRuleParser(getRules());
+           return src.getTailoredSet();
+        } catch(Exception e) {
+            throw new IllegalStateException("A tailoring rule should not " +
+                "have errors. Something is quite wrong!");
+        }
+    }
+
+    private class contContext {
+        RuleBasedCollator coll;
+        UnicodeSet contractions;
+        UnicodeSet expansions;
+        UnicodeSet removedContractions;
+        boolean addPrefixes;       
+        contContext(RuleBasedCollator coll, UnicodeSet contractions, UnicodeSet expansions, 
+                UnicodeSet removedContractions, boolean addPrefixes) {
+            this.coll = coll;
+            this.contractions = contractions;
+            this.expansions = expansions;
+            this.removedContractions = removedContractions;
+            this.addPrefixes = addPrefixes;
+        }
+    }
+    
+    private void
+    addSpecial(contContext c, StringBuffer buffer, int CE)
+    {
+        StringBuffer b = new StringBuffer();
+        int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_;
+        int newCE = c.coll.m_contractionCE_[offset];
+        // we might have a contraction that ends from previous level
+        if(newCE != CollationElementIterator.CE_NOT_FOUND_) {
+            if(isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_ 
+                    && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_ 
+                    && c.addPrefixes) {
+                addSpecial(c, buffer, newCE);
+            }
+            if(buffer.length() > 1) {
+                if(c.contractions != null) {
+                    c.contractions.add(buffer.toString());
+                }
+                if(c.expansions != null && isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+                    c.expansions.add(buffer.toString());
+                }
+            }
+        }    
+        
+        offset++;
+        // check whether we're doing contraction or prefix
+        if(getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) {
+            while(c.coll.m_contractionIndex_[offset] != 0xFFFF) {
+                b.delete(0, b.length());
+                b.append(buffer);
+                newCE = c.coll.m_contractionCE_[offset];
+                b.insert(0, c.coll.m_contractionIndex_[offset]);
+                if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
+                    addSpecial(c, b, newCE);
+                } else {
+                    if(c.contractions != null) {
+                        c.contractions.add(b.toString());
+                    }
+                    if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+                        c.expansions.add(b.toString());
+                    }
+                }
+                offset++;
+            }
+        } else if(getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) {
+            while(c.coll.m_contractionIndex_[offset] != 0xFFFF) {
+                b.delete(0, b.length());
+                b.append(buffer);
+                newCE = c.coll.m_contractionCE_[offset];
+                b.append(c.coll.m_contractionIndex_[offset]);
+                if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
+                    addSpecial(c, b, newCE);
+                } else {
+                    if(c.contractions != null) {
+                        c.contractions.add(b.toString());
+                    }
+                    if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+                        c.expansions.add(b.toString());
+                    }
+                }
+                offset++;
+            }
+        }
+    }
+    
+    private
+    void processSpecials(contContext c) 
+    {
+        int internalBufferSize = 512;
+        TrieIterator trieiterator 
+        = new TrieIterator(c.coll.m_trie_);
+        RangeValueIterator.Element element = new RangeValueIterator.Element();
+        while (trieiterator.next(element)) {
+            int start = element.start;
+            int limit = element.limit;
+            int CE = element.value;
+            StringBuffer contraction = new StringBuffer(internalBufferSize);
+            
+            if(isSpecial(CE)) {
+                if(((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) {
+                    while(start < limit) {
+                        // if there are suppressed contractions, we don't 
+                        // want to add them.
+                        if(c.removedContractions != null && c.removedContractions.contains(start)) {
+                            start++;
+                            continue;
+                        }
+                        // we start our contraction from middle, since we don't know if it
+                        // will grow toward right or left
+                        contraction.append((char) start);
+                        addSpecial(c, contraction, CE);
+                        start++;
+                    }
+                } else if(c.expansions != null && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+                    while(start < limit) {
+                        c.expansions.add(start++);
+                    }
+                }
+            }
+        }
+    }
+    
+    /**
+     * Gets unicode sets containing contractions and/or expansions of a collator
+     * @param contractions if not null, set to contain contractions
+     * @param expansions if not null, set to contain expansions
+     * @param addPrefixes add the prefix contextual elements to contractions
+     * @throws Exception 
+     * @stable ICU 3.4
+     */
+    public void
+    getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions,
+            boolean addPrefixes) throws Exception {
+        if(contractions != null) {
+            contractions.clear();
+        }
+        if(expansions != null) {
+            expansions.clear();
+        }
+        String rules = getRules();
+        try {
+            CollationRuleParser src = new CollationRuleParser(rules);
+            contContext c = new contContext(RuleBasedCollator.UCA_, 
+                    contractions, expansions, src.m_removeSet_, addPrefixes);
+            
+            // Add the UCA contractions
+            processSpecials(c);
+            // This is collator specific. Add contractions from a collator
+            c.coll = this;
+            c.removedContractions =  null;
+            processSpecials(c);
+        } catch (Exception e) {
+            throw e;
+        }
+    }
+    
+    /**
+     * <p>
+     * Get a Collation key for the argument String source from this
+     * RuleBasedCollator.
+     * </p>
+     * <p>
+     * General recommendation: <br>
+     * If comparison are to be done to the same String multiple times, it would
+     * be more efficient to generate CollationKeys for the Strings and use
+     * CollationKey.compareTo(CollationKey) for the comparisons.
+     * If the each Strings are compared to only once, using the method
+     * RuleBasedCollator.compare(String, String) will have a better performance.
+     * </p>
+     * <p>
+     * See the class documentation for an explanation about CollationKeys.
+     * </p>
+     * @param source the text String to be transformed into a collation key.
+     * @return the CollationKey for the given String based on this
+     *         RuleBasedCollator's collation rules. If the source String is
+     *         null, a null CollationKey is returned.
+     * @see CollationKey
+     * @see #compare(String, String)
+     * @see #getRawCollationKey
+     * @stable ICU 2.8
+     */
+    public CollationKey getCollationKey(String source) {
+        if (source == null) {
+            return null;
+        }
+        m_utilRawCollationKey_ = getRawCollationKey(source, 
+                                                    m_utilRawCollationKey_);
+        return new CollationKey(source, m_utilRawCollationKey_);
+    }
+    
+    /**
+     * Gets the simpler form of a CollationKey for the String source following
+     * the rules of this Collator and stores the result into the user provided 
+     * argument key. 
+     * If key has a internal byte array of length that's too small for the 
+     * result, the internal byte array will be grown to the exact required 
+     * size.
+     * @param source the text String to be transformed into a RawCollationKey  
+     * @param key output RawCollationKey to store results
+     * @return If key is null, a new instance of RawCollationKey will be 
+     *         created and returned, otherwise the user provided key will be 
+     *         returned.
+     * @see #getCollationKey 
+     * @see #compare(String, String)
+     * @see RawCollationKey
+     * @stable ICU 2.8
+     */
+    public RawCollationKey getRawCollationKey(String source, 
+                                              RawCollationKey key)
+    {
+        if (source == null) {
+            return null;
+        }
+        int strength = getStrength();
+        m_utilCompare0_ = m_isCaseLevel_;
+        //m_utilCompare1_ = true;
+        m_utilCompare2_ = strength >= SECONDARY;
+        m_utilCompare3_ = strength >= TERTIARY;
+        m_utilCompare4_ = strength >= QUATERNARY;
+        m_utilCompare5_ = strength == IDENTICAL;
+
+        m_utilBytesCount0_ = 0;
+        m_utilBytesCount1_ = 0;
+        m_utilBytesCount2_ = 0;
+        m_utilBytesCount3_ = 0;
+        m_utilBytesCount4_ = 0;
+        //m_utilBytesCount5_ = 0;
+        //m_utilCount0_ = 0;
+        //m_utilCount1_ = 0;
+        m_utilCount2_ = 0;
+        m_utilCount3_ = 0;
+        m_utilCount4_ = 0;
+        //m_utilCount5_ = 0;
+        boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_;
+        // TODO: UCOL_COMMON_BOT4 should be a function of qShifted.
+        // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so
+        // high.
+        int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1) & LAST_BYTE_MASK_;
+        byte hiragana4 = 0;
+        if (m_isHiragana4_ && m_utilCompare4_) {
+            // allocate one more space for hiragana, value for hiragana
+            hiragana4 = (byte)commonBottom4;
+            commonBottom4 ++;
+        }
+
+        int bottomCount4 = 0xFF - commonBottom4;
+        // If we need to normalize, we'll do it all at once at the beginning!
+        if (m_utilCompare5_ && Normalizer.quickCheck(source, Normalizer.NFD,0)
+                                                    != Normalizer.YES) {
+            // if it is identical strength, we have to normalize the string to
+            // NFD so that it will be appended correctly to the end of the sort
+            // key
+            source = Normalizer.decompose(source, false);
+        }
+        else if (getDecomposition() != NO_DECOMPOSITION
+            && Normalizer.quickCheck(source, Normalizer.FCD,0)
+                                                    != Normalizer.YES) {
+            // for the rest of the strength, if decomposition is on, FCD is
+            // enough for us to work on.
+            source = Normalizer.normalize(source,Normalizer.FCD);
+        }
+        getSortKeyBytes(source, doFrench, hiragana4, commonBottom4,
+                        bottomCount4);
+        if (key == null) {
+            key = new RawCollationKey();
+        }
+        getSortKey(source, doFrench, commonBottom4, bottomCount4, key);
+        return key;
+    }
+
+    /**
+     * Return true if an uppercase character is sorted before the corresponding lowercase character.
+     * See setCaseFirst(boolean) for details.
+     * @see #setUpperCaseFirst
+     * @see #setLowerCaseFirst
+     * @see #isLowerCaseFirst
+     * @see #setCaseFirstDefault
+     * @return true if upper cased characters are sorted before lower cased
+     *         characters, false otherwise
+     * @stable ICU 2.8
+     */
+     public boolean isUpperCaseFirst()
+     {
+        return (m_caseFirst_ == AttributeValue.UPPER_FIRST_);
+     }
+     
+    /**
+     * Return true if a lowercase character is sorted before the corresponding uppercase character.
+     * See setCaseFirst(boolean) for details.
+     * @see #setUpperCaseFirst
+     * @see #setLowerCaseFirst
+     * @see #isUpperCaseFirst
+     * @see #setCaseFirstDefault
+     * @return true lower cased characters are sorted before upper cased
+     *         characters, false otherwise
+     * @stable ICU 2.8
+     */
+    public boolean isLowerCaseFirst()
+    {
+        return (m_caseFirst_ == AttributeValue.LOWER_FIRST_);
+    }
+
+    /**
+     * Checks if the alternate handling behaviour is the UCA defined SHIFTED or
+     * NON_IGNORABLE.
+     * If return value is true, then the alternate handling attribute for the
+     * Collator is SHIFTED. Otherwise if return value is false, then the
+     * alternate handling attribute for the Collator is NON_IGNORABLE
+     * See setAlternateHandlingShifted(boolean) for more details.
+     * @return true or false
+     * @see #setAlternateHandlingShifted(boolean)
+     * @see #setAlternateHandlingDefault
+     * @stable ICU 2.8
+     */
+    public boolean isAlternateHandlingShifted()
+    {
+        return m_isAlternateHandlingShifted_;
+    }
+
+    /**
+     * Checks if case level is set to true.
+     * See setCaseLevel(boolean) for details.
+     * @return the case level mode
+     * @see #setCaseLevelDefault
+     * @see #isCaseLevel
+     * @see #setCaseLevel(boolean)
+     * @stable ICU 2.8
+     */
+    public boolean isCaseLevel()
+    {
+        return m_isCaseLevel_;
+    }
+
+    /**
+     * Checks if French Collation is set to true.
+     * See setFrenchCollation(boolean) for details.
+     * @return true if French Collation is set to true, false otherwise
+     * @see #setFrenchCollation(boolean)
+     * @see #setFrenchCollationDefault
+     * @stable ICU 2.8
+     */
+     public boolean isFrenchCollation()
+     {
+         return m_isFrenchCollation_;
+     }
+
+    /**
+     * Checks if the Hiragana Quaternary mode is set on.
+     * See setHiraganaQuaternary(boolean) for more details.
+     * @return flag true if Hiragana Quaternary mode is on, false otherwise
+     * @see #setHiraganaQuaternaryDefault
+     * @see #setHiraganaQuaternary(boolean)
+     * @stable ICU 2.8
+     */
+    public boolean isHiraganaQuaternary()
+    {
+        return m_isHiragana4_;
+    }
+
+    /** 
+     * Gets the variable top value of a Collator. 
+     * Lower 16 bits are undefined and should be ignored.
+     * @return the variable top value of a Collator.
+     * @see #setVariableTop
+     * @stable ICU 2.6
+     */
+    public int getVariableTop()
+    {
+          return m_variableTopValue_ << 16;
+    }
+    
+    /** 
+     * Method to retrieve the numeric collation value.
+     * When numeric collation is turned on, this Collator generates a collation 
+     * key for the numeric value of substrings of digits. This is a way to get 
+     * '100' to sort AFTER '2'
+     * @see #setNumericCollation
+     * @see #setNumericCollationDefault
+     * @return true if numeric collation is turned on, false otherwise
+     * @stable ICU 2.8
+     */
+    public boolean getNumericCollation()
+    {
+        return m_isNumericCollation_;
+    }
+    
+    // public other methods -------------------------------------------------
+
+    /**
+     * Compares the equality of two RuleBasedCollator objects.
+     * RuleBasedCollator objects are equal if they have the same collation
+     * rules and the same attributes.
+     * @param obj the RuleBasedCollator to be compared to.
+     * @return true if this RuleBasedCollator has exactly the same
+     *         collation behaviour as obj, false otherwise.
+     * @stable ICU 2.8
+     */
+    public boolean equals(Object obj)
+    {
+        if (obj == null) {
+            return false;  // super does class check
+        }
+        if (this == obj) {
+            return true;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        RuleBasedCollator other = (RuleBasedCollator)obj;
+        // all other non-transient information is also contained in rules.
+        if (getStrength() != other.getStrength()
+               || getDecomposition() != other.getDecomposition()
+               || other.m_caseFirst_ != m_caseFirst_
+               || other.m_caseSwitch_ != m_caseSwitch_
+               || other.m_isAlternateHandlingShifted_
+                                             != m_isAlternateHandlingShifted_
+               || other.m_isCaseLevel_ != m_isCaseLevel_
+               || other.m_isFrenchCollation_ != m_isFrenchCollation_
+               || other.m_isHiragana4_ != m_isHiragana4_) {
+            return false;
+        }
+        boolean rules = m_rules_ == other.m_rules_;
+        if (!rules && (m_rules_ != null && other.m_rules_ != null)) {
+            rules = m_rules_.equals(other.m_rules_);
+        }
+        if (!rules || !ICUDebug.enabled("collation")) {
+            return rules;
+        }
+        if (m_addition3_ != other.m_addition3_
+                  || m_bottom3_ != other.m_bottom3_
+                  || m_bottomCount3_ != other.m_bottomCount3_
+                  || m_common3_ != other.m_common3_
+                  || m_isSimple3_ != other.m_isSimple3_
+                  || m_mask3_ != other.m_mask3_
+                  || m_minContractionEnd_ != other.m_minContractionEnd_
+                  || m_minUnsafe_ != other.m_minUnsafe_
+                  || m_top3_ != other.m_top3_
+                  || m_topCount3_ != other.m_topCount3_
+                  || !Arrays.equals(m_unsafe_, other.m_unsafe_)) {
+            return false;
+        }
+        if (!m_trie_.equals(other.m_trie_)) {
+            // we should use the trie iterator here, but then this part is
+            // only used in the test.
+            for (int i = UCharacter.MAX_VALUE; i >= UCharacter.MIN_VALUE; i --)
+            {
+                int v = m_trie_.getCodePointValue(i);
+                int otherv = other.m_trie_.getCodePointValue(i);
+                if (v != otherv) {
+                    int mask = v & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_);
+                    if (mask == (otherv & 0xff000000)) {
+                        v &= 0xffffff;
+                        otherv &= 0xffffff;
+                        if (mask == 0xf1000000) {
+                            v -= (m_expansionOffset_ << 4);
+                            otherv -= (other.m_expansionOffset_ << 4);
+                        }
+                        else if (mask == 0xf2000000) {
+                            v -= m_contractionOffset_;
+                            otherv -= other.m_contractionOffset_;
+                        }
+                        if (v == otherv) {
+                            continue;
+                        }
+                    }
+                    return false;
+                }
+            }
+        }
+        if (Arrays.equals(m_contractionCE_, other.m_contractionCE_)
+            && Arrays.equals(m_contractionEnd_, other.m_contractionEnd_)
+            && Arrays.equals(m_contractionIndex_, other.m_contractionIndex_)
+            && Arrays.equals(m_expansion_, other.m_expansion_)
+            && Arrays.equals(m_expansionEndCE_, other.m_expansionEndCE_)) {
+            // not comparing paddings
+            for (int i = 0; i < m_expansionEndCE_.length; i ++) {
+                 if (m_expansionEndCEMaxSize_[i]
+                     != other.m_expansionEndCEMaxSize_[i]) {
+                     return false;
+                 }
+                 return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Generates a unique hash code for this RuleBasedCollator.
+     * @return the unique hash code for this Collator
+     * @stable ICU 2.8
+     */
+    public int hashCode()
+    {
+        String rules = getRules();
+        if (rules == null) {
+            rules = "";
+        }
+        return rules.hashCode();
+    }
+
+    /**
+     * Compares the source text String to the target text String according to
+     * the collation rules, strength and decomposition mode for this
+     * RuleBasedCollator.
+     * Returns an integer less than,
+     * equal to or greater than zero depending on whether the source String is
+     * less than, equal to or greater than the target String. See the Collator
+     * class description for an example of use.
+     * </p>
+     * <p>
+     * General recommendation: <br>
+     * If comparison are to be done to the same String multiple times, it would
+     * be more efficient to generate CollationKeys for the Strings and use
+     * CollationKey.compareTo(CollationKey) for the comparisons.
+     * If speed performance is critical and object instantiation is to be 
+     * reduced, further optimization may be achieved by generating a simpler 
+     * key of the form RawCollationKey and reusing this RawCollationKey 
+     * object with the method RuleBasedCollator.getRawCollationKey. Internal 
+     * byte representation can be directly accessed via RawCollationKey and
+     * stored for future use. Like CollationKey, RawCollationKey provides a
+     * method RawCollationKey.compareTo for key comparisons.
+     * If the each Strings are compared to only once, using the method
+     * RuleBasedCollator.compare(String, String) will have a better performance.
+     * </p>
+     * @param source the source text String.
+     * @param target the target text String.
+     * @return Returns an integer value. Value is less than zero if source is
+     *         less than target, value is zero if source and target are equal,
+     *         value is greater than zero if source is greater than target.
+     * @see CollationKey
+     * @see #getCollationKey
+     * @stable ICU 2.8
+     */
+    public int compare(String source, String target)
+    {
+        if (source == target) {
+            return 0;
+        }
+
+        // Find the length of any leading portion that is equal
+        int offset = getFirstUnmatchedOffset(source, target);
+        //return compareRegular(source, target, offset);
+        if(latinOneUse_) {
+          if ((offset < source.length() 
+               && source.charAt(offset) > ENDOFLATINONERANGE_) 
+              || (offset < target.length() 
+                  && target.charAt(offset) > ENDOFLATINONERANGE_)) { 
+              // source or target start with non-latin-1
+            return compareRegular(source, target, offset);
+          } else {
+            return compareUseLatin1(source, target, offset);
+          }
+        } else {
+          return compareRegular(source, target, offset);
+        }
+    }
+    
+    // package private inner interfaces --------------------------------------
+
+    /**
+     * Attribute values to be used when setting the Collator options
+     */
+    static interface AttributeValue
+    {
+        /**
+         * Indicates that the default attribute value will be used.
+         * See individual attribute for details on its default value.
+         */
+        static final int DEFAULT_ = -1;
+        /**
+         * Primary collation strength
+         */
+        static final int PRIMARY_ = Collator.PRIMARY;
+        /**
+         * Secondary collation strength
+         */
+        static final int SECONDARY_ = Collator.SECONDARY;
+        /**
+         * Tertiary collation strength
+         */
+        static final int TERTIARY_ = Collator.TERTIARY;
+        /**
+         * Default collation strength
+         */
+        static final int DEFAULT_STRENGTH_ = Collator.TERTIARY;
+        /**
+         * Internal use for strength checks in Collation elements
+         */
+        static final int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1;
+        /**
+         * Quaternary collation strength
+         */
+        static final int QUATERNARY_ = 3;
+        /**
+         * Identical collation strength
+         */
+        static final int IDENTICAL_ = Collator.IDENTICAL;
+        /**
+         * Internal use for strength checks
+         */
+        static final int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1;
+        /**
+         * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL,
+         * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE
+         */
+        static final int OFF_ = 16;
+        /**
+         * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL,
+         * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE
+         */
+        static final int ON_ = 17;
+        /**
+         * Valid for ALTERNATE_HANDLING. Alternate handling will be shifted
+         */
+        static final int SHIFTED_ = 20;
+        /**
+         * Valid for ALTERNATE_HANDLING. Alternate handling will be non
+         * ignorable
+         */
+        static final int NON_IGNORABLE_ = 21;
+        /**
+         * Valid for CASE_FIRST - lower case sorts before upper case
+         */
+        static final int LOWER_FIRST_ = 24;
+        /**
+         * Upper case sorts before lower case
+         */
+        static final int UPPER_FIRST_ = 25;
+        /**
+         * Number of attribute values
+         */
+        static final int LIMIT_ = 29;
+    }
+
+    /**
+     * Attributes that collation service understands. All the attributes can
+     * take DEFAULT value, as well as the values specific to each one.
+     */
+    static interface Attribute
+    {
+        /**
+         * Attribute for direction of secondary weights - used in French.
+         * Acceptable values are ON, which results in secondary weights being
+         * considered backwards and OFF which treats secondary weights in the
+         * order they appear.
+         */
+        static final int FRENCH_COLLATION_ = 0;
+        /**
+         * Attribute for handling variable elements. Acceptable values are
+         * NON_IGNORABLE (default) which treats all the codepoints with
+         * non-ignorable primary weights in the same way, and SHIFTED which
+         * causes codepoints with primary weights that are equal or below the
+         * variable top value to be ignored on primary level and moved to the
+         * quaternary level.
+         */
+        static final int ALTERNATE_HANDLING_ = 1;
+        /**
+         * Controls the ordering of upper and lower case letters. Acceptable
+         * values are OFF (default), which orders upper and lower case letters
+         * in accordance to their tertiary weights, UPPER_FIRST which forces
+         * upper case letters to sort before lower case letters, and
+         * LOWER_FIRST which does the opposite.
+         */
+        static final int CASE_FIRST_ = 2;
+        /**
+         * Controls whether an extra case level (positioned before the third
+         * level) is generated or not. Acceptable values are OFF (default),
+         * when case level is not generated, and ON which causes the case
+         * level to be generated. Contents of the case level are affected by
+         * the value of CASE_FIRST attribute. A simple way to ignore accent
+         * differences in a string is to set the strength to PRIMARY and
+         * enable case level.
+         */
+        static final int CASE_LEVEL_ = 3;
+        /**
+         * Controls whether the normalization check and necessary
+         * normalizations are performed. When set to OFF (default) no
+         * normalization check is performed. The correctness of the result is
+         * guaranteed only if the input data is in so-called FCD form (see
+         * users manual for more info). When set to ON, an incremental check
+         * is performed to see whether the input data is in the FCD form. If
+         * the data is not in the FCD form, incremental NFD normalization is
+         * performed.
+         */
+        static final int NORMALIZATION_MODE_ = 4;
+        /**
+         * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY,
+         * QUATERNARY or IDENTICAL. The usual strength for most locales
+         * (except Japanese) is tertiary. Quaternary strength is useful when
+         * combined with shifted setting for alternate handling attribute and
+         * for JIS x 4061 collation, when it is used to distinguish between
+         * Katakana  and Hiragana (this is achieved by setting the
+         * HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level is
+         * affected only by the number of non ignorable code points in the
+         * string. Identical strength is rarely useful, as it amounts to
+         * codepoints of the NFD form of the string.
+         */
+        static final int STRENGTH_ = 5;
+        /**
+         * When turned on, this attribute positions Hiragana before all
+         * non-ignorables on quaternary level. This is a sneaky way to produce
+         * JIS sort order.
+         */
+        static final int HIRAGANA_QUATERNARY_MODE_ = 6;
+        /**
+         * Attribute count
+         */
+        static final int LIMIT_ = 7;
+    }
+
+    /**
+     * DataManipulate singleton
+     */
+    static class DataManipulate implements Trie.DataManipulate
+    {
+        // public methods ----------------------------------------------------
+
+        /**
+         * Internal method called to parse a lead surrogate's ce for the offset
+         * to the next trail surrogate data.
+         * @param ce collation element of the lead surrogate
+         * @return data offset or 0 for the next trail surrogate
+         * @stable ICU 2.8
+         */
+        public final int getFoldingOffset(int ce)
+        {
+            if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) {
+                return (ce & 0xFFFFFF);
+            }
+            return 0;
+        }
+
+        /**
+         * Get singleton object
+         */
+        public static final DataManipulate getInstance()
+        {
+            if (m_instance_ == null) {
+                m_instance_ =  new DataManipulate();
+            }
+            return m_instance_;
+        }
+
+        // private data member ----------------------------------------------
+
+        /**
+         * Singleton instance
+         */
+        private static DataManipulate m_instance_;
+
+        // private constructor ----------------------------------------------
+
+        /**
+         * private to prevent initialization
+         */
+        private DataManipulate()
+        {
+        }
+    }
+
+    /**
+     * UCAConstants
+     */
+    static final class UCAConstants
+    {
+         int FIRST_TERTIARY_IGNORABLE_[] = new int[2];       // 0x00000000
+         int LAST_TERTIARY_IGNORABLE_[] = new int[2];        // 0x00000000
+         int FIRST_PRIMARY_IGNORABLE_[] = new int[2];        // 0x00008705
+         int FIRST_SECONDARY_IGNORABLE_[] = new int[2];      // 0x00000000
+         int LAST_SECONDARY_IGNORABLE_[] = new int[2];       // 0x00000500
+         int LAST_PRIMARY_IGNORABLE_[] = new int[2];         // 0x0000DD05
+         int FIRST_VARIABLE_[] = new int[2];                 // 0x05070505
+         int LAST_VARIABLE_[] = new int[2];                  // 0x13CF0505
+         int FIRST_NON_VARIABLE_[] = new int[2];             // 0x16200505
+         int LAST_NON_VARIABLE_[] = new int[2];              // 0x767C0505
+         int RESET_TOP_VALUE_[] = new int[2];                // 0x9F000303
+         int FIRST_IMPLICIT_[] = new int[2];
+         int LAST_IMPLICIT_[] = new int[2];
+         int FIRST_TRAILING_[] = new int[2];
+         int LAST_TRAILING_[] = new int[2];
+         int PRIMARY_TOP_MIN_;
+         int PRIMARY_IMPLICIT_MIN_; // 0xE8000000
+         int PRIMARY_IMPLICIT_MAX_; // 0xF0000000
+         int PRIMARY_TRAILING_MIN_; // 0xE8000000
+         int PRIMARY_TRAILING_MAX_; // 0xF0000000
+         int PRIMARY_SPECIAL_MIN_; // 0xE8000000
+         int PRIMARY_SPECIAL_MAX_; // 0xF0000000
+    }
+
+    // package private data member -------------------------------------------
+
+    static final byte BYTE_FIRST_TAILORED_ = (byte)0x04;
+    static final byte BYTE_COMMON_ = (byte)0x05;
+    static final int COMMON_TOP_2_ = 0x86; // int for unsigness
+    static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;
+    static final int COMMON_BOTTOM_3 = 0x05;
+    /**
+     * Case strength mask
+     */
+    static final int CE_CASE_BIT_MASK_ = 0xC0;
+    static final int CE_TAG_SHIFT_ = 24;
+    static final int CE_TAG_MASK_ = 0x0F000000;
+
+    static final int CE_SPECIAL_FLAG_ = 0xF0000000;
+    /**
+     * Lead surrogate that is tailored and doesn't start a contraction
+     */
+    static final int CE_SURROGATE_TAG_ = 5;
+    /**
+     * Mask to get the primary strength of the collation element
+     */
+    static final int CE_PRIMARY_MASK_ = 0xFFFF0000;
+    /**
+     * Mask to get the secondary strength of the collation element
+     */
+    static final int CE_SECONDARY_MASK_ = 0xFF00;
+    /**
+     * Mask to get the tertiary strength of the collation element
+     */
+    static final int CE_TERTIARY_MASK_ = 0xFF;
+    /**
+     * Primary strength shift
+     */
+    static final int CE_PRIMARY_SHIFT_ = 16;
+    /**
+     * Secondary strength shift
+     */
+    static final int CE_SECONDARY_SHIFT_ = 8;
+    /**
+     * Continuation marker
+     */
+    static final int CE_CONTINUATION_MARKER_ = 0xC0;
+
+    /**
+     * Size of collator raw data headers and options before the expansion
+     * data. This is used when expansion ces are to be retrieved. ICU4C uses
+     * the expansion offset starting from UCollator.UColHeader, hence ICU4J
+     * will have to minus that off to get the right expansion ce offset. In
+     * number of ints.
+     */
+    int m_expansionOffset_;
+    /**
+     * Size of collator raw data headers, options and expansions before
+     * contraction data. This is used when contraction ces are to be retrieved.
+     * ICU4C uses contraction offset starting from UCollator.UColHeader, hence
+     * ICU4J will have to minus that off to get the right contraction ce
+     * offset. In number of chars.
+     */
+    int m_contractionOffset_;
+    /**
+     * Flag indicator if Jamo is special
+     */
+    boolean m_isJamoSpecial_;
+
+    // Collator options ------------------------------------------------------
+    
+    int m_defaultVariableTopValue_;
+    boolean m_defaultIsFrenchCollation_;
+    boolean m_defaultIsAlternateHandlingShifted_;
+    int m_defaultCaseFirst_;
+    boolean m_defaultIsCaseLevel_;
+    int m_defaultDecomposition_;
+    int m_defaultStrength_;
+    boolean m_defaultIsHiragana4_;
+    boolean m_defaultIsNumericCollation_;
+    
+    /**
+     * Value of the variable top
+     */
+    int m_variableTopValue_;
+    /**
+     * Attribute for special Hiragana
+     */
+    boolean m_isHiragana4_;
+    /**
+     * Case sorting customization
+     */
+    int m_caseFirst_;
+    /**
+     * Numeric collation option
+     */
+    boolean m_isNumericCollation_;
+
+    // end Collator options --------------------------------------------------
+
+    /**
+     * Expansion table
+     */
+    int m_expansion_[];
+    /**
+     * Contraction index table
+     */
+    char m_contractionIndex_[];
+    /**
+     * Contraction CE table
+     */
+    int m_contractionCE_[];
+    /**
+     * Data trie
+     */
+    IntTrie m_trie_;
+    /**
+     * Table to store all collation elements that are the last element of an
+     * expansion. This is for use in StringSearch.
+     */
+    int m_expansionEndCE_[];
+    /**
+     * Table to store the maximum size of any expansions that end with the
+     * corresponding collation element in m_expansionEndCE_. For use in
+     * StringSearch too
+     */
+    byte m_expansionEndCEMaxSize_[];
+    /**
+     * Heuristic table to store information on whether a char character is
+     * considered "unsafe". "Unsafe" character are combining marks or those
+     * belonging to some contraction sequence from the offset 1 onwards.
+     * E.g. if "ABC" is the only contraction, then 'B' and 'C' are considered
+     * unsafe. If we have another contraction "ZA" with the one above, then
+     * 'A', 'B', 'C' are "unsafe" but 'Z' is not.
+     */
+    byte m_unsafe_[];
+    /**
+     * Table to store information on whether a codepoint can occur as the last
+     * character in a contraction
+     */
+    byte m_contractionEnd_[];
+    /**
+     * Original collation rules
+     */
+    String m_rules_;
+    /**
+     * The smallest "unsafe" codepoint
+     */
+    char m_minUnsafe_;
+    /**
+     * The smallest codepoint that could be the end of a contraction
+     */
+    char m_minContractionEnd_;
+    /**
+     * General version of the collator
+     */
+    VersionInfo m_version_;
+    /**
+     * UCA version
+     */
+    VersionInfo m_UCA_version_;
+    /**
+     * UCD version
+     */
+    VersionInfo m_UCD_version_;
+
+    /**
+     * UnicodeData.txt property object
+     */
+    static final RuleBasedCollator UCA_;
+    /**
+     * UCA Constants
+     */
+    static final UCAConstants UCA_CONSTANTS_;
+    /**
+     * Table for UCA and builder use
+     */
+    static final char UCA_CONTRACTIONS_[];
+
+    private static boolean UCA_INIT_COMPLETE;
+
+    /**
+     * Implicit generator
+     */
+    static final ImplicitCEGenerator impCEGen_;
+//    /**
+//     * Implicit constants
+//     */
+//    static final int IMPLICIT_BASE_BYTE_;
+//    static final int IMPLICIT_LIMIT_BYTE_;
+//    static final int IMPLICIT_4BYTE_BOUNDARY_;
+//    static final int LAST_MULTIPLIER_;
+//    static final int LAST2_MULTIPLIER_;
+//    static final int IMPLICIT_BASE_3BYTE_;
+//    static final int IMPLICIT_BASE_4BYTE_;
+//    static final int BYTES_TO_AVOID_ = 3;
+//    static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;
+//    static final int LAST_COUNT_ = OTHER_COUNT_ / 2;
+//    /**
+//     * Room for intervening, without expanding to 5 bytes
+//     */
+//    static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;
+//    static final int IMPLICIT_3BYTE_COUNT_ = 1;
+//    
+    static final byte SORT_LEVEL_TERMINATOR_ = 1;
+
+//  These are values from UCA required for
+//  implicit generation and supressing sort key compression
+//  they should regularly be in the UCA, but if one
+//  is running without UCA, it could be a problem
+     static final int maxRegularPrimary  = 0xA0;
+     static final int minImplicitPrimary = 0xE0;
+     static final int maxImplicitPrimary = 0xE4;
+
+
+    // block to initialise character property database
+    static
+    {
+        // take pains to let static class init succeed, otherwise the class itself won't exist and
+        // clients will get a NoClassDefFoundException.  Instead, make the constructors fail if
+        // we can't load the UCA data.
+
+        RuleBasedCollator iUCA_ = null;
+        UCAConstants iUCA_CONSTANTS_ = null;
+        char iUCA_CONTRACTIONS_[] = null;
+        ImplicitCEGenerator iimpCEGen_ = null;
+        try
+        {
+            // !!! note what's going on here...
+            // even though the static init of the class is not yet complete, we
+            // instantiate an instance of the class.  So we'd better be sure that
+            // instantiation doesn't rely on the static initialization that's
+            // not complete yet!
+            iUCA_ = new RuleBasedCollator();
+            iUCA_CONSTANTS_ = new UCAConstants();
+            iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_);
+
+            // called before doing canonical closure for the UCA.
+            iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary);
+            //iimpCEGen_ = new ImplicitCEGenerator(iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_, iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);
+            iUCA_.init();
+            ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH);
+            iUCA_.m_rules_ = (String)rb.getObject("UCARules");
+        }
+        catch (MissingResourceException ex)
+        {
+//             throw ex;
+        }
+        catch (IOException e)
+        {
+           // e.printStackTrace();
+//             throw new MissingResourceException(e.getMessage(),"","");
+        }
+
+        UCA_ = iUCA_;
+        UCA_CONSTANTS_ = iUCA_CONSTANTS_;
+        UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_;
+        impCEGen_ = iimpCEGen_;
+
+        UCA_INIT_COMPLETE = true;
+    }
+
+
+    private static void checkUCA() throws MissingResourceException {
+        if (UCA_INIT_COMPLETE && UCA_ == null) {
+            throw new MissingResourceException("Collator UCA data unavailable", "", "");
+        }
+    }
+        
+    // package private constructors ------------------------------------------
+
+    /**
+    * <p>Private contructor for use by subclasses.
+    * Public access to creating Collators is handled by the API
+    * Collator.getInstance() or RuleBasedCollator(String rules).
+    * </p>
+    * <p>
+    * This constructor constructs the UCA collator internally
+    * </p>
+    */
+    RuleBasedCollator()
+    {
+        checkUCA();
+        initUtility(false);
+    }
+
+    /**
+     * Constructors a RuleBasedCollator from the argument locale.
+     * If no resource bundle is associated with the locale, UCA is used
+     * instead.
+     * @param locale
+     */
+    RuleBasedCollator(ULocale locale)
+    {
+        checkUCA();
+        ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
+        initUtility(false);
+        if (rb != null) {
+            try {
+                // Use keywords, if supplied for lookup
+                String collkey = locale.getKeywordValue("collation");
+                  if(collkey == null) {
+                      collkey = rb.getStringWithFallback("collations/default");
+                }
+                       
+                // collations/default will always give a string back
+                // keyword for the real collation data
+                // if "collations/collkey" will return null if collkey == null 
+                ICUResourceBundle elements = rb.getWithFallback("collations/" + collkey);
+                if (elements != null) {
+                    // TODO: Determine actual & valid locale correctly
+                    ULocale uloc = rb.getULocale();
+                    setLocale(uloc, uloc);
+
+                    m_rules_ = elements.getString("Sequence");
+                    ByteBuffer buf = elements.get("%%CollationBin").getBinary();
+                    // %%CollationBin
+                    if(buf!=null){
+                    //     m_rules_ = (String)rules[1][1];
+                        byte map[] = buf.array();
+                        CollatorReader.initRBC(this, map);
+                        /*
+                        BufferedInputStream input =
+                                                 new BufferedInputStream(
+                                                    new ByteArrayInputStream(map));
+                        /*
+                        CollatorReader reader = new CollatorReader(input, false);
+                        if (map.length > MIN_BINARY_DATA_SIZE_) {
+                            reader.read(this, null);
+                        }
+                        else {
+                            reader.readHeader(this);
+                            reader.readOptions(this);
+                            // duplicating UCA_'s data
+                            setWithUCATables();
+                        }
+                        */
+                        // at this point, we have read in the collator
+                        // now we need to check whether the binary image has
+                        // the right UCA and other versions
+                        if(!m_UCA_version_.equals(UCA_.m_UCA_version_) ||
+                        !m_UCD_version_.equals(UCA_.m_UCD_version_)) {
+                            init(m_rules_);
+                            return;
+                        }
+                        init();
+                        return;
+                    }
+                    else {                        
+                        init(m_rules_);
+                        return;
+                    }
+                }
+            }
+            catch (Exception e) {
+                // e.printStackTrace();
+                // if failed use UCA.
+            }
+        }
+        setWithUCAData();
+    }
+
+    // package private methods -----------------------------------------------
+
+    /**
+     * Sets this collator to use the tables in UCA. Note options not taken
+     * care of here.
+     */
+    final void setWithUCATables()
+    {
+        m_contractionOffset_ = UCA_.m_contractionOffset_;
+        m_expansionOffset_ = UCA_.m_expansionOffset_;
+        m_expansion_ = UCA_.m_expansion_;
+        m_contractionIndex_ = UCA_.m_contractionIndex_;
+        m_contractionCE_ = UCA_.m_contractionCE_;
+        m_trie_ = UCA_.m_trie_;
+        m_expansionEndCE_ = UCA_.m_expansionEndCE_;
+        m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_;
+        m_unsafe_ = UCA_.m_unsafe_;
+        m_contractionEnd_ = UCA_.m_contractionEnd_;
+        m_minUnsafe_ = UCA_.m_minUnsafe_;
+        m_minContractionEnd_ = UCA_.m_minContractionEnd_;
+    }
+
+    /**
+     * Sets this collator to use the all options and tables in UCA.
+     */
+    final void setWithUCAData()
+    {
+        latinOneFailed_ = true;
+
+        m_addition3_ = UCA_.m_addition3_;
+        m_bottom3_ = UCA_.m_bottom3_;
+        m_bottomCount3_ = UCA_.m_bottomCount3_;
+        m_caseFirst_ = UCA_.m_caseFirst_;
+        m_caseSwitch_ = UCA_.m_caseSwitch_;
+        m_common3_ = UCA_.m_common3_;
+        m_contractionOffset_ = UCA_.m_contractionOffset_;
+        setDecomposition(UCA_.getDecomposition());
+        m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_;
+        m_defaultDecomposition_ = UCA_.m_defaultDecomposition_;
+        m_defaultIsAlternateHandlingShifted_
+                                   = UCA_.m_defaultIsAlternateHandlingShifted_;
+        m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_;
+        m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_;
+        m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_;
+        m_defaultStrength_ = UCA_.m_defaultStrength_;
+        m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_;
+        m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_;
+        m_expansionOffset_ = UCA_.m_expansionOffset_;
+        m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_;
+        m_isCaseLevel_ = UCA_.m_isCaseLevel_;
+        m_isFrenchCollation_ = UCA_.m_isFrenchCollation_;
+        m_isHiragana4_ = UCA_.m_isHiragana4_;
+        m_isJamoSpecial_ = UCA_.m_isJamoSpecial_;
+        m_isSimple3_ = UCA_.m_isSimple3_;
+        m_mask3_ = UCA_.m_mask3_;
+        m_minContractionEnd_ = UCA_.m_minContractionEnd_;
+        m_minUnsafe_ = UCA_.m_minUnsafe_;
+        m_rules_ = UCA_.m_rules_;
+        setStrength(UCA_.getStrength());
+        m_top3_ = UCA_.m_top3_;
+        m_topCount3_ = UCA_.m_topCount3_;
+        m_variableTopValue_ = UCA_.m_variableTopValue_;
+        m_isNumericCollation_ = UCA_.m_isNumericCollation_;
+        setWithUCATables();
+        latinOneFailed_ = false;
+    }
+
+    /**
+     * Test whether a char character is potentially "unsafe" for use as a
+     * collation starting point. "Unsafe" characters are combining marks or
+     * those belonging to some contraction sequence from the offset 1 onwards.
+     * E.g. if "ABC" is the only contraction, then 'B' and
+     * 'C' are considered unsafe. If we have another contraction "ZA" with
+     * the one above, then 'A', 'B', 'C' are "unsafe" but 'Z' is not.
+     * @param ch character to determin
+     * @return true if ch is unsafe, false otherwise
+     */
+    final boolean isUnsafe(char ch)
+    {
+        if (ch < m_minUnsafe_) {
+            return false;
+        }
+        
+        if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
+            if (UTF16.isLeadSurrogate(ch) 
+                || UTF16.isTrailSurrogate(ch)) {
+                //  Trail surrogate are always considered unsafe.
+                return true;
+            }
+            ch &= HEURISTIC_OVERFLOW_MASK_;
+            ch += HEURISTIC_OVERFLOW_OFFSET_;
+        }
+        int value = m_unsafe_[ch >> HEURISTIC_SHIFT_];
+        return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;
+    }
+
+    /**
+     * Approximate determination if a char character is at a contraction end.
+     * Guaranteed to be true if a character is at the end of a contraction,
+     * otherwise it is not deterministic.
+     * @param ch character to be determined
+     */
+    final boolean isContractionEnd(char ch)
+    {
+        if (UTF16.isTrailSurrogate(ch)) {
+            return true;
+        }
+
+        if (ch < m_minContractionEnd_) {
+            return false;
+        }
+
+        if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
+            ch &= HEURISTIC_OVERFLOW_MASK_;
+            ch += HEURISTIC_OVERFLOW_OFFSET_;
+        }
+        int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_];
+        return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;
+    }
+
+    /**
+     * Retrieve the tag of a special ce
+     * @param ce ce to test
+     * @return tag of ce
+     */
+    static int getTag(int ce)
+    {
+        return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_;
+    }
+
+    /**
+     * Checking if ce is special
+     * @param ce to check
+     * @return true if ce is special
+     */
+    static boolean isSpecial(int ce)
+    {
+        return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_;
+    }
+
+    /**
+     * Checks if the argument ce is a continuation
+     * @param ce collation element to test
+     * @return true if ce is a continuation
+     */
+    static final boolean isContinuation(int ce)
+    {
+        return ce != CollationElementIterator.NULLORDER
+                       && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_;
+    }
+
+    // private inner classes ------------------------------------------------
+
+    // private variables -----------------------------------------------------
+
+    /**
+     * The smallest natural unsafe or contraction end char character before
+     * tailoring.
+     * This is a combining mark.
+     */
+    private static final int DEFAULT_MIN_HEURISTIC_ = 0x300;
+    /**
+     * Heuristic table table size. Size is 32 bytes, 1 bit for each
+     * latin 1 char, and some power of two for hashing the rest of the chars.
+     * Size in bytes.
+     */
+    private static final char HEURISTIC_SIZE_ = 1056;
+    /**
+     * Mask value down to "some power of two" - 1,
+     * number of bits, not num of bytes.
+     */
+    private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff;
+    /**
+     * Unsafe character shift
+     */
+    private static final int HEURISTIC_SHIFT_ = 3;
+    /**
+     * Unsafe character addition for character too large, it has to be folded
+     * then incremented.
+     */
+    private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256;
+    /**
+     * Mask value to get offset in heuristic table.
+     */
+    private static final char HEURISTIC_MASK_ = 7;
+
+    private int m_caseSwitch_;
+    private int m_common3_;
+    private int m_mask3_;
+    /**
+     * When switching case, we need to add or subtract different values.
+     */
+    private int m_addition3_;
+    /**
+     * Upper range when compressing
+     */
+    private int m_top3_;
+    /**
+     * Upper range when compressing
+     */
+    private int m_bottom3_;
+    private int m_topCount3_;
+    private int m_bottomCount3_;
+    /**
+     * Case first constants
+     */
+    private static final int CASE_SWITCH_ = 0xC0;
+    private static final int NO_CASE_SWITCH_ = 0;
+    /**
+     * Case level constants
+     */
+    private static final int CE_REMOVE_CASE_ = 0x3F;
+    private static final int CE_KEEP_CASE_ = 0xFF;
+    /**
+     * Case strength mask
+     */
+    private static final int CE_CASE_MASK_3_ = 0xFF;
+    /**
+     * Sortkey size factor. Values can be changed.
+     */
+    private static final double PROPORTION_2_ = 0.5;
+    private static final double PROPORTION_3_ = 0.667;
+
+    // These values come from the UCA ----------------------------------------
+
+    /**
+     * This is an enum that lists magic special byte values from the
+     * fractional UCA
+     */
+    //private static final byte BYTE_ZERO_ = 0x0;
+    //private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01;
+    //private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02;
+    private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03;
+    /*private*/ static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
+    //private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
+    static final byte CODAN_PLACEHOLDER = 0x27;
+    //private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C;
+    private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D;
+    private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF;
+    private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1;
+    private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;
+    private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40;
+    private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85;
+    private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45;
+    private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5;
+    private static final int COMMON_BOTTOM_3_ = 0x05;
+    private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86;
+    private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ =
+                                                              COMMON_BOTTOM_3_;
+    private static final int TOP_COUNT_2_ = (int)(PROPORTION_2_ * TOTAL_2_);
+    private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_;
+    private static final int COMMON_2_ = COMMON_BOTTOM_2_;
+    private static final int COMMON_UPPER_FIRST_3_ = 0xC5;
+    private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_;
+    //private static final int COMMON_4_ = (byte)0xFF;
+
+
+
+    /*
+     * Minimum size required for the binary collation data in bytes.
+     * Size of UCA header + size of options to 4 bytes
+     */
+    //private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
+
+    /**
+     * If this collator is to generate only simple tertiaries for fast path
+     */
+    private boolean m_isSimple3_;
+
+    /**
+     * French collation sorting flag
+     */
+    private boolean m_isFrenchCollation_;
+    /**
+     * Flag indicating if shifted is requested for Quaternary alternate
+     * handling. If this is not true, the default for alternate handling will
+     * be non-ignorable.
+     */
+    private boolean m_isAlternateHandlingShifted_;
+    /**
+     * Extra case level for sorting
+     */
+    private boolean m_isCaseLevel_;
+
+    private static final int SORT_BUFFER_INIT_SIZE_ = 128;
+    private static final int SORT_BUFFER_INIT_SIZE_1_ =
+                                                    SORT_BUFFER_INIT_SIZE_ << 3;
+    private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_;
+    private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_;
+    private static final int SORT_BUFFER_INIT_SIZE_CASE_ =
+                                                SORT_BUFFER_INIT_SIZE_ >> 2;
+    private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_;
+
+    private static final int CE_CONTINUATION_TAG_ = 0xC0;
+    private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F;
+
+    private static final int LAST_BYTE_MASK_ = 0xFF;
+
+    //private static final int CE_RESET_TOP_VALUE_ = 0x9F000303;
+    //private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303;
+
+    private static final byte SORT_CASE_BYTE_START_ = (byte)0x80;
+    private static final byte SORT_CASE_SHIFT_START_ = (byte)7;
+
+    /**
+     * CE buffer size
+     */
+    private static final int CE_BUFFER_SIZE_ = 512;
+
+    // variables for Latin-1 processing
+    boolean latinOneUse_        = false;
+    boolean latinOneRegenTable_ = false;
+    boolean latinOneFailed_     = false;
+
+    int latinOneTableLen_ = 0;
+    int latinOneCEs_[] = null;
+    /**
+     * Bunch of utility iterators
+     */
+    private StringUCharacterIterator m_srcUtilIter_;
+    private CollationElementIterator m_srcUtilColEIter_;
+    private StringUCharacterIterator m_tgtUtilIter_;
+    private CollationElementIterator m_tgtUtilColEIter_;
+    /**
+     * Utility comparison flags
+     */
+    private boolean m_utilCompare0_;
+    //private boolean m_utilCompare1_;
+    private boolean m_utilCompare2_;
+    private boolean m_utilCompare3_;
+    private boolean m_utilCompare4_;
+    private boolean m_utilCompare5_;
+    /**
+     * Utility byte buffer
+     */
+    private byte m_utilBytes0_[];
+    private byte m_utilBytes1_[];
+    private byte m_utilBytes2_[];
+    private byte m_utilBytes3_[];
+    private byte m_utilBytes4_[];
+    //private byte m_utilBytes5_[];
+    private RawCollationKey m_utilRawCollationKey_;
+
+    private int m_utilBytesCount0_;
+    private int m_utilBytesCount1_;
+    private int m_utilBytesCount2_;
+    private int m_utilBytesCount3_;
+    private int m_utilBytesCount4_;
+    //private int m_utilBytesCount5_;
+    //private int m_utilCount0_;
+    //private int m_utilCount1_;
+    private int m_utilCount2_;
+    private int m_utilCount3_;
+    private int m_utilCount4_;
+    //private int m_utilCount5_;
+
+    private int m_utilFrenchStart_;
+    private int m_utilFrenchEnd_;
+
+    /**
+     * Preparing the CE buffers. will be filled during the primary phase
+     */
+    private int m_srcUtilCEBuffer_[];
+    private int m_tgtUtilCEBuffer_[];
+    private int m_srcUtilCEBufferSize_;
+    private int m_tgtUtilCEBufferSize_;
+
+    private int m_srcUtilContOffset_;
+    private int m_tgtUtilContOffset_;
+
+    private int m_srcUtilOffset_;
+    private int m_tgtUtilOffset_;
+
+    // private methods -------------------------------------------------------
+
+    private void init(String rules) throws Exception
+    {
+        setWithUCAData();
+        CollationParsedRuleBuilder builder
+                                       = new CollationParsedRuleBuilder(rules);
+        builder.setRules(this);
+        m_rules_ = rules;
+        init();
+        initUtility(false);
+    }
+    
+    private final int compareRegular(String source, String target, int offset) {
+        if (m_srcUtilIter_ == null) {
+            initUtility(true);
+        }
+        int strength = getStrength();
+        // setting up the collator parameters
+        m_utilCompare0_ = m_isCaseLevel_;
+        //m_utilCompare1_ = true;
+        m_utilCompare2_ = strength >= SECONDARY;
+        m_utilCompare3_ = strength >= TERTIARY;
+        m_utilCompare4_ = strength >= QUATERNARY;
+        m_utilCompare5_ = strength == IDENTICAL;
+        boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_;
+        boolean doShift4 = m_isAlternateHandlingShifted_ && m_utilCompare4_;
+        boolean doHiragana4 = m_isHiragana4_ && m_utilCompare4_;
+
+        if (doHiragana4 && doShift4) {
+            String sourcesub = source.substring(offset);
+            String targetsub = target.substring(offset);
+            return compareBySortKeys(sourcesub, targetsub);
+        }
+
+        // This is the lowest primary value that will not be ignored if shifted
+        int lowestpvalue = m_isAlternateHandlingShifted_
+                                            ? m_variableTopValue_ << 16 : 0;
+        m_srcUtilCEBufferSize_ = 0;
+        m_tgtUtilCEBufferSize_ = 0;
+        int result = doPrimaryCompare(doHiragana4, lowestpvalue, source,
+                                      target, offset);
+        if (m_srcUtilCEBufferSize_ == -1
+            && m_tgtUtilCEBufferSize_ == -1) {
+            // since the cebuffer is cleared when we have determined that
+            // either source is greater than target or vice versa, the return
+            // result is the comparison result and not the hiragana result
+            return result;
+        }
+
+        int hiraganaresult = result;
+
+        if (m_utilCompare2_) {
+            result = doSecondaryCompare(doFrench);
+            if (result != 0) {
+                return result;
+            }
+        }
+        // doing the case bit
+        if (m_utilCompare0_) {
+            result = doCaseCompare();
+            if (result != 0) {
+                return result;
+            }
+        }
+        // Tertiary level
+        if (m_utilCompare3_) {
+            result = doTertiaryCompare();
+            if (result != 0) {
+                return result;
+            }
+        }
+
+        if (doShift4) {  // checkQuad
+            result = doQuaternaryCompare(lowestpvalue);
+            if (result != 0) {
+                return result;
+            }
+        }
+        else if (doHiragana4 && hiraganaresult != 0) {
+            // If we're fine on quaternaries, we might be different
+            // on Hiragana. This, however, might fail us in shifted.
+            return hiraganaresult;
+        }
+
+        // For IDENTICAL comparisons, we use a bitwise character comparison
+        // as a tiebreaker if all else is equal.
+        // Getting here  should be quite rare - strings are not identical -
+        // that is checked first, but compared == through all other checks.
+        if (m_utilCompare5_) {
+            return doIdenticalCompare(source, target, offset, true);
+        }
+        return 0;
+    }
+
+    /**
+     * Gets the 2 bytes of primary order and adds it to the primary byte array
+     * @param ce current ce
+     * @param notIsContinuation flag indicating if the current bytes belong to
+     *          a continuation ce
+     * @param doShift flag indicating if ce is to be shifted
+     * @param leadPrimary lead primary used for compression
+     * @param commonBottom4 common byte value for Quaternary
+     * @param bottomCount4 smallest byte value for Quaternary
+     * @return the new lead primary for compression
+     */
+    private final int doPrimaryBytes(int ce, boolean notIsContinuation,
+                                  boolean doShift, int leadPrimary,
+                                  int commonBottom4, int bottomCount4)
+    {
+
+        int p2 = (ce >>>= 16) & LAST_BYTE_MASK_; // in ints for unsigned
+        int p1 = ce >>> 8;  // comparison
+        if (doShift) {
+            if (m_utilCount4_ > 0) {
+                while (m_utilCount4_ > bottomCount4) {
+                    m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                         (byte)(commonBottom4 + bottomCount4));
+                    m_utilBytesCount4_ ++;
+                    m_utilCount4_ -= bottomCount4;
+                }
+                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                       (byte)(commonBottom4
+                                              + (m_utilCount4_ - 1)));
+                m_utilBytesCount4_ ++;
+                m_utilCount4_ = 0;
+            }
+            // dealing with a variable and we're treating them as shifted
+            // This is a shifted ignorable
+            if (p1 != 0) {
+                // we need to check this since we could be in continuation
+                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                       (byte)p1);
+                m_utilBytesCount4_ ++;
+            }
+            if (p2 != 0) {
+                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                       (byte)p2);
+                m_utilBytesCount4_ ++;
+            }
+        }
+        else {
+            // Note: This code assumes that the table is well built
+            // i.e. not having 0 bytes where they are not supposed to be.
+            // Usually, we'll have non-zero primary1 & primary2, except
+            // in cases of LatinOne and friends, when primary2 will be
+            // regular and simple sortkey calc
+            if (p1 != CollationElementIterator.IGNORABLE) {
+                if (notIsContinuation) {
+                    if (leadPrimary == p1) {
+                        m_utilBytes1_ = append(m_utilBytes1_,
+                                               m_utilBytesCount1_, (byte)p2);
+                        m_utilBytesCount1_ ++;
+                    }
+                    else {
+                        if (leadPrimary != 0) {
+                            m_utilBytes1_ = append(m_utilBytes1_,
+                                                   m_utilBytesCount1_,
+                                    ((p1 > leadPrimary)
+                                            ? BYTE_UNSHIFTED_MAX_
+                                            : BYTE_UNSHIFTED_MIN_)); 
+                            m_utilBytesCount1_ ++;
+                        }
+                        if (p2 == CollationElementIterator.IGNORABLE) {
+                            // one byter, not compressed
+                            m_utilBytes1_ = append(m_utilBytes1_,
+                                                   m_utilBytesCount1_,
+                                                   (byte)p1);
+                            m_utilBytesCount1_ ++;
+                            leadPrimary = 0;
+                        }
+                        else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_
+                              || (p1 > maxRegularPrimary
+                    //> (RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_[0]
+                    //                                              >>> 24)
+                                && p1 < minImplicitPrimary
+                    //< (RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_[0]
+                    //                                              >>> 24)
+                    )) {
+                                // not compressible
+                                leadPrimary = 0;
+                                m_utilBytes1_ = append(m_utilBytes1_,
+                                                       m_utilBytesCount1_,
+                                                       (byte)p1);
+                                m_utilBytesCount1_ ++;
+                                m_utilBytes1_ = append(m_utilBytes1_,
+                                                       m_utilBytesCount1_,
+                                                       (byte)p2);
+                                m_utilBytesCount1_ ++;
+                        }
+                        else { // compress
+                            leadPrimary = p1;
+                            m_utilBytes1_ = append(m_utilBytes1_,
+                                                   m_utilBytesCount1_,
+                                                   (byte)p1);
+                            m_utilBytesCount1_ ++;
+                            m_utilBytes1_ = append(m_utilBytes1_,
+                                                  m_utilBytesCount1_, (byte)p2);
+                            m_utilBytesCount1_ ++;
+                        }
+                    }
+                }
+                else {
+                    // continuation, add primary to the key, no compression
+                    m_utilBytes1_ = append(m_utilBytes1_,
+                                           m_utilBytesCount1_, (byte)p1);
+                    m_utilBytesCount1_ ++;
+                    if (p2 != CollationElementIterator.IGNORABLE) {
+                        m_utilBytes1_ = append(m_utilBytes1_,
+                                           m_utilBytesCount1_, (byte)p2);
+                        // second part
+                        m_utilBytesCount1_ ++;
+                    }
+                }
+            }
+        }
+        return leadPrimary;
+    }
+
+    /**
+     * Gets the secondary byte and adds it to the secondary byte array
+     * @param ce current ce
+     * @param notIsContinuation flag indicating if the current bytes belong to
+     *          a continuation ce
+     * @param doFrench flag indicator if french sort is to be performed
+     */
+    private final void doSecondaryBytes(int ce, boolean notIsContinuation,
+                                        boolean doFrench)
+    {
+        int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison
+        if (s != 0) {
+            if (!doFrench) {
+                // This is compression code.
+                if (s == COMMON_2_ && notIsContinuation) {
+                   m_utilCount2_ ++;
+                }
+                else {
+                    if (m_utilCount2_ > 0) {
+                        if (s > COMMON_2_) { // not necessary for 4th level.
+                            while (m_utilCount2_ > TOP_COUNT_2_) {
+                                m_utilBytes2_ = append(m_utilBytes2_,
+                                        m_utilBytesCount2_,
+                                        (byte)(COMMON_TOP_2_ - TOP_COUNT_2_));
+                                m_utilBytesCount2_ ++;
+                                m_utilCount2_ -= TOP_COUNT_2_;
+                            }
+                            m_utilBytes2_ = append(m_utilBytes2_,
+                                                   m_utilBytesCount2_,
+                                                   (byte)(COMMON_TOP_2_
+                                                       - (m_utilCount2_ - 1)));
+                            m_utilBytesCount2_ ++;
+                        }
+                        else {
+                            while (m_utilCount2_ > BOTTOM_COUNT_2_) {
+                                m_utilBytes2_ = append(m_utilBytes2_,
+                                                       m_utilBytesCount2_,
+                                    (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
+                                m_utilBytesCount2_ ++;
+                                m_utilCount2_ -= BOTTOM_COUNT_2_;
+                            }
+                            m_utilBytes2_ = append(m_utilBytes2_,
+                                                   m_utilBytesCount2_,
+                                                   (byte)(COMMON_BOTTOM_2_
+                                                       + (m_utilCount2_ - 1)));
+                            m_utilBytesCount2_ ++;
+                        }
+                        m_utilCount2_ = 0;
+                    }
+                    m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,
+                                           (byte)s);
+                    m_utilBytesCount2_ ++;
+                }
+            }
+            else {
+                  m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,
+                                         (byte)s);
+                  m_utilBytesCount2_ ++;
+                  // Do the special handling for French secondaries
+                  // We need to get continuation elements and do intermediate
+                  // restore
+                  // abc1c2c3de with french secondaries need to be edc1c2c3ba
+                  // NOT edc3c2c1ba
+                  if (notIsContinuation) {
+                        if (m_utilFrenchStart_ != -1) {
+                            // reverse secondaries from frenchStartPtr up to
+                            // frenchEndPtr
+                            reverseBuffer(m_utilBytes2_);
+                            m_utilFrenchStart_ = -1;
+                        }
+                  }
+                  else {
+                        if (m_utilFrenchStart_ == -1) {
+                            m_utilFrenchStart_  = m_utilBytesCount2_ - 2;
+                        }
+                        m_utilFrenchEnd_ = m_utilBytesCount2_ - 1;
+                  }
+            }
+        }
+    }
+
+    /**
+     * Reverse the argument buffer
+     * @param buffer to reverse
+     */
+    private void reverseBuffer(byte buffer[])
+    {
+        int start = m_utilFrenchStart_;
+        int end = m_utilFrenchEnd_;
+        while (start < end) {
+            byte b = buffer[start];
+            buffer[start ++] = buffer[end];
+            buffer[end --] = b;
+        }
+    }
+
+    /**
+     * Insert the case shifting byte if required
+     * @param caseshift value
+     * @return new caseshift value
+     */
+    private final int doCaseShift(int caseshift)
+    {
+        if (caseshift  == 0) {
+            m_utilBytes0_ = append(m_utilBytes0_, m_utilBytesCount0_,
+                                   SORT_CASE_BYTE_START_);
+            m_utilBytesCount0_ ++;
+            caseshift = SORT_CASE_SHIFT_START_;
+        }
+        return caseshift;
+    }
+
+    /**
+     * Performs the casing sort
+     * @param tertiary byte in ints for easy comparison
+     * @param notIsContinuation flag indicating if the current bytes belong to
+     *          a continuation ce
+     * @param caseshift
+     * @return the new value of case shift
+     */
+    private final int doCaseBytes(int tertiary, boolean notIsContinuation,
+                                  int caseshift)
+    {
+        caseshift = doCaseShift(caseshift);
+
+        if (notIsContinuation && tertiary != 0) {
+            byte casebits = (byte)(tertiary & 0xC0);
+            if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
+                if (casebits == 0) {
+                    m_utilBytes0_[m_utilBytesCount0_ - 1]
+                                                      |= (1 << (-- caseshift));
+                }
+                else {
+                     // second bit
+                     caseshift = doCaseShift(caseshift - 1);
+                     m_utilBytes0_[m_utilBytesCount0_ - 1]
+                                    |= ((casebits >> 6) & 1) << (-- caseshift);
+                }
+            }
+            else {
+                if (casebits != 0) {
+                    m_utilBytes0_[m_utilBytesCount0_ - 1]
+                                                        |= 1 << (-- caseshift);
+                    // second bit
+                    caseshift = doCaseShift(caseshift);
+                    m_utilBytes0_[m_utilBytesCount0_ - 1]
+                                  |= ((casebits >> 7) & 1) << (-- caseshift);
+                }
+                else {
+                    caseshift --;
+                }
+            }
+        }
+
+        return caseshift;
+    }
+
+    /**
+     * Gets the tertiary byte and adds it to the tertiary byte array
+     * @param tertiary byte in int for easy comparison
+     * @param notIsContinuation flag indicating if the current bytes belong to
+     *          a continuation ce
+     */
+    private final void doTertiaryBytes(int tertiary, boolean notIsContinuation)
+    {
+        if (tertiary != 0) {
+            // This is compression code.
+            // sequence size check is included in the if clause
+            if (tertiary == m_common3_ && notIsContinuation) {
+                 m_utilCount3_ ++;
+            }
+            else {
+                int common3 = m_common3_ & LAST_BYTE_MASK_;
+                if (tertiary > common3 && m_common3_ == COMMON_NORMAL_3_) {
+                    tertiary += m_addition3_;
+                }
+                else if (tertiary <= common3
+                         && m_common3_ == COMMON_UPPER_FIRST_3_) {
+                    tertiary -= m_addition3_;
+                }
+                if (m_utilCount3_ > 0) {
+                    if (tertiary > common3) {
+                        while (m_utilCount3_ > m_topCount3_) {
+                            m_utilBytes3_ = append(m_utilBytes3_,
+                                                   m_utilBytesCount3_,
+                                            (byte)(m_top3_ - m_topCount3_));
+                            m_utilBytesCount3_ ++;
+                            m_utilCount3_ -= m_topCount3_;
+                        }
+                        m_utilBytes3_ = append(m_utilBytes3_,
+                                               m_utilBytesCount3_,
+                                               (byte)(m_top3_
+                                                      - (m_utilCount3_ - 1)));
+                        m_utilBytesCount3_ ++;
+                    }
+                    else {
+                        while (m_utilCount3_ > m_bottomCount3_) {
+                            m_utilBytes3_ = append(m_utilBytes3_,
+                                                   m_utilBytesCount3_,
+                                         (byte)(m_bottom3_ + m_bottomCount3_));
+                            m_utilBytesCount3_ ++;
+                            m_utilCount3_ -= m_bottomCount3_;
+                        }
+                        m_utilBytes3_ = append(m_utilBytes3_,
+                                               m_utilBytesCount3_,
+                                               (byte)(m_bottom3_
+                                                      + (m_utilCount3_ - 1)));
+                        m_utilBytesCount3_ ++;
+                    }
+                    m_utilCount3_ = 0;
+                }
+                m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,
+                                       (byte)tertiary);
+                m_utilBytesCount3_ ++;
+            }
+        }
+    }
+
+    /**
+     * Gets the Quaternary byte and adds it to the Quaternary byte array
+     * @param isCodePointHiragana flag indicator if the previous codepoint
+     *          we dealt with was Hiragana
+     * @param commonBottom4 smallest common Quaternary byte
+     * @param bottomCount4 smallest Quaternary byte
+     * @param hiragana4 hiragana Quaternary byte
+     */
+    private final void doQuaternaryBytes(boolean isCodePointHiragana,
+                                      int commonBottom4, int bottomCount4,
+                                      byte hiragana4)
+    {
+        if (isCodePointHiragana) { // This was Hiragana, need to note it
+            if (m_utilCount4_ > 0) { // Close this part
+                while (m_utilCount4_ > bottomCount4) {
+                    m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                           (byte)(commonBottom4
+                                                        + bottomCount4));
+                    m_utilBytesCount4_ ++;
+                    m_utilCount4_ -= bottomCount4;
+                }
+                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                      (byte)(commonBottom4
+                                             + (m_utilCount4_ - 1)));
+                m_utilBytesCount4_ ++;
+                m_utilCount4_ = 0;
+            }
+            m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                   hiragana4); // Add the Hiragana
+            m_utilBytesCount4_ ++;
+        }
+        else { // This wasn't Hiragana, so we can continue adding stuff
+            m_utilCount4_ ++;
+        }
+    }
+
+    /**
+     * Iterates through the argument string for all ces.
+     * Split the ces into their relevant primaries, secondaries etc.
+     * @param source normalized string
+     * @param doFrench flag indicator if special handling of French has to be
+     *                  done
+     * @param hiragana4 offset for Hiragana quaternary
+     * @param commonBottom4 smallest common quaternary byte
+     * @param bottomCount4 smallest quaternary byte
+     */
+    private final void getSortKeyBytes(String source, boolean doFrench,
+                                       byte hiragana4, int commonBottom4,
+                                       int bottomCount4)
+
+    {
+        if (m_srcUtilIter_ == null) {
+            initUtility(true);
+        }
+        int backupDecomposition = getDecomposition();
+        setDecomposition(NO_DECOMPOSITION); // have to revert to backup later
+        m_srcUtilIter_.setText(source);
+        m_srcUtilColEIter_.setText(m_srcUtilIter_);
+        m_utilFrenchStart_ = -1;
+        m_utilFrenchEnd_ = -1;
+
+        // scriptorder not implemented yet
+        // const uint8_t *scriptOrder = coll->scriptOrder;
+
+        boolean doShift = false;
+        boolean notIsContinuation = false;
+
+        int leadPrimary = 0; // int for easier comparison
+        int caseShift = 0;
+
+        while (true) {
+            int ce = m_srcUtilColEIter_.next();
+            if (ce == CollationElementIterator.NULLORDER) {
+                break;
+            }
+
+            if (ce == CollationElementIterator.IGNORABLE) {
+                continue;
+            }
+
+            notIsContinuation = !isContinuation(ce);
+
+            /*
+             * if (notIsContinuation) {
+                    if (scriptOrder != NULL) {
+                        primary1 = scriptOrder[primary1];
+                    }
+                }*/
+            boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0;
+            // actually we can just check that the first byte is 0
+            // generation stuffs the order left first
+            boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_)
+                                               <= m_variableTopValue_;
+            doShift = (m_isAlternateHandlingShifted_
+                        && ((notIsContinuation && isSmallerThanVariableTop
+                            && !isPrimaryByteIgnorable) // primary byte not 0
+                        || (!notIsContinuation && doShift))
+                        || (doShift && isPrimaryByteIgnorable));
+            if (doShift && isPrimaryByteIgnorable) {
+                // amendment to the UCA says that primary ignorables and other
+                // ignorables should be removed if following a shifted code
+                // point
+                // if we were shifted and we got an ignorable code point
+                // we should just completely ignore it
+                continue;
+            }
+            leadPrimary = doPrimaryBytes(ce, notIsContinuation, doShift,
+                                         leadPrimary, commonBottom4,
+                                         bottomCount4);
+            if (doShift) {
+                continue;
+            }
+            if (m_utilCompare2_) {
+                doSecondaryBytes(ce, notIsContinuation, doFrench);
+            }
+
+            int t = ce & LAST_BYTE_MASK_;
+            if (!notIsContinuation) {
+                t = ce & CE_REMOVE_CONTINUATION_MASK_;
+            }
+
+            if (m_utilCompare0_ && (!isPrimaryByteIgnorable || m_utilCompare2_)) {
+                // do the case level if we need to do it. We don't want to calculate
+                // case level for primary ignorables if we have only primary strength and case level
+                // otherwise we would break well formedness of CEs 
+                caseShift = doCaseBytes(t, notIsContinuation, caseShift);
+            }
+            else if (notIsContinuation) {
+                 t ^= m_caseSwitch_;
+            }
+
+            t &= m_mask3_;
+
+            if (m_utilCompare3_) {
+                doTertiaryBytes(t, notIsContinuation);
+            }
+
+            if (m_utilCompare4_ && notIsContinuation) { // compare quad
+                doQuaternaryBytes(m_srcUtilColEIter_.m_isCodePointHiragana_,
+                                  commonBottom4, bottomCount4, hiragana4);
+            }
+        }
+        setDecomposition(backupDecomposition); // reverts to original
+        if (m_utilFrenchStart_ != -1) {
+            // one last round of checks
+            reverseBuffer(m_utilBytes2_);
+        }
+    }
+
+    /**
+     * From the individual strength byte results the final compact sortkey
+     * will be calculated.
+     * @param source text string
+     * @param doFrench flag indicating that special handling of French has to
+     *                  be done
+     * @param commonBottom4 smallest common quaternary byte
+     * @param bottomCount4 smallest quaternary byte
+     * @param key output RawCollationKey to store results, key cannot be null
+     */
+    private final void getSortKey(String source, boolean doFrench,
+                                             int commonBottom4, 
+                                             int bottomCount4,
+                                             RawCollationKey key)
+    {
+        // we have done all the CE's, now let's put them together to form
+        // a key
+        if (m_utilCompare2_) {
+            doSecondary(doFrench);
+        }
+        // adding case level should be independent of secondary level
+        if (m_utilCompare0_) {
+            doCase();
+        }
+        if (m_utilCompare3_) {
+            doTertiary();
+            if (m_utilCompare4_) {
+                doQuaternary(commonBottom4, bottomCount4);
+                if (m_utilCompare5_) {
+                    doIdentical(source);
+                }
+
+            }
+        }
+        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte)0);
+        m_utilBytesCount1_ ++;
+
+        key.set(m_utilBytes1_, 0, m_utilBytesCount1_);
+    }
+
+    /**
+     * Packs the French bytes
+     */
+    private final void doFrench()
+    {
+        for (int i = 0; i < m_utilBytesCount2_; i ++) {
+            byte s = m_utilBytes2_[m_utilBytesCount2_ - i - 1];
+            // This is compression code.
+            if (s == COMMON_2_) {
+                ++ m_utilCount2_;
+            }
+            else {
+                if (m_utilCount2_ > 0) {
+                    // getting the unsigned value
+                    if ((s & LAST_BYTE_MASK_) > COMMON_2_) {
+                        // not necessary for 4th level.
+                        while (m_utilCount2_ > TOP_COUNT_2_) {
+                            m_utilBytes1_ = append(m_utilBytes1_,
+                                                   m_utilBytesCount1_,
+                                        (byte)(COMMON_TOP_2_ - TOP_COUNT_2_));
+                            m_utilBytesCount1_ ++;
+                            m_utilCount2_ -= TOP_COUNT_2_;
+                        }
+                        m_utilBytes1_ = append(m_utilBytes1_,
+                                               m_utilBytesCount1_,
+                                               (byte)(COMMON_TOP_2_
+                                                      - (m_utilCount2_ - 1)));
+                        m_utilBytesCount1_ ++;
+                    }
+                    else {
+                        while (m_utilCount2_ > BOTTOM_COUNT_2_) {
+                            m_utilBytes1_ = append(m_utilBytes1_,
+                                                   m_utilBytesCount1_,
+                                (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
+                            m_utilBytesCount1_ ++;
+                            m_utilCount2_ -= BOTTOM_COUNT_2_;
+                        }
+                        m_utilBytes1_ = append(m_utilBytes1_,
+                                               m_utilBytesCount1_,
+                                               (byte)(COMMON_BOTTOM_2_
+                                                      + (m_utilCount2_ - 1)));
+                        m_utilBytesCount1_ ++;
+                    }
+                    m_utilCount2_ = 0;
+                }
+                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, s);
+                m_utilBytesCount1_ ++;
+            }
+        }
+        if (m_utilCount2_ > 0) {
+            while (m_utilCount2_ > BOTTOM_COUNT_2_) {
+                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                                       (byte)(COMMON_BOTTOM_2_
+                                                    + BOTTOM_COUNT_2_));
+                m_utilBytesCount1_ ++;
+                m_utilCount2_ -= BOTTOM_COUNT_2_;
+            }
+            m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                                   (byte)(COMMON_BOTTOM_2_
+                                                    + (m_utilCount2_ - 1)));
+            m_utilBytesCount1_ ++;
+        }
+    }
+
+    /**
+     * Compacts the secondary bytes and stores them into the primary array
+     * @param doFrench flag indicator that French has to be handled specially
+     */
+    private final void doSecondary(boolean doFrench)
+    {
+        if (m_utilCount2_ > 0) {
+            while (m_utilCount2_ > BOTTOM_COUNT_2_) {
+                m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,
+                                       (byte)(COMMON_BOTTOM_2_
+                                                        + BOTTOM_COUNT_2_));
+                m_utilBytesCount2_ ++;
+                m_utilCount2_ -= BOTTOM_COUNT_2_;
+            }
+            m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,
+                                   (byte)(COMMON_BOTTOM_2_ +
+                                                    (m_utilCount2_ - 1)));
+            m_utilBytesCount2_ ++;
+        }
+
+        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                               SORT_LEVEL_TERMINATOR_);
+        m_utilBytesCount1_ ++;
+
+        if (doFrench) { // do the reverse copy
+            doFrench();
+        }
+        else {
+            if (m_utilBytes1_.length <= m_utilBytesCount1_
+                                        + m_utilBytesCount2_) {
+                m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
+                                         m_utilBytesCount2_);
+            }
+            System.arraycopy(m_utilBytes2_, 0, m_utilBytes1_,
+                             m_utilBytesCount1_, m_utilBytesCount2_);
+            m_utilBytesCount1_ += m_utilBytesCount2_;
+        }
+    }
+
+    /**
+     * Increase buffer size
+     * @param buffer array of bytes
+     * @param size of the byte array
+     * @param incrementsize size to increase
+     * @return the new buffer
+     */
+    private static final byte[] increase(byte buffer[], int size,
+                                         int incrementsize)
+    {
+        byte result[] = new byte[buffer.length + incrementsize];
+        System.arraycopy(buffer, 0, result, 0, size);
+        return result;
+    }
+
+    /**
+     * Increase buffer size
+     * @param buffer array of ints
+     * @param size of the byte array
+     * @param incrementsize size to increase
+     * @return the new buffer
+     */
+    private static final int[] increase(int buffer[], int size,
+                                        int incrementsize)
+    {
+        int result[] = new int[buffer.length + incrementsize];
+        System.arraycopy(buffer, 0, result, 0, size);
+        return result;
+    }
+
+    /**
+     * Compacts the case bytes and stores them into the primary array
+     */
+    private final void doCase()
+    {
+        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                               SORT_LEVEL_TERMINATOR_);
+        m_utilBytesCount1_ ++;
+        if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount0_) {
+            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
+                                     m_utilBytesCount0_);
+        }
+        System.arraycopy(m_utilBytes0_, 0, m_utilBytes1_, m_utilBytesCount1_,
+                         m_utilBytesCount0_);
+        m_utilBytesCount1_ += m_utilBytesCount0_;
+    }
+
+    /**
+     * Compacts the tertiary bytes and stores them into the primary array
+     */
+    private final void doTertiary()
+    {
+        if (m_utilCount3_ > 0) {
+            if (m_common3_ != COMMON_BOTTOM_3_) {
+                while (m_utilCount3_ >= m_topCount3_) {
+                    m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,
+                                           (byte)(m_top3_ - m_topCount3_));
+                    m_utilBytesCount3_ ++;
+                    m_utilCount3_ -= m_topCount3_;
+                }
+                m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,
+                                       (byte)(m_top3_ - m_utilCount3_));
+                m_utilBytesCount3_ ++;
+            }
+            else {
+                while (m_utilCount3_ > m_bottomCount3_) {
+                    m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,
+                                           (byte)(m_bottom3_
+                                                        + m_bottomCount3_));
+                    m_utilBytesCount3_ ++;
+                    m_utilCount3_ -= m_bottomCount3_;
+                }
+                m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_,
+                                       (byte)(m_bottom3_
+                                              + (m_utilCount3_ - 1)));
+                m_utilBytesCount3_ ++;
+            }
+        }
+        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                               SORT_LEVEL_TERMINATOR_);
+        m_utilBytesCount1_ ++;
+        if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount3_) {
+            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
+                                     m_utilBytesCount3_);
+        }
+        System.arraycopy(m_utilBytes3_, 0, m_utilBytes1_, m_utilBytesCount1_,
+                         m_utilBytesCount3_);
+        m_utilBytesCount1_ += m_utilBytesCount3_;
+    }
+
+    /**
+     * Compacts the quaternary bytes and stores them into the primary array
+     */
+    private final void doQuaternary(int commonbottom4, int bottomcount4)
+    {
+        if (m_utilCount4_ > 0) {
+            while (m_utilCount4_ > bottomcount4) {
+                m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                       (byte)(commonbottom4 + bottomcount4));
+                m_utilBytesCount4_ ++;
+                m_utilCount4_ -= bottomcount4;
+            }
+            m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
+                                   (byte)(commonbottom4
+                                                + (m_utilCount4_ - 1)));
+            m_utilBytesCount4_ ++;
+        }
+        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                               SORT_LEVEL_TERMINATOR_);
+        m_utilBytesCount1_ ++;
+        if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount4_) {
+            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
+                                     m_utilBytesCount4_);
+        }
+        System.arraycopy(m_utilBytes4_, 0, m_utilBytes1_, m_utilBytesCount1_,
+                         m_utilBytesCount4_);
+        m_utilBytesCount1_ += m_utilBytesCount4_;
+    }
+
+    /**
+     * Deals with the identical sort.
+     * Appends the BOCSU version of the source string to the ends of the
+     * byte buffer.
+     * @param source text string
+     */
+    private final void doIdentical(String source)
+    {
+        int isize = BOCU.getCompressionLength(source);
+        m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
+                               SORT_LEVEL_TERMINATOR_);
+        m_utilBytesCount1_ ++;
+        if (m_utilBytes1_.length <= m_utilBytesCount1_ + isize) {
+            m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
+                                     1 + isize);
+        }
+        m_utilBytesCount1_ = BOCU.compress(source, m_utilBytes1_,
+                                           m_utilBytesCount1_);
+    }
+
+    /**
+     * Gets the offset of the first unmatched characters in source and target.
+     * This method returns the offset of the start of a contraction or a
+     * combining sequence, if the first difference is in the middle of such a
+     * sequence.
+     * @param source string
+     * @param target string
+     * @return offset of the first unmatched characters in source and target.
+     */
+    private final int getFirstUnmatchedOffset(String source, String target)
+    {
+        int result = 0;
+        int slength = source.length();
+        int tlength = target.length();
+        int minlength = slength;
+        if (minlength > tlength) {
+            minlength = tlength;
+        }
+        while (result < minlength
+                && source.charAt(result) == target.charAt(result)) {
+            result ++;
+        }
+        if (result > 0) {
+            // There is an identical portion at the beginning of the two
+            // strings. If the identical portion ends within a contraction or a
+            // combining character sequence, back up to the start of that
+            // sequence.
+            char schar = 0;
+            char tchar = 0;
+            if (result < minlength) {
+                schar = source.charAt(result); // first differing chars
+                tchar = target.charAt(result);
+            }
+            else {
+                schar = source.charAt(minlength - 1);
+                if (isUnsafe(schar)) {
+                    tchar = schar;
+                }
+                else if (slength == tlength) {
+                        return result;
+                }
+                else if (slength < tlength) {
+                    tchar = target.charAt(result);
+                }
+                else {
+                    schar = source.charAt(result);
+                }
+            }
+            if (isUnsafe(schar) || isUnsafe(tchar))
+            {
+                // We are stopped in the middle of a contraction or combining
+                // sequence.
+                // Look backwards for the part of the string for the start of
+                // the sequence
+                // It doesn't matter which string we scan, since they are the
+                // same in this region.
+                do {
+                    result --;
+                }
+                while (result > 0 && isUnsafe(source.charAt(result)));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Appending an byte to an array of bytes and increases it if we run out of
+     * space
+     * @param array of byte arrays
+     * @param appendindex index in the byte array to append
+     * @param value to append
+     * @return array if array size can accomodate the new value, otherwise
+     *         a bigger array will be created and returned
+     */
+    private static final byte[] append(byte array[], int appendindex,
+                                       byte value)
+    {
+        try {
+            array[appendindex] = value;
+        }
+        catch (ArrayIndexOutOfBoundsException e) {
+            array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
+            array[appendindex] = value;
+        }
+        return array;
+    }
+
+    /**
+     * This is a trick string compare function that goes in and uses sortkeys
+     * to compare. It is used when compare gets in trouble and needs to bail
+     * out.
+     * @param source text string
+     * @param target text string
+     */
+    private final int compareBySortKeys(String source, String target)
+
+    {
+        m_utilRawCollationKey_ = getRawCollationKey(source, 
+                                                    m_utilRawCollationKey_);
+        // this method is very seldom called
+        RawCollationKey targetkey = getRawCollationKey(target, null);
+        return m_utilRawCollationKey_.compareTo(targetkey);
+    }
+
+    /**
+     * Performs the primary comparisons, and fills up the CE buffer at the
+     * same time.
+     * The return value toggles between the comparison result and the hiragana
+     * result. If either the source is greater than target or vice versa, the
+     * return result is the comparison result, ie 1 or -1, furthermore the
+     * cebuffers will be cleared when that happens. If the primary comparisons
+     * are equal, we'll have to continue with secondary comparison. In this case
+     * the cebuffer will not be cleared and the return result will be the
+     * hiragana result.
+     * @param doHiragana4 flag indicator that Hiragana Quaternary has to be
+     *                  observed
+     * @param lowestpvalue the lowest primary value that will not be ignored if
+     *                      alternate handling is shifted
+     * @param source text string
+     * @param target text string
+     * @param textoffset offset in text to start the comparison
+     * @return comparion result if a primary difference is found, otherwise
+     *                      hiragana result
+     */
+    private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue,
+                                        String source, String target,
+                                        int textoffset)
+
+    {
+        // Preparing the context objects for iterating over strings
+        m_srcUtilIter_.setText(source);
+        m_srcUtilColEIter_.setText(m_srcUtilIter_, textoffset);
+        m_tgtUtilIter_.setText(target);
+        m_tgtUtilColEIter_.setText(m_tgtUtilIter_, textoffset);
+
+        // Non shifted primary processing is quite simple
+        if (!m_isAlternateHandlingShifted_) {
+            int hiraganaresult = 0;
+            while (true) {
+                int sorder = 0;
+                // We fetch CEs until we hit a non ignorable primary or end.
+                do {
+                    sorder = m_srcUtilColEIter_.next();
+                    m_srcUtilCEBuffer_ = append(m_srcUtilCEBuffer_,
+                                                m_srcUtilCEBufferSize_, sorder);
+                    m_srcUtilCEBufferSize_ ++;
+                    sorder &= CE_PRIMARY_MASK_;
+                } while (sorder == CollationElementIterator.IGNORABLE);
+
+                int torder = 0;
+                do {
+                    torder = m_tgtUtilColEIter_.next();
+                    m_tgtUtilCEBuffer_ = append(m_tgtUtilCEBuffer_,
+                                                m_tgtUtilCEBufferSize_, torder);
+                    m_tgtUtilCEBufferSize_ ++;
+                    torder &= CE_PRIMARY_MASK_;
+                } while (torder == CollationElementIterator.IGNORABLE);
+
+                // if both primaries are the same
+                if (sorder == torder) {
+                    // and there are no more CEs, we advance to the next level
+                    // see if we are at the end of either string
+                    if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1]
+                                        == CollationElementIterator.NULLORDER) {
+                        if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] 
+                            != CollationElementIterator.NULLORDER) {
+                            return -1;
+                        }
+                        break;
+                    }
+                    else if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1]
+                             == CollationElementIterator.NULLORDER) {
+                        return 1;
+                    }
+                    if (doHiragana4 && hiraganaresult == 0
+                        && m_srcUtilColEIter_.m_isCodePointHiragana_ !=
+                                        m_tgtUtilColEIter_.m_isCodePointHiragana_) {
+                        if (m_srcUtilColEIter_.m_isCodePointHiragana_) {
+                            hiraganaresult = -1;
+                        }
+                        else {
+                            hiraganaresult = 1;
+                        }
+                    }
+                }
+                else {
+                    // if two primaries are different, we are done
+                    return endPrimaryCompare(sorder, torder);
+                }
+            }
+            // no primary difference... do the rest from the buffers
+            return hiraganaresult;
+        }
+        else { // shifted - do a slightly more complicated processing :)
+            while (true) {
+                int sorder = getPrimaryShiftedCompareCE(m_srcUtilColEIter_,
+                                                        lowestpvalue, true);
+                int torder = getPrimaryShiftedCompareCE(m_tgtUtilColEIter_,
+                                                        lowestpvalue, false);
+                if (sorder == torder) {
+                    if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1]
+                            == CollationElementIterator.NULLORDER) {
+                        break;
+                    }
+                    else {
+                        continue;
+                    }
+                }
+                else {
+                    return endPrimaryCompare(sorder, torder);
+                }
+            } // no primary difference... do the rest from the buffers
+        }
+        return 0;
+    }
+
+    /**
+     * This is used only for primary strength when we know that sorder is
+     * already different from torder.
+     * Compares sorder and torder, returns -1 if sorder is less than torder.
+     * Clears the cebuffer at the same time.
+     * @param sorder source strength order
+     * @param torder target strength order
+     * @return the comparison result of sorder and torder
+     */
+    private final int endPrimaryCompare(int sorder, int torder)
+    {
+        // if we reach here, the ce offset accessed is the last ce
+        // appended to the buffer
+        boolean isSourceNullOrder = (m_srcUtilCEBuffer_[
+                                                    m_srcUtilCEBufferSize_ - 1]
+                                        == CollationElementIterator.NULLORDER);
+        boolean isTargetNullOrder = (m_tgtUtilCEBuffer_[
+                                                    m_tgtUtilCEBufferSize_ - 1]
+                                        == CollationElementIterator.NULLORDER);
+        m_srcUtilCEBufferSize_ = -1;
+        m_tgtUtilCEBufferSize_ = -1;
+        if (isSourceNullOrder) {
+            return -1;
+        }
+        if (isTargetNullOrder) {
+            return 1;
+        }
+        // getting rid of the sign
+        sorder >>>= CE_PRIMARY_SHIFT_;
+        torder >>>= CE_PRIMARY_SHIFT_;
+        if (sorder < torder) {
+            return -1;
+        }
+        return 1;
+    }
+
+    /**
+     * Calculates the next primary shifted value and fills up cebuffer with the
+     * next non-ignorable ce.
+     * @param coleiter collation element iterator
+     * @param doHiragana4 flag indicator if hiragana quaternary is to be
+     *                      handled
+     * @param lowestpvalue lowest primary shifted value that will not be
+     *                      ignored
+     * @return result next modified ce
+     */
+    private final int getPrimaryShiftedCompareCE(
+                                        CollationElementIterator coleiter,
+                                        int lowestpvalue, boolean isSrc)
+
+    {
+        boolean shifted = false;
+        int result = CollationElementIterator.IGNORABLE;
+        int cebuffer[] = m_srcUtilCEBuffer_;
+        int cebuffersize = m_srcUtilCEBufferSize_;
+        if (!isSrc) {
+            cebuffer = m_tgtUtilCEBuffer_;
+            cebuffersize = m_tgtUtilCEBufferSize_;
+        }
+        while (true) {
+            result = coleiter.next();
+            if (result == CollationElementIterator.NULLORDER) {
+                cebuffer = append(cebuffer, cebuffersize, result);
+                cebuffersize ++;
+                break;
+            }
+            else if (result == CollationElementIterator.IGNORABLE
+                     || (shifted
+                         && (result & CE_PRIMARY_MASK_)
+                                      == CollationElementIterator.IGNORABLE)) {
+                // UCA amendment - ignore ignorables that follow shifted code
+                // points
+                continue;
+            }
+            else if (isContinuation(result)) {
+                if ((result & CE_PRIMARY_MASK_)
+                                    != CollationElementIterator.IGNORABLE) {
+                    // There is primary value
+                    if (shifted) {
+                        result = (result & CE_PRIMARY_MASK_)
+                                            | CE_CONTINUATION_MARKER_;
+                        // preserve interesting continuation
+                        cebuffer = append(cebuffer, cebuffersize, result);
+                        cebuffersize ++;
+                        continue;
+                    }
+                    else {
+                        cebuffer = append(cebuffer, cebuffersize, result);
+                        cebuffersize ++;
+                        break;
+                    }
+                }
+                else { // Just lower level values
+                    if (!shifted) {
+                        cebuffer = append(cebuffer, cebuffersize, result);
+                        cebuffersize ++;
+                    }
+                }
+            }
+            else { // regular
+                if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_,
+                                            lowestpvalue) > 0) {
+                    cebuffer = append(cebuffer, cebuffersize, result);
+                    cebuffersize ++;
+                    break;
+                }
+                else {
+                    if ((result & CE_PRIMARY_MASK_) != 0) {
+                        shifted = true;
+                        result &= CE_PRIMARY_MASK_;
+                        cebuffer = append(cebuffer, cebuffersize, result);
+                        cebuffersize ++;
+                        continue;
+                    }
+                    else {
+                        cebuffer = append(cebuffer, cebuffersize, result);
+                        cebuffersize ++;
+                        shifted = false;
+                        continue;
+                    }
+                }
+            }
+        }
+        if (isSrc) {
+            m_srcUtilCEBuffer_ = cebuffer;
+            m_srcUtilCEBufferSize_ = cebuffersize;
+        }
+        else {
+            m_tgtUtilCEBuffer_ = cebuffer;
+            m_tgtUtilCEBufferSize_ = cebuffersize;
+        }
+        result &= CE_PRIMARY_MASK_;
+        return result;
+    }
+
+    /**
+     * Appending an int to an array of ints and increases it if we run out of
+     * space
+     * @param array of int arrays
+     * @param appendindex index at which value will be appended
+     * @param value to append
+     * @return array if size is not increased, otherwise a new array will be
+     *         returned
+     */
+    private static final int[] append(int array[], int appendindex, int value)
+    {
+        if (appendindex + 1 >= array.length) {
+            array = increase(array, appendindex, CE_BUFFER_SIZE_);
+        }
+        array[appendindex] = value;
+        return array;
+    }
+
+    /**
+     * Does secondary strength comparison based on the collected ces.
+     * @param doFrench flag indicates if French ordering is to be done
+     * @return the secondary strength comparison result
+     */
+    private final int doSecondaryCompare(boolean doFrench)
+    {
+        // now, we're gonna reexamine collected CEs
+        if (!doFrench) { // normal
+            int soffset = 0;
+            int toffset = 0;
+            while (true) {
+                int sorder = CollationElementIterator.IGNORABLE;
+                while (sorder == CollationElementIterator.IGNORABLE) {
+                    sorder = m_srcUtilCEBuffer_[soffset ++]
+                             & CE_SECONDARY_MASK_;
+                }
+                int torder = CollationElementIterator.IGNORABLE;
+                while (torder == CollationElementIterator.IGNORABLE) {
+                    torder = m_tgtUtilCEBuffer_[toffset ++]
+                             & CE_SECONDARY_MASK_;
+                }
+
+                if (sorder == torder) {
+                    if (m_srcUtilCEBuffer_[soffset - 1]
+                                    == CollationElementIterator.NULLORDER) {
+                        if (m_tgtUtilCEBuffer_[toffset - 1] 
+                            != CollationElementIterator.NULLORDER) {
+                            return -1;
+                        }
+                        break;
+                    }
+                    else if (m_tgtUtilCEBuffer_[toffset - 1]
+                             == CollationElementIterator.NULLORDER) {
+                        return 1;
+                    }
+                }
+                else {
+                    if (m_srcUtilCEBuffer_[soffset - 1] ==
+                            CollationElementIterator.NULLORDER) {
+                        return -1;
+                    }
+                    if (m_tgtUtilCEBuffer_[toffset - 1] ==
+                            CollationElementIterator.NULLORDER) {
+                        return 1;
+                    }
+                    return (sorder < torder) ? -1 : 1;
+                }
+            }
+        }
+        else { // do the French
+            m_srcUtilContOffset_ = 0;
+            m_tgtUtilContOffset_ = 0;
+            m_srcUtilOffset_ = m_srcUtilCEBufferSize_ - 2;
+            m_tgtUtilOffset_ = m_tgtUtilCEBufferSize_ - 2;
+            while (true) {
+                int sorder = getSecondaryFrenchCE(true);
+                int torder = getSecondaryFrenchCE(false);
+                if (sorder == torder) {
+                    if ((m_srcUtilOffset_ < 0 && m_tgtUtilOffset_ < 0)
+                        || (m_srcUtilOffset_ >= 0 
+                            && m_srcUtilCEBuffer_[m_srcUtilOffset_]
+                                    == CollationElementIterator.NULLORDER)) {
+                        break;
+                    }
+                }
+                else {
+                    return (sorder < torder) ? -1 : 1;
+                }
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Calculates the next secondary french CE.
+     * @param isSrc flag indicator if we are calculating the src ces
+     * @return result next modified ce
+     */
+    private final int getSecondaryFrenchCE(boolean isSrc)
+    {
+        int result = CollationElementIterator.IGNORABLE;
+        int offset = m_srcUtilOffset_;
+        int continuationoffset = m_srcUtilContOffset_;
+        int cebuffer[] = m_srcUtilCEBuffer_;
+        if (!isSrc) {
+            offset = m_tgtUtilOffset_;
+            continuationoffset = m_tgtUtilContOffset_;
+            cebuffer = m_tgtUtilCEBuffer_;
+        }
+
+        while (result == CollationElementIterator.IGNORABLE
+                && offset >= 0) {
+            if (continuationoffset == 0) {
+                result = cebuffer[offset];
+                while (isContinuation(cebuffer[offset --])){
+                }
+                // after this, sorder is at the start of continuation,
+                // and offset points before that
+                if (isContinuation(cebuffer[offset + 1])) {
+                    // save offset for later
+                    continuationoffset = offset;
+                    offset += 2;
+                }
+            }
+            else {
+                result = cebuffer[offset ++];
+                if (!isContinuation(result)) {
+                    // we have finished with this continuation
+                    offset = continuationoffset;
+                    // reset the pointer to before continuation
+                    continuationoffset = 0;
+                    continue;
+                }
+            }
+            result &= CE_SECONDARY_MASK_; // remove continuation bit
+        }
+        if (isSrc) {
+            m_srcUtilOffset_ = offset;
+            m_srcUtilContOffset_ = continuationoffset;
+        }
+        else {
+            m_tgtUtilOffset_ = offset;
+            m_tgtUtilContOffset_ = continuationoffset;
+        }
+        return result;
+    }
+
+    /**
+     * Does case strength comparison based on the collected ces.
+     * @return the case strength comparison result
+     */
+    private final int doCaseCompare()
+    {
+        int soffset = 0;
+        int toffset = 0;
+        while (true) {
+            int sorder = CollationElementIterator.IGNORABLE;
+            int torder = CollationElementIterator.IGNORABLE;
+            while ((sorder & CE_REMOVE_CASE_)
+                                    == CollationElementIterator.IGNORABLE) {
+                sorder = m_srcUtilCEBuffer_[soffset ++];
+                if (!isContinuation(sorder) && ((sorder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) {
+                    // primary ignorables should not be considered on the case level when the strength is primary
+                    // otherwise, the CEs stop being well-formed
+                    sorder &= CE_CASE_MASK_3_;
+                    sorder ^= m_caseSwitch_;
+                }
+                else {
+                    sorder = CollationElementIterator.IGNORABLE;
+                }
+            }
+
+            while ((torder & CE_REMOVE_CASE_)
+                                    == CollationElementIterator.IGNORABLE) {
+                torder = m_tgtUtilCEBuffer_[toffset ++];
+                if (!isContinuation(torder) && ((torder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) {
+                    // primary ignorables should not be considered on the case level when the strength is primary
+                    // otherwise, the CEs stop being well-formed
+                    torder &= CE_CASE_MASK_3_;
+                    torder ^= m_caseSwitch_;
+                }
+                else {
+                    torder = CollationElementIterator.IGNORABLE;
+                }
+            }
+
+            sorder &= CE_CASE_BIT_MASK_;
+            torder &= CE_CASE_BIT_MASK_;
+            if (sorder == torder) {
+                // checking end of strings
+                if (m_srcUtilCEBuffer_[soffset - 1]
+                                        == CollationElementIterator.NULLORDER) {
+                    if (m_tgtUtilCEBuffer_[toffset - 1] 
+                        != CollationElementIterator.NULLORDER) {
+                        return -1;
+                    }
+                    break;
+                }
+                else if (m_tgtUtilCEBuffer_[toffset - 1]
+                            == CollationElementIterator.NULLORDER) {
+                    return 1;
+                }
+            }
+            else {
+                if (m_srcUtilCEBuffer_[soffset - 1]
+                                    == CollationElementIterator.NULLORDER) {
+                    return -1;
+                }
+                if (m_tgtUtilCEBuffer_[soffset - 1]
+                                    == CollationElementIterator.NULLORDER) {
+                    return 1;
+                }
+                return (sorder < torder) ? -1 : 1;
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Does tertiary strength comparison based on the collected ces.
+     * @return the tertiary strength comparison result
+     */
+    private final int doTertiaryCompare()
+    {
+        int soffset = 0;
+        int toffset = 0;
+        while (true) {
+            int sorder = CollationElementIterator.IGNORABLE;
+            int torder = CollationElementIterator.IGNORABLE;
+            while ((sorder & CE_REMOVE_CASE_)
+                                == CollationElementIterator.IGNORABLE) {
+                sorder = m_srcUtilCEBuffer_[soffset ++] & m_mask3_;
+                if (!isContinuation(sorder)) {
+                    sorder ^= m_caseSwitch_;
+                }
+                else {
+                    sorder &= CE_REMOVE_CASE_;
+                }
+            }
+
+            while ((torder & CE_REMOVE_CASE_)
+                                == CollationElementIterator.IGNORABLE) {
+                torder = m_tgtUtilCEBuffer_[toffset ++] & m_mask3_;
+                if (!isContinuation(torder)) {
+                    torder ^= m_caseSwitch_;
+                }
+                else {
+                    torder &= CE_REMOVE_CASE_;
+                }
+            }
+
+            if (sorder == torder) {
+                if (m_srcUtilCEBuffer_[soffset - 1]
+                                    == CollationElementIterator.NULLORDER) {
+                    if (m_tgtUtilCEBuffer_[toffset - 1]
+                        != CollationElementIterator.NULLORDER) {
+                        return -1;
+                    }
+                    break;
+                }
+                else if (m_tgtUtilCEBuffer_[toffset - 1]
+                            == CollationElementIterator.NULLORDER) {
+                    return 1;
+                }
+            }
+            else {
+                if (m_srcUtilCEBuffer_[soffset - 1] ==
+                                        CollationElementIterator.NULLORDER) {
+                    return -1;
+                }
+                if (m_tgtUtilCEBuffer_[toffset - 1] ==
+                            CollationElementIterator.NULLORDER) {
+                    return 1;
+                }
+                return (sorder < torder) ? -1 : 1;
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Does quaternary strength comparison based on the collected ces.
+     * @param lowestpvalue the lowest primary value that will not be ignored if
+     *                      alternate handling is shifted
+     * @return the quaternary strength comparison result
+     */
+    private final int doQuaternaryCompare(int lowestpvalue)
+    {
+        boolean sShifted = true;
+        boolean tShifted = true;
+        int soffset = 0;
+        int toffset = 0;
+        while (true) {
+            int sorder = CollationElementIterator.IGNORABLE;
+            int torder = CollationElementIterator.IGNORABLE;
+            while (sorder == CollationElementIterator.IGNORABLE
+                    || (isContinuation(sorder) && !sShifted)) {
+                sorder = m_srcUtilCEBuffer_[soffset ++];
+                if (isContinuation(sorder)) {
+                    if (!sShifted) {
+                        continue;
+                    }
+                }
+                else if (Utility.compareUnsigned(sorder, lowestpvalue) > 0
+                            || (sorder & CE_PRIMARY_MASK_)
+                                    == CollationElementIterator.IGNORABLE) {
+                    // non continuation
+                    sorder = CE_PRIMARY_MASK_;
+                    sShifted = false;
+                }
+                else {
+                    sShifted = true;
+                }
+            }
+            sorder >>>= CE_PRIMARY_SHIFT_;
+            while (torder == CollationElementIterator.IGNORABLE
+                    || (isContinuation(torder) && !tShifted)) {
+                torder = m_tgtUtilCEBuffer_[toffset ++];
+                if (isContinuation(torder)) {
+                    if (!tShifted) {
+                        continue;
+                    }
+                }
+                else if (Utility.compareUnsigned(torder, lowestpvalue) > 0
+                            || (torder & CE_PRIMARY_MASK_)
+                                    == CollationElementIterator.IGNORABLE) {
+                    // non continuation
+                    torder = CE_PRIMARY_MASK_;
+                    tShifted = false;
+                }
+                else {
+                    tShifted = true;
+                }
+            }
+            torder >>>= CE_PRIMARY_SHIFT_;
+
+            if (sorder == torder) {
+                if (m_srcUtilCEBuffer_[soffset - 1]
+                    == CollationElementIterator.NULLORDER) {
+                    if (m_tgtUtilCEBuffer_[toffset - 1]
+                        != CollationElementIterator.NULLORDER) {
+                        return -1;
+                    }
+                    break;
+                }
+                else if (m_tgtUtilCEBuffer_[toffset - 1]
+                            == CollationElementIterator.NULLORDER) {
+                    return 1;
+                }
+            }
+            else {
+                if (m_srcUtilCEBuffer_[soffset - 1] ==
+                    CollationElementIterator.NULLORDER) {
+                    return -1;
+                }
+                if (m_tgtUtilCEBuffer_[toffset - 1] ==
+                    CollationElementIterator.NULLORDER) {
+                    return 1;
+                }
+                return (sorder < torder) ? -1 : 1;
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Internal function. Does byte level string compare. Used by strcoll if
+     * strength == identical and strings are otherwise equal. This is a rare
+     * case. Comparison must be done on NFD normalized strings. FCD is not good
+     * enough.
+     * @param source text
+     * @param target text
+     * @param offset of the first difference in the text strings
+     * @param normalize flag indicating if we are to normalize the text before
+     *              comparison
+     * @return 1 if source is greater than target, -1 less than and 0 if equals
+     */
+    private static final int doIdenticalCompare(String source, String target,
+                                                int offset, boolean normalize)
+
+    {
+        if (normalize) {
+            if (Normalizer.quickCheck(source, Normalizer.NFD,0)
+                                                    != Normalizer.YES) {
+                source = Normalizer.decompose(source, false);
+            }
+
+            if (Normalizer.quickCheck(target, Normalizer.NFD,0)
+                                                        != Normalizer.YES) {
+                target = Normalizer.decompose(target, false);
+            }
+            offset = 0;
+        }
+
+        return doStringCompare(source, target, offset);
+    }
+
+    /**
+     * Compares string for their codepoint order.
+     * This comparison handles surrogate characters and place them after the
+     * all non surrogate characters.
+     * @param source text
+     * @param target text
+     * @param offset start offset for comparison
+     * @return 1 if source is greater than target, -1 less than and 0 if equals
+     */
+    private static final int doStringCompare(String source,
+                                             String target,
+                                             int offset)
+    {
+        // compare identical prefixes - they do not need to be fixed up
+        char schar = 0;
+        char tchar = 0;
+        int slength = source.length();
+        int tlength = target.length();
+        int minlength = Math.min(slength, tlength);
+        while (offset < minlength) {
+            schar = source.charAt(offset);
+            tchar = target.charAt(offset ++);
+            if (schar != tchar) {
+                break;
+            }
+        }
+
+        if (schar == tchar && offset == minlength) {
+            if (slength > minlength) {
+                return 1;
+            }
+            if (tlength > minlength) {
+                return -1;
+            }
+            return 0;
+        }
+
+        //  if both values are in or above the surrogate range, Fix them up.
+        if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE
+            && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            schar = fixupUTF16(schar);
+            tchar = fixupUTF16(tchar);
+        }
+
+        // now c1 and c2 are in UTF-32-compatible order
+        return (schar < tchar) ? -1 : 1; // schar and tchar has to be different
+    }
+
+    /**
+     * Rotate surrogates to the top to get code point order
+     */
+    private static final char fixupUTF16(char ch)
+    {
+        if (ch >= 0xe000) {
+            ch -= 0x800;
+        }
+        else {
+            ch += 0x2000;
+        }
+        return ch;
+    }
+
+    /**
+     * Resets the internal case data members and compression values.
+     */
+    private void updateInternalState()
+    {
+        if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
+            m_caseSwitch_ = CASE_SWITCH_;
+        }
+        else {
+            m_caseSwitch_ = NO_CASE_SWITCH_;
+        }
+
+        if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) {
+            m_mask3_ = CE_REMOVE_CASE_;
+            m_common3_ = COMMON_NORMAL_3_;
+            m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_;
+            m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_;
+            m_bottom3_ = COMMON_BOTTOM_3_;
+        }
+        else {
+            m_mask3_ = CE_KEEP_CASE_;
+            m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
+            if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
+                m_common3_ = COMMON_UPPER_FIRST_3_;
+                m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_;
+                m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_;
+            } else {
+                m_common3_ = COMMON_NORMAL_3_;
+                m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_;
+                m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_;
+            }
+        }
+
+        // Set the compression values
+        int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1;
+        // we multilply double with int, but need only int
+        m_topCount3_ = (int)(PROPORTION_3_ * total3);
+        m_bottomCount3_ = total3 - m_topCount3_;
+
+        if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_
+            && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) {
+            m_isSimple3_ = true;
+        }
+        else {
+            m_isSimple3_ = false;
+        }
+        if(!m_isCaseLevel_ && getStrength() <= AttributeValue.TERTIARY_ && !m_isNumericCollation_
+          && !m_isAlternateHandlingShifted_ && !latinOneFailed_) {
+          if(latinOneCEs_ == null || latinOneRegenTable_) {
+            if(setUpLatinOne()) { // if we succeed in building latin1 table, we'll use it
+              latinOneUse_ = true;
+            } else {
+              latinOneUse_ = false;
+              latinOneFailed_ = true;
+            }
+            latinOneRegenTable_ = false;
+          } else { // latin1Table exists and it doesn't need to be regenerated, just use it
+            latinOneUse_ = true;
+          }
+        } else {
+          latinOneUse_ = false;
+        }
+
+    }
+
+    /**
+     * Initializes the RuleBasedCollator
+     */
+    private final void init()
+    {
+        for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_;
+             m_minUnsafe_ ++) {
+            // Find the smallest unsafe char.
+            if (isUnsafe(m_minUnsafe_)) {
+                break;
+            }
+        }
+
+        for (m_minContractionEnd_ = 0;
+             m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_;
+             m_minContractionEnd_ ++) {
+            // Find the smallest contraction-ending char.
+            if (isContractionEnd(m_minContractionEnd_)) {
+                break;
+            }
+        }
+        latinOneFailed_ = true;
+        setStrength(m_defaultStrength_);
+        setDecomposition(m_defaultDecomposition_);
+        m_variableTopValue_ = m_defaultVariableTopValue_;
+        m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
+        m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
+        m_isCaseLevel_ = m_defaultIsCaseLevel_;
+        m_caseFirst_ = m_defaultCaseFirst_;
+        m_isHiragana4_ = m_defaultIsHiragana4_;
+        m_isNumericCollation_ = m_defaultIsNumericCollation_;
+        latinOneFailed_ = false;
+        updateInternalState();
+    }
+
+    /**
+     *  Initializes utility iterators and byte buffer used by compare
+     */
+    private final void initUtility(boolean allocate) {
+        if (allocate) {
+            if (m_srcUtilIter_ == null) {
+                m_srcUtilIter_ = new StringUCharacterIterator();
+                m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this);
+                m_tgtUtilIter_ = new StringUCharacterIterator();
+                m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this);
+                m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
+                m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
+                m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; // secondary
+                m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; // tertiary
+                m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_];  // Quaternary
+                m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
+                m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
+            }
+        } else {
+            m_srcUtilIter_ = null;
+            m_srcUtilColEIter_ = null;
+            m_tgtUtilIter_ = null;
+            m_tgtUtilColEIter_ = null;
+            m_utilBytes0_ = null;
+            m_utilBytes1_ = null;
+            m_utilBytes2_ = null;
+            m_utilBytes3_ = null;
+            m_utilBytes4_ = null;
+            m_srcUtilCEBuffer_ = null;
+            m_tgtUtilCEBuffer_ = null;
+        }
+    }
+
+    // Consts for Latin-1 special processing
+    private static final int ENDOFLATINONERANGE_ = 0xFF;
+    private static final int LATINONETABLELEN_   = (ENDOFLATINONERANGE_+50);
+    private static final int BAIL_OUT_CE_        = 0xFF000000;
+
+     /**
+     * Generate latin-1 tables
+     */
+
+    private class shiftValues {
+        int primShift = 24;
+        int secShift = 24;
+        int terShift = 24;
+    }
+
+    private final void
+    addLatinOneEntry(char ch, int CE, shiftValues sh) {
+      int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
+      boolean reverseSecondary = false;
+      if(!isContinuation(CE)) {
+        tertiary = ((CE & m_mask3_));
+        tertiary ^= m_caseSwitch_;
+        reverseSecondary = true;
+      } else {
+        tertiary = (byte)((CE & CE_REMOVE_CONTINUATION_MASK_));
+        tertiary &= CE_REMOVE_CASE_;
+        reverseSecondary = false;
+      }
+
+      secondary = ((CE >>>= 8) & LAST_BYTE_MASK_);
+      primary2 =  ((CE >>>= 8) & LAST_BYTE_MASK_);
+      primary1 =  (CE >>> 8);
+
+      if(primary1 != 0) {
+        latinOneCEs_[ch] |= (primary1 << sh.primShift);
+        sh.primShift -= 8;
+      }
+      if(primary2 != 0) {
+        if(sh.primShift < 0) {
+          latinOneCEs_[ch] = BAIL_OUT_CE_;
+          latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_;
+          latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_;
+          return;
+        }
+        latinOneCEs_[ch] |= (primary2 << sh.primShift);
+        sh.primShift -= 8;
+      }
+      if(secondary != 0) {
+        if(reverseSecondary && m_isFrenchCollation_) { // reverse secondary
+          latinOneCEs_[latinOneTableLen_+ch] >>>= 8; // make space for secondary
+          latinOneCEs_[latinOneTableLen_+ch] |= (secondary << 24);
+        } else { // normal case
+          latinOneCEs_[latinOneTableLen_+ch] |= (secondary << sh.secShift);
+        }
+        sh.secShift -= 8;
+      }
+      if(tertiary != 0) {
+        latinOneCEs_[2*latinOneTableLen_+ch] |= (tertiary << sh.terShift);
+        sh.terShift -= 8;
+      }
+    }
+
+    private final void
+    resizeLatinOneTable(int newSize) {
+        int newTable[] = new int[3*newSize];
+        int sizeToCopy = ((newSize<latinOneTableLen_)?newSize:latinOneTableLen_);
+        //uprv_memset(newTable, 0, newSize*sizeof(uint32_t)*3); // automatically cleared.
+        System.arraycopy(latinOneCEs_, 0, newTable, 0, sizeToCopy);
+        System.arraycopy(latinOneCEs_, latinOneTableLen_, newTable, newSize, sizeToCopy);
+        System.arraycopy(latinOneCEs_, 2*latinOneTableLen_, newTable, 2*newSize, sizeToCopy);
+        latinOneTableLen_ = newSize;
+        latinOneCEs_ = newTable;
+    }
+
+    private final boolean setUpLatinOne() {
+      if(latinOneCEs_ == null || m_reallocLatinOneCEs_) {
+        latinOneCEs_ = new int[3*LATINONETABLELEN_];
+        latinOneTableLen_ = LATINONETABLELEN_;
+        m_reallocLatinOneCEs_ = false;
+      } else {
+        Arrays.fill(latinOneCEs_, 0);
+      }
+      if(m_ContInfo_ == null) {
+        m_ContInfo_ = new ContractionInfo();
+      }
+      char ch = 0;
+      //StringBuffer sCh = new StringBuffer();
+      //CollationElementIterator it = getCollationElementIterator(sCh.toString());
+      CollationElementIterator it = getCollationElementIterator("");
+
+      shiftValues s = new shiftValues();
+      int CE = 0;
+      char contractionOffset = ENDOFLATINONERANGE_+1;
+
+      for(ch = 0; ch <= ENDOFLATINONERANGE_; ch++) {
+        s.primShift = 24; s.secShift = 24; s.terShift = 24;
+        if(ch < 0x100) {
+          CE = m_trie_.getLatin1LinearValue(ch);
+        } else {
+          CE = m_trie_.getLeadValue(ch);
+          if(CE == CollationElementIterator.CE_NOT_FOUND_) {
+            CE = UCA_.m_trie_.getLeadValue(ch);
+          }
+        }
+        if(!isSpecial(CE)) {
+          addLatinOneEntry(ch, CE, s);
+        } else {
+          switch (RuleBasedCollator.getTag(CE)) {
+          case CollationElementIterator.CE_EXPANSION_TAG_:
+          case CollationElementIterator.CE_DIGIT_TAG_:
+            //sCh.delete(0, sCh.length());
+            //sCh.append(ch);
+            //it.setText(sCh.toString());
+            it.setText(UCharacter.toString(ch));
+            while((CE = it.next()) != CollationElementIterator.NULLORDER) {
+              if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
+                latinOneCEs_[ch] = BAIL_OUT_CE_;
+                latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_;
+                latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_;
+                break;
+              }
+              addLatinOneEntry(ch, CE, s);
+            }
+            break;
+          case CollationElementIterator.CE_CONTRACTION_TAG_:
+            // here is the trick
+            // F2 is contraction. We do something very similar to contractions
+            // but have two indices, one in the real contraction table and the
+            // other to where we stuffed things. This hopes that we don't have
+            // many contractions (this should work for latin-1 tables).
+            {
+              if((CE & 0x00FFF000) != 0) {
+                latinOneFailed_ = true;
+                return false;
+              }
+
+              int UCharOffset = (CE & 0xFFFFFF) - m_contractionOffset_; //getContractionOffset(CE)]
+
+              CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
+
+              latinOneCEs_[ch] = CE;
+              latinOneCEs_[latinOneTableLen_+ch] = CE;
+              latinOneCEs_[2*latinOneTableLen_+ch] = CE;
+
+              // We're going to jump into contraction table, pick the elements
+              // and use them
+              do {
+                  //CE = *(contractionCEs + (UCharOffset - contractionIndex));
+                  CE = m_contractionCE_[UCharOffset];
+                  if(isSpecial(CE) 
+                     && getTag(CE) 
+                               == CollationElementIterator.CE_EXPANSION_TAG_) {
+                    int i;    /* general counter */
+                    //uint32_t *CEOffset = (uint32_t *)image+getExpansionOffset(CE); /* find the offset to expansion table */
+                    int offset = ((CE & 0xFFFFF0) >> 4) - m_expansionOffset_; //it.getExpansionOffset(this, CE);
+                    int size = CE & 0xF; // getExpansionCount(CE);
+                    //CE = *CEOffset++;
+                    if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
+                      for(i = 0; i<size; i++) {
+                        if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
+                          latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
+                          latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;
+                          latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;
+                          break;
+                        }
+                        addLatinOneEntry(contractionOffset, m_expansion_[offset+i], s);
+                      }
+                    } else { /* else, we do */
+                      while(m_expansion_[offset] != 0) {
+                        if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
+                          latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
+                          latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;
+                          latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;
+                          break;
+                        }
+                        addLatinOneEntry(contractionOffset, m_expansion_[offset++], s);
+                      }
+                    }
+                    contractionOffset++;
+                  } else if(!isSpecial(CE)) {
+                    addLatinOneEntry(contractionOffset++, CE, s);
+                  } else {
+                      latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
+                      latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;
+                      latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_;
+                      contractionOffset++;
+                  }
+                  UCharOffset++;
+                  s.primShift = 24; s.secShift = 24; s.terShift = 24;
+                  if(contractionOffset == latinOneTableLen_) { // we need to reallocate
+                   resizeLatinOneTable(2*latinOneTableLen_);
+                  }
+              } while(m_contractionIndex_[UCharOffset] != 0xFFFF);
+            }
+            break;
+          case CollationElementIterator.CE_SPEC_PROC_TAG_:
+            {
+              // 0xB7 is a precontext character defined in UCA5.1, a special
+              // handle is implemeted in order to save LatinOne table for
+              // most locales.
+              if (ch == 0xb7) {
+                  addLatinOneEntry(ch, CE, s);
+              }
+              else {
+                  latinOneFailed_ = true;
+                  return false;
+              }
+            }
+            break;
+          default:
+            latinOneFailed_ = true;
+            return false;
+          }
+        }
+      }
+      // compact table
+      if(contractionOffset < latinOneTableLen_) {
+        resizeLatinOneTable(contractionOffset);
+      }
+      return true;
+    }
+
+    private class ContractionInfo {
+        int index;
+    }
+
+    ContractionInfo m_ContInfo_;
+
+    private int
+    getLatinOneContraction(int strength, int CE, String s) {
+    //int strength, int CE, String s, Integer ind) {
+      int len = s.length();
+      //const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
+      int UCharOffset = (CE & 0xFFF) - m_contractionOffset_;
+      int offset = 1;
+      int latinOneOffset = (CE & 0x00FFF000) >>> 12;
+      char schar = 0, tchar = 0;
+
+      for(;;) {
+        /*
+        if(len == -1) {
+          if(s[*index] == 0) { // end of string
+            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
+          } else {
+            schar = s[*index];
+          }
+        } else {
+        */
+          if(m_ContInfo_.index == len) {
+            return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]);
+          } else {
+            schar = s.charAt(m_ContInfo_.index);
+          }
+        //}
+
+        while(schar > (tchar = m_contractionIndex_[UCharOffset+offset]/**(UCharOffset+offset)*/)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
+          offset++;
+        }
+
+        if (schar == tchar) {
+          m_ContInfo_.index++;
+          return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset+offset]);
+        }
+        else
+        {
+          if(schar  > ENDOFLATINONERANGE_ /*& 0xFF00*/) {
+            return BAIL_OUT_CE_;
+          }
+          // skip completely ignorables
+          int isZeroCE = m_trie_.getLeadValue(schar); //UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
+          if(isZeroCE == 0) { // we have to ignore completely ignorables
+            m_ContInfo_.index++;
+            continue;
+          }
+
+          return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]);
+        }
+      }
+    }
+
+
+    /**
+     * This is a fast strcoll, geared towards text in Latin-1.
+     * It supports contractions of size two, French secondaries
+     * and case switching. You can use it with strengths primary
+     * to tertiary. It does not support shifted and case level.
+     * It relies on the table build by setupLatin1Table. If it
+     * doesn't understand something, it will go to the regular
+     * strcoll.
+     */
+    private final int
+    compareUseLatin1(String source, String target, int startOffset)
+    {
+        int sLen = source.length();
+        int tLen = target.length();
+
+        int strength = getStrength();
+
+        int sIndex = startOffset, tIndex = startOffset;
+        char sChar = 0, tChar = 0;
+        int sOrder=0, tOrder=0;
+
+        boolean endOfSource = false;
+
+        //uint32_t *elements = coll->latinOneCEs;
+
+        boolean haveContractions = false; // if we have contractions in our string
+                                        // we cannot do French secondary
+
+        int offset = latinOneTableLen_;
+
+        // Do the primary level
+    primLoop:
+        for(;;) {
+          while(sOrder==0) { // this loop skips primary ignorables
+            // sOrder=getNextlatinOneCE(source);
+              if(sIndex==sLen) {
+                endOfSource = true;
+                break;
+              }
+              sChar=source.charAt(sIndex++); //[sIndex++];
+            //}
+            if(sChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
+              //fprintf(stderr, "R");
+              return compareRegular(source, target, startOffset);
+            }
+            sOrder = latinOneCEs_[sChar];
+            if(isSpecial(sOrder)) { // if we got a special
+              // specials can basically be either contractions or bail-out signs. If we get anything
+              // else, we'll bail out anywasy
+              if(getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
+                m_ContInfo_.index = sIndex;
+                sOrder = getLatinOneContraction(0, sOrder, source);
+                sIndex = m_ContInfo_.index;
+                haveContractions = true; // if there are contractions, we cannot do French secondary
+                // However, if there are contractions in the table, but we always use just one char,
+                // we might be able to do French. This should be checked out.
+              }
+              if(isSpecial(sOrder) /*== UCOL_BAIL_OUT_CE*/) {
+                //fprintf(stderr, "S");
+                return compareRegular(source, target, startOffset);
+              }
+            }
+          }
+
+          while(tOrder==0) {  // this loop skips primary ignorables
+            // tOrder=getNextlatinOneCE(target);
+            if(tIndex==tLen) {
+              if(endOfSource) {
+                break primLoop;
+              } else {
+                return 1;
+              }
+            }
+            tChar=target.charAt(tIndex++); //[tIndex++];
+            if(tChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
+              //fprintf(stderr, "R");
+              return compareRegular(source, target, startOffset);
+            }
+            tOrder = latinOneCEs_[tChar];
+            if(isSpecial(tOrder)) {
+              // Handling specials, see the comments for source
+              if(getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
+                m_ContInfo_.index = tIndex;
+                tOrder = getLatinOneContraction(0, tOrder, target);
+                tIndex = m_ContInfo_.index;
+                haveContractions = true;
+              }
+              if(isSpecial(tOrder)/*== UCOL_BAIL_OUT_CE*/) {
+                //fprintf(stderr, "S");
+                return compareRegular(source, target, startOffset);
+              }
+            }
+          }
+          if(endOfSource) { // source is finished, but target is not, say the result.
+              return -1;
+          }
+
+          if(sOrder == tOrder) { // if we have same CEs, we continue the loop
+            sOrder = 0; tOrder = 0;
+            continue;
+          } else {
+            // compare current top bytes
+            if(((sOrder^tOrder)&0xFF000000)!=0) {
+              // top bytes differ, return difference
+              if(sOrder >>> 8 < tOrder >>> 8) {
+                return -1;
+              } else {
+                return 1;
+              }
+              // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
+              // since we must return enum value
+            }
+
+            // top bytes match, continue with following bytes
+            sOrder<<=8;
+            tOrder<<=8;
+          }
+        }
+
+        // after primary loop, we definitely know the sizes of strings,
+        // so we set it and use simpler loop for secondaries and tertiaries
+        //sLen = sIndex; tLen = tIndex;
+        if(strength >= SECONDARY) {
+          // adjust the table beggining
+          //latinOneCEs_ += coll->latinOneTableLen;
+          endOfSource = false;
+
+          if(!m_isFrenchCollation_) { // non French
+            // This loop is a simplified copy of primary loop
+            // at this point we know that whole strings are latin-1, so we don't
+            // check for that. We also know that we only have contractions as
+            // specials.
+            //sIndex = 0; tIndex = 0;
+            sIndex = startOffset; tIndex = startOffset;
+    secLoop:
+            for(;;) {
+              while(sOrder==0) {
+                if(sIndex==sLen) {
+                  endOfSource = true;
+                  break;
+                }
+                sChar=source.charAt(sIndex++); //[sIndex++];
+                sOrder = latinOneCEs_[offset+sChar];
+                if(isSpecial(sOrder)) {
+                    m_ContInfo_.index = sIndex;
+                    sOrder = getLatinOneContraction(1, sOrder, source);
+                    sIndex = m_ContInfo_.index;
+                }
+              }
+
+              while(tOrder==0) {
+                if(tIndex==tLen) {
+                  if(endOfSource) {
+                    break secLoop;
+                  } else {
+                    return 1;
+                  }
+                }
+                tChar=target.charAt(tIndex++); //[tIndex++];
+                tOrder = latinOneCEs_[offset+tChar];
+                if(isSpecial(tOrder)) {
+                    m_ContInfo_.index = tIndex;
+                    tOrder = getLatinOneContraction(1, tOrder, target);
+                    tIndex = m_ContInfo_.index;
+                }
+              }
+              if(endOfSource) {
+                  return -1;
+              }
+
+              if(sOrder == tOrder) {
+                sOrder = 0; tOrder = 0;
+                continue;
+              } else {
+                // see primary loop for comments on this
+                if(((sOrder^tOrder)&0xFF000000)!=0) {
+                  if(sOrder >>> 8 < tOrder >>> 8) {
+                    return -1;
+                  } else {
+                    return 1;
+                  }
+                }
+                sOrder<<=8;
+                tOrder<<=8;
+              }
+            }
+          } else { // French
+            if(haveContractions) { // if we have contractions, we have to bail out
+              // since we don't really know how to handle them here
+              return compareRegular(source, target, startOffset);
+            }
+            // For French, we go backwards
+            sIndex = sLen; tIndex = tLen;
+    secFLoop:
+            for(;;) {
+              while(sOrder==0) {
+                if(sIndex==startOffset) {
+                  endOfSource = true;
+                  break;
+                }
+                sChar=source.charAt(--sIndex); //[--sIndex];
+                sOrder = latinOneCEs_[offset+sChar];
+                // don't even look for contractions
+              }
+
+              while(tOrder==0) {
+                if(tIndex==startOffset) {
+                  if(endOfSource) {
+                    break secFLoop;
+                  } else {
+                    return 1;
+                  }
+                }
+                tChar=target.charAt(--tIndex); //[--tIndex];
+                tOrder = latinOneCEs_[offset+tChar];
+                // don't even look for contractions
+              }
+              if(endOfSource) {
+                  return -1;
+              }
+
+              if(sOrder == tOrder) {
+                sOrder = 0; tOrder = 0;
+                continue;
+              } else {
+                // see the primary loop for comments
+                if(((sOrder^tOrder)&0xFF000000)!=0) {
+                  if(sOrder >>> 8 < tOrder >>> 8) {
+                    return -1;
+                  } else {
+                    return 1;
+                  }
+                }
+                sOrder<<=8;
+                tOrder<<=8;
+              }
+            }
+          }
+        }
+
+        if(strength >= TERTIARY) {
+          // tertiary loop is the same as secondary (except no French)
+          offset += latinOneTableLen_;
+          //sIndex = 0; tIndex = 0;
+          sIndex = startOffset; tIndex = startOffset;
+          endOfSource = false;
+          for(;;) {
+            while(sOrder==0) {
+              if(sIndex==sLen) {
+                endOfSource = true;
+                break;
+              }
+              sChar=source.charAt(sIndex++); //[sIndex++];
+              sOrder = latinOneCEs_[offset+sChar];
+              if(isSpecial(sOrder)) {
+                m_ContInfo_.index = sIndex;
+                sOrder = getLatinOneContraction(2, sOrder, source);
+                sIndex = m_ContInfo_.index;
+              }
+            }
+            while(tOrder==0) {
+              if(tIndex==tLen) {
+                if(endOfSource) {
+                  return 0; // if both strings are at the end, they are equal
+                } else {
+                  return 1;
+                }
+              }
+              tChar=target.charAt(tIndex++); //[tIndex++];
+              tOrder = latinOneCEs_[offset+tChar];
+              if(isSpecial(tOrder)) {
+                m_ContInfo_.index = tIndex;
+                tOrder = getLatinOneContraction(2, tOrder, target);
+                tIndex = m_ContInfo_.index;
+              }
+            }
+            if(endOfSource) {
+                return -1;
+            }
+            if(sOrder == tOrder) {
+              sOrder = 0; tOrder = 0;
+              continue;
+            } else {
+              if(((sOrder^tOrder)&0xff000000)!=0) {
+                if(sOrder >>> 8 < tOrder >>> 8) {
+                  return -1;
+                } else {
+                  return 1;
+                }
+              }
+              sOrder<<=8;
+              tOrder<<=8;
+            }
+          }
+        }
+        return 0;
+    }
+    /** 
+     * Get the version of this collator object.
+     * @return the version object associated with this collator
+     * @stable ICU 2.8
+     */
+    public VersionInfo getVersion() {
+        /* RunTime version  */
+        int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
+        /* Builder version*/
+        int bdVersion = m_version_.getMajor();
+
+        /* Charset Version. Need to get the version from cnv files
+         * makeconv should populate cnv files with version and
+         * an api has to be provided in ucnv.h to obtain this version
+         */
+        int csVersion = 0;
+
+        /* combine the version info */
+        int cmbVersion = ((rtVersion<<11) | (bdVersion<<6) | (csVersion)) & 0xFFFF;
+        
+        /* Tailoring rules */
+        return VersionInfo.getInstance(cmbVersion>>8, 
+                cmbVersion & 0xFF, 
+                m_version_.getMinor(), 
+                UCA_.m_UCA_version_.getMajor());
+
+//        versionInfo[0] = (uint8_t)(cmbVersion>>8);
+//        versionInfo[1] = (uint8_t)cmbVersion;
+//        versionInfo[2] = coll->image->version[1];
+//        versionInfo[3] = coll->UCA->image->UCAVersion[0];
+    }
+    
+    /** 
+     * Get the UCA version of this collator object.
+     * @return the version object associated with this collator
+     * @stable ICU 2.8
+     */
+    public VersionInfo getUCAVersion() {
+        return UCA_.m_UCA_version_;
+    }
+
+    private transient boolean m_reallocLatinOneCEs_;
+}