/** ******************************************************************************* * Copyright (C) 2001-2006, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.collator; import java.util.Iterator; import java.util.Locale; import java.util.MissingResourceException; import java.util.Vector; import com.ibm.icu.dev.test.ModuleTest; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.dev.test.TestDataModule.DataMap; import com.ibm.icu.impl.LocaleUtility; import com.ibm.icu.impl.Utility; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.CollationElementIterator; import com.ibm.icu.text.CollationKey; import com.ibm.icu.text.Collator; import com.ibm.icu.text.RawCollationKey; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.UTF16; public class CollationTest extends ModuleTest{ // public methods -------------------------------------------------------- public static void main(String[] args) throws Exception{ new CollationTest().run(args); } public CollationTest() { super("com/ibm/icu/dev/data/testdata/", "DataDrivenCollationTest"); } public void processModules() { for (Iterator iter = t.getSettingsIterator(); iter.hasNext();) { DataMap setting = (DataMap) iter.next(); processSetting(setting); } } // package private methods ---------------------------------------------- static void doTest(TestFmwk test, RuleBasedCollator col, String source, String target, int result) { doTestVariant(test, col, source, target, result); if (result == -1) { doTestVariant(test, col, target, source, 1); } else if (result == 1) { doTestVariant(test, col, target, source, -1); } else { doTestVariant(test, col, target, source, 0); } CollationElementIterator iter = col.getCollationElementIterator(source); backAndForth(test, iter); iter.setText(target); backAndForth(test, iter); } /** * Return an integer array containing all of the collation orders * returned by calls to next on the specified iterator */ static int[] getOrders(CollationElementIterator iter) { int maxSize = 100; int size = 0; int[] orders = new int[maxSize]; int order; while ((order = iter.next()) != CollationElementIterator.NULLORDER) { if (size == maxSize) { maxSize *= 2; int[] temp = new int[maxSize]; System.arraycopy(orders, 0, temp, 0, size); orders = temp; } orders[size++] = order; } if (maxSize > size) { int[] temp = new int[size]; System.arraycopy(orders, 0, temp, 0, size); orders = temp; } return orders; } static void backAndForth(TestFmwk test, CollationElementIterator iter) { // Run through the iterator forwards and stick it into an array iter.reset(); int[] orders = getOrders(iter); // Now go through it backwards and make sure we get the same values int index = orders.length; int o; // reset the iterator iter.reset(); while ((o = iter.previous()) != CollationElementIterator.NULLORDER) { if (o != orders[--index]) { if (o == 0) { index ++; } else { while (index > 0 && orders[index] == 0) { index --; } if (o != orders[index]) { test.errln("Mismatch at index " + index + ": 0x" + Integer.toHexString(orders[index]) + " vs 0x" + Integer.toHexString(o)); break; } } } } while (index != 0 && orders[index - 1] == 0) { index --; } if (index != 0) { String msg = "Didn't get back to beginning - index is "; test.errln(msg + index); iter.reset(); test.err("next: "); while ((o = iter.next()) != CollationElementIterator.NULLORDER) { String hexString = "0x" + Integer.toHexString(o) + " "; test.err(hexString); } test.errln(""); test.err("prev: "); while ((o = iter.previous()) != CollationElementIterator.NULLORDER) { String hexString = "0x" + Integer.toHexString(o) + " "; test.err(hexString); } test.errln(""); } } // private data members -------------------------------------------------- private String m_sequence_; private int m_sequenceIndex_; private String m_source_; private StringBuffer m_target_ = new StringBuffer(); private int m_nextRelation_; private int m_relation_; // private methods ------------------------------------------------------- private void processSetting(DataMap settings) { RuleBasedCollator col = null; // ok i have to be careful here since it seems like we can have // multiple locales for each test String locale = settings.getString("TestLocale"); if (locale != null) { // this is a case where we have locale try { Locale l = LocaleUtility.getLocaleFromName(locale); col = (RuleBasedCollator)Collator.getInstance(l); }catch (MissingResourceException e){ warnln("Could not load the locale data for locale " + locale); }catch (Exception e) { errln("Error creating collator for locale " + locale); } logln("Testing collator for locale " + locale); processSetting2(settings, col); } String rules = settings.getString("Rules"); // ok i have to be careful here since it seems like we can have // multiple rules for each test if (rules != null) { // here we deal with rules try { col = new RuleBasedCollator(rules); }catch (MissingResourceException e){ warnln("Could not load the locale data: " + e.getMessage()); } catch (Exception e) { errln("Error creating collator for rules " + rules); } processSetting2(settings, col); } } private void processSetting2(DataMap settings,RuleBasedCollator col) { // ok i have to be careful here since it seems like we can have // multiple rules for each test String arguments = settings.getString("Arguments"); if (arguments != null) { handleArguments(col, arguments); } processTestCases(col); } /** * Reads the options string and sets appropriate attributes in collator */ private void handleArguments(RuleBasedCollator col, String argument) { int i = 0; boolean printInfo = false; while (i < argument.length()) { if (!UCharacter.isWhitespace(argument.charAt(i))) { // eat whitespace break; } i ++; } while (i < argument.length()) { // skip opening '[' if (argument.charAt(i) == '[') { i ++; } else { if(!isModularBuild()){ errln("Error in collation arguments, missing ["); // no opening '[' } // !!! following line has no effect printInfo=true; return; } int value = argument.indexOf(' ', i); String option = argument.substring(i, value); i = argument.indexOf(']', value); String optionvalue = argument.substring(value + 1, i); i ++; // some options are not added because they have no public apis yet // TODO add the rest of the options if (option.equalsIgnoreCase("alternate")) { if (optionvalue.equalsIgnoreCase("non-ignorable")) { col.setAlternateHandlingShifted(false); } else { col.setAlternateHandlingShifted(true); } } else if (option.equals("strength")) { if (optionvalue.equalsIgnoreCase("1")) { col.setStrength(Collator.PRIMARY); } else if (optionvalue.equalsIgnoreCase("2")) { col.setStrength(Collator.SECONDARY); } else if (optionvalue.equalsIgnoreCase("3")) { col.setStrength(Collator.TERTIARY); } else if (optionvalue.equalsIgnoreCase("4")) { col.setStrength(Collator.QUATERNARY); } } } if (printInfo) { warnln("Could not load the locale data. Skipping..."); } // !!! effect is odd, if no modular build, this emits no // message at all. How come? Hmmm. printInfo is never // true if we get here, so this code is never executed. /* if(printInfo == true && isModularBuild()){ infoln("Could not load the locale data. Skipping..."); } */ } private void processTestCases(RuleBasedCollator col) { for (Iterator iter = t.getDataIterator(); iter.hasNext();) { DataMap e1 = (DataMap) iter.next(); processSequence(col, e1.getString("sequence")); } } private void processSequence(RuleBasedCollator col, String sequence) { // TODO: have a smarter tester that remembers the sequence and ensures // that the complete sequence is in order. That is why I have made a // constraint in the sequence format. m_sequence_ = sequence; m_sequenceIndex_ = 0; m_nextRelation_ = -1; m_target_.delete(0, m_target_.length()); Vector vector = new Vector(); int lastsmallerthanindex = -1; getNextInSequence(); while (getNextInSequence()) { String target = m_target_.toString(); doTest(this, col, m_source_, target, m_relation_); int vsize = vector.size(); for (int i = vsize - 1; i >= 0; i --) { String source = (String)vector.elementAt(i); if (i > lastsmallerthanindex) { doTest(this, col, source, target, m_relation_); } else { doTest(this, col, source, target, -1); } } vector.addElement(target); if (m_relation_ < 0) { lastsmallerthanindex = vsize - 1; } } } /** * Parses the sequence to be tested */ private boolean getNextInSequence() { if (m_sequenceIndex_ >= m_sequence_.length()) { return false; } boolean quoted = false; boolean quotedsingle = false; boolean done = false; int i = m_sequenceIndex_; int offset = 0; m_source_ = m_target_.toString(); m_relation_ = m_nextRelation_; m_target_.delete(0, m_target_.length()); while (i < m_sequence_.length() && !done) { int ch = UTF16.charAt(m_sequence_, i); if (UCharacter.isSupplementary(ch)) { i += 2; } else { i ++; } if (!quoted) { if (UCharacter.isWhitespace(ch)) { continue; } switch (ch) { case 0x003C : // < m_nextRelation_ = -1; done = true; break; case 0x003D : // = m_nextRelation_ = 0; done = true; break; case 0x003E : // > m_nextRelation_ = 1; done = true; break; case 0x0027 : // ' very basic quoting quoted = true; quotedsingle = false; break; case 0x005c : // \ single quote quoted = true; quotedsingle = true; break; default: UTF16.insert(m_target_, offset, ch); if (UCharacter.isSupplementary(ch)) { offset += 2; } else { offset ++; } } } else { if (ch == 0x0027) { quoted = false; } else { UTF16.insert(m_target_, offset, ch); if (UCharacter.isSupplementary(ch)) { offset += 2; } else { offset ++; } } if (quotedsingle) { quoted = false; } } } if (quoted == true) { errln("Quote in sequence not closed!"); return false; } m_sequenceIndex_ = i; return true; } private static void doTestVariant(TestFmwk test, RuleBasedCollator myCollation, String source, String target, int result) { boolean printInfo = false; int compareResult = myCollation.compare(source, target); if (compareResult != result) { // !!! if not mod build, error, else nothing. // warnln if not build, error, else always print warning. // do we need a 'quiet warning?' (err or log). Hmmm, // would it work to have the 'verbose' flag let you // suppress warnings? Are there ever some warnings you // want to suppress, and others you don't? if(!test.isModularBuild()){ test.errln("Comparing \"" + Utility.hex(source) + "\" with \"" + Utility.hex(target) + "\" expected " + result + " but got " + compareResult); }else{ printInfo = true; } } CollationKey ssk = myCollation.getCollationKey(source); CollationKey tsk = myCollation.getCollationKey(target); compareResult = ssk.compareTo(tsk); if (compareResult != result) { if(!test.isModularBuild()){ test.errln("Comparing CollationKeys of \"" + Utility.hex(source) + "\" with \"" + Utility.hex(target) + "\" expected " + result + " but got " + compareResult); }else{ printInfo = true; } } RawCollationKey srsk = new RawCollationKey(); myCollation.getRawCollationKey(source, srsk); RawCollationKey trsk = new RawCollationKey(); myCollation.getRawCollationKey(target, trsk); compareResult = ssk.compareTo(tsk); if (compareResult != result) { if(!test.isModularBuild()){ test.errln("Comparing RawCollationKeys of \"" + Utility.hex(source) + "\" with \"" + Utility.hex(target) + "\" expected " + result + " but got " + compareResult); }else{ printInfo = true; } } // hmmm, but here we issue a warning // only difference is, one warning or two, and detailed info or not? // hmmm, does seem preferable to omit detail if we know it is due to missing resource data. // well, if we label the errors as warnings, we can let people know the details, but // also know they may be due to missing resource data. basically this code is asserting // that the errors are due to missing resource data, which may or may not be true. if (printInfo) { test.warnln("Could not load locale data skipping."); } } }