2 //#if defined(FOUNDATION10) || defined(J2SE13)
5 **********************************************************************
6 * Copyright (c) 2002-2009, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 **********************************************************************
10 **********************************************************************
12 package com.ibm.icu.dev.test.cldr;
16 //import org.unicode.cldr.util.LanguageTagParser;
17 //import org.unicode.cldr.util.Utility;
18 import java.io.IOException;
19 import java.io.PrintWriter;
20 import java.io.StringWriter;
21 import java.text.ParseException;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.Date;
25 import java.util.HashMap;
26 import java.util.List;
29 import java.util.TreeMap;
30 import java.util.Iterator;
31 import java.util.TreeSet;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
35 import javax.xml.parsers.SAXParser;
36 import javax.xml.parsers.SAXParserFactory;
38 //import org.unicode.cldr.test.CLDRTest;
39 //import org.unicode.cldr.tool.GenerateCldrTests;
40 import org.xml.sax.Attributes;
41 import org.xml.sax.SAXException;
42 import org.xml.sax.helpers.DefaultHandler;
44 import com.ibm.icu.util.Currency;
45 import com.ibm.icu.util.TimeZone;
46 import com.ibm.icu.util.ULocale;
47 import com.ibm.icu.dev.test.TestFmwk;
49 import com.ibm.icu.text.Collator;
50 import com.ibm.icu.text.DateFormat;
51 import com.ibm.icu.text.NumberFormat;
52 import com.ibm.icu.text.SimpleDateFormat;
53 import com.ibm.icu.text.Transliterator;
54 import com.ibm.icu.text.UTF16;
55 import com.ibm.icu.text.UnicodeSet;
58 * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
61 * 1. for this to work right, you have to have downloaded the CLDR data, and
62 * then set the CLDR directory correctly, using
63 * -DCLDR_DIRECTORY=<top level of cldr>
64 * 2. You probably also need to increase memory, eg with -Xmx512m
65 * 3. For speed, you should also use -DCLDR_DTD_CACHE=C:\cldrcache\, where
66 * C:\cldrcache\ is a temp directory to keep the program from hitting the net for
68 * 4. You may use other environment variables to narrow what you test. Eg
69 * -DXML_MATCH=".*" -DTEST_MATCH="zone.*" -DZONE_MATCH="(?!America/Argentina).*"
70 * a. -DXML_MATCH="de.*" (or whatever regex you want) to just
71 * test certain locales.
72 * b. -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
73 * c. -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
76 public class TestCLDRVsICU extends TestFmwk {
77 static final boolean DEBUG = false;
79 //ULocale uLocale = ULocale.ENGLISH;
80 //Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
81 //static PrintWriter log;
83 static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
84 static String CLDR_DIRECTORY;
87 LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
88 TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
89 ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); // example
91 // WARNING: THIS IS TEMPORARY UNTIL I GET THE FILES STRAIGHTENED OUT
92 CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
96 private static Matcher getEnvironmentRegex(String key, String defaultValue) {
97 return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
100 private static String getEnvironmentString(String key, String defaultValue) {
101 String temp = System.getProperty(key);
102 if (temp == null) temp = defaultValue;
103 else System.out.print("-D" + key + "=\"" + temp + "\" ");
107 public static void main(String[] args) throws Exception {
108 new TestCLDRVsICU().run(args);
111 Set allLocales = new TreeSet();
113 public void TestFiles() throws SAXException, IOException {
114 // only get ICU's locales
115 Set s = new TreeSet();
116 addLocales(NumberFormat.getAvailableULocales(), s);
117 addLocales(DateFormat.getAvailableULocales(), s);
118 addLocales(Collator.getAvailableULocales(), s);
120 // filter, to make tracking down bugs easier
122 for (Iterator it = s.iterator(); it.hasNext();) {
123 String locale = (String)it.next();
124 if (!LOCALE_MATCH.reset(locale).matches()) continue;
129 public void addLocales(ULocale[] list, Collection s) {
130 for (int i = 0; i < list.length; ++i) {
131 allLocales.add(list[i].toString());
132 s.add(list[i].getLanguage());
136 public String getLanguage(ULocale uLocale) {
137 String result = uLocale.getLanguage();
138 String script = uLocale.getScript();
139 if (script.length() != 0) result += "_" + script;
143 public void _test(String localeName) throws SAXException, IOException {
144 //uLocale = new ULocale(localeName);
145 //oLocale = uLocale.toLocale();
147 File f = new File(CLDR_DIRECTORY + "common\\test\\"+ localeName + ".xml");
148 logln("Testing " + f.getCanonicalPath());
149 SAX.parse(f, DEFAULT_HANDLER);
152 static Transliterator toUnicode = Transliterator.getInstance("any-hex");
153 static public String showString(String in) {
154 return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
156 // ============ SAX Handler Infrastructure ============
158 abstract public class Handler {
159 Map settings = new TreeMap();
161 List currentLocales = new ArrayList();
164 void setName(String name) {
167 void set(String attributeName, String attributeValue) {
168 //if (DEBUG) logln(attributeName + " => " + attributeValue);
169 settings.put(attributeName, attributeValue);
171 void checkResult(String value) {
172 if ("true".equals(settings.get("draft"))) {
173 return; // skip draft
175 ULocale ul = new ULocale("xx");
177 for (int i = 0; i < currentLocales.size(); ++i) {
178 ul = (ULocale)currentLocales.get(i);
179 //loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
180 handleResult(ul, value);
182 errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")");
186 } catch (Exception e) {
187 StringWriter sw = new StringWriter();
188 PrintWriter pw = new PrintWriter(sw);
189 e.printStackTrace(pw);
191 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
194 public void loglnSAX(String message) {
195 String temp = message + "\t[" + name;
196 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
197 String attributeName = (String) it.next();
198 String attributeValue = (String) settings.get(attributeName);
199 temp += " " + attributeName + "=<" + attributeValue + ">";
203 int lookupValue(Object x, Object[] list) {
204 for (int i = 0; i < list.length; ++i) {
205 if (x.equals(list[i])) return i;
207 loglnSAX("Unknown String: " + x);
210 abstract void handleResult(ULocale currentLocale, String value) throws Exception;
214 public void setAttributes(Attributes attributes) {
215 String localeList = attributes.getValue("locales");
216 String[] currentLocaleString = new String[50];
217 com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
218 currentLocales.clear();
219 for (int i = 0; i < currentLocaleString.length; ++i) {
220 if (currentLocaleString[i].length() == 0) continue;
221 if (allLocales.contains("")) {
222 logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
225 currentLocales.add(new ULocale(currentLocaleString[i]));
227 if (DEBUG) logln("Setting locales: " + currentLocales);
231 public Handler getHandler(String name, Attributes attributes) {
232 if (DEBUG) logln("Creating Handler: " + name);
233 Handler result = (Handler) RegisteredHandlers.get(name);
234 if (result == null) logln("Unexpected test type: " + name);
236 result.setAttributes(attributes);
241 public void addHandler(String name, Handler handler) {
242 if (!TEST_MATCH.reset(name).matches()) handler = new NullHandler();
243 handler.setName(name);
244 RegisteredHandlers.put(name, handler);
246 Map RegisteredHandlers = new HashMap();
248 class NullHandler extends Handler {
249 void handleResult(ULocale currentLocale, String value) throws Exception {}
252 // ============ Statics for Date/Number Support ============
254 static TimeZone utc = TimeZone.getTimeZone("GMT");
255 static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
257 iso.setTimeZone(utc);
259 static int[] DateFormatValues = {-1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL};
260 static String[] DateFormatNames = {"none", "short", "medium", "long", "full"};
262 static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific", "GBP"};
265 // ============ Handler for Collation ============
266 static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
268 static String remove(String in, UnicodeSet toRemove) {
270 StringBuffer result = new StringBuffer();
271 for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
272 cp = UTF16.charAt(in, i);
273 if (!toRemove.contains(cp)) UTF16.append(result, cp);
275 return result.toString();
279 addHandler("collation", new Handler() {
280 public void handleResult(ULocale currentLocale, String value) {
281 Collator col = Collator.getInstance(currentLocale);
282 String lastLine = "";
284 for (int pos = 0; pos < value.length();) {
285 int nextPos = value.indexOf('\n', pos);
287 nextPos = value.length();
288 String line = value.substring(pos, nextPos);
289 line = remove(line, controlsAndSpace); // HACK for SAX
290 if (line.trim().length() != 0) { // HACK for SAX
291 int comp = col.compare(lastLine, line);
294 errln("\tLine " + (count + 1) + "\tFailure: "
295 + showString(lastLine) + " should be leq "
298 logln("OK: " + line);
308 // ============ Handler for Numbers ============
309 addHandler("number", new Handler() {
310 public void handleResult(ULocale locale, String result) {
311 NumberFormat nf = null;
312 double v = Double.NaN;
313 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
314 String attributeName = (String) it.next();
315 String attributeValue = (String) settings
317 if (attributeName.equals("input")) {
318 v = Double.parseDouble(attributeValue);
321 // must be numberType at this point
322 int index = lookupValue(attributeValue, NumberNames);
323 if (DEBUG) logln("Getting number format for " + locale);
325 case 0: nf = NumberFormat.getInstance(locale); break;
326 case 1: nf = NumberFormat.getIntegerInstance(locale); break;
327 case 2: nf = NumberFormat.getNumberInstance(locale); break;
328 case 3: nf = NumberFormat.getPercentInstance(locale); break;
329 case 4: nf = NumberFormat.getScientificInstance(locale); break;
330 default: nf = NumberFormat.getCurrencyInstance(locale);
331 nf.setCurrency(Currency.getInstance(attributeValue)); break;
333 String temp = nf.format(v).trim();
334 result = result.trim(); // HACK because of SAX
335 if (!temp.equals(result)) {
336 errln("Number: Locale: " + locale
337 + ", \tType: " + attributeValue
338 + ", \tDraft: " + settings.get("draft")
339 + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");
346 // ============ Handler for Dates ============
347 addHandler("date", new Handler() {
348 public void handleResult(ULocale locale, String result) throws ParseException {
351 Date date = new Date();
352 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
353 String attributeName = (String) it.next();
354 String attributeValue = (String) settings
356 if (attributeName.equals("input")) {
357 date = iso.parse(attributeValue);
360 // must be either dateType or timeType at this point
361 int index = lookupValue(attributeValue, DateFormatNames);
362 if (attributeName.equals("dateType"))
368 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
370 String temp = dt.format(date).trim();
371 result = result.trim(); // HACK because of SAX
372 if (!temp.equals(result)) {
373 errln("DateTime: Locale: " + locale
374 + ", \tDate: " + DateFormatNames[dateFormat]
375 + ", \tTime: " + DateFormatNames[timeFormat]
376 + ", \tDraft: " + settings.get("draft")
377 + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");
381 private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
382 if (DEBUG) logln("Getting date/time format for " + locale);
383 if (DEBUG && "ar_EG".equals(locale.toString())) {
384 System.out.println("debug here");
387 if (dateFormat == 0) {
388 dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
389 if (DEBUG) System.out.print("getTimeInstance");
390 } else if (timeFormat == 0) {
391 dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
392 if (DEBUG) System.out.print("getDateInstance");
394 dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat], locale);
395 if (DEBUG) System.out.print("getDateTimeInstance");
397 if (DEBUG) System.out.println("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat)dt).toPattern());
398 return (SimpleDateFormat)dt;
402 // ============ Handler for Zones ============
403 addHandler("zoneFields", new Handler() {
409 public void handleResult(ULocale locale, String result) throws ParseException {
410 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
411 String attributeName = (String) it.next();
412 String attributeValue = (String) settings.get(attributeName);
413 if (attributeName.equals("date")) {
414 date = attributeValue;
415 } else if (attributeName.equals("field")) {
416 pattern = attributeValue;
417 } else if (attributeName.equals("zone")) {
418 zone = attributeValue;
419 } else if (attributeName.equals("parse")) {
420 parse = attributeValue;
423 if (!ZONE_MATCH.reset(zone).matches()) return;
424 Date dateValue = iso.parse(date);
425 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
426 field.setTimeZone(TimeZone.getTimeZone(zone));
427 String temp = field.format(dateValue).trim();
428 // SKIP PARSE FOR NOW
429 result = result.trim(); // HACK because of SAX
430 if (!temp.equals(result)) {
431 temp = field.format(dateValue).trim(); // call again for debugging
432 errln("Zone Format: Locale: " + locale
433 + ", \tZone: " + zone
434 + ", \tDate: " + date
435 + ", \tField: " + pattern
436 + ", \tParse: " + parse
437 + ", \tDraft: " + settings.get("draft")
438 + ", \tCLDR: <" + result
439 + ">, \tICU: <" + temp + ">");
445 // ============ Gorp for SAX ============
449 SAXParserFactory factory = SAXParserFactory.newInstance();
450 factory.setValidating(true);
451 SAX = factory.newSAXParser();
452 } catch (Exception e) {
453 throw new IllegalArgumentException("can't start");
457 DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
458 static final boolean DEBUG = false;
459 StringBuffer lastChars = new StringBuffer();
460 //boolean justPopped = false;
463 public void startElement(
467 Attributes attributes)
468 throws SAXException {
469 //data.put(new ContextStack(contextStack), lastChars);
472 if (qName.equals("cldrTest")) {
474 } else if (qName.equals("result")) {
475 for (int i = 0; i < attributes.getLength(); ++i) {
476 handler.set(attributes.getQName(i), attributes.getValue(i));
479 handler = getHandler(qName, attributes);
480 //handler.set("locale", uLocale.toString());
482 //if (DEBUG) logln("startElement:\t" + contextStack);
483 //justPopped = false;
484 } catch (RuntimeException e) {
489 public void endElement(String uri, String localName, String qName)
490 throws SAXException {
492 //if (DEBUG) logln("endElement:\t" + contextStack);
493 if (qName.equals("result")) handler.checkResult(lastChars.toString());
494 else if (qName.length() != 0) {
495 //logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
497 lastChars.setLength(0);
499 } catch (RuntimeException e) {
504 // Have to hack around the fact that the character data might be in pieces
505 public void characters(char[] ch, int start, int length)
506 throws SAXException {
508 String value = new String(ch,start,length);
509 if (DEBUG) logln("characters:\t" + value);
510 lastChars.append(value);
511 //justPopped = false;
512 } catch (RuntimeException e) {
518 // just for debugging
520 public void notationDecl (String name, String publicId, String systemId)
521 throws SAXException {
522 logln("notationDecl: " + name
528 public void processingInstruction (String target, String data)
529 throws SAXException {
530 logln("processingInstruction: " + target + ", " + data);
533 public void skippedEntity (String name)
536 logln("skippedEntity: " + name
540 public void unparsedEntityDecl (String name, String publicId,
541 String systemId, String notationName)
542 throws SAXException {
543 logln("unparsedEntityDecl: " + name
546 + ", " + notationName