2 **********************************************************************
3 * Copyright (c) 2002-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 **********************************************************************
9 package com.ibm.icu.dev.test.cldr;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.io.StringWriter;
15 import java.text.ParseException;
16 import java.util.ArrayList;
17 import java.util.Collection;
18 import java.util.Date;
19 import java.util.HashMap;
20 import java.util.Iterator;
21 import java.util.List;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
29 import javax.xml.parsers.SAXParser;
30 import javax.xml.parsers.SAXParserFactory;
32 import org.xml.sax.Attributes;
33 import org.xml.sax.SAXException;
34 import org.xml.sax.helpers.DefaultHandler;
36 import com.ibm.icu.dev.test.TestFmwk;
37 import com.ibm.icu.text.DateFormat;
38 import com.ibm.icu.text.NumberFormat;
39 import com.ibm.icu.text.SimpleDateFormat;
40 import com.ibm.icu.text.UTF16;
41 import com.ibm.icu.text.UnicodeSet;
42 import com.ibm.icu.util.Currency;
43 import com.ibm.icu.util.TimeZone;
44 import com.ibm.icu.util.ULocale;
47 * This is a test file that takes in the CLDR XML test files and tests them against
48 * ICU4J. This test file is used to verify that ICU4J is implemented correctly.
49 * As it stands, the test reports all the errors to the console by logging them.
50 * The logging is only possible if "-v" or verbose is set as an argument.
51 * This will allow users to know what problems occurred within CLDR and ICU.
52 * Collator was disabled in this test file and therefore will be skipped.
55 * 1) In order for this to work correctly, you must download the latest CLDR data
56 * in the form of XML. You must also set the CLDR directory using:
57 * -DCLDR_DIRECTORY=<top level of cldr>
58 * 2) You may also consider increasing the memory using -Xmx512m.
59 * 3) For speed purposes, you may consider creating a temporary directory for the
61 * -DCLDR_DTD_CACHE=<cldr cache directory>
62 * 4) You may use other environment variables to narrow down your tests using:
64 * -DXML_MATCH="de.*" (or whatever regex you want) to just test certain locales.
65 * -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
66 * -DZONE_MATCH="(?!America/Argentina).*"
67 * -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
70 * @author John Huan Vu (johnvu@us.ibm.com)
72 public class TestCLDRVsICU extends TestFmwk {
// Compile-time switch for extra diagnostics (see the DEBUG-guarded branch in getDateFormat).
73 static final boolean DEBUG = false;
75 // ULocale uLocale = ULocale.ENGLISH;
76 // Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
77 // static PrintWriter log;
// Filters built from the XML_MATCH, TEST_MATCH and ZONE_MATCH system properties.
// Each Matcher is shared and re-targeted with reset(...) before every use.
79 static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
// Base directory of the CLDR data; _test() opens "test/<locale>.xml" beneath it.
80 static String CLDR_DIRECTORY;
// ".*" is passed as the default pattern for all three filters.
83 LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
84 TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
85 ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*");
87 // CLDR_DIRECTORY is where all the CLDR XML test files are located
88 // WARNING: THIS IS TEMPORARY DIRECTORY UNTIL THE FILES ARE STRAIGHTENED OUT
// NOTE(review): the default passed here is a machine-specific Windows path.
89 CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
// Compiles the string returned by getEnvironmentString(key, defaultValue) as a regex
// and returns a Matcher over the empty string; callers re-target it with reset(...).
93 private static Matcher getEnvironmentRegex(String key, String defaultValue) {
94 return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
// Looks up a configuration value from the JVM system properties (-Dkey=value).
97 private static String getEnvironmentString(String key, String defaultValue) {
98 String temp = System.getProperty(key);
// Echoes the setting to stdout in -Dkey="value" form.
102 System.out.print("-D" + key + "=\"" + temp + "\" ");
// Command-line entry point: creates the test and hands the arguments to run(...).
106 public static void main(String[] args) throws Exception {
107 new TestCLDRVsICU().run(args);
// Full locale IDs (ULocale.toString()) recorded by addLocales; sorted because it is a TreeSet.
110 Set allLocales = new TreeSet();
// Test entry point: collects the language codes of ICU's number-format and
// date-format locales, then walks them, applying the LOCALE_MATCH filter to each.
112 public void TestFiles() throws SAXException, IOException {
113 // only get ICU's locales
114 Set s = new TreeSet();
115 addLocales(NumberFormat.getAvailableULocales(), s);
116 addLocales(DateFormat.getAvailableULocales(), s);
118 // johnvu: Collator was originally disabled
119 // addLocales(Collator.getAvailableULocales(), s);
121 // filter, to make tracking down bugs easier
122 for (Iterator it = s.iterator(); it.hasNext();) {
123 String locale = (String) it.next();
// The shared matcher is re-targeted at this locale name before matching.
124 if (!LOCALE_MATCH.reset(locale).matches())
// For every locale in list: records its full ID in allLocales and adds its
// language code to the caller-supplied collection s.
130 public void addLocales(ULocale[] list, Collection s) {
131 for (int i = 0; i < list.length; ++i) {
132 allLocales.add(list[i].toString());
133 s.add(list[i].getLanguage());
// Builds a language identifier from the locale: the language code, with
// "_<script>" appended when the locale carries a script.
137 public String getLanguage(ULocale uLocale) {
138 String result = uLocale.getLanguage();
139 String script = uLocale.getScript();
140 if (script.length() != 0)
141 result += "_" + script;
// Parses <CLDR_DIRECTORY>/test/<localeName>.xml with the SAX parser, routing all
// parse events to DEFAULT_HANDLER. The resolved file path is logged first.
145 public void _test(String localeName) throws SAXException, IOException {
146 // uLocale = new ULocale(localeName);
147 // oLocale = uLocale.toLocale();
149 File f = new File(CLDR_DIRECTORY, "test/" + localeName + ".xml");
150 logln("Testing " + f.getCanonicalPath());
151 SAX.parse(f, DEFAULT_HANDLER);
// Small helper that renders a string as hexadecimal, one char (UTF-16 code unit)
// at a time; instantiated below as toUnicode.
154 private static class ToHex {
155 public String transliterate(String in) {
156 StringBuilder sb = new StringBuilder();
157 for (int i = 0; i < in.length(); ++i) {
158 char c = in.charAt(i);
// Integer.toHexString emits lower-case digits with no leading zeros.
169 sb.append(Integer.toHexString((int) c));
171 return sb.toString();
175 // static Transliterator toUnicode = Transliterator.getInstance("any-hex");
// ToHex is used here in place of the commented-out "any-hex" Transliterator above.
176 private static final ToHex toUnicode = new ToHex();
// Formats a string for diagnostics: the text wrapped in guillemets (U+00AB / U+00BB),
// followed by its hex rendering in parentheses.
178 static public String showString(String in) {
179 return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
182 // ============ SAX Handler Infrastructure ============
// Base class for the per-test-type handlers registered through addHandler.
// It accumulates attribute values in settings and the locales under test in
// currentLocales; subclasses supply handleResult.
184 abstract public class Handler {
// Attribute name -> value. A TreeMap, so iteration is in sorted name order.
185 Map settings = new TreeMap();
// ULocale objects filled in by setAttributes.
187 List currentLocales = new ArrayList();
190 void setName(String name) {
// Records one attribute; a later value for the same name replaces the earlier one.
194 void set(String attributeName, String attributeValue) {
195 // if (DEBUG) logln(attributeName + " => " + attributeValue);
196 settings.put(attributeName, attributeValue);
// Runs handleResult for every locale in currentLocales against the expected text
// `value` taken from the test file. Results whose "draft" setting is "unconfirmed"
// or "provisional" are skipped. Any exception raised while checking is reported
// through errln together with the locale, the value and the stack trace.
199 void checkResult(String value) {
// Constant-first equals: a result with no "draft" attribute recorded must not
// throw NullPointerException here; it is simply treated as not-draft.
200 if ("unconfirmed".equals(settings.get("draft")) || "provisional".equals(settings.get("draft"))) {
201 return; // skip draft
// Placeholder locale so the error message below always has something to print.
203 ULocale ul = new ULocale("xx");
205 for (int i = 0; i < currentLocales.size(); ++i) {
206 ul = (ULocale) currentLocales.get(i);
207 // loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
208 handleResult(ul, value);
210 errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH)
215 } catch (Exception e) {
// Capture the stack trace as text so it can be included in the errln message.
216 StringWriter sw = new StringWriter();
217 PrintWriter pw = new PrintWriter(sw);
218 e.printStackTrace(pw);
220 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
// Builds a log line from the message, the handler name and every accumulated
// attribute, each rendered as name=<value>.
224 public void loglnSAX(String message) {
225 String temp = message + "\t[" + name;
226 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
227 String attributeName = (String) it.next();
228 String attributeValue = (String) settings.get(attributeName);
229 temp += " " + attributeName + "=<" + attributeValue + ">";
// Linear search of list for an element equal to x. Reaching the statement after
// the loop means no element matched, which is logged as an unknown string.
234 int lookupValue(Object x, Object[] list) {
235 for (int i = 0; i < list.length; ++i) {
236 if (x.equals(list[i]))
239 loglnSAX("Unknown String: " + x);
// Per-test-type check, called by checkResult once per locale with the expected
// text from the test file. May throw; checkResult reports any exception.
243 abstract void handleResult(ULocale currentLocale, String value) throws Exception;
// Rebuilds currentLocales from the space-separated "locales" attribute of the
// element that selected this handler. Empty entries are ignored, and locales
// that ICU4J does not report (not present in allLocales) are skipped with a log line.
248 public void setAttributes(Attributes attributes) {
249 String localeList = attributes.getValue("locales");
// Fixed-size scratch array handed to Utility.split; at most 50 locale IDs per element.
250 String[] currentLocaleString = new String[50];
251 com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
252 currentLocales.clear();
253 for (int i = 0; i < currentLocaleString.length; ++i) {
254 if (currentLocaleString[i].length() == 0)
// Test the locale actually being processed. The previous check looked up the
// empty string, so the skip described by the log message never applied to it.
256 if (!allLocales.contains(currentLocaleString[i])) {
257 logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
260 currentLocales.add(new ULocale(currentLocaleString[i]));
263 logln("Setting locales: " + currentLocales);
// Looks up the handler registered under the element name and passes the element's
// attributes to it; an unregistered name is logged as an unexpected test type.
267 public Handler getHandler(String name, Attributes attributes) {
269 logln("Creating Handler: " + name);
270 Handler result = (Handler) RegisteredHandlers.get(name);
272 logln("Unexpected test type: " + name);
274 result.setAttributes(attributes);
// Registers a handler under a test-type name. When the name does not match
// TEST_MATCH, a NullHandler is registered in its place, so the type stays known
// to getHandler while the supplied handler is not used.
279 public void addHandler(String name, Handler handler) {
280 if (!TEST_MATCH.reset(name).matches())
281 handler = new NullHandler();
282 handler.setName(name);
283 RegisteredHandlers.put(name, handler);
// Test-type (element) name -> Handler.
286 Map RegisteredHandlers = new HashMap();
// Handler that addHandler substitutes for test types excluded by TEST_MATCH.
288 class NullHandler extends Handler {
289 void handleResult(ULocale currentLocale, String value) throws Exception {
293 // ============ Statics for Date/Number Support ============
// GMT zone applied to the iso parser below.
295 static TimeZone utc = TimeZone.getTimeZone("GMT");
// Parses the timestamp attributes of the test files (literal 'T' and 'Z' in the pattern).
// NOTE(review): a single shared DateFormat instance; fine only while tests run on one thread.
296 static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
298 iso.setTimeZone(utc);
// Parallel to DateFormatNames: the index found by name selects the DateFormat style
// constant here. Index 0 ("none") maps to -1.
301 static int[] DateFormatValues = { -1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL };
303 // The following are different data format types that are part of the parameters in CLDR
304 static String[] DateFormatNames = { "none", "short", "medium", "long", "full" };
306 // The following are different number types that are part of the parameters in CLDR
// The last entry is a currency code rather than a style name; the number handler
// passes the attribute value to Currency.getInstance for its currency format.
307 static String[] NumberNames = { "standard", "integer", "decimal", "percent", "scientific", "GBP" };
310 // ============ Handler for Collation ============
// NOTE(review): despite the name, the pattern covers only [:cc:] (control characters).
311 static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
// Returns a copy of `in` without the code points contained in toRemove. The loop
// advances by UTF16.getCharCount(cp), so it steps by whole code points rather
// than by individual chars.
313 static String remove(String in, UnicodeSet toRemove) {
315 StringBuffer result = new StringBuffer();
316 for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
317 cp = UTF16.charAt(in, i);
318 if (!toRemove.contains(cp))
319 UTF16.append(result, cp);
321 return result.toString();
325 // johnvu: Collator was originally disabled
326 // TODO (dougfelt) move this test
// NOTE(review): this collation handler looks like disabled code whose comment
// delimiters are not present here: the "HACK for SAX" trailers below are not
// valid Java and Collator is not imported. Confirm before enabling.
// Intent as written: split the value into lines, strip control characters, and
// compare each non-blank line with the previous one using the locale's Collator.
328 addHandler("collation", new Handler() {
329 public void handleResult(ULocale currentLocale, String value) {
330 Collator col = Collator.getInstance(currentLocale);
331 String lastLine = "";
333 for (int pos = 0; pos < value.length();) {
334 int nextPos = value.indexOf('\n', pos);
336 nextPos = value.length();
337 String line = value.substring(pos, nextPos);
338 line = remove(line, controlsAndSpace); HACK for SAX
339 if (line.trim().length() != 0) { HACK for SAX
340 int comp = col.compare(lastLine, line);
343 errln("\tLine " + (count + 1) + "\tFailure: "
344 + showString(lastLine) + " should be leq "
347 logln("OK: " + line);
358 // ============ Handler for Numbers ============
// Number test: formats the "input" attribute with an ICU NumberFormat chosen by
// the number-type attribute and compares the trimmed output with the expected
// CLDR text; differences are written with logln.
359 addHandler("number", new Handler() {
360 public void handleResult(ULocale locale, String result) {
361 NumberFormat nf = null;
362 double v = Double.NaN;
// settings is a TreeMap, so attributes are visited in sorted name order.
363 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
364 String attributeName = (String) it.next();
365 String attributeValue = (String) settings.get(attributeName);
367 // Checks if the attribute name is a draft and whether
368 // or not it has been approved / contributed by CLDR yet
369 // otherwise, skips it because it is most likely rejected by ICU
370 if (attributeName.equals("draft")) {
371 if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
377 // Update the value to be checked
378 if (attributeName.equals("input")) {
379 v = Double.parseDouble(attributeValue);
383 // At this point, it must be a numberType
// Position of the attribute value within NumberNames.
384 int index = lookupValue(attributeValue, NumberNames);
387 logln("Getting number format for " + locale);
// One factory call per number type follows.
390 nf = NumberFormat.getInstance(locale);
393 nf = NumberFormat.getIntegerInstance(locale);
396 nf = NumberFormat.getNumberInstance(locale);
399 nf = NumberFormat.getPercentInstance(locale);
402 nf = NumberFormat.getScientificInstance(locale);
// Currency format: the attribute value itself is used as the currency code.
405 nf = NumberFormat.getCurrencyInstance(locale);
406 nf.setCurrency(Currency.getInstance(attributeValue));
409 String temp = nf.format(v).trim();
410 result = result.trim(); // HACK because of SAX
411 if (!temp.equals(result)) {
412 logln("Number: Locale: " + locale +
413 "\n\tType: " + attributeValue +
414 "\n\tDraft: " + settings.get("draft") +
415 "\n\tCLDR: <" + result + ">" +
416 "\n\tICU: <" + temp + ">");
423 // ============ Handler for Dates ============
// Date test: parses the "input" attribute with the shared iso parser, formats it
// with the ICU date/time format selected by dateType and timeType, and compares
// the trimmed output with the expected CLDR text; differences are written with logln.
424 addHandler("date", new Handler() {
425 public void handleResult(ULocale locale, String result) throws ParseException {
// Defaults to the current time until an "input" attribute is seen.
428 Date date = new Date();
429 boolean approved = true;
// settings is a TreeMap, so attributes are visited in sorted name order.
431 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
432 String attributeName = (String) it.next();
433 String attributeValue = (String) settings.get(attributeName);
435 // Checks if the attribute name is a draft and whether
436 // or not it has been approved / contributed by CLDR yet
437 // otherwise, skips it because it is most likely rejected by ICU
438 if (attributeName.equals("draft")) {
439 if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
446 // Update the value to be checked
447 if (attributeName.equals("input")) {
448 date = iso.parse(attributeValue);
451 // At this point, it must be either dateType or timeType
// Position of the attribute value within DateFormatNames.
452 int index = lookupValue(attributeValue, DateFormatNames);
453 if (attributeName.equals("dateType"))
455 else if (attributeName.equals("timeType"))
460 // The attribute value must be approved in order to be checked,
461 // if it hasn't been approved, it shouldn't be checked if it
464 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
466 String temp = dt.format(date).trim();
467 result = result.trim(); // HACK because of SAX
468 if (!temp.equals(result)) {
469 logln("DateTime: Locale: " + locale +
470 "\n\tDate: " + DateFormatNames[dateFormat] +
471 "\n\tTime: " + DateFormatNames[timeFormat] +
472 "\n\tDraft: " + settings.get("draft") +
473 "\n\tCLDR: <" + result + "> " +
474 "\n\tICU: <" + temp + ">");
// Chooses the ICU formatter for a date-style/time-style pair. The two ints are
// indexes into DateFormatNames / DateFormatValues, where 0 is "none": no date
// style gives a time-only format, no time style gives a date-only format, and
// otherwise a combined date-time format is used. The result is cast to SimpleDateFormat.
479 private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
481 logln("Getting date/time format for " + locale);
482 if (DEBUG && "ar_EG".equals(locale.toString())) {
486 if (dateFormat == 0) {
487 dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
489 System.out.print("getTimeInstance");
490 } else if (timeFormat == 0) {
491 dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
493 System.out.print("getDateInstance");
495 dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat],
498 System.out.print("getDateTimeInstance");
// Logs the style indexes together with the resulting pattern.
501 logln("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat) dt).toPattern());
502 return (SimpleDateFormat) dt;
506 // ============ Handler for Zones ============
// Zone test: formats the "date" attribute in time zone "zone" using the pattern
// from the "field" attribute and compares the trimmed output with the expected
// CLDR text; differences are written with logln. The "parse" attribute is read
// but only reported.
507 addHandler("zoneFields", new Handler() {
513 public void handleResult(ULocale locale, String result) throws ParseException {
// Copy the recognised attributes into the handler's fields.
514 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
515 String attributeName = (String) it.next();
516 String attributeValue = (String) settings.get(attributeName);
517 if (attributeName.equals("date")) {
518 date = attributeValue;
519 } else if (attributeName.equals("field")) {
520 pattern = attributeValue;
521 } else if (attributeName.equals("zone")) {
522 zone = attributeValue;
523 } else if (attributeName.equals("parse")) {
524 parse = attributeValue;
// Zones excluded by the ZONE_MATCH filter are not checked.
528 if (!ZONE_MATCH.reset(zone).matches()) return;
529 Date dateValue = iso.parse(date);
530 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
531 field.setTimeZone(TimeZone.getTimeZone(zone));
532 String temp = field.format(dateValue).trim();
533 // SKIP PARSE FOR NOW
534 result = result.trim(); // HACK because of SAX
535 if (!temp.equals(result)) {
536 temp = field.format(dateValue).trim(); // call again for debugging
537 logln("Zone Format: Locale: " + locale
538 + "\n\tZone: " + zone
539 + "\n\tDate: " + date
540 + "\n\tField: " + pattern
541 + "\n\tParse: " + parse
542 + "\n\tDraft: " + settings.get("draft")
543 + "\n\tCLDR: <" + result
544 + ">\n\tICU: <" + temp + ">");
550 // ============ Gorp for SAX ============
// Creates the validating SAX parser stored in SAX (used by _test).
554 SAXParserFactory factory = SAXParserFactory.newInstance();
555 factory.setValidating(true);
556 SAX = factory.newSAXParser();
557 } catch (Exception e) {
// Keep the original failure as the cause so the real reason is not lost;
// the message previously misspelled the class name.
558 throw new IllegalArgumentException("SAXParserFactory was unable to start.", e);
// SAX callbacks that drive the test: element starts select and configure a
// Handler, character data is buffered in lastChars, and the end of each
// "result" element triggers the check.
562 DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
// Shadows the outer class's DEBUG inside this anonymous class.
563 static final boolean DEBUG = false;
// Text appended by characters() and cleared in endElement().
564 StringBuffer lastChars = new StringBuffer();
565 // boolean justPopped = false;
568 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
569 // data.put(new ContextStack(contextStack), lastChars);
572 if (qName.equals("cldrTest")) {
// "result" element: copy every attribute into the active handler's settings.
574 } else if (qName.equals("result") && handler != null) {
575 for (int i = 0; i < attributes.getLength(); ++i) {
576 handler.set(attributes.getQName(i), attributes.getValue(i));
// Look up (and configure) the handler registered under this element name.
579 handler = getHandler(qName, attributes);
580 // handler.set("locale", uLocale.toString());
582 // if (DEBUG) logln("startElement:\t" + contextStack);
583 // justPopped = false;
584 } catch (RuntimeException e) {
// At the end of a "result" element, passes the buffered text to the active
// handler's checkResult, then empties the buffer for the next element.
590 public void endElement(String uri, String localName, String qName) throws SAXException {
592 // if (DEBUG) logln("endElement:\t" + contextStack);
593 if (qName.equals("result") && handler != null) {
594 handler.checkResult(lastChars.toString());
595 } else if (qName.length() != 0) {
596 // logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
598 lastChars.setLength(0);
599 // justPopped = true;
600 } catch (RuntimeException e) {
606 // Have to hack around the fact that the character data might be in pieces
// Appends each chunk to lastChars so that endElement sees the complete text.
607 public void characters(char[] ch, int start, int length) throws SAXException {
609 String value = new String(ch, start, length);
611 logln("characters:\t" + value);
612 lastChars.append(value);
613 // justPopped = false;
614 } catch (RuntimeException e) {
620 // just for debugging
// The callbacks below only log the event they receive.
622 public void notationDecl(String name, String publicId, String systemId) throws SAXException {
623 logln("notationDecl: " + name + ", " + publicId + ", " + systemId);
626 public void processingInstruction(String target, String data) throws SAXException {
627 logln("processingInstruction: " + target + ", " + data);
630 public void skippedEntity(String name) throws SAXException {
631 logln("skippedEntity: " + name);
634 public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName)
635 throws SAXException {
636 logln("unparsedEntityDecl: " + name + ", " + publicId + ", " + systemId + ", " + notationName);