2 **********************************************************************
\r
3 * Copyright (c) 2002-2010, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 **********************************************************************
\r
7 **********************************************************************
\r
9 package com.ibm.icu.dev.test.cldr;
\r
11 import java.io.File;
\r
12 import java.io.IOException;
\r
13 import java.io.PrintWriter;
\r
14 import java.io.StringWriter;
\r
15 import java.text.ParseException;
\r
16 import java.util.ArrayList;
\r
17 import java.util.Collection;
\r
18 import java.util.Date;
\r
19 import java.util.HashMap;
\r
20 import java.util.Iterator;
\r
21 import java.util.List;
\r
22 import java.util.Map;
\r
23 import java.util.Set;
\r
24 import java.util.TreeMap;
\r
25 import java.util.TreeSet;
\r
26 import java.util.regex.Matcher;
\r
27 import java.util.regex.Pattern;
\r
29 import javax.xml.parsers.SAXParser;
\r
30 import javax.xml.parsers.SAXParserFactory;
\r
32 import org.xml.sax.Attributes;
\r
33 import org.xml.sax.SAXException;
\r
34 import org.xml.sax.helpers.DefaultHandler;
\r
36 import com.ibm.icu.dev.test.TestFmwk;
\r
37 import com.ibm.icu.text.DateFormat;
\r
38 import com.ibm.icu.text.NumberFormat;
\r
39 import com.ibm.icu.text.SimpleDateFormat;
\r
40 import com.ibm.icu.text.UTF16;
\r
41 import com.ibm.icu.text.UnicodeSet;
\r
42 import com.ibm.icu.util.Currency;
\r
43 import com.ibm.icu.util.TimeZone;
\r
44 import com.ibm.icu.util.ULocale;
\r
47 * This is a test file that takes in the CLDR XML test files and tests them against
\r
48 * ICU4J. This test file is used to verify that ICU4J is implemented correctly.
\r
49 * As it stands, the test reports all errors to the console by logging them.
\r
50 * The logging is only possible if "-v" or verbose is set as an argument.
\r
51 * This will allow users to know what problems occurred within CLDR and ICU.
\r
52 * Collator was disabled in this test file and therefore will be skipped.
\r
55 * 1) In order for this to work correctly, you must download the latest CLDR data
\r
56 * in the form of XML. You must also set the CLDR directory using:
\r
57 * -DCLDR_DIRECTORY=<top level of cldr>
\r
58 * 2) You may also consider increasing the memory using -Xmx512m.
\r
59 * 3) For speed purposes, you may consider creating a temporary directory for the
\r
61 * -DCLDR_DTD_CACHE=<cldr cache directory>
\r
62 * 4) You may use other environment variables to narrow down your tests using:
\r
64 * -DXML_MATCH="de.*" (or whatever regex you want) to just test certain locales.
\r
65 * -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
\r
66 * -DZONE_MATCH="(?!America/Argentina).*"
\r
67 * -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
\r
70 * @author John Huan Vu (johnvu@us.ibm.com)
\r
72 public class TestCLDRVsICU extends TestFmwk {
\r
73 static final boolean DEBUG = false;
\r
75 // ULocale uLocale = ULocale.ENGLISH;
\r
76 // Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
\r
77 // static PrintWriter log;
\r
79 static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
\r
80 static String CLDR_DIRECTORY;
\r
// Start-up configuration: builds the LOCALE_MATCH, TEST_MATCH and ZONE_MATCH matchers from the
// XML_MATCH, TEST_MATCH and ZONE_MATCH settings (each defaulting to ".*") and resolves
// CLDR_DIRECTORY, printing a blank line before and after.
// NOTE(review): the line that opens this block (presumably "static {") is not visible in this view.
82 System.out.println();
\r
83 LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
\r
84 TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
\r
85 ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*");
\r
87 // CLDR_DIRECTORY is where all the CLDR XML test files are located
\r
88 // WARNING: THIS IS TEMPORARY DIRECTORY UNTIL THE FILES ARE STRAIGHTENED OUT
\r
89 CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
\r
90 System.out.println();
\r
93 private static Matcher getEnvironmentRegex(String key, String defaultValue) {
\r
94 return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
\r
// Reads the system property named by key via System.getProperty, can substitute defaultValue,
// and echoes the setting to stdout in -Dkey="value" form.
// NOTE(review): original lines 99, 101 and 103 are not visible in this view; presumably they hold
// the null check guarding the default assignment and the return of temp. Confirm against the
// complete file.
97 private static String getEnvironmentString(String key, String defaultValue) {
\r
98 String temp = System.getProperty(key);
\r
100 temp = defaultValue;
\r
102 System.out.print("-D" + key + "=\"" + temp + "\" ");
\r
106 public static void main(String[] args) throws Exception {
\r
107 new TestCLDRVsICU().run(args);
\r
// Sorted set of full locale identifiers (ULocale.toString()) recorded by addLocales.
110 Set allLocales = new TreeSet();
\r
// Test entry point: collects the language codes of the locales reported by NumberFormat and
// DateFormat (the Collator source is commented out) and walks them, checking each against
// LOCALE_MATCH.
// NOTE(review): original lines 125-128 are not visible in this view, so what happens to matching
// and non-matching locales is not shown here.
112 public void TestFiles() throws SAXException, IOException {
\r
113 // only get ICU's locales
\r
114 Set s = new TreeSet();
\r
115 addLocales(NumberFormat.getAvailableULocales(), s);
\r
116 addLocales(DateFormat.getAvailableULocales(), s);
\r
118 // johnvu: Collator was originally disabled
\r
119 // addLocales(Collator.getAvailableULocales(), s);
\r
121 // filter, to make tracking down bugs easier
\r
122 for (Iterator it = s.iterator(); it.hasNext();) {
\r
123 String locale = (String) it.next();
\r
124 if (!LOCALE_MATCH.reset(locale).matches())
\r
130 public void addLocales(ULocale[] list, Collection s) {
\r
131 for (int i = 0; i < list.length; ++i) {
\r
132 allLocales.add(list[i].toString());
\r
133 s.add(list[i].getLanguage());
\r
// Builds a language identifier from the locale's language code, appending "_" plus the script
// code when the script is non-empty.
// NOTE(review): the return statement (original line 142) is not visible in this view.
137 public String getLanguage(ULocale uLocale) {
\r
138 String result = uLocale.getLanguage();
\r
139 String script = uLocale.getScript();
\r
140 if (script.length() != 0)
\r
141 result += "_" + script;
\r
145 public void _test(String localeName) throws SAXException, IOException {
\r
146 // uLocale = new ULocale(localeName);
\r
147 // oLocale = uLocale.toLocale();
\r
149 File f = new File(CLDR_DIRECTORY, "test/" + localeName + ".xml");
\r
150 logln("Testing " + f.getCanonicalPath());
\r
151 SAX.parse(f, DEFAULT_HANDLER);
\r
// Helper whose transliterate method walks the input one char at a time and appends each char's
// value in hexadecimal (Integer.toHexString) to the result string.
// NOTE(review): original lines 159-168 are not visible in this view, so any per-character
// branching, padding or separators they contain are not described here.
154 private static class ToHex {
\r
155 public String transliterate(String in) {
\r
156 StringBuilder sb = new StringBuilder();
\r
157 for (int i = 0; i < in.length(); ++i) {
\r
158 char c = in.charAt(i);
\r
169 sb.append(Integer.toHexString((int) c));
\r
171 return sb.toString();
\r
175 // static Transliterator toUnicode = Transliterator.getInstance("any-hex");
\r
176 private static final ToHex toUnicode = new ToHex();
\r
178 static public String showString(String in) {
\r
179 return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
\r
182 // ============ SAX Handler Infrastructure ============
\r
184 abstract public class Handler {
\r
185 Map settings = new TreeMap();
\r
187 List currentLocales = new ArrayList();
\r
190 void setName(String name) {
\r
194 void set(String attributeName, String attributeValue) {
\r
195 // if (DEBUG) logln(attributeName + " => " + attributeValue);
\r
196 settings.put(attributeName, attributeValue);
\r
// Runs handleResult for every locale in currentLocales against the given value, reporting a
// non-zero failure count and any caught exception (with its stack trace) through errln.
// Results whose "draft" setting is "unconfirmed" or "provisional" are skipped.
// NOTE(review): settings.get("draft") is dereferenced without a null check; if settings holds no
// "draft" entry this throws NullPointerException. Confirm the test data always supplies one.
// NOTE(review): original lines 202, 204, 211-214, 219 and 221-223 are not visible in this view.
199 void checkResult(String value) {
\r
200 if (settings.get("draft").equals("unconfirmed") || settings.get("draft").equals("provisional")) {
\r
201 return; // skip draft
\r
203 ULocale ul = new ULocale("xx");
\r
205 for (int i = 0; i < currentLocales.size(); ++i) {
\r
206 ul = (ULocale) currentLocales.get(i);
\r
207 // loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
\r
208 handleResult(ul, value);
\r
209 if (failures != 0) {
\r
210 errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH)
\r
215 } catch (Exception e) {
\r
216 StringWriter sw = new StringWriter();
\r
217 PrintWriter pw = new PrintWriter(sw);
\r
218 e.printStackTrace(pw);
\r
220 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
\r
// Builds a log line from the message, the handler name and every name=<value> pair currently
// held in settings.
// NOTE(review): original lines 230-232 are not visible in this view; presumably they close the
// bracket and emit the string. Confirm against the complete file.
224 public void loglnSAX(String message) {
\r
225 String temp = message + "\t[" + name;
\r
226 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
227 String attributeName = (String) it.next();
\r
228 String attributeValue = (String) settings.get(attributeName);
\r
229 temp += " " + attributeName + "=<" + attributeValue + ">";
\r
// Linear search for x in list using equals; an "Unknown String" message is logged through
// loglnSAX after the loop.
// NOTE(review): the return statements (original lines 237 and 240) are not visible in this view;
// presumably the matching index is returned from the loop and a sentinel afterwards. Confirm.
234 int lookupValue(Object x, Object[] list) {
\r
235 for (int i = 0; i < list.length; ++i) {
\r
236 if (x.equals(list[i]))
\r
239 loglnSAX("Unknown String: " + x);
\r
243 abstract void handleResult(ULocale currentLocale, String value) throws Exception;
\r
246 * @param attributes
\r
// Rebuilds currentLocales from the space-separated "locales" attribute: the value is split into
// a fixed 50-slot array and slots are turned into ULocale objects.
// NOTE(review): the membership test is allLocales.contains(""). It checks for the empty string
// rather than currentLocaleString[i], which looks unintended given the "not in ICU4J" message.
// NOTE(review): attributes.getValue("locales") may be null when the attribute is absent; confirm
// that Utility.split tolerates a null input.
// NOTE(review): original lines 255, 258-259 and 261-262 are not visible in this view; presumably
// they skip empty slots and skipped locales. Confirm against the complete file.
248 public void setAttributes(Attributes attributes) {
\r
249 String localeList = attributes.getValue("locales");
\r
250 String[] currentLocaleString = new String[50];
\r
251 com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
\r
252 currentLocales.clear();
\r
253 for (int i = 0; i < currentLocaleString.length; ++i) {
\r
254 if (currentLocaleString[i].length() == 0)
\r
256 if (allLocales.contains("")) {
\r
257 logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
\r
260 currentLocales.add(new ULocale(currentLocaleString[i]));
\r
263 logln("Setting locales: " + currentLocales);
\r
// Looks up the handler registered under name in RegisteredHandlers, logs when none is found,
// and passes the element's attributes to the handler.
// NOTE(review): original line 273 is not visible in this view. Unless it is an "else",
// result.setAttributes(attributes) would be called on a null handler for unknown test types.
// NOTE(review): the return statement is also not visible here.
267 public Handler getHandler(String name, Attributes attributes) {
\r
269 logln("Creating Handler: " + name);
\r
270 Handler result = (Handler) RegisteredHandlers.get(name);
\r
271 if (result == null)
\r
272 logln("Unexpected test type: " + name);
\r
274 result.setAttributes(attributes);
\r
279 public void addHandler(String name, Handler handler) {
\r
280 if (!TEST_MATCH.reset(name).matches())
\r
281 handler = new NullHandler();
\r
282 handler.setName(name);
\r
283 RegisteredHandlers.put(name, handler);
\r
286 Map RegisteredHandlers = new HashMap();
\r
// Handler that addHandler substitutes for test types filtered out by TEST_MATCH.
// NOTE(review): the body of handleResult is not visible in this view (presumably empty).
288 class NullHandler extends Handler {
\r
289 void handleResult(ULocale currentLocale, String value) throws Exception {
\r
293 // ============ Statics for Date/Number Support ============
\r
// Shared time zone and ISO-style parser/formatter for the "input" and "date" attributes.
295 static TimeZone utc = TimeZone.getTimeZone("GMT");
\r
296 static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
\r
// NOTE(review): the lines around this call (presumably a static initializer) are not visible here.
298 iso.setTimeZone(utc);
\r
// DateFormat style constants, index-aligned with DateFormatNames below (-1 pairs with "none").
301 static int[] DateFormatValues = { -1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL };
\r
303 // The following are different date format types that are part of the parameters in CLDR
\r
304 static String[] DateFormatNames = { "none", "short", "medium", "long", "full" };
\r
306 // The following are different number types that are part of the parameters in CLDR
\r
307 static String[] NumberNames = { "standard", "integer", "decimal", "percent", "scientific", "GBP" };
\r
310 // ============ Handler for Collation ============
\r
311 static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
\r
// Returns a copy of in without the code points contained in toRemove. The loop advances by
// UTF16.getCharCount(cp), so it steps one code point at a time rather than one char at a time.
// NOTE(review): the declaration of cp (original line 314) is not visible in this view.
313 static String remove(String in, UnicodeSet toRemove) {
\r
315 StringBuffer result = new StringBuffer();
\r
316 for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
\r
317 cp = UTF16.charAt(in, i);
\r
318 if (!toRemove.contains(cp))
\r
319 UTF16.append(result, cp);
\r
321 return result.toString();
\r
325 // johnvu: Collator was originally disabled
\r
326 // TODO (dougfelt) move this test
\r
328 addHandler("collation", new Handler() {
\r
329 public void handleResult(ULocale currentLocale, String value) {
\r
330 Collator col = Collator.getInstance(currentLocale);
\r
331 String lastLine = "";
\r
333 for (int pos = 0; pos < value.length();) {
\r
334 int nextPos = value.indexOf('\n', pos);
\r
336 nextPos = value.length();
\r
337 String line = value.substring(pos, nextPos);
\r
338 line = remove(line, controlsAndSpace); HACK for SAX
\r
339 if (line.trim().length() != 0) { HACK for SAX
\r
340 int comp = col.compare(lastLine, line);
\r
343 errln("\tLine " + (count + 1) + "\tFailure: "
\r
344 + showString(lastLine) + " should be leq "
\r
345 + showString(line));
\r
346 } else if (DEBUG) {
\r
347 logln("OK: " + line);
\r
358 // ============ Handler for Numbers ============
\r
// Registers the "number" handler. For each result it walks the collected attributes: "draft"
// is checked for "approved"/"contributed", "input" is parsed as a double, and any other
// attribute value is looked up in NumberNames to choose a NumberFormat factory. The formatted
// value is trimmed and compared with the trimmed CLDR text; mismatches are reported via logln.
// NOTE(review): many original lines (372-376, 380-382, 385-386, 388-389, 391-392, ... 417-421)
// are not visible in this view, including the branching that selects among the factories.
359 addHandler("number", new Handler() {
\r
360 public void handleResult(ULocale locale, String result) {
\r
361 NumberFormat nf = null;
\r
362 double v = Double.NaN;
\r
363 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
364 String attributeName = (String) it.next();
\r
365 String attributeValue = (String) settings.get(attributeName);
\r
367 // Checks if the attribute name is a draft and whether
\r
368 // or not it has been approved / contributed by CLDR yet
\r
369 // otherwise, skips it because it is most likely rejected by ICU
\r
370 if (attributeName.equals("draft")) {
\r
371 if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
\r
377 // Update the value to be checked
\r
378 if (attributeName.equals("input")) {
\r
379 v = Double.parseDouble(attributeValue);
\r
383 // At this point, it must be a numberType
\r
384 int index = lookupValue(attributeValue, NumberNames);
\r
387 logln("Getting number format for " + locale);
\r
390 nf = NumberFormat.getInstance(locale);
\r
393 nf = NumberFormat.getIntegerInstance(locale);
\r
396 nf = NumberFormat.getNumberInstance(locale);
\r
399 nf = NumberFormat.getPercentInstance(locale);
\r
402 nf = NumberFormat.getScientificInstance(locale);
\r
405 nf = NumberFormat.getCurrencyInstance(locale);
\r
406 nf.setCurrency(Currency.getInstance(attributeValue));
\r
409 String temp = nf.format(v).trim();
\r
410 result = result.trim(); // HACK because of SAX
\r
411 if (!temp.equals(result)) {
\r
412 logln("Number: Locale: " + locale +
\r
413 "\n\tType: " + attributeValue +
\r
414 "\n\tDraft: " + settings.get("draft") +
\r
415 "\n\tCLDR: <" + result + ">" +
\r
416 "\n\tICU: <" + temp + ">");
\r
423 // ============ Handler for Dates ============
\r
// Registers the "date" handler. For each result it walks the collected attributes: "draft" is
// checked for "approved"/"contributed", "input" is parsed with the shared iso format, and the
// "dateType"/"timeType" values are mapped to indexes in DateFormatNames. The date is then
// formatted in UTC with the format from getDateFormat, trimmed, and compared with the trimmed
// CLDR text; mismatches are reported via logln.
// NOTE(review): several original lines (440-445, 449-450, 457-459, 463, 475-478) are not visible
// in this view, including how the approved flag is updated and used.
424 addHandler("date", new Handler() {
\r
425 public void handleResult(ULocale locale, String result) throws ParseException {
\r
426 int dateFormat = 0;
\r
427 int timeFormat = 0;
\r
428 Date date = new Date();
\r
429 boolean approved = true;
\r
431 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
432 String attributeName = (String) it.next();
\r
433 String attributeValue = (String) settings.get(attributeName);
\r
435 // Checks if the attribute name is a draft and whether
\r
436 // or not it has been approved / contributed by CLDR yet
\r
437 // otherwise, skips it because it is most likely rejected by ICU
\r
438 if (attributeName.equals("draft")) {
\r
439 if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
\r
446 // Update the value to be checked
\r
447 if (attributeName.equals("input")) {
\r
448 date = iso.parse(attributeValue);
\r
451 // At this point, it must be either dateType or timeType
\r
452 int index = lookupValue(attributeValue, DateFormatNames);
\r
453 if (attributeName.equals("dateType"))
\r
454 dateFormat = index;
\r
455 else if (attributeName.equals("timeType"))
\r
456 timeFormat = index;
\r
460 // The attribute value must be approved in order to be checked,
\r
461 // if it hasn't been approved, it shouldn't be checked if it
\r
462 // matches with ICU
\r
464 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
\r
465 dt.setTimeZone(utc);
\r
466 String temp = dt.format(date).trim();
\r
467 result = result.trim(); // HACK because of SAX
\r
468 if (!temp.equals(result)) {
\r
469 logln("DateTime: Locale: " + locale +
\r
470 "\n\tDate: " + DateFormatNames[dateFormat] +
\r
471 "\n\tTime: " + DateFormatNames[timeFormat] +
\r
472 "\n\tDraft: " + settings.get("draft") +
\r
473 "\n\tCLDR: <" + result + "> " +
\r
474 "\n\tICU: <" + temp + ">");
\r
// Chooses the DateFormat factory from the two style indexes: a time-only instance when
// dateFormat is 0, a date-only instance when timeFormat is 0, and a combined date/time
// instance in the remaining branch. The indexes are translated through DateFormatValues.
// The result is cast to SimpleDateFormat, so a non-SimpleDateFormat instance would raise
// ClassCastException.
// NOTE(review): original lines 480, 484-485, 488, 492, 494, 496-497 and 499-500 are not visible
// in this view, including the declaration of dt and the conditions guarding the print calls.
479 private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
\r
481 logln("Getting date/time format for " + locale);
\r
482 if (DEBUG && "ar_EG".equals(locale.toString())) {
\r
483 logln("debug here");
\r
486 if (dateFormat == 0) {
\r
487 dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
\r
489 System.out.print("getTimeInstance");
\r
490 } else if (timeFormat == 0) {
\r
491 dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
\r
493 System.out.print("getDateInstance");
\r
495 dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat],
\r
498 System.out.print("getDateTimeInstance");
\r
501 logln("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat) dt).toPattern());
\r
502 return (SimpleDateFormat) dt;
\r
506 // ============ Handler for Zones ============
\r
// Registers the "zoneFields" handler. It copies the "date", "field", "zone" and "parse"
// attributes into handler fields, returns early when the zone does not match ZONE_MATCH, then
// formats the ISO-parsed date with the "field" pattern in that zone and compares the trimmed
// output with the trimmed CLDR text; mismatches are reported via logln. The "parse" value is
// only echoed in the log message ("SKIP PARSE FOR NOW").
// NOTE(review): the declarations of date, zone and parse (original lines 508-510) and lines
// 525-527 and 545-548 are not visible in this view.
507 addHandler("zoneFields", new Handler() {
\r
511 String pattern = "";
\r
513 public void handleResult(ULocale locale, String result) throws ParseException {
\r
514 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
515 String attributeName = (String) it.next();
\r
516 String attributeValue = (String) settings.get(attributeName);
\r
517 if (attributeName.equals("date")) {
\r
518 date = attributeValue;
\r
519 } else if (attributeName.equals("field")) {
\r
520 pattern = attributeValue;
\r
521 } else if (attributeName.equals("zone")) {
\r
522 zone = attributeValue;
\r
523 } else if (attributeName.equals("parse")) {
\r
524 parse = attributeValue;
\r
528 if (!ZONE_MATCH.reset(zone).matches()) return;
\r
529 Date dateValue = iso.parse(date);
\r
530 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
\r
531 field.setTimeZone(TimeZone.getTimeZone(zone));
\r
532 String temp = field.format(dateValue).trim();
\r
533 // SKIP PARSE FOR NOW
\r
534 result = result.trim(); // HACK because of SAX
\r
535 if (!temp.equals(result)) {
\r
536 temp = field.format(dateValue).trim(); // call again for debugging
\r
537 logln("Zone Format: Locale: " + locale
\r
538 + "\n\tZone: " + zone
\r
539 + "\n\tDate: " + date
\r
540 + "\n\tField: " + pattern
\r
541 + "\n\tParse: " + parse
\r
542 + "\n\tDraft: " + settings.get("draft")
\r
543 + "\n\tCLDR: <" + result
\r
544 + ">\n\tICU: <" + temp + ">");
\r
550 // ============ Gorp for SAX ============
\r
// Creates a validating SAX parser through SAXParserFactory and stores it in SAX.
// NOTE(review): the catch block discards the caught exception (no cause is passed to the new
// IllegalArgumentException), and the message misspells "SAXParserFactory". Both are worth fixing
// once the complete block is available.
// NOTE(review): the lines that open this block (original 551-553) are not visible in this view.
554 SAXParserFactory factory = SAXParserFactory.newInstance();
\r
555 factory.setValidating(true);
\r
556 SAX = factory.newSAXParser();
\r
557 } catch (Exception e) {
\r
558 throw new IllegalArgumentException("SAXParserFacotry was unable to start.");
\r
562 DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
\r
563 static final boolean DEBUG = false;
\r
564 StringBuffer lastChars = new StringBuffer();
\r
565 // boolean justPopped = false;
\r
// SAX callback for an opening tag. "cldrTest" elements get their own branch; a "result" element
// with an active handler copies every attribute into that handler via set(); the remaining
// visible assignment obtains a handler for the element name through getHandler.
// RuntimeExceptions are printed with printStackTrace.
// NOTE(review): original lines 570-571, 573, 577-578, 581 and 586-588 are not visible in this
// view, including the "cldrTest" branch body and whatever follows printStackTrace.
568 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
\r
569 // data.put(new ContextStack(contextStack), lastChars);
\r
572 if (qName.equals("cldrTest")) {
\r
574 } else if (qName.equals("result") && handler != null) {
\r
575 for (int i = 0; i < attributes.getLength(); ++i) {
\r
576 handler.set(attributes.getQName(i), attributes.getValue(i));
\r
579 handler = getHandler(qName, attributes);
\r
580 // handler.set("locale", uLocale.toString());
\r
582 // if (DEBUG) logln("startElement:\t" + contextStack);
\r
583 // justPopped = false;
\r
584 } catch (RuntimeException e) {
\r
585 e.printStackTrace();
\r
// SAX callback for a closing tag. When a "result" element ends and a handler is active, the
// accumulated character data is passed to handler.checkResult; the character buffer is then
// reset to empty. RuntimeExceptions are printed with printStackTrace.
// NOTE(review): original lines 591, 597 and 602-604 are not visible in this view.
590 public void endElement(String uri, String localName, String qName) throws SAXException {
\r
592 // if (DEBUG) logln("endElement:\t" + contextStack);
\r
593 if (qName.equals("result") && handler != null) {
\r
594 handler.checkResult(lastChars.toString());
\r
595 } else if (qName.length() != 0) {
\r
596 // logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
\r
598 lastChars.setLength(0);
\r
599 // justPopped = true;
\r
600 } catch (RuntimeException e) {
\r
601 e.printStackTrace();
\r
606 // Have to hack around the fact that the character data might be in pieces
\r
// SAX callback for character data: appends each delivered chunk to lastChars, since (as the
// comment above this method notes) the text of one element may arrive in several pieces.
// RuntimeExceptions are printed with printStackTrace.
// NOTE(review): original lines 608, 610 and 616-618 are not visible in this view; line 610
// presumably guards the logln call. Confirm against the complete file.
607 public void characters(char[] ch, int start, int length) throws SAXException {
\r
609 String value = new String(ch, start, length);
\r
611 logln("characters:\t" + value);
\r
612 lastChars.append(value);
\r
613 // justPopped = false;
\r
614 } catch (RuntimeException e) {
\r
615 e.printStackTrace();
\r
620 // just for debugging
\r
622 public void notationDecl(String name, String publicId, String systemId) throws SAXException {
\r
623 logln("notationDecl: " + name + ", " + publicId + ", " + systemId);
\r
626 public void processingInstruction(String target, String data) throws SAXException {
\r
627 logln("processingInstruction: " + target + ", " + data);
\r
630 public void skippedEntity(String name) throws SAXException {
\r
631 logln("skippedEntity: " + name);
\r
634 public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName)
\r
635 throws SAXException {
\r
636 logln("unparsedEntityDecl: " + name + ", " + publicId + ", " + systemId + ", " + notationName);
\r