2 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
5 **********************************************************************
\r
6 * Copyright (c) 2002-2009, International Business Machines
\r
7 * Corporation and others. All Rights Reserved.
\r
8 **********************************************************************
\r
10 **********************************************************************
\r
12 package com.ibm.icu.dev.test.cldr;
\r
14 import java.io.File;
\r
16 //import org.unicode.cldr.util.LanguageTagParser;
\r
17 //import org.unicode.cldr.util.Utility;
\r
18 import java.io.IOException;
\r
19 import java.io.PrintWriter;
\r
20 import java.io.StringWriter;
\r
21 import java.text.ParseException;
\r
22 import java.util.ArrayList;
\r
23 import java.util.Collection;
\r
24 import java.util.Date;
\r
25 import java.util.HashMap;
\r
26 import java.util.List;
\r
27 import java.util.Map;
\r
28 import java.util.Set;
\r
29 import java.util.TreeMap;
\r
30 import java.util.Iterator;
\r
31 import java.util.TreeSet;
\r
32 import java.util.regex.Matcher;
\r
33 import java.util.regex.Pattern;
\r
35 import javax.xml.parsers.SAXParser;
\r
36 import javax.xml.parsers.SAXParserFactory;
\r
38 //import org.unicode.cldr.test.CLDRTest;
\r
39 //import org.unicode.cldr.tool.GenerateCldrTests;
\r
40 import org.xml.sax.Attributes;
\r
41 import org.xml.sax.SAXException;
\r
42 import org.xml.sax.helpers.DefaultHandler;
\r
44 import com.ibm.icu.util.Currency;
\r
45 import com.ibm.icu.util.TimeZone;
\r
46 import com.ibm.icu.util.ULocale;
\r
47 import com.ibm.icu.dev.test.TestFmwk;
\r
49 import com.ibm.icu.text.Collator;
\r
50 import com.ibm.icu.text.DateFormat;
\r
51 import com.ibm.icu.text.NumberFormat;
\r
52 import com.ibm.icu.text.SimpleDateFormat;
\r
53 import com.ibm.icu.text.Transliterator;
\r
54 import com.ibm.icu.text.UTF16;
\r
55 import com.ibm.icu.text.UnicodeSet;
\r
58 * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
\r
61 * 1. for this to work right, you have to have downloaded the CLDR data, and
\r
62 * then set the CLDR directory correctly, using
\r
63 * -DCLDR_DIRECTORY=<top level of cldr>
\r
 * 2. You probably also need to increase memory, e.g. with -Xmx512m
\r
65 * 3. For speed, you should also use -DCLDR_DTD_CACHE=C:\cldrcache\, where
\r
66 * C:\cldrcache\ is a temp directory to keep the program from hitting the net for
\r
 * 4. You may use other system properties (-D options) to narrow what you test, e.g.
 *    -DXML_MATCH=".*" -DTEST_MATCH="zone.*" -DZONE_MATCH="(?!America/Argentina).*"
\r
70 * a. -DXML_MATCH="de.*" (or whatever regex you want) to just
\r
71 * test certain locales.
\r
72 * b. -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
\r
73 * c. -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
\r
76 public class TestCLDRVsICU extends TestFmwk {
\r
77 static final boolean DEBUG = false;
\r
79 //ULocale uLocale = ULocale.ENGLISH;
\r
80 //Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
\r
81 //static PrintWriter log;
\r
83 static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
\r
84 static String CLDR_DIRECTORY;
\r
86 System.out.println();
\r
87 LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
\r
88 TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
\r
89 ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); // example
\r
91 // WARNING: THIS IS TEMPORARY UNTIL I GET THE FILES STRAIGHTENED OUT
\r
92 CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
\r
93 System.out.println();
\r
96 private static Matcher getEnvironmentRegex(String key, String defaultValue) {
\r
97 return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
\r
100 private static String getEnvironmentString(String key, String defaultValue) {
\r
101 String temp = System.getProperty(key);
\r
102 if (temp == null) temp = defaultValue;
\r
103 else System.out.print("-D" + key + "=\"" + temp + "\" ");
\r
107 public static void main(String[] args) throws Exception {
\r
108 new TestCLDRVsICU().run(args);
\r
111 Set allLocales = new TreeSet();
\r
113 public void TestFiles() throws SAXException, IOException {
\r
114 // only get ICU's locales
\r
115 Set s = new TreeSet();
\r
116 addLocales(NumberFormat.getAvailableULocales(), s);
\r
117 addLocales(DateFormat.getAvailableULocales(), s);
\r
118 addLocales(Collator.getAvailableULocales(), s);
\r
120 // filter, to make tracking down bugs easier
\r
122 for (Iterator it = s.iterator(); it.hasNext();) {
\r
123 String locale = (String)it.next();
\r
124 if (!LOCALE_MATCH.reset(locale).matches()) continue;
\r
129 public void addLocales(ULocale[] list, Collection s) {
\r
130 for (int i = 0; i < list.length; ++i) {
\r
131 allLocales.add(list[i].toString());
\r
132 s.add(list[i].getLanguage());
\r
136 public String getLanguage(ULocale uLocale) {
\r
137 String result = uLocale.getLanguage();
\r
138 String script = uLocale.getScript();
\r
139 if (script.length() != 0) result += "_" + script;
\r
143 public void _test(String localeName) throws SAXException, IOException {
\r
144 //uLocale = new ULocale(localeName);
\r
145 //oLocale = uLocale.toLocale();
\r
147 File f = new File(CLDR_DIRECTORY + "common\\test\\"+ localeName + ".xml");
\r
148 logln("Testing " + f.getCanonicalPath());
\r
149 SAX.parse(f, DEFAULT_HANDLER);
\r
152 static Transliterator toUnicode = Transliterator.getInstance("any-hex");
\r
153 static public String showString(String in) {
\r
154 return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
\r
156 // ============ SAX Handler Infrastructure ============
\r
158 abstract public class Handler {
\r
159 Map settings = new TreeMap();
\r
161 List currentLocales = new ArrayList();
\r
164 void setName(String name) {
\r
167 void set(String attributeName, String attributeValue) {
\r
168 //if (DEBUG) logln(attributeName + " => " + attributeValue);
\r
169 settings.put(attributeName, attributeValue);
\r
171 void checkResult(String value) {
\r
172 if ("true".equals(settings.get("draft"))) {
\r
173 return; // skip draft
\r
175 ULocale ul = new ULocale("xx");
\r
177 for (int i = 0; i < currentLocales.size(); ++i) {
\r
178 ul = (ULocale)currentLocales.get(i);
\r
179 //loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
\r
180 handleResult(ul, value);
\r
181 if (failures != 0) {
\r
182 errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")");
\r
186 } catch (Exception e) {
\r
187 StringWriter sw = new StringWriter();
\r
188 PrintWriter pw = new PrintWriter(sw);
\r
189 e.printStackTrace(pw);
\r
191 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
\r
194 public void loglnSAX(String message) {
\r
195 String temp = message + "\t[" + name;
\r
196 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
197 String attributeName = (String) it.next();
\r
198 String attributeValue = (String) settings.get(attributeName);
\r
199 temp += " " + attributeName + "=<" + attributeValue + ">";
\r
203 int lookupValue(Object x, Object[] list) {
\r
204 for (int i = 0; i < list.length; ++i) {
\r
205 if (x.equals(list[i])) return i;
\r
207 loglnSAX("Unknown String: " + x);
\r
210 abstract void handleResult(ULocale currentLocale, String value) throws Exception;
\r
212 * @param attributes
\r
214 public void setAttributes(Attributes attributes) {
\r
215 String localeList = attributes.getValue("locales");
\r
216 String[] currentLocaleString = new String[50];
\r
217 com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
\r
218 currentLocales.clear();
\r
219 for (int i = 0; i < currentLocaleString.length; ++i) {
\r
220 if (currentLocaleString[i].length() == 0) continue;
\r
221 if (allLocales.contains("")) {
\r
222 logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
\r
225 currentLocales.add(new ULocale(currentLocaleString[i]));
\r
227 if (DEBUG) logln("Setting locales: " + currentLocales);
\r
231 public Handler getHandler(String name, Attributes attributes) {
\r
232 if (DEBUG) logln("Creating Handler: " + name);
\r
233 Handler result = (Handler) RegisteredHandlers.get(name);
\r
234 if (result == null) logln("Unexpected test type: " + name);
\r
236 result.setAttributes(attributes);
\r
241 public void addHandler(String name, Handler handler) {
\r
242 if (!TEST_MATCH.reset(name).matches()) handler = new NullHandler();
\r
243 handler.setName(name);
\r
244 RegisteredHandlers.put(name, handler);
\r
246 Map RegisteredHandlers = new HashMap();
\r
248 class NullHandler extends Handler {
\r
249 void handleResult(ULocale currentLocale, String value) throws Exception {}
\r
252 // ============ Statics for Date/Number Support ============
\r
254 static TimeZone utc = TimeZone.getTimeZone("GMT");
\r
255 static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
\r
257 iso.setTimeZone(utc);
\r
259 static int[] DateFormatValues = {-1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL};
\r
260 static String[] DateFormatNames = {"none", "short", "medium", "long", "full"};
\r
262 static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific", "GBP"};
\r
265 // ============ Handler for Collation ============
\r
266 static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
\r
268 static String remove(String in, UnicodeSet toRemove) {
\r
270 StringBuffer result = new StringBuffer();
\r
271 for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
\r
272 cp = UTF16.charAt(in, i);
\r
273 if (!toRemove.contains(cp)) UTF16.append(result, cp);
\r
275 return result.toString();
\r
279 addHandler("collation", new Handler() {
\r
280 public void handleResult(ULocale currentLocale, String value) {
\r
281 Collator col = Collator.getInstance(currentLocale);
\r
282 String lastLine = "";
\r
284 for (int pos = 0; pos < value.length();) {
\r
285 int nextPos = value.indexOf('\n', pos);
\r
287 nextPos = value.length();
\r
288 String line = value.substring(pos, nextPos);
\r
289 line = remove(line, controlsAndSpace); // HACK for SAX
\r
290 if (line.trim().length() != 0) { // HACK for SAX
\r
291 int comp = col.compare(lastLine, line);
\r
294 errln("\tLine " + (count + 1) + "\tFailure: "
\r
295 + showString(lastLine) + " should be leq "
\r
296 + showString(line));
\r
297 } else if (DEBUG) {
\r
298 logln("OK: " + line);
\r
308 // ============ Handler for Numbers ============
\r
309 addHandler("number", new Handler() {
\r
310 public void handleResult(ULocale locale, String result) {
\r
311 NumberFormat nf = null;
\r
312 double v = Double.NaN;
\r
313 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
314 String attributeName = (String) it.next();
\r
315 String attributeValue = (String) settings
\r
316 .get(attributeName);
\r
317 if (attributeName.equals("input")) {
\r
318 v = Double.parseDouble(attributeValue);
\r
321 // must be either numberType at this point
\r
322 int index = lookupValue(attributeValue, NumberNames);
\r
323 if (DEBUG) logln("Getting number format for " + locale);
\r
325 case 0: nf = NumberFormat.getInstance(locale); break;
\r
326 case 1: nf = NumberFormat.getIntegerInstance(locale); break;
\r
327 case 2: nf = NumberFormat.getNumberInstance(locale); break;
\r
328 case 3: nf = NumberFormat.getPercentInstance(locale); break;
\r
329 case 4: nf = NumberFormat.getScientificInstance(locale); break;
\r
330 default: nf = NumberFormat.getCurrencyInstance(locale);
\r
331 nf.setCurrency(Currency.getInstance(attributeValue)); break;
\r
333 String temp = nf.format(v).trim();
\r
334 result = result.trim(); // HACK because of SAX
\r
335 if (!temp.equals(result)) {
\r
336 errln("Number: Locale: " + locale
\r
337 + ", \tType: " + attributeValue
\r
338 + ", \tDraft: " + settings.get("draft")
\r
339 + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");
\r
346 // ============ Handler for Dates ============
\r
347 addHandler("date", new Handler() {
\r
348 public void handleResult(ULocale locale, String result) throws ParseException {
\r
349 int dateFormat = 0;
\r
350 int timeFormat = 0;
\r
351 Date date = new Date();
\r
352 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
353 String attributeName = (String) it.next();
\r
354 String attributeValue = (String) settings
\r
355 .get(attributeName);
\r
356 if (attributeName.equals("input")) {
\r
357 date = iso.parse(attributeValue);
\r
360 // must be either dateType or timeType at this point
\r
361 int index = lookupValue(attributeValue, DateFormatNames);
\r
362 if (attributeName.equals("dateType"))
\r
363 dateFormat = index;
\r
365 timeFormat = index;
\r
368 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
\r
369 dt.setTimeZone(utc);
\r
370 String temp = dt.format(date).trim();
\r
371 result = result.trim(); // HACK because of SAX
\r
372 if (!temp.equals(result)) {
\r
373 errln("DateTime: Locale: " + locale
\r
374 + ", \tDate: " + DateFormatNames[dateFormat]
\r
375 + ", \tTime: " + DateFormatNames[timeFormat]
\r
376 + ", \tDraft: " + settings.get("draft")
\r
377 + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");
\r
381 private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
\r
382 if (DEBUG) logln("Getting date/time format for " + locale);
\r
383 if (DEBUG && "ar_EG".equals(locale.toString())) {
\r
384 System.out.println("debug here");
\r
387 if (dateFormat == 0) {
\r
388 dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
\r
389 if (DEBUG) System.out.print("getTimeInstance");
\r
390 } else if (timeFormat == 0) {
\r
391 dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
\r
392 if (DEBUG) System.out.print("getDateInstance");
\r
394 dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat], locale);
\r
395 if (DEBUG) System.out.print("getDateTimeInstance");
\r
397 if (DEBUG) System.out.println("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat)dt).toPattern());
\r
398 return (SimpleDateFormat)dt;
\r
402 // ============ Handler for Zones ============
\r
403 addHandler("zoneFields", new Handler() {
\r
407 String pattern = "";
\r
409 public void handleResult(ULocale locale, String result) throws ParseException {
\r
410 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
\r
411 String attributeName = (String) it.next();
\r
412 String attributeValue = (String) settings.get(attributeName);
\r
413 if (attributeName.equals("date")) {
\r
414 date = attributeValue;
\r
415 } else if (attributeName.equals("field")) {
\r
416 pattern = attributeValue;
\r
417 } else if (attributeName.equals("zone")) {
\r
418 zone = attributeValue;
\r
419 } else if (attributeName.equals("parse")) {
\r
420 parse = attributeValue;
\r
423 if (!ZONE_MATCH.reset(zone).matches()) return;
\r
424 Date dateValue = iso.parse(date);
\r
425 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
\r
426 field.setTimeZone(TimeZone.getTimeZone(zone));
\r
427 String temp = field.format(dateValue).trim();
\r
428 // SKIP PARSE FOR NOW
\r
429 result = result.trim(); // HACK because of SAX
\r
430 if (!temp.equals(result)) {
\r
431 temp = field.format(dateValue).trim(); // call again for debugging
\r
432 errln("Zone Format: Locale: " + locale
\r
433 + ", \tZone: " + zone
\r
434 + ", \tDate: " + date
\r
435 + ", \tField: " + pattern
\r
436 + ", \tParse: " + parse
\r
437 + ", \tDraft: " + settings.get("draft")
\r
438 + ", \tCLDR: <" + result
\r
439 + ">, \tICU: <" + temp + ">");
\r
445 // ============ Gorp for SAX ============
\r
449 SAXParserFactory factory = SAXParserFactory.newInstance();
\r
450 factory.setValidating(true);
\r
451 SAX = factory.newSAXParser();
\r
452 } catch (Exception e) {
\r
453 throw new IllegalArgumentException("can't start");
\r
457 DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
\r
458 static final boolean DEBUG = false;
\r
459 StringBuffer lastChars = new StringBuffer();
\r
460 //boolean justPopped = false;
\r
463 public void startElement(
\r
467 Attributes attributes)
\r
468 throws SAXException {
\r
469 //data.put(new ContextStack(contextStack), lastChars);
\r
472 if (qName.equals("cldrTest")) {
\r
474 } else if (qName.equals("result")) {
\r
475 for (int i = 0; i < attributes.getLength(); ++i) {
\r
476 handler.set(attributes.getQName(i), attributes.getValue(i));
\r
479 handler = getHandler(qName, attributes);
\r
480 //handler.set("locale", uLocale.toString());
\r
482 //if (DEBUG) logln("startElement:\t" + contextStack);
\r
483 //justPopped = false;
\r
484 } catch (RuntimeException e) {
\r
485 e.printStackTrace();
\r
489 public void endElement(String uri, String localName, String qName)
\r
490 throws SAXException {
\r
492 //if (DEBUG) logln("endElement:\t" + contextStack);
\r
493 if (qName.equals("result")) handler.checkResult(lastChars.toString());
\r
494 else if (qName.length() != 0) {
\r
495 //logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
\r
497 lastChars.setLength(0);
\r
498 //justPopped = true;
\r
499 } catch (RuntimeException e) {
\r
500 e.printStackTrace();
\r
504 // Have to hack around the fact that the character data might be in pieces
\r
505 public void characters(char[] ch, int start, int length)
\r
506 throws SAXException {
\r
508 String value = new String(ch,start,length);
\r
509 if (DEBUG) logln("characters:\t" + value);
\r
510 lastChars.append(value);
\r
511 //justPopped = false;
\r
512 } catch (RuntimeException e) {
\r
513 e.printStackTrace();
\r
518 // just for debugging
\r
520 public void notationDecl (String name, String publicId, String systemId)
\r
521 throws SAXException {
\r
522 logln("notationDecl: " + name
\r
528 public void processingInstruction (String target, String data)
\r
529 throws SAXException {
\r
530 logln("processingInstruction: " + target + ", " + data);
\r
533 public void skippedEntity (String name)
\r
534 throws SAXException
\r
536 logln("skippedEntity: " + name
\r
540 public void unparsedEntityDecl (String name, String publicId,
\r
541 String systemId, String notationName)
\r
542 throws SAXException {
\r
543 logln("unparsedEntityDecl: " + name
\r
546 + ", " + notationName
\r