]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/cldr/TestCLDRVsICU.java
Clean up imports.
[Dictionary.git] / jars / icu4j-52_1 / main / tests / core / src / com / ibm / icu / dev / test / cldr / TestCLDRVsICU.java
1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2010, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Mark Davis
7 **********************************************************************
8 */
9 package com.ibm.icu.dev.test.cldr;
10
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.io.StringWriter;
15 import java.text.ParseException;
16 import java.util.ArrayList;
17 import java.util.Collection;
18 import java.util.Date;
19 import java.util.HashMap;
20 import java.util.Iterator;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import javax.xml.parsers.SAXParser;
30 import javax.xml.parsers.SAXParserFactory;
31
32 import org.xml.sax.Attributes;
33 import org.xml.sax.SAXException;
34 import org.xml.sax.helpers.DefaultHandler;
35
36 import com.ibm.icu.dev.test.TestFmwk;
37 import com.ibm.icu.text.DateFormat;
38 import com.ibm.icu.text.NumberFormat;
39 import com.ibm.icu.text.SimpleDateFormat;
40 import com.ibm.icu.text.UTF16;
41 import com.ibm.icu.text.UnicodeSet;
42 import com.ibm.icu.util.Currency;
43 import com.ibm.icu.util.TimeZone;
44 import com.ibm.icu.util.ULocale;
45
46 /**
47  * This is a test file that takes in the CLDR XML test files and test against
48  * ICU4J. This test file is used to verify that ICU4J is implemented correctly.
49  * As it stands, the test generates all the errors to the console by logging it.
50  * The logging is only possible if "-v" or verbose is set as an argument.
51  * This will allow users to know what problems occurred within CLDR and ICU.
52  * Collator was disabled in this test file and therefore will be skipped.
53  * 
54  * Instructions:
55  * 1)   In order for this to work correctly, you must download the latest CLDR data
56  *      in the form of XML. You must also set the CLDR directory using:
57  *          -DCLDR_DIRECTORY=<top level of cldr>
58  * 2)   You may also consider increasing the memory using -Xmx512m.
59  * 3)   For speed purposes, you may consider creating a temporary directory for the
60  *      CLDR cache using:
61  *          -DCLDR_DTD_CACHE=<cldr cache directory>
62  * 4)   You may use other environment variables to narrow down your tests using:
63  *          -DXML_MATCH=".*"
64  *              -DXML_MATCH="de.*"  (or whatever regex you want) to just test certain locales.
65  *          -DTEST_MATCH="zone.*"   (or whatever regex you want) to just test collation, numbers, etc.
66  *          -DZONE_MATCH="(?!America/Argentina).*" 
67  *              -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
68
69  * @author medavis
70  * @author John Huan Vu (johnvu@us.ibm.com)
71  */
72 public class TestCLDRVsICU extends TestFmwk {
    /** Compile-time flag guarding the extra diagnostic logging in this class. */
    static final boolean DEBUG = false;

    // ULocale uLocale = ULocale.ENGLISH;
    // Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
    // static PrintWriter log;
    /** XML parser used by _test to read each CLDR test file. */
    SAXParser SAX;
    /**
     * Reusable matchers compiled from the XML_MATCH, TEST_MATCH and ZONE_MATCH system
     * properties (each defaults to ".*"). Callers reset() them against the string to test.
     */
    static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
    /** Top-level CLDR directory, read from the CLDR_DIRECTORY system property. */
    static String CLDR_DIRECTORY;
    static {
        // The two println() calls bracket the "-Dkey=value" echo that getEnvironmentString
        // prints for every property that was explicitly supplied.
        System.out.println();
        LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
        TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
        ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*");

        // CLDR_DIRECTORY is where all the CLDR XML test files are located
        // WARNING: THIS IS TEMPORARY DIRECTORY UNTIL THE FILES ARE STRAIGHTENED OUT
        // The fallback is a Windows-specific path; pass -DCLDR_DIRECTORY on other systems.
        CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
        System.out.println();
    }
92
93     private static Matcher getEnvironmentRegex(String key, String defaultValue) {
94         return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
95     }
96
97     private static String getEnvironmentString(String key, String defaultValue) {
98         String temp = System.getProperty(key);
99         if (temp == null)
100             temp = defaultValue;
101         else
102             System.out.print("-D" + key + "=\"" + temp + "\" ");
103         return temp;
104     }
105
106     public static void main(String[] args) throws Exception {
107         new TestCLDRVsICU().run(args);
108     }
109
110     Set allLocales = new TreeSet();
111
112     public void TestFiles() throws SAXException, IOException {
113         // only get ICU's locales
114         Set s = new TreeSet();
115         addLocales(NumberFormat.getAvailableULocales(), s);
116         addLocales(DateFormat.getAvailableULocales(), s);
117
118         // johnvu: Collator was originally disabled
119         // addLocales(Collator.getAvailableULocales(), s);
120
121         // filter, to make tracking down bugs easier
122         for (Iterator it = s.iterator(); it.hasNext();) {
123             String locale = (String) it.next();
124             if (!LOCALE_MATCH.reset(locale).matches())
125                 continue;
126             _test(locale);
127         }
128     }
129
130     public void addLocales(ULocale[] list, Collection s) {
131         for (int i = 0; i < list.length; ++i) {
132             allLocales.add(list[i].toString());
133             s.add(list[i].getLanguage());
134         }
135     }
136
137     public String getLanguage(ULocale uLocale) {
138         String result = uLocale.getLanguage();
139         String script = uLocale.getScript();
140         if (script.length() != 0)
141             result += "_" + script;
142         return result;
143     }
144
145     public void _test(String localeName) throws SAXException, IOException {
146         // uLocale = new ULocale(localeName);
147         // oLocale = uLocale.toLocale();
148
149         File f = new File(CLDR_DIRECTORY, "test/" + localeName + ".xml");
150         logln("Testing " + f.getCanonicalPath());
151         SAX.parse(f, DEFAULT_HANDLER);
152     }  
153
154     private static class ToHex {
155         public String transliterate(String in) {
156             StringBuilder sb = new StringBuilder();
157             for (int i = 0; i < in.length(); ++i) {
158                 char c = in.charAt(i);
159                 sb.append("\\u");
160                 if (c < 1000) {
161                     sb.append('0');
162                     if (c < 100) {
163                         sb.append('0');
164                         if (c < 10) {
165                             sb.append('0');
166                         }
167                     }
168                 }
169                 sb.append(Integer.toHexString((int) c));
170             }
171             return sb.toString();
172         }
173     }
174
175     // static Transliterator toUnicode = Transliterator.getInstance("any-hex");
176     private static final ToHex toUnicode = new ToHex();
177
178     static public String showString(String in) {
179         return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
180     }
181
182     // ============ SAX Handler Infrastructure ============
183
184     abstract public class Handler {
185         Map settings = new TreeMap();
186         String name;
187         List currentLocales = new ArrayList();
188         int failures = 0;
189
190         void setName(String name) {
191             this.name = name;
192         }
193
194         void set(String attributeName, String attributeValue) {
195             // if (DEBUG) logln(attributeName + " => " + attributeValue);
196             settings.put(attributeName, attributeValue);
197         }
198
199         void checkResult(String value) {
200             if (settings.get("draft").equals("unconfirmed") || settings.get("draft").equals("provisional")) {
201                 return; // skip draft
202             }
203             ULocale ul = new ULocale("xx");
204             try {
205                 for (int i = 0; i < currentLocales.size(); ++i) {
206                     ul = (ULocale) currentLocales.get(i);
207                     // loglnSAX("  Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
208                     handleResult(ul, value);
209                     if (failures != 0) {
210                         errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH)
211                                 + ")");
212                         failures = 0;
213                     }
214                 }
215             } catch (Exception e) {
216                 StringWriter sw = new StringWriter();
217                 PrintWriter pw = new PrintWriter(sw);
218                 e.printStackTrace(pw);
219                 pw.flush();
220                 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
221             }
222         }
223
224         public void loglnSAX(String message) {
225             String temp = message + "\t[" + name;
226             for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
227                 String attributeName = (String) it.next();
228                 String attributeValue = (String) settings.get(attributeName);
229                 temp += " " + attributeName + "=<" + attributeValue + ">";
230             }
231             logln(temp + "]");
232         }
233
234         int lookupValue(Object x, Object[] list) {
235             for (int i = 0; i < list.length; ++i) {
236                 if (x.equals(list[i]))
237                     return i;
238             }
239             loglnSAX("Unknown String: " + x);
240             return -1;
241         }
242
243         abstract void handleResult(ULocale currentLocale, String value) throws Exception;
244
245         /**
246          * @param attributes
247          */
248         public void setAttributes(Attributes attributes) {
249             String localeList = attributes.getValue("locales");
250             String[] currentLocaleString = new String[50];
251             com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
252             currentLocales.clear();
253             for (int i = 0; i < currentLocaleString.length; ++i) {
254                 if (currentLocaleString[i].length() == 0)
255                     continue;
256                 if (allLocales.contains("")) {
257                     logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
258                     continue;
259                 }
260                 currentLocales.add(new ULocale(currentLocaleString[i]));
261             }
262             if (DEBUG)
263                 logln("Setting locales: " + currentLocales);
264         }
265     }
266
267     public Handler getHandler(String name, Attributes attributes) {
268         if (DEBUG)
269             logln("Creating Handler: " + name);
270         Handler result = (Handler) RegisteredHandlers.get(name);
271         if (result == null)
272             logln("Unexpected test type: " + name);
273         else {
274             result.setAttributes(attributes);
275         }
276         return result;
277     }
278
279     public void addHandler(String name, Handler handler) {
280         if (!TEST_MATCH.reset(name).matches())
281             handler = new NullHandler();
282         handler.setName(name);
283         RegisteredHandlers.put(name, handler);
284     }
285
286     Map RegisteredHandlers = new HashMap();
287
    /**
     * Handler that ignores every result; addHandler registers it in place of handlers
     * whose name does not match TEST_MATCH.
     */
    class NullHandler extends Handler {
        void handleResult(ULocale currentLocale, String value) throws Exception {
            // Intentionally empty: results of filtered-out test types are not checked.
        }
    }
292
293     // ============ Statics for Date/Number Support ============
294
295     static TimeZone utc = TimeZone.getTimeZone("GMT");
296     static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
297     {
298         iso.setTimeZone(utc);
299     }
300
301     static int[] DateFormatValues = { -1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL };
302
303     // The following are different data format types that are part of the parameters in CLDR
304     static String[] DateFormatNames = { "none", "short", "medium", "long", "full" };
305
306     // The following are different number types that are part of the parameters in CLDR
307     static String[] NumberNames = { "standard", "integer", "decimal", "percent", "scientific", "GBP" };
308
309
310     // ============ Handler for Collation ============
311     static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
312
313     static String remove(String in, UnicodeSet toRemove) {
314         int cp;
315         StringBuffer result = new StringBuffer();
316         for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
317             cp = UTF16.charAt(in, i);
318             if (!toRemove.contains(cp))
319                 UTF16.append(result, cp);
320         }
321         return result.toString();
322     }
323
324     {
325         // johnvu: Collator was originally disabled
326         // TODO (dougfelt) move this test
327         /*
328           addHandler("collation", new Handler() {
329              public void handleResult(ULocale currentLocale, String value) {
330                  Collator col = Collator.getInstance(currentLocale);
331                  String lastLine = "";
332                  int count = 0;
333                  for (int pos = 0; pos < value.length();) {
334                      int nextPos = value.indexOf('\n', pos);
335                      if (nextPos < 0)
336                          nextPos = value.length();
337                      String line = value.substring(pos, nextPos);
338                      line = remove(line, controlsAndSpace);  HACK for SAX
339                      if (line.trim().length() != 0) {  HACK for SAX
340                          int comp = col.compare(lastLine, line);
341                          if (comp > 0) {
342                              failures++;
343                              errln("\tLine " + (count + 1) + "\tFailure: "
344                                      + showString(lastLine) + " should be leq "
345                                      + showString(line));
346                          } else if (DEBUG) {
347                              logln("OK: " + line);
348                          }
349                          lastLine = line;
350                      }
351                      pos = nextPos + 1;
352                      count++;
353                  }
354              }
355          });
356         */
357
358         // ============ Handler for Numbers ============
359         addHandler("number", new Handler() {
360             public void handleResult(ULocale locale, String result) {
361                 NumberFormat nf = null;
362                 double v = Double.NaN;
363                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
364                     String attributeName = (String) it.next();
365                     String attributeValue = (String) settings.get(attributeName);
366
367                     // Checks if the attribute name is a draft and whether
368                     // or not it has been approved / contributed by CLDR yet
369                     // otherwise, skips it because it is most likely rejected by ICU
370                     if (attributeName.equals("draft")) {
371                         if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
372                             break;
373                         }
374                         continue;
375                     }
376
377                     // Update the value to be checked
378                     if (attributeName.equals("input")) {
379                         v = Double.parseDouble(attributeValue);
380                         continue;
381                     }
382
383                     // At this point, it must be a numberType
384                     int index = lookupValue(attributeValue, NumberNames);
385
386                     if (DEBUG)
387                         logln("Getting number format for " + locale);
388                     switch (index) {
389                     case 0:
390                         nf = NumberFormat.getInstance(locale);
391                         break;
392                     case 1:
393                         nf = NumberFormat.getIntegerInstance(locale);
394                         break;
395                     case 2:
396                         nf = NumberFormat.getNumberInstance(locale);
397                         break;
398                     case 3:
399                         nf = NumberFormat.getPercentInstance(locale);
400                         break;
401                     case 4:
402                         nf = NumberFormat.getScientificInstance(locale);
403                         break;
404                     default:
405                         nf = NumberFormat.getCurrencyInstance(locale);
406                         nf.setCurrency(Currency.getInstance(attributeValue));
407                         break;
408                     }
409                     String temp = nf.format(v).trim();
410                     result = result.trim(); // HACK because of SAX
411                     if (!temp.equals(result)) {
412                         logln("Number: Locale: " + locale +
413                                 "\n\tType: " + attributeValue +
414                                 "\n\tDraft: " + settings.get("draft") +
415                                 "\n\tCLDR: <" + result + ">" +
416                                 "\n\tICU: <" + temp + ">");
417                     }
418
419                 }
420             }
421         });
422
423         // ============ Handler for Dates ============
424         addHandler("date", new Handler() {
425             public void handleResult(ULocale locale, String result) throws ParseException {
426                 int dateFormat = 0;
427                 int timeFormat = 0;
428                 Date date = new Date();
429                 boolean approved = true;
430
431                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
432                     String attributeName = (String) it.next();
433                     String attributeValue = (String) settings.get(attributeName);
434
435                     // Checks if the attribute name is a draft and whether
436                     // or not it has been approved / contributed by CLDR yet
437                     // otherwise, skips it because it is most likely rejected by ICU
438                     if (attributeName.equals("draft")) {
439                         if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
440                             approved = false;
441                             break;
442                         }
443                         continue;
444                     }
445
446                     // Update the value to be checked
447                     if (attributeName.equals("input")) {
448                         date = iso.parse(attributeValue);
449                         continue;
450                     }
451                     // At this point, it must be either dateType or timeType
452                     int index = lookupValue(attributeValue, DateFormatNames);
453                     if (attributeName.equals("dateType"))
454                         dateFormat = index;
455                     else if (attributeName.equals("timeType"))
456                         timeFormat = index;
457
458                 }
459
460                 // The attribute value must be approved in order to be checked,
461                 // if it hasn't been approved, it shouldn't be checked if it
462                 // matches with ICU
463                 if (approved) {
464                     SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
465                     dt.setTimeZone(utc);
466                     String temp = dt.format(date).trim();
467                     result = result.trim(); // HACK because of SAX
468                     if (!temp.equals(result)) {
469                         logln("DateTime: Locale: " + locale +
470                                 "\n\tDate: " + DateFormatNames[dateFormat] +
471                                 "\n\tTime: " + DateFormatNames[timeFormat] +
472                                 "\n\tDraft: " + settings.get("draft") +
473                                 "\n\tCLDR: <" + result + "> " +
474                                 "\n\tICU: <" + temp + ">");
475                     }
476                 }
477             }
478
479             private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
480                 if (DEBUG)
481                     logln("Getting date/time format for " + locale);
482                 if (DEBUG && "ar_EG".equals(locale.toString())) {
483                     logln("debug here");
484                 }
485                 DateFormat dt;
486                 if (dateFormat == 0) {
487                     dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
488                     if (DEBUG)
489                         System.out.print("getTimeInstance");
490                 } else if (timeFormat == 0) {
491                     dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
492                     if (DEBUG)
493                         System.out.print("getDateInstance");
494                 } else {
495                     dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat],
496                             locale);
497                     if (DEBUG)
498                         System.out.print("getDateTimeInstance");
499                 }
500                 if (DEBUG)
501                     logln("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat) dt).toPattern());
502                 return (SimpleDateFormat) dt;
503             }
504         });
505
506         // ============ Handler for Zones ============
507         addHandler("zoneFields", new Handler() {
508             String date = "";
509             String zone = "";
510             String parse = "";
511             String pattern = "";
512
513             public void handleResult(ULocale locale, String result) throws ParseException {
514                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
515                     String attributeName = (String) it.next();
516                     String attributeValue = (String) settings.get(attributeName);
517                     if (attributeName.equals("date")) {
518                         date = attributeValue;
519                     } else if (attributeName.equals("field")) {
520                         pattern = attributeValue;
521                     } else if (attributeName.equals("zone")) {
522                         zone = attributeValue;
523                     } else if (attributeName.equals("parse")) {
524                         parse = attributeValue;
525                     }
526                 }
527                 
528                 if (!ZONE_MATCH.reset(zone).matches()) return;
529                 Date dateValue = iso.parse(date);
530                 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
531                 field.setTimeZone(TimeZone.getTimeZone(zone));
532                 String temp = field.format(dateValue).trim();
533                 // SKIP PARSE FOR NOW
534                 result = result.trim(); // HACK because of SAX
535                 if (!temp.equals(result)) {
536                     temp = field.format(dateValue).trim(); // call again for debugging
537                     logln("Zone Format: Locale: " + locale 
538                             + "\n\tZone: " + zone
539                             + "\n\tDate: " + date
540                             + "\n\tField: " + pattern
541                             + "\n\tParse: " + parse
542                             + "\n\tDraft: " + settings.get("draft")
543                             + "\n\tCLDR: <" + result
544                             + ">\n\tICU: <" + temp + ">");
545                 }
546             }
547         });
548     }
549
550     // ============ Gorp for SAX ============
551
552     {
553         try {
554             SAXParserFactory factory = SAXParserFactory.newInstance();
555             factory.setValidating(true);
556             SAX = factory.newSAXParser();
557         } catch (Exception e) {
558             throw new IllegalArgumentException("SAXParserFacotry was unable to start.");
559         }
560     }
561
562     DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
563         static final boolean DEBUG = false;
564         StringBuffer lastChars = new StringBuffer();
565         // boolean justPopped = false;
566         Handler handler;
567
568         public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
569             // data.put(new ContextStack(contextStack), lastChars);
570             // lastChars = "";
571             try {
572                 if (qName.equals("cldrTest")) {
573                     // skip
574                 } else if (qName.equals("result") && handler != null) {
575                     for (int i = 0; i < attributes.getLength(); ++i) {
576                         handler.set(attributes.getQName(i), attributes.getValue(i));
577                     }
578                 } else {
579                     handler = getHandler(qName, attributes);
580                     // handler.set("locale", uLocale.toString());
581                 }
582                 // if (DEBUG) logln("startElement:\t" + contextStack);
583                 // justPopped = false;
584             } catch (RuntimeException e) {
585                 e.printStackTrace();
586                 throw e;
587             }
588         }
589
590         public void endElement(String uri, String localName, String qName) throws SAXException {
591             try {
592                 // if (DEBUG) logln("endElement:\t" + contextStack);
593                 if (qName.equals("result") && handler != null) {
594                     handler.checkResult(lastChars.toString());
595                 } else if (qName.length() != 0) {
596                     // logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
597                 }
598                 lastChars.setLength(0);
599                 // justPopped = true;
600             } catch (RuntimeException e) {
601                 e.printStackTrace();
602                 throw e;
603             }
604         }
605
606         // Have to hack around the fact that the character data might be in pieces
607         public void characters(char[] ch, int start, int length) throws SAXException {
608             try {
609                 String value = new String(ch, start, length);
610                 if (DEBUG)
611                     logln("characters:\t" + value);
612                 lastChars.append(value);
613                 // justPopped = false;
614             } catch (RuntimeException e) {
615                 e.printStackTrace();
616                 throw e;
617             }
618         }
619
620         // just for debugging
621
622         public void notationDecl(String name, String publicId, String systemId) throws SAXException {
623             logln("notationDecl: " + name + ", " + publicId + ", " + systemId);
624         }
625
626         public void processingInstruction(String target, String data) throws SAXException {
627             logln("processingInstruction: " + target + ", " + data);
628         }
629
630         public void skippedEntity(String name) throws SAXException {
631             logln("skippedEntity: " + name);
632         }
633
634         public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName)
635                 throws SAXException {
636             logln("unparsedEntityDecl: " + name + ", " + publicId + ", " + systemId + ", " + notationName);
637         }
638     };
639 }