]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/cldr/TestCLDRVsICU.java
go
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / cldr / TestCLDRVsICU.java
1 //##header J2SE15
2 //#if defined(FOUNDATION10) || defined(J2SE13)
3 //#else
4 /*
5 **********************************************************************
6 * Copyright (c) 2002-2009, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 **********************************************************************
9 * Author: Mark Davis
10 **********************************************************************
11 */
12 package com.ibm.icu.dev.test.cldr;
13
14 import java.io.File;
15
16 //import org.unicode.cldr.util.LanguageTagParser;
17 //import org.unicode.cldr.util.Utility;
18 import java.io.IOException;
19 import java.io.PrintWriter;
20 import java.io.StringWriter;
21 import java.text.ParseException;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.Date;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeMap;
30 import java.util.Iterator;
31 import java.util.TreeSet;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34
35 import javax.xml.parsers.SAXParser;
36 import javax.xml.parsers.SAXParserFactory;
37
38 //import org.unicode.cldr.test.CLDRTest;
39 //import org.unicode.cldr.tool.GenerateCldrTests;
40 import org.xml.sax.Attributes;
41 import org.xml.sax.SAXException;
42 import org.xml.sax.helpers.DefaultHandler;
43
44 import com.ibm.icu.util.Currency;
45 import com.ibm.icu.util.TimeZone;
46 import com.ibm.icu.util.ULocale;
47 import com.ibm.icu.dev.test.TestFmwk;
48
49 import com.ibm.icu.text.Collator;
50 import com.ibm.icu.text.DateFormat;
51 import com.ibm.icu.text.NumberFormat;
52 import com.ibm.icu.text.SimpleDateFormat;
53 import com.ibm.icu.text.Transliterator;
54 import com.ibm.icu.text.UTF16;
55 import com.ibm.icu.text.UnicodeSet;
56
57 /**
58  * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
59  * correctly.
60  * WARNING: 
61  * 1. for this to work right, you have to have downloaded the CLDR data, and
62  * then set the CLDR directory correctly, using
63  * -DCLDR_DIRECTORY=<top level of cldr>
64  * 2. You probably also need to increase memory, eg with -Xmx512m
65  * 3. For speed, you should also use -DCLDR_DTD_CACHE=C:\cldrcache\, where
66  * C:\cldrcache\ is a temp directory to keep the program from hitting the net for
67  * each file access.
68  * 4. You may use other environment variables to narrow what you test. Eg
69  * -DXML_MATCH=".*" -DTEST_MATCH="zone.*" -DZONE_MATCH="(?!America/Argentina).*" 
70  *   a. -DXML_MATCH="de.*" (or whatever regex you want) to just
71  *   test certain locales.
72  *   b. -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
73  *   c. -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
74  * @author medavis
75  */
76 public class TestCLDRVsICU extends TestFmwk {
77     static final boolean DEBUG = false;
78
79     //ULocale uLocale = ULocale.ENGLISH;
80     //Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
81     //static PrintWriter log;
82     SAXParser SAX;
83     static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
84     static String CLDR_DIRECTORY;
85     static {
86         System.out.println();
87         LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
88         TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
89         ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); // example
90
91         // WARNING: THIS IS TEMPORARY UNTIL I GET THE FILES STRAIGHTENED OUT
92         CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
93         System.out.println();
94     }
95
96     private static Matcher getEnvironmentRegex(String key, String defaultValue) {
97         return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
98     }
99
100     private static String getEnvironmentString(String key, String defaultValue) {
101         String temp = System.getProperty(key);
102         if (temp == null) temp = defaultValue;
103         else System.out.print("-D" + key + "=\"" + temp + "\" ");
104         return temp;
105     }
106
107     public static void main(String[] args) throws Exception {
108         new TestCLDRVsICU().run(args);
109     }
110
111      Set allLocales = new TreeSet();
112
113     public void TestFiles() throws SAXException, IOException {
114         // only get ICU's locales
115         Set s = new TreeSet();
116         addLocales(NumberFormat.getAvailableULocales(), s);
117         addLocales(DateFormat.getAvailableULocales(), s);
118         addLocales(Collator.getAvailableULocales(), s);
119
120         // filter, to make tracking down bugs easier
121
122         for (Iterator it = s.iterator(); it.hasNext();) {
123             String locale = (String)it.next();
124             if (!LOCALE_MATCH.reset(locale).matches()) continue;
125             _test(locale);
126         }
127     }
128     
129     public void addLocales(ULocale[] list, Collection s) {
130         for (int i = 0; i < list.length; ++i) {
131             allLocales.add(list[i].toString());
132             s.add(list[i].getLanguage());
133         }
134     }
135
136     public String getLanguage(ULocale uLocale) {
137         String result = uLocale.getLanguage();
138         String script = uLocale.getScript();
139         if (script.length() != 0) result += "_" + script;
140         return result;
141     }
142
143     public void _test(String localeName) throws SAXException, IOException {
144         //uLocale = new ULocale(localeName);
145         //oLocale = uLocale.toLocale();
146
147         File f = new File(CLDR_DIRECTORY + "common\\test\\"+ localeName + ".xml");
148         logln("Testing " + f.getCanonicalPath());
149         SAX.parse(f, DEFAULT_HANDLER);
150     }
151
152     static Transliterator toUnicode = Transliterator.getInstance("any-hex");
153     static public String showString(String in) {
154         return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
155     }
156     // ============ SAX Handler Infrastructure ============
157
158     abstract public class Handler {
159         Map settings = new TreeMap();
160         String name;
161         List currentLocales = new ArrayList();
162         int failures = 0;
163
164         void setName(String name) {
165             this.name = name;
166         }
167         void set(String attributeName, String attributeValue) {
168             //if (DEBUG) logln(attributeName + " => " + attributeValue);
169             settings.put(attributeName, attributeValue);
170         }
171         void checkResult(String value) {
172             if ("true".equals(settings.get("draft"))) {
173                 return; // skip draft
174             }
175             ULocale ul = new ULocale("xx");
176             try {
177                 for (int i = 0; i < currentLocales.size(); ++i) {
178                     ul = (ULocale)currentLocales.get(i);
179                     //loglnSAX("  Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
180                     handleResult(ul, value);
181                     if (failures != 0) {
182                         errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")");
183                         failures = 0;
184                     }
185                 }
186             } catch (Exception e) {
187                 StringWriter sw = new StringWriter();
188                 PrintWriter pw = new PrintWriter(sw);
189                 e.printStackTrace(pw);
190                 pw.flush();
191                 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
192             }
193         }
194         public void loglnSAX(String message) {
195             String temp = message + "\t[" + name;
196             for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
197                 String attributeName = (String) it.next();
198                 String attributeValue = (String) settings.get(attributeName);
199                 temp += " " + attributeName + "=<" + attributeValue + ">";
200             }
201             logln(temp + "]");
202         }
203         int lookupValue(Object x, Object[] list) {
204             for (int i = 0; i < list.length; ++i) {
205                 if (x.equals(list[i])) return i;
206             }
207             loglnSAX("Unknown String: " + x);
208             return -1;
209         }
210         abstract void handleResult(ULocale currentLocale, String value) throws Exception;
211         /**
212          * @param attributes
213          */
214         public void setAttributes(Attributes attributes) {
215             String localeList = attributes.getValue("locales");
216             String[] currentLocaleString = new String[50];
217             com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
218             currentLocales.clear();
219             for (int i = 0; i < currentLocaleString.length; ++i) {
220                 if (currentLocaleString[i].length() == 0) continue;
221                 if (allLocales.contains("")) {
222                     logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
223                     continue;
224                 }
225                 currentLocales.add(new ULocale(currentLocaleString[i]));
226             }
227             if (DEBUG) logln("Setting locales: " + currentLocales);
228         }
229     }
230
231     public Handler getHandler(String name, Attributes attributes) {
232         if (DEBUG) logln("Creating Handler: " + name);
233         Handler result = (Handler) RegisteredHandlers.get(name);
234         if (result == null) logln("Unexpected test type: " + name);
235         else {
236             result.setAttributes(attributes);
237         }
238         return result;
239     }
240
241     public void addHandler(String name, Handler handler) {
242         if (!TEST_MATCH.reset(name).matches()) handler = new NullHandler();
243         handler.setName(name);
244         RegisteredHandlers.put(name, handler);
245     }
246     Map RegisteredHandlers = new HashMap();
247
248     class NullHandler extends Handler {
249         void handleResult(ULocale currentLocale, String value) throws Exception {}        
250     }
251
252     // ============ Statics for Date/Number Support ============
253
254     static TimeZone utc = TimeZone.getTimeZone("GMT");
255     static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
256     {
257         iso.setTimeZone(utc);
258     }
259     static int[] DateFormatValues = {-1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL};
260     static String[] DateFormatNames = {"none", "short", "medium", "long", "full"};
261
262     static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific", "GBP"};
263
264
265     // ============ Handler for Collation ============ 
266     static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
267     
268     static String remove(String in, UnicodeSet toRemove) {
269         int cp;
270         StringBuffer result = new StringBuffer();
271         for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
272             cp = UTF16.charAt(in, i);
273             if (!toRemove.contains(cp)) UTF16.append(result, cp);
274         }
275         return result.toString();
276     }
277
278     {
279         addHandler("collation", new Handler() {
280             public void handleResult(ULocale currentLocale, String value) {
281                 Collator col = Collator.getInstance(currentLocale);
282                 String lastLine = "";
283                 int count = 0;
284                 for (int pos = 0; pos < value.length();) {
285                     int nextPos = value.indexOf('\n', pos);
286                     if (nextPos < 0)
287                         nextPos = value.length();
288                     String line = value.substring(pos, nextPos);
289                     line = remove(line, controlsAndSpace); // HACK for SAX
290                     if (line.trim().length() != 0) { // HACK for SAX
291                         int comp = col.compare(lastLine, line);
292                         if (comp > 0) {
293                             failures++;
294                             errln("\tLine " + (count + 1) + "\tFailure: "
295                                     + showString(lastLine) + " should be leq "
296                                     + showString(line));
297                         } else if (DEBUG) {
298                             logln("OK: " + line);
299                         }
300                         lastLine = line;
301                     }
302                     pos = nextPos + 1;
303                     count++;
304                 }
305             }
306         });
307
308         // ============ Handler for Numbers ============ 
309         addHandler("number", new Handler() {
310             public void handleResult(ULocale locale, String result) {
311                 NumberFormat nf = null;
312                 double v = Double.NaN;
313                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
314                     String attributeName = (String) it.next();
315                     String attributeValue = (String) settings
316                             .get(attributeName);
317                     if (attributeName.equals("input")) {
318                         v = Double.parseDouble(attributeValue);
319                         continue;
320                     }
321                     // must be either numberType at this point
322                     int index = lookupValue(attributeValue, NumberNames);
323                     if (DEBUG) logln("Getting number format for " + locale);
324                     switch(index) {
325                     case 0: nf = NumberFormat.getInstance(locale); break;
326                     case 1: nf = NumberFormat.getIntegerInstance(locale); break;
327                     case 2: nf = NumberFormat.getNumberInstance(locale); break;
328                     case 3: nf = NumberFormat.getPercentInstance(locale); break;
329                     case 4: nf = NumberFormat.getScientificInstance(locale); break;
330                     default: nf = NumberFormat.getCurrencyInstance(locale); 
331                         nf.setCurrency(Currency.getInstance(attributeValue)); break;
332                     }
333                     String temp = nf.format(v).trim();
334                     result = result.trim(); // HACK because of SAX
335                     if (!temp.equals(result)) {
336                         errln("Number: Locale: " + locale
337                                 + ", \tType: " + attributeValue
338                                 + ", \tDraft: " + settings.get("draft")
339                                 + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");
340                     }
341
342                 }
343             }
344         });
345
346         // ============ Handler for Dates ============
347         addHandler("date", new Handler() {
348             public void handleResult(ULocale locale, String result) throws ParseException {
349                 int dateFormat = 0;
350                 int timeFormat = 0;
351                 Date date = new Date();
352                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
353                     String attributeName = (String) it.next();
354                     String attributeValue = (String) settings
355                             .get(attributeName);
356                     if (attributeName.equals("input")) {
357                         date = iso.parse(attributeValue);
358                         continue;
359                     }
360                     // must be either dateType or timeType at this point
361                     int index = lookupValue(attributeValue, DateFormatNames);
362                     if (attributeName.equals("dateType"))
363                         dateFormat = index;
364                     else
365                         timeFormat = index;
366
367                 }
368                 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
369                 dt.setTimeZone(utc);
370                 String temp = dt.format(date).trim();
371                 result = result.trim(); // HACK because of SAX
372                 if (!temp.equals(result)) {
373                     errln("DateTime: Locale: " + locale 
374                             + ", \tDate: " + DateFormatNames[dateFormat]
375                             + ", \tTime: " + DateFormatNames[timeFormat]
376                             + ", \tDraft: " + settings.get("draft")
377                             + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");
378                 }
379             }
380
381             private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
382                 if (DEBUG) logln("Getting date/time format for " + locale);
383                 if (DEBUG && "ar_EG".equals(locale.toString())) {
384                     System.out.println("debug here");
385                 }
386                 DateFormat dt;
387                 if (dateFormat == 0) {
388                     dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
389                     if (DEBUG) System.out.print("getTimeInstance");
390                 } else if (timeFormat == 0) {
391                     dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
392                     if (DEBUG) System.out.print("getDateInstance");
393                 } else {
394                     dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat], locale);
395                     if (DEBUG) System.out.print("getDateTimeInstance");
396                 }
397                 if (DEBUG) System.out.println("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat)dt).toPattern());
398                 return (SimpleDateFormat)dt;
399             }
400         });
401
402         // ============ Handler for Zones ============
403         addHandler("zoneFields", new Handler() {
404             String date = "";
405             String zone = "";
406             String parse = "";
407             String pattern = "";
408             
409             public void handleResult(ULocale locale, String result) throws ParseException {
410                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
411                     String attributeName = (String) it.next();
412                     String attributeValue = (String) settings.get(attributeName);
413                     if (attributeName.equals("date")) {
414                         date = attributeValue;
415                     } else if (attributeName.equals("field")) {
416                         pattern = attributeValue;
417                     } else if (attributeName.equals("zone")) {
418                         zone = attributeValue;
419                     } else if (attributeName.equals("parse")) {
420                         parse = attributeValue;
421                     }
422                 }
423                 if (!ZONE_MATCH.reset(zone).matches()) return;
424                 Date dateValue = iso.parse(date);
425                 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
426                 field.setTimeZone(TimeZone.getTimeZone(zone));
427                 String temp = field.format(dateValue).trim();
428                 // SKIP PARSE FOR NOW
429                 result = result.trim(); // HACK because of SAX
430                 if (!temp.equals(result)) {
431                     temp = field.format(dateValue).trim(); // call again for debugging
432                     errln("Zone Format: Locale: " + locale 
433                             + ", \tZone: " + zone
434                             + ", \tDate: " + date
435                             + ", \tField: " + pattern
436                             + ", \tParse: " + parse
437                             + ", \tDraft: " + settings.get("draft")
438                             + ", \tCLDR: <" + result
439                             + ">, \tICU: <" + temp + ">");
440                 }
441             }
442         });
443     }
444
445     // ============ Gorp for SAX ============
446
447     {
448         try {
449             SAXParserFactory factory = SAXParserFactory.newInstance();
450             factory.setValidating(true);
451             SAX = factory.newSAXParser();
452         } catch (Exception e) {
453             throw new IllegalArgumentException("can't start");
454         }
455     }
456
457     DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
458         static final boolean DEBUG = false;
459         StringBuffer lastChars = new StringBuffer();
460         //boolean justPopped = false;
461         Handler handler;
462
463         public void startElement(
464             String uri,
465             String localName,
466             String qName,
467             Attributes attributes)
468             throws SAXException {
469                 //data.put(new ContextStack(contextStack), lastChars);
470                 //lastChars = "";
471                 try {
472                     if (qName.equals("cldrTest")) {
473                      // skip
474                     } else if (qName.equals("result")) {
475                         for (int i = 0; i < attributes.getLength(); ++i) {
476                             handler.set(attributes.getQName(i), attributes.getValue(i));
477                         }
478                     } else {
479                         handler = getHandler(qName, attributes);
480                         //handler.set("locale", uLocale.toString());
481                     }
482                     //if (DEBUG) logln("startElement:\t" + contextStack);
483                     //justPopped = false;
484                 } catch (RuntimeException e) {
485                     e.printStackTrace();
486                     throw e;
487                 }
488         }
489         public void endElement(String uri, String localName, String qName)
490             throws SAXException {
491                 try {
492                     //if (DEBUG) logln("endElement:\t" + contextStack);
493                     if (qName.equals("result")) handler.checkResult(lastChars.toString());
494                     else if (qName.length() != 0) {
495                         //logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
496                     }
497                     lastChars.setLength(0);
498                     //justPopped = true;
499                 } catch (RuntimeException e) {
500                     e.printStackTrace();
501                     throw e;
502                 }
503             }
504         // Have to hack around the fact that the character data might be in pieces
505         public void characters(char[] ch, int start, int length)
506             throws SAXException {
507                 try {
508                     String value = new String(ch,start,length);
509                     if (DEBUG) logln("characters:\t" + value);
510                     lastChars.append(value);
511                     //justPopped = false;
512                 } catch (RuntimeException e) {
513                     e.printStackTrace();
514                     throw e;
515                 }
516             }
517
518         // just for debugging
519
520         public void notationDecl (String name, String publicId, String systemId)
521         throws SAXException {
522             logln("notationDecl: " + name
523             + ", " + publicId
524             + ", " + systemId
525             );
526         }
527
528         public void processingInstruction (String target, String data)
529         throws SAXException {
530             logln("processingInstruction: " + target + ", " + data);
531         }
532
533         public void skippedEntity (String name)
534         throws SAXException
535         {
536             logln("skippedEntity: " + name
537             );
538         }
539
540         public void unparsedEntityDecl (String name, String publicId,
541                         String systemId, String notationName)
542         throws SAXException {
543             logln("unparsedEntityDecl: " + name
544             + ", " + publicId
545             + ", " + systemId
546             + ", " + notationName
547             );
548         }
549
550     };
551 }
552 //#endif