]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/cldr/TestCLDRVsICU.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / cldr / TestCLDRVsICU.java
1 //##header\r
2 //#if defined(FOUNDATION10) || defined(J2SE13)\r
3 //#else\r
4 /*\r
5 **********************************************************************\r
6 * Copyright (c) 2002-2009, International Business Machines\r
7 * Corporation and others.  All Rights Reserved.\r
8 **********************************************************************\r
9 * Author: Mark Davis\r
10 **********************************************************************\r
11 */\r
12 package com.ibm.icu.dev.test.cldr;\r
13 \r
14 import java.io.File;\r
15 \r
16 //import org.unicode.cldr.util.LanguageTagParser;\r
17 //import org.unicode.cldr.util.Utility;\r
18 import java.io.IOException;\r
19 import java.io.PrintWriter;\r
20 import java.io.StringWriter;\r
21 import java.text.ParseException;\r
22 import java.util.ArrayList;\r
23 import java.util.Collection;\r
24 import java.util.Date;\r
25 import java.util.HashMap;\r
26 import java.util.List;\r
27 import java.util.Map;\r
28 import java.util.Set;\r
29 import java.util.TreeMap;\r
30 import java.util.Iterator;\r
31 import java.util.TreeSet;\r
32 import java.util.regex.Matcher;\r
33 import java.util.regex.Pattern;\r
34 \r
35 import javax.xml.parsers.SAXParser;\r
36 import javax.xml.parsers.SAXParserFactory;\r
37 \r
38 //import org.unicode.cldr.test.CLDRTest;\r
39 //import org.unicode.cldr.tool.GenerateCldrTests;\r
40 import org.xml.sax.Attributes;\r
41 import org.xml.sax.SAXException;\r
42 import org.xml.sax.helpers.DefaultHandler;\r
43 \r
44 import com.ibm.icu.util.Currency;\r
45 import com.ibm.icu.util.TimeZone;\r
46 import com.ibm.icu.util.ULocale;\r
47 import com.ibm.icu.dev.test.TestFmwk;\r
48 \r
49 import com.ibm.icu.text.Collator;\r
50 import com.ibm.icu.text.DateFormat;\r
51 import com.ibm.icu.text.NumberFormat;\r
52 import com.ibm.icu.text.SimpleDateFormat;\r
53 import com.ibm.icu.text.Transliterator;\r
54 import com.ibm.icu.text.UTF16;\r
55 import com.ibm.icu.text.UnicodeSet;\r
56 \r
57 /**\r
58  * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them\r
59  * correctly.\r
60  * WARNING: \r
61  * 1. for this to work right, you have to have downloaded the CLDR data, and\r
62  * then set the CLDR directory correctly, using\r
63  * -DCLDR_DIRECTORY=<top level of cldr>\r
64  * 2. You probably also need to increase memory, eg with -Xmx512m\r
65  * 3. For speed, you should also use -DCLDR_DTD_CACHE=C:\cldrcache\, where\r
66  * C:\cldrcache\ is a temp directory to keep the program from hitting the net for\r
67  * each file access.\r
68  * 4. You may use other environment variables to narrow what you test. Eg\r
69  * -DXML_MATCH=".*" -DTEST_MATCH="zone.*" -DZONE_MATCH="(?!America/Argentina).*" \r
70  *   a. -DXML_MATCH="de.*" (or whatever regex you want) to just\r
71  *   test certain locales.\r
72  *   b. -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.\r
73  *   c. -DZONE_MATCH=".*Moscow.*" (to only test certain zones)\r
74  * @author medavis\r
75  */\r
76 public class TestCLDRVsICU extends TestFmwk {\r
77     static final boolean DEBUG = false;\r
78 \r
79     //ULocale uLocale = ULocale.ENGLISH;\r
80     //Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere\r
81     //static PrintWriter log;\r
82     SAXParser SAX;\r
83     static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;\r
84     static String CLDR_DIRECTORY;\r
85     static {\r
86         System.out.println();\r
87         LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");\r
88         TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");\r
89         ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); // example\r
90 \r
91         // WARNING: THIS IS TEMPORARY UNTIL I GET THE FILES STRAIGHTENED OUT\r
92         CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");\r
93         System.out.println();\r
94     }\r
95 \r
96     private static Matcher getEnvironmentRegex(String key, String defaultValue) {\r
97         return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");\r
98     }\r
99 \r
100     private static String getEnvironmentString(String key, String defaultValue) {\r
101         String temp = System.getProperty(key);\r
102         if (temp == null) temp = defaultValue;\r
103         else System.out.print("-D" + key + "=\"" + temp + "\" ");\r
104         return temp;\r
105     }\r
106 \r
107     public static void main(String[] args) throws Exception {\r
108         new TestCLDRVsICU().run(args);\r
109     }\r
110 \r
111      Set allLocales = new TreeSet();\r
112 \r
113     public void TestFiles() throws SAXException, IOException {\r
114         // only get ICU's locales\r
115         Set s = new TreeSet();\r
116         addLocales(NumberFormat.getAvailableULocales(), s);\r
117         addLocales(DateFormat.getAvailableULocales(), s);\r
118         addLocales(Collator.getAvailableULocales(), s);\r
119 \r
120         // filter, to make tracking down bugs easier\r
121 \r
122         for (Iterator it = s.iterator(); it.hasNext();) {\r
123             String locale = (String)it.next();\r
124             if (!LOCALE_MATCH.reset(locale).matches()) continue;\r
125             _test(locale);\r
126         }\r
127     }\r
128     \r
129     public void addLocales(ULocale[] list, Collection s) {\r
130         for (int i = 0; i < list.length; ++i) {\r
131             allLocales.add(list[i].toString());\r
132             s.add(list[i].getLanguage());\r
133         }\r
134     }\r
135 \r
136     public String getLanguage(ULocale uLocale) {\r
137         String result = uLocale.getLanguage();\r
138         String script = uLocale.getScript();\r
139         if (script.length() != 0) result += "_" + script;\r
140         return result;\r
141     }\r
142 \r
143     public void _test(String localeName) throws SAXException, IOException {\r
144         //uLocale = new ULocale(localeName);\r
145         //oLocale = uLocale.toLocale();\r
146 \r
147         File f = new File(CLDR_DIRECTORY + "common\\test\\"+ localeName + ".xml");\r
148         logln("Testing " + f.getCanonicalPath());\r
149         SAX.parse(f, DEFAULT_HANDLER);\r
150     }\r
151 \r
152     static Transliterator toUnicode = Transliterator.getInstance("any-hex");\r
153     static public String showString(String in) {\r
154         return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";\r
155     }\r
156     // ============ SAX Handler Infrastructure ============\r
157 \r
158     abstract public class Handler {\r
159         Map settings = new TreeMap();\r
160         String name;\r
161         List currentLocales = new ArrayList();\r
162         int failures = 0;\r
163 \r
164         void setName(String name) {\r
165             this.name = name;\r
166         }\r
167         void set(String attributeName, String attributeValue) {\r
168             //if (DEBUG) logln(attributeName + " => " + attributeValue);\r
169             settings.put(attributeName, attributeValue);\r
170         }\r
171         void checkResult(String value) {\r
172             if ("true".equals(settings.get("draft"))) {\r
173                 return; // skip draft\r
174             }\r
175             ULocale ul = new ULocale("xx");\r
176             try {\r
177                 for (int i = 0; i < currentLocales.size(); ++i) {\r
178                     ul = (ULocale)currentLocales.get(i);\r
179                     //loglnSAX("  Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);\r
180                     handleResult(ul, value);\r
181                     if (failures != 0) {\r
182                         errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")");\r
183                         failures = 0;\r
184                     }\r
185                 }\r
186             } catch (Exception e) {\r
187                 StringWriter sw = new StringWriter();\r
188                 PrintWriter pw = new PrintWriter(sw);\r
189                 e.printStackTrace(pw);\r
190                 pw.flush();\r
191                 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());\r
192             }\r
193         }\r
194         public void loglnSAX(String message) {\r
195             String temp = message + "\t[" + name;\r
196             for (Iterator it = settings.keySet().iterator(); it.hasNext();) {\r
197                 String attributeName = (String) it.next();\r
198                 String attributeValue = (String) settings.get(attributeName);\r
199                 temp += " " + attributeName + "=<" + attributeValue + ">";\r
200             }\r
201             logln(temp + "]");\r
202         }\r
203         int lookupValue(Object x, Object[] list) {\r
204             for (int i = 0; i < list.length; ++i) {\r
205                 if (x.equals(list[i])) return i;\r
206             }\r
207             loglnSAX("Unknown String: " + x);\r
208             return -1;\r
209         }\r
210         abstract void handleResult(ULocale currentLocale, String value) throws Exception;\r
211         /**\r
212          * @param attributes\r
213          */\r
214         public void setAttributes(Attributes attributes) {\r
215             String localeList = attributes.getValue("locales");\r
216             String[] currentLocaleString = new String[50];\r
217             com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);\r
218             currentLocales.clear();\r
219             for (int i = 0; i < currentLocaleString.length; ++i) {\r
220                 if (currentLocaleString[i].length() == 0) continue;\r
221                 if (allLocales.contains("")) {\r
222                     logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);\r
223                     continue;\r
224                 }\r
225                 currentLocales.add(new ULocale(currentLocaleString[i]));\r
226             }\r
227             if (DEBUG) logln("Setting locales: " + currentLocales);\r
228         }\r
229     }\r
230 \r
231     public Handler getHandler(String name, Attributes attributes) {\r
232         if (DEBUG) logln("Creating Handler: " + name);\r
233         Handler result = (Handler) RegisteredHandlers.get(name);\r
234         if (result == null) logln("Unexpected test type: " + name);\r
235         else {\r
236             result.setAttributes(attributes);\r
237         }\r
238         return result;\r
239     }\r
240 \r
241     public void addHandler(String name, Handler handler) {\r
242         if (!TEST_MATCH.reset(name).matches()) handler = new NullHandler();\r
243         handler.setName(name);\r
244         RegisteredHandlers.put(name, handler);\r
245     }\r
246     Map RegisteredHandlers = new HashMap();\r
247 \r
248     class NullHandler extends Handler {\r
249         void handleResult(ULocale currentLocale, String value) throws Exception {}        \r
250     }\r
251 \r
252     // ============ Statics for Date/Number Support ============\r
253 \r
254     static TimeZone utc = TimeZone.getTimeZone("GMT");\r
255     static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");\r
256     {\r
257         iso.setTimeZone(utc);\r
258     }\r
259     static int[] DateFormatValues = {-1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL};\r
260     static String[] DateFormatNames = {"none", "short", "medium", "long", "full"};\r
261 \r
262     static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific", "GBP"};\r
263 \r
264 \r
265     // ============ Handler for Collation ============ \r
266     static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");\r
267     \r
268     static String remove(String in, UnicodeSet toRemove) {\r
269         int cp;\r
270         StringBuffer result = new StringBuffer();\r
271         for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {\r
272             cp = UTF16.charAt(in, i);\r
273             if (!toRemove.contains(cp)) UTF16.append(result, cp);\r
274         }\r
275         return result.toString();\r
276     }\r
277 \r
278     {\r
279         addHandler("collation", new Handler() {\r
280             public void handleResult(ULocale currentLocale, String value) {\r
281                 Collator col = Collator.getInstance(currentLocale);\r
282                 String lastLine = "";\r
283                 int count = 0;\r
284                 for (int pos = 0; pos < value.length();) {\r
285                     int nextPos = value.indexOf('\n', pos);\r
286                     if (nextPos < 0)\r
287                         nextPos = value.length();\r
288                     String line = value.substring(pos, nextPos);\r
289                     line = remove(line, controlsAndSpace); // HACK for SAX\r
290                     if (line.trim().length() != 0) { // HACK for SAX\r
291                         int comp = col.compare(lastLine, line);\r
292                         if (comp > 0) {\r
293                             failures++;\r
294                             errln("\tLine " + (count + 1) + "\tFailure: "\r
295                                     + showString(lastLine) + " should be leq "\r
296                                     + showString(line));\r
297                         } else if (DEBUG) {\r
298                             logln("OK: " + line);\r
299                         }\r
300                         lastLine = line;\r
301                     }\r
302                     pos = nextPos + 1;\r
303                     count++;\r
304                 }\r
305             }\r
306         });\r
307 \r
308         // ============ Handler for Numbers ============ \r
309         addHandler("number", new Handler() {\r
310             public void handleResult(ULocale locale, String result) {\r
311                 NumberFormat nf = null;\r
312                 double v = Double.NaN;\r
313                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {\r
314                     String attributeName = (String) it.next();\r
315                     String attributeValue = (String) settings\r
316                             .get(attributeName);\r
317                     if (attributeName.equals("input")) {\r
318                         v = Double.parseDouble(attributeValue);\r
319                         continue;\r
320                     }\r
321                     // must be either numberType at this point\r
322                     int index = lookupValue(attributeValue, NumberNames);\r
323                     if (DEBUG) logln("Getting number format for " + locale);\r
324                     switch(index) {\r
325                     case 0: nf = NumberFormat.getInstance(locale); break;\r
326                     case 1: nf = NumberFormat.getIntegerInstance(locale); break;\r
327                     case 2: nf = NumberFormat.getNumberInstance(locale); break;\r
328                     case 3: nf = NumberFormat.getPercentInstance(locale); break;\r
329                     case 4: nf = NumberFormat.getScientificInstance(locale); break;\r
330                     default: nf = NumberFormat.getCurrencyInstance(locale); \r
331                         nf.setCurrency(Currency.getInstance(attributeValue)); break;\r
332                     }\r
333                     String temp = nf.format(v).trim();\r
334                     result = result.trim(); // HACK because of SAX\r
335                     if (!temp.equals(result)) {\r
336                         errln("Number: Locale: " + locale\r
337                                 + ", \tType: " + attributeValue\r
338                                 + ", \tDraft: " + settings.get("draft")\r
339                                 + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");\r
340                     }\r
341 \r
342                 }\r
343             }\r
344         });\r
345 \r
346         // ============ Handler for Dates ============\r
347         addHandler("date", new Handler() {\r
348             public void handleResult(ULocale locale, String result) throws ParseException {\r
349                 int dateFormat = 0;\r
350                 int timeFormat = 0;\r
351                 Date date = new Date();\r
352                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {\r
353                     String attributeName = (String) it.next();\r
354                     String attributeValue = (String) settings\r
355                             .get(attributeName);\r
356                     if (attributeName.equals("input")) {\r
357                         date = iso.parse(attributeValue);\r
358                         continue;\r
359                     }\r
360                     // must be either dateType or timeType at this point\r
361                     int index = lookupValue(attributeValue, DateFormatNames);\r
362                     if (attributeName.equals("dateType"))\r
363                         dateFormat = index;\r
364                     else\r
365                         timeFormat = index;\r
366 \r
367                 }\r
368                 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);\r
369                 dt.setTimeZone(utc);\r
370                 String temp = dt.format(date).trim();\r
371                 result = result.trim(); // HACK because of SAX\r
372                 if (!temp.equals(result)) {\r
373                     errln("DateTime: Locale: " + locale \r
374                             + ", \tDate: " + DateFormatNames[dateFormat]\r
375                             + ", \tTime: " + DateFormatNames[timeFormat]\r
376                             + ", \tDraft: " + settings.get("draft")\r
377                             + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">");\r
378                 }\r
379             }\r
380 \r
381             private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {\r
382                 if (DEBUG) logln("Getting date/time format for " + locale);\r
383                 if (DEBUG && "ar_EG".equals(locale.toString())) {\r
384                     System.out.println("debug here");\r
385                 }\r
386                 DateFormat dt;\r
387                 if (dateFormat == 0) {\r
388                     dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);\r
389                     if (DEBUG) System.out.print("getTimeInstance");\r
390                 } else if (timeFormat == 0) {\r
391                     dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);\r
392                     if (DEBUG) System.out.print("getDateInstance");\r
393                 } else {\r
394                     dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat], locale);\r
395                     if (DEBUG) System.out.print("getDateTimeInstance");\r
396                 }\r
397                 if (DEBUG) System.out.println("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat)dt).toPattern());\r
398                 return (SimpleDateFormat)dt;\r
399             }\r
400         });\r
401 \r
402         // ============ Handler for Zones ============\r
403         addHandler("zoneFields", new Handler() {\r
404             String date = "";\r
405             String zone = "";\r
406             String parse = "";\r
407             String pattern = "";\r
408             \r
409             public void handleResult(ULocale locale, String result) throws ParseException {\r
410                 for (Iterator it = settings.keySet().iterator(); it.hasNext();) {\r
411                     String attributeName = (String) it.next();\r
412                     String attributeValue = (String) settings.get(attributeName);\r
413                     if (attributeName.equals("date")) {\r
414                         date = attributeValue;\r
415                     } else if (attributeName.equals("field")) {\r
416                         pattern = attributeValue;\r
417                     } else if (attributeName.equals("zone")) {\r
418                         zone = attributeValue;\r
419                     } else if (attributeName.equals("parse")) {\r
420                         parse = attributeValue;\r
421                     }\r
422                 }\r
423                 if (!ZONE_MATCH.reset(zone).matches()) return;\r
424                 Date dateValue = iso.parse(date);\r
425                 SimpleDateFormat field = new SimpleDateFormat(pattern, locale);\r
426                 field.setTimeZone(TimeZone.getTimeZone(zone));\r
427                 String temp = field.format(dateValue).trim();\r
428                 // SKIP PARSE FOR NOW\r
429                 result = result.trim(); // HACK because of SAX\r
430                 if (!temp.equals(result)) {\r
431                     temp = field.format(dateValue).trim(); // call again for debugging\r
432                     errln("Zone Format: Locale: " + locale \r
433                             + ", \tZone: " + zone\r
434                             + ", \tDate: " + date\r
435                             + ", \tField: " + pattern\r
436                             + ", \tParse: " + parse\r
437                             + ", \tDraft: " + settings.get("draft")\r
438                             + ", \tCLDR: <" + result\r
439                             + ">, \tICU: <" + temp + ">");\r
440                 }\r
441             }\r
442         });\r
443     }\r
444 \r
445     // ============ Gorp for SAX ============\r
446 \r
447     {\r
448         try {\r
449             SAXParserFactory factory = SAXParserFactory.newInstance();\r
450             factory.setValidating(true);\r
451             SAX = factory.newSAXParser();\r
452         } catch (Exception e) {\r
453             throw new IllegalArgumentException("can't start");\r
454         }\r
455     }\r
456 \r
457     DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {\r
458         static final boolean DEBUG = false;\r
459         StringBuffer lastChars = new StringBuffer();\r
460         //boolean justPopped = false;\r
461         Handler handler;\r
462 \r
463         public void startElement(\r
464             String uri,\r
465             String localName,\r
466             String qName,\r
467             Attributes attributes)\r
468             throws SAXException {\r
469                 //data.put(new ContextStack(contextStack), lastChars);\r
470                 //lastChars = "";\r
471                 try {\r
472                     if (qName.equals("cldrTest")) {\r
473                      // skip\r
474                     } else if (qName.equals("result")) {\r
475                         for (int i = 0; i < attributes.getLength(); ++i) {\r
476                             handler.set(attributes.getQName(i), attributes.getValue(i));\r
477                         }\r
478                     } else {\r
479                         handler = getHandler(qName, attributes);\r
480                         //handler.set("locale", uLocale.toString());\r
481                     }\r
482                     //if (DEBUG) logln("startElement:\t" + contextStack);\r
483                     //justPopped = false;\r
484                 } catch (RuntimeException e) {\r
485                     e.printStackTrace();\r
486                     throw e;\r
487                 }\r
488         }\r
489         public void endElement(String uri, String localName, String qName)\r
490             throws SAXException {\r
491                 try {\r
492                     //if (DEBUG) logln("endElement:\t" + contextStack);\r
493                     if (qName.equals("result")) handler.checkResult(lastChars.toString());\r
494                     else if (qName.length() != 0) {\r
495                         //logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");\r
496                     }\r
497                     lastChars.setLength(0);\r
498                     //justPopped = true;\r
499                 } catch (RuntimeException e) {\r
500                     e.printStackTrace();\r
501                     throw e;\r
502                 }\r
503             }\r
504         // Have to hack around the fact that the character data might be in pieces\r
505         public void characters(char[] ch, int start, int length)\r
506             throws SAXException {\r
507                 try {\r
508                     String value = new String(ch,start,length);\r
509                     if (DEBUG) logln("characters:\t" + value);\r
510                     lastChars.append(value);\r
511                     //justPopped = false;\r
512                 } catch (RuntimeException e) {\r
513                     e.printStackTrace();\r
514                     throw e;\r
515                 }\r
516             }\r
517 \r
518         // just for debugging\r
519 \r
520         public void notationDecl (String name, String publicId, String systemId)\r
521         throws SAXException {\r
522             logln("notationDecl: " + name\r
523             + ", " + publicId\r
524             + ", " + systemId\r
525             );\r
526         }\r
527 \r
528         public void processingInstruction (String target, String data)\r
529         throws SAXException {\r
530             logln("processingInstruction: " + target + ", " + data);\r
531         }\r
532 \r
533         public void skippedEntity (String name)\r
534         throws SAXException\r
535         {\r
536             logln("skippedEntity: " + name\r
537             );\r
538         }\r
539 \r
540         public void unparsedEntityDecl (String name, String publicId,\r
541                         String systemId, String notationName)\r
542         throws SAXException {\r
543             logln("unparsedEntityDecl: " + name\r
544             + ", " + publicId\r
545             + ", " + systemId\r
546             + ", " + notationName\r
547             );\r
548         }\r
549 \r
550     };\r
551 }\r
552 //#endif\r