2 *******************************************************************************
3 * Copyright (C) 1996-2008, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
8 package com.ibm.icu.dev.test.normalizer;
10 import java.io.BufferedReader;
11 import java.io.IOException;
13 import com.ibm.icu.dev.test.TestFmwk;
14 import com.ibm.icu.dev.test.TestUtil;
15 import com.ibm.icu.text.UTF16;
16 import com.ibm.icu.text.UnicodeSet;
19 public class UnicodeNormalizerConformanceTest extends TestFmwk {
// One normalizer per normalization form exercised by this test (C, D, KC, KD).
// They are created in the constructor and used by checkConformance.
21 UnicodeNormalizer normalizer_C, normalizer_D, normalizer_KC, normalizer_KD;
// Command-line entry point: creates the test object and passes the arguments
// on to run(args).
// NOTE(review): run(...) is not defined in this file; presumably it is
// inherited from TestFmwk -- confirm.
23 public static void main(String[] args) throws Exception {
24 new UnicodeNormalizerConformanceTest().run(args);
// Creates the four normalizers used by the conformance checks, one for each
// of the form constants UnicodeNormalizer.C, D, KC and KD.
27 public UnicodeNormalizerConformanceTest() {
28 // Doesn't matter what the string and mode are; we'll change
29 // them later as needed.
// NOTE(review): the comment above talks about a "string and mode", but the
// calls below pass a form constant and a boolean; it looks stale -- confirm.
// NOTE(review): only normalizer_C is constructed with `true`, the other three
// with `false`. The meaning of this flag is not visible in this file; verify
// against the UnicodeNormalizer constructor that the asymmetry is intended.
30 normalizer_C = new UnicodeNormalizer(UnicodeNormalizer.C, true);
31 normalizer_D = new UnicodeNormalizer(UnicodeNormalizer.D, false);
32 normalizer_KC = new UnicodeNormalizer(UnicodeNormalizer.KC, false);
33 normalizer_KD = new UnicodeNormalizer(UnicodeNormalizer.KD, false);
36 // more interesting conformance test cases, not in the unicode.org NormalizationTest.txt
// Additional test lines that TestConformance reads from this array (see its
// "read the extra test cases" branch). Each entry is written as five
// ';'-separated fields of space-separated hex code points followed by a
// '#' comment, matching the column layout described in TestConformance.
37 static String[] moreCases ={
39 "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0",
41 // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129
42 "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1"
46 * Test the conformance of UnicodeNormalizer to the Unicode normalization test suite
47 * (originally http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt).
48 * The test data is read from "unicode/NormalizationTest.txt" via TestUtil.getDataReader.
// Reads lines from "unicode/NormalizationTest.txt" and from moreCases, parses
// each data line into five fields, runs checkConformance on it, and finally
// reports the totals through errln (any failures) or logln (all passed).
// NOTE(review): several lines of this method are absent from this view (the
// embedded numbering has gaps). The declarations of line, c, passCount and
// failCount, the opening of the try block, and a number of branch bodies are
// not visible, so the comments below describe only what the visible lines show.
50 public void TestConformance() throws Exception{
51 BufferedReader input = null;
// fields receives the five parsed columns of a line; buf is the scratch
// buffer handed to hexsplit.
53 String[] fields = new String[5];
54 StringBuffer buf = new StringBuffer();
// Starts out covering 0..0x10ffff; ranges are removed from it further down.
57 UnicodeSet other = new UnicodeSet(0, 0x10ffff);
60 input = TestUtil.getDataReader("unicode/NormalizationTest.txt");
// The loop header has no termination condition, so the loop must be left
// from inside the body (the exit statement is not visible here).
61 for (int count = 0;;++count) {
62 line = input.readLine();
// From here on count is also used as an index into moreCases.
// NOTE(review): the guard that selects this branch and the bodies of the two
// count comparisons are not visible -- presumably it is taken once readLine
// stops returning data; confirm.
64 //read the extra test cases
65 if(count > moreCases.length) {
67 } else if(count == moreCases.length) {
// NOTE(review): count is incremented here (count++) and again by ++count in
// the loop header, so consecutive moreCases entries may be skipped -- confirm
// against the full method.
71 line = moreCases[count++];
// Empty lines carry no test data.
73 if (line.length() == 0) continue;
75 // Expect 5 columns of this format:
76 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments>
// Lines whose first character is '#' or '@' are skipped as non-data lines.
79 if (line.charAt(0) == '#' || line.charAt(0)=='@') continue;
81 // Parse out the fields
82 hexsplit(line, ';', fields, buf);
84 // Remove a single code point from the "other" UnicodeSet
// The condition compares the length of the first field with the offset
// obtained by moving one code point from offset 0; per the comment above this
// identifies a first field consisting of a single code point.
85 if(fields[0].length()==UTF16.moveCodePointOffset(fields[0],0, 1)) {
86 c=UTF16.charAt(fields[0],0);
87 if(0xac20<=c && c<=0xd73f) {
88 // not an exhaustive test run: skip most Hangul syllables
// Drops the whole 0xac20..0xd73f range from `other` in one call.
90 other.remove(0xac20, 0xd73f);
// checkConformance returns true when the line passes.
// NOTE(review): the statements inside this if/else (presumably the
// passCount/failCount updates) are not visible.
96 if (checkConformance(fields, line)) {
// Progress message once every 1000 values of count.
101 if ((count % 1000) == 999) {
102 logln("Line " + (count+1));
// An IOException is reported with printStackTrace and rethrown as an
// IllegalArgumentException whose message is built from the exception's class
// name and message.
// NOTE(review): in the visible lines the original exception is not passed on
// as the cause, and the nested catch of Exception only executes
// System.out.print(""), i.e. ex2 is discarded. No close of `input` on the
// non-exception path is visible either -- confirm against the full method.
105 } catch (IOException ex) {
109 } catch (Exception ex2) {
110 System.out.print("");
113 ex.printStackTrace();
114 throw new IllegalArgumentException("Couldn't read file "
115 + ex.getClass().getName() + " " + ex.getMessage()
// Summary: errln marks the test as failed when any line failed; otherwise the
// pass count is only logged.
120 if (failCount != 0) {
121 errln("Total: " + failCount + " lines failed, " +
122 passCount + " lines passed");
124 logln("Total: " + passCount + " lines passed");
129 * Verify the conformance of the given line of the Unicode
130 * normalization (UTR 15) test suite file. For each line,
131 * there are five columns, corresponding to field[0]..field[4].
133 * The following invariants must be true for all conformant implementations
134 * c2 == NFC(c1) == NFC(c2) == NFC(c3)
135 * c3 == NFD(c1) == NFD(c2) == NFD(c3)
136 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
137 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
139 * @param field the 5 columns
140 * @param line the source line from the test suite file
141 * @return true if the test passes
// Normalizes the columns of one test line with the C, D, KC and KD
// normalizers and compares each result with field[1], field[2], field[3] and
// field[4] respectively. Every comparison is AND-ed into `pass`, and the line
// is reported with errln("FAIL: " + line).
// NOTE(review): the declarations of i, out and pass, the condition guarding
// the errln call and the return statement are not visible in this view.
143 private boolean checkConformance(String[] field, String line) throws Exception{
145 // StringBuffer buf = new StringBuffer(); // scratch
// i runs over all five columns; the message suffix "(c" + (i+1) names the
// 1-based column that was normalized.
148 for (i=0; i<5; ++i) {
// NOTE(review): as visible here, the C and D checks run for all five columns,
// including c4 and c5. The embedded numbering jumps from 148 to 150, so a
// line is missing at this point -- confirm whether it restricts the C/D
// checks to the first three columns.
150 out = normalizer_C.normalize(field[i]);
151 pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c" + (i+1));
153 out = normalizer_D.normalize(field[i]);
154 pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c" + (i+1));
157 out = normalizer_KC.normalize(field[i]);
158 pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c" + (i+1));
160 out = normalizer_KD.normalize(field[i]);
161 pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c" + (i+1));
166 errln("FAIL: " + line);
173 * @param op name of normalization form, e.g., "KC"
174 * @param s string being normalized
175 * @param got value received
176 * @param exp expected value
177 * @param msg description of this test
178 * @return true if got == exp
// Compares the expected string with the normalizer output using String.equals
// and reports a mismatch through errln. The reported text has the shape
//   " <msg>) <op>(<s>)=<hex(got)>, exp. <hex(exp)>"
// NOTE(review): the return statements are not visible in this view.
180 private boolean assertEqual(String op, String s, String got,
181 String exp, String msg) {
// exp is the receiver of equals, so a null `got` compares as unequal instead
// of throwing.
182 if (exp.equals(got)) {
// got and exp are passed through hex(...), while s is concatenated as is.
185 errln((" " + msg + ") " + op + "(" + s + ")=" + hex(got) +
186 ", exp. " + hex(exp)));
191 * Split a string into pieces based on the given delimiter
192 * character. Then, parse the resultant fields from hex into
193 * characters. That is, "0040 0400;0C00;0899" -> new String[] {
194 * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to
195 * be of the proper length already, and exactly output.length
196 * fields are parsed. If there are too few an exception is
197 * thrown. If there are too many the extras are ignored.
199 * @param buf scratch buffer
// Splits s at the delimiter into output.length fields and converts each field
// from space-separated hex numbers into a String, stored in output[i].
// Throws IllegalArgumentException for a missing or empty field.
// NOTE(review): the declarations of i, pos and index, the inner loop header,
// the reset of buf between fields and several branch conditions are not
// visible in this view.
201 private static void hexsplit(String s, char delimiter,
202 String[] output, StringBuffer buf) {
// Exactly output.length fields are consumed.
205 for (i=0; i<output.length; ++i) {
// Next delimiter at or after the current position.
206 int delim = s.indexOf(delimiter, pos);
// Condition not visible -- presumably delim < 0, i.e. no further delimiter.
208 throw new IllegalArgumentException("Missing field in " + s);
210 // Our field is from pos..delim-1.
// Raw hex text of the current field.
213 String toHex = s.substring(pos,delim);
216 int len = toHex.length();
// A space at the current index is treated specially (body not visible --
// presumably it is skipped).
218 if(toHex.charAt(index)==' '){
// Look for the space that ends the current hex number.
221 int spacePos = toHex.indexOf(' ', index);
// Two cases: the number runs to the end of the field (index..len), or it ends
// at the next space (index..spacePos). The selecting condition is not
// visible -- presumably spacePos < 0 for the first case.
223 appendInt(buf,toHex.substring(index,len),s);
226 appendInt(buf,toHex.substring(index, spacePos),s);
// A field that produced no characters is rejected.
232 if (buf.length() < 1) {
233 throw new IllegalArgumentException("Empty field " + i + " in " + s);
235 output[i] = buf.toString();
236 ++pos; // Skip over delim
// Parses strToHex as a base-16 integer and appends it to buf as one or two
// chars. Integer.parseInt throws NumberFormatException if strToHex is not a
// valid hex number.
// NOTE(review): s is not used in the visible lines; presumably it is part of
// the error message on the missing line -- confirm.
239 public static void appendInt(StringBuffer buf, String strToHex, String s){
240 int hex = Integer.parseInt(strToHex,16);
// The condition guarding this throw is not visible in this view.
242 throw new IllegalArgumentException("Out of range hex " +
// Values above 0xFFFF do not fit into one char and are written as two:
// (hex>>10)+0xd7c0 followed by (hex&0x3ff)|0xdc00. 0xd7c0 equals
// 0xd800 - (0x10000>>10), so this is the UTF-16 surrogate pair computation.
// NOTE(review): no upper bound check (hex <= 0x10FFFF) is visible; larger
// values would yield chars outside the surrogate ranges -- confirm.
244 }else if (hex > 0xFFFF){
245 buf.append((char)((hex>>10)+0xd7c0));
246 buf.append((char)((hex&0x3ff)|0xdc00));
// Remaining values are appended as a single char.
248 buf.append((char) hex);
252 // Specific tests for debugging. These are generally failures
253 // taken from the conformance file, but culled out to make
254 // debugging easier. These can be eliminated without affecting
// Debugging helper: runs one hard-coded five-field conformance line through
// _testOneLine.
// NOTE(review): the leading underscore presumably keeps the test framework
// from running this as a regular test -- confirm.
257 public void _hideTestCase6() throws Exception{
258 _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;");
// Parses a single line in the five-field test format with hexsplit and runs
// checkConformance on it. The boolean result of checkConformance is not used
// here.
261 public void _testOneLine(String line) throws Exception{
// Five output slots, one per column; buf is the scratch buffer for hexsplit.
262 String[] fields = new String[5];
263 StringBuffer buf = new StringBuffer();
264 // Parse out the fields
265 hexsplit(line, ';', fields, buf);
266 checkConformance(fields, line);