2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
7 package com.ibm.icu.dev.test.normalizer;
9 import java.util.Collections;
10 import java.util.EnumSet;
13 import java.util.TreeMap;
15 import com.ibm.icu.dev.test.TestFmwk;
16 import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
17 import com.ibm.icu.text.IDNA;
20 * UTS #46 (IDNA2008) test.
21 * @author Markus Scherer
24 public class UTS46Test extends TestFmwk {
25 public static void main(String[] args) throws Exception {
26 new UTS46Test().run(args);
30 IDNA.USE_STD3_RULES|IDNA.CHECK_BIDI|
31 IDNA.CHECK_CONTEXTJ|IDNA.CHECK_CONTEXTO;
// Transitional instance: created from commonOptions alone
// (presumably the flag union that ends on the line above -- confirm).
32 trans=IDNA.getUTS46Instance(commonOptions);
// Nontransitional instance: the same options plus the NONTRANSITIONAL flags
// for both the ToASCII and the ToUnicode direction.
33 nontrans=IDNA.getUTS46Instance(commonOptions|
34 IDNA.NONTRANSITIONAL_TO_ASCII|IDNA.NONTRANSITIONAL_TO_UNICODE);
// Spot-checks the IDNA API surface: nameToASCII on mixed-case ASCII,
// labelToASCII on a label that contains a dot, nameToUnicode on the
// transitional instance, and the rejection of one StringBuilder passed as
// both source and destination.
37 public void TestAPI() {
38 StringBuilder result=new StringBuilder();
39 IDNA.Info info=new IDNA.Info();
// Mixed-case ASCII must come back lowercased and without errors.
40 String input="www.eXample.cOm";
41 String expected="www.example.com";
42 trans.nameToASCII(input, result, info);
43 if(info.hasErrors() || !UTF16Plus.equal(result, expected)) {
44 errln(String.format("T.nameToASCII(www.example.com) info.errors=%s result matches=%b",
45 info.getErrors(), UTF16Plus.equal(result, expected)));
// A dot inside a single label: the expected output has U+FFFD in place of
// the dot, and exactly LABEL_HAS_DOT plus INVALID_ACE_LABEL must be reported.
47 input="xn--bcher.de-65a";
48 expected="xn--bcher\uFFFDde-65a";
49 nontrans.labelToASCII(input, result, info);
50 if( !info.getErrors().equals(EnumSet.of(IDNA.Error.LABEL_HAS_DOT, IDNA.Error.INVALID_ACE_LABEL)) ||
51 !UTF16Plus.equal(result, expected)
53 errln(String.format("N.labelToASCII(label-with-dot) failed with errors %s",
56 // Java API tests that are not parallel to C++ tests
57 // because the C++ specifics (error codes etc.) do not apply here.
// The transitional instance is expected to turn sharp s into "ss" and to
// lowercase the rest, without errors.
58 String resultString=trans.nameToUnicode("fA\u00DF.de", result, info).toString();
59 if(info.hasErrors() || !resultString.equals("fass.de")) {
60 errln(String.format("T.nameToUnicode(fA\u00DF.de) info.errors=%s result matches=%b",
61 info.getErrors(), resultString.equals("fass.de")));
// Source and destination are the same object here; reaching the errln below
// means no exception was thrown, which is the failure case.
64 nontrans.labelToUnicode(result, result, info);
65 errln("N.labelToUnicode(result, result) did not throw an Exception");
66 } catch(Exception e) {
67 // as expected (should be an IllegalArgumentException, or an ICU version of it)
// Checks an instance created with CHECK_BIDI only, i.e. without
// USE_STD3_RULES: ASCII outside letters/digits/hyphen must be accepted.
71 public void TestNotSTD3() {
72 IDNA not3=IDNA.getUTS46Instance(IDNA.CHECK_BIDI);
// NUL, underscore, plus, equals and newline are all non-LDH ASCII; the
// expected output keeps them and only lowercases / maps sharp s to "ss".
73 String input="\u0000A_2+2=4\n.e\u00DFen.net";
74 StringBuilder result=new StringBuilder();
75 IDNA.Info info=new IDNA.Info();
76 if( !not3.nameToUnicode(input, result, info).toString().equals("\u0000a_2+2=4\n.essen.net") ||
79 errln(String.format("notSTD3.nameToUnicode(non-LDH ASCII) unexpected errors %s string %s",
80 info.getErrors(), prettify(result.toString())));
82 // A space (BiDi class WS) is not allowed in a BiDi domain name.
// The output must equal the input and the error set must be exactly {BIDI}.
83 input="a z.xn--4db.edu";
84 not3.nameToASCII(input, result, info);
85 if(!UTF16Plus.equal(result, input) || !info.getErrors().equals(EnumSet.of(IDNA.Error.BIDI))) {
86 errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
88 // Characters that are canonically equivalent to sequences with non-LDH ASCII.
// On this instance they must pass through unchanged and without errors.
89 input="a\u2260b\u226Ec\u226Fd";
90 not3.nameToUnicode(input, result, info);
91 if(!UTF16Plus.equal(result, input) || info.hasErrors()) {
92 errln(String.format("notSTD3.nameToUnicode(equiv to non-LDH ASCII) unexpected errors %s string %s",
93 info.getErrors().toString(), prettify(result.toString())));
// Lookup from the UIDNA_ERROR_* names used in the fourth column of the
// testCases table to the corresponding IDNA.Error enum constants.
// (The names presumably mirror the C/C++ version of this test -- confirm.)
97 private static final Map<String, IDNA.Error> errorNamesToErrors;
99 errorNamesToErrors=new TreeMap<String, IDNA.Error>();
100 errorNamesToErrors.put("UIDNA_ERROR_EMPTY_LABEL", IDNA.Error.EMPTY_LABEL);
101 errorNamesToErrors.put("UIDNA_ERROR_LABEL_TOO_LONG", IDNA.Error.LABEL_TOO_LONG);
102 errorNamesToErrors.put("UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", IDNA.Error.DOMAIN_NAME_TOO_LONG);
103 errorNamesToErrors.put("UIDNA_ERROR_LEADING_HYPHEN", IDNA.Error.LEADING_HYPHEN);
104 errorNamesToErrors.put("UIDNA_ERROR_TRAILING_HYPHEN", IDNA.Error.TRAILING_HYPHEN);
105 errorNamesToErrors.put("UIDNA_ERROR_HYPHEN_3_4", IDNA.Error.HYPHEN_3_4);
106 errorNamesToErrors.put("UIDNA_ERROR_LEADING_COMBINING_MARK", IDNA.Error.LEADING_COMBINING_MARK);
107 errorNamesToErrors.put("UIDNA_ERROR_DISALLOWED", IDNA.Error.DISALLOWED);
108 errorNamesToErrors.put("UIDNA_ERROR_PUNYCODE", IDNA.Error.PUNYCODE);
109 errorNamesToErrors.put("UIDNA_ERROR_LABEL_HAS_DOT", IDNA.Error.LABEL_HAS_DOT);
110 errorNamesToErrors.put("UIDNA_ERROR_INVALID_ACE_LABEL", IDNA.Error.INVALID_ACE_LABEL);
111 errorNamesToErrors.put("UIDNA_ERROR_BIDI", IDNA.Error.BIDI);
112 errorNamesToErrors.put("UIDNA_ERROR_CONTEXTJ", IDNA.Error.CONTEXTJ);
113 errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_PUNCTUATION", IDNA.Error.CONTEXTO_PUNCTUATION);
114 errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_DIGITS", IDNA.Error.CONTEXTO_DIGITS);
// Holder for one decoded row of the testCases table. TestSomeCases creates a
// single instance and refills it for every row through set().
117 private static final class TestCase {
// Starts with an empty expected-error set.
119 errors=EnumSet.noneOf(IDNA.Error.class);
// Loads one table row. data[3] is a '|'-separated list of UIDNA_ERROR_*
// names; an empty string means that no errors are expected.
121 private void set(String[] data) {
126 if(data[3].length()!=0) {
127 for(String e: data[3].split("\\|")) {
// A name that is missing from errorNamesToErrors yields null here, which
// EnumSet.add rejects with a NullPointerException.
128 errors.add(errorNamesToErrors.get(e));
132 // Input string and options string (Nontransitional/Transitional/Both).
134 // Expected Unicode result string.
// Expected errors for the row, filled from data[3] by set().
136 private EnumSet<IDNA.Error> errors;
// Test data for TestSomeCases, four strings per row:
//   1. the input domain name or label
//   2. the processing mode the expectations apply to:
//      "B" (both), "N" (nontransitional only) or "T" (transitional only)
//   3. the expected ToUnicode result
//   4. the expected errors as UIDNA_ERROR_* names joined with '|',
//      or "" when no error is expected
139 private static final String testCases[][]={
140 { "www.eXample.cOm", "B", // all ASCII
141 "www.example.com", "" },
142 { "B\u00FCcher.de", "B", // u-umlaut
143 "b\u00FCcher.de", "" },
144 { "\u00D6BB", "B", // O-umlaut
146 { "fa\u00DF.de", "N", // sharp s
148 { "fa\u00DF.de", "T", // sharp s
150 { "XN--fA-hia.dE", "B", // sharp s in Punycode
152 { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "N", // Greek with final sigma
153 "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "" },
154 { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "T", // Greek with final sigma
155 "\u03B2\u03CC\u03BB\u03BF\u03C3.com", "" },
156 { "xn--nxasmm1c", "B", // Greek with final sigma in Punycode
157 "\u03B2\u03CC\u03BB\u03BF\u03C2", "" },
158 { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "N", // "Sri" in "Sri Lanka" has a ZWJ
159 "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
160 { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "T", // "Sri" in "Sri Lanka" has a ZWJ
161 "www.\u0DC1\u0DCA\u0DBB\u0DD3.com", "" },
162 { "www.xn--10cl1a0b660p.com", "B", // "Sri" in Punycode
163 "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
164 { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "N", // ZWNJ
165 "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "" },
166 { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "T", // ZWNJ
167 "\u0646\u0627\u0645\u0647\u0627\u06CC", "" },
168 { "xn--mgba3gch31f060k.com", "B", // ZWNJ in Punycode
169 "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC.com", "" },
170 { "a.b\uFF0Ec\u3002d\uFF61", "B",
172 { "U\u0308.xn--tda", "B", // U+umlaut.u-umlaut
173 "\u00FC.\u00FC", "" },
174 { "xn--u-ccb", "B", // u+umlaut in Punycode
175 "xn--u-ccb\uFFFD", "UIDNA_ERROR_INVALID_ACE_LABEL" },
176 { "a\u2488com", "B", // contains 1-dot
177 "a\uFFFDcom", "UIDNA_ERROR_DISALLOWED" },
178 { "xn--a-ecp.ru", "B", // contains 1-dot in Punycode
179 "xn--a-ecp\uFFFD.ru", "UIDNA_ERROR_INVALID_ACE_LABEL" },
180 { "xn--0.pt", "B", // invalid Punycode
181 "xn--0\uFFFD.pt", "UIDNA_ERROR_PUNYCODE" },
182 { "xn--a.pt", "B", // U+0080
183 "xn--a\uFFFD.pt", "UIDNA_ERROR_INVALID_ACE_LABEL" },
184 { "xn--a-\u00C4.pt", "B", // invalid Punycode
185 "xn--a-\u00E4.pt", "UIDNA_ERROR_PUNYCODE" },
186 { "\u65E5\u672C\u8A9E\u3002\uFF2A\uFF30", "B", // Japanese with fullwidth ".jp"
187 "\u65E5\u672C\u8A9E.jp", "" },
188 { "\u2615", "B", "\u2615", "" }, // Unicode 4.0 HOT BEVERAGE
189 // some characters are disallowed because they are canonically equivalent
190 // to sequences with non-LDH ASCII
191 { "a\u2260b\u226Ec\u226Fd", "B",
192 "a\uFFFDb\uFFFDc\uFFFDd", "UIDNA_ERROR_DISALLOWED" },
193 // many deviation characters, test the special mapping code
194 { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
195 "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
196 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
197 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
198 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "N",
199 "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
200 "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
201 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
202 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
203 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz",
204 "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_CONTEXTJ" },
205 { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
206 "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
207 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
208 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
209 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "T",
211 "\u03C3\u03C3sssssssssssssssse"+
212 "ssssssssssssssssssssx"+
213 "ssssssssssssssssssssy"+
214 "sssssssssssssss\u015Dssz", "UIDNA_ERROR_LABEL_TOO_LONG" },
215 // "xn--bss" with deviation characters
216 { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "N",
217 "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "UIDNA_ERROR_CONTEXTJ" },
218 { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "T",
220 // "xn--bssffl" written as:
221 // 02E3 MODIFIER LETTER SMALL X
222 // 034F COMBINING GRAPHEME JOINER (ignored)
223 // 2115 DOUBLE-STRUCK CAPITAL N
224 // 200B ZERO WIDTH SPACE (ignored)
225 // FE63 SMALL HYPHEN-MINUS
226 // 00AD SOFT HYPHEN (ignored)
227 // FF0D FULLWIDTH HYPHEN-MINUS
228 // 180C MONGOLIAN FREE VARIATION SELECTOR TWO (ignored)
229 // 212C SCRIPT CAPITAL B
230 // FE00 VARIATION SELECTOR-1 (ignored)
231 // 017F LATIN SMALL LETTER LONG S
232 // 2064 INVISIBLE PLUS (ignored)
233 // 1D530 MATHEMATICAL FRAKTUR SMALL S
234 // E01EF VARIATION SELECTOR-256 (ignored)
235 // FB04 LATIN SMALL LIGATURE FFL
236 { "\u02E3\u034F\u2115\u200B\uFE63\u00AD\uFF0D\u180C"+
237 "\u212C\uFE00\u017F\u2064"+"\uD835\uDD30\uDB40\uDDEF"/*1D530 E01EF*/+"\uFB04", "B",
238 "\u5921\u591E\u591C\u5919", "" },
239 { "123456789012345678901234567890123456789012345678901234567890123."+
240 "123456789012345678901234567890123456789012345678901234567890123."+
241 "123456789012345678901234567890123456789012345678901234567890123."+
242 "1234567890123456789012345678901234567890123456789012345678901", "B",
243 "123456789012345678901234567890123456789012345678901234567890123."+
244 "123456789012345678901234567890123456789012345678901234567890123."+
245 "123456789012345678901234567890123456789012345678901234567890123."+
246 "1234567890123456789012345678901234567890123456789012345678901", "" },
247 { "123456789012345678901234567890123456789012345678901234567890123."+
248 "123456789012345678901234567890123456789012345678901234567890123."+
249 "123456789012345678901234567890123456789012345678901234567890123."+
250 "1234567890123456789012345678901234567890123456789012345678901.", "B",
251 "123456789012345678901234567890123456789012345678901234567890123."+
252 "123456789012345678901234567890123456789012345678901234567890123."+
253 "123456789012345678901234567890123456789012345678901234567890123."+
254 "1234567890123456789012345678901234567890123456789012345678901.", "" },
255 // Domain name >256 characters, forces slow path in UTF-8 processing.
256 { "123456789012345678901234567890123456789012345678901234567890123."+
257 "123456789012345678901234567890123456789012345678901234567890123."+
258 "123456789012345678901234567890123456789012345678901234567890123."+
259 "123456789012345678901234567890123456789012345678901234567890123."+
260 "12345678901234567890123456789012345678901234567890123456789012", "B",
261 "123456789012345678901234567890123456789012345678901234567890123."+
262 "123456789012345678901234567890123456789012345678901234567890123."+
263 "123456789012345678901234567890123456789012345678901234567890123."+
264 "123456789012345678901234567890123456789012345678901234567890123."+
265 "12345678901234567890123456789012345678901234567890123456789012",
266 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
267 { "123456789012345678901234567890123456789012345678901234567890123."+
268 "123456789012345678901234567890123456789012345678901234567890123."+
269 "123456789012345678901234567890123456789012345678901234567890123."+
270 "123456789012345678901234567890123456789012345678901234567890123."+
271 "1234567890123456789012345678901234567890123456789\u05D0", "B",
272 "123456789012345678901234567890123456789012345678901234567890123."+
273 "123456789012345678901234567890123456789012345678901234567890123."+
274 "123456789012345678901234567890123456789012345678901234567890123."+
275 "123456789012345678901234567890123456789012345678901234567890123."+
276 "1234567890123456789012345678901234567890123456789\u05D0",
277 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG|UIDNA_ERROR_BIDI" },
278 { "123456789012345678901234567890123456789012345678901234567890123."+
279 "1234567890123456789012345678901234567890123456789012345678901234."+
280 "123456789012345678901234567890123456789012345678901234567890123."+
281 "123456789012345678901234567890123456789012345678901234567890", "B",
282 "123456789012345678901234567890123456789012345678901234567890123."+
283 "1234567890123456789012345678901234567890123456789012345678901234."+
284 "123456789012345678901234567890123456789012345678901234567890123."+
285 "123456789012345678901234567890123456789012345678901234567890",
286 "UIDNA_ERROR_LABEL_TOO_LONG" },
287 { "123456789012345678901234567890123456789012345678901234567890123."+
288 "1234567890123456789012345678901234567890123456789012345678901234."+
289 "123456789012345678901234567890123456789012345678901234567890123."+
290 "123456789012345678901234567890123456789012345678901234567890.", "B",
291 "123456789012345678901234567890123456789012345678901234567890123."+
292 "1234567890123456789012345678901234567890123456789012345678901234."+
293 "123456789012345678901234567890123456789012345678901234567890123."+
294 "123456789012345678901234567890123456789012345678901234567890.",
295 "UIDNA_ERROR_LABEL_TOO_LONG" },
296 { "123456789012345678901234567890123456789012345678901234567890123."+
297 "1234567890123456789012345678901234567890123456789012345678901234."+
298 "123456789012345678901234567890123456789012345678901234567890123."+
299 "1234567890123456789012345678901234567890123456789012345678901", "B",
300 "123456789012345678901234567890123456789012345678901234567890123."+
301 "1234567890123456789012345678901234567890123456789012345678901234."+
302 "123456789012345678901234567890123456789012345678901234567890123."+
303 "1234567890123456789012345678901234567890123456789012345678901",
304 "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
305 // label length 63: xn--1234567890123456789012345678901234567890123456789012345-9te
306 { "\u00E41234567890123456789012345678901234567890123456789012345", "B",
307 "\u00E41234567890123456789012345678901234567890123456789012345", "" },
308 { "1234567890\u00E41234567890123456789012345678901234567890123456", "B",
309 "1234567890\u00E41234567890123456789012345678901234567890123456", "UIDNA_ERROR_LABEL_TOO_LONG" },
310 { "123456789012345678901234567890123456789012345678901234567890123."+
311 "1234567890\u00E4123456789012345678901234567890123456789012345."+
312 "123456789012345678901234567890123456789012345678901234567890123."+
313 "1234567890123456789012345678901234567890123456789012345678901", "B",
314 "123456789012345678901234567890123456789012345678901234567890123."+
315 "1234567890\u00E4123456789012345678901234567890123456789012345."+
316 "123456789012345678901234567890123456789012345678901234567890123."+
317 "1234567890123456789012345678901234567890123456789012345678901", "" },
318 { "123456789012345678901234567890123456789012345678901234567890123."+
319 "1234567890\u00E4123456789012345678901234567890123456789012345."+
320 "123456789012345678901234567890123456789012345678901234567890123."+
321 "1234567890123456789012345678901234567890123456789012345678901.", "B",
322 "123456789012345678901234567890123456789012345678901234567890123."+
323 "1234567890\u00E4123456789012345678901234567890123456789012345."+
324 "123456789012345678901234567890123456789012345678901234567890123."+
325 "1234567890123456789012345678901234567890123456789012345678901.", "" },
326 { "123456789012345678901234567890123456789012345678901234567890123."+
327 "1234567890\u00E4123456789012345678901234567890123456789012345."+
328 "123456789012345678901234567890123456789012345678901234567890123."+
329 "12345678901234567890123456789012345678901234567890123456789012", "B",
330 "123456789012345678901234567890123456789012345678901234567890123."+
331 "1234567890\u00E4123456789012345678901234567890123456789012345."+
332 "123456789012345678901234567890123456789012345678901234567890123."+
333 "12345678901234567890123456789012345678901234567890123456789012",
334 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
335 { "123456789012345678901234567890123456789012345678901234567890123."+
336 "1234567890\u00E41234567890123456789012345678901234567890123456."+
337 "123456789012345678901234567890123456789012345678901234567890123."+
338 "123456789012345678901234567890123456789012345678901234567890", "B",
339 "123456789012345678901234567890123456789012345678901234567890123."+
340 "1234567890\u00E41234567890123456789012345678901234567890123456."+
341 "123456789012345678901234567890123456789012345678901234567890123."+
342 "123456789012345678901234567890123456789012345678901234567890",
343 "UIDNA_ERROR_LABEL_TOO_LONG" },
344 { "123456789012345678901234567890123456789012345678901234567890123."+
345 "1234567890\u00E41234567890123456789012345678901234567890123456."+
346 "123456789012345678901234567890123456789012345678901234567890123."+
347 "123456789012345678901234567890123456789012345678901234567890.", "B",
348 "123456789012345678901234567890123456789012345678901234567890123."+
349 "1234567890\u00E41234567890123456789012345678901234567890123456."+
350 "123456789012345678901234567890123456789012345678901234567890123."+
351 "123456789012345678901234567890123456789012345678901234567890.",
352 "UIDNA_ERROR_LABEL_TOO_LONG" },
353 { "123456789012345678901234567890123456789012345678901234567890123."+
354 "1234567890\u00E41234567890123456789012345678901234567890123456."+
355 "123456789012345678901234567890123456789012345678901234567890123."+
356 "1234567890123456789012345678901234567890123456789012345678901", "B",
357 "123456789012345678901234567890123456789012345678901234567890123."+
358 "1234567890\u00E41234567890123456789012345678901234567890123456."+
359 "123456789012345678901234567890123456789012345678901234567890123."+
360 "1234567890123456789012345678901234567890123456789012345678901",
361 "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
362 // hyphen errors and empty-label errors
363 // "xn---q----jra"=="-q--a-umlaut-"
364 { "a.b..-q--a-.e", "B", "a.b..-q--a-.e",
365 "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
366 "UIDNA_ERROR_HYPHEN_3_4" },
367 { "a.b..-q--\u00E4-.e", "B", "a.b..-q--\u00E4-.e",
368 "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
369 "UIDNA_ERROR_HYPHEN_3_4" },
370 { "a.b..xn---q----jra.e", "B", "a.b..-q--\u00E4-.e",
371 "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
372 "UIDNA_ERROR_HYPHEN_3_4" },
373 { "a..c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
374 { "a.-b.", "B", "a.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
375 { "a.b-.c", "B", "a.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
376 { "a.-.c", "B", "a.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
377 { "a.bc--de.f", "B", "a.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
378 { "\u00E4.\u00AD.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
379 { "\u00E4.-b.", "B", "\u00E4.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
380 { "\u00E4.b-.c", "B", "\u00E4.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
381 { "\u00E4.-.c", "B", "\u00E4.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
382 { "\u00E4.bc--de.f", "B", "\u00E4.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
383 { "a.b.\u0308c.d", "B", "a.b.\uFFFDc.d", "UIDNA_ERROR_LEADING_COMBINING_MARK" },
384 { "a.b.xn--c-bcb.d", "B",
385 "a.b.xn--c-bcb\uFFFD.d", "UIDNA_ERROR_LEADING_COMBINING_MARK|UIDNA_ERROR_INVALID_ACE_LABEL" },
387 { "A0", "B", "a0", "" },
388 { "0A", "B", "0a", "" }, // all-LTR is ok to start with a digit (EN)
389 { "0A.\u05D0", "B", // ASCII label does not start with L/R/AL
390 "0a.\u05D0", "UIDNA_ERROR_BIDI" },
391 { "c.xn--0-eha.xn--4db", "B", // 2nd label does not start with L/R/AL
392 "c.0\u00FC.\u05D0", "UIDNA_ERROR_BIDI" },
393 { "b-.\u05D0", "B", // label does not end with L/EN
394 "b-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
395 { "d.xn----dha.xn--4db", "B", // 2nd label does not end with L/EN
396 "d.\u00FC-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
397 { "a\u05D0", "B", "a\u05D0", "UIDNA_ERROR_BIDI" }, // first dir != last dir
398 { "\u05D0\u05C7", "B", "\u05D0\u05C7", "" },
399 { "\u05D09\u05C7", "B", "\u05D09\u05C7", "" },
400 { "\u05D0a\u05C7", "B", "\u05D0a\u05C7", "UIDNA_ERROR_BIDI" }, // first dir != last dir
401 { "\u05D0\u05EA", "B", "\u05D0\u05EA", "" },
402 { "\u05D0\u05F3\u05EA", "B", "\u05D0\u05F3\u05EA", "" },
403 { "a\u05D0Tz", "B", "a\u05D0tz", "UIDNA_ERROR_BIDI" }, // mixed dir
404 { "\u05D0T\u05EA", "B", "\u05D0t\u05EA", "UIDNA_ERROR_BIDI" }, // mixed dir
405 { "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
406 { "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" }, // Arabic 7 in the middle
407 { "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" }, // AN digit in LTR
408 { "\u05D07\u0667\u05EA", "B", // mixed EN/AN digits in RTL
409 "\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
411 { "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" }, // Virama+ZWJ
412 { "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
413 { "\u200D", "N", "\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
415 { "\u0BB9\u0BCD\u200C", "N", "\u0BB9\u0BCD\u200C", "" }, // Virama+ZWNJ
416 { "\u0BB9\u200C", "N", "\u0BB9\u200C", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
417 { "\u200C", "N", "\u200C", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
418 { "\u0644\u0670\u200C\u06ED\u06EF", "N", // Joining types D T ZWNJ T R
419 "\u0644\u0670\u200C\u06ED\u06EF", "" },
420 { "\u0644\u0670\u200C\u06EF", "N", // D T ZWNJ R
421 "\u0644\u0670\u200C\u06EF", "" },
422 { "\u0644\u200C\u06ED\u06EF", "N", // D ZWNJ T R
423 "\u0644\u200C\u06ED\u06EF", "" },
424 { "\u0644\u200C\u06EF", "N", // D ZWNJ R
425 "\u0644\u200C\u06EF", "" },
426 { "\u0644\u0670\u200C\u06ED", "N", // D T ZWNJ T
427 "\u0644\u0670\u200C\u06ED", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
428 { "\u06EF\u200C\u06EF", "N", // R ZWNJ R
429 "\u06EF\u200C\u06EF", "UIDNA_ERROR_CONTEXTJ" },
430 { "\u0644\u200C", "N", // D ZWNJ
431 "\u0644\u200C", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
432 { "\u0660\u0661", "B", // Arabic-Indic Digits alone
433 "\u0660\u0661", "UIDNA_ERROR_BIDI" },
434 { "\u06F0\u06F1", "B", // Extended Arabic-Indic Digits alone
435 "\u06F0\u06F1", "" },
436 { "\u0660\u06F1", "B", // Mixed Arabic-Indic Digits
437 "\u0660\u06F1", "UIDNA_ERROR_CONTEXTO_DIGITS|UIDNA_ERROR_BIDI" },
438 // All of the CONTEXTO "Would otherwise have been DISALLOWED" characters
439 // in their correct contexts,
440 // then each in incorrect context.
441 { "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "B",
442 "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "UIDNA_ERROR_BIDI" },
444 "l\u00B7", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
446 "\u00B7l", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
448 "\u0375", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
449 { "\u03B1\u05F3", "B",
450 "\u03B1\u05F3", "UIDNA_ERROR_CONTEXTO_PUNCTUATION|UIDNA_ERROR_BIDI" },
452 "\u05F4", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
454 "l\u30FB", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
// Table-driven test. Every row of testCases is run through nameToASCII and
// nameToUnicode and through labelToASCII and labelToUnicode, on both the
// transitional and the nontransitional instance. Result strings, error sets
// and isTransitionalDifferent() are then cross-checked against the row.
459 public void TestSomeCases() {
// Naming scheme: a=ToASCII result, u=ToUnicode result, T=transitional
// instance, N=nontransitional instance; a trailing L marks the label API.
460 StringBuilder aT=new StringBuilder(), uT=new StringBuilder();
461 StringBuilder aN=new StringBuilder(), uN=new StringBuilder();
462 IDNA.Info aTInfo=new IDNA.Info(), uTInfo=new IDNA.Info();
463 IDNA.Info aNInfo=new IDNA.Info(), uNInfo=new IDNA.Info();
// Second-level results: a first-level result fed back through the
// nontransitional instance, e.g. aTuN is N.nameToUnicode(aT).
465 StringBuilder aTuN=new StringBuilder(), uTaN=new StringBuilder();
466 StringBuilder aNuN=new StringBuilder(), uNaN=new StringBuilder();
467 IDNA.Info aTuNInfo=new IDNA.Info(), uTaNInfo=new IDNA.Info();
468 IDNA.Info aNuNInfo=new IDNA.Info(), uNaNInfo=new IDNA.Info();
// Results of the label API applied to the same input.
470 StringBuilder aTL=new StringBuilder(), uTL=new StringBuilder();
471 StringBuilder aNL=new StringBuilder(), uNL=new StringBuilder();
472 IDNA.Info aTLInfo=new IDNA.Info(), uTLInfo=new IDNA.Info();
473 IDNA.Info aNLInfo=new IDNA.Info(), uNLInfo=new IDNA.Info();
// Scratch set: the row's expected errors minus the length errors, used for
// the ToUnicode comparisons below.
475 EnumSet<IDNA.Error> uniErrors=EnumSet.noneOf(IDNA.Error.class);
// One TestCase object is reused and refilled for every row.
477 TestCase testCase=new TestCase();
479 for(i=0; i<testCases.length; ++i) {
480 testCase.set(testCases[i]);
481 String input=testCase.s;
482 String expected=testCase.u;
483 // ToASCII/ToUnicode, transitional/nontransitional
485 trans.nameToASCII(input, aT, aTInfo);
486 trans.nameToUnicode(input, uT, uTInfo);
487 nontrans.nameToASCII(input, aN, aNInfo);
488 nontrans.nameToUnicode(input, uN, uNInfo);
489 } catch(Exception e) {
490 errln(String.format("first-level processing [%d/%s] %s - %s",
491 i, testCase.o, testCase.s, e));
494 // ToUnicode does not set length errors.
496 uniErrors.addAll(testCase.errors);
497 uniErrors.removeAll(lengthErrors);
// The first character of the options string selects which instance(s) the
// row's expectations are checked against: 'B', 'N' or 'T'.
498 char mode=testCase.o.charAt(0);
499 if(mode=='B' || mode=='N') {
500 if(!sameErrors(uNInfo, uniErrors)) {
501 errln(String.format("N.nameToUnicode([%d] %s) unexpected errors %s",
502 i, testCase.s, uNInfo.getErrors()));
505 if(!UTF16Plus.equal(uN, expected)) {
506 errln(String.format("N.nameToUnicode([%d] %s) unexpected string %s",
507 i, testCase.s, prettify(uN.toString())));
// ToASCII is compared against the full expected set, length errors included.
510 if(!sameErrors(aNInfo, testCase.errors)) {
511 errln(String.format("N.nameToASCII([%d] %s) unexpected errors %s",
512 i, testCase.s, aNInfo.getErrors()));
516 if(mode=='B' || mode=='T') {
517 if(!sameErrors(uTInfo, uniErrors)) {
518 errln(String.format("T.nameToUnicode([%d] %s) unexpected errors %s",
519 i, testCase.s, uTInfo.getErrors()));
522 if(!UTF16Plus.equal(uT, expected)) {
523 errln(String.format("T.nameToUnicode([%d] %s) unexpected string %s",
524 i, testCase.s, prettify(uT.toString())));
527 if(!sameErrors(aTInfo, testCase.errors)) {
528 errln(String.format("T.nameToASCII([%d] %s) unexpected errors %s",
529 i, testCase.s, aTInfo.getErrors()));
533 // ToASCII is all-ASCII if no severe errors
534 if(!hasCertainErrors(aNInfo, severeErrors) && !isASCII(aN)) {
535 errln(String.format("N.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
536 i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
539 if(!hasCertainErrors(aTInfo, severeErrors) && !isASCII(aT)) {
540 errln(String.format("T.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
541 i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
// Log label for the nontransitional result: 'B' for a both-modes row,
// otherwise 'N'.
545 char m= mode=='B' ? mode : 'N';
546 logln(String.format("%c.nameToASCII([%d] %s) (errors %s) result string: %s",
547 m, i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
549 logln(String.format("T.nameToASCII([%d] %s) (errors %s) result string: %s",
550 i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
553 // second-level processing
// Every first-level result goes back through the nontransitional instance in
// the opposite direction, so that the round trips can be compared below.
555 nontrans.nameToUnicode(aT, aTuN, aTuNInfo);
556 nontrans.nameToASCII(uT, uTaN, uTaNInfo);
557 nontrans.nameToUnicode(aN, aNuN, aNuNInfo);
558 nontrans.nameToASCII(uN, uNaN, uNaNInfo);
559 } catch(Exception e) {
560 errln(String.format("second-level processing [%d/%s] %s - %s",
561 i, testCase.o, testCase.s, e));
// ToASCII applied directly must equal ToUnicode followed by N.ToASCII.
564 if(!UTF16Plus.equal(aN, uNaN)) {
565 errln(String.format("N.nameToASCII([%d] %s)!=N.nameToUnicode().N.nameToASCII() "+
566 "(errors %s) %s vs. %s",
567 i, testCase.s, aNInfo.getErrors(),
568 prettify(aN.toString()), prettify(uNaN.toString())));
571 if(!UTF16Plus.equal(aT, uTaN)) {
572 errln(String.format("T.nameToASCII([%d] %s)!=T.nameToUnicode().N.nameToASCII() "+
573 "(errors %s) %s vs. %s",
// NOTE(review): this message is about the transitional result aT but prints
// aNInfo's errors; aTInfo looks like the intended object -- confirm.
574 i, testCase.s, aNInfo.getErrors(),
575 prettify(aT.toString()), prettify(uTaN.toString())));
// ToUnicode applied directly must equal ToASCII followed by N.ToUnicode.
578 if(!UTF16Plus.equal(uN, aNuN)) {
579 errln(String.format("N.nameToUnicode([%d] %s)!=N.nameToASCII().N.nameToUnicode() "+
580 "(errors %s) %s vs. %s",
581 i, testCase.s, uNInfo.getErrors(), prettify(uN.toString()), prettify(aNuN.toString())));
584 if(!UTF16Plus.equal(uT, aTuN)) {
585 errln(String.format("T.nameToUnicode([%d] %s)!=T.nameToASCII().N.nameToUnicode() "+
586 "(errors %s) %s vs. %s",
// NOTE(review): this message is about the transitional result uT but prints
// uNInfo's errors; uTInfo looks like the intended object -- confirm.
587 i, testCase.s, uNInfo.getErrors(),
588 prettify(uT.toString()), prettify(aTuN.toString())));
// Label API on the same input, for comparison with the name API results.
593 trans.labelToASCII(input, aTL, aTLInfo);
594 trans.labelToUnicode(input, uTL, uTLInfo);
595 nontrans.labelToASCII(input, aNL, aNLInfo);
596 nontrans.labelToUnicode(input, uNL, uNLInfo);
597 } catch(Exception e) {
598 errln(String.format("labelToXYZ processing [%d/%s] %s - %s",
599 i, testCase.o, testCase.s, e));
// For each of the four result kinds: when the name result contains no dot,
// the label API must give the same string and the same errors. The
// LABEL_HAS_DOT check after it presumably covers the results that do
// contain a dot -- confirm.
602 if(aN.indexOf(".")<0) {
603 if(!UTF16Plus.equal(aN, aNL) || !sameErrors(aNInfo, aNLInfo)) {
604 errln(String.format("N.nameToASCII([%d] %s)!=N.labelToASCII() "+
605 "(errors %s vs %s) %s vs. %s",
606 i, testCase.s, aNInfo.getErrors().toString(), aNLInfo.getErrors().toString(),
607 prettify(aN.toString()), prettify(aNL.toString())));
611 if(!hasError(aNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
612 errln(String.format("N.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
613 i, testCase.s, aNLInfo.getErrors()));
617 if(aT.indexOf(".")<0) {
618 if(!UTF16Plus.equal(aT, aTL) || !sameErrors(aTInfo, aTLInfo)) {
619 errln(String.format("T.nameToASCII([%d] %s)!=T.labelToASCII() "+
620 "(errors %s vs %s) %s vs. %s",
621 i, testCase.s, aTInfo.getErrors().toString(), aTLInfo.getErrors().toString(),
622 prettify(aT.toString()), prettify(aTL.toString())));
626 if(!hasError(aTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
627 errln(String.format("T.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
628 i, testCase.s, aTLInfo.getErrors()));
632 if(uN.indexOf(".")<0) {
633 if(!UTF16Plus.equal(uN, uNL) || !sameErrors(uNInfo, uNLInfo)) {
634 errln(String.format("N.nameToUnicode([%d] %s)!=N.labelToUnicode() "+
635 "(errors %s vs %s) %s vs. %s",
636 i, testCase.s, uNInfo.getErrors().toString(), uNLInfo.getErrors().toString(),
637 prettify(uN.toString()), prettify(uNL.toString())));
641 if(!hasError(uNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
642 errln(String.format("N.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
643 i, testCase.s, uNLInfo.getErrors()));
647 if(uT.indexOf(".")<0) {
648 if(!UTF16Plus.equal(uT, uTL) || !sameErrors(uTInfo, uTLInfo)) {
649 errln(String.format("T.nameToUnicode([%d] %s)!=T.labelToUnicode() "+
650 "(errors %s vs %s) %s vs. %s",
651 i, testCase.s, uTInfo.getErrors().toString(), uTLInfo.getErrors().toString(),
652 prettify(uT.toString()), prettify(uTL.toString())));
656 if(!hasError(uTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
657 errln(String.format("T.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
658 i, testCase.s, uTLInfo.getErrors()));
662 // Differences between transitional and nontransitional processing
// Judging by the "B.process" message, this first group applies to
// both-modes rows: none of the eight results may report a transitional
// difference, and N and T must agree on every string and error set.
664 if( aNInfo.isTransitionalDifferent() ||
665 aTInfo.isTransitionalDifferent() ||
666 uNInfo.isTransitionalDifferent() ||
667 uTInfo.isTransitionalDifferent() ||
668 aNLInfo.isTransitionalDifferent() ||
669 aTLInfo.isTransitionalDifferent() ||
670 uNLInfo.isTransitionalDifferent() ||
671 uTLInfo.isTransitionalDifferent()
673 errln(String.format("B.process([%d] %s) isTransitionalDifferent()", i, testCase.s));
676 if( !UTF16Plus.equal(aN, aT) || !UTF16Plus.equal(uN, uT) ||
677 !UTF16Plus.equal(aNL, aTL) || !UTF16Plus.equal(uNL, uTL) ||
678 !sameErrors(aNInfo, aTInfo) || !sameErrors(uNInfo, uTInfo) ||
679 !sameErrors(aNLInfo, aTLInfo) || !sameErrors(uNLInfo, uTLInfo)
681 errln(String.format("N.process([%d] %s) vs. T.process() different errors or result strings",
// The opposite expectation (presumably for N-only and T-only rows -- confirm):
// all eight results must report a transitional difference, and no N string
// may equal its T counterpart.
686 if( !aNInfo.isTransitionalDifferent() ||
687 !aTInfo.isTransitionalDifferent() ||
688 !uNInfo.isTransitionalDifferent() ||
689 !uTInfo.isTransitionalDifferent() ||
690 !aNLInfo.isTransitionalDifferent() ||
691 !aTLInfo.isTransitionalDifferent() ||
692 !uNLInfo.isTransitionalDifferent() ||
693 !uTLInfo.isTransitionalDifferent()
695 errln(String.format("%s.process([%d] %s) !isTransitionalDifferent()",
696 testCase.o, i, testCase.s));
699 if( UTF16Plus.equal(aN, aT) || UTF16Plus.equal(uN, uT) ||
700 UTF16Plus.equal(aNL, aTL) || UTF16Plus.equal(uNL, uTL)
702 errln(String.format("N.process([%d] %s) vs. T.process() same result strings",
// The two IDNA instances under test: trans is created from the common
// options, nontrans from the same options plus the NONTRANSITIONAL flags.
710 private final IDNA trans, nontrans;
// Errors that exempt a nameToASCII result from the "output must be pure
// ASCII" check in TestSomeCases.
712 private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
713 IDNA.Error.LEADING_COMBINING_MARK,
714 IDNA.Error.DISALLOWED,
716 IDNA.Error.LABEL_HAS_DOT,
717 IDNA.Error.INVALID_ACE_LABEL);
// Errors removed from a row's expected set before it is compared with
// ToUnicode results (TestSomeCases: "ToUnicode does not set length errors").
718 private static final EnumSet<IDNA.Error> lengthErrors=EnumSet.of(
719 IDNA.Error.EMPTY_LABEL,
720 IDNA.Error.LABEL_TOO_LONG,
721 IDNA.Error.DOMAIN_NAME_TOO_LONG);
723 private boolean hasError(IDNA.Info info, IDNA.Error error) {
724 return info.getErrors().contains(error);
726 // assumes that certainErrors is not empty
727 private boolean hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors) {
728 return !errors.isEmpty() && !Collections.disjoint(errors, certainErrors);
730 private boolean hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors) {
731 return hasCertainErrors(info.getErrors(), certainErrors);
// Base overload that the IDNA.Info-based sameErrors variants delegate to.
// NOTE(review): the body is not visible in this excerpt; presumably it tests
// the two sets for equality -- confirm against the full file.
733 private boolean sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b) {
736 private boolean sameErrors(IDNA.Info a, IDNA.Info b) {
737 return sameErrors(a.getErrors(), b.getErrors());
739 private boolean sameErrors(IDNA.Info a, Set<IDNA.Error> b) {
740 return sameErrors(a.getErrors(), b);
743 private static boolean
744 isASCII(CharSequence str) {
745 int length=str.length();
746 for(int i=0; i<length; ++i) {
747 if(str.charAt(i)>=0x80) {