2 *******************************************************************************
\r
3 * Copyright (C) 2001-2010, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.bidi;
\r
10 import java.util.Arrays;
\r
12 import com.ibm.icu.dev.test.TestFmwk;
\r
13 import com.ibm.icu.impl.Utility;
\r
14 import com.ibm.icu.lang.UCharacter;
\r
15 import com.ibm.icu.text.Bidi;
\r
16 import com.ibm.icu.text.BidiRun;
\r
17 import com.ibm.icu.util.VersionInfo;
\r
20 * A base class for the Bidi test suite.
\r
22 * @author Lina Kemmel, Matitiahu Allouche
\r
25 public class BidiTest extends TestFmwk {
\r
27 protected static final char[] charFromDirProp = {
\r
28 /* L R EN ES ET AN CS B S WS ON */
\r
29 0x61, 0x5d0, 0x30, 0x2f, 0x25, 0x660, 0x2c, 0xa, 0x9, 0x20, 0x26,
\r
30 /* LRE LRO AL RLE RLO PDF NSM BN */
\r
31 0x202a, 0x202d, 0x627, 0x202b, 0x202e, 0x202c, 0x308, 0x200c
\r
35 initCharFromDirProps();
\r
38 private static void initCharFromDirProps() {
\r
39 final VersionInfo ucd401 = VersionInfo.getInstance(4, 0, 1, 0);
\r
40 VersionInfo ucdVersion = VersionInfo.getInstance(0, 0, 0, 0);
\r
42 /* lazy initialization */
\r
43 if (ucdVersion.getMajor() > 0) {
\r
47 ucdVersion = UCharacter.getUnicodeVersion();
\r
48 if (ucdVersion.compareTo(ucd401) >= 0) {
\r
49 /* Unicode 4.0.1 changes bidi classes for +-/ */
\r
50 /* change ES character from / to + */
\r
51 charFromDirProp[TestData.ES] = 0x2b;
\r
55 protected boolean assertEquals(String message, String expected, String actual,
\r
56 String src, String mode, String option,
\r
58 if (expected == null || actual == null) {
\r
59 return super.assertEquals(message, expected, actual);
\r
61 if (expected.equals(actual)) {
\r
67 errcontln("source : \"" + Utility.escape(src) + "\"");
\r
69 errcontln("expected : \"" + Utility.escape(expected) + "\"");
\r
70 errcontln("actual : \"" + Utility.escape(actual) + "\"");
\r
72 errcontln("reordering mode : " + mode);
\r
74 if (option != null) {
\r
75 errcontln("reordering option : " + option);
\r
77 if (level != null) {
\r
78 errcontln("paragraph level : " + level);
\r
83 protected static String valueOf(int[] array) {
\r
84 StringBuffer result = new StringBuffer(array.length * 4);
\r
85 for (int i = 0; i < array.length; i++) {
\r
87 result.append(array[i]);
\r
89 return result.toString();
\r
92 private static final String[] modeDescriptions = {
\r
94 "REORDER_NUMBERS_SPECIAL",
\r
95 "REORDER_GROUP_NUMBERS_WITH_R",
\r
96 "REORDER_RUNS_ONLY",
\r
97 "REORDER_INVERSE_NUMBERS_AS_L",
\r
98 "REORDER_INVERSE_LIKE_DIRECT",
\r
99 "REORDER_INVERSE_FOR_NUMBERS_SPECIAL"
\r
102 protected static String modeToString(int mode) {
\r
103 if (mode < Bidi.REORDER_DEFAULT ||
\r
104 mode > Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) {
\r
107 return modeDescriptions[mode];
\r
110 private static final short SETPARA_MASK = Bidi.OPTION_INSERT_MARKS |
\r
111 Bidi.OPTION_REMOVE_CONTROLS | Bidi.OPTION_STREAMING;
\r
113 private static final String[] setParaDescriptions = {
\r
114 "OPTION_INSERT_MARKS",
\r
115 "OPTION_REMOVE_CONTROLS",
\r
119 protected static String spOptionsToString(int option) {
\r
120 return optionToString(option, SETPARA_MASK, setParaDescriptions);
\r
123 private static final int MAX_WRITE_REORDERED_OPTION = Bidi.OUTPUT_REVERSE;
\r
124 private static final int REORDER_MASK = (MAX_WRITE_REORDERED_OPTION << 1) - 1;
\r
126 private static final String[] writeReorderedDescriptions = {
\r
127 "KEEP_BASE_COMBINING", // 1
\r
128 "DO_MIRRORING", // 2
\r
129 "INSERT_LRM_FOR_NUMERIC", // 4
\r
130 "REMOVE_BIDI_CONTROLS", // 8
\r
131 "OUTPUT_REVERSE" // 16
\r
134 public static String wrOptionsToString(int option) {
\r
135 return optionToString(option, REORDER_MASK, writeReorderedDescriptions);
\r
137 public static String optionToString(int option, int mask,
\r
138 String[] descriptions) {
\r
139 StringBuffer desc = new StringBuffer(50);
\r
141 if ((option &= mask) == 0) {
\r
146 for (int i = 0; option > 0; i++, option >>= 1) {
\r
147 if ((option & 1) != 0) {
\r
148 if (desc.length() > 0) {
\r
149 desc.append(" | ");
\r
151 desc.append(descriptions[i]);
\r
154 return desc.toString();
\r
157 static final String columnString =
\r
158 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
\r
159 static final char[] columns = columnString.toCharArray();
\r
160 private static final int TABLE_SIZE = 256;
\r
161 private static boolean tablesInitialized = false;
\r
162 private static char[] pseudoToUChar;
\r
163 private static char[] UCharToPseudo; /* used for Unicode chars < 0x0100 */
\r
164 private static char[] UCharToPseud2; /* used for Unicode chars >=0x0100 */
\r
166 static void buildPseudoTables()
\r
168 The rules for pseudo-Bidi are as follows:
\r
176 - A-F == Arabic Letters 0631-0636
\r
177 - G-V == Hebrew letters 05d7-05ea
\r
178 - W-Z == Unassigned RTL 08d0-08d3
\r
179 - 0-5 == western digits 0030-0035
\r
180 - 6-9 == Arabic-Indic digits 0666-0669
\r
181 - ` == Combining Grave Accent 0300 (NSM)
\r
182 - ~ == Delete 007f (BN)
\r
183 - | == Paragraph Separator 2029 (B)
\r
184 - _ == Info Separator 1 001f (S)
\r
185 All other characters represent themselves as Latin-1, with the corresponding
\r
193 /* initialize all tables to unknown */
\r
194 pseudoToUChar = new char[TABLE_SIZE];
\r
195 UCharToPseudo = new char[TABLE_SIZE];
\r
196 UCharToPseud2 = new char[TABLE_SIZE];
\r
197 for (i = 0; i < TABLE_SIZE; i++) {
\r
198 pseudoToUChar[i] = 0xFFFD;
\r
199 UCharToPseudo[i] = '?';
\r
200 UCharToPseud2[i] = '?';
\r
202 /* initialize characters that are neither letters nor digits */
\r
203 pseudoToUChar[ 0 ] = 0x0000; UCharToPseudo[0x00] = 0 ;
\r
204 pseudoToUChar[' '] = 0x0020; UCharToPseudo[0x20] = ' ';
\r
205 pseudoToUChar['!'] = 0x0021; UCharToPseudo[0x21] = '!';
\r
206 pseudoToUChar['"'] = 0x0022; UCharToPseudo[0x22] = '"';
\r
207 pseudoToUChar['#'] = 0x0023; UCharToPseudo[0x23] = '#';
\r
208 pseudoToUChar['$'] = 0x0024; UCharToPseudo[0x24] = '$';
\r
209 pseudoToUChar['%'] = 0x0025; UCharToPseudo[0x25] = '%';
\r
210 pseudoToUChar['\'']= 0x0027; UCharToPseudo[0x27] = '\'';
\r
211 pseudoToUChar['('] = 0x0028; UCharToPseudo[0x28] = '(';
\r
212 pseudoToUChar[')'] = 0x0029; UCharToPseudo[0x29] = ')';
\r
213 pseudoToUChar['*'] = 0x002A; UCharToPseudo[0x2A] = '*';
\r
214 pseudoToUChar['+'] = 0x002B; UCharToPseudo[0x2B] = '+';
\r
215 pseudoToUChar[','] = 0x002C; UCharToPseudo[0x2C] = ',';
\r
216 pseudoToUChar['-'] = 0x002D; UCharToPseudo[0x2D] = '-';
\r
217 pseudoToUChar['.'] = 0x002E; UCharToPseudo[0x2E] = '.';
\r
218 pseudoToUChar['/'] = 0x002F; UCharToPseudo[0x2F] = '/';
\r
219 pseudoToUChar[':'] = 0x003A; UCharToPseudo[0x3A] = ':';
\r
220 pseudoToUChar[';'] = 0x003B; UCharToPseudo[0x3B] = ';';
\r
221 pseudoToUChar['<'] = 0x003C; UCharToPseudo[0x3C] = '<';
\r
222 pseudoToUChar['='] = 0x003D; UCharToPseudo[0x3D] = '=';
\r
223 pseudoToUChar['>'] = 0x003E; UCharToPseudo[0x3E] = '>';
\r
224 pseudoToUChar['?'] = 0x003F; UCharToPseudo[0x3F] = '?';
\r
225 pseudoToUChar['\\']= 0x005C; UCharToPseudo[0x5C] = '\\';
\r
226 /* initialize specially used characters */
\r
227 pseudoToUChar['`'] = 0x0300; UCharToPseud2[0x00] = '`'; /* NSM */
\r
228 pseudoToUChar['@'] = 0x200E; UCharToPseud2[0x0E] = '@'; /* LRM */
\r
229 pseudoToUChar['&'] = 0x200F; UCharToPseud2[0x0F] = '&'; /* RLM */
\r
230 pseudoToUChar['_'] = 0x001F; UCharToPseudo[0x1F] = '_'; /* S */
\r
231 pseudoToUChar['|'] = 0x2029; UCharToPseud2[0x29] = '|'; /* B */
\r
232 pseudoToUChar['['] = 0x202A; UCharToPseud2[0x2A] = '['; /* LRE */
\r
233 pseudoToUChar[']'] = 0x202B; UCharToPseud2[0x2B] = ']'; /* RLE */
\r
234 pseudoToUChar['^'] = 0x202C; UCharToPseud2[0x2C] = '^'; /* PDF */
\r
235 pseudoToUChar['{'] = 0x202D; UCharToPseud2[0x2D] = '{'; /* LRO */
\r
236 pseudoToUChar['}'] = 0x202E; UCharToPseud2[0x2E] = '}'; /* RLO */
\r
237 pseudoToUChar['~'] = 0x007F; UCharToPseudo[0x7F] = '~'; /* BN */
\r
238 /* initialize western digits */
\r
239 for (i = 0, uchar = 0x0030; i < 6; i++, uchar++) {
\r
241 pseudoToUChar[c] = uchar;
\r
242 UCharToPseudo[uchar & 0x00ff] = c;
\r
244 /* initialize Arabic-Indic digits */
\r
245 for (i = 6, uchar = 0x0666; i < 10; i++, uchar++) {
\r
247 pseudoToUChar[c] = uchar;
\r
248 UCharToPseud2[uchar & 0x00ff] = c;
\r
250 /* initialize Arabic letters */
\r
251 for (i = 10, uchar = 0x0631; i < 16; i++, uchar++) {
\r
253 pseudoToUChar[c] = uchar;
\r
254 UCharToPseud2[uchar & 0x00ff] = c;
\r
256 /* initialize Hebrew letters */
\r
257 for (i = 16, uchar = 0x05D7; i < 32; i++, uchar++) {
\r
259 pseudoToUChar[c] = uchar;
\r
260 UCharToPseud2[uchar & 0x00ff] = c;
\r
262 /* initialize Unassigned code points */
\r
263 for (i = 32, uchar = 0x08D0; i < 36; i++, uchar++) {
\r
265 pseudoToUChar[c] = uchar;
\r
266 UCharToPseud2[uchar & 0x00ff] = c;
\r
268 /* initialize Latin lower case letters */
\r
269 for (i = 36, uchar = 0x0061; i < 62; i++, uchar++) {
\r
271 pseudoToUChar[c] = uchar;
\r
272 UCharToPseudo[uchar & 0x00ff] = c;
\r
274 tablesInitialized = true;
\r
277 /*----------------------------------------------------------------------*/
\r
279 static String pseudoToU16(String input)
\r
280 /* This function converts a pseudo-Bidi string into a char string.
\r
281 It returns the char string.
\r
284 int len = input.length();
\r
285 char[] output = new char[len];
\r
287 if (!tablesInitialized) {
\r
288 buildPseudoTables();
\r
290 for (i = 0; i < len; i++)
\r
291 output[i] = pseudoToUChar[input.charAt(i)];
\r
292 return new String(output);
\r
295 /*----------------------------------------------------------------------*/
\r
297 static String u16ToPseudo(String input)
\r
298 /* This function converts a char string into a pseudo-Bidi string.
\r
299 It returns the pseudo-Bidi string.
\r
302 int len = input.length();
\r
303 char[] output = new char[len];
\r
306 if (!tablesInitialized) {
\r
307 buildPseudoTables();
\r
309 for (i = 0; i < len; i++)
\r
311 uchar = input.charAt(i);
\r
312 output[i] = uchar < 0x0100 ? UCharToPseudo[uchar] :
\r
313 UCharToPseud2[uchar & 0x00ff];
\r
315 return new String(output);
\r
318 void errcont(String message) {
\r
319 msg(message, ERR, false, false);
\r
322 void errcontln(String message) {
\r
323 msg(message, ERR, false, true);
\r
326 void printCaseInfo(Bidi bidi, String src, String dst)
\r
328 int length = bidi.getProcessedLength();
\r
329 byte[] levels = bidi.getLevels();
\r
330 char[] levelChars = new char[length];
\r
332 int runCount = bidi.countRuns();
\r
333 errcontln("========================================");
\r
334 errcontln("Processed length: " + length);
\r
335 for (int i = 0; i < length; i++) {
\r
338 levelChars[i] = '-';
\r
339 } else if (lev < columns.length) {
\r
340 levelChars[i] = columns[lev];
\r
342 levelChars[i] = '+';
\r
345 errcontln("Levels: " + new String(levelChars));
\r
346 errcontln("Source: " + src);
\r
347 errcontln("Result: " + dst);
\r
348 errcontln("Direction: " + bidi.getDirection());
\r
349 errcontln("paraLevel: " + Byte.toString(bidi.getParaLevel()));
\r
350 errcontln("reorderingMode: " + modeToString(bidi.getReorderingMode()));
\r
351 errcontln("reorderingOptions: " + spOptionsToString(bidi.getReorderingOptions()));
\r
352 errcont("Runs: " + runCount + " => logicalStart.length/level: ");
\r
353 for (int i = 0; i < runCount; i++) {
\r
355 run = bidi.getVisualRun(i);
\r
356 errcont(" " + run.getStart() + "." + run.getLength() + "/" +
\r
357 run.getEmbeddingLevel());
\r
362 static final String mates1 = "<>()[]{}";
\r
363 static final String mates2 = "><)(][}{";
\r
364 static final char[] mates1Chars = mates1.toCharArray();
\r
365 static final char[] mates2Chars = mates2.toCharArray();
\r
367 boolean matchingPair(Bidi bidi, int i, char c1, char c2)
\r
372 /* For REORDER_RUNS_ONLY, it would not be correct to check levels[i],
\r
373 so we use the appropriate run's level, which is good for all cases.
\r
375 if (bidi.getLogicalRun(i).getDirection() == 0) {
\r
378 for (int k = 0; k < mates1Chars.length; k++) {
\r
379 if ((c1 == mates1Chars[k]) && (c2 == mates2Chars[k])) {
\r
386 boolean checkWhatYouCan(Bidi bidi, String src, String dst)
\r
388 int i, idx, logLimit, visLimit;
\r
389 boolean testOK, errMap, errDst;
\r
390 char[] srcChars = src.toCharArray();
\r
391 char[] dstChars = dst.toCharArray();
\r
392 int[] visMap = bidi.getVisualMap();
\r
393 int[] logMap = bidi.getLogicalMap();
\r
396 errMap = errDst = false;
\r
397 logLimit = bidi.getProcessedLength();
\r
398 visLimit = bidi.getResultLength();
\r
399 if (visLimit > dstChars.length) {
\r
400 visLimit = dstChars.length;
\r
402 char[] accumSrc = new char[logLimit];
\r
403 char[] accumDst = new char[visLimit];
\r
404 Arrays.fill(accumSrc, '?');
\r
405 Arrays.fill(accumDst, '?');
\r
407 if (logMap.length != logLimit) {
\r
410 for (i = 0; i < logLimit; i++) {
\r
411 idx = bidi.getVisualIndex(i);
\r
412 if (idx != logMap[i]) {
\r
415 if (idx == Bidi.MAP_NOWHERE) {
\r
418 if (idx >= visLimit) {
\r
421 accumDst[idx] = srcChars[i];
\r
422 if (!matchingPair(bidi, i, srcChars[i], dstChars[idx])) {
\r
428 printCaseInfo(bidi, src, dst);
\r
431 errln("Mismatch between getLogicalMap() and getVisualIndex()");
\r
432 errcont("Map :" + valueOf(logMap));
\r
434 errcont("Indexes:");
\r
435 for (i = 0; i < logLimit; i++) {
\r
436 errcont(" " + bidi.getVisualIndex(i));
\r
442 printCaseInfo(bidi, src, dst);
\r
445 errln("Source does not map to Result");
\r
446 errcontln("We got: " + new String(accumDst));
\r
449 errMap = errDst = false;
\r
450 if (visMap.length != visLimit) {
\r
453 for (i = 0; i < visLimit; i++) {
\r
454 idx = bidi.getLogicalIndex(i);
\r
455 if (idx != visMap[i]) {
\r
458 if (idx == Bidi.MAP_NOWHERE) {
\r
461 if (idx >= logLimit) {
\r
464 accumSrc[idx] = dstChars[i];
\r
465 if (!matchingPair(bidi, idx, srcChars[idx], dstChars[i])) {
\r
471 printCaseInfo(bidi, src, dst);
\r
474 errln("Mismatch between getVisualMap() and getLogicalIndex()");
\r
475 errcont("Map :" + valueOf(visMap));
\r
477 errcont("Indexes:");
\r
478 for (i = 0; i < visLimit; i++) {
\r
479 errcont(" " + bidi.getLogicalIndex(i));
\r
485 printCaseInfo(bidi, src, dst);
\r
488 errln("Result does not map to Source");
\r
489 errcontln("We got: " + new String(accumSrc));
\r