/*
 *******************************************************************************
 * Copyright (C) 2001-2007, International Business Machines
 * Corporation and others. All Rights Reserved.
 *******************************************************************************
 */
\r
8 package com.ibm.icu.dev.test.bidi;
\r
10 import java.util.Arrays;
\r
11 import com.ibm.icu.lang.UCharacter;
\r
12 import com.ibm.icu.util.VersionInfo;
\r
13 import com.ibm.icu.dev.test.TestFmwk;
\r
14 import com.ibm.icu.impl.Utility;
\r
15 import com.ibm.icu.text.Bidi;
\r
16 import com.ibm.icu.text.BidiRun;
\r
/**
 * A base class for the Bidi test suite.
 *
 * @author Lina Kemmel, Matitiahu Allouche
 */
\r
24 public class BidiTest extends TestFmwk {
\r
/**
 * One sample character per bidi class, in the order given by the column
 * labels below. The ES slot is indexed with {@code TestData.ES} and may be
 * patched once by {@link #initCharFromDirProps()}.
 */
protected static final char[] charFromDirProp = {
 /* L     R      EN    ES    ET    AN     CS    B    S    WS    ON */
    0x61, 0x5d0, 0x30, 0x2f, 0x25, 0x660, 0x2c, 0xa, 0x9, 0x20, 0x26,
 /* LRE     LRO     AL     RLE     RLO     PDF     NSM    BN */
    0x202a, 0x202d, 0x627, 0x202b, 0x202e, 0x202c, 0x308, 0x200c
};

static {
    initCharFromDirProps();
}

/**
 * Adjusts {@link #charFromDirProp} to the Unicode version reported by
 * {@code UCharacter.getUnicodeVersion()}.
 *
 * The former "lazy initialization" guard tested a local variable that had
 * just been set to version 0.0.0.0, so it could never trigger; it is dropped.
 */
private static void initCharFromDirProps() {
    final VersionInfo ucd401 = VersionInfo.getInstance(4, 0, 1, 0);
    final VersionInfo ucdVersion = UCharacter.getUnicodeVersion();
    if (ucdVersion.compareTo(ucd401) >= 0) {
        /* Unicode 4.0.1 changes bidi classes for +-/ */
        /* change ES character from / to + */
        charFromDirProp[TestData.ES] = 0x2b;
    }
}
\r
54 protected boolean assertEquals(String message, String expected, String actual,
\r
55 String src, String mode, String option,
\r
57 if (expected == null || actual == null) {
\r
58 return super.assertEquals(message, expected, actual);
\r
60 if (expected.equals(actual)) {
\r
66 errcontln("source : \"" + Utility.escape(src) + "\"");
\r
68 errcontln("expected : \"" + Utility.escape(expected) + "\"");
\r
69 errcontln("actual : \"" + Utility.escape(actual) + "\"");
\r
71 errcontln("reordering mode : " + mode);
\r
73 if (option != null) {
\r
74 errcontln("reordering option : " + option);
\r
76 if (level != null) {
\r
77 errcontln("paragraph level : " + level);
\r
82 protected static String valueOf(int[] array) {
\r
83 StringBuffer result = new StringBuffer(array.length * 4);
\r
84 for (int i = 0; i < array.length; i++) {
\r
86 result.append(array[i]);
\r
88 return result.toString();
\r
91 private static final String[] modeDescriptions = {
\r
93 "REORDER_NUMBERS_SPECIAL",
\r
94 "REORDER_GROUP_NUMBERS_WITH_R",
\r
95 "REORDER_RUNS_ONLY",
\r
96 "REORDER_INVERSE_NUMBERS_AS_L",
\r
97 "REORDER_INVERSE_LIKE_DIRECT",
\r
98 "REORDER_INVERSE_FOR_NUMBERS_SPECIAL"
\r
101 protected static String modeToString(int mode) {
\r
102 if (mode < Bidi.REORDER_DEFAULT ||
\r
103 mode > Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) {
\r
106 return modeDescriptions[mode];
\r
109 private static final short SETPARA_MASK = Bidi.OPTION_INSERT_MARKS |
\r
110 Bidi.OPTION_REMOVE_CONTROLS | Bidi.OPTION_STREAMING;
\r
112 private static final String[] setParaDescriptions = {
\r
113 "OPTION_INSERT_MARKS",
\r
114 "OPTION_REMOVE_CONTROLS",
\r
118 protected static String spOptionsToString(int option) {
\r
119 return optionToString(option, SETPARA_MASK, setParaDescriptions);
\r
122 private static final int MAX_WRITE_REORDERED_OPTION = Bidi.OUTPUT_REVERSE;
\r
123 private static final int REORDER_MASK = (MAX_WRITE_REORDERED_OPTION << 1) - 1;
\r
125 private static final String[] writeReorderedDescriptions = {
\r
126 "KEEP_BASE_COMBINING", // 1
\r
127 "DO_MIRRORING", // 2
\r
128 "INSERT_LRM_FOR_NUMERIC", // 4
\r
129 "REMOVE_BIDI_CONTROLS", // 8
\r
130 "OUTPUT_REVERSE" // 16
\r
133 public static String wrOptionsToString(int option) {
\r
134 return optionToString(option, REORDER_MASK, writeReorderedDescriptions);
\r
136 public static String optionToString(int option, int mask,
\r
137 String[] descriptions) {
\r
138 StringBuffer desc = new StringBuffer(50);
\r
140 if ((option &= mask) == 0) {
\r
145 for (int i = 0; option > 0; i++, option >>= 1) {
\r
146 if ((option & 1) != 0) {
\r
147 if (desc.length() > 0) {
\r
148 desc.append(" | ");
\r
150 desc.append(descriptions[i]);
\r
153 return desc.toString();
\r
156 static final String columnString =
\r
157 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
\r
158 static final char[] columns = columnString.toCharArray();
\r
159 private static final int TABLE_SIZE = 256;
\r
160 private static boolean tablesInitialized = false;
\r
161 private static char[] pseudoToUChar;
\r
162 private static char[] UCharToPseudo; /* used for Unicode chars < 0x0100 */
\r
163 private static char[] UCharToPseud2; /* used for Unicode chars >=0x0100 */
\r
165 static void buildPseudoTables()
\r
167 The rules for pseudo-Bidi are as follows:
\r
175 - A-F == Arabic Letters 0631-0636
\r
176 - G-V == Hebrew letters 05d7-05ea
\r
177 - W-Z == Unassigned RTL 08d0-08d3
\r
178 - 0-5 == western digits 0030-0035
\r
179 - 6-9 == Arabic-Indic digits 0666-0669
\r
180 - ` == Combining Grave Accent 0300 (NSM)
\r
181 - ~ == Delete 007f (BN)
\r
182 - | == Paragraph Separator 2029 (B)
\r
183 - _ == Info Separator 1 001f (S)
\r
184 All other characters represent themselves as Latin-1, with the corresponding
\r
192 /* initialize all tables to unknown */
\r
193 pseudoToUChar = new char[TABLE_SIZE];
\r
194 UCharToPseudo = new char[TABLE_SIZE];
\r
195 UCharToPseud2 = new char[TABLE_SIZE];
\r
196 for (i = 0; i < TABLE_SIZE; i++) {
\r
197 pseudoToUChar[i] = 0xFFFD;
\r
198 UCharToPseudo[i] = '?';
\r
199 UCharToPseud2[i] = '?';
\r
201 /* initialize non letters or digits */
\r
202 pseudoToUChar[ 0 ] = 0x0000; UCharToPseudo[0x00] = 0 ;
\r
203 pseudoToUChar[' '] = 0x0020; UCharToPseudo[0x20] = ' ';
\r
204 pseudoToUChar['!'] = 0x0021; UCharToPseudo[0x21] = '!';
\r
205 pseudoToUChar['"'] = 0x0022; UCharToPseudo[0x22] = '"';
\r
206 pseudoToUChar['#'] = 0x0023; UCharToPseudo[0x23] = '#';
\r
207 pseudoToUChar['$'] = 0x0024; UCharToPseudo[0x24] = '$';
\r
208 pseudoToUChar['%'] = 0x0025; UCharToPseudo[0x25] = '%';
\r
209 pseudoToUChar['\'']= 0x0027; UCharToPseudo[0x27] = '\'';
\r
210 pseudoToUChar['('] = 0x0028; UCharToPseudo[0x28] = '(';
\r
211 pseudoToUChar[')'] = 0x0029; UCharToPseudo[0x29] = ')';
\r
212 pseudoToUChar['*'] = 0x002A; UCharToPseudo[0x2A] = '*';
\r
213 pseudoToUChar['+'] = 0x002B; UCharToPseudo[0x2B] = '+';
\r
214 pseudoToUChar[','] = 0x002C; UCharToPseudo[0x2C] = ',';
\r
215 pseudoToUChar['-'] = 0x002D; UCharToPseudo[0x2D] = '-';
\r
216 pseudoToUChar['.'] = 0x002E; UCharToPseudo[0x2E] = '.';
\r
217 pseudoToUChar['/'] = 0x002F; UCharToPseudo[0x2F] = '/';
\r
218 pseudoToUChar[':'] = 0x003A; UCharToPseudo[0x3A] = ':';
\r
219 pseudoToUChar[';'] = 0x003B; UCharToPseudo[0x3B] = ';';
\r
220 pseudoToUChar['<'] = 0x003C; UCharToPseudo[0x3C] = '<';
\r
221 pseudoToUChar['='] = 0x003D; UCharToPseudo[0x3D] = '=';
\r
222 pseudoToUChar['>'] = 0x003E; UCharToPseudo[0x3E] = '>';
\r
223 pseudoToUChar['?'] = 0x003F; UCharToPseudo[0x3F] = '?';
\r
224 pseudoToUChar['\\']= 0x005C; UCharToPseudo[0x5C] = '\\';
\r
225 /* initialize specially used characters */
\r
226 pseudoToUChar['`'] = 0x0300; UCharToPseud2[0x00] = '`'; /* NSM */
\r
227 pseudoToUChar['@'] = 0x200E; UCharToPseud2[0x0E] = '@'; /* LRM */
\r
228 pseudoToUChar['&'] = 0x200F; UCharToPseud2[0x0F] = '&'; /* RLM */
\r
229 pseudoToUChar['_'] = 0x001F; UCharToPseudo[0x1F] = '_'; /* S */
\r
230 pseudoToUChar['|'] = 0x2029; UCharToPseud2[0x29] = '|'; /* B */
\r
231 pseudoToUChar['['] = 0x202A; UCharToPseud2[0x2A] = '['; /* LRE */
\r
232 pseudoToUChar[']'] = 0x202B; UCharToPseud2[0x2B] = ']'; /* RLE */
\r
233 pseudoToUChar['^'] = 0x202C; UCharToPseud2[0x2C] = '^'; /* PDF */
\r
234 pseudoToUChar['{'] = 0x202D; UCharToPseud2[0x2D] = '{'; /* LRO */
\r
235 pseudoToUChar['}'] = 0x202E; UCharToPseud2[0x2E] = '}'; /* RLO */
\r
236 pseudoToUChar['~'] = 0x007F; UCharToPseudo[0x7F] = '~'; /* BN */
\r
237 /* initialize western digits */
\r
238 for (i = 0, uchar = 0x0030; i < 6; i++, uchar++) {
\r
240 pseudoToUChar[c] = uchar;
\r
241 UCharToPseudo[uchar & 0x00ff] = c;
\r
243 /* initialize Hindi digits */
\r
244 for (i = 6, uchar = 0x0666; i < 10; i++, uchar++) {
\r
246 pseudoToUChar[c] = uchar;
\r
247 UCharToPseud2[uchar & 0x00ff] = c;
\r
249 /* initialize Arabic letters */
\r
250 for (i = 10, uchar = 0x0631; i < 16; i++, uchar++) {
\r
252 pseudoToUChar[c] = uchar;
\r
253 UCharToPseud2[uchar & 0x00ff] = c;
\r
255 /* initialize Hebrew letters */
\r
256 for (i = 16, uchar = 0x05D7; i < 32; i++, uchar++) {
\r
258 pseudoToUChar[c] = uchar;
\r
259 UCharToPseud2[uchar & 0x00ff] = c;
\r
261 /* initialize Unassigned code points */
\r
262 for (i = 32, uchar = 0x08D0; i < 36; i++, uchar++) {
\r
264 pseudoToUChar[c] = uchar;
\r
265 UCharToPseud2[uchar & 0x00ff] = c;
\r
267 /* initialize Latin lower case letters */
\r
268 for (i = 36, uchar = 0x0061; i < 62; i++, uchar++) {
\r
270 pseudoToUChar[c] = uchar;
\r
271 UCharToPseudo[uchar & 0x00ff] = c;
\r
273 tablesInitialized = true;
\r
276 /*----------------------------------------------------------------------*/
\r
278 static String pseudoToU16(String input)
\r
279 /* This function converts a pseudo-Bidi string into a char string.
\r
280 It returns the char string.
\r
283 int len = input.length();
\r
284 char[] output = new char[len];
\r
286 if (!tablesInitialized) {
\r
287 buildPseudoTables();
\r
289 for (i = 0; i < len; i++)
\r
290 output[i] = pseudoToUChar[input.charAt(i)];
\r
291 return new String(output);
\r
294 /*----------------------------------------------------------------------*/
\r
296 static String u16ToPseudo(String input)
\r
297 /* This function converts a char string into a pseudo-Bidi string.
\r
298 It returns the pseudo-Bidi string.
\r
301 int len = input.length();
\r
302 char[] output = new char[len];
\r
305 if (!tablesInitialized) {
\r
306 buildPseudoTables();
\r
308 for (i = 0; i < len; i++)
\r
310 uchar = input.charAt(i);
\r
311 output[i] = uchar < 0x0100 ? UCharToPseudo[uchar] :
\r
312 UCharToPseud2[uchar & 0x00ff];
\r
314 return new String(output);
\r
317 void errcont(String message) {
\r
318 msg(message, ERR, false, false);
\r
321 void errcontln(String message) {
\r
322 msg(message, ERR, false, true);
\r
325 void printCaseInfo(Bidi bidi, String src, String dst)
\r
327 int length = bidi.getProcessedLength();
\r
328 byte[] levels = bidi.getLevels();
\r
329 char[] levelChars = new char[length];
\r
331 int runCount = bidi.countRuns();
\r
332 errcontln("========================================");
\r
333 errcontln("Processed length: " + length);
\r
334 for (int i = 0; i < length; i++) {
\r
337 levelChars[i] = '-';
\r
338 } else if (lev < columns.length) {
\r
339 levelChars[i] = columns[lev];
\r
341 levelChars[i] = '+';
\r
344 errcontln("Levels: " + new String(levelChars));
\r
345 errcontln("Source: " + src);
\r
346 errcontln("Result: " + dst);
\r
347 errcontln("Direction: " + bidi.getDirection());
\r
348 errcontln("paraLevel: " + Byte.toString(bidi.getParaLevel()));
\r
349 errcontln("reorderingMode: " + modeToString(bidi.getReorderingMode()));
\r
350 errcontln("reorderingOptions: " + spOptionsToString(bidi.getReorderingOptions()));
\r
351 errcont("Runs: " + runCount + " => logicalStart.length/level: ");
\r
352 for (int i = 0; i < runCount; i++) {
\r
354 run = bidi.getVisualRun(i);
\r
355 errcont(" " + run.getStart() + "." + run.getLength() + "/" +
\r
356 run.getEmbeddingLevel());
\r
361 static final String mates1 = "<>()[]{}";
\r
362 static final String mates2 = "><)(][}{";
\r
363 static final char[] mates1Chars = mates1.toCharArray();
\r
364 static final char[] mates2Chars = mates2.toCharArray();
\r
366 boolean matchingPair(Bidi bidi, int i, char c1, char c2)
\r
371 /* For REORDER_RUNS_ONLY, it would not be correct to check levels[i],
\r
372 so we use the appropriate run's level, which is good for all cases.
\r
374 if (bidi.getLogicalRun(i).getDirection() == 0) {
\r
377 for (int k = 0; k < mates1Chars.length; k++) {
\r
378 if ((c1 == mates1Chars[k]) && (c2 == mates2Chars[k])) {
\r
385 boolean checkWhatYouCan(Bidi bidi, String src, String dst)
\r
387 int i, idx, logLimit, visLimit;
\r
388 boolean testOK, errMap, errDst;
\r
389 char[] srcChars = src.toCharArray();
\r
390 char[] dstChars = dst.toCharArray();
\r
391 int[] visMap = bidi.getVisualMap();
\r
392 int[] logMap = bidi.getLogicalMap();
\r
395 errMap = errDst = false;
\r
396 logLimit = bidi.getProcessedLength();
\r
397 visLimit = bidi.getResultLength();
\r
398 if (visLimit > dstChars.length) {
\r
399 visLimit = dstChars.length;
\r
401 char[] accumSrc = new char[logLimit];
\r
402 char[] accumDst = new char[visLimit];
\r
403 Arrays.fill(accumSrc, '?');
\r
404 Arrays.fill(accumDst, '?');
\r
406 if (logMap.length != logLimit) {
\r
409 for (i = 0; i < logLimit; i++) {
\r
410 idx = bidi.getVisualIndex(i);
\r
411 if (idx != logMap[i]) {
\r
414 if (idx == Bidi.MAP_NOWHERE) {
\r
417 if (idx >= visLimit) {
\r
420 accumDst[idx] = srcChars[i];
\r
421 if (!matchingPair(bidi, i, srcChars[i], dstChars[idx])) {
\r
427 printCaseInfo(bidi, src, dst);
\r
430 errln("Mismatch between getLogicalMap() and getVisualIndex()");
\r
431 errcont("Map :" + valueOf(logMap));
\r
433 errcont("Indexes:");
\r
434 for (i = 0; i < logLimit; i++) {
\r
435 errcont(" " + bidi.getVisualIndex(i));
\r
441 printCaseInfo(bidi, src, dst);
\r
444 errln("Source does not map to Result");
\r
445 errcontln("We got: " + new String(accumDst));
\r
448 errMap = errDst = false;
\r
449 if (visMap.length != visLimit) {
\r
452 for (i = 0; i < visLimit; i++) {
\r
453 idx = bidi.getLogicalIndex(i);
\r
454 if (idx != visMap[i]) {
\r
457 if (idx == Bidi.MAP_NOWHERE) {
\r
460 if (idx >= logLimit) {
\r
463 accumSrc[idx] = dstChars[i];
\r
464 if (!matchingPair(bidi, idx, srcChars[idx], dstChars[i])) {
\r
470 printCaseInfo(bidi, src, dst);
\r
473 errln("Mismatch between getVisualMap() and getLogicalIndex()");
\r
474 errcont("Map :" + valueOf(visMap));
\r
476 errcont("Indexes:");
\r
477 for (i = 0; i < visLimit; i++) {
\r
478 errcont(" " + bidi.getLogicalIndex(i));
\r
484 printCaseInfo(bidi, src, dst);
\r
487 errln("Result does not map to Source");
\r
488 errcontln("We got: " + new String(accumSrc));
\r