/*
 *******************************************************************************
 * Copyright (C) 2001-2007, International Business Machines
 * Corporation and others. All Rights Reserved.
 *******************************************************************************
 */
/* Written by Simon Montagu, Matitiahu Allouche
 * (ported from C code written by Markus W. Scherer)
 */
\r
11 package com.ibm.icu.text;
\r
13 import com.ibm.icu.lang.UCharacter;
\r
14 import com.ibm.icu.text.Bidi;
\r
16 final class BidiWriter {
\r
18 /** Bidi control code points */
\r
19 static final char LRM_CHAR = 0x200e;
\r
20 static final char RLM_CHAR = 0x200f;
\r
21 static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
\r
22 1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
\r
24 private static boolean IsCombining(int type)
\r
27 (1<<UCharacter.NON_SPACING_MARK |
\r
28 1<<UCharacter.COMBINING_SPACING_MARK |
\r
29 1<<UCharacter.ENCLOSING_MARK)) != 0);
\r
/*
 * When we have OUTPUT_REVERSE set on writeReordered(), then we
 * semantically write RTL runs in reverse and later reverse them again.
 * Instead, we actually write them in forward order to begin with.
 * However, if the RTL run was to be mirrored, we need to mirror here now
 * since the implicit second reversal must not do it.
 * It looks strange to do mirroring in LTR output, but it is only because
 * we are writing RTL output in reverse.
 */
\r
41 private static String doWriteForward(String src, int options) {
\r
42 /* optimize for several combinations of options */
\r
43 switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {
\r
45 /* simply copy the LTR run to the destination */
\r
46 return new String(src);
\r
48 case Bidi.DO_MIRRORING: {
\r
49 StringBuffer dest = new StringBuffer(src.length());
\r
56 c = UTF16.charAt(src, i);
\r
57 i += UTF16.getCharCount(c);
\r
58 UTF16.append(dest, UCharacter.getMirror(c));
\r
59 } while(i < src.length());
\r
60 return dest.toString();
\r
62 case Bidi.REMOVE_BIDI_CONTROLS: {
\r
63 StringBuffer dest = new StringBuffer(src.length());
\r
65 /* copy the LTR run and remove any Bidi control characters */
\r
69 c = src.charAt(i++);
\r
70 if(!Bidi.IsBidiControlChar(c)) {
\r
73 } while(i < src.length());
\r
74 return dest.toString();
\r
77 StringBuffer dest = new StringBuffer(src.length());
\r
79 /* remove Bidi control characters and do mirroring */
\r
83 c = UTF16.charAt(src, i);
\r
84 i += UTF16.getCharCount(c);
\r
85 if(!Bidi.IsBidiControlChar(c)) {
\r
86 UTF16.append(dest, UCharacter.getMirror(c));
\r
88 } while(i < src.length());
\r
89 return dest.toString();
\r
91 } /* end of switch */
\r
94 private static String doWriteForward(char[] text, int start, int limit,
\r
97 return doWriteForward(new String(text, start, limit - start), options);
\r
100 static String writeReverse(String src, int options) {
\r
104 * RTL runs need to be copied to the destination in reverse order
\r
105 * of code points, not code units, to keep Unicode characters intact.
\r
107 * The general strategy for this is to read the source text
\r
108 * in backward order, collect all code units for a code point
\r
109 * (and optionally following combining characters, see below),
\r
110 * and copy all these code units in ascending order
\r
111 * to the destination for this run.
\r
113 * Several options request whether combining characters
\r
114 * should be kept after their base characters,
\r
115 * whether Bidi control characters should be removed, and
\r
116 * whether characters should be replaced by their mirror-image
\r
117 * equivalent Unicode characters.
\r
119 StringBuffer dest = new StringBuffer(src.length());
\r
121 /* optimize for several combinations of options */
\r
123 (Bidi.REMOVE_BIDI_CONTROLS |
\r
124 Bidi.DO_MIRRORING |
\r
125 Bidi.KEEP_BASE_COMBINING)) {
\r
129 * With none of the "complicated" options set, the destination
\r
130 * run will have the same length as the source run,
\r
131 * and there is no mirroring and no keeping combining characters
\r
132 * with their base characters.
\r
134 * XXX: or dest = UTF16.reverse(new StringBuffer(src));
\r
137 int srcLength = src.length();
\r
139 /* preserve character integrity */
\r
141 /* i is always after the last code unit known to need to be kept
\r
142 * in this segment */
\r
145 /* collect code units for one base character */
\r
146 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
\r
149 /* copy this base character */
\r
150 dest.append(src.substring(srcLength, i));
\r
151 } while(srcLength > 0);
\r
154 case Bidi.KEEP_BASE_COMBINING:
\r
156 * Here, too, the destination
\r
157 * run will have the same length as the source run,
\r
158 * and there is no mirroring.
\r
159 * We do need to keep combining characters with their base
\r
162 srcLength = src.length();
\r
164 /* preserve character integrity */
\r
166 /* i is always after the last code unit known to need to be kept
\r
167 * in this segment */
\r
171 /* collect code units and modifier letters for one base
\r
174 c = UTF16.charAt(src, srcLength - 1);
\r
175 srcLength -= UTF16.getCharCount(c);
\r
176 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
\r
178 /* copy this "user character" */
\r
179 dest.append(src.substring(srcLength, i));
\r
180 } while(srcLength > 0);
\r
185 * With several "complicated" options set, this is the most
\r
186 * general and the slowest copying of an RTL run.
\r
187 * We will do mirroring, remove Bidi controls, and
\r
188 * keep combining characters with their base characters
\r
191 srcLength = src.length();
\r
193 /* preserve character integrity */
\r
195 /* i is always after the last code unit known to need to be kept
\r
196 * in this segment */
\r
199 /* collect code units for one base character */
\r
200 int c = UTF16.charAt(src, srcLength - 1);
\r
201 srcLength -= UTF16.getCharCount(c);
\r
202 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
\r
203 /* collect modifier letters for this base character */
\r
204 while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
\r
205 c = UTF16.charAt(src, srcLength - 1);
\r
206 srcLength -= UTF16.getCharCount(c);
\r
210 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
\r
211 Bidi.IsBidiControlChar(c)) {
\r
212 /* do not copy this Bidi control character */
\r
216 /* copy this "user character" */
\r
218 if((options & Bidi.DO_MIRRORING) != 0) {
\r
219 /* mirror only the base character */
\r
220 c = UCharacter.getMirror(c);
\r
221 UTF16.append(dest, c);
\r
222 j += UTF16.getCharCount(c);
\r
224 dest.append(src.substring(j, i));
\r
225 } while(srcLength > 0);
\r
227 } /* end of switch */
\r
229 return dest.toString();
\r
232 static String doWriteReverse(char[] text, int start, int limit, int options)
\r
234 return writeReverse(new String(text, start, limit - start), options);
\r
237 static String writeReordered(Bidi bidi, int options)
\r
241 char[] text = bidi.text;
\r
242 runCount = bidi.countRuns();
\r
245 * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
\r
246 * reordering mode (checked below) is appropriate.
\r
248 if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
\r
249 options |= Bidi.INSERT_LRM_FOR_NUMERIC;
\r
250 options &= ~Bidi.REMOVE_BIDI_CONTROLS;
\r
253 * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
\r
254 * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
\r
256 if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
\r
257 options |= Bidi.REMOVE_BIDI_CONTROLS;
\r
258 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
\r
261 * If we do not perform the "inverse Bidi" algorithm, then we
\r
262 * don't need to insert any LRMs, and don't need to test for it.
\r
264 if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
\r
265 (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT) &&
\r
266 (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
\r
267 (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
\r
268 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
\r
270 dest = new StringBuffer((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
\r
271 bidi.length * 2 : bidi.length);
\r
273 * Iterate through all visual runs and copy the run text segments to
\r
274 * the destination, according to the options.
\r
276 * The tests for where to insert LRMs ignore the fact that there may be
\r
277 * BN codes or non-BMP code points at the beginning and end of a run;
\r
278 * they may insert LRMs unnecessarily but the tests are faster this way
\r
279 * (this would have to be improved for UTF-8).
\r
281 if ((options & Bidi.OUTPUT_REVERSE) == 0) {
\r
282 /* forward output */
\r
283 if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
\r
284 /* do not insert Bidi controls */
\r
285 for (run = 0; run < runCount; ++run) {
\r
286 BidiRun bidiRun = bidi.getVisualRun(run);
\r
287 if (bidiRun.isEvenRun()) {
\r
288 dest.append(doWriteForward(text, bidiRun.start,
\r
290 options & ~Bidi.DO_MIRRORING));
\r
292 dest.append(doWriteReverse(text, bidiRun.start,
\r
293 bidiRun.limit, options));
\r
297 /* insert Bidi controls for "inverse Bidi" */
\r
298 byte[] dirProps = bidi.dirProps;
\r
302 for (run = 0; run < runCount; ++run) {
\r
303 BidiRun bidiRun = bidi.getVisualRun(run);
\r
305 /* check if something relevant in insertPoints */
\r
306 markFlag = bidi.runs[run].insertRemove;
\r
307 if (markFlag < 0) { /* bidi controls count */
\r
310 if (bidiRun.isEvenRun()) {
\r
311 if (bidi.isInverse() &&
\r
312 dirProps[bidiRun.start] != Bidi.L) {
\r
313 markFlag |= Bidi.LRM_BEFORE;
\r
315 if ((markFlag & Bidi.LRM_BEFORE) != 0) {
\r
317 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
\r
325 dest.append(doWriteForward(text,
\r
326 bidiRun.start, bidiRun.limit,
\r
327 options & ~Bidi.DO_MIRRORING));
\r
329 if (bidi.isInverse() &&
\r
330 dirProps[bidiRun.limit - 1] != Bidi.L) {
\r
331 markFlag |= Bidi.LRM_AFTER;
\r
333 if ((markFlag & Bidi.LRM_AFTER) != 0) {
\r
335 } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
\r
343 } else { /* RTL run */
\r
344 if (bidi.isInverse() &&
\r
345 !bidi.testDirPropFlagAt(MASK_R_AL,
\r
346 bidiRun.limit - 1)) {
\r
347 markFlag |= Bidi.RLM_BEFORE;
\r
349 if ((markFlag & Bidi.LRM_BEFORE) != 0) {
\r
351 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
\r
359 dest.append(doWriteReverse(text, bidiRun.start,
\r
360 bidiRun.limit, options));
\r
362 if(bidi.isInverse() &&
\r
363 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
\r
364 markFlag |= Bidi.RLM_AFTER;
\r
366 if ((markFlag & Bidi.LRM_AFTER) != 0) {
\r
368 } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
\r
380 /* reverse output */
\r
381 if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
\r
382 /* do not insert Bidi controls */
\r
383 for(run = runCount; --run >= 0; ) {
\r
384 BidiRun bidiRun = bidi.getVisualRun(run);
\r
385 if (bidiRun.isEvenRun()) {
\r
386 dest.append(doWriteReverse(text,
\r
387 bidiRun.start, bidiRun.limit,
\r
388 options & ~Bidi.DO_MIRRORING));
\r
390 dest.append(doWriteForward(text, bidiRun.start,
\r
391 bidiRun.limit, options));
\r
395 /* insert Bidi controls for "inverse Bidi" */
\r
397 byte[] dirProps = bidi.dirProps;
\r
399 for (run = runCount; --run >= 0; ) {
\r
400 /* reverse output */
\r
401 BidiRun bidiRun = bidi.getVisualRun(run);
\r
402 if (bidiRun.isEvenRun()) {
\r
403 if (dirProps[bidiRun.limit - 1] != Bidi.L) {
\r
404 dest.append(LRM_CHAR);
\r
407 dest.append(doWriteReverse(text, bidiRun.start,
\r
408 bidiRun.limit, options & ~Bidi.DO_MIRRORING));
\r
410 if (dirProps[bidiRun.start] != Bidi.L) {
\r
411 dest.append(LRM_CHAR);
\r
414 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
\r
415 dest.append(RLM_CHAR);
\r
418 dest.append(doWriteForward(text, bidiRun.start,
\r
419 bidiRun.limit, options));
\r
421 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
\r
422 dest.append(RLM_CHAR);
\r
429 return dest.toString();
\r