/*
 *******************************************************************************
 * Copyright (C) 2001-2010, International Business Machines
 * Corporation and others. All Rights Reserved.
 *******************************************************************************
 */
/* Written by Simon Montagu, Matitiahu Allouche
 * (ported from C code written by Markus W. Scherer)
 */
11 package com.ibm.icu.text;
13 import com.ibm.icu.lang.UCharacter;
15 final class BidiWriter {
17 /** Bidi control code points */
18 static final char LRM_CHAR = 0x200e;
19 static final char RLM_CHAR = 0x200f;
20 static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
21 1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
23 private static boolean IsCombining(int type)
26 (1<<UCharacter.NON_SPACING_MARK |
27 1<<UCharacter.COMBINING_SPACING_MARK |
28 1<<UCharacter.ENCLOSING_MARK)) != 0);
32 * When we have OUTPUT_REVERSE set on writeReordered(), then we
33 * semantically write RTL runs in reverse and later reverse them again.
34 * Instead, we actually write them in forward order to begin with.
35 * However, if the RTL run was to be mirrored, we need to mirror here now
36 * since the implicit second reversal must not do it.
37 * It looks strange to do mirroring in LTR output, but it is only because
38 * we are writing RTL output in reverse.
40 private static String doWriteForward(String src, int options) {
41 /* optimize for several combinations of options */
42 switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {
44 /* simply return the LTR run */
47 case Bidi.DO_MIRRORING: {
48 StringBuffer dest = new StringBuffer(src.length());
55 c = UTF16.charAt(src, i);
56 i += UTF16.getCharCount(c);
57 UTF16.append(dest, UCharacter.getMirror(c));
58 } while(i < src.length());
59 return dest.toString();
61 case Bidi.REMOVE_BIDI_CONTROLS: {
62 StringBuilder dest = new StringBuilder(src.length());
64 /* copy the LTR run and remove any Bidi control characters */
69 if(!Bidi.IsBidiControlChar(c)) {
72 } while(i < src.length());
73 return dest.toString();
76 StringBuffer dest = new StringBuffer(src.length());
78 /* remove Bidi control characters and do mirroring */
82 c = UTF16.charAt(src, i);
83 i += UTF16.getCharCount(c);
84 if(!Bidi.IsBidiControlChar(c)) {
85 UTF16.append(dest, UCharacter.getMirror(c));
87 } while(i < src.length());
88 return dest.toString();
93 private static String doWriteForward(char[] text, int start, int limit,
96 return doWriteForward(new String(text, start, limit - start), options);
99 static String writeReverse(String src, int options) {
103 * RTL runs need to be copied to the destination in reverse order
104 * of code points, not code units, to keep Unicode characters intact.
106 * The general strategy for this is to read the source text
107 * in backward order, collect all code units for a code point
108 * (and optionally following combining characters, see below),
109 * and copy all these code units in ascending order
110 * to the destination for this run.
112 * Several options request whether combining characters
113 * should be kept after their base characters,
114 * whether Bidi control characters should be removed, and
115 * whether characters should be replaced by their mirror-image
116 * equivalent Unicode characters.
118 StringBuffer dest = new StringBuffer(src.length());
120 /* optimize for several combinations of options */
122 (Bidi.REMOVE_BIDI_CONTROLS |
124 Bidi.KEEP_BASE_COMBINING)) {
128 * With none of the "complicated" options set, the destination
129 * run will have the same length as the source run,
130 * and there is no mirroring and no keeping combining characters
131 * with their base characters.
133 * XXX: or dest = UTF16.reverse(new StringBuffer(src));
136 int srcLength = src.length();
138 /* preserve character integrity */
140 /* i is always after the last code unit known to need to be kept
144 /* collect code units for one base character */
145 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
148 /* copy this base character */
149 dest.append(src.substring(srcLength, i));
150 } while(srcLength > 0);
153 case Bidi.KEEP_BASE_COMBINING:
155 * Here, too, the destination
156 * run will have the same length as the source run,
157 * and there is no mirroring.
158 * We do need to keep combining characters with their base
161 srcLength = src.length();
163 /* preserve character integrity */
165 /* i is always after the last code unit known to need to be kept
170 /* collect code units and modifier letters for one base
173 c = UTF16.charAt(src, srcLength - 1);
174 srcLength -= UTF16.getCharCount(c);
175 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
177 /* copy this "user character" */
178 dest.append(src.substring(srcLength, i));
179 } while(srcLength > 0);
184 * With several "complicated" options set, this is the most
185 * general and the slowest copying of an RTL run.
186 * We will do mirroring, remove Bidi controls, and
187 * keep combining characters with their base characters
190 srcLength = src.length();
192 /* preserve character integrity */
194 /* i is always after the last code unit known to need to be kept
198 /* collect code units for one base character */
199 int c = UTF16.charAt(src, srcLength - 1);
200 srcLength -= UTF16.getCharCount(c);
201 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
202 /* collect modifier letters for this base character */
203 while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
204 c = UTF16.charAt(src, srcLength - 1);
205 srcLength -= UTF16.getCharCount(c);
209 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
210 Bidi.IsBidiControlChar(c)) {
211 /* do not copy this Bidi control character */
215 /* copy this "user character" */
217 if((options & Bidi.DO_MIRRORING) != 0) {
218 /* mirror only the base character */
219 c = UCharacter.getMirror(c);
220 UTF16.append(dest, c);
221 j += UTF16.getCharCount(c);
223 dest.append(src.substring(j, i));
224 } while(srcLength > 0);
226 } /* end of switch */
228 return dest.toString();
231 static String doWriteReverse(char[] text, int start, int limit, int options)
233 return writeReverse(new String(text, start, limit - start), options);
236 static String writeReordered(Bidi bidi, int options)
240 char[] text = bidi.text;
241 runCount = bidi.countRuns();
244 * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
245 * reordering mode (checked below) is appropriate.
247 if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
248 options |= Bidi.INSERT_LRM_FOR_NUMERIC;
249 options &= ~Bidi.REMOVE_BIDI_CONTROLS;
252 * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
253 * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
255 if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
256 options |= Bidi.REMOVE_BIDI_CONTROLS;
257 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
260 * If we do not perform the "inverse Bidi" algorithm, then we
261 * don't need to insert any LRMs, and don't need to test for it.
263 if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
264 (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT) &&
265 (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
266 (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
267 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
269 dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
270 bidi.length * 2 : bidi.length);
272 * Iterate through all visual runs and copy the run text segments to
273 * the destination, according to the options.
275 * The tests for where to insert LRMs ignore the fact that there may be
276 * BN codes or non-BMP code points at the beginning and end of a run;
277 * they may insert LRMs unnecessarily but the tests are faster this way
278 * (this would have to be improved for UTF-8).
280 if ((options & Bidi.OUTPUT_REVERSE) == 0) {
282 if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
283 /* do not insert Bidi controls */
284 for (run = 0; run < runCount; ++run) {
285 BidiRun bidiRun = bidi.getVisualRun(run);
286 if (bidiRun.isEvenRun()) {
287 dest.append(doWriteForward(text, bidiRun.start,
289 options & ~Bidi.DO_MIRRORING));
291 dest.append(doWriteReverse(text, bidiRun.start,
292 bidiRun.limit, options));
296 /* insert Bidi controls for "inverse Bidi" */
297 byte[] dirProps = bidi.dirProps;
301 for (run = 0; run < runCount; ++run) {
302 BidiRun bidiRun = bidi.getVisualRun(run);
304 /* check if something relevant in insertPoints */
305 markFlag = bidi.runs[run].insertRemove;
306 if (markFlag < 0) { /* bidi controls count */
309 if (bidiRun.isEvenRun()) {
310 if (bidi.isInverse() &&
311 dirProps[bidiRun.start] != Bidi.L) {
312 markFlag |= Bidi.LRM_BEFORE;
314 if ((markFlag & Bidi.LRM_BEFORE) != 0) {
316 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
324 dest.append(doWriteForward(text,
325 bidiRun.start, bidiRun.limit,
326 options & ~Bidi.DO_MIRRORING));
328 if (bidi.isInverse() &&
329 dirProps[bidiRun.limit - 1] != Bidi.L) {
330 markFlag |= Bidi.LRM_AFTER;
332 if ((markFlag & Bidi.LRM_AFTER) != 0) {
334 } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
342 } else { /* RTL run */
343 if (bidi.isInverse() &&
344 !bidi.testDirPropFlagAt(MASK_R_AL,
345 bidiRun.limit - 1)) {
346 markFlag |= Bidi.RLM_BEFORE;
348 if ((markFlag & Bidi.LRM_BEFORE) != 0) {
350 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
358 dest.append(doWriteReverse(text, bidiRun.start,
359 bidiRun.limit, options));
361 if(bidi.isInverse() &&
362 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
363 markFlag |= Bidi.RLM_AFTER;
365 if ((markFlag & Bidi.LRM_AFTER) != 0) {
367 } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
380 if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
381 /* do not insert Bidi controls */
382 for(run = runCount; --run >= 0; ) {
383 BidiRun bidiRun = bidi.getVisualRun(run);
384 if (bidiRun.isEvenRun()) {
385 dest.append(doWriteReverse(text,
386 bidiRun.start, bidiRun.limit,
387 options & ~Bidi.DO_MIRRORING));
389 dest.append(doWriteForward(text, bidiRun.start,
390 bidiRun.limit, options));
394 /* insert Bidi controls for "inverse Bidi" */
396 byte[] dirProps = bidi.dirProps;
398 for (run = runCount; --run >= 0; ) {
400 BidiRun bidiRun = bidi.getVisualRun(run);
401 if (bidiRun.isEvenRun()) {
402 if (dirProps[bidiRun.limit - 1] != Bidi.L) {
403 dest.append(LRM_CHAR);
406 dest.append(doWriteReverse(text, bidiRun.start,
407 bidiRun.limit, options & ~Bidi.DO_MIRRORING));
409 if (dirProps[bidiRun.start] != Bidi.L) {
410 dest.append(LRM_CHAR);
413 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
414 dest.append(RLM_CHAR);
417 dest.append(doWriteForward(text, bidiRun.start,
418 bidiRun.limit, options));
420 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
421 dest.append(RLM_CHAR);
428 return dest.toString();