/*
 *******************************************************************************
 * Copyright (C) 2001-2010, International Business Machines
 * Corporation and others. All Rights Reserved.
 *******************************************************************************
 */
/* Written by Simon Montagu, Matitiahu Allouche
 * (ported from C code written by Markus W. Scherer)
 */
\r
11 package com.ibm.icu.text;
\r
13 import com.ibm.icu.lang.UCharacter;
\r
15 final class BidiWriter {
\r
17 /** Bidi control code points */
\r
18 static final char LRM_CHAR = 0x200e;
\r
19 static final char RLM_CHAR = 0x200f;
\r
20 static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
\r
21 1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
\r
23 private static boolean IsCombining(int type)
\r
26 (1<<UCharacter.NON_SPACING_MARK |
\r
27 1<<UCharacter.COMBINING_SPACING_MARK |
\r
28 1<<UCharacter.ENCLOSING_MARK)) != 0);
\r
32 * When we have OUTPUT_REVERSE set on writeReordered(), then we
\r
33 * semantically write RTL runs in reverse and later reverse them again.
\r
34 * Instead, we actually write them in forward order to begin with.
\r
35 * However, if the RTL run was to be mirrored, we need to mirror here now
\r
36 * since the implicit second reversal must not do it.
\r
37 * It looks strange to do mirroring in LTR output, but it is only because
\r
38 * we are writing RTL output in reverse.
\r
40 private static String doWriteForward(String src, int options) {
\r
41 /* optimize for several combinations of options */
\r
42 switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {
\r
44 /* simply return the LTR run */
\r
47 case Bidi.DO_MIRRORING: {
\r
48 StringBuffer dest = new StringBuffer(src.length());
\r
55 c = UTF16.charAt(src, i);
\r
56 i += UTF16.getCharCount(c);
\r
57 UTF16.append(dest, UCharacter.getMirror(c));
\r
58 } while(i < src.length());
\r
59 return dest.toString();
\r
61 case Bidi.REMOVE_BIDI_CONTROLS: {
\r
62 StringBuilder dest = new StringBuilder(src.length());
\r
64 /* copy the LTR run and remove any Bidi control characters */
\r
68 c = src.charAt(i++);
\r
69 if(!Bidi.IsBidiControlChar(c)) {
\r
72 } while(i < src.length());
\r
73 return dest.toString();
\r
76 StringBuffer dest = new StringBuffer(src.length());
\r
78 /* remove Bidi control characters and do mirroring */
\r
82 c = UTF16.charAt(src, i);
\r
83 i += UTF16.getCharCount(c);
\r
84 if(!Bidi.IsBidiControlChar(c)) {
\r
85 UTF16.append(dest, UCharacter.getMirror(c));
\r
87 } while(i < src.length());
\r
88 return dest.toString();
\r
90 } /* end of switch */
\r
93 private static String doWriteForward(char[] text, int start, int limit,
\r
96 return doWriteForward(new String(text, start, limit - start), options);
\r
99 static String writeReverse(String src, int options) {
\r
103 * RTL runs need to be copied to the destination in reverse order
\r
104 * of code points, not code units, to keep Unicode characters intact.
\r
106 * The general strategy for this is to read the source text
\r
107 * in backward order, collect all code units for a code point
\r
108 * (and optionally following combining characters, see below),
\r
109 * and copy all these code units in ascending order
\r
110 * to the destination for this run.
\r
112 * Several options request whether combining characters
\r
113 * should be kept after their base characters,
\r
114 * whether Bidi control characters should be removed, and
\r
115 * whether characters should be replaced by their mirror-image
\r
116 * equivalent Unicode characters.
\r
118 StringBuffer dest = new StringBuffer(src.length());
\r
120 /* optimize for several combinations of options */
\r
122 (Bidi.REMOVE_BIDI_CONTROLS |
\r
123 Bidi.DO_MIRRORING |
\r
124 Bidi.KEEP_BASE_COMBINING)) {
\r
128 * With none of the "complicated" options set, the destination
\r
129 * run will have the same length as the source run,
\r
130 * and there is no mirroring and no keeping combining characters
\r
131 * with their base characters.
\r
133 * XXX: or dest = UTF16.reverse(new StringBuffer(src));
\r
136 int srcLength = src.length();
\r
138 /* preserve character integrity */
\r
140 /* i is always after the last code unit known to need to be kept
\r
141 * in this segment */
\r
144 /* collect code units for one base character */
\r
145 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
\r
148 /* copy this base character */
\r
149 dest.append(src.substring(srcLength, i));
\r
150 } while(srcLength > 0);
\r
153 case Bidi.KEEP_BASE_COMBINING:
\r
155 * Here, too, the destination
\r
156 * run will have the same length as the source run,
\r
157 * and there is no mirroring.
\r
158 * We do need to keep combining characters with their base
\r
161 srcLength = src.length();
\r
163 /* preserve character integrity */
\r
165 /* i is always after the last code unit known to need to be kept
\r
166 * in this segment */
\r
170 /* collect code units and modifier letters for one base
\r
173 c = UTF16.charAt(src, srcLength - 1);
\r
174 srcLength -= UTF16.getCharCount(c);
\r
175 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
\r
177 /* copy this "user character" */
\r
178 dest.append(src.substring(srcLength, i));
\r
179 } while(srcLength > 0);
\r
184 * With several "complicated" options set, this is the most
\r
185 * general and the slowest copying of an RTL run.
\r
186 * We will do mirroring, remove Bidi controls, and
\r
187 * keep combining characters with their base characters
\r
190 srcLength = src.length();
\r
192 /* preserve character integrity */
\r
194 /* i is always after the last code unit known to need to be kept
\r
195 * in this segment */
\r
198 /* collect code units for one base character */
\r
199 int c = UTF16.charAt(src, srcLength - 1);
\r
200 srcLength -= UTF16.getCharCount(c);
\r
201 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
\r
202 /* collect modifier letters for this base character */
\r
203 while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
\r
204 c = UTF16.charAt(src, srcLength - 1);
\r
205 srcLength -= UTF16.getCharCount(c);
\r
209 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
\r
210 Bidi.IsBidiControlChar(c)) {
\r
211 /* do not copy this Bidi control character */
\r
215 /* copy this "user character" */
\r
217 if((options & Bidi.DO_MIRRORING) != 0) {
\r
218 /* mirror only the base character */
\r
219 c = UCharacter.getMirror(c);
\r
220 UTF16.append(dest, c);
\r
221 j += UTF16.getCharCount(c);
\r
223 dest.append(src.substring(j, i));
\r
224 } while(srcLength > 0);
\r
226 } /* end of switch */
\r
228 return dest.toString();
\r
231 static String doWriteReverse(char[] text, int start, int limit, int options)
\r
233 return writeReverse(new String(text, start, limit - start), options);
\r
236 static String writeReordered(Bidi bidi, int options)
\r
239 StringBuilder dest;
\r
240 char[] text = bidi.text;
\r
241 runCount = bidi.countRuns();
\r
244 * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
\r
245 * reordering mode (checked below) is appropriate.
\r
247 if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
\r
248 options |= Bidi.INSERT_LRM_FOR_NUMERIC;
\r
249 options &= ~Bidi.REMOVE_BIDI_CONTROLS;
\r
252 * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
\r
253 * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
\r
255 if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
\r
256 options |= Bidi.REMOVE_BIDI_CONTROLS;
\r
257 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
\r
260 * If we do not perform the "inverse Bidi" algorithm, then we
\r
261 * don't need to insert any LRMs, and don't need to test for it.
\r
263 if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
\r
264 (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT) &&
\r
265 (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
\r
266 (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
\r
267 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
\r
269 dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
\r
270 bidi.length * 2 : bidi.length);
\r
272 * Iterate through all visual runs and copy the run text segments to
\r
273 * the destination, according to the options.
\r
275 * The tests for where to insert LRMs ignore the fact that there may be
\r
276 * BN codes or non-BMP code points at the beginning and end of a run;
\r
277 * they may insert LRMs unnecessarily but the tests are faster this way
\r
278 * (this would have to be improved for UTF-8).
\r
280 if ((options & Bidi.OUTPUT_REVERSE) == 0) {
\r
281 /* forward output */
\r
282 if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
\r
283 /* do not insert Bidi controls */
\r
284 for (run = 0; run < runCount; ++run) {
\r
285 BidiRun bidiRun = bidi.getVisualRun(run);
\r
286 if (bidiRun.isEvenRun()) {
\r
287 dest.append(doWriteForward(text, bidiRun.start,
\r
289 options & ~Bidi.DO_MIRRORING));
\r
291 dest.append(doWriteReverse(text, bidiRun.start,
\r
292 bidiRun.limit, options));
\r
296 /* insert Bidi controls for "inverse Bidi" */
\r
297 byte[] dirProps = bidi.dirProps;
\r
301 for (run = 0; run < runCount; ++run) {
\r
302 BidiRun bidiRun = bidi.getVisualRun(run);
\r
304 /* check if something relevant in insertPoints */
\r
305 markFlag = bidi.runs[run].insertRemove;
\r
306 if (markFlag < 0) { /* bidi controls count */
\r
309 if (bidiRun.isEvenRun()) {
\r
310 if (bidi.isInverse() &&
\r
311 dirProps[bidiRun.start] != Bidi.L) {
\r
312 markFlag |= Bidi.LRM_BEFORE;
\r
314 if ((markFlag & Bidi.LRM_BEFORE) != 0) {
\r
316 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
\r
324 dest.append(doWriteForward(text,
\r
325 bidiRun.start, bidiRun.limit,
\r
326 options & ~Bidi.DO_MIRRORING));
\r
328 if (bidi.isInverse() &&
\r
329 dirProps[bidiRun.limit - 1] != Bidi.L) {
\r
330 markFlag |= Bidi.LRM_AFTER;
\r
332 if ((markFlag & Bidi.LRM_AFTER) != 0) {
\r
334 } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
\r
342 } else { /* RTL run */
\r
343 if (bidi.isInverse() &&
\r
344 !bidi.testDirPropFlagAt(MASK_R_AL,
\r
345 bidiRun.limit - 1)) {
\r
346 markFlag |= Bidi.RLM_BEFORE;
\r
348 if ((markFlag & Bidi.LRM_BEFORE) != 0) {
\r
350 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
\r
358 dest.append(doWriteReverse(text, bidiRun.start,
\r
359 bidiRun.limit, options));
\r
361 if(bidi.isInverse() &&
\r
362 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
\r
363 markFlag |= Bidi.RLM_AFTER;
\r
365 if ((markFlag & Bidi.LRM_AFTER) != 0) {
\r
367 } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
\r
379 /* reverse output */
\r
380 if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
\r
381 /* do not insert Bidi controls */
\r
382 for(run = runCount; --run >= 0; ) {
\r
383 BidiRun bidiRun = bidi.getVisualRun(run);
\r
384 if (bidiRun.isEvenRun()) {
\r
385 dest.append(doWriteReverse(text,
\r
386 bidiRun.start, bidiRun.limit,
\r
387 options & ~Bidi.DO_MIRRORING));
\r
389 dest.append(doWriteForward(text, bidiRun.start,
\r
390 bidiRun.limit, options));
\r
394 /* insert Bidi controls for "inverse Bidi" */
\r
396 byte[] dirProps = bidi.dirProps;
\r
398 for (run = runCount; --run >= 0; ) {
\r
399 /* reverse output */
\r
400 BidiRun bidiRun = bidi.getVisualRun(run);
\r
401 if (bidiRun.isEvenRun()) {
\r
402 if (dirProps[bidiRun.limit - 1] != Bidi.L) {
\r
403 dest.append(LRM_CHAR);
\r
406 dest.append(doWriteReverse(text, bidiRun.start,
\r
407 bidiRun.limit, options & ~Bidi.DO_MIRRORING));
\r
409 if (dirProps[bidiRun.start] != Bidi.L) {
\r
410 dest.append(LRM_CHAR);
\r
413 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
\r
414 dest.append(RLM_CHAR);
\r
417 dest.append(doWriteForward(text, bidiRun.start,
\r
418 bidiRun.limit, options));
\r
420 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
\r
421 dest.append(RLM_CHAR);
\r
428 return dest.toString();
\r