]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/text/BidiWriter.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / classes / core / src / com / ibm / icu / text / BidiWriter.java
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2001-2010, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 */
7 /* Written by Simon Montagu, Matitiahu Allouche
8  * (ported from C code written by Markus W. Scherer)
9  */
10
11 package com.ibm.icu.text;
12
13 import com.ibm.icu.lang.UCharacter;
14
15 final class BidiWriter {
16
17     /** Bidi control code points */
18     static final char LRM_CHAR = 0x200e;
19     static final char RLM_CHAR = 0x200f;
20     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
21                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
22
23     private static boolean IsCombining(int type)
24     {
25         return ((1<<type &
26                 (1<<UCharacter.NON_SPACING_MARK |
27                  1<<UCharacter.COMBINING_SPACING_MARK |
28                  1<<UCharacter.ENCLOSING_MARK)) != 0);
29     }
30
31     /*
32      * When we have OUTPUT_REVERSE set on writeReordered(), then we
33      * semantically write RTL runs in reverse and later reverse them again.
34      * Instead, we actually write them in forward order to begin with.
35      * However, if the RTL run was to be mirrored, we need to mirror here now
36      * since the implicit second reversal must not do it.
37      * It looks strange to do mirroring in LTR output, but it is only because
38      * we are writing RTL output in reverse.
39      */
40     private static String doWriteForward(String src, int options) {
41         /* optimize for several combinations of options */
42         switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {
43         case 0: {
44             /* simply return the LTR run */
45             return src;
46         }
47         case Bidi.DO_MIRRORING: {
48             StringBuffer dest = new StringBuffer(src.length());
49
50             /* do mirroring */
51             int i=0;
52             int c;
53
54             do {
55                 c = UTF16.charAt(src, i);
56                 i += UTF16.getCharCount(c);
57                 UTF16.append(dest, UCharacter.getMirror(c));
58             } while(i < src.length());
59             return dest.toString();
60         }
61         case Bidi.REMOVE_BIDI_CONTROLS: {
62             StringBuilder dest = new StringBuilder(src.length());
63
64             /* copy the LTR run and remove any Bidi control characters */
65             int i = 0;
66             char c;
67             do {
68                 c = src.charAt(i++);
69                 if(!Bidi.IsBidiControlChar(c)) {
70                     dest.append(c);
71                 }
72             } while(i < src.length());
73             return dest.toString();
74         }
75         default: {
76             StringBuffer dest = new StringBuffer(src.length());
77
78             /* remove Bidi control characters and do mirroring */
79             int i = 0;
80             int c;
81             do {
82                 c = UTF16.charAt(src, i);
83                 i += UTF16.getCharCount(c);
84                 if(!Bidi.IsBidiControlChar(c)) {
85                     UTF16.append(dest, UCharacter.getMirror(c));
86                 }
87             } while(i < src.length());
88             return dest.toString();
89         }
90         } /* end of switch */
91     }
92
93     private static String doWriteForward(char[] text, int start, int limit,
94                                          int options)
95     {
96         return doWriteForward(new String(text, start, limit - start), options);
97     }
98
99     static String writeReverse(String src, int options) {
100         /*
101          * RTL run -
102          *
103          * RTL runs need to be copied to the destination in reverse order
104          * of code points, not code units, to keep Unicode characters intact.
105          *
106          * The general strategy for this is to read the source text
107          * in backward order, collect all code units for a code point
108          * (and optionally following combining characters, see below),
109          * and copy all these code units in ascending order
110          * to the destination for this run.
111          *
112          * Several options request whether combining characters
113          * should be kept after their base characters,
114          * whether Bidi control characters should be removed, and
115          * whether characters should be replaced by their mirror-image
116          * equivalent Unicode characters.
117          */
118         StringBuffer dest = new StringBuffer(src.length());
119
120         /* optimize for several combinations of options */
121         switch (options &
122                 (Bidi.REMOVE_BIDI_CONTROLS |
123                  Bidi.DO_MIRRORING |
124                  Bidi.KEEP_BASE_COMBINING)) {
125
126         case 0:
127             /*
128              * With none of the "complicated" options set, the destination
129              * run will have the same length as the source run,
130              * and there is no mirroring and no keeping combining characters
131              * with their base characters.
132              *
133              * XXX: or dest = UTF16.reverse(new StringBuffer(src));
134              */
135
136             int srcLength = src.length();
137
138             /* preserve character integrity */
139             do {
140                 /* i is always after the last code unit known to need to be kept
141                  *  in this segment */
142                 int i = srcLength;
143
144                 /* collect code units for one base character */
145                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
146                                                              srcLength - 1));
147
148                 /* copy this base character */
149                 dest.append(src.substring(srcLength, i));
150             } while(srcLength > 0);
151             break;
152
153         case Bidi.KEEP_BASE_COMBINING:
154             /*
155              * Here, too, the destination
156              * run will have the same length as the source run,
157              * and there is no mirroring.
158              * We do need to keep combining characters with their base
159              * characters.
160              */
161             srcLength = src.length();
162
163             /* preserve character integrity */
164             do {
165                 /* i is always after the last code unit known to need to be kept
166                  *  in this segment */
167                 int c;
168                 int i = srcLength;
169
170                 /* collect code units and modifier letters for one base
171                  * character */
172                 do {
173                     c = UTF16.charAt(src, srcLength - 1);
174                     srcLength -= UTF16.getCharCount(c);
175                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
176
177                 /* copy this "user character" */
178                 dest.append(src.substring(srcLength, i));
179             } while(srcLength > 0);
180             break;
181
182         default:
183             /*
184              * With several "complicated" options set, this is the most
185              * general and the slowest copying of an RTL run.
186              * We will do mirroring, remove Bidi controls, and
187              * keep combining characters with their base characters
188              * as requested.
189              */
190             srcLength = src.length();
191
192             /* preserve character integrity */
193             do {
194                 /* i is always after the last code unit known to need to be kept
195                  *  in this segment */
196                 int i = srcLength;
197
198                 /* collect code units for one base character */
199                 int c = UTF16.charAt(src, srcLength - 1);
200                 srcLength -= UTF16.getCharCount(c);
201                 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
202                     /* collect modifier letters for this base character */
203                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
204                         c = UTF16.charAt(src, srcLength - 1);
205                         srcLength -= UTF16.getCharCount(c);
206                     }
207                 }
208
209                 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
210                     Bidi.IsBidiControlChar(c)) {
211                     /* do not copy this Bidi control character */
212                     continue;
213                 }
214
215                 /* copy this "user character" */
216                 int j = srcLength;
217                 if((options & Bidi.DO_MIRRORING) != 0) {
218                     /* mirror only the base character */
219                     c = UCharacter.getMirror(c);
220                     UTF16.append(dest, c);
221                     j += UTF16.getCharCount(c);
222                 }
223                 dest.append(src.substring(j, i));
224             } while(srcLength > 0);
225             break;
226         } /* end of switch */
227
228         return dest.toString();
229     }
230
231     static String doWriteReverse(char[] text, int start, int limit, int options)
232     {
233         return writeReverse(new String(text, start, limit - start), options);
234     }
235
236     static String writeReordered(Bidi bidi, int options)
237     {
238         int run, runCount;
239         StringBuilder dest;
240         char[] text = bidi.text;
241         runCount = bidi.countRuns();
242
243         /*
244          * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
245          * reordering mode (checked below) is appropriate.
246          */
247         if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
248             options |= Bidi.INSERT_LRM_FOR_NUMERIC;
249             options &= ~Bidi.REMOVE_BIDI_CONTROLS;
250         }
251         /*
252          * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
253          * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
254          */
255         if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
256             options |= Bidi.REMOVE_BIDI_CONTROLS;
257             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
258         }
259         /*
260          * If we do not perform the "inverse Bidi" algorithm, then we
261          * don't need to insert any LRMs, and don't need to test for it.
262          */
263         if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
264             (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT)  &&
265             (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
266             (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
267             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
268         }
269         dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
270                                  bidi.length * 2 : bidi.length);
271         /*
272          * Iterate through all visual runs and copy the run text segments to
273          * the destination, according to the options.
274          *
275          * The tests for where to insert LRMs ignore the fact that there may be
276          * BN codes or non-BMP code points at the beginning and end of a run;
277          * they may insert LRMs unnecessarily but the tests are faster this way
278          * (this would have to be improved for UTF-8).
279          */
280         if ((options & Bidi.OUTPUT_REVERSE) == 0) {
281             /* forward output */
282             if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
283                 /* do not insert Bidi controls */
284                 for (run = 0; run < runCount; ++run) {
285                     BidiRun bidiRun = bidi.getVisualRun(run);
286                     if (bidiRun.isEvenRun()) {
287                         dest.append(doWriteForward(text, bidiRun.start,
288                                                    bidiRun.limit,
289                                                    options & ~Bidi.DO_MIRRORING));
290                      } else {
291                         dest.append(doWriteReverse(text, bidiRun.start,
292                                                    bidiRun.limit, options));
293                      }
294                 }
295             } else {
296                 /* insert Bidi controls for "inverse Bidi" */
297                 byte[] dirProps = bidi.dirProps;
298                 char uc;
299                 int markFlag;
300
301                 for (run = 0; run < runCount; ++run) {
302                     BidiRun bidiRun = bidi.getVisualRun(run);
303                     markFlag=0;
304                     /* check if something relevant in insertPoints */
305                     markFlag = bidi.runs[run].insertRemove;
306                     if (markFlag < 0) { /* bidi controls count */
307                         markFlag = 0;
308                     }
309                     if (bidiRun.isEvenRun()) {
310                         if (bidi.isInverse() &&
311                                 dirProps[bidiRun.start] != Bidi.L) {
312                             markFlag |= Bidi.LRM_BEFORE;
313                         }
314                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {
315                             uc = LRM_CHAR;
316                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
317                             uc = RLM_CHAR;
318                         } else {
319                             uc = 0;
320                         }
321                         if (uc != 0) {
322                             dest.append(uc);
323                         }
324                         dest.append(doWriteForward(text,
325                                                    bidiRun.start, bidiRun.limit,
326                                                    options & ~Bidi.DO_MIRRORING));
327
328                         if (bidi.isInverse() &&
329                              dirProps[bidiRun.limit - 1] != Bidi.L) {
330                             markFlag |= Bidi.LRM_AFTER;
331                         }
332                         if ((markFlag & Bidi.LRM_AFTER) != 0) {
333                             uc = LRM_CHAR;
334                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
335                             uc = RLM_CHAR;
336                         } else {
337                             uc = 0;
338                         }
339                         if (uc != 0) {
340                             dest.append(uc);
341                         }
342                     } else { /* RTL run */
343                         if (bidi.isInverse() &&
344                             !bidi.testDirPropFlagAt(MASK_R_AL,
345                                                     bidiRun.limit - 1)) {
346                             markFlag |= Bidi.RLM_BEFORE;
347                         }
348                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {
349                             uc = LRM_CHAR;
350                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
351                             uc = RLM_CHAR;
352                         } else {
353                             uc = 0;
354                         }
355                         if (uc != 0) {
356                             dest.append(uc);
357                         }
358                         dest.append(doWriteReverse(text, bidiRun.start,
359                                                    bidiRun.limit, options));
360
361                         if(bidi.isInverse() &&
362                                 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
363                             markFlag |= Bidi.RLM_AFTER;
364                         }
365                         if ((markFlag & Bidi.LRM_AFTER) != 0) {
366                             uc = LRM_CHAR;
367                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
368                             uc = RLM_CHAR;
369                         } else {
370                             uc = 0;
371                         }
372                         if (uc != 0) {
373                             dest.append(uc);
374                         }
375                     }
376                 }
377             }
378         } else {
379             /* reverse output */
380             if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
381                 /* do not insert Bidi controls */
382                 for(run = runCount; --run >= 0; ) {
383                     BidiRun bidiRun = bidi.getVisualRun(run);
384                     if (bidiRun.isEvenRun()) {
385                         dest.append(doWriteReverse(text,
386                                                    bidiRun.start, bidiRun.limit,
387                                                    options & ~Bidi.DO_MIRRORING));
388                     } else {
389                         dest.append(doWriteForward(text, bidiRun.start,
390                                                    bidiRun.limit, options));
391                     }
392                 }
393             } else {
394                 /* insert Bidi controls for "inverse Bidi" */
395
396                 byte[] dirProps = bidi.dirProps;
397
398                 for (run = runCount; --run >= 0; ) {
399                     /* reverse output */
400                     BidiRun bidiRun = bidi.getVisualRun(run);
401                     if (bidiRun.isEvenRun()) {
402                         if (dirProps[bidiRun.limit - 1] != Bidi.L) {
403                             dest.append(LRM_CHAR);
404                         }
405
406                         dest.append(doWriteReverse(text, bidiRun.start,
407                                 bidiRun.limit, options & ~Bidi.DO_MIRRORING));
408
409                         if (dirProps[bidiRun.start] != Bidi.L) {
410                             dest.append(LRM_CHAR);
411                         }
412                     } else {
413                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
414                             dest.append(RLM_CHAR);
415                         }
416
417                         dest.append(doWriteForward(text, bidiRun.start,
418                                                    bidiRun.limit, options));
419
420                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
421                             dest.append(RLM_CHAR);
422                         }
423                     }
424                 }
425             }
426         }
427
428         return dest.toString();
429     }
430 }