]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/text/BidiWriter.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / text / BidiWriter.java
1 /*\r
2 *******************************************************************************\r
3 *   Copyright (C) 2001-2007, International Business Machines\r
4 *   Corporation and others.  All Rights Reserved.\r
5 *******************************************************************************\r
6 */\r
7 /* Written by Simon Montagu, Matitiahu Allouche\r
8  * (ported from C code written by Markus W. Scherer)\r
9  */\r
10 \r
11 package com.ibm.icu.text;\r
12 \r
13 import com.ibm.icu.lang.UCharacter;\r
14 import com.ibm.icu.text.Bidi;\r
15 \r
16 final class BidiWriter {\r
17 \r
18     /** Bidi control code points */\r
19     static final char LRM_CHAR = 0x200e;\r
20     static final char RLM_CHAR = 0x200f;\r
21     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |\r
22                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);\r
23 \r
24     private static boolean IsCombining(int type)\r
25     {\r
26         return ((1<<type &\r
27                 (1<<UCharacter.NON_SPACING_MARK |\r
28                  1<<UCharacter.COMBINING_SPACING_MARK |\r
29                  1<<UCharacter.ENCLOSING_MARK)) != 0);\r
30     }\r
31 \r
32     /*\r
33      * When we have OUTPUT_REVERSE set on writeReordered(), then we\r
34      * semantically write RTL runs in reverse and later reverse them again.\r
35      * Instead, we actually write them in forward order to begin with.\r
36      * However, if the RTL run was to be mirrored, we need to mirror here now\r
37      * since the implicit second reversal must not do it.\r
38      * It looks strange to do mirroring in LTR output, but it is only because\r
39      * we are writing RTL output in reverse.\r
40      */\r
41     private static String doWriteForward(String src, int options) {\r
42         /* optimize for several combinations of options */\r
43         switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {\r
44         case 0: {\r
45             /* simply copy the LTR run to the destination */\r
46             return new String(src);\r
47         }\r
48         case Bidi.DO_MIRRORING: {\r
49             StringBuffer dest = new StringBuffer(src.length());\r
50 \r
51             /* do mirroring */\r
52             int i=0;\r
53             int c;\r
54 \r
55             do {\r
56                 c = UTF16.charAt(src, i);\r
57                 i += UTF16.getCharCount(c);\r
58                 UTF16.append(dest, UCharacter.getMirror(c));\r
59             } while(i < src.length());\r
60             return dest.toString();\r
61         }\r
62         case Bidi.REMOVE_BIDI_CONTROLS: {\r
63             StringBuffer dest = new StringBuffer(src.length());\r
64 \r
65             /* copy the LTR run and remove any Bidi control characters */\r
66             int i = 0;\r
67             char c;\r
68             do {\r
69                 c = src.charAt(i++);\r
70                 if(!Bidi.IsBidiControlChar(c)) {\r
71                     dest.append(c);\r
72                 }\r
73             } while(i < src.length());\r
74             return dest.toString();\r
75         }\r
76         default: {\r
77             StringBuffer dest = new StringBuffer(src.length());\r
78 \r
79             /* remove Bidi control characters and do mirroring */\r
80             int i = 0;\r
81             int c;\r
82             do {\r
83                 c = UTF16.charAt(src, i);\r
84                 i += UTF16.getCharCount(c);\r
85                 if(!Bidi.IsBidiControlChar(c)) {\r
86                     UTF16.append(dest, UCharacter.getMirror(c));\r
87                 }\r
88             } while(i < src.length());\r
89             return dest.toString();\r
90         }\r
91         } /* end of switch */\r
92     }\r
93 \r
94     private static String doWriteForward(char[] text, int start, int limit,\r
95                                          int options)\r
96     {\r
97         return doWriteForward(new String(text, start, limit - start), options);\r
98     }\r
99 \r
100     static String writeReverse(String src, int options) {\r
101         /*\r
102          * RTL run -\r
103          *\r
104          * RTL runs need to be copied to the destination in reverse order\r
105          * of code points, not code units, to keep Unicode characters intact.\r
106          *\r
107          * The general strategy for this is to read the source text\r
108          * in backward order, collect all code units for a code point\r
109          * (and optionally following combining characters, see below),\r
110          * and copy all these code units in ascending order\r
111          * to the destination for this run.\r
112          *\r
113          * Several options request whether combining characters\r
114          * should be kept after their base characters,\r
115          * whether Bidi control characters should be removed, and\r
116          * whether characters should be replaced by their mirror-image\r
117          * equivalent Unicode characters.\r
118          */\r
119         StringBuffer dest = new StringBuffer(src.length());\r
120 \r
121         /* optimize for several combinations of options */\r
122         switch (options &\r
123                 (Bidi.REMOVE_BIDI_CONTROLS |\r
124                  Bidi.DO_MIRRORING |\r
125                  Bidi.KEEP_BASE_COMBINING)) {\r
126 \r
127         case 0:\r
128             /*\r
129              * With none of the "complicated" options set, the destination\r
130              * run will have the same length as the source run,\r
131              * and there is no mirroring and no keeping combining characters\r
132              * with their base characters.\r
133              *\r
134              * XXX: or dest = UTF16.reverse(new StringBuffer(src));\r
135              */\r
136 \r
137             int srcLength = src.length();\r
138 \r
139             /* preserve character integrity */\r
140             do {\r
141                 /* i is always after the last code unit known to need to be kept\r
142                  *  in this segment */\r
143                 int i = srcLength;\r
144 \r
145                 /* collect code units for one base character */\r
146                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,\r
147                                                              srcLength - 1));\r
148 \r
149                 /* copy this base character */\r
150                 dest.append(src.substring(srcLength, i));\r
151             } while(srcLength > 0);\r
152             break;\r
153 \r
154         case Bidi.KEEP_BASE_COMBINING:\r
155             /*\r
156              * Here, too, the destination\r
157              * run will have the same length as the source run,\r
158              * and there is no mirroring.\r
159              * We do need to keep combining characters with their base\r
160              * characters.\r
161              */\r
162             srcLength = src.length();\r
163 \r
164             /* preserve character integrity */\r
165             do {\r
166                 /* i is always after the last code unit known to need to be kept\r
167                  *  in this segment */\r
168                 int c;\r
169                 int i = srcLength;\r
170 \r
171                 /* collect code units and modifier letters for one base\r
172                  * character */\r
173                 do {\r
174                     c = UTF16.charAt(src, srcLength - 1);\r
175                     srcLength -= UTF16.getCharCount(c);\r
176                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));\r
177 \r
178                 /* copy this "user character" */\r
179                 dest.append(src.substring(srcLength, i));\r
180             } while(srcLength > 0);\r
181             break;\r
182 \r
183         default:\r
184             /*\r
185              * With several "complicated" options set, this is the most\r
186              * general and the slowest copying of an RTL run.\r
187              * We will do mirroring, remove Bidi controls, and\r
188              * keep combining characters with their base characters\r
189              * as requested.\r
190              */\r
191             srcLength = src.length();\r
192 \r
193             /* preserve character integrity */\r
194             do {\r
195                 /* i is always after the last code unit known to need to be kept\r
196                  *  in this segment */\r
197                 int i = srcLength;\r
198 \r
199                 /* collect code units for one base character */\r
200                 int c = UTF16.charAt(src, srcLength - 1);\r
201                 srcLength -= UTF16.getCharCount(c);\r
202                 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {\r
203                     /* collect modifier letters for this base character */\r
204                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {\r
205                         c = UTF16.charAt(src, srcLength - 1);\r
206                         srcLength -= UTF16.getCharCount(c);\r
207                     }\r
208                 }\r
209 \r
210                 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&\r
211                     Bidi.IsBidiControlChar(c)) {\r
212                     /* do not copy this Bidi control character */\r
213                     continue;\r
214                 }\r
215 \r
216                 /* copy this "user character" */\r
217                 int j = srcLength;\r
218                 if((options & Bidi.DO_MIRRORING) != 0) {\r
219                     /* mirror only the base character */\r
220                     c = UCharacter.getMirror(c);\r
221                     UTF16.append(dest, c);\r
222                     j += UTF16.getCharCount(c);\r
223                 }\r
224                 dest.append(src.substring(j, i));\r
225             } while(srcLength > 0);\r
226             break;\r
227         } /* end of switch */\r
228 \r
229         return dest.toString();\r
230     }\r
231 \r
232     static String doWriteReverse(char[] text, int start, int limit, int options)\r
233     {\r
234         return writeReverse(new String(text, start, limit - start), options);\r
235     }\r
236 \r
237     static String writeReordered(Bidi bidi, int options)\r
238     {\r
239         int run, runCount;\r
240         StringBuffer dest;\r
241         char[] text = bidi.text;\r
242         runCount = bidi.countRuns();\r
243 \r
244         /*\r
245          * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the\r
246          * reordering mode (checked below) is appropriate.\r
247          */\r
248         if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {\r
249             options |= Bidi.INSERT_LRM_FOR_NUMERIC;\r
250             options &= ~Bidi.REMOVE_BIDI_CONTROLS;\r
251         }\r
252         /*\r
253          * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS\r
254          * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.\r
255          */\r
256         if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {\r
257             options |= Bidi.REMOVE_BIDI_CONTROLS;\r
258             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;\r
259         }\r
260         /*\r
261          * If we do not perform the "inverse Bidi" algorithm, then we\r
262          * don't need to insert any LRMs, and don't need to test for it.\r
263          */\r
264         if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&\r
265             (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT)  &&\r
266             (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&\r
267             (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {\r
268             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;\r
269         }\r
270         dest = new StringBuffer((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?\r
271                                  bidi.length * 2 : bidi.length);\r
272         /*\r
273          * Iterate through all visual runs and copy the run text segments to\r
274          * the destination, according to the options.\r
275          *\r
276          * The tests for where to insert LRMs ignore the fact that there may be\r
277          * BN codes or non-BMP code points at the beginning and end of a run;\r
278          * they may insert LRMs unnecessarily but the tests are faster this way\r
279          * (this would have to be improved for UTF-8).\r
280          */\r
281         if ((options & Bidi.OUTPUT_REVERSE) == 0) {\r
282             /* forward output */\r
283             if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {\r
284                 /* do not insert Bidi controls */\r
285                 for (run = 0; run < runCount; ++run) {\r
286                     BidiRun bidiRun = bidi.getVisualRun(run);\r
287                     if (bidiRun.isEvenRun()) {\r
288                         dest.append(doWriteForward(text, bidiRun.start,\r
289                                                    bidiRun.limit,\r
290                                                    options & ~Bidi.DO_MIRRORING));\r
291                      } else {\r
292                         dest.append(doWriteReverse(text, bidiRun.start,\r
293                                                    bidiRun.limit, options));\r
294                      }\r
295                 }\r
296             } else {\r
297                 /* insert Bidi controls for "inverse Bidi" */\r
298                 byte[] dirProps = bidi.dirProps;\r
299                 char uc;\r
300                 int markFlag;\r
301 \r
302                 for (run = 0; run < runCount; ++run) {\r
303                     BidiRun bidiRun = bidi.getVisualRun(run);\r
304                     markFlag=0;\r
305                     /* check if something relevant in insertPoints */\r
306                     markFlag = bidi.runs[run].insertRemove;\r
307                     if (markFlag < 0) { /* bidi controls count */\r
308                         markFlag = 0;\r
309                     }\r
310                     if (bidiRun.isEvenRun()) {\r
311                         if (bidi.isInverse() &&\r
312                                 dirProps[bidiRun.start] != Bidi.L) {\r
313                             markFlag |= Bidi.LRM_BEFORE;\r
314                         }\r
315                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {\r
316                             uc = LRM_CHAR;\r
317                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {\r
318                             uc = RLM_CHAR;\r
319                         } else {\r
320                             uc = 0;\r
321                         }\r
322                         if (uc != 0) {\r
323                             dest.append(uc);\r
324                         }\r
325                         dest.append(doWriteForward(text,\r
326                                                    bidiRun.start, bidiRun.limit,\r
327                                                    options & ~Bidi.DO_MIRRORING));\r
328 \r
329                         if (bidi.isInverse() &&\r
330                              dirProps[bidiRun.limit - 1] != Bidi.L) {\r
331                             markFlag |= Bidi.LRM_AFTER;\r
332                         }\r
333                         if ((markFlag & Bidi.LRM_AFTER) != 0) {\r
334                             uc = LRM_CHAR;\r
335                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {\r
336                             uc = RLM_CHAR;\r
337                         } else {\r
338                             uc = 0;\r
339                         }\r
340                         if (uc != 0) {\r
341                             dest.append(uc);\r
342                         }\r
343                     } else { /* RTL run */\r
344                         if (bidi.isInverse() &&\r
345                             !bidi.testDirPropFlagAt(MASK_R_AL,\r
346                                                     bidiRun.limit - 1)) {\r
347                             markFlag |= Bidi.RLM_BEFORE;\r
348                         }\r
349                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {\r
350                             uc = LRM_CHAR;\r
351                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {\r
352                             uc = RLM_CHAR;\r
353                         } else {\r
354                             uc = 0;\r
355                         }\r
356                         if (uc != 0) {\r
357                             dest.append(uc);\r
358                         }\r
359                         dest.append(doWriteReverse(text, bidiRun.start,\r
360                                                    bidiRun.limit, options));\r
361 \r
362                         if(bidi.isInverse() &&\r
363                                 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {\r
364                             markFlag |= Bidi.RLM_AFTER;\r
365                         }\r
366                         if ((markFlag & Bidi.LRM_AFTER) != 0) {\r
367                             uc = LRM_CHAR;\r
368                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {\r
369                             uc = RLM_CHAR;\r
370                         } else {\r
371                             uc = 0;\r
372                         }\r
373                         if (uc != 0) {\r
374                             dest.append(uc);\r
375                         }\r
376                     }\r
377                 }\r
378             }\r
379         } else {\r
380             /* reverse output */\r
381             if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {\r
382                 /* do not insert Bidi controls */\r
383                 for(run = runCount; --run >= 0; ) {\r
384                     BidiRun bidiRun = bidi.getVisualRun(run);\r
385                     if (bidiRun.isEvenRun()) {\r
386                         dest.append(doWriteReverse(text,\r
387                                                    bidiRun.start, bidiRun.limit,\r
388                                                    options & ~Bidi.DO_MIRRORING));\r
389                     } else {\r
390                         dest.append(doWriteForward(text, bidiRun.start,\r
391                                                    bidiRun.limit, options));\r
392                     }\r
393                 }\r
394             } else {\r
395                 /* insert Bidi controls for "inverse Bidi" */\r
396 \r
397                 byte[] dirProps = bidi.dirProps;\r
398 \r
399                 for (run = runCount; --run >= 0; ) {\r
400                     /* reverse output */\r
401                     BidiRun bidiRun = bidi.getVisualRun(run);\r
402                     if (bidiRun.isEvenRun()) {\r
403                         if (dirProps[bidiRun.limit - 1] != Bidi.L) {\r
404                             dest.append(LRM_CHAR);\r
405                         }\r
406 \r
407                         dest.append(doWriteReverse(text, bidiRun.start,\r
408                                 bidiRun.limit, options & ~Bidi.DO_MIRRORING));\r
409 \r
410                         if (dirProps[bidiRun.start] != Bidi.L) {\r
411                             dest.append(LRM_CHAR);\r
412                         }\r
413                     } else {\r
414                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {\r
415                             dest.append(RLM_CHAR);\r
416                         }\r
417 \r
418                         dest.append(doWriteForward(text, bidiRun.start,\r
419                                                    bidiRun.limit, options));\r
420 \r
421                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {\r
422                             dest.append(RLM_CHAR);\r
423                         }\r
424                     }\r
425                 }\r
426             }\r
427         }\r
428 \r
429         return dest.toString();\r
430     }\r
431 }\r