]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/classes/core/src/com/ibm/icu/text/BidiWriter.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / classes / core / src / com / ibm / icu / text / BidiWriter.java
1 /*\r
2 *******************************************************************************\r
3 *   Copyright (C) 2001-2010, International Business Machines\r
4 *   Corporation and others.  All Rights Reserved.\r
5 *******************************************************************************\r
6 */\r
7 /* Written by Simon Montagu, Matitiahu Allouche\r
8  * (ported from C code written by Markus W. Scherer)\r
9  */\r
10 \r
11 package com.ibm.icu.text;\r
12 \r
13 import com.ibm.icu.lang.UCharacter;\r
14 \r
15 final class BidiWriter {\r
16 \r
17     /** Bidi control code points */\r
18     static final char LRM_CHAR = 0x200e;\r
19     static final char RLM_CHAR = 0x200f;\r
20     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |\r
21                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);\r
22 \r
23     private static boolean IsCombining(int type)\r
24     {\r
25         return ((1<<type &\r
26                 (1<<UCharacter.NON_SPACING_MARK |\r
27                  1<<UCharacter.COMBINING_SPACING_MARK |\r
28                  1<<UCharacter.ENCLOSING_MARK)) != 0);\r
29     }\r
30 \r
31     /*\r
32      * When we have OUTPUT_REVERSE set on writeReordered(), then we\r
33      * semantically write RTL runs in reverse and later reverse them again.\r
34      * Instead, we actually write them in forward order to begin with.\r
35      * However, if the RTL run was to be mirrored, we need to mirror here now\r
36      * since the implicit second reversal must not do it.\r
37      * It looks strange to do mirroring in LTR output, but it is only because\r
38      * we are writing RTL output in reverse.\r
39      */\r
40     private static String doWriteForward(String src, int options) {\r
41         /* optimize for several combinations of options */\r
42         switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {\r
43         case 0: {\r
44             /* simply return the LTR run */\r
45             return src;\r
46         }\r
47         case Bidi.DO_MIRRORING: {\r
48             StringBuffer dest = new StringBuffer(src.length());\r
49 \r
50             /* do mirroring */\r
51             int i=0;\r
52             int c;\r
53 \r
54             do {\r
55                 c = UTF16.charAt(src, i);\r
56                 i += UTF16.getCharCount(c);\r
57                 UTF16.append(dest, UCharacter.getMirror(c));\r
58             } while(i < src.length());\r
59             return dest.toString();\r
60         }\r
61         case Bidi.REMOVE_BIDI_CONTROLS: {\r
62             StringBuilder dest = new StringBuilder(src.length());\r
63 \r
64             /* copy the LTR run and remove any Bidi control characters */\r
65             int i = 0;\r
66             char c;\r
67             do {\r
68                 c = src.charAt(i++);\r
69                 if(!Bidi.IsBidiControlChar(c)) {\r
70                     dest.append(c);\r
71                 }\r
72             } while(i < src.length());\r
73             return dest.toString();\r
74         }\r
75         default: {\r
76             StringBuffer dest = new StringBuffer(src.length());\r
77 \r
78             /* remove Bidi control characters and do mirroring */\r
79             int i = 0;\r
80             int c;\r
81             do {\r
82                 c = UTF16.charAt(src, i);\r
83                 i += UTF16.getCharCount(c);\r
84                 if(!Bidi.IsBidiControlChar(c)) {\r
85                     UTF16.append(dest, UCharacter.getMirror(c));\r
86                 }\r
87             } while(i < src.length());\r
88             return dest.toString();\r
89         }\r
90         } /* end of switch */\r
91     }\r
92 \r
93     private static String doWriteForward(char[] text, int start, int limit,\r
94                                          int options)\r
95     {\r
96         return doWriteForward(new String(text, start, limit - start), options);\r
97     }\r
98 \r
99     static String writeReverse(String src, int options) {\r
100         /*\r
101          * RTL run -\r
102          *\r
103          * RTL runs need to be copied to the destination in reverse order\r
104          * of code points, not code units, to keep Unicode characters intact.\r
105          *\r
106          * The general strategy for this is to read the source text\r
107          * in backward order, collect all code units for a code point\r
108          * (and optionally following combining characters, see below),\r
109          * and copy all these code units in ascending order\r
110          * to the destination for this run.\r
111          *\r
112          * Several options request whether combining characters\r
113          * should be kept after their base characters,\r
114          * whether Bidi control characters should be removed, and\r
115          * whether characters should be replaced by their mirror-image\r
116          * equivalent Unicode characters.\r
117          */\r
118         StringBuffer dest = new StringBuffer(src.length());\r
119 \r
120         /* optimize for several combinations of options */\r
121         switch (options &\r
122                 (Bidi.REMOVE_BIDI_CONTROLS |\r
123                  Bidi.DO_MIRRORING |\r
124                  Bidi.KEEP_BASE_COMBINING)) {\r
125 \r
126         case 0:\r
127             /*\r
128              * With none of the "complicated" options set, the destination\r
129              * run will have the same length as the source run,\r
130              * and there is no mirroring and no keeping combining characters\r
131              * with their base characters.\r
132              *\r
133              * XXX: or dest = UTF16.reverse(new StringBuffer(src));\r
134              */\r
135 \r
136             int srcLength = src.length();\r
137 \r
138             /* preserve character integrity */\r
139             do {\r
140                 /* i is always after the last code unit known to need to be kept\r
141                  *  in this segment */\r
142                 int i = srcLength;\r
143 \r
144                 /* collect code units for one base character */\r
145                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,\r
146                                                              srcLength - 1));\r
147 \r
148                 /* copy this base character */\r
149                 dest.append(src.substring(srcLength, i));\r
150             } while(srcLength > 0);\r
151             break;\r
152 \r
153         case Bidi.KEEP_BASE_COMBINING:\r
154             /*\r
155              * Here, too, the destination\r
156              * run will have the same length as the source run,\r
157              * and there is no mirroring.\r
158              * We do need to keep combining characters with their base\r
159              * characters.\r
160              */\r
161             srcLength = src.length();\r
162 \r
163             /* preserve character integrity */\r
164             do {\r
165                 /* i is always after the last code unit known to need to be kept\r
166                  *  in this segment */\r
167                 int c;\r
168                 int i = srcLength;\r
169 \r
170                 /* collect code units and modifier letters for one base\r
171                  * character */\r
172                 do {\r
173                     c = UTF16.charAt(src, srcLength - 1);\r
174                     srcLength -= UTF16.getCharCount(c);\r
175                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));\r
176 \r
177                 /* copy this "user character" */\r
178                 dest.append(src.substring(srcLength, i));\r
179             } while(srcLength > 0);\r
180             break;\r
181 \r
182         default:\r
183             /*\r
184              * With several "complicated" options set, this is the most\r
185              * general and the slowest copying of an RTL run.\r
186              * We will do mirroring, remove Bidi controls, and\r
187              * keep combining characters with their base characters\r
188              * as requested.\r
189              */\r
190             srcLength = src.length();\r
191 \r
192             /* preserve character integrity */\r
193             do {\r
194                 /* i is always after the last code unit known to need to be kept\r
195                  *  in this segment */\r
196                 int i = srcLength;\r
197 \r
198                 /* collect code units for one base character */\r
199                 int c = UTF16.charAt(src, srcLength - 1);\r
200                 srcLength -= UTF16.getCharCount(c);\r
201                 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {\r
202                     /* collect modifier letters for this base character */\r
203                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {\r
204                         c = UTF16.charAt(src, srcLength - 1);\r
205                         srcLength -= UTF16.getCharCount(c);\r
206                     }\r
207                 }\r
208 \r
209                 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&\r
210                     Bidi.IsBidiControlChar(c)) {\r
211                     /* do not copy this Bidi control character */\r
212                     continue;\r
213                 }\r
214 \r
215                 /* copy this "user character" */\r
216                 int j = srcLength;\r
217                 if((options & Bidi.DO_MIRRORING) != 0) {\r
218                     /* mirror only the base character */\r
219                     c = UCharacter.getMirror(c);\r
220                     UTF16.append(dest, c);\r
221                     j += UTF16.getCharCount(c);\r
222                 }\r
223                 dest.append(src.substring(j, i));\r
224             } while(srcLength > 0);\r
225             break;\r
226         } /* end of switch */\r
227 \r
228         return dest.toString();\r
229     }\r
230 \r
231     static String doWriteReverse(char[] text, int start, int limit, int options)\r
232     {\r
233         return writeReverse(new String(text, start, limit - start), options);\r
234     }\r
235 \r
236     static String writeReordered(Bidi bidi, int options)\r
237     {\r
238         int run, runCount;\r
239         StringBuilder dest;\r
240         char[] text = bidi.text;\r
241         runCount = bidi.countRuns();\r
242 \r
243         /*\r
244          * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the\r
245          * reordering mode (checked below) is appropriate.\r
246          */\r
247         if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {\r
248             options |= Bidi.INSERT_LRM_FOR_NUMERIC;\r
249             options &= ~Bidi.REMOVE_BIDI_CONTROLS;\r
250         }\r
251         /*\r
252          * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS\r
253          * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.\r
254          */\r
255         if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {\r
256             options |= Bidi.REMOVE_BIDI_CONTROLS;\r
257             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;\r
258         }\r
259         /*\r
260          * If we do not perform the "inverse Bidi" algorithm, then we\r
261          * don't need to insert any LRMs, and don't need to test for it.\r
262          */\r
263         if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&\r
264             (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT)  &&\r
265             (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&\r
266             (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {\r
267             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;\r
268         }\r
269         dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?\r
270                                  bidi.length * 2 : bidi.length);\r
271         /*\r
272          * Iterate through all visual runs and copy the run text segments to\r
273          * the destination, according to the options.\r
274          *\r
275          * The tests for where to insert LRMs ignore the fact that there may be\r
276          * BN codes or non-BMP code points at the beginning and end of a run;\r
277          * they may insert LRMs unnecessarily but the tests are faster this way\r
278          * (this would have to be improved for UTF-8).\r
279          */\r
280         if ((options & Bidi.OUTPUT_REVERSE) == 0) {\r
281             /* forward output */\r
282             if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {\r
283                 /* do not insert Bidi controls */\r
284                 for (run = 0; run < runCount; ++run) {\r
285                     BidiRun bidiRun = bidi.getVisualRun(run);\r
286                     if (bidiRun.isEvenRun()) {\r
287                         dest.append(doWriteForward(text, bidiRun.start,\r
288                                                    bidiRun.limit,\r
289                                                    options & ~Bidi.DO_MIRRORING));\r
290                      } else {\r
291                         dest.append(doWriteReverse(text, bidiRun.start,\r
292                                                    bidiRun.limit, options));\r
293                      }\r
294                 }\r
295             } else {\r
296                 /* insert Bidi controls for "inverse Bidi" */\r
297                 byte[] dirProps = bidi.dirProps;\r
298                 char uc;\r
299                 int markFlag;\r
300 \r
301                 for (run = 0; run < runCount; ++run) {\r
302                     BidiRun bidiRun = bidi.getVisualRun(run);\r
303                     markFlag=0;\r
304                     /* check if something relevant in insertPoints */\r
305                     markFlag = bidi.runs[run].insertRemove;\r
306                     if (markFlag < 0) { /* bidi controls count */\r
307                         markFlag = 0;\r
308                     }\r
309                     if (bidiRun.isEvenRun()) {\r
310                         if (bidi.isInverse() &&\r
311                                 dirProps[bidiRun.start] != Bidi.L) {\r
312                             markFlag |= Bidi.LRM_BEFORE;\r
313                         }\r
314                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {\r
315                             uc = LRM_CHAR;\r
316                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {\r
317                             uc = RLM_CHAR;\r
318                         } else {\r
319                             uc = 0;\r
320                         }\r
321                         if (uc != 0) {\r
322                             dest.append(uc);\r
323                         }\r
324                         dest.append(doWriteForward(text,\r
325                                                    bidiRun.start, bidiRun.limit,\r
326                                                    options & ~Bidi.DO_MIRRORING));\r
327 \r
328                         if (bidi.isInverse() &&\r
329                              dirProps[bidiRun.limit - 1] != Bidi.L) {\r
330                             markFlag |= Bidi.LRM_AFTER;\r
331                         }\r
332                         if ((markFlag & Bidi.LRM_AFTER) != 0) {\r
333                             uc = LRM_CHAR;\r
334                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {\r
335                             uc = RLM_CHAR;\r
336                         } else {\r
337                             uc = 0;\r
338                         }\r
339                         if (uc != 0) {\r
340                             dest.append(uc);\r
341                         }\r
342                     } else { /* RTL run */\r
343                         if (bidi.isInverse() &&\r
344                             !bidi.testDirPropFlagAt(MASK_R_AL,\r
345                                                     bidiRun.limit - 1)) {\r
346                             markFlag |= Bidi.RLM_BEFORE;\r
347                         }\r
348                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {\r
349                             uc = LRM_CHAR;\r
350                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {\r
351                             uc = RLM_CHAR;\r
352                         } else {\r
353                             uc = 0;\r
354                         }\r
355                         if (uc != 0) {\r
356                             dest.append(uc);\r
357                         }\r
358                         dest.append(doWriteReverse(text, bidiRun.start,\r
359                                                    bidiRun.limit, options));\r
360 \r
361                         if(bidi.isInverse() &&\r
362                                 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {\r
363                             markFlag |= Bidi.RLM_AFTER;\r
364                         }\r
365                         if ((markFlag & Bidi.LRM_AFTER) != 0) {\r
366                             uc = LRM_CHAR;\r
367                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {\r
368                             uc = RLM_CHAR;\r
369                         } else {\r
370                             uc = 0;\r
371                         }\r
372                         if (uc != 0) {\r
373                             dest.append(uc);\r
374                         }\r
375                     }\r
376                 }\r
377             }\r
378         } else {\r
379             /* reverse output */\r
380             if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {\r
381                 /* do not insert Bidi controls */\r
382                 for(run = runCount; --run >= 0; ) {\r
383                     BidiRun bidiRun = bidi.getVisualRun(run);\r
384                     if (bidiRun.isEvenRun()) {\r
385                         dest.append(doWriteReverse(text,\r
386                                                    bidiRun.start, bidiRun.limit,\r
387                                                    options & ~Bidi.DO_MIRRORING));\r
388                     } else {\r
389                         dest.append(doWriteForward(text, bidiRun.start,\r
390                                                    bidiRun.limit, options));\r
391                     }\r
392                 }\r
393             } else {\r
394                 /* insert Bidi controls for "inverse Bidi" */\r
395 \r
396                 byte[] dirProps = bidi.dirProps;\r
397 \r
398                 for (run = runCount; --run >= 0; ) {\r
399                     /* reverse output */\r
400                     BidiRun bidiRun = bidi.getVisualRun(run);\r
401                     if (bidiRun.isEvenRun()) {\r
402                         if (dirProps[bidiRun.limit - 1] != Bidi.L) {\r
403                             dest.append(LRM_CHAR);\r
404                         }\r
405 \r
406                         dest.append(doWriteReverse(text, bidiRun.start,\r
407                                 bidiRun.limit, options & ~Bidi.DO_MIRRORING));\r
408 \r
409                         if (dirProps[bidiRun.start] != Bidi.L) {\r
410                             dest.append(LRM_CHAR);\r
411                         }\r
412                     } else {\r
413                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {\r
414                             dest.append(RLM_CHAR);\r
415                         }\r
416 \r
417                         dest.append(doWriteForward(text, bidiRun.start,\r
418                                                    bidiRun.limit, options));\r
419 \r
420                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {\r
421                             dest.append(RLM_CHAR);\r
422                         }\r
423                     }\r
424                 }\r
425             }\r
426         }\r
427 \r
428         return dest.toString();\r
429     }\r
430 }\r