/*
 *******************************************************************************
 *   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */
/* Written by Simon Montagu, Matitiahu Allouche
 * (ported from C code written by Markus W. Scherer)
 */

package com.ibm.icu.text;

import com.ibm.icu.lang.UCharacter;

/**
 * Package-private collection of static helpers that assemble output strings
 * from the text and visual runs of a {@link Bidi} object.
 *
 * <p>The entry point visible here is {@link #writeReordered(Bidi, int)}; it
 * delegates the copying of each run to {@code doWriteForward} and
 * {@code doWriteReverse}.
 */
final class BidiWriter {

    /** Bidi control code points */
    /* U+200E, appended wherever the code below decides an "LRM" is needed */
    static final char LRM_CHAR = 0x200e;
    /* U+200F, appended wherever the code below decides an "RLM" is needed */
    static final char RLM_CHAR = 0x200f;

    /*
     * Bit mask with one bit set for each of UCharacter.RIGHT_TO_LEFT and
     * UCharacter.RIGHT_TO_LEFT_ARABIC; it is tested against
     * Bidi.DirPropFlag(...) and passed to bidi.testDirPropFlagAt(...) below.
     */
    static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
                                  1 << UCharacter.RIGHT_TO_LEFT_ARABIC);

    /*
     * NOTE(review): the text of this file is damaged at this point. The body
     * of IsCombining stops after "((1<" and the source resumes inside the
     * switch statement of writeReverse. The remainder of IsCombining, the
     * doWriteForward overloads called further down, and the beginning of
     * writeReverse (its signature, the declarations of "src", "dest" and
     * "srcLength", the switch header and the case(s) preceding
     * KEEP_BASE_COMBINING) are not present. Restore them from a pristine
     * copy of the file; nothing here should be reconstructed by guesswork.
     */
    private static boolean IsCombining(int type) { return ((1< 0); break;

        case Bidi.KEEP_BASE_COMBINING:
            /*
             * Here, too, the destination
             * run will have the same length as the source run,
             * and there is no mirroring.
             * We do need to keep combining characters with their base
             * characters.
             */
            srcLength = src.length();

            /* preserve character integrity */
            do {
                /* i is always after the last code unit known to need to be kept
                 * in this segment */
                int c;
                int i = srcLength;

                /* collect code units and modifier letters for one base
                 * character */
                /* walk backwards one code point at a time, stopping after the
                 * first code point whose type does not satisfy IsCombining
                 * (or when the start of src is reached) */
                do {
                    c = UTF16.charAt(src, srcLength - 1);
                    srcLength -= UTF16.getCharCount(c);
                } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));

                /* copy this "user character" */
                /* the segment [srcLength, i) is appended in its original
                 * internal order */
                dest.append(src.substring(srcLength, i));
            } while(srcLength > 0);
            break;

        default:
            /*
             * With several "complicated" options set, this is the most
             * general and the slowest copying of an RTL run.
             * We will do mirroring, remove Bidi controls, and
             * keep combining characters with their base characters
             * as requested.
             */
            srcLength = src.length();

            /* preserve character integrity */
            do {
                /* i is always after the last code unit known to need to be kept
                 * in this segment */
                int i = srcLength;

                /* collect code units for one base character */
                int c = UTF16.charAt(src, srcLength - 1);
                srcLength -= UTF16.getCharCount(c);
                if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
                    /* collect modifier letters for this base character */
                    while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
                        c = UTF16.charAt(src, srcLength - 1);
                        srcLength -= UTF16.getCharCount(c);
                    }
                }

                if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
                    Bidi.IsBidiControlChar(c)) {
                    /* do not copy this Bidi control character */
                    /* "continue" in a do-while jumps to the loop condition,
                     * so the scan resumes with the preceding segment */
                    continue;
                }

                /* copy this "user character" */
                /* j marks where the verbatim copy starts; it is moved past
                 * the first code point when that one is written mirrored */
                int j = srcLength;
                if((options & Bidi.DO_MIRRORING) != 0) {
                    /* mirror only the base character */
                    c = UCharacter.getMirror(c);
                    UTF16.append(dest, c);
                    j += UTF16.getCharCount(c);
                }
                dest.append(src.substring(j, i));
            } while(srcLength > 0);
            break;
        } /* end of switch */

        return dest.toString();
    }

    /**
     * Convenience overload for a slice of a char array: wraps
     * {@code text[start, limit)} in a new String and passes it, together
     * with {@code options}, to {@code writeReverse}.
     *
     * @param text    the array holding the characters
     * @param start   index of the first char of the slice
     * @param limit   index just past the last char of the slice
     * @param options option bits, forwarded unchanged
     * @return whatever {@code writeReverse} returns for the slice
     */
    static String doWriteReverse(char[] text, int start, int limit, int options) {
        return writeReverse(new String(text, start, limit - start), options);
    }

    /**
     * Builds one output string from all visual runs of {@code bidi}.
     *
     * <p>The {@code options} argument is first adjusted from
     * {@code bidi.reorderingOptions} and {@code bidi.reorderingMode} (see the
     * comments in the body). Then each visual run is appended to the result:
     * <ul>
     * <li>Without {@code Bidi.OUTPUT_REVERSE}, runs are visited from first to
     *     last; runs for which {@code isEvenRun()} is true go through
     *     {@code doWriteForward} with {@code Bidi.DO_MIRRORING} cleared, all
     *     other runs through {@code doWriteReverse}.</li>
     * <li>With {@code Bidi.OUTPUT_REVERSE}, runs are visited from last to
     *     first and the two helpers swap roles.</li>
     * <li>If {@code Bidi.INSERT_LRM_FOR_NUMERIC} is still set after the
     *     adjustments, {@link #LRM_CHAR} / {@link #RLM_CHAR} may be appended
     *     before and after each run.</li>
     * </ul>
     *
     * @param bidi    supplies the text, the visual runs, the per-character
     *                {@code dirProps} and the reordering mode and options
     * @param options bit set of the {@code Bidi} option flags tested in the body
     * @return the concatenation of all processed runs and any inserted marks
     */
    static String writeReordered(Bidi bidi, int options) {
        int run, runCount;
        StringBuilder dest;
        char[] text = bidi.text;
        runCount = bidi.countRuns();

        /*
         * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
         * reordering mode (checked below) is appropriate.
         */
        if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
            options |= Bidi.INSERT_LRM_FOR_NUMERIC;
            options &= ~Bidi.REMOVE_BIDI_CONTROLS;
        }
        /*
         * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
         * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
         */
        /* This test runs second, so it wins when both reordering options
         * are set. */
        if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
            options |= Bidi.REMOVE_BIDI_CONTROLS;
            options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
        }
        /*
         * If we do not perform the "inverse Bidi" algorithm, then we
         * don't need to insert any LRMs, and don't need to test for it.
         */
        if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
            (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT)  &&
            (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
            (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
            options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
        }
        /* Reserve twice the text length when marks may be inserted,
         * otherwise exactly the text length. */
        dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
                                 bidi.length * 2 : bidi.length);
        /*
         * Iterate through all visual runs and copy the run text segments to
         * the destination, according to the options.
         *
         * The tests for where to insert LRMs ignore the fact that there may be
         * BN codes or non-BMP code points at the beginning and end of a run;
         * they may insert LRMs unnecessarily but the tests are faster this way
         * (this would have to be improved for UTF-8).
         */
        if ((options & Bidi.OUTPUT_REVERSE) == 0) {
            /* forward output */
            if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
                /* do not insert Bidi controls */
                for (run = 0; run < runCount; ++run) {
                    BidiRun bidiRun = bidi.getVisualRun(run);
                    if (bidiRun.isEvenRun()) {
                        /* DO_MIRRORING is masked out for even runs */
                        dest.append(doWriteForward(text, bidiRun.start,
                                                   bidiRun.limit,
                                                   options & ~Bidi.DO_MIRRORING));
                    } else {
                        dest.append(doWriteReverse(text, bidiRun.start,
                                                   bidiRun.limit, options));
                    }
                }
            } else {
                /* insert Bidi controls for "inverse Bidi" */
                byte[] dirProps = bidi.dirProps;
                char uc;
                int markFlag;

                for (run = 0; run < runCount; ++run) {
                    BidiRun bidiRun = bidi.getVisualRun(run);
                    /* NOTE(review): this initial 0 is overwritten by the
                     * assignment two statements below */
                    markFlag=0;
                    /* check if something relevant in insertPoints */
                    markFlag = bidi.runs[run].insertRemove;
                    if (markFlag < 0) { /* bidi controls count */
                        markFlag = 0;
                    }
                    if (bidiRun.isEvenRun()) {
                        /* in inverse mode, request an LRM before the run when
                         * its first char does not have dirProp L */
                        if (bidi.isInverse() &&
                                dirProps[bidiRun.start] != Bidi.L) {
                            markFlag |= Bidi.LRM_BEFORE;
                        }
                        /* LRM takes precedence over RLM when both "before"
                         * bits are set; uc == 0 means "no mark" */
                        if ((markFlag & Bidi.LRM_BEFORE) != 0) {
                            uc = LRM_CHAR;
                        } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
                            uc = RLM_CHAR;
                        } else {
                            uc = 0;
                        }
                        if (uc != 0) {
                            dest.append(uc);
                        }
                        dest.append(doWriteForward(text,
                                                   bidiRun.start, bidiRun.limit,
                                                   options & ~Bidi.DO_MIRRORING));

                        /* same check for the last char of the run */
                        if (bidi.isInverse() &&
                             dirProps[bidiRun.limit - 1] != Bidi.L) {
                            markFlag |= Bidi.LRM_AFTER;
                        }
                        if ((markFlag & Bidi.LRM_AFTER) != 0) {
                            uc = LRM_CHAR;
                        } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
                            uc = RLM_CHAR;
                        } else {
                            uc = 0;
                        }
                        if (uc != 0) {
                            dest.append(uc);
                        }
                    } else { /* RTL run */
                        /* The run is written reversed, so its last source
                         * char comes first in the output: in inverse mode,
                         * request an RLM before the run when that char is
                         * not covered by MASK_R_AL */
                        if (bidi.isInverse() &&
                            !bidi.testDirPropFlagAt(MASK_R_AL,
                                                    bidiRun.limit - 1)) {
                            markFlag |= Bidi.RLM_BEFORE;
                        }
                        if ((markFlag & Bidi.LRM_BEFORE) != 0) {
                            uc = LRM_CHAR;
                        } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
                            uc = RLM_CHAR;
                        } else {
                            uc = 0;
                        }
                        if (uc != 0) {
                            dest.append(uc);
                        }
                        dest.append(doWriteReverse(text, bidiRun.start,
                                                   bidiRun.limit, options));

                        /* likewise the first source char ends up last */
                        if(bidi.isInverse() &&
                                (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
                            markFlag |= Bidi.RLM_AFTER;
                        }
                        if ((markFlag & Bidi.LRM_AFTER) != 0) {
                            uc = LRM_CHAR;
                        } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
                            uc = RLM_CHAR;
                        } else {
                            uc = 0;
                        }
                        if (uc != 0) {
                            dest.append(uc);
                        }
                    }
                }
            }
        } else {
            /* reverse output */
            if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
                /* do not insert Bidi controls */
                /* runs are visited from the last visual run to the first */
                for(run = runCount; --run >= 0; ) {
                    BidiRun bidiRun = bidi.getVisualRun(run);
                    if (bidiRun.isEvenRun()) {
                        dest.append(doWriteReverse(text,
                                                   bidiRun.start, bidiRun.limit,
                                                   options & ~Bidi.DO_MIRRORING));
                    } else {
                        dest.append(doWriteForward(text, bidiRun.start,
                                                   bidiRun.limit, options));
                    }
                }
            } else {
                /* insert Bidi controls for "inverse Bidi" */
                /* NOTE(review): unlike the forward-output branch, this branch
                 * consults neither bidi.isInverse() nor runs[run].insertRemove;
                 * the marks depend only on dirProps at the run edges */
                byte[] dirProps = bidi.dirProps;

                for (run = runCount; --run >= 0; ) {
                    /* reverse output */
                    BidiRun bidiRun = bidi.getVisualRun(run);
                    if (bidiRun.isEvenRun()) {
                        if (dirProps[bidiRun.limit - 1] != Bidi.L) {
                            dest.append(LRM_CHAR);
                        }

                        dest.append(doWriteReverse(text, bidiRun.start,
                                                   bidiRun.limit,
                                                   options & ~Bidi.DO_MIRRORING));

                        if (dirProps[bidiRun.start] != Bidi.L) {
                            dest.append(LRM_CHAR);
                        }
                    } else {
                        if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
                            dest.append(RLM_CHAR);
                        }

                        dest.append(doWriteForward(text, bidiRun.start,
                                                   bidiRun.limit, options));

                        if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
                            dest.append(RLM_CHAR);
                        }
                    }
                }
            }
        }

        return dest.toString();
    }
}