2 *******************************************************************************
\r
3 * Copyright (C) 2008-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.charset;
\r
9 import java.nio.ByteBuffer;
\r
10 import java.nio.CharBuffer;
\r
11 import java.nio.IntBuffer;
\r
12 import java.nio.charset.CharsetDecoder;
\r
13 import java.nio.charset.CharsetEncoder;
\r
14 import java.nio.charset.CoderResult;
\r
16 import com.ibm.icu.text.UTF16;
\r
17 import com.ibm.icu.text.UnicodeSet;
\r
20 * @author Michael Ow
\r
23 class CharsetISCII extends CharsetICU {
\r
24 private static final short UCNV_OPTIONS_VERSION_MASK = 0X0f;
\r
25 //private static final short NUKTA = 0x093c;
\r
26 //private static final short HALANT = 0x094d;
\r
27 private static final short ZWNJ = 0x200c; /* Zero Width Non Joiner */
\r
28 private static final short ZWJ = 0x200d; /* Zero Width Joiner */
\r
29 //private static final int INVALID_CHAR = 0xffff;
\r
30 private static final short ATR = 0xef; /* Attribute code */
\r
31 private static final short EXT = 0xf0; /* Extension code */
\r
32 private static final short DANDA = 0x0964;
\r
33 private static final short DOUBLE_DANDA = 0x0965;
\r
34 private static final short ISCII_NUKTA = 0xe9;
\r
35 private static final short ISCII_HALANT = 0xe8;
\r
36 private static final short ISCII_DANDA = 0xea;
\r
37 private static final short ISCII_VOWEL_SIGN_E = 0xe0;
\r
38 private static final short ISCII_INV = 0xd9;
\r
39 private static final short INDIC_BLOCK_BEGIN = 0x0900;
\r
40 private static final short INDIC_BLOCK_END = 0x0d7f;
\r
41 private static final short INDIC_RANGE = (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN);
\r
42 private static final short VOCALLIC_RR = 0x0931;
\r
43 private static final short LF = 0x0a;
\r
44 private static final short ASCII_END = 0xa0;
\r
45 private static final short TELUGU_DELTA = (UniLang.DELTA * UniLang.TELUGU);
\r
46 private static final short DEV_ABBR_SIGN = 0x0970;
\r
47 private static final short DEV_ANUDATTA = 0x0952;
\r
48 private static final short EXT_RANGE_BEGIN = 0xa1;
\r
49 private static final short EXT_RANGE_END = 0xee;
\r
50 private static final short PNJ_DELTA = 0x100;
\r
51 private static final int NO_CHAR_MARKER = 0xfffe;
\r
53 /* Used for proper conversion to and from Gurmukhi */
\r
54 private static UnicodeSet PNJ_BINDI_TIPPI_SET;
\r
55 private static UnicodeSet PNJ_CONSONANT_SET;
\r
56 private static final short PNJ_BINDI = 0x0a02;
\r
57 private static final short PNJ_TIPPI = 0x0a70;
\r
58 private static final short PNJ_SIGN_VIRAMA = 0x0a4d;
\r
59 private static final short PNJ_ADHAK = 0x0a71;
\r
60 private static final short PNJ_HA = 0x0a39;
\r
61 private static final short PNJ_RRA = 0x0a5c;
\r
63 private static final class UniLang {
\r
64 static final short DEVALANGARI = 0;
\r
65 static final short BENGALI = DEVALANGARI + 1;
\r
66 static final short GURMUKHI = BENGALI + 1;
\r
67 static final short GUJARATI = GURMUKHI + 1;
\r
68 static final short ORIYA = GUJARATI + 1;
\r
69 static final short TAMIL = ORIYA + 1;
\r
70 static final short TELUGU = TAMIL + 1;
\r
71 static final short KANNADA = TELUGU + 1;
\r
72 static final short MALAYALAM = KANNADA + 1;
\r
73 static final short DELTA = 0x80;
\r
75 @SuppressWarnings("unused")
\r
76 private static final class ISCIILang {
\r
77 static final short DEF = 0x40;
\r
78 static final short RMN = 0x41;
\r
79 static final short DEV = 0x42;
\r
80 static final short BNG = 0x43;
\r
81 static final short TML = 0x44;
\r
82 static final short TLG = 0x45;
\r
83 static final short ASM = 0x46;
\r
84 static final short ORI = 0x47;
\r
85 static final short KND = 0x48;
\r
86 static final short MLM = 0x49;
\r
87 static final short GJR = 0x4a;
\r
88 static final short PNJ = 0x4b;
\r
89 static final short ARB = 0x71;
\r
90 static final short PES = 0x72;
\r
91 static final short URD = 0x73;
\r
92 static final short SND = 0x74;
\r
93 static final short KSM = 0x75;
\r
94 static final short PST = 0x76;
\r
97 private static final class MaskEnum {
\r
98 static final short DEV_MASK = 0x80;
\r
99 static final short PNJ_MASK = 0x40;
\r
100 static final short GJR_MASK = 0x20;
\r
101 static final short ORI_MASK = 0x10;
\r
102 static final short BNG_MASK = 0x08;
\r
103 static final short KND_MASK = 0x04;
\r
104 static final short MLM_MASK = 0x02;
\r
105 static final short TML_MASK = 0x01;
\r
106 static final short ZERO = 0x00;
\r
109 private final String ISCII_CNV_PREFIX = "ISCII,version=";
\r
111 @SuppressWarnings("unused")
\r
112 private final class UConverterDataISCII {
\r
114 int contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */
\r
115 int contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */
\r
116 short defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
\r
117 short currentDeltaFromUnicode; /* current delta in Indic block */
\r
118 short currentDeltaToUnicode; /* current delta in Indic block */
\r
119 short currentMaskFromUnicode; /* mask for current state in fromUnicode */
\r
120 short currentMaskToUnicode; /* mask for current state in toUnicode */
\r
121 short defMaskToUnicode; /* mask for default state in toUnicode */
\r
122 boolean isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
\r
123 boolean resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered */
\r
125 int prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
\r
127 UConverterDataISCII(int option, String name) {
\r
128 this.option = option;
\r
134 void initialize() {
\r
135 this.contextCharToUnicode = NO_CHAR_MARKER; /* contextCharToUnicode */
\r
136 this.currentDeltaFromUnicode = 0x0000; /* contextCharFromUnicode */
\r
137 this.defDeltaToUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* defDeltaToUnicode */
\r
138 this.currentDeltaFromUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* currentDeltaFromUnicode */
\r
139 this.currentDeltaToUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* currentDeltaToUnicode */
\r
140 this.currentMaskToUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* currentMaskToUnicode */
\r
141 this.currentMaskFromUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* currentMaskFromUnicode */
\r
142 this.defMaskToUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* defMaskToUnicode */
\r
143 this.isFirstBuffer = true; /* isFirstBuffer */
\r
144 this.resetToDefaultToUnicode = false; /* resetToDefaultToUnicode */
\r
145 this.prevToUnicodeStatus = 0x0000;
\r
149 private static final class LookupDataStruct {
\r
/**
 * Creates one row of per-variant defaults.
 *
 * @param uniLang   script index (a {@code UniLang} constant)
 * @param maskEnum  validity bit of the script (a {@code MaskEnum} constant)
 * @param isciiLang ISCII script code (an {@code ISCIILang} constant)
 */
LookupDataStruct(short uniLang, short maskEnum, short isciiLang) {
    this.isciiLang = isciiLang;
    this.maskEnum = maskEnum;
    this.uniLang = uniLang;
}
\r
161 private static final LookupDataStruct [] lookupInitialData = {
\r
162 new LookupDataStruct(UniLang.DEVALANGARI, MaskEnum.DEV_MASK, ISCIILang.DEV),
\r
163 new LookupDataStruct(UniLang.BENGALI, MaskEnum.BNG_MASK, ISCIILang.BNG),
\r
164 new LookupDataStruct(UniLang.GURMUKHI, MaskEnum.PNJ_MASK, ISCIILang.PNJ),
\r
165 new LookupDataStruct(UniLang.GUJARATI, MaskEnum.GJR_MASK, ISCIILang.GJR),
\r
166 new LookupDataStruct(UniLang.ORIYA, MaskEnum.ORI_MASK, ISCIILang.ORI),
\r
167 new LookupDataStruct(UniLang.TAMIL, MaskEnum.TML_MASK, ISCIILang.TML),
\r
168 new LookupDataStruct(UniLang.TELUGU, MaskEnum.KND_MASK, ISCIILang.TLG),
\r
169 new LookupDataStruct(UniLang.KANNADA, MaskEnum.KND_MASK, ISCIILang.KND),
\r
170 new LookupDataStruct(UniLang.MALAYALAM, MaskEnum.MLM_MASK, ISCIILang.MLM)
\r
174 * The values in validity table are indexed by the lower bits of Unicode
\r
175 * range 0x0900 - 0x097f. The values have a structure like:
\r
176 * -----------------------------------------------------------------
\r
177 * |DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
\r
178 * | | | | | ASM | KND | | |
\r
179 * -----------------------------------------------------------------
\r
180 * If a code point is valid in a particular script
\r
181 * then that bit is turned on
\r
183 * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
\r
184 * to represent these languages
\r
186 * Telugu and Kannada have the same code points except for Vocallic_RR, which we special-case,
\r
187 * and combine and use 1 bit to represent these languages
\r
/*
 * validityTable: one script-validity bitmask per code point, 128 entries for
 * offsets 0x00..0x7f from 0x900. Each entry is a sum of MaskEnum bits, written
 * with eight terms so that every term position corresponds to one script column
 * (DEV, PNJ, GJR, ORI, BNG, KND, MLM, TML); MaskEnum.ZERO marks an unset bit.
 *
 * Each entry comment reads "ISCII: Valid: Unicode". The "Valid" annotation does
 * not always equal the value the expression evaluates to (e.g. 0x901 is
 * annotated 0xb8 but sums to 0xf8, 0x92e is annotated 0xfe but sums to 0xff);
 * the expression is what the code uses. NOTE(review): presumably a result of the
 * manual edit mentioned below -- confirm before "correcting" either side.
 */
189 private static final short validityTable[] = {
\r
190 /* This state table is tool generated so please do not edit unless you know exactly what you are doing */
\r
191 /* Note: This table was edited to mirror the Windows XP implementation */
\r
192 /* ISCII: Valid: Unicode */
\r
193 /* 0xa0: 0x00: 0x900 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
194 /* 0xa1: 0xb8: 0x901 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
195 /* 0xa2: 0xfe: 0x902 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
196 /* 0xa3: 0xbf: 0x903 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
197 /* 0x00: 0x00: 0x904 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
198 /* 0xa4: 0xff: 0x905 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
199 /* 0xa5: 0xff: 0x906 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
200 /* 0xa6: 0xff: 0x907 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
201 /* 0xa7: 0xff: 0x908 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
202 /* 0xa8: 0xff: 0x909 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
203 /* 0xa9: 0xff: 0x90a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
204 /* 0xaa: 0xfe: 0x90b */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
205 /* 0x00: 0x00: 0x90c */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
206 /* 0xae: 0x80: 0x90d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
207 /* 0xab: 0x87: 0x90e */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
208 /* 0xac: 0xff: 0x90f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
209 /* 0xad: 0xff: 0x910 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
210 /* 0xb2: 0x80: 0x911 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
211 /* 0xaf: 0x87: 0x912 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
212 /* 0xb0: 0xff: 0x913 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
213 /* 0xb1: 0xff: 0x914 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
214 /* 0xb3: 0xff: 0x915 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
215 /* 0xb4: 0xfe: 0x916 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
216 /* 0xb5: 0xfe: 0x917 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
217 /* 0xb6: 0xfe: 0x918 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
218 /* 0xb7: 0xff: 0x919 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
219 /* 0xb8: 0xff: 0x91a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
220 /* 0xb9: 0xfe: 0x91b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
221 /* 0xba: 0xff: 0x91c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
222 /* 0xbb: 0xfe: 0x91d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
223 /* 0xbc: 0xff: 0x91e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
224 /* 0xbd: 0xff: 0x91f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
225 /* 0xbe: 0xfe: 0x920 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
226 /* 0xbf: 0xfe: 0x921 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
227 /* 0xc0: 0xfe: 0x922 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
228 /* 0xc1: 0xff: 0x923 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
229 /* 0xc2: 0xff: 0x924 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
230 /* 0xc3: 0xfe: 0x925 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
231 /* 0xc4: 0xfe: 0x926 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
232 /* 0xc5: 0xfe: 0x927 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
233 /* 0xc6: 0xff: 0x928 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
234 /* 0xc7: 0x81: 0x929 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.TML_MASK,
\r
235 /* 0xc8: 0xff: 0x92a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
236 /* 0xc9: 0xfe: 0x92b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
237 /* 0xca: 0xfe: 0x92c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
238 /* 0xcb: 0xfe: 0x92d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
239 /* 0xcc: 0xfe: 0x92e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
240 /* 0xcd: 0xff: 0x92f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
241 /* 0xcf: 0xff: 0x930 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
242 /* 0xd0: 0x87: 0x931 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
243 /* 0xd1: 0xff: 0x932 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
244 /* 0xd2: 0xb7: 0x933 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
245 /* 0xd3: 0x83: 0x934 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
246 /* 0xd4: 0xff: 0x935 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
247 /* 0xd5: 0xfe: 0x936 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
248 /* 0xd6: 0xbf: 0x937 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
249 /* 0xd7: 0xff: 0x938 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
250 /* 0xd8: 0xff: 0x939 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
251 /* 0x00: 0x00: 0x93a */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
252 /* 0x00: 0x00: 0x93b */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
253 /* 0xe9: 0xda: 0x93c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
254 /* 0x00: 0x00: 0x93d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
255 /* 0xda: 0xff: 0x93e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
256 /* 0xdb: 0xff: 0x93f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
257 /* 0xdc: 0xff: 0x940 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
258 /* 0xdd: 0xff: 0x941 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
259 /* 0xde: 0xff: 0x942 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
260 /* 0xdf: 0xbe: 0x943 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
261 /* 0x00: 0x00: 0x944 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
\r
262 /* 0xe3: 0x80: 0x945 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
263 /* 0xe0: 0x87: 0x946 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
264 /* 0xe1: 0xff: 0x947 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
265 /* 0xe2: 0xff: 0x948 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
266 /* 0xe7: 0x80: 0x949 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
267 /* 0xe4: 0x87: 0x94a */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
268 /* 0xe5: 0xff: 0x94b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
269 /* 0xe6: 0xff: 0x94c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
270 /* 0xe8: 0xff: 0x94d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
271 /* 0xec: 0x00: 0x94e */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
272 /* 0xed: 0x00: 0x94f */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
273 /* 0x00: 0x00: 0x950 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
274 /* 0x00: 0x00: 0x951 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
275 /* 0x00: 0x00: 0x952 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
276 /* 0x00: 0x00: 0x953 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
277 /* 0x00: 0x00: 0x954 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
278 /* 0x00: 0x00: 0x955 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
\r
279 /* 0x00: 0x00: 0x956 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
\r
280 /* 0x00: 0x00: 0x957 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
281 /* 0x00: 0x00: 0x958 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
282 /* 0x00: 0x00: 0x959 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
283 /* 0x00: 0x00: 0x95a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
284 /* 0x00: 0x00: 0x95b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
285 /* 0x00: 0x00: 0x95c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
286 /* 0x00: 0x00: 0x95d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
287 /* 0x00: 0x00: 0x95e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
288 /* 0xce: 0x98: 0x95f */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
289 /* 0x00: 0x00: 0x960 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
290 /* 0x00: 0x00: 0x961 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
\r
291 /* 0x00: 0x00: 0x962 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
292 /* 0x00: 0x00: 0x963 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
293 /* 0xea: 0xf8: 0x964 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
294 /* 0xeaea: 0x00: 0x965 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
295 /* 0xf1: 0xff: 0x966 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
296 /* 0xf2: 0xff: 0x967 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
297 /* 0xf3: 0xff: 0x968 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
298 /* 0xf4: 0xff: 0x969 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
299 /* 0xf5: 0xff: 0x96a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
300 /* 0xf6: 0xff: 0x96b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
301 /* 0xf7: 0xff: 0x96c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
302 /* 0xf8: 0xff: 0x96d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
303 /* 0xf9: 0xff: 0x96e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
304 /* 0xfa: 0xff: 0x96f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
\r
305 /* 0x00: 0x80: 0x970 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
\r
308 * The length of the array is 128 to provide values for 0x900..0x97f.
\r
309 * The last 15 entries for 0x971..0x97f of the table are all zero
\r
310 * because no Indic script uses such Unicode code points.
\r
313 /* 0x00: 0x00: 0x971 */ MaskEnum.ZERO,
\r
314 /* 0x00: 0x00: 0x972 */ MaskEnum.ZERO,
\r
315 /* 0x00: 0x00: 0x973 */ MaskEnum.ZERO,
\r
316 /* 0x00: 0x00: 0x974 */ MaskEnum.ZERO,
\r
317 /* 0x00: 0x00: 0x975 */ MaskEnum.ZERO,
\r
318 /* 0x00: 0x00: 0x976 */ MaskEnum.ZERO,
\r
319 /* 0x00: 0x00: 0x977 */ MaskEnum.ZERO,
\r
320 /* 0x00: 0x00: 0x978 */ MaskEnum.ZERO,
\r
321 /* 0x00: 0x00: 0x979 */ MaskEnum.ZERO,
\r
322 /* 0x00: 0x00: 0x97A */ MaskEnum.ZERO,
\r
323 /* 0x00: 0x00: 0x97B */ MaskEnum.ZERO,
\r
324 /* 0x00: 0x00: 0x97C */ MaskEnum.ZERO,
\r
325 /* 0x00: 0x00: 0x97D */ MaskEnum.ZERO,
\r
326 /* 0x00: 0x00: 0x97E */ MaskEnum.ZERO,
\r
327 /* 0x00: 0x00: 0x97F */ MaskEnum.ZERO,
\r
/*
 * Unicode-to-ISCII mapping table with one entry per code point 0x0900..0x097f
 * (the trailing comment on each row names the code point).
 * encodeLoop indexes it with the low byte of the source character after the
 * current script delta has been subtracted. Entries above 0xFF hold two bytes,
 * which WriteToTargetFromU emits high byte first.
 * NOTE(review): 0xFFFF entries presumably mean "no mapping"
 * (UConverterConstants.missingCharMarker) - confirm against that constant.
 */
330 private static final char fromUnicodeTable[] = {
\r
331 0x00a0, /* 0x0900 */
\r
332 0x00a1, /* 0x0901 */
\r
333 0x00a2, /* 0x0902 */
\r
334 0x00a3, /* 0x0903 */
\r
335 0xa4e0, /* 0x0904 */
\r
336 0x00a4, /* 0x0905 */
\r
337 0x00a5, /* 0x0906 */
\r
338 0x00a6, /* 0x0907 */
\r
339 0x00a7, /* 0x0908 */
\r
340 0x00a8, /* 0x0909 */
\r
341 0x00a9, /* 0x090a */
\r
342 0x00aa, /* 0x090b */
\r
343 0xA6E9, /* 0x090c */
\r
344 0x00ae, /* 0x090d */
\r
345 0x00ab, /* 0x090e */
\r
346 0x00ac, /* 0x090f */
\r
347 0x00ad, /* 0x0910 */
\r
348 0x00b2, /* 0x0911 */
\r
349 0x00af, /* 0x0912 */
\r
350 0x00b0, /* 0x0913 */
\r
351 0x00b1, /* 0x0914 */
\r
352 0x00b3, /* 0x0915 */
\r
353 0x00b4, /* 0x0916 */
\r
354 0x00b5, /* 0x0917 */
\r
355 0x00b6, /* 0x0918 */
\r
356 0x00b7, /* 0x0919 */
\r
357 0x00b8, /* 0x091a */
\r
358 0x00b9, /* 0x091b */
\r
359 0x00ba, /* 0x091c */
\r
360 0x00bb, /* 0x091d */
\r
361 0x00bc, /* 0x091e */
\r
362 0x00bd, /* 0x091f */
\r
363 0x00be, /* 0x0920 */
\r
364 0x00bf, /* 0x0921 */
\r
365 0x00c0, /* 0x0922 */
\r
366 0x00c1, /* 0x0923 */
\r
367 0x00c2, /* 0x0924 */
\r
368 0x00c3, /* 0x0925 */
\r
369 0x00c4, /* 0x0926 */
\r
370 0x00c5, /* 0x0927 */
\r
371 0x00c6, /* 0x0928 */
\r
372 0x00c7, /* 0x0929 */
\r
373 0x00c8, /* 0x092a */
\r
374 0x00c9, /* 0x092b */
\r
375 0x00ca, /* 0x092c */
\r
376 0x00cb, /* 0x092d */
\r
377 0x00cc, /* 0x092e */
\r
378 0x00cd, /* 0x092f */
\r
379 0x00cf, /* 0x0930 */
\r
380 0x00d0, /* 0x0931 */
\r
381 0x00d1, /* 0x0932 */
\r
382 0x00d2, /* 0x0933 */
\r
383 0x00d3, /* 0x0934 */
\r
384 0x00d4, /* 0x0935 */
\r
385 0x00d5, /* 0x0936 */
\r
386 0x00d6, /* 0x0937 */
\r
387 0x00d7, /* 0x0938 */
\r
388 0x00d8, /* 0x0939 */
\r
389 0xFFFF, /* 0x093a */
\r
390 0xFFFF, /* 0x093b */
\r
391 0x00e9, /* 0x093c */
\r
392 0xEAE9, /* 0x093d */
\r
393 0x00da, /* 0x093e */
\r
394 0x00db, /* 0x093f */
\r
395 0x00dc, /* 0x0940 */
\r
396 0x00dd, /* 0x0941 */
\r
397 0x00de, /* 0x0942 */
\r
398 0x00df, /* 0x0943 */
\r
399 0xDFE9, /* 0x0944 */
\r
400 0x00e3, /* 0x0945 */
\r
401 0x00e0, /* 0x0946 */
\r
402 0x00e1, /* 0x0947 */
\r
403 0x00e2, /* 0x0948 */
\r
404 0x00e7, /* 0x0949 */
\r
405 0x00e4, /* 0x094a */
\r
406 0x00e5, /* 0x094b */
\r
407 0x00e6, /* 0x094c */
\r
408 0x00e8, /* 0x094d */
\r
409 0x00ec, /* 0x094e */
\r
410 0x00ed, /* 0x094f */
\r
411 0xA1E9, /* 0x0950 */ /* OM Symbol */
\r
412 0xFFFF, /* 0x0951 */
\r
413 0xF0B8, /* 0x0952 */
\r
414 0xFFFF, /* 0x0953 */
\r
415 0xFFFF, /* 0x0954 */
\r
416 0xFFFF, /* 0x0955 */
\r
417 0xFFFF, /* 0x0956 */
\r
418 0xFFFF, /* 0x0957 */
\r
419 0xb3e9, /* 0x0958 */
\r
420 0xb4e9, /* 0x0959 */
\r
421 0xb5e9, /* 0x095a */
\r
422 0xbae9, /* 0x095b */
\r
423 0xbfe9, /* 0x095c */
\r
424 0xC0E9, /* 0x095d */
\r
425 0xc9e9, /* 0x095e */
\r
426 0x00ce, /* 0x095f */
\r
427 0xAAe9, /* 0x0960 */
\r
428 0xA7E9, /* 0x0961 */
\r
429 0xDBE9, /* 0x0962 */
\r
430 0xDCE9, /* 0x0963 */
\r
431 0x00ea, /* 0x0964 */
\r
432 0xeaea, /* 0x0965 */
\r
433 0x00f1, /* 0x0966 */
\r
434 0x00f2, /* 0x0967 */
\r
435 0x00f3, /* 0x0968 */
\r
436 0x00f4, /* 0x0969 */
\r
437 0x00f5, /* 0x096a */
\r
438 0x00f6, /* 0x096b */
\r
439 0x00f7, /* 0x096c */
\r
440 0x00f8, /* 0x096d */
\r
441 0x00f9, /* 0x096e */
\r
442 0x00fa, /* 0x096f */
\r
443 0xF0BF, /* 0x0970 */
\r
444 0xFFFF, /* 0x0971 */
\r
445 0xFFFF, /* 0x0972 */
\r
446 0xFFFF, /* 0x0973 */
\r
447 0xFFFF, /* 0x0974 */
\r
448 0xFFFF, /* 0x0975 */
\r
449 0xFFFF, /* 0x0976 */
\r
450 0xFFFF, /* 0x0977 */
\r
451 0xFFFF, /* 0x0978 */
\r
452 0xFFFF, /* 0x0979 */
\r
453 0xFFFF, /* 0x097a */
\r
454 0xFFFF, /* 0x097b */
\r
455 0xFFFF, /* 0x097c */
\r
456 0xFFFF, /* 0x097d */
\r
457 0xFFFF, /* 0x097e */
\r
458 0xFFFF, /* 0x097f */
\r
/*
 * ISCII-to-Unicode mapping table. GetMapping indexes it directly with the
 * unsigned ISCII byte value; the low byte of the result is then checked against
 * validityTable, and WriteToTargetToU later adds the current script delta.
 * NOTE(review): the table rows are not present in this listing.
 */
460 private static final char toUnicodeTable[] = {
\r
/*
 * Special mappings for <ISCII byte> followed by ISCII_NUKTA, searched by
 * decodeLoop: column 0 is compared with the previous ISCII byte and column 1 is
 * the Unicode value to use on a match. Row 0 is not a mapping; its first element
 * is the row count used as the loop bound.
 * NOTE(review): only row 0 is present in this listing.
 */
718 private static final char nuktaSpecialCases[][] = {
\r
719 { 16 /* length of array */ , 0 },
\r
/*
 * Special mappings for <ISCII byte> followed by ISCII_VOWEL_SIGN_E, searched by
 * decodeLoop: column 0 is compared with the previous ISCII byte and column 1 is
 * the Unicode value to use on a match. Row 0 is not a mapping; its first element
 * is the row count used as the loop bound.
 * NOTE(review): only row 0 is present in this listing.
 */
736 private static final char vowelSignESpecialCases[][] = {
\r
737 { 2 /* length of array */ , 0 },
\r
/*
 * Script table consulted by decodeLoop after an ATR byte; the row is selected by
 * the low nibble (sourceChar & 0x0F) of the byte that follows ATR.
 * Column 0 is a UniLang script index, multiplied by UniLang.DELTA to obtain the
 * delta added to decoded code points; column 1 is the validity mask for that script.
 */
741 private static final short lookupTable[][] = {
\r
742 { MaskEnum.ZERO, MaskEnum.ZERO }, /* DEFAULT */
\r
743 { MaskEnum.ZERO, MaskEnum.ZERO }, /* ROMAN */
\r
744 { UniLang.DEVALANGARI, MaskEnum.DEV_MASK },
\r
745 { UniLang.BENGALI, MaskEnum.BNG_MASK },
\r
746 { UniLang.TAMIL, MaskEnum.TML_MASK },
\r
747 { UniLang.TELUGU, MaskEnum.KND_MASK }, /* NOTE(review): Telugu shares KND_MASK with the Kannada row - confirm intended */
\r
748 { UniLang.BENGALI, MaskEnum.BNG_MASK }, /* NOTE(review): duplicates the Bengali row - presumably the Assamese language code; confirm */
\r
749 { UniLang.ORIYA, MaskEnum.ORI_MASK },
\r
750 { UniLang.KANNADA, MaskEnum.KND_MASK },
\r
751 { UniLang.MALAYALAM, MaskEnum.MLM_MASK },
\r
752 { UniLang.GUJARATI, MaskEnum.GJR_MASK },
\r
753 { UniLang.GURMUKHI, MaskEnum.PNJ_MASK }
\r
/* ISCII conversion state created in the constructor; used by both the decoder and the encoder inner classes. */
756 private UConverterDataISCII extraInfo = null;
\r
/* Substitution byte sequence (the single byte 0x1A) passed to the encoder's superclass constructor. */
757 protected byte[] fromUSubstitution = new byte[]{(byte)0x1A};
\r
/*
 * Creates an ISCII charset.
 * Sets the bytes-per-char and chars-per-byte limits, reads the converter version
 * as the decimal number that follows the first 14 characters of icuCanonicalName,
 * creates the UConverterDataISCII state (named ISCII_CNV_PREFIX plus the masked
 * version) and makes sure the Gurmukhi UnicodeSets exist.
 * Integer.parseInt throws NumberFormatException when that suffix is not a number;
 * substring(14) throws StringIndexOutOfBoundsException for shorter names.
 * NOTE(review): some constructor lines (other UConverterDataISCII arguments) are
 * missing from this listing.
 */
759 public CharsetISCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
\r
760 super(icuCanonicalName, javaCanonicalName, aliases);
\r
761 maxBytesPerChar = 4;
\r
762 minBytesPerChar = 1;
\r
763 maxCharsPerByte = 1;
\r
764 //get the version number of the ISCII converter
\r
765 int option = Integer.parseInt(icuCanonicalName.substring(14));
\r
767 extraInfo = new UConverterDataISCII(
\r
769 new String(ISCII_CNV_PREFIX + (option & UCNV_OPTIONS_VERSION_MASK)) /* name */
\r
772 initializePNJSets();
\r
775 /* Initialize the two UnicodeSets used for proper Gurmukhi conversion if they have not already been created.
 * PNJ_CONSONANT_SET receives the ranges 0x0a15-0x0a28, 0x0a2a-0x0a30, 0x0a35-0x0a36 and 0x0a38-0x0a39.
 * PNJ_BINDI_TIPPI_SET receives all of PNJ_CONSONANT_SET plus 0x0a05, 0x0a07, 0x0a41-0x0a42 and 0x0a3f.
 * Both sets are compacted once filled.
 * NOTE(review): the fields are assigned before they are populated and the guard is not synchronized;
 * if these are shared static fields, a concurrent caller could observe partially filled sets - confirm.
 */
\r
776 private void initializePNJSets() {
\r
777 if (PNJ_BINDI_TIPPI_SET != null && PNJ_CONSONANT_SET != null) {
\r
780 PNJ_BINDI_TIPPI_SET = new UnicodeSet();
\r
781 PNJ_CONSONANT_SET = new UnicodeSet();
\r
783 PNJ_CONSONANT_SET.add(0x0a15, 0x0a28);
\r
784 PNJ_CONSONANT_SET.add(0x0a2a, 0x0a30);
\r
785 PNJ_CONSONANT_SET.add(0x0a35, 0x0a36);
\r
786 PNJ_CONSONANT_SET.add(0x0a38, 0x0a39);
\r
788 PNJ_BINDI_TIPPI_SET.addAll(PNJ_CONSONANT_SET);
\r
789 PNJ_BINDI_TIPPI_SET.add(0x0a05);
\r
790 PNJ_BINDI_TIPPI_SET.add(0x0a07);
\r
792 PNJ_BINDI_TIPPI_SET.add(0x0a41, 0x0a42);
\r
793 PNJ_BINDI_TIPPI_SET.add(0x0a3f);
\r
795 PNJ_CONSONANT_SET.compact();
\r
796 PNJ_BINDI_TIPPI_SET.compact();
\r
800 * Rules for ISCII to Unicode converter
\r
801 * ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
\r
802 * which has both precomposed and decomposed forms of characters,
\r
803 * pre-context and post-context need to be considered.
\r
806 * i) ATR : Attribute code is used to declare the font and script switching.
\r
807 * Currently we only switch scripts; font codes are consumed without generating an error
\r
808 * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
\r
809 * obsolete characters
\r
811 * i) Halant: if preceded by a halant then it is an explicit halant
\r
813 * a) if preceded by a halant then it is a soft halant
\r
814 * b) if preceded by specific consonants and the ligatures have pre-composed
\r
815 * characters in Unicode then convert to pre-composed characters
\r
816 * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
\r
818 class CharsetDecoderISCII extends CharsetDecoderICU {
\r
/* Creates a decoder for the given charset. NOTE(review): the constructor body is missing from this listing. */
819 public CharsetDecoderISCII(CharsetICU cs) {
\r
/*
 * Resets the decoder: sets toUnicodeStatus (the character held back for a delayed
 * write in decodeLoop) to 0xFFFF and re-initializes the ISCII conversion state.
 * NOTE(review): 0xFFFF is presumably UConverterConstants.missingCharMarker, the
 * value decodeLoop treats as "nothing pending" - confirm.
 */
824 protected void implReset() {
\r
826 this.toUnicodeStatus = 0xFFFF;
\r
827 extraInfo.initialize();
\r
/*
 * Converts ISCII bytes from source into UTF-16 chars in target.
 *
 * Each mapped character is held back in toUnicodeStatus and written on a later
 * iteration (or at end of input when flush is set), so that the following byte
 * can still change the result: halant, nukta, danda, vowel sign E and the
 * Gurmukhi bindi/tippi and consonant-cluster cases. data.contextCharToUnicode
 * holds the previous ISCII byte; data.prevToUnicodeStatus holds a Gurmukhi
 * consonant whose write was delayed for cluster handling.
 *
 * offsets, when non-null, receives a source index for every char written.
 * The CoderResult values assigned here are UNDERFLOW, OVERFLOW,
 * malformedForLength(1) and unmappableForLength(1).
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers skip),
 * including else/break/closing-brace lines and some comment terminators, so
 * the branch structure must be confirmed against the complete file.
 */
830 @SuppressWarnings("fallthrough")
\r
831 protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
\r
832 CoderResult cr = CoderResult.UNDERFLOW;
\r
833 int targetUniChar = 0x0000;
\r
834 short sourceChar = 0x0000;
\r
835 UConverterDataISCII data;
\r
836 boolean gotoCallBack = false;
\r
840 //data.contextCharToUnicode; /* contains previous ISCII codepoint visited */
\r
841 //this.toUnicodeStatus; /* contains the mapping to Unicode of the above codepoint */
\r
843 while (source.hasRemaining()) {
\r
844 targetUniChar = UConverterConstants.missingCharMarker;
\r
846 if (target.hasRemaining()) {
\r
847 sourceChar = (short)((short)source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
\r
849 /* look at the post-context perform special processing */
\r
850 if (data.contextCharToUnicode == ATR) {
\r
851 /* If we have ATR in data.contextCharToUnicode then we need to change our
\r
852 * state to Indic Script specified by sourceChar
\r
854 /* check if the sourceChar is supported script range */
\r
/* unsigned-byte range test: equivalent to ISCIILang.DEV <= sourceChar <= ISCIILang.PNJ when both constants fit in a byte */
855 if (((short)(ISCIILang.PNJ - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (ISCIILang.PNJ - ISCIILang.DEV)) {
\r
856 data.currentDeltaToUnicode = (short)(lookupTable[sourceChar & 0x0F][0] * UniLang.DELTA);
\r
857 data.currentMaskToUnicode = lookupTable[sourceChar & 0x0F][1];
\r
858 } else if (sourceChar == ISCIILang.DEF) {
\r
859 /* switch back to default */
\r
860 data.currentDeltaToUnicode = data.defDeltaToUnicode;
\r
861 data.currentMaskToUnicode = data.defMaskToUnicode;
\r
863 if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
\r
864 /* these are display codes consume and continue */
\r
866 cr = CoderResult.malformedForLength(1);
\r
868 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
869 gotoCallBack = true;
\r
873 if (!gotoCallBack) {
\r
874 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
877 } else if (data.contextCharToUnicode == EXT) {
\r
878 /* check if sourceChar is in 0xA1 - 0xEE range */
\r
879 if (((short)(EXT_RANGE_END - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
\r
880 /* We currently support only Anudatta and Devanagari abbreviation sign */
\r
881 if (sourceChar == 0xBF || sourceChar == 0xB8) {
\r
882 targetUniChar = (sourceChar == 0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
\r
884 /* find out if the mapping is valid in this state */
\r
885 if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) {
\r
886 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
888 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
\r
889 if (data.prevToUnicodeStatus != 0) {
\r
890 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
\r
891 data.prevToUnicodeStatus = 0x0000;
\r
893 /* write to target */
\r
894 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
\r
899 /* byte unit is unassigned */
\r
900 targetUniChar = UConverterConstants.missingCharMarker;
\r
901 cr = CoderResult.unmappableForLength(1);
\r
903 /* only 0xA1 - 0xEE are legal after EXT char */
\r
904 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
905 cr = CoderResult.malformedForLength(1);
\r
907 gotoCallBack = true;
\r
908 } else if (data.contextCharToUnicode == ISCII_INV) {
\r
909 if (sourceChar == ISCII_HALANT) {
\r
910 targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
\r
912 targetUniChar = ZWJ;
\r
915 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
\r
916 if (data.prevToUnicodeStatus != 0) {
\r
917 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
\r
918 data.prevToUnicodeStatus = 0x0000;
\r
921 /* write to target */
\r
922 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
\r
924 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
927 /* look at the pre-context and perform special processing */
\r
928 if (!gotoCallBack) {
\r
929 switch (sourceChar) {
\r
931 case EXT: /* falls through */
\r
933 data.contextCharToUnicode = (char)sourceChar;
\r
935 if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
\r
936 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
\r
937 if (data.prevToUnicodeStatus != 0) {
\r
938 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
\r
939 data.prevToUnicodeStatus = 0x0000;
\r
941 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
\r
942 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
946 /* handle double danda */
\r
947 if (data.contextCharToUnicode == ISCII_DANDA) {
\r
948 targetUniChar = DOUBLE_DANDA;
\r
949 /* clear the context */
\r
950 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
951 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
953 targetUniChar = GetMapping(sourceChar, targetUniChar, data);
\r
954 data.contextCharToUnicode = (char)sourceChar;
\r
958 /* handle explicit halant */
\r
959 if (data.contextCharToUnicode == ISCII_HALANT) {
\r
960 targetUniChar = ZWNJ;
\r
961 /* clear context */
\r
962 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
964 targetUniChar = GetMapping(sourceChar, targetUniChar, data);
\r
965 data.contextCharToUnicode = (char)sourceChar;
\r
971 data.resetToDefaultToUnicode = true;
\r
972 targetUniChar = GetMapping(sourceChar, targetUniChar, data);
\r
973 data.contextCharToUnicode = (char)sourceChar;
\r
975 case ISCII_VOWEL_SIGN_E:
\r
976 /* find <CHAR> + SIGN_VOWEL_E special mapping */
\r
978 boolean find = false;
\r
979 for (; n < vowelSignESpecialCases[0][0]; n++) {
\r
980 if (vowelSignESpecialCases[n][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
\r
981 targetUniChar = vowelSignESpecialCases[n][1];
\r
987 /* find out if the mapping is valid in this state */
\r
/* NOTE(review): this decoder branch tests currentMaskFromUnicode while the nukta branch below tests currentMaskToUnicode - confirm which mask is intended. */
/* NOTE(review): the (byte) cast gives a negative index when the low byte is >= 0x80; other lookups mask with UNSIGNED_BYTE_MASK - confirm special-case targets never have that bit set. */
988 if ((validityTable[(byte)targetUniChar] & data.currentMaskFromUnicode) > 0) {
\r
989 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
990 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
994 targetUniChar = GetMapping(sourceChar, targetUniChar, data);
\r
995 data.contextCharToUnicode = (char)sourceChar;
\r
998 /* handle soft halant */
\r
999 if (data.contextCharToUnicode == ISCII_HALANT) {
\r
1000 targetUniChar = ZWJ;
\r
1001 /* clear the context */
\r
1002 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
1004 } else if (data.currentDeltaToUnicode == PNJ_DELTA && data.contextCharToUnicode == 0xc0) {
\r
1005 /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
\r
1006 * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
\r
1007 * WriteToTargetToU is given 0x095c instead of 0xa5c because that method will automatically
\r
1008 * convert the code point given based on the delta provided.
\r
1010 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_RRA, (short)0);
\r
1011 if (!cr.isOverflow()) {
\r
1012 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_SIGN_VIRAMA, (short)0);
\r
1013 if (!cr.isOverflow()) {
\r
1014 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_HA, (short)0);
\r
1016 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA;
\r
1019 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_SIGN_VIRAMA;
\r
1020 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA;
\r
1022 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
1023 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
1024 if (!cr.isError()) {
\r
1029 /* try to handle <CHAR> + ISCII_NUKTA special mappings */
\r
1031 boolean found = false;
\r
1032 for (; i < nuktaSpecialCases[0][0]; i++) {
\r
1033 if (nuktaSpecialCases[i][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
\r
1034 targetUniChar = nuktaSpecialCases[i][1];
\r
1040 /* find out if the mapping is valid in this state */
\r
1041 if ((validityTable[(byte)targetUniChar] & data.currentMaskToUnicode) > 0) {
\r
1042 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
1043 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
1044 if (data.currentDeltaToUnicode == PNJ_DELTA) {
\r
1045 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
\r
1046 if (data.prevToUnicodeStatus != 0) {
\r
1047 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
\r
1048 data.prevToUnicodeStatus = 0x0000;
\r
1050 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
\r
1055 /* else fall through to default */
\r
1057 /* else fall through to default */
\r
1061 targetUniChar = GetMapping(sourceChar, targetUniChar, data);
\r
1062 data.contextCharToUnicode = (char)sourceChar;
\r
1065 }//end of CallBack if statement
\r
1067 if (!gotoCallBack && this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
\r
1068 /* Check to make sure that consonant clusters are handled correctly for Gurmukhi script. */
\r
1069 if (data.currentDeltaToUnicode == PNJ_DELTA && data.prevToUnicodeStatus != 0 && PNJ_CONSONANT_SET.contains(data.prevToUnicodeStatus) &&
\r
1070 (this.toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data.prevToUnicodeStatus) {
\r
1071 if (offsets != null) {
\r
1072 offset = source.position() - 3;
\r
/* NOTE(review): the result of the first write below is overwritten by the second write - confirm an overflow on the first cannot be lost. */
1074 cr = WriteToTargetToU(offsets, offset, source, target, PNJ_ADHAK, (short)0);
\r
1075 cr = WriteToTargetToU(offsets, offset, source, target, data.prevToUnicodeStatus, (short)0);
\r
1076 data.prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
\r
1077 toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
1080 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
\r
1081 if (data.prevToUnicodeStatus != 0) {
\r
1082 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
\r
1083 data.prevToUnicodeStatus = 0x0000;
\r
1085 /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
\r
1086 * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
\r
1088 if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && PNJ_BINDI_TIPPI_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) {
\r
1089 targetUniChar = PNJ_TIPPI - PNJ_DELTA;
\r
1090 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, PNJ_DELTA);
\r
1091 } else if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && PNJ_CONSONANT_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) {
\r
1092 /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
\r
1093 data.prevToUnicodeStatus = this.toUnicodeStatus + PNJ_DELTA;
\r
1095 /* write the previously mapped codepoint */
\r
1096 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
\r
1099 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
1102 if (!gotoCallBack && targetUniChar != UConverterConstants.missingCharMarker) {
\r
1103 /* now save the targetUniChar for delayed write */
\r
1104 this.toUnicodeStatus = (char)targetUniChar;
\r
1105 if (data.resetToDefaultToUnicode) {
\r
1106 data.currentDeltaToUnicode = data.defDeltaToUnicode;
\r
1107 data.currentMaskToUnicode = data.defMaskToUnicode;
\r
1108 data.resetToDefaultToUnicode = false;
\r
1111 /* we reach here only if targetUniChar == missingCharMarker
\r
1112 * so assign codes to reason and err
\r
1114 if (!gotoCallBack) {
\r
1115 cr = CoderResult.unmappableForLength(1);
\r
1118 toUBytesArray[0] = (byte)sourceChar;
\r
1120 gotoCallBack = false;
\r
1124 cr = CoderResult.OVERFLOW;
\r
1130 if (cr.isUnderflow() && flush && !source.hasRemaining()) {
\r
1131 /*end of the input stream */
\r
1132 if (data.contextCharToUnicode == ATR || data.contextCharToUnicode == EXT || data.contextCharToUnicode == ISCII_INV) {
\r
1133 /* set toUBytes[] */
\r
1134 toUBytesArray[0] = (byte)data.contextCharToUnicode;
\r
1137 /* avoid looping on truncated sequences */
\r
1138 data.contextCharToUnicode = NO_CHAR_MARKER;
\r
1143 if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
\r
1144 /* output a remaining target character */
\r
/* NOTE(review): the CoderResult of this final write is discarded - confirm an overflow here is reported by other means. */
1145 WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
\r
1146 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
\r
/*
 * Writes one UTF-16 unit to target and, when offsets is non-null, records offset.
 * delta is added to targetUniChar first unless the value is <= ASCII_END or is
 * one of ZWJ, ZWNJ, DANDA or DOUBLE_DANDA, which are the same for every script.
 * A char that cannot be written is stored in charErrorBufferArray and OVERFLOW
 * is assigned to the result.
 * source is not used in the visible lines.
 * NOTE(review): the else line separating the write and the overflow handling is
 * missing from this listing; the description above assumes it - confirm.
 */
1152 private CoderResult WriteToTargetToU(IntBuffer offsets, int offset, ByteBuffer source, CharBuffer target, int targetUniChar, short delta) {
\r
1153 CoderResult cr = CoderResult.UNDERFLOW;
\r
1154 /* add offset to current Indic Block */
\r
1155 if (targetUniChar > ASCII_END &&
\r
1156 targetUniChar != ZWJ &&
\r
1157 targetUniChar != ZWNJ &&
\r
1158 targetUniChar != DANDA &&
\r
1159 targetUniChar != DOUBLE_DANDA) {
\r
1160 targetUniChar += delta;
\r
1163 /* now write the targetUniChar */
\r
1164 if (target.hasRemaining()) {
\r
1165 target.put((char)targetUniChar);
\r
1166 if (offsets != null) {
\r
1167 offsets.put(offset);
\r
1170 charErrorBufferArray[charErrorBufferLength++] = (char)targetUniChar;
\r
1171 cr = CoderResult.OVERFLOW;
\r
/*
 * Looks up the Unicode mapping of an ISCII byte in toUnicodeTable.
 * For bytes above ASCII_END the result is replaced by missingCharMarker when
 * validityTable (indexed by the low byte of the mapping) has no bit in common
 * with data.currentMaskToUnicode; the one exception is VOCALLIC_RR while the
 * current delta is TELUGU_DELTA.
 * The incoming targetUniChar value is overwritten before it is read.
 */
1176 private int GetMapping(short sourceChar, int targetUniChar, UConverterDataISCII data) {
\r
1177 targetUniChar = toUnicodeTable[sourceChar];
\r
1178 /* is the code point valid in current script? */
\r
1179 if (sourceChar > ASCII_END &&
\r
1180 (validityTable[(short)targetUniChar & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) == 0) {
\r
1181 /* Vocallic RR is assigned in ISCII Telugu and Unicode */
\r
1182 if (data.currentDeltaToUnicode != (TELUGU_DELTA) || targetUniChar != VOCALLIC_RR) {
\r
1183 targetUniChar = UConverterConstants.missingCharMarker;
\r
1186 return targetUniChar;
\r
1192 * Explicit Halant :
\r
1193 * <HALANT> + <ZWNJ>
\r
1195 * <HALANT> + <ZWJ>
\r
1197 class CharsetEncoderISCII extends CharsetEncoderICU {
\r
/* Creates an encoder for the given charset, passing the charset's fromUSubstitution bytes to the superclass. */
1198 public CharsetEncoderISCII(CharsetICU cs) {
\r
1199 super(cs, fromUSubstitution);
\r
/* Resets the encoder: runs the superclass reset, then re-initializes the ISCII conversion state. */
1203 protected void implReset() {
\r
1204 super.implReset();
\r
1205 extraInfo.initialize();
\r
/*
 * Converts UTF-16 chars from source into ISCII bytes in target.
 *
 * If fromUChar32 is non-zero on entry it is passed to handleSurrogates and the
 * method returns that result, or unmappableForLength(2) when the result is null.
 * Characters up to ASCII_END are written unchanged. Characters in the Indic range
 * are normalized to the Devanagari block by subtracting the script delta and
 * mapped through fromUnicodeTable; when the script block changes (or
 * isFirstBuffer is set) an ATR byte plus the ISCII language code is written
 * first. When the previous character was LF, ATR and the language code are
 * written again before the next character is read.
 * For Gurmukhi, Tippi is encoded as Bindi, and Adhak followed by a consonant is
 * written as consonant + halant + consonant.
 * Surrogates go to handleSurrogates; other unmapped input returns
 * unmappableForLength(1). OVERFLOW is returned when target has no room.
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers skip),
 * including case labels, else/return/closing-brace lines and some comment
 * terminators, so the branch structure must be confirmed against the complete file.
 */
1208 protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
\r
1209 int targetByteUnit = 0x0000;
\r
1210 int sourceChar = 0x0000;
\r
1211 UConverterDataISCII converterData;
\r
1212 short newDelta = 0;
\r
1214 boolean deltaChanged = false;
\r
1215 int tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */
\r
1216 CoderResult cr = CoderResult.UNDERFLOW;
\r
1218 /* initialize data */
\r
1219 converterData = extraInfo;
\r
1220 newDelta = converterData.currentDeltaFromUnicode;
\r
1221 range = (short)(newDelta / UniLang.DELTA);
\r
1223 if ((sourceChar = fromUChar32) != 0) {
\r
1224 cr = handleSurrogates(source, (char) sourceChar);
\r
1225 return (cr != null) ? cr : CoderResult.unmappableForLength(2);
\r
1228 /* writing the char to the output stream */
\r
1229 while (source.hasRemaining()) {
\r
1230 if (!target.hasRemaining()) {
\r
1231 return CoderResult.OVERFLOW;
\r
1234 /* Write the language code following LF only if LF is not the last character. */
\r
1235 if (fromUnicodeStatus == LF) {
\r
1236 targetByteUnit = ATR << 8;
\r
1237 targetByteUnit += (byte)lookupInitialData[range].isciiLang;
\r
1238 fromUnicodeStatus = 0x0000;
\r
1239 /* now append ATR and language code */
\r
1240 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
\r
1241 if (cr.isOverflow()) {
\r
1246 sourceChar = source.get();
\r
1247 tempContextFromUnicode = converterData.contextCharFromUnicode;
\r
1249 targetByteUnit = UConverterConstants.missingCharMarker;
\r
1251 /* check if input is in ASCII and C0 control codes range */
\r
1252 if (sourceChar <= ASCII_END) {
\r
1253 fromUnicodeStatus = sourceChar;
\r
1254 cr = WriteToTargetFromU(offsets, source, target, sourceChar);
\r
1255 if (cr.isOverflow()) {
\r
1261 switch (sourceChar) {
\r
1263 /* contextChar has HALANT */
\r
1264 if (converterData.contextCharFromUnicode != 0) {
\r
1265 converterData.contextCharFromUnicode = 0x00;
\r
1266 targetByteUnit = ISCII_HALANT;
\r
1268 /* consume ZWNJ and continue */
\r
1269 converterData.contextCharFromUnicode = 0x00;
\r
1274 /* contextChar has HALANT */
\r
1275 if (converterData.contextCharFromUnicode != 0) {
\r
1276 targetByteUnit = ISCII_NUKTA;
\r
1278 targetByteUnit = ISCII_INV;
\r
1280 converterData.contextCharFromUnicode = 0x00;
\r
1283 /* is the sourceChar in the INDIC_RANGE? */
\r
/* the (char) cast makes the subtraction unsigned, so this is true only for INDIC_BLOCK_BEGIN <= sourceChar <= INDIC_BLOCK_END */
1284 if((char)(INDIC_BLOCK_END - sourceChar) <= INDIC_RANGE) {
\r
1285 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
\r
1286 * does not include these codepoints in all Northern scripts we need to
\r
1289 if (sourceChar != DANDA && sourceChar != DOUBLE_DANDA) {
\r
1290 /* find out to which block the sourceChar belongs */
\r
1291 range = (short)((sourceChar - INDIC_BLOCK_BEGIN) / UniLang.DELTA);
\r
1292 newDelta = (short)(range * UniLang.DELTA);
\r
1294 /* Now are we in the same block as previous? */
\r
1295 if (newDelta != converterData.currentDeltaFromUnicode || converterData.isFirstBuffer) {
\r
1296 converterData.currentDeltaFromUnicode = newDelta;
\r
1297 converterData.currentMaskFromUnicode = lookupInitialData[range].maskEnum;
\r
1298 deltaChanged = true;
\r
1299 converterData.isFirstBuffer = false;
\r
1301 if (converterData.currentDeltaFromUnicode == PNJ_DELTA) {
\r
1302 if (sourceChar == PNJ_TIPPI) {
\r
1303 /* Make sure Tippi is converted to Bindi. */
\r
1304 sourceChar = PNJ_BINDI;
\r
1305 } else if (sourceChar == PNJ_ADHAK) {
\r
1306 /* This is for consonant cluster handling. */
\r
1307 converterData.contextCharFromUnicode = PNJ_ADHAK;
\r
1310 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
\r
1311 /* now subtract the new delta from sourceChar */
\r
1312 sourceChar -= converterData.currentDeltaFromUnicode;
\r
1314 /* get the target byte unit */
\r
1315 targetByteUnit = fromUnicodeTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK];
\r
1317 /* is the code point valid in current script? */
\r
1318 if ((validityTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK] & converterData.currentMaskFromUnicode) == 0) {
\r
1319 /* Vocallic RR is assigned in ISCII Telugu and Unicode */
\r
1320 if (converterData.currentDeltaFromUnicode != (TELUGU_DELTA) || sourceChar != VOCALLIC_RR) {
\r
1321 targetByteUnit = UConverterConstants.missingCharMarker;
\r
1325 if (deltaChanged) {
\r
1326 /* we are in a script block which is different than
\r
1327 * previous sourceChar's script block write ATR and language codes
\r
1330 temp = (char)(ATR << 8);
\r
1331 temp += (char)(lookupInitialData[range].isciiLang & UConverterConstants.UNSIGNED_BYTE_MASK);
\r
1333 deltaChanged = false;
\r
1334 /* now append ATR and language code */
\r
1335 cr = WriteToTargetFromU(offsets, source, target, temp);
\r
1336 if (cr.isOverflow()) {
\r
1340 if (converterData.currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
\r
1344 /* reset context char */
\r
1345 converterData.contextCharFromUnicode = 0x00;
\r
1348 if (converterData.currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && PNJ_CONSONANT_SET.contains(sourceChar + PNJ_DELTA)) {
\r
1349 /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
\r
1350 /* reset context char */
\r
1351 converterData.contextCharFromUnicode = 0x0000;
\r
1352 targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
\r
1353 /*write targetByteUnit to target */
\r
1354 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
\r
1355 if (cr.isOverflow()) {
\r
1358 } else if (targetByteUnit != UConverterConstants.missingCharMarker) {
\r
1359 if (targetByteUnit == ISCII_HALANT) {
\r
1360 converterData.contextCharFromUnicode = (char)targetByteUnit;
\r
1362 /*write targetByteUnit to target */
\r
1363 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
\r
1364 if (cr.isOverflow()) {
\r
1367 } else if (UTF16.isSurrogate((char)sourceChar)) {
\r
1368 cr = handleSurrogates(source, (char) sourceChar);
\r
1369 return (cr != null) ? cr : CoderResult.unmappableForLength(2);
\r
1371 return CoderResult.unmappableForLength(1);
\r
1373 } /* end of while */
\r
1375 /* save the state and return */
\r
/*
 * Writes targetByteUnit to target as one, two or three bytes, most significant
 * byte first: one byte for values <= 0xFF, three for values > 0xFFFF, otherwise
 * two. When offsets is non-null, source.position() - 1 is recorded for each byte
 * written. Bytes that do not fit are appended to errorBuffer and OVERFLOW is
 * assigned to the result.
 * NOTE(review): else and closing-brace lines are missing from this listing, so
 * the exact nesting of the branches must be confirmed against the complete file.
 */
1379 private CoderResult WriteToTargetFromU(IntBuffer offsets, CharBuffer source, ByteBuffer target, int targetByteUnit) {
\r
1380 CoderResult cr = CoderResult.UNDERFLOW;
\r
1381 int offset = source.position() - 1;
\r
1382 /* write the targetUniChar to target */
\r
1383 if (target.hasRemaining()) {
\r
1384 if (targetByteUnit <= 0xFF) {
\r
1385 target.put((byte)targetByteUnit);
\r
1386 if (offsets != null) {
\r
1387 offsets.put(offset);
\r
1390 if (targetByteUnit > 0xFFFF) {
\r
1391 target.put((byte)(targetByteUnit >> 16));
\r
1392 if (offsets != null) {
\r
1394 offsets.put(offset);
\r
1397 if (!target.hasRemaining()) {
\r
1398 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8);
\r
1399 errorBuffer[errorBufferLength++] = (byte)targetByteUnit;
\r
1400 cr = CoderResult.OVERFLOW;
\r
1403 target.put((byte)(targetByteUnit >> 8));
\r
1404 if (offsets != null) {
\r
1405 offsets.put(offset);
\r
1407 if (target.hasRemaining()) {
\r
1408 target.put((byte)targetByteUnit);
\r
1409 if (offsets != null) {
\r
1410 offsets.put(offset);
\r
1413 errorBuffer[errorBufferLength++] = (byte)targetByteUnit;
\r
1414 cr = CoderResult.OVERFLOW;
\r
1418 if ((targetByteUnit > 0xFFFF)) {
\r
1419 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 16);
\r
1420 } else if ((targetByteUnit & 0xFF00) > 0) {
\r
1421 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8);
\r
1423 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit);
\r
1424 cr = CoderResult.OVERFLOW;
\r
/* Returns a new ISCII decoder bound to this charset. */
1430 public CharsetDecoder newDecoder() {
\r
1431 return new CharsetDecoderISCII(this);
\r
/* Returns a new ISCII encoder bound to this charset. */
1434 public CharsetEncoder newEncoder() {
\r
1435 return new CharsetEncoderISCII(this);
\r
/*
 * Adds to setFillIn the code points this charset handles: 0 through ASCII_END;
 * for every script index from UniLang.DEVALANGARI to UniLang.MALAYALAM, each
 * code point of that script's block whose validityTable entry has a bit in
 * common with the script's mask (plus index 0x31 for Telugu); and DANDA,
 * DOUBLE_DANDA, ZWNJ and ZWJ.
 * NOTE(review): "which" is not used in the visible lines, and the local
 * declarations (idx, script, mask) are missing from this listing - confirm.
 */
1438 void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
\r
1442 setFillIn.add(0,ASCII_END );
\r
1443 for(script = UniLang.DEVALANGARI ; script<= UniLang.MALAYALAM ;script++){
\r
1444 mask = (char)lookupInitialData[script].maskEnum;
\r
1445 for(idx=0; idx < UniLang.DELTA ; idx++){
\r
1446 // Special check for telugu character
\r
1447 if((validityTable[idx] & mask)!=0 || (script == UniLang.TELUGU && idx==0x31)){
\r
1448 setFillIn.add(idx+(script*UniLang.DELTA)+INDIC_BLOCK_BEGIN );
\r
1452 setFillIn.add(DANDA);
\r
1453 setFillIn.add(DOUBLE_DANDA);
\r
1454 setFillIn.add(ZWNJ);
\r
1455 setFillIn.add(ZWJ);
\r