]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/tools/misc/src/com/ibm/icu/dev/tool/translit/indicExceptions.txt
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / tools / misc / src / com / ibm / icu / dev / tool / translit / indicExceptions.txt
1 /**
2  *******************************************************************************
3  * Copyright (C) 2002-2004, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7
8 /* Reserved in all 9 scripts */
9
10 /*
11    These codepoints are marked RESERVED in all 9 Indic scripts.
12    No transliteration should be performed on these codepoints;
13    if they are ever encountered, ignore them.
14
15    Common reserved codepoints in ALL 9 scripts : (offset)
16    00, 04, 
17    3A, 3B, 
18    4E, 4F, 
19    7B, 7C, 7D, 7E, 7F
20 */
21
22 /* Script-specific */
23 /*
24    These codepoints are specific to their respective script.
25    Transliteration of these codepoints from one script to another is meaningless.
26
27    For example,
28       \u0B70 (Oriya) and \u0BF0 (Tamil) are non-reserved codepoints within each script.
29       Both sit at offset 70, so transliterating \u0B70 from Oriya to Tamil yields \u0BF0.
30       But \u0B70 in Oriya represents ISSHAR, whereas \u0BF0 in Tamil represents NUMBER TEN.
31
32    \u0970 Devanagari abbreviation sign
33
34    \u09F0 Bengali letter RA with middle diagonal (Assamese)
35    \u09F1 Bengali letter RA with lower diagonal 
36           (=Bengali letter VA with lower diagonal, Assamese)
37    \u09F2 Bengali rupee mark
38    \u09F3 Bengali rupee sign
39    \u09F4 Bengali currency numerator one
40    \u09F5 Bengali currency numerator two
41    \u09F6 Bengali currency numerator three
42    \u09F7 Bengali currency numerator four
43    \u09F8 Bengali currency numerator one less than the denominator
44    \u09F9 Bengali currency denominator sixteen
45    \u09FA Bengali isshar
46
47    \u0A70 Gurmukhi tippi (nasalization)
48    \u0A71 Gurmukhi addak (doubles following consonant)
49    \u0A72 Gurmukhi iri (base for vowels)
50    \u0A73 Gurmukhi ura (base for vowels)
51    \u0A74 Gurmukhi ek onkar (God is One)
52
53    \u0B70 Oriya isshar
54
55    \u0BF0 Tamil number ten
56    \u0BF1 Tamil number one hundred
57    \u0BF2 Tamil number one thousand
58 */
59
60
61
62 /*****************************************************************************/
63 /*  NOTE : A codepoint marked <unknown> maps back to itself                  */
64 /*         until a good match is found                                       */
65 /*****************************************************************************/
66
67
68
69 /*****************************************************************************/
70 /* Devanagari                                                                */
71 /*****************************************************************************/
72
73 "\u0955>\u0955;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
74 "\u0956>\u0948;"  // AI Length Mark -> Devanagari Vowel Sign AI
75 "\u0957>\u094C;"  // AU Length Mark -> Devanagari Vowel Sign AU
76
77
78
79 /*****************************************************************************/
80 /* Bengali                                                                   */
81 /*****************************************************************************/
82
83 "\u098D>\u098D;"  // <unknown> independent vowel Candra E
84 "\u098E>\u098F;"  // Letter Short E -> Letter E
85 "\u0991>\u0993;"  // Letter Candra O -> Letter O
86 "\u0992>\u0993;"  // Letter Short O -> Letter O
87 "\u09A9>\u09A8;"  // Letter NNNA -> Letter NA
88 "\u09B1>\u09B0;"  // Letter RRA -> Letter RA
89 "\u09B3>\u09B2;"  // Letter LLA -> Letter LA
90 "\u09B4>\u09B2;"  // Letter LLLA -> Letter LA
91 "\u09B5>\u09AC;"  // Letter VA -> Letter BA
92 "\u09BD>\u09BD;"  // <unknown> Sign Avagraha - Devanagari (\u093D)
93 "\u09C5>\u09C7;"  // Vowel Candra E -> Vowel E
94 "\u09C6>\u09C7;"  // Vowel Short E -> Vowel E
95 "\u09C9>\u09CB;"  // Vowel Candra O -> Vowel O
96 "\u09CA>\u09CB;"  // Vowel Short O -> Vowel O
97 "\u09D0>\u09D0;"  // <unknown> OM - Devanagari (\u0950), Gujarati (\u0AD0)
98 "\u09D1>\u09D1;"  // <unknown> Stress - Devanagari (\u0951)
99 "\u09D2>\u09D2;"  // <unknown> Stress - Devanagari (\u0952)
100 "\u09D3>\u09D3;"  // <unknown> Accent - Devanagari (\u0953)
101 "\u09D4>\u09D4;"  // <unknown> Accent - Devanagari (\u0954)
102 "\u09D5>\u09D5;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
103 "\u09D6>\u09C8;"  // AI Length Mark -> Bengali Vowel Sign AI
104 "\u09D8>\u0995;"  // Letter QA -> Letter KA
105 "\u09D9>\u0996;"  // Letter KHHA -> Letter KHA
106 "\u09DA>\u0997;"  // Letter GHHA -> Letter GA
107 "\u09DB>\u099C;"  // Letter ZA -> Letter JA
108 "\u09DE>\u09AB;"  // Letter FA -> Letter PHA
109 "\u09E4>\u09E4;"  // <unknown> Danda - Devanagari (\u0964)
110 "\u09E5>\u09E5;"  // <unknown> Double Danda - Devanagari (\u0965)
111
112
113
114 /*****************************************************************************/
115 /* Gurmukhi                                                                  */
116 /*****************************************************************************/
117
118 "\u0A01>\u0A02;"
119 "\u0A03>\u0A03;"  // <unknown> Sign Visarga - Devanagari (\u0903) & the rest
120 "\u0A0B>\u0A30\u0A3F;"
121 "\u0A0C>\u0A07;"
122 "\u0A0D>\u0A10;"
123 "\u0A0E>\u0A0F;"
124 "\u0A11>\u0A14;"
125 "\u0A12>\u0A13;"
126 "\u0A29>\u0A28;"
127 "\u0A31>\u0A30;"
128 "\u0A34>\u0A33;"
129 "\u0A37>\u0A36;"
130 "\u0A3D>\u0A3D;"  // <unknown> Sign Avagraha - Devanagari (\u093D)
131 "\u0A43>\u0A43;"  // <unknown> Vocalic R - Devanagari (\u0943)
132 "\u0A44>\u0A44;"  // <unknown> Vocalic RR - Devanagari (\u0944)
133 "\u0A45>\u0A48;"
134 "\u0A46>\u0A47;"
135 "\u0A49>\u0A4C;"
136 "\u0A4A>\u0A4B;"
137 "\u0A50>\u0A50;"  // <unknown> OM - Devanagari (\u0950), Gujarati (\u0AD0)
138 "\u0A51>\u0A51;"  // <unknown> Stress - Devanagari (\u0951)
139 "\u0A52>\u0A52;"  // <unknown> Stress - Devanagari (\u0952)
140 "\u0A53>\u0A53;"  // <unknown> Accent - Devanagari (\u0953)
141 "\u0A54>\u0A54;"  // <unknown> Accent - Devanagari (\u0954)
142 "\u0A55>\u0A55;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
143 "\u0A56>\u0A48;"  // AI Length Mark -> Gurmukhi Vowel Sign AI
144 "\u0A57>\u0A4C;"  // AU Length Mark -> Gurmukhi Vowel Sign AU
145 "\u0A58>\u0A15\u0A3C;"
146 "\u0A5D>\u0A22\u0A3C;"  // Letter RHA -> Gurmukhi letter ddha (\u0A22) + nukta (\u0A3C)
147 "\u0A5F>\u0A2F;"
148 "\u0A60>\u0A30\u0A3F;"
149 "\u0A61>\u0A08\u0A3C;"
150 "\u0A62>\u0A3F\u0A3C;"
151 "\u0A63>\u0A40\u0A3C;"
152 "\u0A64>\u0A64;"  // <unknown> Danda - Devanagari (\u0964)
153 "\u0A65>\u0A65;"  // <unknown> Double Danda - Devanagari (\u0965)
154
155
156
157 /*****************************************************************************/
158 /* Gujarati                                                                  */
159 /*****************************************************************************/
160 "\u0A8C>\u0AB2\u0AC3;"
161 "\u0A8E>\u0A8D;"
162 "\u0A92>\u0A91;"
163 "\u0AA9>\u0AA8;"
164 "\u0AB1>\u0AB0;"
165 "\u0AB4>\u0AB3;"
166 "\u0AC6>\u0AC5;"
167 "\u0ACA>\u0AC9;"
168 "\u0AD1>\u0AD1;"  // <unknown> Stress - Devanagari (\u0951)
169 "\u0AD2>\u0AD2;"  // <unknown> Stress - Devanagari (\u0952)
170 "\u0AD3>\u0AD3;"  // <unknown> Accent - Devanagari (\u0953)
171 "\u0AD4>\u0AD4;"  // <unknown> Accent - Devanagari (\u0954)
172 "\u0AD5>\u0AD5;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
173 "\u0AD6>\u0AC8;"  // AI Length Mark -> Gujarati Vowel Sign AI
174 "\u0AD7>\u0ACC;"  // AU Length Mark -> Gujarati Vowel Sign AU
175 "\u0AD8>\u0A95\u0ABC;"
176 "\u0AD9>\u0A96\u0ABC;"
177 "\u0ADA>\u0A97\u0ABC;"
178 "\u0ADB>\u0A9C\u0ABC;"
179 "\u0ADC>\u0AA1\u0ABC;"
180 "\u0ADD>\u0AA2\u0ABC;"
181 "\u0ADE>\u0AAB\u0ABC;"
182 "\u0ADF>\u0AAF\u0ABC;"
183 "\u0AE1>\u0AB2\u0AC3;"
184 "\u0AE2>\u0ABF\u0ABC;"
185 "\u0AE3>\u0AC0\u0ABC;"
186 "\u0AE4>\u0AE4;"  // <unknown> Danda - Devanagari (\u0964)
187 "\u0AE5>\u0AE5;"  // <unknown> Double Danda - Devanagari (\u0965)
188
189
190
191 /*****************************************************************************/
192 /* Oriya                                                                     */
193 /*****************************************************************************/
194 "\u0B0D>\u0B0F;"
195 "\u0B0E>\u0B0F;"
196 "\u0B11>\u0B13;"
197 "\u0B12>\u0B13;"
198 "\u0B29>\u0B28;"
199 "\u0B31>\u0B30;"
200 "\u0B34>\u0B33;"
201 "\u0B35>\u0B2C;"  // Letter VA -> Oriya Letter BA
202 "\u0B44>\u0B43\u0B3C;"
203 "\u0B45>\u0B47;"
204 "\u0B46>\u0B47;"
205 "\u0B49>\u0B4B;"
206 "\u0B4A>\u0B4B;"
207 "\u0B50>\u0B13\u0B01;"
208 "\u0B51>\u0B51;"  // <unknown> Stress - Devanagari (\u0951)
209 "\u0B52>\u0B52;"  // <unknown> Stress - Devanagari (\u0952)
210 "\u0B53>\u0B53;"  // <unknown> Accent - Devanagari (\u0953)
211 "\u0B54>\u0B54;"  // <unknown> Accent - Devanagari (\u0954)
212 "\u0B55>\u0B55;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
213 "\u0B58>\u0B15\u0B3C;"
214 "\u0B59>\u0B16\u0B3C;"
215 "\u0B5A>\u0B17\u0B3C;"
216 "\u0B5B>\u0B1C\u0B3C;"
217 "\u0B5E>\u0B2B\u0B3C;"
218 "\u0B62>\u0B56\u0B3C;"
219 "\u0B63>\u0B57\u0B3C;"
220 "\u0B64>\u0B64;"  // <unknown> Danda - Devanagari (\u0964)
221 "\u0B65>\u0B65;"  // <unknown> Double Danda - Devanagari (\u0965)
222
223
224
225 /*****************************************************************************/
226 /* Tamil                                                                     */
227 /*****************************************************************************/
228
229 "\u0B81>\u0B81;"  // <unknown> Candrabindu - Devanagari (\u0901), etc.
230 "\u0B8B>\u0BB0\u0BBF;"
231 "\u0B8C>\u0B87;"
232 "\u0B8D>\u0B86;"
233 "\u0B91>\u0B86;"
234 "\u0B96>\u0B95;"
235 "\u0B97>\u0B95;"
236 "\u0B98>\u0B95;"
237 "\u0B9B>\u0B9A;"
238 "\u0B9D>\u0B9A;"
239 "\u0BA0>\u0B9F;"
240 "\u0BA1>\u0B9F;"
241 "\u0BA2>\u0B9F;"
242 "\u0BA5>\u0BA4;"
243 "\u0BA6>\u0BA4;"
244 "\u0BA7>\u0BA4;"
245 "\u0BAB>\u0BAA;"
246 "\u0BAC>\u0BAA;"
247 "\u0BAD>\u0BAA;"
248 "\u0BB6>\u0BB7;"
249 "\u0BBC>\u0BBC;"  // <unknown> Nukta
250 "\u0BBD>\u0BBD;"  // <unknown> Sign Avagraha - Devanagari (\u093D)
251 "\u0BC3>\u0BCD\u0BB0\u0BBF;"
252 "\u0BC4>\u0BCD\u0BB0\u0BBF;"
253 "\u0BC5>\u0BBE;"
254 "\u0BC9>\u0BBE;"
255 "\u0BD0>\u0B93\u0BAE\u0BCD;"
256 "\u0BD1>\u0BD1;"  // <unknown> Stress - Devanagari (\u0951)
257 "\u0BD2>\u0BD2;"  // <unknown> Stress - Devanagari (\u0952)
258 "\u0BD3>\u0BD3;"  // <unknown> Accent - Devanagari (\u0953)
259 "\u0BD4>\u0BD4;"  // <unknown> Accent - Devanagari (\u0954)
260 "\u0BD5>\u0BD5;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
261 "\u0BD6>\u0BC8;"  // AI Length Mark -> Tamil Vowel Sign AI
262 "\u0BD8>\u0B95;"
263 "\u0BD9>\u0B95;"
264 "\u0BDA>\u0B95;"
265 "\u0BDB>\u0B9C;"
266 "\u0BDC>\u0B9F;"
267 "\u0BDD>\u0B9F;"
268 "\u0BDE>\u0BAA;"
269 "\u0BDF>\u0BAF;"
270 "\u0BE0>\u0BB0\u0BBF;"
271 "\u0BE1>\u0B88;"
272 "\u0BE2>\u0BE2;"  // <unknown> Vocalic L - Devanagari (\u0962)
273 "\u0BE3>\u0BE3;"  // <unknown> Vocalic LL - Devanagari (\u0963)
274 "\u0BE4>\u0BE4;"  // <unknown> Danda - Devanagari (\u0964)
275 "\u0BE5>\u0BE5;"  // <unknown> Double Danda - Devanagari (\u0965)
276 "\u0BE6>\u0030;"  // Digit ZERO -> ASCII Digit ZERO
277
278
279
280 /*****************************************************************************/
281 /* Telugu                                                                    */
282 /*****************************************************************************/
283
284 "\u0C0D>\u0C0E;"
285 "\u0C11>\u0C12;"
286 "\u0C29>\u0C28;"
287 "\u0C34>\u0C33;"
288 "\u0C3C>\u0C3C;"  // <unknown> Nukta
289 "\u0C3D>\u0C3D;"  // <unknown> Sign Avagraha - Devanagari (\u093D)
290 "\u0C45>\u0C46;"
291 "\u0C49>\u0C4A;"
292 "\u0C50>\u0C13\u0C02;"
293 "\u0C51>\u0C51;"  // <unknown> Stress - Devanagari (\u0951)
294 "\u0C52>\u0C52;"  // <unknown> Stress - Devanagari (\u0952)
295 "\u0C53>\u0C53;"  // <unknown> Accent - Devanagari (\u0953)
296 "\u0C54>\u0C54;"  // <unknown> Accent - Devanagari (\u0954)
297 "\u0C57>\u0C4C;"
298 "\u0C58>\u0C15;"
299 "\u0C59>\u0C16;"
300 "\u0C5A>\u0C17;"
301 "\u0C5B>\u0C1C;"
302 "\u0C5C>\u0C21;"
303 "\u0C5D>\u0C22;"
304 "\u0C5E>\u0C2B;"
305 "\u0C5F>\u0C2F;"
306 "\u0C62>\u0C3F;"
307 "\u0C63>\u0C40;"
308 "\u0C64>\u0C64;"  // <unknown> Danda - Devanagari (\u0964)
309 "\u0C65>\u0C65;"  // <unknown> Double Danda - Devanagari (\u0965)
310
311
312
313 /*****************************************************************************/
314 /* Kannada                                                                   */
315 /*****************************************************************************/
316
317 "\u0C81>\u0C82;"
318 "\u0C8D>\u0C8E;"
319 "\u0C91>\u0C92;"
320 "\u0CA9>\u0CA8;"
321 "\u0CB4>\u0CB3;"
322 "\u0CBC>\u0CBC;"  // <unknown> Nukta
323 "\u0CBD>\u0CBD;"  // <unknown> Sign Avagraha - Devanagari (\u093D)
324 "\u0CC5>\u0CC6;"
325 "\u0CC9>\u0CCA;"
326 "\u0CD0>\u0C93\u0C82;"
327 "\u0CD1>\u0CD1;"  // <unknown> Stress - Devanagari (\u0951)
328 "\u0CD2>\u0CD2;"  // <unknown> Stress - Devanagari (\u0952)
329 "\u0CD3>\u0CD3;"  // <unknown> Accent - Devanagari (\u0953)
330 "\u0CD4>\u0CD4;"  // <unknown> Accent - Devanagari (\u0954)
331 "\u0CD7>\u0CCC;"
332 "\u0CD8>\u0C95;"
333 "\u0CD9>\u0C96;"
334 "\u0CDA>\u0C97;"
335 "\u0CDB>\u0C9C;"
336 "\u0CDC>\u0CA1;"
337 "\u0CDD>\u0CA2;"
338 "\u0CDF>\u0CAF;"
339 "\u0CE2>\u0CBF;"
340 "\u0CE3>\u0CC0;"
341 "\u0CE4>\u0CE4;"  // <unknown> Danda - Devanagari (\u0964)
342 "\u0CE5>\u0CE5;"  // <unknown> Double Danda - Devanagari (\u0965)
343
344
345
346 /*****************************************************************************/
347 /* Malayalam                                                                 */
348 /*****************************************************************************/
349
350 "\u0D01>\u0D02;"
351 "\u0D0D>\u0D06;"
352 "\u0D11>\u0D13;"
353 "\u0D29>\u0D28;"
354 "\u0D3C>\u0D3C;"  // <unknown> Nukta
355 "\u0D3D>\u0D3D;"  // <unknown> Sign Avagraha - Devanagari (\u093D)
356 "\u0D44>\u0D44;"  // <unknown> Vocalic RR - Devanagari (\u0944)
357 "\u0D45>\u0D3E;"
358 "\u0D49>\u0D4B;"
359 "\u0D50>\u0D50;"  // <unknown> OM - Devanagari (\u0950), Gujarati (\u0AD0)
360 "\u0D51>\u0D51;"  // <unknown> Stress - Devanagari (\u0951)
361 "\u0D52>\u0D52;"  // <unknown> Stress - Devanagari (\u0952)
362 "\u0D53>\u0D53;"  // <unknown> Accent - Devanagari (\u0953)
363 "\u0D54>\u0D54;"  // <unknown> Accent - Devanagari (\u0954)
364 "\u0D55>\u0D55;"  // <unknown> Length Mark - Kannada (\u0CD5), Telugu (\u0C55)
365 "\u0D56>\u0D48;"  // AI Length Mark -> Malayalam Vowel Sign AI
366 "\u0D58>\u0D15;"
367 "\u0D59>\u0D16;"
368 "\u0D5A>\u0D17;"
369 "\u0D5B>\u0D1C;"
370 "\u0D5C>\u0D21;"
371 "\u0D5D>\u0D22;"
372 "\u0D5E>\u0D2B;"
373 "\u0D5F>\u0D2F;"
374 "\u0D62>\u0D62;"  // <unknown> Vocalic L - Devanagari (\u0962)
375 "\u0D63>\u0D63;"  // <unknown> Vocalic LL - Devanagari (\u0963)
376 "\u0D64>\u0D64;"  // <unknown> Danda - Devanagari (\u0964)
377 "\u0D65>\u0D65;"  // <unknown> Double Danda - Devanagari (\u0965)