2 * Copyright (C) 1996-2010, International Business Machines Corporation and
\r
3 * others. All Rights Reserved.
\r
5 package com.ibm.icu.text;
\r
6 import com.ibm.icu.impl.UCharacterName;
\r
7 import com.ibm.icu.impl.UCharacterProperty;
\r
8 import com.ibm.icu.impl.Utility;
\r
9 import com.ibm.icu.lang.UCharacter;
\r
12 * A transliterator that performs name to character mapping.
\r
15 class NameUnicodeTransliterator extends Transliterator {
\r
18 char closeDelimiter;
\r
20 static final String _ID = "Name-Any";
\r
22 static final String OPEN_PAT = "\\N~{~";
\r
23 static final char OPEN_DELIM = '\\'; // first char of OPEN_PAT
\r
24 static final char CLOSE_DELIM = '}';
\r
25 static final char SPACE = ' ';
\r
29 * System registration hook.
\r
31 static void register() {
\r
32 Transliterator.registerFactory(_ID, new Transliterator.Factory() {
\r
33 public Transliterator getInstance(String ID) {
\r
34 return new NameUnicodeTransliterator(null);
\r
40 * Constructs a transliterator.
\r
42 public NameUnicodeTransliterator(UnicodeFilter filter) {
\r
47 * Implements {@link Transliterator#handleTransliterate}.
\r
49 protected void handleTransliterate(Replaceable text,
\r
50 Position offsets, boolean isIncremental) {
\r
52 int maxLen = UCharacterName.INSTANCE.getMaxCharNameLength() + 1; // allow for temporary trailing space
\r
54 StringBuffer name = new StringBuffer(maxLen);
\r
56 // Get the legal character set
\r
57 UnicodeSet legal = new UnicodeSet();
\r
58 UCharacterName.INSTANCE.getCharNameCharacters(legal);
\r
60 int cursor = offsets.start;
\r
61 int limit = offsets.limit;
\r
64 // 0 - looking for open delimiter
\r
65 // 1 - after open delimiter
\r
67 int openPos = -1; // open delim candidate pos
\r
70 while (cursor < limit) {
\r
71 c = text.char32At(cursor);
\r
74 case 0: // looking for open delimiter
\r
75 if (c == OPEN_DELIM) { // quick check first
\r
77 int i = Utility.parsePattern(OPEN_PAT, text, cursor, limit);
\r
78 if (i >= 0 && i < limit) {
\r
82 continue; // *** reprocess char32At(cursor)
\r
87 case 1: // after open delimiter
\r
88 // Look for legal chars. If \s+ is found, convert it
\r
89 // to a single space. If CLOSE_DELIM is found, exit
\r
90 // the loop. If any other character is found, exit the
\r
91 // loop. If the limit is reached, exit the loop.
\r
93 // Convert \s+ => SPACE. This assumes there are no
\r
94 // runs of >1 space characters in names.
\r
95 if (UCharacterProperty.isRuleWhiteSpace(c)) {
\r
96 // Ignore leading whitespace
\r
97 if (name.length() > 0 &&
\r
98 name.charAt(name.length()-1) != SPACE) {
\r
100 // If we are too long then abort. maxLen includes
\r
101 // temporary trailing space, so use '>'.
\r
102 if (name.length() > maxLen) {
\r
109 if (c == CLOSE_DELIM) {
\r
111 int len = name.length();
\r
113 // Delete trailing space, if any
\r
115 name.charAt(len-1) == SPACE) {
\r
116 name.setLength(--len);
\r
119 c = UCharacter.getCharFromExtendedName(name.toString());
\r
121 // Lookup succeeded
\r
123 // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
\r
124 cursor++; // advance over CLOSE_DELIM
\r
126 String str = UTF16.valueOf(c);
\r
127 text.replace(openPos, cursor, str);
\r
129 // Adjust indices for the change in the length of
\r
130 // the string. Do not assume that str.length() ==
\r
131 // 1, in case of surrogates.
\r
132 int delta = cursor - openPos - str.length();
\r
135 // assert(cursor == openPos + str.length());
\r
137 // If the lookup failed, we leave things as-is and
\r
138 // still switch to mode 0 and continue.
\r
140 openPos = -1; // close off candidate
\r
141 continue; // *** reprocess char32At(cursor)
\r
144 if (legal.contains(c)) {
\r
145 UTF16.append(name, c);
\r
146 // If we go past the longest possible name then abort.
\r
147 // maxLen includes temporary trailing space, so use '>='.
\r
148 if (name.length() >= maxLen) {
\r
153 // Invalid character
\r
155 --cursor; // Backup and reprocess this character
\r
162 cursor += UTF16.getCharCount(c);
\r
165 offsets.contextLimit += limit - offsets.limit;
\r
166 offsets.limit = limit;
\r
167 // In incremental mode, only advance the cursor up to the last
\r
168 // open delimiter candidate.
\r
169 offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
\r