2 * Copyright (C) 1996-2011, International Business Machines Corporation and
3 * others. All Rights Reserved.
5 package com.ibm.icu.text;
6 import com.ibm.icu.impl.PatternProps;
7 import com.ibm.icu.impl.UCharacterName;
8 import com.ibm.icu.impl.Utility;
9 import com.ibm.icu.lang.UCharacter;
12 * A transliterator that performs name-to-character mapping.
15 class NameUnicodeTransliterator extends Transliterator {
// Transliterator ID under which register() installs the factory for this class.
17 static final String _ID = "Name-Any";
// Pattern handed to Utility.parsePattern to recognize the start of a name escape.
// NOTE(review): the '~' characters presumably tell parsePattern to accept
// optional whitespace at that position -- confirm against Utility.parsePattern.
19 static final String OPEN_PAT = "\\N~{~";
20 static final char OPEN_DELIM = '\\'; // first char of OPEN_PAT
// Character that terminates a name escape.
21 static final char CLOSE_DELIM = '}';
// The single space that a run of whitespace inside a name is collapsed to.
22 static final char SPACE = ' ';
26 * System registration hook.
// Registers a factory for this transliterator under the ID held in _ID
// ("Name-Any"). The factory does not consult the ID string it is given: it
// always returns a new NameUnicodeTransliterator built with a null filter.
28 static void register() {
29 Transliterator.registerFactory(_ID, new Transliterator.Factory() {
30 public Transliterator getInstance(String ID) {
31 return new NameUnicodeTransliterator(null);
37 * Constructs a transliterator.
// filter may be null: the factory installed by register() passes null.
// NOTE(review): presumably forwards _ID and filter to the Transliterator
// superclass constructor -- confirm.
39 public NameUnicodeTransliterator(UnicodeFilter filter) {
44 * Implements {@link Transliterator#handleTransliterate}.
// Scans text from offsets.start up to offsets.limit for name escapes that
// open with OPEN_PAT and close with CLOSE_DELIM. When the accumulated name is
// resolved by UCharacter.getCharFromExtendedName, the whole escape is replaced
// in place by that code point; otherwise the text is left as-is.
//
// The scan is a two-state machine: mode 0 looks for the open delimiter,
// mode 1 collects the characters of the candidate name.
//
//   text          - the text being scanned; edited in place via text.replace
//   offsets       - start and limit are read on entry; start, limit and
//                   contextLimit are rewritten on exit
//   isIncremental - when true and an open-delimiter candidate is still
//                   pending (openPos >= 0), offsets.start is left at that
//                   candidate instead of at the cursor
46 protected void handleTransliterate(Replaceable text,
47 Position offsets, boolean isIncremental) {
49 int maxLen = UCharacterName.INSTANCE.getMaxCharNameLength() + 1; // allow for temporary trailing space
// Buffer that accumulates the candidate name while in mode 1.
51 StringBuffer name = new StringBuffer(maxLen);
53 // Get the legal character set
54 UnicodeSet legal = new UnicodeSet();
55 UCharacterName.INSTANCE.getCharNameCharacters(legal);
// Work on local copies; they are written back to offsets after the loop.
57 int cursor = offsets.start;
58 int limit = offsets.limit;
61 // 0 - looking for open delimiter
62 // 1 - after open delimiter
64 int openPos = -1; // open delim candidate pos
67 while (cursor < limit) {
68 c = text.char32At(cursor);
71 case 0: // looking for open delimiter
72 if (c == OPEN_DELIM) { // quick check first
// NOTE(review): parsePattern presumably returns the index just past the
// matched pattern, or a negative value when there is no match -- confirm.
74 int i = Utility.parsePattern(OPEN_PAT, text, cursor, limit);
// The i < limit test rejects a match that ends exactly at limit.
75 if (i >= 0 && i < limit) {
79 continue; // *** reprocess char32At(cursor)
84 case 1: // after open delimiter
85 // Look for legal chars. If \s+ is found, convert it
86 // to a single space. If closeDelimiter is found, exit
87 // the loop. If any other character is found, exit the
88 // loop. If the limit is reached, exit the loop.
90 // Convert \s+ => SPACE. This assumes there are no
91 // runs of >1 space characters in names.
92 if (PatternProps.isWhiteSpace(c)) {
93 // Ignore leading whitespace
94 if (name.length() > 0 &&
95 name.charAt(name.length()-1) != SPACE) {
97 // If we are too long then abort. maxLen includes
98 // temporary trailing space, so use '>'.
99 if (name.length() > maxLen) {
106 if (c == CLOSE_DELIM) {
108 int len = name.length();
110 // Delete trailing space, if any
112 name.charAt(len-1) == SPACE) {
113 name.setLength(--len);
// Resolve the accumulated name to a code point.
116 c = UCharacter.getCharFromExtendedName(name.toString());
120 // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
121 cursor++; // advance over CLOSE_DELIM
// Replace the whole escape, from the open delimiter through CLOSE_DELIM.
123 String str = UTF16.valueOf(c);
124 text.replace(openPos, cursor, str);
126 // Adjust indices for the change in the length of
127 // the string. Do not assume that str.length() ==
128 // 1, in case of surrogates.
// delta is the number of code units by which the replacement shortened the text.
129 int delta = cursor - openPos - str.length();
132 // assert(cursor == openPos + str.length());
134 // If the lookup failed, we leave things as-is and
135 // still switch to mode 0 and continue.
137 openPos = -1; // close off candidate
138 continue; // *** reprocess char32At(cursor)
141 if (legal.contains(c)) {
142 UTF16.append(name, c);
143 // If we go past the longest possible name then abort.
144 // maxLen includes temporary trailing space, so use '>='.
145 if (name.length() >= maxLen) {
152 --cursor; // Backup and reprocess this character
// Step past c by the number of UTF-16 code units it occupies.
159 cursor += UTF16.getCharCount(c);
// Shift contextLimit by the same amount that limit changed during the scan.
162 offsets.contextLimit += limit - offsets.limit;
163 offsets.limit = limit;
164 // In incremental mode, only advance the cursor up to the last
165 // open delimiter candidate.
166 offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
170 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
// Adds the characters this transliterator may read to sourceSet and the
// characters it may produce to targetSet, limited by the filter.
//
//   inputFilter - passed to getFilterAsUnicodeSet to obtain the filter set
//   sourceSet   - receives the name-syntax characters that the filter admits
//   targetSet   - receives the full range 0..0x10FFFF when any source
//                 character is admitted
//
// Returns without adding anything unless the filter set contains every
// character of UnicodeNameTransliterator.OPEN_DELIM and also CLOSE_DELIM.
173 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
174 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
175 if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.contains(CLOSE_DELIM)) {
176 return; // we have to contain both prefix and suffix
// Build the set of characters that can appear in a name escape.
178 UnicodeSet items = new UnicodeSet()
181 .addAll('a', 'z') // for controls
182 .add('<').add('>') // for controls
183 .add('(').add(')') // for controls
186 .addAll(UnicodeNameTransliterator.OPEN_DELIM)
// Keep only the characters that the filter lets through.
188 items.retainAll(myFilter);
189 if (items.size() > 0) {
190 sourceSet.addAll(items);
191 // could produce any character
192 targetSet.addAll(0, 0x10FFFF);