2 * Copyright (C) 1996-2007, International Business Machines Corporation and
\r
3 * others. All Rights Reserved.
\r
6 package com.ibm.icu.text;
\r
8 import java.io.IOException;
\r
10 import com.ibm.icu.impl.UCaseProps;
\r
12 import com.ibm.icu.util.ULocale;
\r
14 import com.ibm.icu.text.ReplaceableContextIterator;
\r
17 * A transliterator that converts all letters (as defined by
\r
18 * <code>UCharacter.isLetter()</code>) to lower case, except for those
\r
19 * letters preceded by non-letters. The latter are converted to title
\r
20 * case using <code>UCharacter.toTitleCase()</code>.
\r
23 class TitlecaseTransliterator extends Transliterator {
\r
25 static final String _ID = "Any-Title";
\r
28 * System registration hook.
\r
30 static void register() {
\r
31 Transliterator.registerFactory(_ID, new Transliterator.Factory() {
\r
32 public Transliterator getInstance(String ID) {
\r
33 return new TitlecaseTransliterator(ULocale.US);
\r
37 registerSpecialInverse("Title", "Lower", false);
\r
40 private ULocale locale;
\r
42 private UCaseProps csp;
\r
43 private ReplaceableContextIterator iter;
\r
44 private StringBuffer result;
\r
45 private int[] locCache;
\r
48 * Constructs a transliterator.
\r
50 public TitlecaseTransliterator(ULocale loc) {
\r
53 // Need to look back 2 characters in the case of "can't"
\r
54 setMaximumContextLength(2);
\r
56 csp=UCaseProps.getSingleton();
\r
57 } catch (IOException e) {
\r
60 iter=new ReplaceableContextIterator();
\r
61 result = new StringBuffer();
\r
62 locCache = new int[1];
\r
67 * Implements {@link Transliterator#handleTransliterate}.
\r
69 protected void handleTransliterate(Replaceable text,
\r
70 Position offsets, boolean isIncremental) {
\r
71 // TODO reimplement, see ustrcase.c
\r
72 // using a real word break iterator
\r
73 // instead of just looking for a transition between cased and uncased characters
\r
74 // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
\r
75 // needs to take isIncremental into account because case mappings are context-sensitive
\r
76 // also detect when lowercasing function did not finish because of context
\r
78 if (offsets.start >= offsets.limit) {
\r
82 // case type: >0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable
\r
85 // Our mode; we are either converting letter toTitle or
\r
87 boolean doTitle = true;
\r
89 // Determine if there is a preceding context of cased case-ignorable*,
\r
90 // in which case we want to start in toLower mode. If the
\r
91 // prior context is anything else (including empty) then start
\r
94 for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) {
\r
95 c = text.char32At(start);
\r
96 type=csp.getTypeOrIgnorable(c);
\r
97 if(type>0) { // cased
\r
100 } else if(type==0) { // uncased but not ignorable
\r
103 // else (type<0) case-ignorable: continue
\r
106 // Convert things after a cased character toLower; things
\r
107 // after an uncased, non-case-ignorable character toTitle. Case-ignorable
\r
108 // characters are copied directly and do not change the mode.
\r
110 iter.setText(text);
\r
111 iter.setIndex(offsets.start);
\r
112 iter.setLimit(offsets.limit);
\r
113 iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
\r
115 result.setLength(0);
\r
117 // Walk through original string
\r
118 // If there is a case change, modify corresponding position in replaceable
\r
121 while((c=iter.nextCaseMapCP())>=0) {
\r
122 type=csp.getTypeOrIgnorable(c);
\r
123 if(type>=0) { // not case-ignorable
\r
125 c=csp.toFullTitle(c, iter, result, locale, locCache);
\r
127 c=csp.toFullLower(c, iter, result, locale, locCache);
\r
129 doTitle = type==0; // doTitle=isUncased
\r
131 if(iter.didReachLimit() && isIncremental) {
\r
132 // the case mapping function tried to look beyond the context limit
\r
133 // wait for more input
\r
134 offsets.start=iter.getCaseMapCPStart();
\r
138 /* decode the result */
\r
140 /* c mapped to itself, no change */
\r
142 } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
\r
143 /* replace by the mapping string */
\r
144 delta=iter.replace(result.toString());
\r
145 result.setLength(0);
\r
147 /* replace by single-code point mapping */
\r
148 delta=iter.replace(UTF16.valueOf(c));
\r
152 offsets.limit += delta;
\r
153 offsets.contextLimit += delta;
\r
157 offsets.start = offsets.limit;
\r