2 * Copyright (C) 1996-2011, International Business Machines Corporation and
3 * others. All Rights Reserved.
6 package com.ibm.icu.text;
8 import com.ibm.icu.impl.UCaseProps;
9 import com.ibm.icu.lang.UCharacter;
10 import com.ibm.icu.util.ULocale;
13 * A transliterator that converts all letters (as defined by
14 * <code>UCharacter.isLetter()</code>) to lower case, except for those
15 * letters preceded by non-letters. The latter are converted to title
16 * case using <code>UCharacter.toTitleCase()</code>.
19 class TitlecaseTransliterator extends Transliterator {
// Transliterator ID; register() installs this class's factory under this ID.
21 static final String _ID = "Any-Title";
24 * System registration hook.
// Installs a factory under _ID whose getInstance() returns a new
// TitlecaseTransliterator built with ULocale.US (the ID argument is not used in the
// visible factory body), then calls registerSpecialInverse("Title", "Lower", false).
// NOTE(review): the meaning of the trailing `false` flag is not visible here --
// confirm against registerSpecialInverse.
26 static void register() {
27 Transliterator.registerFactory(_ID, new Transliterator.Factory() {
28 public Transliterator getInstance(String ID) {
29 return new TitlecaseTransliterator(ULocale.US);
33 registerSpecialInverse("Title", "Lower", false);
// Locale handed to csp.toFullTitle/toFullLower and to UCharacter.toTitleCase.
36 private ULocale locale;
// Case-property lookup; set to UCaseProps.INSTANCE in the constructor.
38 private UCaseProps csp;
// Reusable iterator: handleTransliterate reads code points from it (nextCaseMapCP)
// and applies replacements through it (replace).
39 private ReplaceableContextIterator iter;
// Reusable buffer passed to toFullTitle/toFullLower; its contents become the
// replacement text when the mapping result is a string.
40 private StringBuilder result;
// One-element scratch array passed to toFullTitle/toFullLower.
// NOTE(review): presumably a locale cache for the case-mapping code -- confirm.
41 private int[] locCache;
44 * Constructs a transliterator.
// Constructs the transliterator and sets up its per-instance working state:
// a maximum context length of 2, UCaseProps.INSTANCE for case lookups, and a
// reusable iterator, result buffer and one-element int array.
// loc: the locale for this instance.
// NOTE(review): the statement that stores loc is not visible in this excerpt --
// presumably it is assigned to the locale field; confirm.
46 public TitlecaseTransliterator(ULocale loc) {
49 // Need to look back 2 characters in the case of "can't"
50 setMaximumContextLength(2);
51 csp=UCaseProps.INSTANCE;
52 iter=new ReplaceableContextIterator();
53 result = new StringBuilder();
54 locCache = new int[1];
59 * Implements {@link Transliterator#handleTransliterate}.
// Applies title/lower case mappings to the range [offsets.start, offsets.limit).
//
// Outline, as far as the visible statements show:
//   1. Look backwards from offsets.start to offsets.contextStart to classify the
//      preceding context (cased / uncased / case-ignorable) and pick the initial mode.
//   2. Point the reusable iterator at the range and walk it code point by code point.
//   3. For each code point that is not case-ignorable, call a full case mapping
//      (toFullTitle or toFullLower) and replace the text when the mapping differs.
//   4. Keep offsets.limit and offsets.contextLimit in step with each replacement and
//      finally move offsets.start up to offsets.limit.
//
// text          - read with char32At during the look-back.
//                 NOTE(review): presumably iter is bound to text on a line not
//                 visible here, so iter.replace edits text -- confirm.
// offsets       - start, limit, contextStart and contextLimit are read; start, limit
//                 and contextLimit are written.
// isIncremental - when true and the iterator reports that it reached its limit,
//                 offsets.start is set to the start of the current code point.
//
// The method is synchronized; it uses the instance's reusable iter, result and
// locCache objects.
61 protected synchronized void handleTransliterate(Replaceable text,
62 Position offsets, boolean isIncremental) {
63 // TODO reimplement, see ustrcase.c
64 // using a real word break iterator
65 // instead of just looking for a transition between cased and uncased characters
66 // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
67 // needs to take isIncremental into account because case mappings are context-sensitive
68 // also detect when lowercasing function did not finish because of context
// Guard for an empty (or inverted) range.
70 if (offsets.start >= offsets.limit) {
74 // case type: >0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable
77 // Our mode; we are either converting letter toTitle or
79 boolean doTitle = true;
81 // Determine if there is a preceding context of cased case-ignorable*,
82 // in which case we want to start in toLower mode. If the
83 // prior context is anything else (including empty) then start
// Backward scan: begins one position before offsets.start, stops below
// offsets.contextStart, and steps back by the char count of the code point just read.
86 for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) {
87 c = text.char32At(start);
88 type=csp.getTypeOrIgnorable(c);
92 } else if(type==0) { // uncased but not ignorable
95 // else (type<0) case-ignorable: continue
98 // Convert things after a cased character toLower; things
99 // after an uncased, non-case-ignorable character toTitle. Case-ignorable
100 // characters are copied directly and do not change the mode.
// Configure the reusable iterator from the caller's offsets before walking the range.
103 iter.setIndex(offsets.start);
104 iter.setLimit(offsets.limit);
105 iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
109 // Walk through original string
110 // If there is a case change, modify corresponding position in replaceable
// The loop ends when nextCaseMapCP() returns a negative value.
113 while((c=iter.nextCaseMapCP())>=0) {
114 type=csp.getTypeOrIgnorable(c);
115 if(type>=0) { // not case-ignorable
// NOTE(review): the branch choosing between the two mappings is not visible in
// this excerpt -- presumably selected by doTitle; confirm.
117 c=csp.toFullTitle(c, iter, result, locale, locCache);
119 c=csp.toFullLower(c, iter, result, locale, locCache);
// The next non-ignorable code point is title-cased only if this one was uncased.
121 doTitle = type==0; // doTitle=isUncased
123 if(iter.didReachLimit() && isIncremental) {
124 // the case mapping function tried to look beyond the context limit
125 // wait for more input
// Record the start of the current code point as the new offsets.start.
126 offsets.start=iter.getCaseMapCPStart();
130 /* decode the result */
132 /* c mapped to itself, no change */
// A return value no greater than MAX_STRING_LENGTH is handled as a string mapping:
// the replacement text is taken from the result buffer.
134 } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
135 /* replace by the mapping string */
136 delta=iter.replace(result.toString());
139 /* replace by single-code point mapping */
140 delta=iter.replace(UTF16.valueOf(c));
// Apply the value returned by iter.replace (presumably the change in text length)
// to both limit and contextLimit.
144 offsets.limit += delta;
145 offsets.contextLimit += delta;
// Mark the whole range as processed.
149 offsets.start = offsets.limit;
152 // NOTE: normally this would be static, but because the results vary by locale....
// Per-instance helper; starts out null and is created on first use inside
// addSourceTargetSet while synchronized on this.
153 SourceTargetUtility sourceTargetUtility = null;
156 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
// Delegates to a lazily created SourceTargetUtility.
// On first use, while holding this object's monitor, the utility is built around a
// Transform whose transform(source) returns UCharacter.toTitleCase(locale, source, null).
// inputFilter, sourceSet, targetSet: forwarded unchanged, together with this, to
// sourceTargetUtility.addSourceTargetSet.
159 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
160 synchronized (this) {
161 if (sourceTargetUtility == null) {
162 sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() {
163 public String transform(String source) {
164 return UCharacter.toTitleCase(locale, source, null);
169 sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);