2 *******************************************************************************
\r
3 * Copyright (C) 2009-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.text;
\r
10 import java.util.HashMap;
\r
11 import java.util.Map;
\r
13 import com.ibm.icu.util.ULocale;
\r
16 * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
\r
17 * implementation behind setLenientParseMode, which is based on Collator.
\r
19 * @deprecated This API is ICU internal only.
\r
21 public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
\r
22 private Map<String, RbnfLenientScanner> cache;
\r
/**
 * Creates a provider whose scanner cache starts out empty.
 *
 * @deprecated This API is ICU internal only.
 */
public RbnfScannerProviderImpl() {
    cache = new HashMap<String, RbnfLenientScanner>();
}
\r
33 * Returns a collation-based scanner.
\r
35 * Only primary differences are treated as significant. This means that case
\r
36 * differences, accent differences, alternate spellings of the same letter
\r
37 * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
\r
38 * matching the text. In many cases, numerals will be accepted in place of words
\r
39 * or phrases as well.
\r
41 * For example, all of the following will correctly parse as 255 in English in
\r
42 * lenient-parse mode:
\r
43 * <br>"two hundred fifty-five"
\r
44 * <br>"two hundred fifty five"
\r
45 * <br>"TWO HUNDRED FIFTY-FIVE"
\r
46 * <br>"twohundredfiftyfive"
\r
47 * <br>"2 hundred fifty-5"
\r
49 * The Collator used is determined by the locale that was
\r
50 * passed to this object on construction. The description passed to this object
\r
51 * on construction may supply additional collation rules that are appended to the
\r
52 * end of the default collator for the locale, enabling additional equivalences
\r
53 * (such as adding more ignorable characters or permitting spelled-out version of
\r
54 * symbols; see the demo program for examples).
\r
56 * It's important to emphasize that even strict parsing is relatively lenient: it
\r
57 * will accept some text that it won't produce as output. In English, for example,
\r
58 * it will correctly parse "two hundred zero" and "fifteen hundred".
\r
61 * @deprecated This API is ICU internal only.
\r
63 public RbnfLenientScanner get(ULocale locale, String extras) {
\r
64 RbnfLenientScanner result = null;
\r
65 String key = locale.toString() + "/" + extras;
\r
66 synchronized(cache) {
\r
67 result = cache.get(key);
\r
68 if (result != null) {
\r
72 result = createScanner(locale, extras);
\r
73 synchronized(cache) {
\r
74 cache.put(key, result);
\r
81 * @deprecated This API is ICU internal only.
\r
83 protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
\r
84 RuleBasedCollator collator = null;
\r
86 // create a default collator based on the locale,
\r
87 // then pull out that collator's rules, append any additional
\r
88 // rules specified in the description, and create a _new_
\r
89 // collator based on the combination of those rules
\r
90 collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
\r
91 if (extras != null) {
\r
92 String rules = collator.getRules() + extras;
\r
93 collator = new RuleBasedCollator(rules);
\r
95 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
97 catch (Exception e) {
\r
98 // If we get here, it means we have a malformed set of
\r
99 // collation rules, which hopefully won't happen
\r
101 if (true){ // debug hook
\r
102 e.printStackTrace(); System.out.println("++++");
\r
108 return new RbnfLenientScannerImpl(collator);
\r
111 private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
\r
112 private final RuleBasedCollator collator;
\r
// Wraps the collator that all matching in this scanner is based on.
private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
    this.collator = rbc;
}
\r
118 public boolean allIgnorable(String s) {
\r
119 CollationElementIterator iter = collator.getCollationElementIterator(s);
\r
121 int o = iter.next();
\r
122 while (o != CollationElementIterator.NULLORDER
\r
123 && CollationElementIterator.primaryOrder(o) == 0) {
\r
126 return o == CollationElementIterator.NULLORDER;
\r
129 public int[] findText(String str, String key, int startingAt) {
\r
130 int p = startingAt;
\r
133 // basically just isolate smaller and smaller substrings of
\r
134 // the target string (each running to the end of the string,
\r
135 // and with the first one running from startingAt to the end)
\r
136 // and then use prefixLength() to see if the search key is at
\r
137 // the beginning of each substring. This is excruciatingly
\r
138 // slow, but it will locate the key and tell use how long the
\r
139 // matching text was.
\r
140 while (p < str.length() && keyLen == 0) {
\r
141 keyLen = prefixLength(str.substring(p), key);
\r
143 return new int[] { p, keyLen };
\r
147 // if we make it to here, we didn't find it. Return -1 for the
\r
148 // location. The length should be ignored, but set it to 0,
\r
149 // which should be "safe"
\r
150 return new int[] { -1, 0 };
\r
154 // The following method contains the same signature as findText
\r
155 // and has never been used by anything once.
\r
156 @SuppressWarnings("unused")
\r
157 public int[] findText2(String str, String key, int startingAt) {
\r
159 CollationElementIterator strIter = collator.getCollationElementIterator(str);
\r
160 CollationElementIterator keyIter = collator.getCollationElementIterator(key);
\r
164 strIter.setOffset(startingAt);
\r
166 int oStr = strIter.next();
\r
167 int oKey = keyIter.next();
\r
168 while (oKey != CollationElementIterator.NULLORDER) {
\r
169 while (oStr != CollationElementIterator.NULLORDER &&
\r
170 CollationElementIterator.primaryOrder(oStr) == 0)
\r
171 oStr = strIter.next();
\r
173 while (oKey != CollationElementIterator.NULLORDER &&
\r
174 CollationElementIterator.primaryOrder(oKey) == 0)
\r
175 oKey = keyIter.next();
\r
177 if (oStr == CollationElementIterator.NULLORDER) {
\r
178 return new int[] { -1, 0 };
\r
181 if (oKey == CollationElementIterator.NULLORDER) {
\r
185 if (CollationElementIterator.primaryOrder(oStr) ==
\r
186 CollationElementIterator.primaryOrder(oKey)) {
\r
187 keyStart = strIter.getOffset();
\r
188 oStr = strIter.next();
\r
189 oKey = keyIter.next();
\r
191 if (keyStart != -1) {
\r
195 oStr = strIter.next();
\r
200 if (oKey == CollationElementIterator.NULLORDER) {
\r
201 return new int[] { keyStart, strIter.getOffset() - keyStart };
\r
204 return new int[] { -1, 0 };
\r
208 public int prefixLength(String str, String prefix) {
\r
209 // Create two collation element iterators, one over the target string
\r
210 // and another over the prefix.
\r
212 // Previous code was matching "fifty-" against " fifty" and leaving
\r
213 // the number " fifty-7" to parse as 43 (50 - 7).
\r
214 // Also it seems that if we consume the entire prefix, that's ok even
\r
215 // if we've consumed the entire string, so I switched the logic to
\r
218 CollationElementIterator strIter = collator.getCollationElementIterator(str);
\r
219 CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
\r
221 // match collation elements between the strings
\r
222 int oStr = strIter.next();
\r
223 int oPrefix = prefixIter.next();
\r
225 while (oPrefix != CollationElementIterator.NULLORDER) {
\r
226 // skip over ignorable characters in the target string
\r
227 while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
\r
228 CollationElementIterator.NULLORDER) {
\r
229 oStr = strIter.next();
\r
232 // skip over ignorable characters in the prefix
\r
233 while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
\r
234 CollationElementIterator.NULLORDER) {
\r
235 oPrefix = prefixIter.next();
\r
238 // if skipping over ignorables brought to the end of
\r
239 // the prefix, we DID match: drop out of the loop
\r
240 if (oPrefix == CollationElementIterator.NULLORDER) {
\r
244 // if skipping over ignorables brought us to the end
\r
245 // of the target string, we didn't match and return 0
\r
246 if (oStr == CollationElementIterator.NULLORDER) {
\r
250 // match collation elements from the two strings
\r
251 // (considering only primary differences). If we
\r
252 // get a mismatch, dump out and return 0
\r
253 if (CollationElementIterator.primaryOrder(oStr) !=
\r
254 CollationElementIterator.primaryOrder(oPrefix)) {
\r
258 // otherwise, advance to the next character in each string
\r
259 // and loop (we drop out of the loop when we exhaust
\r
260 // collation elements in the prefix)
\r
262 oStr = strIter.next();
\r
263 oPrefix = prefixIter.next();
\r
266 int result = strIter.getOffset();
\r
267 if (oStr != CollationElementIterator.NULLORDER) {
\r