jars/icu4j-52_1/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
   4 * others. All Rights Reserved.                                                *
   5 *******************************************************************************
   6 */
   7
   8 package com.ibm.icu.text;
   9
  10 import java.util.HashMap;
  11 import java.util.Map;
  12
  13 import com.ibm.icu.util.ULocale;
  14
  15 /**
  16  * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
  17  * implementation behind setLenientParseMode, which is based on Collator.
  18  * @internal
  19  * @deprecated This API is ICU internal only.
  20  */
  21 public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
  22     private Map<String, RbnfLenientScanner> cache;
  23
  24     /**
  25      * @internal
  26      * @deprecated This API is ICU internal only.
  27      */
  28     public RbnfScannerProviderImpl() {
  29         cache = new HashMap<String, RbnfLenientScanner>();
  30     }
  31
  32     /**
  33      * Returns a collation-based scanner.
  34      *
  35      * Only primary differences are treated as significant.  This means that case
  36      * differences, accent differences, alternate spellings of the same letter
  37      * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
  38      * matching the text.  In many cases, numerals will be accepted in place of words
  39      * or phrases as well.
  40      *
  41      * For example, all of the following will correctly parse as 255 in English in
  42      * lenient-parse mode:
  43      * <br>"two hundred fifty-five"
  44      * <br>"two hundred fifty five"
  45      * <br>"TWO HUNDRED FIFTY-FIVE"
  46      * <br>"twohundredfiftyfive"
  47      * <br>"2 hundred fifty-5"
  48      *
  49      * The Collator used is determined by the locale that was
  50      * passed to this object on construction.  The description passed to this object
  51      * on construction may supply additional collation rules that are appended to the
  52      * end of the default collator for the locale, enabling additional equivalences
  53      * (such as adding more ignorable characters or permitting spelled-out version of
  54      * symbols; see the demo program for examples).
  55      *
  56      * It's important to emphasize that even strict parsing is relatively lenient: it
  57      * will accept some text that it won't produce as output.  In English, for example,
  58      * it will correctly parse "two hundred zero" and "fifteen hundred".
  59      *
  60      * @internal
  61      * @deprecated This API is ICU internal only.
  62      */
  63     public RbnfLenientScanner get(ULocale locale, String extras) {
  64         RbnfLenientScanner result = null;
  65         String key = locale.toString() + "/" + extras;
  66         synchronized(cache) {
  67             result = cache.get(key);
  68             if (result != null) {
  69                 return result;
  70             }
  71         }
  72         result = createScanner(locale, extras);
  73         synchronized(cache) {
  74             cache.put(key, result);
  75         }
  76         return result;
  77     }
  78
  79     /**
  80      * @internal
  81      * @deprecated This API is ICU internal only.
  82      */
  83     protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
  84         RuleBasedCollator collator = null;
  85         try {
  86             // create a default collator based on the locale,
  87             // then pull out that collator's rules, append any additional
  88             // rules specified in the description, and create a _new_
  89             // collator based on the combination of those rules
  90             collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
  91             if (extras != null) {
  92                 String rules = collator.getRules() + extras;
  93                 collator = new RuleBasedCollator(rules);
  94             }
  95             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
  96         }
  97         catch (Exception e) {
  98             // If we get here, it means we have a malformed set of
  99             // collation rules, which hopefully won't happen
 100             ///CLOVER:OFF
 101             if (true){ // debug hook
 102                 e.printStackTrace(); System.out.println("++++");
 103             }
 104             collator = null;
 105             ///CLOVER:ON
 106         }
 107
 108         return new RbnfLenientScannerImpl(collator);
 109     }
 110
 111     private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
 112         private final RuleBasedCollator collator;
 113
 114         private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
 115             this.collator = rbc;
 116         }
 117
 118         public boolean allIgnorable(String s) {
 119             CollationElementIterator iter = collator.getCollationElementIterator(s);
 120
 121             int o = iter.next();
 122             while (o != CollationElementIterator.NULLORDER
 123                    && CollationElementIterator.primaryOrder(o) == 0) {
 124                 o = iter.next();
 125             }
 126             return o == CollationElementIterator.NULLORDER;
 127         }
 128
 129         public int[] findText(String str, String key, int startingAt) {
 130             int p = startingAt;
 131             int keyLen = 0;
 132
 133             // basically just isolate smaller and smaller substrings of
 134             // the target string (each running to the end of the string,
 135             // and with the first one running from startingAt to the end)
 136             // and then use prefixLength() to see if the search key is at
 137             // the beginning of each substring.  This is excruciatingly
 138             // slow, but it will locate the key and tell use how long the
 139             // matching text was.
 140             while (p < str.length() && keyLen == 0) {
 141                 keyLen = prefixLength(str.substring(p), key);
 142                 if (keyLen != 0) {
 143                     return new int[] { p, keyLen };
 144                 }
 145                 ++p;
 146             }
 147             // if we make it to here, we didn't find it.  Return -1 for the
 148             // location.  The length should be ignored, but set it to 0,
 149             // which should be "safe"
 150             return new int[] { -1, 0 };
 151         }
 152
 153         ///CLOVER:OFF
 154         // The following method contains the same signature as findText
 155         //  and has never been used by anything once.
 156         @SuppressWarnings("unused")
 157         public int[] findText2(String str, String key, int startingAt) {
 158
 159             CollationElementIterator strIter = collator.getCollationElementIterator(str);
 160             CollationElementIterator keyIter = collator.getCollationElementIterator(key);
 161
 162             int keyStart = -1;
 163
 164             strIter.setOffset(startingAt);
 165
 166             int oStr = strIter.next();
 167             int oKey = keyIter.next();
 168             while (oKey != CollationElementIterator.NULLORDER) {
 169                 while (oStr != CollationElementIterator.NULLORDER &&
 170                        CollationElementIterator.primaryOrder(oStr) == 0)
 171                     oStr = strIter.next();
 172
 173                 while (oKey != CollationElementIterator.NULLORDER &&
 174                        CollationElementIterator.primaryOrder(oKey) == 0)
 175                     oKey = keyIter.next();
 176
 177                 if (oStr == CollationElementIterator.NULLORDER) {
 178                     return new int[] { -1, 0 };
 179                 }
 180
 181                 if (oKey == CollationElementIterator.NULLORDER) {
 182                     break;
 183                 }
 184
 185                 if (CollationElementIterator.primaryOrder(oStr) ==
 186                     CollationElementIterator.primaryOrder(oKey)) {
 187                     keyStart = strIter.getOffset();
 188                     oStr = strIter.next();
 189                     oKey = keyIter.next();
 190                 } else {
 191                     if (keyStart != -1) {
 192                         keyStart = -1;
 193                         keyIter.reset();
 194                     } else {
 195                         oStr = strIter.next();
 196                     }
 197                 }
 198             }
 199
 200             if (oKey == CollationElementIterator.NULLORDER) {
 201                 return new int[] { keyStart, strIter.getOffset() - keyStart };
 202             }
 203
 204             return new int[] { -1, 0 };
 205         }
 206         ///CLOVER:ON
 207
 208         public int prefixLength(String str, String prefix) {
 209             // Create two collation element iterators, one over the target string
 210             // and another over the prefix.
 211             //
 212             // Previous code was matching "fifty-" against " fifty" and leaving
 213             // the number " fifty-7" to parse as 43 (50 - 7).
 214             // Also it seems that if we consume the entire prefix, that's ok even
 215             // if we've consumed the entire string, so I switched the logic to
 216             // reflect this.
 217
 218             CollationElementIterator strIter = collator.getCollationElementIterator(str);
 219             CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
 220
 221             // match collation elements between the strings
 222             int oStr = strIter.next();
 223             int oPrefix = prefixIter.next();
 224
 225             while (oPrefix != CollationElementIterator.NULLORDER) {
 226                 // skip over ignorable characters in the target string
 227                 while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
 228                        CollationElementIterator.NULLORDER) {
 229                     oStr = strIter.next();
 230                 }
 231
 232                 // skip over ignorable characters in the prefix
 233                 while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
 234                        CollationElementIterator.NULLORDER) {
 235                     oPrefix = prefixIter.next();
 236                 }
 237
 238                 // if skipping over ignorables brought to the end of
 239                 // the prefix, we DID match: drop out of the loop
 240                 if (oPrefix == CollationElementIterator.NULLORDER) {
 241                     break;
 242                 }
 243
 244                 // if skipping over ignorables brought us to the end
 245                 // of the target string, we didn't match and return 0
 246                 if (oStr == CollationElementIterator.NULLORDER) {
 247                     return 0;
 248                 }
 249
 250                 // match collation elements from the two strings
 251                 // (considering only primary differences).  If we
 252                 // get a mismatch, dump out and return 0
 253                 if (CollationElementIterator.primaryOrder(oStr) !=
 254                     CollationElementIterator.primaryOrder(oPrefix)) {
 255                     return 0;
 256                 }
 257
 258                 // otherwise, advance to the next character in each string
 259                 // and loop (we drop out of the loop when we exhaust
 260                 // collation elements in the prefix)
 261
 262                 oStr = strIter.next();
 263                 oPrefix = prefixIter.next();
 264             }
 265
 266             int result = strIter.getOffset();
 267             if (oStr != CollationElementIterator.NULLORDER) {
 268                 --result;
 269             }
 270             return result;
 271         }
 272     }
 273 }