2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.text.ParsePosition;
10 import java.util.ArrayList;
11 import java.util.List;
13 import com.ibm.icu.impl.PatternProps;
14 import com.ibm.icu.impl.Utility;
17 * A collection of rules used by a RuleBasedNumberFormat to format and
18 * parse numbers. It is the responsibility of a RuleSet to select an
19 * appropriate rule for formatting a particular number and dispatch
20 * control to it, and to arbitrate between different rules when parsing
24 final class NFRuleSet {
25 //-----------------------------------------------------------------------
27 //-----------------------------------------------------------------------
35 * The rule set's regular rules
37 private NFRule[] rules;
40 * The rule set's negative-number rule
42 private NFRule negativeNumberRule = null;
45 * The rule set's fraction rules: element 0 is the proper fraction
46 * (0.x) rule, element 1 is the improper fraction (x.x) rule, and
47 * element 2 is the master (x.0) rule.
49 private NFRule[] fractionRules = new NFRule[3];
52 * True if the rule set is a fraction rule set. A fraction rule set
53 * is a rule set that is used to format the fractional part of a
54 * number. It is called from a >> substitution in another rule set's
55 * fraction rule, and is only called upon to format values between
56 * 0 and 1. A fraction rule set has different rule-selection
57 * behavior than a regular rule set.
59 private boolean isFractionRuleSet = false;
62 * Used to limit recursion for bad rule sets.
64 private int recursionCount = 0;
69 private static final int RECURSION_LIMIT = 50;
71 //-----------------------------------------------------------------------
73 //-----------------------------------------------------------------------
76 * Constructs a rule set.
77 * @param descriptions An array of Strings representing rule set
78 * descriptions. On exit, this rule set's entry in the array will
79 * have been stripped of its rule set name and any trailing whitespace.
80 * @param index The index into "descriptions" of the description
81 * for the rule to be constructed
83 public NFRuleSet(String[] descriptions, int index) throws IllegalArgumentException {
84 String description = descriptions[index];
86 if (description.length() == 0) {
87 throw new IllegalArgumentException("Empty rule set description");
90 // if the description begins with a rule set name (the rule set
91 // name can be omitted in formatter descriptions that consist
92 // of only one rule set), copy it out into our "name" member
93 // and delete it from the description
94 if (description.charAt(0) == '%') {
95 int pos = description.indexOf(':');
97 throw new IllegalArgumentException("Rule set name doesn't end in colon");
99 name = description.substring(0, pos);
100 while (pos < description.length() && PatternProps.isWhiteSpace(description.
103 description = description.substring(pos);
104 descriptions[index] = description;
107 // if the description doesn't begin with a rule set name, its
108 // name is "%default"
113 if (description.length() == 0) {
114 throw new IllegalArgumentException("Empty rule set description");
117 // all of the other members of NFRuleSet are initialized
122 * Construct the subordinate data structures used by this object.
123 * This function is called by the RuleBasedNumberFormat constructor
124 * after all the rule sets have been created to actually parse
125 * the description and build rules from it. Since any rule set
126 * can refer to any other rule set, we have to have created all of
127 * them before we can create anything else.
128 * @param description The textual description of this rule set
129 * @param owner The formatter that owns this rule set
131 public void parseRules(String description,
132 RuleBasedNumberFormat owner) {
133 // start by creating a Vector whose elements are Strings containing
134 // the descriptions of the rules (one rule per element). The rules
135 // are separated by semicolons (there's no escape facility: ALL
136 // semicolons are rule delimiters)
137 List<String> ruleDescriptions = new ArrayList<String>();
140 int p = description.indexOf(';');
143 ruleDescriptions.add(description.substring(oldP, p));
146 if (oldP < description.length()) {
147 ruleDescriptions.add(description.substring(oldP));
151 p = description.indexOf(';', p + 1);
154 // now go back through and build a vector of the rules themselves
155 // (the number of elements in the description list isn't necessarily
156 // the number of rules-- some descriptions may expend into two rules)
157 List<NFRule> tempRules = new ArrayList<NFRule>();
159 // we keep track of the rule before the one we're currently working
160 // on solely to support >>> substitutions
161 NFRule predecessor = null;
162 for (int i = 0; i < ruleDescriptions.size(); i++) {
163 // makeRules (a factory method on NFRule) will return either
164 // a single rule or an array of rules. Either way, add them
165 // to our rule vector
166 Object temp = NFRule.makeRules(ruleDescriptions.get(i),
167 this, predecessor, owner);
169 if (temp instanceof NFRule) {
170 tempRules.add((NFRule)temp);
171 predecessor = (NFRule)temp;
173 else if (temp instanceof NFRule[]) {
174 NFRule[] rulesToAdd = (NFRule[])temp;
176 for (int j = 0; j < rulesToAdd.length; j++) {
177 tempRules.add(rulesToAdd[j]);
178 predecessor = rulesToAdd[j];
182 // now we can bag the description list
183 ruleDescriptions = null;
185 // for rules that didn't specify a base value, their base values
186 // were initialized to 0. Make another pass through the list and
187 // set all those rules' base values. We also remove any special
188 // rules from the list and put them into their own member variables
189 long defaultBaseValue = 0;
191 // (this isn't a for loop because we might be deleting items from
192 // the vector-- we want to make sure we only increment i when
193 // we _didn't_ delete aything from the vector)
195 while (i < tempRules.size()) {
196 NFRule rule = tempRules.get(i);
198 switch ((int)rule.getBaseValue()) {
199 // if the rule's base value is 0, fill in a default
200 // base value (this will be 1 plus the preceding
201 // rule's base value for regular rule sets, and the
202 // same as the preceding rule's base value in fraction
205 rule.setBaseValue(defaultBaseValue);
206 if (!isFractionRuleSet) {
212 // if it's the negative-number rule, copy it into its own
213 // data member and delete it from the list
214 case NFRule.NEGATIVE_NUMBER_RULE:
215 negativeNumberRule = rule;
219 // if it's the improper fraction rule, copy it into the
220 // correct element of fractionRules
221 case NFRule.IMPROPER_FRACTION_RULE:
222 fractionRules[0] = rule;
226 // if it's the proper fraction rule, copy it into the
227 // correct element of fractionRules
228 case NFRule.PROPER_FRACTION_RULE:
229 fractionRules[1] = rule;
233 // if it's the master rule, copy it into the
234 // correct element of fractionRules
235 case NFRule.MASTER_RULE:
236 fractionRules[2] = rule;
240 // if it's a regular rule that already knows its base value,
241 // check to make sure the rules are in order, and update
242 // the default base value for the next rule
244 if (rule.getBaseValue() < defaultBaseValue) {
245 throw new IllegalArgumentException("Rules are not in order, base: " +
246 rule.getBaseValue() + " < " + defaultBaseValue);
248 defaultBaseValue = rule.getBaseValue();
249 if (!isFractionRuleSet) {
257 // finally, we can copy the rules from the vector into a
258 // fixed-length array
259 rules = new NFRule[tempRules.size()];
260 tempRules.toArray(rules);
264 * Flags this rule set as a fraction rule set. This function is
265 * called during the construction process once we know this rule
266 * set is a fraction rule set. We don't know a rule set is a
267 * fraction rule set until we see it used somewhere. This function
268 * is not ad must not be called at any time other than during
269 * construction of a RuleBasedNumberFormat.
271 public void makeIntoFractionRuleSet() {
272 isFractionRuleSet = true;
275 //-----------------------------------------------------------------------
277 //-----------------------------------------------------------------------
280 * Compares two rule sets for equality.
281 * @param that The other rule set
282 * @return true if the two rule sets are functionally equivalent.
284 public boolean equals(Object that) {
285 // if different classes, they're not equal
286 if (!(that instanceof NFRuleSet)) {
289 // otherwise, compare the members one by one...
290 NFRuleSet that2 = (NFRuleSet)that;
292 if (!name.equals(that2.name)
293 || !Utility.objectEquals(negativeNumberRule, that2.negativeNumberRule)
294 || !Utility.objectEquals(fractionRules[0], that2.fractionRules[0])
295 || !Utility.objectEquals(fractionRules[1], that2.fractionRules[1])
296 || !Utility.objectEquals(fractionRules[2], that2.fractionRules[2])
297 || rules.length != that2.rules.length
298 || isFractionRuleSet != that2.isFractionRuleSet) {
303 // ...then compare the rule lists...
304 for (int i = 0; i < rules.length; i++) {
305 if (!rules[i].equals(that2.rules[i])) {
310 // ...and if we make it here, tney're equal
317 * Builds a textual representation of a rule set.
318 * @return A textual representation of a rule set. This won't
319 * necessarily be the same description that the rule set was
320 * constructed with, but it will produce the same results.
322 public String toString() {
323 StringBuilder result = new StringBuilder();
325 // the rule set name goes first...
326 result.append(name + ":\n");
328 // followed by the regular rules...
329 for (int i = 0; i < rules.length; i++) {
330 result.append(" " + rules[i].toString() + "\n");
333 // followed by the special rules (if they exist)
334 if (negativeNumberRule != null) {
335 result.append(" " + negativeNumberRule.toString() + "\n");
337 if (fractionRules[0] != null) {
338 result.append(" " + fractionRules[0].toString() + "\n");
340 if (fractionRules[1] != null) {
341 result.append(" " + fractionRules[1].toString() + "\n");
343 if (fractionRules[2] != null) {
344 result.append(" " + fractionRules[2].toString() + "\n");
347 return result.toString();
350 //-----------------------------------------------------------------------
352 //-----------------------------------------------------------------------
355 * Says whether this rule set is a fraction rule set.
356 * @return true if this rule is a fraction rule set; false if it isn't
358 public boolean isFractionSet() {
359 return isFractionRuleSet;
363 * Returns the rule set's name
364 * @return The rule set's name
366 public String getName() {
371 * Return true if the rule set is public.
372 * @return true if the rule set is public
374 public boolean isPublic() {
375 return !name.startsWith("%%");
379 * Return true if the rule set can be used for parsing.
380 * @return true if the rule set can be used for parsing.
382 public boolean isParseable() {
384 // In CLDR 1.7, we have no distinction between
385 // parseable/unparseable. Rules which have one of
386 // 3 suffixes below are know as unparseable for now.
387 // We should add the information in CLDR data.
388 return !(name.endsWith("-prefixpart")
389 || name.endsWith("-postfixpart")
390 || name.endsWith("-postfx"));
393 //-----------------------------------------------------------------------
395 //-----------------------------------------------------------------------
398 * Formats a long. Selects an appropriate rule and dispatches
400 * @param number The number being formatted
401 * @param toInsertInto The string where the result is to be placed
402 * @param pos The position in toInsertInto where the result of
403 * this operation is to be inserted
405 public void format(long number, StringBuffer toInsertInto, int pos) {
406 NFRule applicableRule = findNormalRule(number);
408 if (++recursionCount >= RECURSION_LIMIT) {
410 throw new IllegalStateException("Recursion limit exceeded when applying ruleSet " + name);
412 applicableRule.doFormat(number, toInsertInto, pos);
417 * Formats a double. Selects an appropriate rule and dispatches
419 * @param number The number being formatted
420 * @param toInsertInto The string where the result is to be placed
421 * @param pos The position in toInsertInto where the result of
422 * this operation is to be inserted
424 public void format(double number, StringBuffer toInsertInto, int pos) {
425 NFRule applicableRule = findRule(number);
427 if (++recursionCount >= RECURSION_LIMIT) {
429 throw new IllegalStateException("Recursion limit exceeded when applying ruleSet " + name);
431 applicableRule.doFormat(number, toInsertInto, pos);
436 * Selects an apropriate rule for formatting the number.
437 * @param number The number being formatted.
438 * @return The rule that should be used to format it
440 private NFRule findRule(double number) {
441 // if this is a fraction rule set, use findFractionRuleSetRule()
442 if (isFractionRuleSet) {
443 return findFractionRuleSetRule(number);
446 // if the number is negative, return the negative number rule
447 // (if there isn't a negative-number rule, we pretend it's a
450 if (negativeNumberRule != null) {
451 return negativeNumberRule;
457 // if the number isn't an integer, we use one f the fraction rules...
458 if (number != Math.floor(number)) {
459 // if the number is between 0 and 1, return the proper
461 if (number < 1 && fractionRules[1] != null) {
462 return fractionRules[1];
465 // otherwise, return the improper fraction rule
466 else if (fractionRules[0] != null) {
467 return fractionRules[0];
471 // if there's a master rule, use it to format the number
472 if (fractionRules[2] != null) {
473 return fractionRules[2];
475 // and if we haven't yet returned a rule, use findNormalRule()
476 // to find the applicable rule
478 return findNormalRule(Math.round(number));
483 * If the value passed to findRule() is a positive integer, findRule()
484 * uses this function to select the appropriate rule. The result will
485 * generally be the rule with the highest base value less than or equal
486 * to the number. There is one exception to this: If that rule has
487 * two substitutions and a base value that is not an even multiple of
488 * its divisor, and the number itself IS an even multiple of the rule's
489 * divisor, then the result will be the rule that preceded the original
490 * result in the rule list. (This behavior is known as the "rollback
491 * rule", and is used to handle optional text: a rule with optional
492 * text is represented internally as two rules, and the rollback rule
493 * selects appropriate between them. This avoids things like "two
495 * @param number The number being formatted
496 * @return The rule to use to format this number
498 private NFRule findNormalRule(long number) {
499 // if this is a fraction rule set, use findFractionRuleSetRule()
500 // to find the rule (we should only go into this clause if the
502 if (isFractionRuleSet) {
503 return findFractionRuleSetRule(number);
506 // if the number is negative, return the negative-number rule
507 // (if there isn't one, pretend the number is positive)
509 if (negativeNumberRule != null) {
510 return negativeNumberRule;
516 // we have to repeat the preceding two checks, even though we
517 // do them in findRule(), because the version of format() that
518 // takes a long bypasses findRule() and goes straight to this
519 // function. This function does skip the fraction rules since
520 // we know the value is an integer (it also skips the master
521 // rule, since it's considered a fraction rule. Skipping the
522 // master rule in this function is also how we avoid infinite
525 // binary-search the rule list for the applicable rule
526 // (a rule is used for all values from its base value to
527 // the next rule's base value)
529 int hi = rules.length;
532 int mid = (lo + hi) / 2;
533 if (rules[mid].getBaseValue() == number) {
536 else if (rules[mid].getBaseValue() > number) {
543 if (hi == 0) { // bad rule set
544 throw new IllegalStateException("The rule set " + name + " cannot format the value " + number);
546 NFRule result = rules[hi - 1];
548 // use shouldRollBack() to see whether we need to invoke the
549 // rollback rule (see shouldRollBack()'s documentation for
550 // an explanation of the rollback rule). If we do, roll back
551 // one rule and return that one instead of the one we'd normally
553 if (result.shouldRollBack(number)) {
554 if (hi == 1) { // bad rule set
555 throw new IllegalStateException("The rule set " + name + " cannot roll back from the rule '" +
558 result = rules[hi - 2];
562 // else use the master rule
563 return fractionRules[2];
567 * If this rule is a fraction rule set, this function is used by
568 * findRule() to select the most appropriate rule for formatting
569 * the number. Basically, the base value of each rule in the rule
570 * set is treated as the denominator of a fraction. Whichever
571 * denominator can produce the fraction closest in value to the
572 * number passed in is the result. If there's a tie, the earlier
573 * one in the list wins. (If there are two rules in a row with the
574 * same base value, the first one is used when the numerator of the
575 * fraction would be 1, and the second rule is used the rest of the
577 * @param number The number being formatted (which will always be
578 * a number between 0 and 1)
579 * @return The rule to use to format this number
581 private NFRule findFractionRuleSetRule(double number) {
582 // the obvious way to do this (multiply the value being formatted
583 // by each rule's base value until you get an integral result)
584 // doesn't work because of rounding error. This method is more
587 // find the least common multiple of the rules' base values
588 // and multiply this by the number being formatted. This is
589 // all the precision we need, and we can do all of the rest
590 // of the math using integer arithmetic
591 long leastCommonMultiple = rules[0].getBaseValue();
592 for (int i = 1; i < rules.length; i++) {
593 leastCommonMultiple = lcm(leastCommonMultiple, rules[i].getBaseValue());
595 long numerator = Math.round(number * leastCommonMultiple);
597 // for each rule, do the following...
599 long difference = Long.MAX_VALUE;
601 for (int i = 0; i < rules.length; i++) {
602 // "numerator" is the numerator of the fraction is the
603 // denominator is the LCD. The numerator if the the rule's
604 // base value is the denomiator is "numerator" times the
605 // base value divided bythe LCD. Here we check to see if
606 // that's an integer, and if not, how close it is to being
608 tempDifference = numerator * rules[i].getBaseValue() % leastCommonMultiple;
610 // normalize the result of the above calculation: we want
611 // the numerator's distance from the CLOSEST multiple
613 if (leastCommonMultiple - tempDifference < tempDifference) {
614 tempDifference = leastCommonMultiple - tempDifference;
617 // if this is as close as we've come, keep track of how close
618 // that is, and the line number of the rule that did it. If
619 // we've scored a direct hit, we don't have to look at any more
621 if (tempDifference < difference) {
622 difference = tempDifference;
624 if (difference == 0) {
630 // if we have two successive rules that both have the winning base
631 // value, then the first one (the one we found above) is used if
632 // the numerator of the fraction is 1 and the second one is used if
633 // the numerator of the fraction is anything else (this lets us
634 // do things like "one third"/"two thirds" without haveing to define
635 // a whole bunch of extra rule sets)
636 if (winner + 1 < rules.length
637 && rules[winner + 1].getBaseValue() == rules[winner].getBaseValue()) {
638 if (Math.round(number * rules[winner].getBaseValue()) < 1
639 || Math.round(number * rules[winner].getBaseValue()) >= 2) {
644 // finally, return the winning rule
645 return rules[winner];
649 * Calculates the least common multiple of x and y.
651 private static long lcm(long x, long y) {
652 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
653 // vol. 2, 1st ed., pp. 298-299
658 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
672 while ((t & 1) == 0) {
684 // x * y == gcd(x, y) * lcm(x, y)
688 //-----------------------------------------------------------------------
690 //-----------------------------------------------------------------------
693 * Parses a string. Matches the string to be parsed against each
694 * of its rules (with a base value less than upperBound) and returns
695 * the value produced by the rule that matched the most charcters
696 * in the source string.
697 * @param text The string to parse
698 * @param parsePosition The initial position is ignored and assumed
699 * to be 0. On exit, this object has been updated to point to the
700 * first character position this rule set didn't consume.
701 * @param upperBound Limits the rules that can be allowed to match.
702 * Only rules whose base values are strictly less than upperBound
704 * @return The numerical result of parsing this string. This will
705 * be the matching rule's base value, composed appropriately with
706 * the results of matching any of its substitutions. The object
707 * will be an instance of Long if it's an integral value; otherwise,
708 * it will be an instance of Double. This function always returns
709 * a valid object: If nothing matched the input string at all,
710 * this function returns new Long(0), and the parse position is
713 public Number parse(String text, ParsePosition parsePosition, double upperBound) {
714 // try matching each rule in the rule set against the text being
715 // parsed. Whichever one matches the most characters is the one
716 // that determines the value we return.
718 ParsePosition highWaterMark = new ParsePosition(0);
719 Number result = new Long(0);
720 Number tempResult = null;
722 // dump out if there's no text to parse
723 if (text.length() == 0) {
727 // start by trying the nehative number rule (if there is one)
728 if (negativeNumberRule != null) {
729 tempResult = negativeNumberRule.doParse(text, parsePosition, false, upperBound);
730 if (parsePosition.getIndex() > highWaterMark.getIndex()) {
732 highWaterMark.setIndex(parsePosition.getIndex());
734 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
735 // if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
736 // highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
738 parsePosition.setIndex(0);
741 // then try each of the fraction rules
742 for (int i = 0; i < 3; i++) {
743 if (fractionRules[i] != null) {
744 tempResult = fractionRules[i].doParse(text, parsePosition, false, upperBound);
745 if (parsePosition.getIndex() > highWaterMark.getIndex()) {
747 highWaterMark.setIndex(parsePosition.getIndex());
749 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
750 // if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
751 // highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
753 parsePosition.setIndex(0);
757 // finally, go through the regular rules one at a time. We start
758 // at the end of the list because we want to try matching the most
759 // sigificant rule first (this helps ensure that we parse
760 // "five thousand three hundred six" as
761 // "(five thousand) (three hundred) (six)" rather than
762 // "((five thousand three) hundred) (six)"). Skip rules whose
763 // base values are higher than the upper bound (again, this helps
764 // limit ambiguity by making sure the rules that match a rule's
765 // are less significant than the rule containing the substitutions)/
766 for (int i = rules.length - 1; i >= 0 && highWaterMark.getIndex() < text.length(); i--) {
767 if (!isFractionRuleSet && rules[i].getBaseValue() >= upperBound) {
771 tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound);
772 if (parsePosition.getIndex() > highWaterMark.getIndex()) {
774 highWaterMark.setIndex(parsePosition.getIndex());
776 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
777 // if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
778 // highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
780 parsePosition.setIndex(0);
783 // finally, update the parse postion we were passed to point to the
784 // first character we didn't use, and return the result that
785 // cporresponds to that string of characters
786 parsePosition.setIndex(highWaterMark.getIndex());
787 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
788 // if (parsePosition.getIndex() == 0) {
789 // parsePosition.setErrorIndex(highWaterMark.getErrorIndex());