2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.text.ParsePosition;
10 import java.util.ArrayList;
11 import java.util.List;
13 import com.ibm.icu.impl.PatternProps;
14 import com.ibm.icu.impl.Utility;
17 * A collection of rules used by a RuleBasedNumberFormat to format and
18 * parse numbers. It is the responsibility of a RuleSet to select an
19 * appropriate rule for formatting a particular number and dispatch
20 * control to it, and to arbitrate between different rules when parsing
24 final class NFRuleSet {
25 //-----------------------------------------------------------------------
27 //-----------------------------------------------------------------------
35 * The rule set's regular rules
37 private NFRule[] rules;
40 * The rule set's negative-number rule
42 private NFRule negativeNumberRule = null;
45 * The rule set's fraction rules: element 0 is the proper fraction
46 * (0.x) rule, element 1 is the improper fraction (x.x) rule, and
47 * element 2 is the master (x.0) rule.
49 private NFRule[] fractionRules = new NFRule[3];
52 * True if the rule set is a fraction rule set. A fraction rule set
53 * is a rule set that is used to format the fractional part of a
54 * number. It is called from a >> substitution in another rule set's
55 * fraction rule, and is only called upon to format values between
56 * 0 and 1. A fraction rule set has different rule-selection
57 * behavior than a regular rule set.
59 private boolean isFractionRuleSet = false;
62 * True if the rule set is parseable.
64 private boolean isParseable = true;
67 * Used to limit recursion for bad rule sets.
69 private int recursionCount = 0;
74 private static final int RECURSION_LIMIT = 50;
76 //-----------------------------------------------------------------------
78 //-----------------------------------------------------------------------
81 * Constructs a rule set.
82 * @param descriptions An array of Strings representing rule set
83 * descriptions. On exit, this rule set's entry in the array will
84 * have been stripped of its rule set name and any trailing whitespace.
85 * @param index The index into "descriptions" of the description
86 * for the rule to be constructed
88 public NFRuleSet(String[] descriptions, int index) throws IllegalArgumentException {
89 String description = descriptions[index];
91 if (description.length() == 0) {
92 throw new IllegalArgumentException("Empty rule set description");
95 // if the description begins with a rule set name (the rule set
96 // name can be omitted in formatter descriptions that consist
97 // of only one rule set), copy it out into our "name" member
98 // and delete it from the description
99 if (description.charAt(0) == '%') {
100 int pos = description.indexOf(':');
102 throw new IllegalArgumentException("Rule set name doesn't end in colon");
104 name = description.substring(0, pos);
105 while (pos < description.length() && PatternProps.isWhiteSpace(description.
108 description = description.substring(pos);
109 descriptions[index] = description;
112 // if the description doesn't begin with a rule set name, its
113 // name is "%default"
118 if (description.length() == 0) {
119 throw new IllegalArgumentException("Empty rule set description");
122 if ( name.endsWith("@noparse")) {
123 name = name.substring(0,name.length()-8); // Remove the @noparse from the name
127 // all of the other members of NFRuleSet are initialized
132 * Construct the subordinate data structures used by this object.
133 * This function is called by the RuleBasedNumberFormat constructor
134 * after all the rule sets have been created to actually parse
135 * the description and build rules from it. Since any rule set
136 * can refer to any other rule set, we have to have created all of
137 * them before we can create anything else.
138 * @param description The textual description of this rule set
139 * @param owner The formatter that owns this rule set
141 public void parseRules(String description,
142 RuleBasedNumberFormat owner) {
143 // start by creating a Vector whose elements are Strings containing
144 // the descriptions of the rules (one rule per element). The rules
145 // are separated by semicolons (there's no escape facility: ALL
146 // semicolons are rule delimiters)
147 List<String> ruleDescriptions = new ArrayList<String>();
150 int p = description.indexOf(';');
153 ruleDescriptions.add(description.substring(oldP, p));
156 if (oldP < description.length()) {
157 ruleDescriptions.add(description.substring(oldP));
161 p = description.indexOf(';', p + 1);
164 // now go back through and build a vector of the rules themselves
165 // (the number of elements in the description list isn't necessarily
166 // the number of rules-- some descriptions may expend into two rules)
167 List<NFRule> tempRules = new ArrayList<NFRule>();
169 // we keep track of the rule before the one we're currently working
170 // on solely to support >>> substitutions
171 NFRule predecessor = null;
172 for (int i = 0; i < ruleDescriptions.size(); i++) {
173 // makeRules (a factory method on NFRule) will return either
174 // a single rule or an array of rules. Either way, add them
175 // to our rule vector
176 Object temp = NFRule.makeRules(ruleDescriptions.get(i),
177 this, predecessor, owner);
179 if (temp instanceof NFRule) {
180 tempRules.add((NFRule)temp);
181 predecessor = (NFRule)temp;
183 else if (temp instanceof NFRule[]) {
184 NFRule[] rulesToAdd = (NFRule[])temp;
186 for (int j = 0; j < rulesToAdd.length; j++) {
187 tempRules.add(rulesToAdd[j]);
188 predecessor = rulesToAdd[j];
192 // now we can bag the description list
193 ruleDescriptions = null;
195 // for rules that didn't specify a base value, their base values
196 // were initialized to 0. Make another pass through the list and
197 // set all those rules' base values. We also remove any special
198 // rules from the list and put them into their own member variables
199 long defaultBaseValue = 0;
201 // (this isn't a for loop because we might be deleting items from
202 // the vector-- we want to make sure we only increment i when
203 // we _didn't_ delete aything from the vector)
205 while (i < tempRules.size()) {
206 NFRule rule = tempRules.get(i);
208 switch ((int)rule.getBaseValue()) {
209 // if the rule's base value is 0, fill in a default
210 // base value (this will be 1 plus the preceding
211 // rule's base value for regular rule sets, and the
212 // same as the preceding rule's base value in fraction
215 rule.setBaseValue(defaultBaseValue);
216 if (!isFractionRuleSet) {
222 // if it's the negative-number rule, copy it into its own
223 // data member and delete it from the list
224 case NFRule.NEGATIVE_NUMBER_RULE:
225 negativeNumberRule = rule;
229 // if it's the improper fraction rule, copy it into the
230 // correct element of fractionRules
231 case NFRule.IMPROPER_FRACTION_RULE:
232 fractionRules[0] = rule;
236 // if it's the proper fraction rule, copy it into the
237 // correct element of fractionRules
238 case NFRule.PROPER_FRACTION_RULE:
239 fractionRules[1] = rule;
243 // if it's the master rule, copy it into the
244 // correct element of fractionRules
245 case NFRule.MASTER_RULE:
246 fractionRules[2] = rule;
250 // if it's a regular rule that already knows its base value,
251 // check to make sure the rules are in order, and update
252 // the default base value for the next rule
254 if (rule.getBaseValue() < defaultBaseValue) {
255 throw new IllegalArgumentException("Rules are not in order, base: " +
256 rule.getBaseValue() + " < " + defaultBaseValue);
258 defaultBaseValue = rule.getBaseValue();
259 if (!isFractionRuleSet) {
267 // finally, we can copy the rules from the vector into a
268 // fixed-length array
269 rules = new NFRule[tempRules.size()];
270 tempRules.toArray(rules);
274 * Flags this rule set as a fraction rule set. This function is
275 * called during the construction process once we know this rule
276 * set is a fraction rule set. We don't know a rule set is a
277 * fraction rule set until we see it used somewhere. This function
278 * is not ad must not be called at any time other than during
279 * construction of a RuleBasedNumberFormat.
281 public void makeIntoFractionRuleSet() {
282 isFractionRuleSet = true;
285 //-----------------------------------------------------------------------
287 //-----------------------------------------------------------------------
290 * Compares two rule sets for equality.
291 * @param that The other rule set
292 * @return true if the two rule sets are functionally equivalent.
294 public boolean equals(Object that) {
295 // if different classes, they're not equal
296 if (!(that instanceof NFRuleSet)) {
299 // otherwise, compare the members one by one...
300 NFRuleSet that2 = (NFRuleSet)that;
302 if (!name.equals(that2.name)
303 || !Utility.objectEquals(negativeNumberRule, that2.negativeNumberRule)
304 || !Utility.objectEquals(fractionRules[0], that2.fractionRules[0])
305 || !Utility.objectEquals(fractionRules[1], that2.fractionRules[1])
306 || !Utility.objectEquals(fractionRules[2], that2.fractionRules[2])
307 || rules.length != that2.rules.length
308 || isFractionRuleSet != that2.isFractionRuleSet) {
313 // ...then compare the rule lists...
314 for (int i = 0; i < rules.length; i++) {
315 if (!rules[i].equals(that2.rules[i])) {
320 // ...and if we make it here, tney're equal
325 public int hashCode() {
326 assert false : "hashCode not designed";
332 * Builds a textual representation of a rule set.
333 * @return A textual representation of a rule set. This won't
334 * necessarily be the same description that the rule set was
335 * constructed with, but it will produce the same results.
337 public String toString() {
338 StringBuilder result = new StringBuilder();
340 // the rule set name goes first...
341 result.append(name + ":\n");
343 // followed by the regular rules...
344 for (int i = 0; i < rules.length; i++) {
345 result.append(" " + rules[i].toString() + "\n");
348 // followed by the special rules (if they exist)
349 if (negativeNumberRule != null) {
350 result.append(" " + negativeNumberRule.toString() + "\n");
352 if (fractionRules[0] != null) {
353 result.append(" " + fractionRules[0].toString() + "\n");
355 if (fractionRules[1] != null) {
356 result.append(" " + fractionRules[1].toString() + "\n");
358 if (fractionRules[2] != null) {
359 result.append(" " + fractionRules[2].toString() + "\n");
362 return result.toString();
365 //-----------------------------------------------------------------------
367 //-----------------------------------------------------------------------
370 * Says whether this rule set is a fraction rule set.
371 * @return true if this rule is a fraction rule set; false if it isn't
373 public boolean isFractionSet() {
374 return isFractionRuleSet;
378 * Returns the rule set's name
379 * @return The rule set's name
381 public String getName() {
386 * Return true if the rule set is public.
387 * @return true if the rule set is public
389 public boolean isPublic() {
390 return !name.startsWith("%%");
394 * Return true if the rule set can be used for parsing.
395 * @return true if the rule set can be used for parsing.
397 public boolean isParseable() {
398 return (isParseable);
401 //-----------------------------------------------------------------------
403 //-----------------------------------------------------------------------
406 * Formats a long. Selects an appropriate rule and dispatches
408 * @param number The number being formatted
409 * @param toInsertInto The string where the result is to be placed
410 * @param pos The position in toInsertInto where the result of
411 * this operation is to be inserted
413 public void format(long number, StringBuffer toInsertInto, int pos) {
414 NFRule applicableRule = findNormalRule(number);
416 if (++recursionCount >= RECURSION_LIMIT) {
418 throw new IllegalStateException("Recursion limit exceeded when applying ruleSet " + name);
420 applicableRule.doFormat(number, toInsertInto, pos);
425 * Formats a double. Selects an appropriate rule and dispatches
427 * @param number The number being formatted
428 * @param toInsertInto The string where the result is to be placed
429 * @param pos The position in toInsertInto where the result of
430 * this operation is to be inserted
432 public void format(double number, StringBuffer toInsertInto, int pos) {
433 NFRule applicableRule = findRule(number);
435 if (++recursionCount >= RECURSION_LIMIT) {
437 throw new IllegalStateException("Recursion limit exceeded when applying ruleSet " + name);
439 applicableRule.doFormat(number, toInsertInto, pos);
444 * Selects an apropriate rule for formatting the number.
445 * @param number The number being formatted.
446 * @return The rule that should be used to format it
448 private NFRule findRule(double number) {
449 // if this is a fraction rule set, use findFractionRuleSetRule()
450 if (isFractionRuleSet) {
451 return findFractionRuleSetRule(number);
454 // if the number is negative, return the negative number rule
455 // (if there isn't a negative-number rule, we pretend it's a
458 if (negativeNumberRule != null) {
459 return negativeNumberRule;
465 // if the number isn't an integer, we use one f the fraction rules...
466 if (number != Math.floor(number)) {
467 // if the number is between 0 and 1, return the proper
469 if (number < 1 && fractionRules[1] != null) {
470 return fractionRules[1];
473 // otherwise, return the improper fraction rule
474 else if (fractionRules[0] != null) {
475 return fractionRules[0];
479 // if there's a master rule, use it to format the number
480 if (fractionRules[2] != null) {
481 return fractionRules[2];
483 // and if we haven't yet returned a rule, use findNormalRule()
484 // to find the applicable rule
486 return findNormalRule(Math.round(number));
491 * If the value passed to findRule() is a positive integer, findRule()
492 * uses this function to select the appropriate rule. The result will
493 * generally be the rule with the highest base value less than or equal
494 * to the number. There is one exception to this: If that rule has
495 * two substitutions and a base value that is not an even multiple of
496 * its divisor, and the number itself IS an even multiple of the rule's
497 * divisor, then the result will be the rule that preceded the original
498 * result in the rule list. (This behavior is known as the "rollback
499 * rule", and is used to handle optional text: a rule with optional
500 * text is represented internally as two rules, and the rollback rule
501 * selects appropriate between them. This avoids things like "two
503 * @param number The number being formatted
504 * @return The rule to use to format this number
506 private NFRule findNormalRule(long number) {
507 // if this is a fraction rule set, use findFractionRuleSetRule()
508 // to find the rule (we should only go into this clause if the
510 if (isFractionRuleSet) {
511 return findFractionRuleSetRule(number);
514 // if the number is negative, return the negative-number rule
515 // (if there isn't one, pretend the number is positive)
517 if (negativeNumberRule != null) {
518 return negativeNumberRule;
524 // we have to repeat the preceding two checks, even though we
525 // do them in findRule(), because the version of format() that
526 // takes a long bypasses findRule() and goes straight to this
527 // function. This function does skip the fraction rules since
528 // we know the value is an integer (it also skips the master
529 // rule, since it's considered a fraction rule. Skipping the
530 // master rule in this function is also how we avoid infinite
533 // binary-search the rule list for the applicable rule
534 // (a rule is used for all values from its base value to
535 // the next rule's base value)
537 int hi = rules.length;
540 int mid = (lo + hi) >>> 1;
541 if (rules[mid].getBaseValue() == number) {
544 else if (rules[mid].getBaseValue() > number) {
551 if (hi == 0) { // bad rule set
552 throw new IllegalStateException("The rule set " + name + " cannot format the value " + number);
554 NFRule result = rules[hi - 1];
556 // use shouldRollBack() to see whether we need to invoke the
557 // rollback rule (see shouldRollBack()'s documentation for
558 // an explanation of the rollback rule). If we do, roll back
559 // one rule and return that one instead of the one we'd normally
561 if (result.shouldRollBack(number)) {
562 if (hi == 1) { // bad rule set
563 throw new IllegalStateException("The rule set " + name + " cannot roll back from the rule '" +
566 result = rules[hi - 2];
570 // else use the master rule
571 return fractionRules[2];
575 * If this rule is a fraction rule set, this function is used by
576 * findRule() to select the most appropriate rule for formatting
577 * the number. Basically, the base value of each rule in the rule
578 * set is treated as the denominator of a fraction. Whichever
579 * denominator can produce the fraction closest in value to the
580 * number passed in is the result. If there's a tie, the earlier
581 * one in the list wins. (If there are two rules in a row with the
582 * same base value, the first one is used when the numerator of the
583 * fraction would be 1, and the second rule is used the rest of the
585 * @param number The number being formatted (which will always be
586 * a number between 0 and 1)
587 * @return The rule to use to format this number
589 private NFRule findFractionRuleSetRule(double number) {
590 // the obvious way to do this (multiply the value being formatted
591 // by each rule's base value until you get an integral result)
592 // doesn't work because of rounding error. This method is more
595 // find the least common multiple of the rules' base values
596 // and multiply this by the number being formatted. This is
597 // all the precision we need, and we can do all of the rest
598 // of the math using integer arithmetic
599 long leastCommonMultiple = rules[0].getBaseValue();
600 for (int i = 1; i < rules.length; i++) {
601 leastCommonMultiple = lcm(leastCommonMultiple, rules[i].getBaseValue());
603 long numerator = Math.round(number * leastCommonMultiple);
605 // for each rule, do the following...
607 long difference = Long.MAX_VALUE;
609 for (int i = 0; i < rules.length; i++) {
610 // "numerator" is the numerator of the fraction is the
611 // denominator is the LCD. The numerator if the the rule's
612 // base value is the denomiator is "numerator" times the
613 // base value divided bythe LCD. Here we check to see if
614 // that's an integer, and if not, how close it is to being
616 tempDifference = numerator * rules[i].getBaseValue() % leastCommonMultiple;
618 // normalize the result of the above calculation: we want
619 // the numerator's distance from the CLOSEST multiple
621 if (leastCommonMultiple - tempDifference < tempDifference) {
622 tempDifference = leastCommonMultiple - tempDifference;
625 // if this is as close as we've come, keep track of how close
626 // that is, and the line number of the rule that did it. If
627 // we've scored a direct hit, we don't have to look at any more
629 if (tempDifference < difference) {
630 difference = tempDifference;
632 if (difference == 0) {
638 // if we have two successive rules that both have the winning base
639 // value, then the first one (the one we found above) is used if
640 // the numerator of the fraction is 1 and the second one is used if
641 // the numerator of the fraction is anything else (this lets us
642 // do things like "one third"/"two thirds" without haveing to define
643 // a whole bunch of extra rule sets)
644 if (winner + 1 < rules.length
645 && rules[winner + 1].getBaseValue() == rules[winner].getBaseValue()) {
646 if (Math.round(number * rules[winner].getBaseValue()) < 1
647 || Math.round(number * rules[winner].getBaseValue()) >= 2) {
652 // finally, return the winning rule
653 return rules[winner];
657 * Calculates the least common multiple of x and y.
659 private static long lcm(long x, long y) {
660 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
661 // vol. 2, 1st ed., pp. 298-299
666 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
680 while ((t & 1) == 0) {
692 // x * y == gcd(x, y) * lcm(x, y)
696 //-----------------------------------------------------------------------
698 //-----------------------------------------------------------------------
701 * Parses a string. Matches the string to be parsed against each
702 * of its rules (with a base value less than upperBound) and returns
703 * the value produced by the rule that matched the most charcters
704 * in the source string.
705 * @param text The string to parse
706 * @param parsePosition The initial position is ignored and assumed
707 * to be 0. On exit, this object has been updated to point to the
708 * first character position this rule set didn't consume.
709 * @param upperBound Limits the rules that can be allowed to match.
710 * Only rules whose base values are strictly less than upperBound
712 * @return The numerical result of parsing this string. This will
713 * be the matching rule's base value, composed appropriately with
714 * the results of matching any of its substitutions. The object
715 * will be an instance of Long if it's an integral value; otherwise,
716 * it will be an instance of Double. This function always returns
717 * a valid object: If nothing matched the input string at all,
718 * this function returns new Long(0), and the parse position is
721 public Number parse(String text, ParsePosition parsePosition, double upperBound) {
722 // try matching each rule in the rule set against the text being
723 // parsed. Whichever one matches the most characters is the one
724 // that determines the value we return.
726 ParsePosition highWaterMark = new ParsePosition(0);
727 Number result = Long.valueOf(0);
728 Number tempResult = null;
730 // dump out if there's no text to parse
731 if (text.length() == 0) {
735 // start by trying the nehative number rule (if there is one)
736 if (negativeNumberRule != null) {
737 tempResult = negativeNumberRule.doParse(text, parsePosition, false, upperBound);
738 if (parsePosition.getIndex() > highWaterMark.getIndex()) {
740 highWaterMark.setIndex(parsePosition.getIndex());
742 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
743 // if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
744 // highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
746 parsePosition.setIndex(0);
749 // then try each of the fraction rules
750 for (int i = 0; i < 3; i++) {
751 if (fractionRules[i] != null) {
752 tempResult = fractionRules[i].doParse(text, parsePosition, false, upperBound);
753 if (parsePosition.getIndex() > highWaterMark.getIndex()) {
755 highWaterMark.setIndex(parsePosition.getIndex());
757 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
758 // if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
759 // highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
761 parsePosition.setIndex(0);
765 // finally, go through the regular rules one at a time. We start
766 // at the end of the list because we want to try matching the most
767 // sigificant rule first (this helps ensure that we parse
768 // "five thousand three hundred six" as
769 // "(five thousand) (three hundred) (six)" rather than
770 // "((five thousand three) hundred) (six)"). Skip rules whose
771 // base values are higher than the upper bound (again, this helps
772 // limit ambiguity by making sure the rules that match a rule's
773 // are less significant than the rule containing the substitutions)/
774 for (int i = rules.length - 1; i >= 0 && highWaterMark.getIndex() < text.length(); i--) {
775 if (!isFractionRuleSet && rules[i].getBaseValue() >= upperBound) {
779 tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound);
780 if (parsePosition.getIndex() > highWaterMark.getIndex()) {
782 highWaterMark.setIndex(parsePosition.getIndex());
784 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
785 // if (parsePosition.getErrorIndex() > highWaterMark.getErrorIndex()) {
786 // highWaterMark.setErrorIndex(parsePosition.getErrorIndex());
788 parsePosition.setIndex(0);
791 // finally, update the parse postion we were passed to point to the
792 // first character we didn't use, and return the result that
793 // cporresponds to that string of characters
794 parsePosition.setIndex(highWaterMark.getIndex());
795 // commented out because the error-index API on ParsePosition isn't there in 1.1.x
796 // if (parsePosition.getIndex() == 0) {
797 // parsePosition.setErrorIndex(highWaterMark.getErrorIndex());