2 *******************************************************************************
\r
3 * Copyright (C) 2007-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.text;
\r
10 import java.text.FieldPosition;
\r
11 import java.text.ParsePosition;
\r
12 import java.util.HashMap;
\r
13 import java.util.Locale;
\r
14 import java.util.Map;
\r
15 import java.util.Set;
\r
17 import com.ibm.icu.impl.UCharacterProperty;
\r
18 import com.ibm.icu.util.ULocale;
\r
22 * <code>PluralFormat</code> supports the creation of internationalized
\r
23 * messages with plural inflection. It is based on <i>plural
\r
24 * selection</i>, i.e. the caller specifies messages for each
\r
25 * plural case that can appear in the user's language and the
\r
26 * <code>PluralFormat</code> selects the appropriate message based on
\r
29 * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
\r
31 * Different languages have different ways to inflect
\r
32 * plurals. Creating internationalized messages that include plural
\r
33 * forms is only feasible when the framework is able to handle plural
\r
34 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
\r
35 * doesn't handle this well, because it attaches a number interval to
\r
36 * each message and selects the message whose interval contains a
\r
37 * given number. This can only handle a finite number of
\r
38 * intervals. But in some languages, like Polish, one plural case
\r
39 * applies to infinitely many intervals (e.g., paucal applies to
\r
40 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
\r
41 * 14). Thus <code>ChoiceFormat</code> is not adequate.
\r
43 * <code>PluralFormat</code> deals with this by breaking the problem
\r
46 * <li>It uses <code>PluralRules</code> that can define more complex
\r
47 * conditions for a plural case than just a single interval. These plural
\r
48 * rules define both what plural cases exist in a language, and to
\r
49 * which numbers these cases apply.
\r
50 * <li>It provides predefined plural rules for many locales. Thus, the programmer
\r
51 * need not worry about the plural cases of a language. On the flip side,
\r
52 * the localizer does not have to specify the plural cases; he can simply
\r
53 * use the predefined keywords. The whole plural formatting of messages can
\r
54 * be done using localized patterns from resource bundles.
\r
57 * <h4>Usage of <code>PluralFormat</code></h4>
\r
59 * This discussion assumes that you use <code>PluralFormat</code> with
\r
60 * a predefined set of plural rules. You can create one using one of
\r
61 * the constructors that takes a <code>ULocale</code> object. To
\r
62 * specify the message pattern, you can either pass it to the
\r
63 * constructor or set it explicitly using the
\r
64 * <code>applyPattern()</code> method. The <code>format()</code>
\r
65 * method takes a number object and selects the message of the
\r
66 * matching plural case. This message will be returned.
\r
68 * <h5>Patterns and Their Interpretation</h5>
\r
70 * The pattern text defines the message output for each plural case of the
\r
71 * used locale. The pattern is a sequence of
\r
72 * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
\r
73 * space characters. Each clause assigns the message <code><i>message</i></code>
\r
74 * to the plural case identified by <code><i>caseKeyword</i></code>.
\r
76 * You always have to define a message text for the default plural case
\r
77 * "<code>other</code>" which is contained in every rule set. If the plural
\r
78 * rules of the <code>PluralFormat</code> object do not contain a plural case
\r
79 * identified by <code><i>caseKeyword</i></code>, an
\r
80 * <code>IllegalArgumentException</code> is thrown.
\r
81 * If you do not specify a message text for a particular plural case, the
\r
82 * message text of the plural case "<code>other</code>" gets assigned to this
\r
83 * plural case. If you specify more than one message for the same plural case,
\r
84 * an <code>IllegalArgumentException</code> is thrown.
\r
86 * Spaces between <code><i>caseKeyword</i></code> and
\r
87 * <code><i>message</i></code> will be ignored; spaces within
\r
88 * <code><i>message</i></code> will be preserved.
\r
90 * The message text for a particular plural case may contain other message
\r
91 * format patterns. <code>PluralFormat</code> preserves these so that you
\r
92 * can use the strings produced by <code>PluralFormat</code> with other
\r
93 * formatters. If you are using <code>PluralFormat</code> inside a
\r
94 * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
\r
95 * automatically evaluate the resulting format pattern.<br/>
\r
96 * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
\r
97 * in message texts to define a nested format pattern.<br/>
\r
98 * The pound sign (<code>#</code>) will be interpreted as the number placeholder
\r
99 * in the message text, if it is not contained in curly braces (to preserve
\r
100 * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
\r
101 * replace each of those pound signs by the number passed to the
\r
102 * <code>format()</code> method. It will be formatted using a
\r
103 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
\r
104 * need special number formatting, you have to explicitly specify a
\r
105 * <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
\r
109 * MessageFormat msgFmt = new MessageFormat("{0, plural, " +
\r
110 * "one{{0, number, C''''est #,##0.0# fichier}} " +
\r
111 * "other {Ce sont # fichiers}} dans la liste.",
\r
112 * new ULocale("fr"));
\r
113 * Object args[] = {new Long(0)};
\r
114 * System.out.println(msgFmt.format(args));
\r
115 * args = new Object[] {new Long(3)};
\r
116 * System.out.println(msgFmt.format(args));
\r
118 * Produces the output:<br />
\r
119 * <code>C'est 0,0 fichier dans la liste.</code><br />
\r
120 * <code>Ce sont 3 fichiers dans la liste.</code>
\r
122 * <strong>Note:</strong><br />
\r
123 * Currently <code>PluralFormat</code>
\r
124 * does not make use of quotes like <code>MessageFormat</code>.
\r
125 * If you use plural format strings with <code>MessageFormat</code> and want
\r
126 * to use a quote sign "<code>'</code>", you have to write "<code>''</code>".
\r
127 * <code>MessageFormat</code> unquotes this pattern and passes the unquoted
\r
128 * pattern to <code>PluralFormat</code>. It's a bit trickier if you use
\r
129 * nested formats that do quoting. In the example above, we wanted to insert
\r
130 * "<code>'</code>" in the number format pattern. Since
\r
131 * <code>NumberFormat</code> supports quotes, we had to insert
\r
132 * "<code>''</code>". But since <code>MessageFormat</code> unquotes the
\r
133 * pattern before it gets passed to <code>PluralFormat</code>, we have to
\r
134 * double these quotes, i.e. write "<code>''''</code>".
\r
136 * <h4>Defining Custom Plural Rules</h4>
\r
137 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
\r
138 * create a <code>PluralRules</code> object and pass it to
\r
139 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
\r
140 * constructor, this locale will be used to format the number in the message
\r
143 * For more information about <code>PluralRules</code>, see
\r
144 * {@link PluralRules}.
\r
147 * @author tschumann (Tim Schumann)
\r
150 public class PluralFormat extends UFormat {
\r
151 private static final long serialVersionUID = 1L;
\r
154 * The locale used for standard number formatting and getting the predefined
\r
155 * plural rules (if they were not defined explicitly).
\r
157 private ULocale ulocale = null;
\r
160 * The plural rules used for plural selection.
\r
162 private PluralRules pluralRules = null;
\r
165 * The applied pattern string.
\r
167 private String pattern = null;
\r
170 * The format messages for each plural case. It is a mapping:
\r
171 * <code>String</code>(plural case keyword) --> <code>String</code>
\r
172 * (message for this plural case).
\r
174 private Map<String, String> parsedValues = null;
\r
177 * This <code>NumberFormat</code> is used for the standard formatting of
\r
178 * the number inserted into the message.
\r
180 private NumberFormat numberFormat = null;
\r
183 * Creates a new <code>PluralFormat</code> for the default locale.
\r
184 * This locale will be used to get the set of plural rules and for standard
\r
185 * number formatting.
\r
188 public PluralFormat() {
\r
189 init(null, ULocale.getDefault());
\r
193 * Creates a new <code>PluralFormat</code> for a given locale.
\r
194 * @param ulocale the <code>PluralFormat</code> will be configured with
\r
195 * rules for this locale. This locale will also be used for standard
\r
196 * number formatting.
\r
199 public PluralFormat(ULocale ulocale) {
\r
200 init(null, ulocale);
\r
204 * Creates a new <code>PluralFormat</code> for a given set of rules.
\r
205 * The standard number formatting will be done using the default locale.
\r
206 * @param rules defines the behavior of the <code>PluralFormat</code>
\r
210 public PluralFormat(PluralRules rules) {
\r
211 init(rules, ULocale.getDefault());
\r
215 * Creates a new <code>PluralFormat</code> for a given set of rules.
\r
216 * The standard number formatting will be done using the given locale.
\r
217 * @param ulocale the default number formatting will be done using this
\r
219 * @param rules defines the behavior of the <code>PluralFormat</code>
\r
223 public PluralFormat(ULocale ulocale, PluralRules rules) {
\r
224 init(rules, ulocale);
\r
228 * Creates a new <code>PluralFormat</code> for a given pattern string.
\r
229 * The default locale will be used to get the set of plural rules and for
\r
230 * standard number formatting.
\r
231 * @param pattern the pattern for this <code>PluralFormat</code>.
\r
232 * @throws IllegalArgumentException if the pattern is invalid.
\r
235 public PluralFormat(String pattern) {
\r
236 init(null, ULocale.getDefault());
\r
237 applyPattern(pattern);
\r
241 * Creates a new <code>PluralFormat</code> for a given pattern string and
\r
243 * The locale will be used to get the set of plural rules and for
\r
244 * standard number formatting.
\r
245 * @param ulocale the <code>PluralFormat</code> will be configured with
\r
246 * rules for this locale. This locale will also be used for standard
\r
247 * number formatting.
\r
248 * @param pattern the pattern for this <code>PluralFormat</code>.
\r
249 * @throws IllegalArgumentException if the pattern is invalid.
\r
252 public PluralFormat(ULocale ulocale, String pattern) {
\r
253 init(null, ulocale);
\r
254 applyPattern(pattern);
\r
258 * Creates a new <code>PluralFormat</code> for a given set of rules and a
\r
260 * The standard number formatting will be done using the default locale.
\r
261 * @param rules defines the behavior of the <code>PluralFormat</code>
\r
263 * @param pattern the pattern for this <code>PluralFormat</code>.
\r
264 * @throws IllegalArgumentException if the pattern is invalid.
\r
267 public PluralFormat(PluralRules rules, String pattern) {
\r
268 init(rules, ULocale.getDefault());
\r
269 applyPattern(pattern);
\r
273 * Creates a new <code>PluralFormat</code> for a given set of rules, a
\r
274 * pattern and a locale.
\r
275 * @param ulocale the <code>PluralFormat</code> will be configured with
\r
276 * rules for this locale. This locale will also be used for standard
\r
277 * number formatting.
\r
278 * @param rules defines the behavior of the <code>PluralFormat</code>
\r
280 * @param pattern the pattern for this <code>PluralFormat</code>.
\r
281 * @throws IllegalArgumentException if the pattern is invalid.
\r
284 public PluralFormat(ULocale ulocale, PluralRules rules, String pattern) {
\r
285 init(rules, ulocale);
\r
286 applyPattern(pattern);
\r
290 * Initializes the <code>PluralFormat</code> object.
\r
291 * Postcondition:<br/>
\r
292 * <code>ulocale</code> : is <code>locale</code><br/>
\r
293 * <code>pluralRules</code>: if <code>rules</code> != <code>null</code>
\r
294 * it's set to rules, otherwise it is the
\r
295 * predefined plural rule set for the locale
\r
296 * <code>ulocale</code>.<br/>
\r
297 * <code>parsedValues</code>: is <code>null</code><br/>
\r
298 * <code>pattern</code>: is <code>null</code><br/>
\r
299 * <code>numberFormat</code>: a <code>NumberFormat</code> for the locale
\r
300 * <code>ulocale</code>.
\r
302 private void init(PluralRules rules, ULocale locale) {
\r
304 pluralRules = (rules == null) ? PluralRules.forLocale(ulocale)
\r
306 parsedValues = null;
\r
308 numberFormat = NumberFormat.getInstance(ulocale);
\r
312 * Sets the pattern used by this plural format.
\r
313 * The method parses the pattern and creates a map of format strings
\r
314 * for the plural rules.
\r
315 * Patterns and their interpretation are specified in the class description.
\r
317 * @param pttrn the pattern for this plural format.
\r
318 * @throws IllegalArgumentException if the pattern is invalid.
\r
321 public void applyPattern(String pttrn) {
\r
322 pttrn = pttrn.trim();
\r
324 this.pattern = pttrn;
\r
325 int braceStack = 0;
\r
326 Set<String> ruleNames = pluralRules.getKeywords();
\r
327 parsedValues = new HashMap<String, String>();
\r
329 // Format string has to include keywords.
\r
331 // 0: Reading keyword.
\r
332 // 1: Reading value for preceding keyword.
\r
334 StringBuilder token = new StringBuilder();
\r
335 String currentKeyword = null;
\r
336 boolean readSpaceAfterKeyword = false;
\r
337 for (int i = 0; i < pttrn.length(); ++i) {
\r
338 char ch = pttrn.charAt(i);
\r
340 case 0: // Reading keyword.
\r
341 if (token.length() == 0) {
\r
342 readSpaceAfterKeyword = false;
\r
344 if (UCharacterProperty.isRuleWhiteSpace(ch)) {
\r
345 if (token.length() > 0) {
\r
346 readSpaceAfterKeyword = true;
\r
348 // Skip leading and trailing whitespaces.
\r
351 if (ch == '{') { // End of keyword definition reached.
\r
352 currentKeyword = token.toString().toLowerCase(
\r
354 if (!ruleNames.contains(currentKeyword)) {
\r
355 parsingFailure("Malformed formatting expression. "
\r
356 + "Unknown keyword \"" + currentKeyword
\r
357 + "\" at position " + i + ".");
\r
359 if (parsedValues.get(currentKeyword) != null) {
\r
360 parsingFailure("Malformed formatting expression. "
\r
361 + "Text for case \"" + currentKeyword
\r
362 + "\" at position " + i + " already defined!");
\r
364 token.delete(0, token.length());
\r
369 if (readSpaceAfterKeyword) {
\r
370 parsingFailure("Malformed formatting expression. " +
\r
371 "Invalid keyword definition. Character \"" + ch +
\r
372 "\" at position " + i + " not expected!");
\r
376 case 1: // Reading value.
\r
384 if (braceStack == 0) { // End of value reached.
\r
385 parsedValues.put(currentKeyword, token.toString());
\r
386 token.delete(0, token.length());
\r
388 } else if (braceStack < 0) {
\r
389 parsingFailure("Malformed formatting expression. "
\r
390 + "Braces do not match.");
\r
391 } else { // braceStack > 0
\r
401 if (braceStack != 0) {
\r
403 "Malformed formatting expression. Braces do not match.");
\r
405 checkSufficientDefinition();
\r
409 * Returns the pattern for this PluralFormat.
\r
411 * @return the pattern string
\r
414 public String toPattern() {
\r
419 * Formats a plural message for a given number.
\r
421 * @param number a number for which the plural message should be formatted.
\r
422 * If no pattern has been applied to this
\r
423 * <code>PluralFormat</code> object yet, the formatted number will
\r
425 * @return the string containing the formatted plural message.
\r
428 public final String format(double number) {
\r
429 // If no pattern was applied, return the formatted number.
\r
430 if (parsedValues == null) {
\r
431 return numberFormat.format(number);
\r
434 // Get appropriate format pattern.
\r
435 String selectedRule = pluralRules.select(number);
\r
436 String selectedPattern = parsedValues.get(selectedRule);
\r
437 if (selectedPattern == null) { // Fallback to others.
\r
438 selectedPattern = parsedValues.get(PluralRules.KEYWORD_OTHER);
\r
440 // Get formatted number and insert it into String.
\r
441 // Will replace all '#' which are not inside curly braces by the
\r
442 // formatted number.
\r
443 return insertFormattedNumber(number, selectedPattern);
\r
447 * Formats a plural message for a given number and appends the formatted
\r
448 * message to the given <code>StringBuffer</code>.
\r
449 * @param number a number object (instance of <code>Number</code>) for which
\r
450 * the plural message should be formatted. If no pattern has been
\r
451 * applied to this <code>PluralFormat</code> object yet, the
\r
452 * formatted number will be returned.
\r
453 * Note: If this object is not an instance of <code>Number</code>,
\r
454 * the <code>toAppendTo</code> will not be modified.
\r
455 * @param toAppendTo the formatted message will be appended to this
\r
456 * <code>StringBuffer</code>.
\r
457 * @param pos will be ignored by this method.
\r
458 * @return the string buffer passed in as toAppendTo, with formatted text
\r
460 * @throws IllegalArgumentException if number is not an instance of Number
\r
463 public StringBuffer format(Object number, StringBuffer toAppendTo,
\r
464 FieldPosition pos) {
\r
465 if (number instanceof Number) {
\r
466 toAppendTo.append(format(((Number) number).doubleValue()));
\r
469 throw new IllegalArgumentException("'" + number +
\r
470 "' is not a Number");
\r
474 * This method is not yet supported by <code>PluralFormat</code>.
\r
475 * @param text the string to be parsed.
\r
476 * @param parsePosition defines the position where parsing is to begin,
\r
477 * and upon return, the position where parsing left off. If the position
\r
478 * has not changed upon return, then parsing failed.
\r
479 * @return nothing because this method is not yet implemented.
\r
480 * @throws UnsupportedOperationException will always be thrown by this method.
\r
483 public Number parse(String text, ParsePosition parsePosition) {
\r
484 throw new UnsupportedOperationException();
\r
488 * This method is not yet supported by <code>PluralFormat</code>.
\r
489 * @param source the string to be parsed.
\r
490 * @param pos defines the position where parsing is to begin,
\r
491 * and upon return, the position where parsing left off. If the position
\r
492 * has not changed upon return, then parsing failed.
\r
493 * @return nothing because this method is not yet implemented.
\r
494 * @throws UnsupportedOperationException will always be thrown by this method.
\r
497 public Object parseObject(String source, ParsePosition pos) {
\r
498 throw new UnsupportedOperationException();
\r
502 * Sets the locale used by this <code>PluralFormat</code> object.
\r
503 * Note: Calling this method resets this <code>PluralFormat</code> object,
\r
504 * i.e., a pattern that was applied previously will be removed,
\r
505 * and the NumberFormat is set to the default number format for
\r
506 * the locale. The resulting format behaves the same as one
\r
507 * constructed from {@link #PluralFormat(ULocale)}.
\r
508 * @param ulocale the <code>ULocale</code> used to configure the
\r
509 * formatter. If <code>ulocale</code> is <code>null</code>, the
\r
510 * default locale will be used.
\r
513 public void setLocale(ULocale ulocale) {
\r
514 if (ulocale == null) {
\r
515 ulocale = ULocale.getDefault();
\r
517 init(null, ulocale);
\r
521 * Sets the number format used by this formatter. You only need to
\r
522 * call this if you want a different number format than the default
\r
523 * formatter for the locale.
\r
524 * @param format the number format to use.
\r
527 public void setNumberFormat(NumberFormat format) {
\r
528 numberFormat = format;
\r
532 * Checks if the applied pattern provided enough information,
\r
533 * i.e., if the attribute <code>parsedValues</code> stores enough
\r
534 * information for plural formatting.
\r
535 * Will be called at the end of pattern parsing.
\r
536 * @throws IllegalArgumentException if there's not sufficient information
\r
539 private void checkSufficientDefinition() {
\r
540 // Check that at least the default rule is defined.
\r
541 if (parsedValues.get(PluralRules.KEYWORD_OTHER) == null) {
\r
542 parsingFailure("Malformed formatting expression.\n"
\r
543 + "Value for case \"" + PluralRules.KEYWORD_OTHER
\r
544 + "\" was not defined.");
\r
549 * Helper method that resets the <code>PluralFormat</code> object and throws
\r
550 * an <code>IllegalArgumentException</code> with a given error text.
\r
551 * @param errorText the error text of the exception message.
\r
552 * @throws IllegalArgumentException will always be thrown by this method.
\r
554 private void parsingFailure(String errorText) {
\r
555 // Set PluralFormat to a valid state.
\r
556 init(null, ULocale.getDefault());
\r
557 throw new IllegalArgumentException(errorText);
\r
561 * Helper method that is called during formatting.
\r
562 * It replaces the character '#' by the number used for plural selection in
\r
563 * a message text. Only '#' are replaced, that are not written inside curly
\r
564 * braces. This allows the use of nested number formats.
\r
565 * The number will be formatted using the attribute
\r
566 * <code>numberFormat</code>.
\r
567 * @param number the number used for plural selection.
\r
568 * @param message is the text in which '#' will be replaced.
\r
569 * @return the text with inserted numbers.
\r
571 private String insertFormattedNumber(double number, String message) {
\r
572 if (message == null) {
\r
575 String formattedNumber = numberFormat.format(number);
\r
576 StringBuilder result = new StringBuilder();
\r
577 int braceStack = 0;
\r
578 int startIndex = 0;
\r
579 for (int i = 0; i < message.length(); ++i) {
\r
580 switch (message.charAt(i)) {
\r
588 if (braceStack == 0) {
\r
589 result.append(message.substring(startIndex,i));
\r
590 startIndex = i + 1;
\r
591 result.append(formattedNumber);
\r
596 if (startIndex < message.length()) {
\r
597 result.append(message.substring(startIndex, message.length()));
\r
599 return result.toString();
\r
606 public boolean equals(Object rhs) {
\r
607 return rhs instanceof PluralFormat && equals((PluralFormat) rhs);
\r
611 * Returns true if this equals the provided PluralFormat.
\r
612 * @param rhs the PluralFormat to compare against
\r
613 * @return true if this equals rhs
\r
616 public boolean equals(PluralFormat rhs) {
\r
617 return pluralRules.equals(rhs.pluralRules) &&
\r
618 parsedValues.equals(rhs.parsedValues) &&
\r
619 numberFormat.equals(rhs.numberFormat);
\r
626 public int hashCode() {
\r
627 return pluralRules.hashCode() ^ parsedValues.hashCode();
\r
631 * For debugging purposes only
\r
632 * @return a text representation of the format data.
\r
635 public String toString() {
\r
636 StringBuilder buf = new StringBuilder();
\r
637 buf.append("locale=" + ulocale);
\r
638 buf.append(", rules='" + pluralRules + "'");
\r
639 buf.append(", pattern='" + pattern + "'");
\r
640 buf.append(", parsedValues='" + parsedValues + "'");
\r
641 buf.append(", format='" + numberFormat + "'");
\r
642 return buf.toString();
\r