/*
 *******************************************************************************
 * Copyright (C) 2004-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 * Copyright (C) 2009 , Yahoo! Inc.                                            *
 *******************************************************************************
 */
\r
8 package com.ibm.icu.text;
\r
10 import java.io.IOException;
\r
11 import java.io.ObjectInputStream;
\r
12 import java.text.FieldPosition;
\r
13 import java.text.Format;
\r
14 import java.text.ParsePosition;
\r
15 import java.util.HashMap;
\r
16 import java.util.Map;
\r
/**
 * <p><code>SelectFormat</code> supports the creation of internationalized
 * messages by selecting phrases based on keywords. The pattern specifies
 * how to map keywords to phrases and provides a default phrase. The
 * object provided to the format method is a string that's matched
 * against the keywords. If there is a match, the corresponding phrase
 * is selected; otherwise, the default phrase is used.</p>
 *
 * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
 *
 * <p>The main use case for the select format is gender based inflection.
 * When names or nouns are inserted into sentences, their gender can affect pronouns,
 * verb forms, articles, and adjectives. Special care needs to be
 * taken for the case where the gender cannot be determined.
 * The impact varies between languages:</p>
 *
 * <ul>
 * <li>English has three genders, and unknown gender is handled as a special
 * case. Names use the gender of the named person (if known), nouns referring
 * to people use natural gender, and inanimate objects are usually neutral.
 * The gender only affects pronouns: "he", "she", "it", "they".
 *
 * <li>German differs from English in that the gender of nouns is rather
 * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is neutral).
 * The gender affects pronouns ("er", "sie", "es"), articles ("der", "die",
 * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes M&#x00E4;dchen").
 *
 * <li>French has only two genders; as in German the gender of nouns
 * is rather arbitrary - for sun and moon, the genders
 * are the opposite of those in German. The gender affects
 * pronouns ("il", "elle"), articles ("le", "la"),
 * adjective forms ("bon", "bonne"), and sometimes
 * verb forms ("all&#x00E9;", "all&#x00E9;e").
 *
 * <li>Polish distinguishes five genders (or noun classes),
 * human masculine, animate non-human masculine, inanimate masculine,
 * feminine, and neuter.
 * </ul>
 *
 * <p>Some other languages have noun classes that are not related to gender,
 * but similar in grammatical use.
 * Some African languages have around 20 noun classes.</p>
 *
 * <p>To enable localizers to create sentence patterns that take their
 * language's gender dependencies into consideration, software has to provide
 * information about the gender associated with a noun or name to
 * <code>MessageFormat</code>.
 * Two main cases can be distinguished:</p>
 *
 * <ul>
 * <li>For people, natural gender information should be maintained for each person.
 * The keywords "male", "female", "mixed" (for groups of people)
 * and "unknown" are used.
 *
 * <li>For nouns, grammatical gender information should be maintained for
 * each noun and per language, e.g., in resource bundles.
 * The keywords "masculine", "feminine", and "neuter" are commonly used,
 * but some languages may require other keywords.
 * </ul>
 *
 * <p>The resulting keyword is provided to <code>MessageFormat</code> as a
 * parameter separate from the name or noun it's associated with. For example,
 * to generate a message such as "Jean went to Paris", three separate arguments
 * would be provided: The name of the person as argument 0, the gender of
 * the person as argument 1, and the name of the city as argument 2.
 * The sentence pattern for English, where the gender of the person has
 * no impact on this simple sentence, would not refer to argument 1 at all:</p>
 *
 * <pre>{0} went to {2}.</pre>
 *
 * <p>The sentence pattern for French, where the gender of the person affects
 * the form of the participle, uses a select format based on argument 1:</p>
 *
 * <pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>
 *
 * <p>Patterns can be nested, so that it's possible to handle interactions of
 * number and gender where necessary. For example, if the above sentence should
 * allow for the names of several people to be inserted, the following sentence
 * pattern can be used (with argument 0 the list of people's names,
 * argument 1 the number of people, argument 2 their combined gender, and
 * argument 3 the city name):</p>
 *
 * <pre>{0} {1, plural,
 *         one {est {2, select, female {all&#x00E9;e} other {all&#x00E9;}}}
 *         other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
 *       }&#x00E0; {3}.</pre>
 *
 * <h4>Patterns and Their Interpretation</h4>
 *
 * <p>The <code>SelectFormat</code> pattern text defines the phrase output
 * for each user-defined keyword.
 * The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
 * clauses, separated by white space characters.
 * Each clause assigns the phrase <code><i>phrase</i></code>
 * to the user-defined <code><i>keyword</i></code>.</p>
 *
 * <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; a pattern
 * containing a keyword that doesn't match this pattern is rejected with an
 * <code>IllegalArgumentException</code>.
 * You always have to define a phrase for the default keyword
 * <code>other</code>; this phrase is returned when the keyword
 * provided to
 * the <code>format</code> method matches no other keyword.
 * If a pattern does not provide a phrase for <code>other</code>, or if it
 * provides more than one phrase for the same keyword,
 * <code>applyPattern</code> (and therefore the constructor) throws an
 * <code>IllegalArgumentException</code>.
 * <br/>
 * Spaces between <code><i>keyword</i></code> and
 * <code>{<i>phrase</i>}</code> will be ignored; spaces within
 * <code>{<i>phrase</i>}</code> will be preserved.</p>
 *
 * <p>The phrase for a particular select case may contain other message
 * format patterns. <code>SelectFormat</code> preserves these so that you
 * can use the strings produced by <code>SelectFormat</code> with other
 * formatters. If you are using <code>SelectFormat</code> inside a
 * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
 * automatically evaluate the resulting format pattern.
 * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
 * in phrases to define a nested format pattern.</p>
 *
 * <p>Example:</p>
 * <pre>
 * MessageFormat msgFmt = new MessageFormat("{0} est " +
 *     "{1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris.",
 *     new ULocale("fr"));
 * Object args[] = {"Kirti","female"};
 * System.out.println(msgFmt.format(args));
 * </pre>
 * <p>
 * Produces the output:<br/>
 * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
 * </p>
 *
 * @provisional This API might change or be removed in a future release.
 */
public class SelectFormat extends Format {
    // Generated by serialver from JDK 1.5
    private static final long serialVersionUID = 2993154333257524984L;

    /*
     * The applied pattern string. It is null when no pattern is in effect,
     * which is the state init() establishes after a failed applyPattern.
     */
    private String pattern = null;

    /*
     * The format messages for each select case. It is a mapping from
     * String (select case keyword) to String (message for this select case).
     * The field is transient: readObject rebuilds it from the pattern.
     */
    private transient Map<String, String> parsedValues = null;

    /**
     * Common name for the default select form. This name is returned
     * for values to which no other form in the rule applies. It
     * can additionally be assigned rules of its own.
     *
     * @provisional This API might change or be removed in a future release.
     */
    private static final String KEYWORD_OTHER = "other";

    /*
     * The types of character classifications.
     */
    private enum CharacterClass {
        T_START_KEYWORD, T_CONTINUE_KEYWORD, T_LEFT_BRACE,
        T_RIGHT_BRACE, T_SPACE, T_OTHER
    }

    /*
     * The different states needed in the state machine
     * in the applyPattern method.
     */
    private enum State {
        START_STATE, KEYWORD_STATE,
        PAST_KEYWORD_STATE, PHRASE_STATE
    }

    /**
     * Creates a new <code>SelectFormat</code> for a given pattern string.
     *
     * @param pattern the pattern for this <code>SelectFormat</code>.
     * @throws IllegalArgumentException when the pattern is <code>null</code> or
     *         is not a valid select format pattern.
     * @provisional This API might change or be removed in a future release.
     */
    public SelectFormat(String pattern) {
        init();
        applyPattern(pattern);
    }

    /*
     * Initializes the <code>SelectFormat</code> object.
     * Postcondition:<br/>
     *   <code>parsedValues</code>: is <code>null</code><br/>
     *   <code>pattern</code>:      is <code>null</code><br/>
     */
    private void init() {
        parsedValues = null;
        pattern = null;
    }

    /*
     * Checks whether a string is a syntactically valid keyword, i.e. whether
     * it matches [a-zA-Z][a-zA-Z0-9_-]*. A null or empty string is not valid.
     */
    private static boolean checkValidKeyword(String argKeyword) {
        // null is rejected here so that format(String) reports it as an
        // IllegalArgumentException, like format(Object, ...) does.
        if (argKeyword == null || argKeyword.length() < 1) {
            return false;
        }
        if (classifyCharacter(argKeyword.charAt(0)) != CharacterClass.T_START_KEYWORD) {
            return false;
        }
        for (int i = 1; i < argKeyword.length(); i++) {
            CharacterClass type = classifyCharacter(argKeyword.charAt(i));
            if (type != CharacterClass.T_START_KEYWORD
                    && type != CharacterClass.T_CONTINUE_KEYWORD) {
                return false;
            }
        }
        return true;
    }

    /*
     * Classifies a single pattern character: ASCII letters may start a keyword,
     * ASCII digits, '-' and '_' may continue one, braces delimit phrases, and
     * space and tab count as white space. Everything else is T_OTHER.
     */
    private static CharacterClass classifyCharacter(char ch) {
        if ((ch >= 'A') && (ch <= 'Z')) {
            return CharacterClass.T_START_KEYWORD;
        }
        if ((ch >= 'a') && (ch <= 'z')) {
            return CharacterClass.T_START_KEYWORD;
        }
        if ((ch >= '0') && (ch <= '9')) {
            return CharacterClass.T_CONTINUE_KEYWORD;
        }
        switch (ch) {
            case '{':
                return CharacterClass.T_LEFT_BRACE;
            case '}':
                return CharacterClass.T_RIGHT_BRACE;
            case ' ':
            case '\t':
                return CharacterClass.T_SPACE;
            case '-':
            case '_':
                return CharacterClass.T_CONTINUE_KEYWORD;
            default:
                return CharacterClass.T_OTHER;
        }
    }

    /**
     * Sets the pattern used by this select format.
     * Patterns and their interpretation are specified in the class description.
     * If the pattern is rejected, this object is reset: no pattern is in effect
     * afterwards and <code>toPattern()</code> returns <code>null</code>.
     *
     * @param pattern the pattern for this select format.
     * @throws IllegalArgumentException when the pattern is <code>null</code> or
     *         is not a valid select format pattern.
     * @provisional This API might change or be removed in a future release.
     */
    public void applyPattern(String pattern) {
        if (pattern == null) {
            // Fail before touching any state, instead of dying with a
            // NullPointerException halfway through initialization.
            parsingFailure("Pattern must not be null.");
        }
        this.pattern = pattern;
        parsedValues = new HashMap<String, String>();

        StringBuilder keyword = new StringBuilder();
        StringBuilder phrase = new StringBuilder();
        // Nesting depth of braces inside the phrase currently being read.
        int braceCount = 0;

        State state = State.START_STATE;
        for (int i = 0; i < pattern.length(); i++) {
            char ch = pattern.charAt(i);
            CharacterClass type = classifyCharacter(ch);

            switch (state) {
                // Between clauses: skip white space, wait for a keyword.
                case START_STATE:
                    switch (type) {
                        case T_SPACE:
                            break;
                        case T_START_KEYWORD:
                            state = State.KEYWORD_STATE;
                            keyword.append(ch);
                            break;
                        // If anything else is encountered, it's a syntax error
                        default:
                            parsingFailure("Pattern syntax error.");
                    }
                    break;

                // Inside a keyword.
                case KEYWORD_STATE:
                    switch (type) {
                        case T_SPACE:
                            state = State.PAST_KEYWORD_STATE;
                            break;
                        case T_START_KEYWORD:
                        case T_CONTINUE_KEYWORD:
                            keyword.append(ch);
                            break;
                        case T_LEFT_BRACE:
                            state = State.PHRASE_STATE;
                            break;
                        // If anything else is encountered, it's a syntax error
                        default:
                            parsingFailure("Pattern syntax error.");
                    }
                    break;

                // After a keyword: only white space or the opening brace may follow.
                case PAST_KEYWORD_STATE:
                    switch (type) {
                        case T_SPACE:
                            break;
                        case T_LEFT_BRACE:
                            state = State.PHRASE_STATE;
                            break;
                        // If anything else is encountered, it's a syntax error
                        default:
                            parsingFailure("Pattern syntax error.");
                    }
                    break;

                // Inside a phrase: copy everything, tracking nested braces.
                case PHRASE_STATE:
                    switch (type) {
                        case T_LEFT_BRACE:
                            braceCount++;
                            phrase.append(ch);
                            break;
                        case T_RIGHT_BRACE:
                            if (braceCount > 0) {
                                // Closes a nested brace; it belongs to the phrase.
                                braceCount--;
                                phrase.append(ch);
                            } else {
                                // Closes the clause: a keyword/phrase pair is complete.
                                String key = keyword.toString();
                                if (parsedValues.containsKey(key)) {
                                    parsingFailure("Duplicate keyword error.");
                                }
                                if (key.length() == 0) {
                                    parsingFailure("Pattern syntax error.");
                                }
                                parsedValues.put(key, phrase.toString());

                                // Reinitialize for the next clause.
                                keyword.setLength(0);
                                phrase.setLength(0);
                                state = State.START_STATE;
                            }
                            break;
                        default:
                            phrase.append(ch);
                    }
                    break;

                // Handle the default case of switch(state)
                default:
                    parsingFailure("Pattern syntax error.");
            }
        }

        // The pattern must not end in the middle of a clause.
        if (state != State.START_STATE) {
            parsingFailure("Pattern syntax error.");
        }

        // Check if "other" keyword is present
        if (!checkSufficientDefinition()) {
            parsingFailure("Pattern syntax error. "
                    + "Value for case \"" + KEYWORD_OTHER
                    + "\" was not defined. ");
        }
    }

    /**
     * Returns the pattern for this <code>SelectFormat</code>.
     *
     * @return the pattern string, or <code>null</code> if no pattern is in effect.
     * @provisional This API might change or be removed in a future release.
     */
    public String toPattern() {
        return pattern;
    }

    /**
     * Selects the phrase for the given keyword. A keyword that is well formed
     * but has no phrase of its own selects the phrase of <code>other</code>.
     *
     * @param keyword a keyword for which the select message should be formatted.
     * @return the string containing the formatted select message.
     * @throws IllegalArgumentException when the given keyword is <code>null</code>
     *         or does not match [a-zA-Z][a-zA-Z0-9_-]*.
     * @throws IllegalStateException when no pattern is in effect.
     * @provisional This API might change or be removed in a future release.
     */
    public final String format(String keyword) {
        // Check for the validity of the keyword
        if (!checkValidKeyword(keyword)) {
            throw new IllegalArgumentException("Invalid formatting argument.");
        }

        // If no pattern was applied, throw an exception
        if (parsedValues == null) {
            throw new IllegalStateException("Invalid format error.");
        }

        String selectedPattern = parsedValues.get(keyword);
        if (selectedPattern == null) { // Fallback to others.
            selectedPattern = parsedValues.get(KEYWORD_OTHER);
        }
        return selectedPattern;
    }

    /**
     * Selects the phrase for the given keyword
     * and appends the formatted message to the given <code>StringBuffer</code>.
     *
     * @param keyword a keyword for which the select message should be formatted;
     *        must be a <code>String</code>.
     * @param toAppendTo the formatted message will be appended to this
     *        <code>StringBuffer</code>.
     * @param pos will be ignored by this method.
     * @throws IllegalArgumentException when the given keyword is not a
     *         <code>String</code> or is not a well-formed keyword.
     * @return the string buffer passed in as toAppendTo, with formatted text
     *         appended.
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public StringBuffer format(Object keyword, StringBuffer toAppendTo,
            FieldPosition pos) {
        if (!(keyword instanceof String)) {
            throw new IllegalArgumentException("'" + keyword + "' is not a String");
        }
        toAppendTo.append(format((String) keyword));
        return toAppendTo;
    }

    /**
     * This method is not supported by <code>SelectFormat</code>.
     *
     * @param source the string to be parsed.
     * @param pos defines the position where parsing is to begin,
     *        and upon return, the position where parsing left off. If the position
     *        has not changed upon return, then parsing failed.
     * @return nothing because this method is not supported.
     * @throws UnsupportedOperationException thrown always.
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public Object parseObject(String source, ParsePosition pos) {
        throw new UnsupportedOperationException();
    }

    /*
     * Checks if the applied pattern provided enough information,
     * i.e., if the attribute <code>parsedValues</code> stores enough
     * information for select formatting.
     * Will be called at the end of pattern parsing.
     */
    private boolean checkSufficientDefinition() {
        // Check that at least the default rule is defined.
        return parsedValues.get(KEYWORD_OTHER) != null;
    }

    /*
     * Helper method that resets the <code>SelectFormat</code> object and throws
     * an <code>IllegalArgumentException</code> with a given error text.
     * @param errorText the error text of the exception message.
     * @throws IllegalArgumentException will always be thrown by this method.
     */
    private void parsingFailure(String errorText) {
        // Set SelectFormat to a valid state.
        init();
        throw new IllegalArgumentException(errorText);
    }

    /**
     * Compares this format with another object. Two <code>SelectFormat</code>
     * objects are equal when their pattern strings are equal.
     *
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof SelectFormat)) {
            return false;
        }
        SelectFormat sf = (SelectFormat) obj;
        return pattern == null ? sf.pattern == null : pattern.equals(sf.pattern);
    }

    /**
     * Returns the hash code of the pattern string, or 0 when no pattern is in effect.
     *
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public int hashCode() {
        return pattern != null ? pattern.hashCode() : 0;
    }

    /**
     * Returns a string representation of the object.
     *
     * @return a text representation of the format object, of the form
     *         <code>pattern='...'</code> with the pattern string returned by
     *         <code>toPattern()</code>.
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public String toString() {
        return "pattern='" + pattern + "'";
    }

    /*
     * Restores the serialized fields and re-parses the pattern, because
     * parsedValues is transient and is not part of the serialized form.
     */
    private void readObject(ObjectInputStream in)
            throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        if (pattern != null) {
            applyPattern(pattern);
        }
    }
}
\r