2 *******************************************************************************
3 * Copyright (C) 2010-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * created on: 2010aug21
7 * created by: Markus W. Scherer
10 package com.ibm.icu.text;
12 import java.util.ArrayList;
13 import java.util.Locale;
15 import com.ibm.icu.impl.ICUConfig;
16 import com.ibm.icu.impl.PatternProps;
17 import com.ibm.icu.util.Freezable;
19 //Note: Minimize ICU dependencies, only use a very small part of the ICU core.
20 //In particular, do not depend on *Format classes.
23 * Parses and represents ICU MessageFormat patterns.
24 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
25 * Used in the implementations of those classes as well as in tools
26 * for message validation, translation and format conversion.
28 * The parser handles all syntax relevant for identifying message arguments.
29 * This includes "complex" arguments whose style strings contain
30 * nested MessageFormat pattern substrings.
31 * For "simple" arguments (with no nested MessageFormat pattern substrings),
32 * the argument style is not parsed any further.
34 * The parser handles named and numbered message arguments and allows both in one message.
36 * Once a pattern has been parsed successfully, iterate through the parsed data
37 * with countParts(), getPart() and related methods.
39 * The data logically represents a parse tree, but is stored and accessed
40 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
41 * Arguments and nested messages are best handled via recursion.
42 * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
43 * the index of the corresponding _LIMIT "part".
47 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
48 * argument = noneArg | simpleArg | complexArg
49 * complexArg = choiceArg | pluralArg | selectArg
51 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
52 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
53 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
54 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
55 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
57 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
58 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
59 * selectStyle = (ARG_SELECTOR message)+
62 * <li>Literal output text is not represented directly by "parts" but accessed
63 * between parts of a message, from one part's getLimit() to the next part's getIndex().
64 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
65 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
66 * the less-than-or-equal-to sign (U+2264).
67 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
68 * The optional numeric Part between each (ARG_SELECTOR, message) pair
69 * is the value of an explicit-number selector like "=2",
70 * otherwise the selector is a non-numeric identifier.
71 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
73 * This class is not intended for public subclassing.
76 * @author Markus Scherer
78 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> {
80 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
81 * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
82 * (/com/ibm/icu/ICUConfig.properties).
84 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
85 * even when the pair is between two single, text-quoting apostrophes.
87 * The following table shows examples of desired MessageFormat.format() output
88 * with the pattern strings that yield that output.
92 * <th>Desired output</th>
93 * <th>DOUBLE_OPTIONAL</th>
94 * <th>DOUBLE_REQUIRED</th>
97 * <td>I see {many}</td>
98 * <td>I see '{many}'</td>
102 * <td>I said {'Wow!'}</td>
103 * <td>I said '{''Wow!''}'</td>
107 * <td>I don't know</td>
108 * <td>I don't know OR<br> I don''t know</td>
109 * <td>I don''t know</td>
114 public enum ApostropheMode {
116 * A literal apostrophe is represented by
117 * either a single or a double apostrophe pattern character.
118 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
119 * if it immediately precedes a curly brace {},
120 * or a pipe symbol | if inside a choice format,
121 * or a pound symbol # if inside a plural format.
123 * This is the default behavior starting with ICU 4.8.
128 * A literal apostrophe must be represented by
129 * a double apostrophe pattern character.
130 * A single apostrophe always starts quoted literal text.
132 * This is the behavior of ICU 4.6 and earlier, and of the JDK.
139 * Constructs an empty MessagePattern with default ApostropheMode.
142 public MessagePattern() {
143 aposMode=defaultAposMode;
147 * Constructs an empty MessagePattern.
148 * @param mode Explicit ApostropheMode.
151 public MessagePattern(ApostropheMode mode) {
156 * Constructs a MessagePattern with default ApostropheMode and
157 * parses the MessageFormat pattern string.
158 * @param pattern a MessageFormat pattern string
159 * @throws IllegalArgumentException for syntax errors in the pattern string
160 * @throws IndexOutOfBoundsException if certain limits are exceeded
161 * (e.g., argument number too high, argument name too long, etc.)
162 * @throws NumberFormatException if a number could not be parsed
165 public MessagePattern(String pattern) {
166 aposMode=defaultAposMode;
171 * Parses a MessageFormat pattern string.
172 * @param pattern a MessageFormat pattern string
174 * @throws IllegalArgumentException for syntax errors in the pattern string
175 * @throws IndexOutOfBoundsException if certain limits are exceeded
176 * (e.g., argument number too high, argument name too long, etc.)
177 * @throws NumberFormatException if a number could not be parsed
180 public MessagePattern parse(String pattern) {
182 parseMessage(0, 0, 0, ArgType.NONE);
188 * Parses a ChoiceFormat pattern string.
189 * @param pattern a ChoiceFormat pattern string
191 * @throws IllegalArgumentException for syntax errors in the pattern string
192 * @throws IndexOutOfBoundsException if certain limits are exceeded
193 * (e.g., argument number too high, argument name too long, etc.)
194 * @throws NumberFormatException if a number could not be parsed
197 public MessagePattern parseChoiceStyle(String pattern) {
199 parseChoiceStyle(0, 0);
205 * Parses a PluralFormat pattern string.
206 * @param pattern a PluralFormat pattern string
208 * @throws IllegalArgumentException for syntax errors in the pattern string
209 * @throws IndexOutOfBoundsException if certain limits are exceeded
210 * (e.g., argument number too high, argument name too long, etc.)
211 * @throws NumberFormatException if a number could not be parsed
214 public MessagePattern parsePluralStyle(String pattern) {
216 parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0);
222 * Parses a SelectFormat pattern string.
223 * @param pattern a SelectFormat pattern string
225 * @throws IllegalArgumentException for syntax errors in the pattern string
226 * @throws IndexOutOfBoundsException if certain limits are exceeded
227 * (e.g., argument number too high, argument name too long, etc.)
228 * @throws NumberFormatException if a number could not be parsed
231 public MessagePattern parseSelectStyle(String pattern) {
233 parsePluralOrSelectStyle(ArgType.SELECT, 0, 0);
239 * Clears this MessagePattern.
240 * countParts() will return 0.
243 public void clear() {
244 // Mostly the same as preParse().
246 throw new UnsupportedOperationException(
247 "Attempt to clear() a frozen MessagePattern instance.");
250 hasArgNames=hasArgNumbers=false;
251 needsAutoQuoting=false;
253 if(numericValues!=null) {
254 numericValues.clear();
259 * Clears this MessagePattern and sets the ApostropheMode.
260 * countParts() will return 0.
261 * @param mode The new ApostropheMode.
264 public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
270 * @param other another object to compare with.
271 * @return true if this object is equivalent to the other one.
275 public boolean equals(Object other) {
279 if(other==null || getClass()!=other.getClass()) {
282 MessagePattern o=(MessagePattern)other;
284 aposMode.equals(o.aposMode) &&
285 (msg==null ? o.msg==null : msg.equals(o.msg)) &&
286 parts.equals(o.parts);
287 // No need to compare numericValues if msg and parts are the same.
295 public int hashCode() {
296 return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode();
300 * @return this instance's ApostropheMode.
303 public ApostropheMode getApostropheMode() {
308 * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
311 /* package */ boolean jdkAposMode() {
312 return aposMode == ApostropheMode.DOUBLE_REQUIRED;
316 * @return the parsed pattern string (null if none was parsed).
319 public String getPatternString() {
324 * Does the parsed pattern have named arguments like {first_name}?
325 * @return true if the parsed pattern has at least one named argument.
328 public boolean hasNamedArguments() {
333 * Does the parsed pattern have numbered arguments like {2}?
334 * @return true if the parsed pattern has at least one numbered argument.
337 public boolean hasNumberedArguments() {
338 return hasArgNumbers;
346 public String toString() {
351 * Validates and parses an argument name or argument number string.
352 * An argument name must be a "pattern identifier", that is, it must contain
353 * no Unicode Pattern_Syntax or Pattern_White_Space characters.
354 * If it only contains ASCII digits, then it must be a small integer with no leading zero.
355 * @param name Input string.
356 * @return >=0 if the name is a valid number,
357 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
358 * ARG_NAME_NOT_VALID (-2) if it is neither.
361 public static int validateArgumentName(String name) {
362 if(!PatternProps.isIdentifier(name)) {
363 return ARG_NAME_NOT_VALID;
365 return parseArgNumber(name, 0, name.length());
369 * Return value from {@link #validateArgumentName(String)} for when
370 * the string is a valid "pattern identifier" but not a number.
373 public static final int ARG_NAME_NOT_NUMBER=-1;
376 * Return value from {@link #validateArgumentName(String)} for when
377 * the string is invalid.
378 * It might not be a valid "pattern identifier",
379 * or it might have only ASCII digits but there is a leading zero or the number is too large.
382 public static final int ARG_NAME_NOT_VALID=-2;
385 * Returns a version of the parsed pattern string where each ASCII apostrophe
386 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
388 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
389 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
390 * @return the deep-auto-quoted version of the parsed pattern string.
391 * @see MessageFormat#autoQuoteApostrophe(String)
394 public String autoQuoteApostropheDeep() {
395 if(!needsAutoQuoting) {
398 StringBuilder modified=null;
399 // Iterate backward so that the insertion indexes do not change.
400 int count=countParts();
401 for(int i=count; i>0;) {
403 if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) {
405 modified=new StringBuilder(msg.length()+10).append(msg);
407 modified.insert(part.index, (char)part.value);
413 return modified.toString();
418 * Returns the number of "parts" created by parsing the pattern string.
419 * Returns 0 if no pattern has been parsed or clear() was called.
420 * @return the number of pattern parts.
423 public int countParts() {
428 * Gets the i-th pattern "part".
429 * @param i The index of the Part data. (0..countParts()-1)
430 * @return the i-th pattern "part".
431 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
434 public Part getPart(int i) {
439 * Returns the Part.Type of the i-th pattern "part".
440 * Convenience method for getPart(i).getType().
441 * @param i The index of the Part data. (0..countParts()-1)
442 * @return The Part.Type of the i-th Part.
443 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
446 public Part.Type getPartType(int i) {
447 return parts.get(i).type;
451 * Returns the pattern index of the specified pattern "part".
452 * Convenience method for getPart(partIndex).getIndex().
453 * @param partIndex The index of the Part data. (0..countParts()-1)
454 * @return The pattern index of this Part.
455 * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
458 public int getPatternIndex(int partIndex) {
459 return parts.get(partIndex).index;
463 * Returns the substring of the pattern string indicated by the Part.
464 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
465 * @param part a part of this MessagePattern.
466 * @return the substring associated with part.
469 public String getSubstring(Part part) {
470 int index=part.index;
471 return msg.substring(index, index+part.length);
475 * Compares the part's substring with the input string s.
476 * @param part a part of this MessagePattern.
478 * @return true if getSubstring(part).equals(s).
481 public boolean partSubstringMatches(Part part, String s) {
482 return msg.regionMatches(part.index, s, 0, part.length);
486 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
487 * @param part a part of this MessagePattern.
488 * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
491 public double getNumericValue(Part part) {
492 Part.Type type=part.type;
493 if(type==Part.Type.ARG_INT) {
495 } else if(type==Part.Type.ARG_DOUBLE) {
496 return numericValues.get(part.value);
498 return NO_NUMERIC_VALUE;
503 * Special value that is returned by getNumericValue(Part) when no
504 * numeric value is defined for a part.
505 * @see #getNumericValue
508 public static final double NO_NUMERIC_VALUE=-123456789;
511 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
512 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
513 * @return the "offset:" value.
514 * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
517 public double getPluralOffset(int pluralStart) {
518 Part part=parts.get(pluralStart);
519 if(part.type.hasNumericValue()) {
520 return getNumericValue(part);
527 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
528 * @param start The index of some Part data (0..countParts()-1);
529 * this Part should be of Type ARG_START or MSG_START.
530 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
531 * or start itself if getPartType(start)!=ARG|MSG_START.
532 * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
535 public int getLimitPartIndex(int start) {
536 int limit=parts.get(start).limitPartIndex;
544 * A message pattern "part", representing a pattern parsing event.
545 * There is a part for the start and end of a message or argument,
546 * for quoting and escaping of and with ASCII apostrophes,
547 * and for syntax elements of "complex" arguments.
550 public static final class Part {
551 private Part(Type t, int i, int l, int v) {
559 * Returns the type of this part.
560 * @return the part type.
563 public Type getType() {
568 * Returns the pattern string index associated with this Part.
569 * @return this part's pattern string index.
572 public int getIndex() {
577 * Returns the length of the pattern substring associated with this Part.
578 * This is 0 for some parts.
579 * @return this part's pattern substring length.
582 public int getLength() {
587 * Returns the pattern string limit (exclusive-end) index associated with this Part.
588 * Convenience method for getIndex()+getLength().
589 * @return this part's pattern string limit index, same as getIndex()+getLength().
592 public int getLimit() {
597 * Returns a value associated with this part.
598 * See the documentation of each part type for details.
599 * @return the part value.
602 public int getValue() {
607 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
608 * otherwise ArgType.NONE.
609 * @return the argument type for this part.
612 public ArgType getArgType() {
614 if(type==Type.ARG_START || type==Type.ARG_LIMIT) {
615 return argTypes[value];
622 * Part type constants.
627 * Start of a message pattern (main or nested).
628 * The length is 0 for the top-level message
629 * and for a choice argument sub-message, otherwise 1 for the '{'.
630 * The value indicates the nesting level, starting with 0 for the main message.
632 * There is always a later MSG_LIMIT part.
637 * End of a message pattern (main or nested).
638 * The length is 0 for the top-level message and
639 * the last sub-message of a choice argument,
640 * otherwise 1 for the '}' or (in a choice argument style) the '|'.
641 * The value indicates the nesting level, starting with 0 for the main message.
646 * Indicates a substring of the pattern string which is to be skipped when formatting.
647 * For example, an apostrophe that begins or ends quoted text
648 * would be indicated with such a part.
649 * The value is undefined and currently always 0.
654 * Indicates that a syntax character needs to be inserted for auto-quoting.
656 * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
661 * Indicates a syntactic (non-escaped) # symbol in a plural variant.
662 * When formatting, replace this part's substring with the
663 * (value-offset) for the plural argument value.
664 * The value is undefined and currently always 0.
669 * Start of an argument.
670 * The length is 1 for the '{'.
671 * The value is the ordinal value of the ArgType. Use getArgType().
673 * This part is followed by either an ARG_NUMBER or ARG_NAME,
674 * followed by optional argument sub-parts (see ArgType constants)
675 * and finally an ARG_LIMIT part.
680 * End of an argument.
681 * The length is 1 for the '}'.
682 * The value is the ordinal value of the ArgType. Use getArgType().
687 * The argument number, provided by the value.
693 * The value is undefined and currently always 0.
699 * The value is undefined and currently always 0.
704 * The argument style text.
705 * The value is undefined and currently always 0.
710 * A selector substring in a "complex" argument style.
711 * The value is undefined and currently always 0.
716 * An integer value, for example the offset or an explicit selector value
717 * in a PluralFormat style.
718 * The part value is the integer value.
723 * A numeric value, for example the offset or an explicit selector value
724 * in a PluralFormat style.
725 * The part value is an index into an internal array of numeric values;
726 * use getNumericValue().
732 * Indicates whether this part has a numeric value.
733 * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
734 * @return true if this part has a numeric value.
737 public boolean hasNumericValue() {
738 return this==ARG_INT || this==ARG_DOUBLE;
743 * @return a string representation of this part.
747 public String toString() {
748 String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ?
749 getArgType().name() : Integer.toString(value);
750 return type.name()+"("+valueString+")@"+index;
754 * @param other another object to compare with.
755 * @return true if this object is equivalent to the other one.
759 public boolean equals(Object other) {
763 if(other==null || getClass()!=other.getClass()) {
768 type.equals(o.type) &&
772 limitPartIndex==o.limitPartIndex;
780 public int hashCode() {
781 return ((type.hashCode()*37+index)*37+length)*37+value;
784 private static final int MAX_LENGTH=0xffff;
785 private static final int MAX_VALUE=Short.MAX_VALUE;
787 // Some fields are not final because they are modified during pattern parsing.
788 // After pattern parsing, the parts are effectively immutable.
789 private final Type type;
790 private final int index;
791 private final char length;
793 private int limitPartIndex;
797 * Argument type constants.
798 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
800 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
801 * with a nesting level one greater than the surrounding message.
804 public enum ArgType {
806 * The argument has no specified type.
811 * The argument has a "simple" type which is provided by the ARG_TYPE part.
812 * An ARG_STYLE part might follow that.
817 * The argument is a ChoiceFormat with one or more
818 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
823 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
825 * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
826 * If the selector has an explicit value (e.g., =2), then
827 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
828 * Otherwise the message immediately follows the ARG_SELECTOR.
833 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
838 * The argument is an ordinal-number PluralFormat
839 * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
845 * @return true if the argument type has a plural style part sequence and semantics,
846 * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}.
849 public boolean hasPluralStyle() {
850 return this == PLURAL || this == SELECTORDINAL;
855 * Creates and returns a copy of this object.
856 * @return a copy of this object (or itself if frozen).
860 public Object clone() {
864 return cloneAsThawed();
869 * Creates and returns an unfrozen copy of this object.
870 * @return a copy of this object.
873 @SuppressWarnings("unchecked")
874 public MessagePattern cloneAsThawed() {
875 MessagePattern newMsg;
877 newMsg=(MessagePattern)super.clone();
878 } catch (CloneNotSupportedException e) {
879 throw new RuntimeException(e);
881 newMsg.parts=(ArrayList<Part>)parts.clone();
882 if(numericValues!=null) {
883 newMsg.numericValues=(ArrayList<Double>)numericValues.clone();
890 * Freezes this object, making it immutable and thread-safe.
894 public MessagePattern freeze() {
900 * Determines whether this object is frozen (immutable) or not.
901 * @return true if this object is frozen.
904 public boolean isFrozen() {
908 private void preParse(String pattern) {
910 throw new UnsupportedOperationException(
911 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
914 hasArgNames=hasArgNumbers=false;
915 needsAutoQuoting=false;
917 if(numericValues!=null) {
918 numericValues.clear();
922 private void postParse() {
923 // Nothing to be done currently.
926 private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
927 if(nestingLevel>Part.MAX_VALUE) {
928 throw new IndexOutOfBoundsException();
930 int msgStart=parts.size();
931 addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
932 index+=msgStartLength;
933 while(index<msg.length()) {
934 char c=msg.charAt(index++);
936 if(index==msg.length()) {
937 // The apostrophe is the last character in the pattern.
938 // Add a Part for auto-quoting.
939 addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted
940 needsAutoQuoting=true;
944 // double apostrophe, skip the second one
945 addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
947 aposMode==ApostropheMode.DOUBLE_REQUIRED ||
949 (parentType==ArgType.CHOICE && c=='|') ||
950 (parentType.hasPluralStyle() && c=='#')
952 // skip the quote-starting apostrophe
953 addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
954 // find the end of the quoted literal text
956 index=msg.indexOf('\'', index+1);
958 if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
959 // double apostrophe inside quoted literal text
960 // still encodes a single apostrophe, skip the second one
961 addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
963 // skip the quote-ending apostrophe
964 addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
968 // The quoted text reaches to the end of the message.
970 // Add a Part for auto-quoting.
971 addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted
972 needsAutoQuoting=true;
977 // Interpret the apostrophe as literal text.
978 // Add a Part for auto-quoting.
979 addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted
980 needsAutoQuoting=true;
983 } else if(parentType.hasPluralStyle() && c=='#') {
984 // The unquoted # in a plural message fragment will be replaced
985 // with the (number-offset).
986 addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
988 index=parseArg(index-1, 1, nestingLevel);
989 } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
990 // Finish the message before the terminator.
991 // In a choice style, report the "}" substring only for the following ARG_LIMIT,
992 // not for this MSG_LIMIT.
993 int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
994 addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
995 if(parentType==ArgType.CHOICE) {
996 // Let the choice style parser see the '}' or '|'.
999 // continue parsing after the '}'
1002 } // else: c is part of literal text
1004 if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
1005 throw new IllegalArgumentException(
1006 "Unmatched '{' braces in message "+prefix());
1008 addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
1012 private int parseArg(int index, int argStartLength, int nestingLevel) {
1013 int argStart=parts.size();
1014 ArgType argType=ArgType.NONE;
1015 addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
1016 int nameIndex=index=skipWhiteSpace(index+argStartLength);
1017 if(index==msg.length()) {
1018 throw new IllegalArgumentException(
1019 "Unmatched '{' braces in message "+prefix());
1021 // parse argument name or number
1022 index=skipIdentifier(index);
1023 int number=parseArgNumber(nameIndex, index);
1025 int length=index-nameIndex;
1026 if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
1027 throw new IndexOutOfBoundsException(
1028 "Argument number too large: "+prefix(nameIndex));
1031 addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
1032 } else if(number==ARG_NAME_NOT_NUMBER) {
1033 int length=index-nameIndex;
1034 if(length>Part.MAX_LENGTH) {
1035 throw new IndexOutOfBoundsException(
1036 "Argument name too long: "+prefix(nameIndex));
1039 addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
1040 } else { // number<-1 (ARG_NAME_NOT_VALID)
1041 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
1043 index=skipWhiteSpace(index);
1044 if(index==msg.length()) {
1045 throw new IllegalArgumentException(
1046 "Unmatched '{' braces in message "+prefix());
1048 char c=msg.charAt(index);
1052 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
1054 // parse argument type: case-sensitive a-zA-Z
1055 int typeIndex=index=skipWhiteSpace(index+1);
1056 while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
1059 int length=index-typeIndex;
1060 index=skipWhiteSpace(index);
1061 if(index==msg.length()) {
1062 throw new IllegalArgumentException(
1063 "Unmatched '{' braces in message "+prefix());
1065 if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) {
1066 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
1068 if(length>Part.MAX_LENGTH) {
1069 throw new IndexOutOfBoundsException(
1070 "Argument type name too long: "+prefix(nameIndex));
1072 argType=ArgType.SIMPLE;
1074 // case-insensitive comparisons for complex-type names
1075 if(isChoice(typeIndex)) {
1076 argType=ArgType.CHOICE;
1077 } else if(isPlural(typeIndex)) {
1078 argType=ArgType.PLURAL;
1079 } else if(isSelect(typeIndex)) {
1080 argType=ArgType.SELECT;
1082 } else if(length==13) {
1083 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
1084 argType=ArgType.SELECTORDINAL;
1087 // change the ARG_START type from NONE to argType
1088 parts.get(argStart).value=(short)argType.ordinal();
1089 if(argType==ArgType.SIMPLE) {
1090 addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
1092 // look for an argument style (pattern)
1094 if(argType!=ArgType.SIMPLE) {
1095 throw new IllegalArgumentException(
1096 "No style field for complex argument: "+prefix(nameIndex));
1100 if(argType==ArgType.SIMPLE) {
1101 index=parseSimpleStyle(index);
1102 } else if(argType==ArgType.CHOICE) {
1103 index=parseChoiceStyle(index, nestingLevel);
1105 index=parsePluralOrSelectStyle(argType, index, nestingLevel);
1109 // Argument parsing stopped on the '}'.
1110 addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
1114 private int parseSimpleStyle(int index) {
1117 while(index<msg.length()) {
1118 char c=msg.charAt(index++);
1120 // Treat apostrophe as quoting but include it in the style part.
1121 // Find the end of the quoted literal text.
1122 index=msg.indexOf('\'', index);
1124 throw new IllegalArgumentException(
1125 "Quoted literal argument style text reaches to the end of the message: "+
1128 // skip the quote-ending apostrophe
1133 if(nestedBraces>0) {
1136 int length=--index-start;
1137 if(length>Part.MAX_LENGTH) {
1138 throw new IndexOutOfBoundsException(
1139 "Argument style text too long: "+prefix(start));
1141 addPart(Part.Type.ARG_STYLE, start, length, 0);
1144 } // c is part of literal text
1146 throw new IllegalArgumentException(
1147 "Unmatched '{' braces in message "+prefix());
1150 private int parseChoiceStyle(int index, int nestingLevel) {
1152 index=skipWhiteSpace(index);
1153 if(index==msg.length() || msg.charAt(index)=='}') {
1154 throw new IllegalArgumentException(
1155 "Missing choice argument pattern in "+prefix());
1158 // The choice argument style contains |-separated (number, separator, message) triples.
1159 // Parse the number.
1160 int numberIndex=index;
1161 index=skipDouble(index);
1162 int length=index-numberIndex;
1164 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1166 if(length>Part.MAX_LENGTH) {
1167 throw new IndexOutOfBoundsException(
1168 "Choice number too long: "+prefix(numberIndex));
1170 parseDouble(numberIndex, index, true); // adds ARG_INT or ARG_DOUBLE
1171 // Parse the separator.
1172 index=skipWhiteSpace(index);
1173 if(index==msg.length()) {
1174 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1176 char c=msg.charAt(index);
1177 if(!(c=='#' || c=='<' || c=='\u2264')) { // U+2264 is <=
1178 throw new IllegalArgumentException(
1179 "Expected choice separator (#<\u2264) instead of '"+c+
1180 "' in choice pattern "+prefix(start));
1182 addPart(Part.Type.ARG_SELECTOR, index, 1, 0);
1183 // Parse the message fragment.
1184 index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE);
1185 // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
1186 if(index==msg.length()) {
1189 if(msg.charAt(index)=='}') {
1190 if(!inMessageFormatPattern(nestingLevel)) {
1191 throw new IllegalArgumentException(
1192 "Bad choice pattern syntax: "+prefix(start));
1195 } // else the terminator is '|'
1196 index=skipWhiteSpace(index+1);
1200 private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
1202 boolean isEmpty=true;
1203 boolean hasOther=false;
1205 // First, collect the selector looking for a small set of terminators.
1206 // It would be a little faster to consider the syntax of each possible
1207 // token right here, but that makes the code too complicated.
1208 index=skipWhiteSpace(index);
1209 boolean eos=index==msg.length();
1210 if(eos || msg.charAt(index)=='}') {
1211 if(eos==inMessageFormatPattern(nestingLevel)) {
1212 throw new IllegalArgumentException(
1214 argType.toString().toLowerCase(Locale.ENGLISH)+
1215 " pattern syntax: "+prefix(start));
1218 throw new IllegalArgumentException(
1219 "Missing 'other' keyword in "+
1220 argType.toString().toLowerCase(Locale.ENGLISH)+
1221 " pattern in "+prefix());
1225 int selectorIndex=index;
1226 if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
1227 // explicit-value plural selector: =double
1228 index=skipDouble(index+1);
1229 int length=index-selectorIndex;
1231 throw new IllegalArgumentException(
1233 argType.toString().toLowerCase(Locale.ENGLISH)+
1234 " pattern syntax: "+prefix(start));
1236 if(length>Part.MAX_LENGTH) {
1237 throw new IndexOutOfBoundsException(
1238 "Argument selector too long: "+prefix(selectorIndex));
1240 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1241 parseDouble(selectorIndex+1, index, false); // adds ARG_INT or ARG_DOUBLE
1243 index=skipIdentifier(index);
1244 int length=index-selectorIndex;
1246 throw new IllegalArgumentException(
1248 argType.toString().toLowerCase(Locale.ENGLISH)+
1249 " pattern syntax: "+prefix(start));
1251 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
1252 if( argType.hasPluralStyle() && length==6 && index<msg.length() &&
1253 msg.regionMatches(selectorIndex, "offset:", 0, 7)
1255 // plural offset, not a selector
1257 throw new IllegalArgumentException(
1258 "Plural argument 'offset:' (if present) must precede key-message pairs: "+
1261 // allow whitespace between offset: and its value
1262 int valueIndex=skipWhiteSpace(index+1); // The ':' is at index.
1263 index=skipDouble(valueIndex);
1264 if(index==valueIndex) {
1265 throw new IllegalArgumentException(
1266 "Missing value for plural 'offset:' "+prefix(start));
1268 if((index-valueIndex)>Part.MAX_LENGTH) {
1269 throw new IndexOutOfBoundsException(
1270 "Plural offset value too long: "+prefix(valueIndex));
1272 parseDouble(valueIndex, index, false); // adds ARG_INT or ARG_DOUBLE
1274 continue; // no message fragment after the offset
1276 // normal selector word
1277 if(length>Part.MAX_LENGTH) {
1278 throw new IndexOutOfBoundsException(
1279 "Argument selector too long: "+prefix(selectorIndex));
1281 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1282 if(msg.regionMatches(selectorIndex, "other", 0, length)) {
1288 // parse the message fragment following the selector
1289 index=skipWhiteSpace(index);
1290 if(index==msg.length() || msg.charAt(index)!='{') {
1291 throw new IllegalArgumentException(
1292 "No message fragment after "+
1293 argType.toString().toLowerCase(Locale.ENGLISH)+
1294 " selector: "+prefix(selectorIndex));
1296 index=parseMessage(index, 1, nestingLevel+1, argType);
1302 * Validates and parses an argument name or argument number string.
1303 * This internal method assumes that the input substring is a "pattern identifier".
1304 * @return >=0 if the name is a valid number,
1305 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
1306 * ARG_NAME_NOT_VALID (-2) if it is neither.
1307 * @see #validateArgumentName(String)
1309 private static int parseArgNumber(CharSequence s, int start, int limit) {
1310 // If the identifier contains only ASCII digits, then it is an argument _number_
1311 // and must not have leading zeros (except "0" itself).
1312 // Otherwise it is an argument _name_.
1314 return ARG_NAME_NOT_VALID;
1317 // Defer numeric errors until we know there are only digits.
1319 char c=s.charAt(start++);
1325 badNumber=true; // leading zero
1327 } else if('1'<=c && c<='9') {
1331 return ARG_NAME_NOT_NUMBER;
1333 while(start<limit) {
1334 c=s.charAt(start++);
1335 if('0'<=c && c<='9') {
1336 if(number>=Integer.MAX_VALUE/10) {
1337 badNumber=true; // overflow
1339 number=number*10+(c-'0');
1341 return ARG_NAME_NOT_NUMBER;
1344 // There are only ASCII digits.
1346 return ARG_NAME_NOT_VALID;
1352 private int parseArgNumber(int start, int limit) {
1353 return parseArgNumber(msg, start, limit);
1357 * Parses a number from the specified message substring.
1358 * @param start start index into the message string
1359 * @param limit limit index into the message string, must be start<limit
1360 * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
1362 private void parseDouble(int start, int limit, boolean allowInfinity) {
1364 // fake loop for easy exit and single throw statement
1366 // fast path for small integers and infinity
1368 int isNegative=0; // not boolean so that we can easily add it to value
1370 char c=msg.charAt(index++);
1376 c=msg.charAt(index++);
1381 c=msg.charAt(index++);
1383 if(c==0x221e) { // infinity
1384 if(allowInfinity && index==limit) {
1386 isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY,
1387 start, limit-start);
1393 // try to parse the number as a small integer but fall back to a double
1394 while('0'<=c && c<='9') {
1395 value=value*10+(c-'0');
1396 if(value>(Part.MAX_VALUE+isNegative)) {
1397 break; // not a small-enough integer
1400 addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
1403 c=msg.charAt(index++);
1405 // Let Double.parseDouble() throw a NumberFormatException.
1406 double numericValue=Double.parseDouble(msg.substring(start, limit));
1407 addArgDoublePart(numericValue, start, limit-start);
1410 throw new NumberFormatException(
1411 "Bad syntax for numeric value: "+msg.substring(start, limit));
1415 * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
1416 * according to JDK pattern behavior.
1419 /* package */ static void appendReducedApostrophes(String s, int start, int limit,
1423 int i=s.indexOf('\'', start);
1424 if(i<0 || i>=limit) {
1425 sb.append(s, start, limit);
1429 // Double apostrophe at start-1 and start==i, append one.
1434 // Append text between apostrophes and skip this one.
1435 sb.append(s, start, i);
1436 doubleApos=start=i+1;
1441 private int skipWhiteSpace(int index) {
1442 return PatternProps.skipWhiteSpace(msg, index);
1445 private int skipIdentifier(int index) {
1446 return PatternProps.skipIdentifier(msg, index);
1450 * Skips a sequence of characters that could occur in a double value.
1451 * Does not fully parse or validate the value.
1453 private int skipDouble(int index) {
1454 while(index<msg.length()) {
1455 char c=msg.charAt(index);
1456 // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
1457 if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) {
1465 private static boolean isArgTypeChar(int c) {
1466 return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
1469 private boolean isChoice(int index) {
1472 ((c=msg.charAt(index++))=='c' || c=='C') &&
1473 ((c=msg.charAt(index++))=='h' || c=='H') &&
1474 ((c=msg.charAt(index++))=='o' || c=='O') &&
1475 ((c=msg.charAt(index++))=='i' || c=='I') &&
1476 ((c=msg.charAt(index++))=='c' || c=='C') &&
1477 ((c=msg.charAt(index))=='e' || c=='E');
1480 private boolean isPlural(int index) {
1483 ((c=msg.charAt(index++))=='p' || c=='P') &&
1484 ((c=msg.charAt(index++))=='l' || c=='L') &&
1485 ((c=msg.charAt(index++))=='u' || c=='U') &&
1486 ((c=msg.charAt(index++))=='r' || c=='R') &&
1487 ((c=msg.charAt(index++))=='a' || c=='A') &&
1488 ((c=msg.charAt(index))=='l' || c=='L');
1491 private boolean isSelect(int index) {
1494 ((c=msg.charAt(index++))=='s' || c=='S') &&
1495 ((c=msg.charAt(index++))=='e' || c=='E') &&
1496 ((c=msg.charAt(index++))=='l' || c=='L') &&
1497 ((c=msg.charAt(index++))=='e' || c=='E') &&
1498 ((c=msg.charAt(index++))=='c' || c=='C') &&
1499 ((c=msg.charAt(index))=='t' || c=='T');
1502 private boolean isOrdinal(int index) {
1505 ((c=msg.charAt(index++))=='o' || c=='O') &&
1506 ((c=msg.charAt(index++))=='r' || c=='R') &&
1507 ((c=msg.charAt(index++))=='d' || c=='D') &&
1508 ((c=msg.charAt(index++))=='i' || c=='I') &&
1509 ((c=msg.charAt(index++))=='n' || c=='N') &&
1510 ((c=msg.charAt(index++))=='a' || c=='A') &&
1511 ((c=msg.charAt(index))=='l' || c=='L');
1515 * @return true if we are inside a MessageFormat (sub-)pattern,
1516 * as opposed to inside a top-level choice/plural/select pattern.
1518 private boolean inMessageFormatPattern(int nestingLevel) {
1519 return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START;
1523 * @return true if we are in a MessageFormat sub-pattern
1524 * of a top-level ChoiceFormat pattern.
1526 private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
1529 parentType==ArgType.CHOICE &&
1530 parts.get(0).type!=Part.Type.MSG_START;
1533 private void addPart(Part.Type type, int index, int length, int value) {
1534 parts.add(new Part(type, index, length, value));
1537 private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
1538 parts.get(start).limitPartIndex=parts.size();
1539 addPart(type, index, length, value);
1542 private void addArgDoublePart(double numericValue, int start, int length) {
1544 if(numericValues==null) {
1545 numericValues=new ArrayList<Double>();
1548 numericIndex=numericValues.size();
1549 if(numericIndex>Part.MAX_VALUE) {
1550 throw new IndexOutOfBoundsException("Too many numeric values");
1553 numericValues.add(numericValue);
1554 addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
1557 private static final int MAX_PREFIX_LENGTH=24;
1560 * Returns a prefix of s.substring(start). Used for Exception messages.
1562 * @param start start index in s
1563 * @return s.substring(start) or a prefix of that
1565 private static String prefix(String s, int start) {
1566 StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20);
1568 prefix.append("\"");
1570 prefix.append("[at pattern index ").append(start).append("] \"");
1572 int substringLength=s.length()-start;
1573 if(substringLength<=MAX_PREFIX_LENGTH) {
1574 prefix.append(start==0 ? s : s.substring(start));
1576 int limit=start+MAX_PREFIX_LENGTH-4;
1577 if(Character.isHighSurrogate(s.charAt(limit-1))) {
1578 // remove lead surrogate from the end of the prefix
1581 prefix.append(s, start, limit).append(" ...");
1583 return prefix.append("\"").toString();
1586 private static String prefix(String s) {
1587 return prefix(s, 0);
1590 private String prefix(int start) {
1591 return prefix(msg, start);
1594 private String prefix() {
1595 return prefix(msg, 0);
1598 private ApostropheMode aposMode;
1600 private ArrayList<Part> parts=new ArrayList<Part>();
1601 private ArrayList<Double> numericValues;
1602 private boolean hasArgNames;
1603 private boolean hasArgNumbers;
1604 private boolean needsAutoQuoting;
1605 private boolean frozen;
1607 private static final ApostropheMode defaultAposMode=
1608 ApostropheMode.valueOf(
1609 ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL"));
1611 private static final ArgType[] argTypes=ArgType.values();