1/*
2*******************************************************************************
3*   Copyright (C) 2010-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   created on: 2010aug21
7*   created by: Markus W. Scherer
8*/
9
10package com.ibm.icu.text;
11
12import java.util.ArrayList;
13import java.util.Locale;
14
15import com.ibm.icu.impl.ICUConfig;
16import com.ibm.icu.impl.PatternProps;
17import com.ibm.icu.util.Freezable;
18import com.ibm.icu.util.ICUCloneNotSupportedException;
19
20//Note: Minimize ICU dependencies, only use a very small part of the ICU core.
21//In particular, do not depend on *Format classes.
22
23/**
24 * Parses and represents ICU MessageFormat patterns.
25 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
26 * Used in the implementations of those classes as well as in tools
27 * for message validation, translation and format conversion.
28 * <p>
29 * The parser handles all syntax relevant for identifying message arguments.
30 * This includes "complex" arguments whose style strings contain
31 * nested MessageFormat pattern substrings.
32 * For "simple" arguments (with no nested MessageFormat pattern substrings),
33 * the argument style is not parsed any further.
34 * <p>
35 * The parser handles named and numbered message arguments and allows both in one message.
36 * <p>
37 * Once a pattern has been parsed successfully, iterate through the parsed data
38 * with countParts(), getPart() and related methods.
39 * <p>
40 * The data logically represents a parse tree, but is stored and accessed
41 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
42 * Arguments and nested messages are best handled via recursion.
43 * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
44 * the index of the corresponding _LIMIT "part".
45 * <p>
46 * List of "parts":
47 * <pre>
48 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
49 * argument = noneArg | simpleArg | complexArg
50 * complexArg = choiceArg | pluralArg | selectArg
51 *
52 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
53 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
54 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
55 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
56 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
57 *
58 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
59 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
60 * selectStyle = (ARG_SELECTOR message)+
61 * </pre>
62 * <ul>
63 *   <li>Literal output text is not represented directly by "parts" but accessed
64 *       between parts of a message, from one part's getLimit() to the next part's getIndex().
65 *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
 *   <li>In the choiceStyle, the ARG_SELECTOR has the '&lt;', the '#' or
67 *       the less-than-or-equal-to sign (U+2264).
68 *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
69 *       The optional numeric Part between each (ARG_SELECTOR, message) pair
70 *       is the value of an explicit-number selector like "=2",
71 *       otherwise the selector is a non-numeric identifier.
 *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
 * </ul>
 * <p>
74 * This class is not intended for public subclassing.
75 *
76 * @stable ICU 4.8
77 * @author Markus Scherer
78 */
79public final class MessagePattern implements Cloneable, Freezable<MessagePattern> {
    /**
     * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
     * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
     * (/com/ibm/icu/ICUConfig.properties).
     * <p>
     * A pair of adjacent apostrophes always results in a single apostrophe in the output,
     * even when the pair is between two single, text-quoting apostrophes.
     * <p>
     * The following table shows examples of desired MessageFormat.format() output
     * with the pattern strings that yield that output.
     * An entry of "(same)" in the DOUBLE_REQUIRED column means that the pattern string
     * is the same as the one shown in the DOUBLE_OPTIONAL column.
     * <p>
     * <table>
     *   <tr>
     *     <th>Desired output</th>
     *     <th>DOUBLE_OPTIONAL</th>
     *     <th>DOUBLE_REQUIRED</th>
     *   </tr>
     *   <tr>
     *     <td>I see {many}</td>
     *     <td>I see '{many}'</td>
     *     <td>(same)</td>
     *   </tr>
     *   <tr>
     *     <td>I said {'Wow!'}</td>
     *     <td>I said '{''Wow!''}'</td>
     *     <td>(same)</td>
     *   </tr>
     *   <tr>
     *     <td>I don't know</td>
     *     <td>I don't know OR<br> I don''t know</td>
     *     <td>I don''t know</td>
     *   </tr>
     * </table>
     * @stable ICU 4.8
     */
    public enum ApostropheMode {
        /**
         * A literal apostrophe is represented by
         * either a single or a double apostrophe pattern character.
         * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
         * if it immediately precedes a curly brace {},
         * or a pipe symbol | if inside a choice format,
         * or a pound symbol # if inside a plural format.
         * In any other position a single apostrophe is literal text,
         * which is why "I don't know" needs no doubling in this mode.
         * <p>
         * This is the default behavior starting with ICU 4.8.
         * @stable ICU 4.8
         */
        DOUBLE_OPTIONAL,
        /**
         * A literal apostrophe must be represented by
         * a double apostrophe pattern character.
         * A single apostrophe always starts quoted literal text.
         * <p>
         * This is the behavior of ICU 4.6 and earlier, and of the JDK.
         * @stable ICU 4.8
         */
        DOUBLE_REQUIRED
    }
138
139    /**
140     * Constructs an empty MessagePattern with default ApostropheMode.
141     * @stable ICU 4.8
142     */
143    public MessagePattern() {
144        aposMode=defaultAposMode;
145    }
146
147    /**
148     * Constructs an empty MessagePattern.
149     * @param mode Explicit ApostropheMode.
150     * @stable ICU 4.8
151     */
152    public MessagePattern(ApostropheMode mode) {
153        aposMode=mode;
154    }
155
156    /**
157     * Constructs a MessagePattern with default ApostropheMode and
158     * parses the MessageFormat pattern string.
159     * @param pattern a MessageFormat pattern string
160     * @throws IllegalArgumentException for syntax errors in the pattern string
161     * @throws IndexOutOfBoundsException if certain limits are exceeded
162     *         (e.g., argument number too high, argument name too long, etc.)
163     * @throws NumberFormatException if a number could not be parsed
164     * @stable ICU 4.8
165     */
166    public MessagePattern(String pattern) {
167        aposMode=defaultAposMode;
168        parse(pattern);
169    }
170
171    /**
172     * Parses a MessageFormat pattern string.
173     * @param pattern a MessageFormat pattern string
174     * @return this
175     * @throws IllegalArgumentException for syntax errors in the pattern string
176     * @throws IndexOutOfBoundsException if certain limits are exceeded
177     *         (e.g., argument number too high, argument name too long, etc.)
178     * @throws NumberFormatException if a number could not be parsed
179     * @stable ICU 4.8
180     */
181    public MessagePattern parse(String pattern) {
182        preParse(pattern);
183        parseMessage(0, 0, 0, ArgType.NONE);
184        postParse();
185        return this;
186    }
187
188    /**
189     * Parses a ChoiceFormat pattern string.
190     * @param pattern a ChoiceFormat pattern string
191     * @return this
192     * @throws IllegalArgumentException for syntax errors in the pattern string
193     * @throws IndexOutOfBoundsException if certain limits are exceeded
194     *         (e.g., argument number too high, argument name too long, etc.)
195     * @throws NumberFormatException if a number could not be parsed
196     * @stable ICU 4.8
197     */
198    public MessagePattern parseChoiceStyle(String pattern) {
199        preParse(pattern);
200        parseChoiceStyle(0, 0);
201        postParse();
202        return this;
203    }
204
205    /**
206     * Parses a PluralFormat pattern string.
207     * @param pattern a PluralFormat pattern string
208     * @return this
209     * @throws IllegalArgumentException for syntax errors in the pattern string
210     * @throws IndexOutOfBoundsException if certain limits are exceeded
211     *         (e.g., argument number too high, argument name too long, etc.)
212     * @throws NumberFormatException if a number could not be parsed
213     * @stable ICU 4.8
214     */
215    public MessagePattern parsePluralStyle(String pattern) {
216        preParse(pattern);
217        parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0);
218        postParse();
219        return this;
220    }
221
222    /**
223     * Parses a SelectFormat pattern string.
224     * @param pattern a SelectFormat pattern string
225     * @return this
226     * @throws IllegalArgumentException for syntax errors in the pattern string
227     * @throws IndexOutOfBoundsException if certain limits are exceeded
228     *         (e.g., argument number too high, argument name too long, etc.)
229     * @throws NumberFormatException if a number could not be parsed
230     * @stable ICU 4.8
231     */
232    public MessagePattern parseSelectStyle(String pattern) {
233        preParse(pattern);
234        parsePluralOrSelectStyle(ArgType.SELECT, 0, 0);
235        postParse();
236        return this;
237    }
238
239    /**
240     * Clears this MessagePattern.
241     * countParts() will return 0.
242     * @stable ICU 4.8
243     */
244    public void clear() {
245        // Mostly the same as preParse().
246        if(isFrozen()) {
247            throw new UnsupportedOperationException(
248                "Attempt to clear() a frozen MessagePattern instance.");
249        }
250        msg=null;
251        hasArgNames=hasArgNumbers=false;
252        needsAutoQuoting=false;
253        parts.clear();
254        if(numericValues!=null) {
255            numericValues.clear();
256        }
257    }
258
259    /**
260     * Clears this MessagePattern and sets the ApostropheMode.
261     * countParts() will return 0.
262     * @param mode The new ApostropheMode.
263     * @stable ICU 4.8
264     */
265    public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
266        clear();
267        aposMode=mode;
268    }
269
270    /**
271     * @param other another object to compare with.
272     * @return true if this object is equivalent to the other one.
273     * @stable ICU 4.8
274     */
275    @Override
276    public boolean equals(Object other) {
277        if(this==other) {
278            return true;
279        }
280        if(other==null || getClass()!=other.getClass()) {
281            return false;
282        }
283        MessagePattern o=(MessagePattern)other;
284        return
285            aposMode.equals(o.aposMode) &&
286            (msg==null ? o.msg==null : msg.equals(o.msg)) &&
287            parts.equals(o.parts);
288        // No need to compare numericValues if msg and parts are the same.
289    }
290
291    /**
292     * {@inheritDoc}
293     * @stable ICU 4.8
294     */
295    @Override
296    public int hashCode() {
297        return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode();
298    }
299
300    /**
301     * @return this instance's ApostropheMode.
302     * @stable ICU 4.8
303     */
304    public ApostropheMode getApostropheMode() {
305        return aposMode;
306    }
307
308    /**
309     * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
310     * @internal
311     */
312    public boolean jdkAposMode() {
313        return aposMode == ApostropheMode.DOUBLE_REQUIRED;
314    }
315
316    /**
317     * @return the parsed pattern string (null if none was parsed).
318     * @stable ICU 4.8
319     */
320    public String getPatternString() {
321        return msg;
322    }
323
324    /**
325     * Does the parsed pattern have named arguments like {first_name}?
326     * @return true if the parsed pattern has at least one named argument.
327     * @stable ICU 4.8
328     */
329    public boolean hasNamedArguments() {
330        return hasArgNames;
331    }
332
333    /**
334     * Does the parsed pattern have numbered arguments like {2}?
335     * @return true if the parsed pattern has at least one numbered argument.
336     * @stable ICU 4.8
337     */
338    public boolean hasNumberedArguments() {
339        return hasArgNumbers;
340    }
341
342    /**
343     * {@inheritDoc}
344     * @stable ICU 4.8
345     */
346    @Override
347    public String toString() {
348        return msg;
349    }
350
351    /**
352     * Validates and parses an argument name or argument number string.
353     * An argument name must be a "pattern identifier", that is, it must contain
354     * no Unicode Pattern_Syntax or Pattern_White_Space characters.
355     * If it only contains ASCII digits, then it must be a small integer with no leading zero.
356     * @param name Input string.
357     * @return &gt;=0 if the name is a valid number,
358     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
359     *         ARG_NAME_NOT_VALID (-2) if it is neither.
360     * @stable ICU 4.8
361     */
362    public static int validateArgumentName(String name) {
363        if(!PatternProps.isIdentifier(name)) {
364            return ARG_NAME_NOT_VALID;
365        }
366        return parseArgNumber(name, 0, name.length());
367    }
368
    /**
     * Return value from {@link #validateArgumentName(String)} for when
     * the string is a valid "pattern identifier" but not a number.
     * @stable ICU 4.8
     */
    public static final int ARG_NAME_NOT_NUMBER=-1;

    /**
     * Return value from {@link #validateArgumentName(String)} for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it might have only ASCII digits but with a leading zero
     * or with a number that is too large.
     * @stable ICU 4.8
     */
    public static final int ARG_NAME_NOT_VALID=-2;
384
385    /**
386     * Returns a version of the parsed pattern string where each ASCII apostrophe
387     * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
388     * <p>
389     * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
390     * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
391     * @return the deep-auto-quoted version of the parsed pattern string.
392     * @see MessageFormat#autoQuoteApostrophe(String)
393     * @stable ICU 4.8
394     */
395    public String autoQuoteApostropheDeep() {
396        if(!needsAutoQuoting) {
397            return msg;
398        }
399        StringBuilder modified=null;
400        // Iterate backward so that the insertion indexes do not change.
401        int count=countParts();
402        for(int i=count; i>0;) {
403            Part part;
404            if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) {
405                if(modified==null) {
406                    modified=new StringBuilder(msg.length()+10).append(msg);
407                }
408                modified.insert(part.index, (char)part.value);
409            }
410        }
411        if(modified==null) {
412            return msg;
413        } else {
414            return modified.toString();
415        }
416    }
417
418    /**
419     * Returns the number of "parts" created by parsing the pattern string.
420     * Returns 0 if no pattern has been parsed or clear() was called.
421     * @return the number of pattern parts.
422     * @stable ICU 4.8
423     */
424    public int countParts() {
425        return parts.size();
426    }
427
428    /**
429     * Gets the i-th pattern "part".
430     * @param i The index of the Part data. (0..countParts()-1)
431     * @return the i-th pattern "part".
432     * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
433     * @stable ICU 4.8
434     */
435    public Part getPart(int i) {
436        return parts.get(i);
437    }
438
439    /**
440     * Returns the Part.Type of the i-th pattern "part".
441     * Convenience method for getPart(i).getType().
442     * @param i The index of the Part data. (0..countParts()-1)
443     * @return The Part.Type of the i-th Part.
444     * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
445     * @stable ICU 4.8
446     */
447    public Part.Type getPartType(int i) {
448        return parts.get(i).type;
449    }
450
451    /**
452     * Returns the pattern index of the specified pattern "part".
453     * Convenience method for getPart(partIndex).getIndex().
454     * @param partIndex The index of the Part data. (0..countParts()-1)
455     * @return The pattern index of this Part.
456     * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
457     * @stable ICU 4.8
458     */
459    public int getPatternIndex(int partIndex) {
460        return parts.get(partIndex).index;
461    }
462
463    /**
464     * Returns the substring of the pattern string indicated by the Part.
465     * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
466     * @param part a part of this MessagePattern.
467     * @return the substring associated with part.
468     * @stable ICU 4.8
469     */
470    public String getSubstring(Part part) {
471        int index=part.index;
472        return msg.substring(index, index+part.length);
473    }
474
475    /**
476     * Compares the part's substring with the input string s.
477     * @param part a part of this MessagePattern.
478     * @param s a string.
479     * @return true if getSubstring(part).equals(s).
480     * @stable ICU 4.8
481     */
482    public boolean partSubstringMatches(Part part, String s) {
483        return msg.regionMatches(part.index, s, 0, part.length);
484    }
485
486    /**
487     * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
488     * @param part a part of this MessagePattern.
489     * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
490     * @stable ICU 4.8
491     */
492    public double getNumericValue(Part part) {
493        Part.Type type=part.type;
494        if(type==Part.Type.ARG_INT) {
495            return part.value;
496        } else if(type==Part.Type.ARG_DOUBLE) {
497            return numericValues.get(part.value);
498        } else {
499            return NO_NUMERIC_VALUE;
500        }
501    }
502
    /**
     * Special value that is returned by {@link #getNumericValue(Part)} when no
     * numeric value is defined for a part, that is, when the part is
     * neither an ARG_INT nor an ARG_DOUBLE.
     * @see #getNumericValue
     * @stable ICU 4.8
     */
    public static final double NO_NUMERIC_VALUE=-123456789;
510
511    /**
512     * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
513     * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
514     * @return the "offset:" value.
515     * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
516     * @stable ICU 4.8
517     */
518    public double getPluralOffset(int pluralStart) {
519        Part part=parts.get(pluralStart);
520        if(part.type.hasNumericValue()) {
521            return getNumericValue(part);
522        } else {
523            return 0;
524        }
525    }
526
527    /**
528     * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
529     * @param start The index of some Part data (0..countParts()-1);
530     *        this Part should be of Type ARG_START or MSG_START.
531     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
532     *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
533     * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
534     * @stable ICU 4.8
535     */
536    public int getLimitPartIndex(int start) {
537        int limit=parts.get(start).limitPartIndex;
538        if(limit<start) {
539            return start;
540        }
541        return limit;
542    }
543
544    /**
545     * A message pattern "part", representing a pattern parsing event.
546     * There is a part for the start and end of a message or argument,
547     * for quoting and escaping of and with ASCII apostrophes,
548     * and for syntax elements of "complex" arguments.
549     * @stable ICU 4.8
550     */
551    public static final class Part {
552        private Part(Type t, int i, int l, int v) {
553            type=t;
554            index=i;
555            length=(char)l;
556            value=(short)v;
557        }
558
559        /**
560         * Returns the type of this part.
561         * @return the part type.
562         * @stable ICU 4.8
563         */
564        public Type getType() {
565            return type;
566        }
567
568        /**
569         * Returns the pattern string index associated with this Part.
570         * @return this part's pattern string index.
571         * @stable ICU 4.8
572         */
573        public int getIndex() {
574            return index;
575        }
576
577        /**
578         * Returns the length of the pattern substring associated with this Part.
579         * This is 0 for some parts.
580         * @return this part's pattern substring length.
581         * @stable ICU 4.8
582         */
583        public int getLength() {
584            return length;
585        }
586
587        /**
588         * Returns the pattern string limit (exclusive-end) index associated with this Part.
589         * Convenience method for getIndex()+getLength().
590         * @return this part's pattern string limit index, same as getIndex()+getLength().
591         * @stable ICU 4.8
592         */
593        public int getLimit() {
594            return index+length;
595        }
596
597        /**
598         * Returns a value associated with this part.
599         * See the documentation of each part type for details.
600         * @return the part value.
601         * @stable ICU 4.8
602         */
603        public int getValue() {
604            return value;
605        }
606
607        /**
608         * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
609         * otherwise ArgType.NONE.
610         * @return the argument type for this part.
611         * @stable ICU 4.8
612         */
613        public ArgType getArgType() {
614            Type type=getType();
615            if(type==Type.ARG_START || type==Type.ARG_LIMIT) {
616                return argTypes[value];
617            } else {
618                return ArgType.NONE;
619            }
620        }
621
622        /**
623         * Part type constants.
624         * @stable ICU 4.8
625         */
626        public enum Type {
627            /**
628             * Start of a message pattern (main or nested).
629             * The length is 0 for the top-level message
630             * and for a choice argument sub-message, otherwise 1 for the '{'.
631             * The value indicates the nesting level, starting with 0 for the main message.
632             * <p>
633             * There is always a later MSG_LIMIT part.
634             * @stable ICU 4.8
635             */
636            MSG_START,
637            /**
638             * End of a message pattern (main or nested).
639             * The length is 0 for the top-level message and
640             * the last sub-message of a choice argument,
641             * otherwise 1 for the '}' or (in a choice argument style) the '|'.
642             * The value indicates the nesting level, starting with 0 for the main message.
643             * @stable ICU 4.8
644             */
645            MSG_LIMIT,
646            /**
647             * Indicates a substring of the pattern string which is to be skipped when formatting.
648             * For example, an apostrophe that begins or ends quoted text
649             * would be indicated with such a part.
650             * The value is undefined and currently always 0.
651             * @stable ICU 4.8
652             */
653            SKIP_SYNTAX,
654            /**
655             * Indicates that a syntax character needs to be inserted for auto-quoting.
656             * The length is 0.
657             * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
658             * @stable ICU 4.8
659             */
660            INSERT_CHAR,
661            /**
662             * Indicates a syntactic (non-escaped) # symbol in a plural variant.
663             * When formatting, replace this part's substring with the
664             * (value-offset) for the plural argument value.
665             * The value is undefined and currently always 0.
666             * @stable ICU 4.8
667             */
668            REPLACE_NUMBER,
669            /**
670             * Start of an argument.
671             * The length is 1 for the '{'.
672             * The value is the ordinal value of the ArgType. Use getArgType().
673             * <p>
674             * This part is followed by either an ARG_NUMBER or ARG_NAME,
675             * followed by optional argument sub-parts (see ArgType constants)
676             * and finally an ARG_LIMIT part.
677             * @stable ICU 4.8
678             */
679            ARG_START,
680            /**
681             * End of an argument.
682             * The length is 1 for the '}'.
683             * The value is the ordinal value of the ArgType. Use getArgType().
684             * @stable ICU 4.8
685             */
686            ARG_LIMIT,
687            /**
688             * The argument number, provided by the value.
689             * @stable ICU 4.8
690             */
691            ARG_NUMBER,
692            /**
693             * The argument name.
694             * The value is undefined and currently always 0.
695             * @stable ICU 4.8
696             */
697            ARG_NAME,
698            /**
699             * The argument type.
700             * The value is undefined and currently always 0.
701             * @stable ICU 4.8
702             */
703            ARG_TYPE,
704            /**
705             * The argument style text.
706             * The value is undefined and currently always 0.
707             * @stable ICU 4.8
708             */
709            ARG_STYLE,
710            /**
711             * A selector substring in a "complex" argument style.
712             * The value is undefined and currently always 0.
713             * @stable ICU 4.8
714             */
715            ARG_SELECTOR,
716            /**
717             * An integer value, for example the offset or an explicit selector value
718             * in a PluralFormat style.
719             * The part value is the integer value.
720             * @stable ICU 4.8
721             */
722            ARG_INT,
723            /**
724             * A numeric value, for example the offset or an explicit selector value
725             * in a PluralFormat style.
726             * The part value is an index into an internal array of numeric values;
727             * use getNumericValue().
728             * @stable ICU 4.8
729             */
730            ARG_DOUBLE;
731
732            /**
733             * Indicates whether this part has a numeric value.
734             * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
735             * @return true if this part has a numeric value.
736             * @stable ICU 4.8
737             */
738            public boolean hasNumericValue() {
739                return this==ARG_INT || this==ARG_DOUBLE;
740            }
741        }
742
743        /**
744         * @return a string representation of this part.
745         * @stable ICU 4.8
746         */
747        @Override
748        public String toString() {
749            String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ?
750                getArgType().name() : Integer.toString(value);
751            return type.name()+"("+valueString+")@"+index;
752        }
753
754        /**
755         * @param other another object to compare with.
756         * @return true if this object is equivalent to the other one.
757         * @stable ICU 4.8
758         */
759        @Override
760        public boolean equals(Object other) {
761            if(this==other) {
762                return true;
763            }
764            if(other==null || getClass()!=other.getClass()) {
765                return false;
766            }
767            Part o=(Part)other;
768            return
769                type.equals(o.type) &&
770                index==o.index &&
771                length==o.length &&
772                value==o.value &&
773                limitPartIndex==o.limitPartIndex;
774        }
775
776        /**
777         * {@inheritDoc}
778         * @stable ICU 4.8
779         */
780        @Override
781        public int hashCode() {
782            return ((type.hashCode()*37+index)*37+length)*37+value;
783        }
784
785        private static final int MAX_LENGTH=0xffff;
786        private static final int MAX_VALUE=Short.MAX_VALUE;
787
788        // Some fields are not final because they are modified during pattern parsing.
789        // After pattern parsing, the parts are effectively immutable.
790        private final Type type;
791        private final int index;
792        private final char length;
793        private short value;
794        private int limitPartIndex;
795    }
796
797    /**
798     * Argument type constants.
799     * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
800     *
801     * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
802     * with a nesting level one greater than the surrounding message.
803     * @stable ICU 4.8
804     */
805    public enum ArgType {
806        /**
807         * The argument has no specified type.
808         * @stable ICU 4.8
809         */
810        NONE,
811        /**
812         * The argument has a "simple" type which is provided by the ARG_TYPE part.
813         * An ARG_STYLE part might follow that.
814         * @stable ICU 4.8
815         */
816        SIMPLE,
817        /**
818         * The argument is a ChoiceFormat with one or more
819         * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
820         * @stable ICU 4.8
821         */
822        CHOICE,
823        /**
824         * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
825         * (e.g., offset:1)
826         * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
827         * If the selector has an explicit value (e.g., =2), then
828         * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
829         * Otherwise the message immediately follows the ARG_SELECTOR.
830         * @stable ICU 4.8
831         */
832        PLURAL,
833        /**
834         * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
835         * @stable ICU 4.8
836         */
837        SELECT,
838        /**
839         * The argument is an ordinal-number PluralFormat
840         * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
841         * @stable ICU 50
842         */
843        SELECTORDINAL;
844
845        /**
846         * @return true if the argument type has a plural style part sequence and semantics,
847         * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}.
848         * @stable ICU 50
849         */
850        public boolean hasPluralStyle() {
851            return this == PLURAL || this == SELECTORDINAL;
852        }
853    }
854
855    /**
856     * Creates and returns a copy of this object.
857     * @return a copy of this object (or itself if frozen).
858     * @stable ICU 4.8
859     */
860    @Override
861    public Object clone() {
862        if(isFrozen()) {
863            return this;
864        } else {
865            return cloneAsThawed();
866        }
867    }
868
869    /**
870     * Creates and returns an unfrozen copy of this object.
871     * @return a copy of this object.
872     * @stable ICU 4.8
873     */
874    @SuppressWarnings("unchecked")
875    public MessagePattern cloneAsThawed() {
876        MessagePattern newMsg;
877        try {
878            newMsg=(MessagePattern)super.clone();
879        } catch (CloneNotSupportedException e) {
880            throw new ICUCloneNotSupportedException(e);
881        }
882        newMsg.parts=(ArrayList<Part>)parts.clone();
883        if(numericValues!=null) {
884            newMsg.numericValues=(ArrayList<Double>)numericValues.clone();
885        }
886        newMsg.frozen=false;
887        return newMsg;
888    }
889
890    /**
891     * Freezes this object, making it immutable and thread-safe.
892     * @return this
893     * @stable ICU 4.8
894     */
895    public MessagePattern freeze() {
896        frozen=true;
897        return this;
898    }
899
900    /**
901     * Determines whether this object is frozen (immutable) or not.
902     * @return true if this object is frozen.
903     * @stable ICU 4.8
904     */
905    public boolean isFrozen() {
906        return frozen;
907    }
908
909    private void preParse(String pattern) {
910        if(isFrozen()) {
911            throw new UnsupportedOperationException(
912                "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
913        }
914        msg=pattern;
915        hasArgNames=hasArgNumbers=false;
916        needsAutoQuoting=false;
917        parts.clear();
918        if(numericValues!=null) {
919            numericValues.clear();
920        }
921    }
922
    /**
     * Placeholder for work to be done after a pattern has been parsed.
     * The body is intentionally empty.
     */
    private void postParse() {
        // Nothing to be done currently.
    }
926
    /**
     * Parses one message, either the top-level one or one nested in an argument style,
     * and appends its parts to the parts list.
     * The message is bracketed by a MSG_START and a MSG_LIMIT part. In between, this method adds
     * SKIP_SYNTAX and INSERT_CHAR parts for apostrophe quoting, a REPLACE_NUMBER part for each
     * unquoted '#' when the parent argument has plural style, and the parts of
     * nested arguments via parseArg().
     *
     * @param index pattern index of the start of the message
     * @param msgStartLength length of the syntax that opens the message;
     *        recorded in the MSG_START part and skipped before parsing the message text
     * @param nestingLevel nesting depth of this message, stored as the value of
     *        the MSG_START and MSG_LIMIT parts
     * @param parentType type of the argument that contains this message;
     *        determines whether '|' (CHOICE) and '#' (plural styles) are syntax characters
     * @return the index at which the caller continues:
     *         for a CHOICE parent the index of the terminating '}' or '|',
     *         otherwise the index after the terminating '}',
     *         or msg.length() if the message extends to the end of the pattern
     * @throws IndexOutOfBoundsException if nestingLevel is greater than Part.MAX_VALUE
     * @throws IllegalArgumentException if the pattern ends inside a nested message,
     *         unless inTopLevelChoiceMessage() accepts that
     */
    private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
        if(nestingLevel>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException();
        }
        // Remember the MSG_START part index so that addLimitPart() can link it to its MSG_LIMIT.
        int msgStart=parts.size();
        addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
        index+=msgStartLength;
        while(index<msg.length()) {
            // From here on, index is the position after c.
            char c=msg.charAt(index++);
            if(c=='\'') {
                if(index==msg.length()) {
                    // The apostrophe is the last character in the pattern.
                    // Add a Part for auto-quoting.
                    addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                    needsAutoQuoting=true;
                } else {
                    c=msg.charAt(index);
                    if(c=='\'') {
                        // double apostrophe, skip the second one
                        addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                    } else if(
                        aposMode==ApostropheMode.DOUBLE_REQUIRED ||
                        c=='{' || c=='}' ||
                        (parentType==ArgType.CHOICE && c=='|') ||
                        (parentType.hasPluralStyle() && c=='#')
                    ) {
                        // The apostrophe starts quoted literal text.
                        // skip the quote-starting apostrophe
                        addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
                        // find the end of the quoted literal text
                        for(;;) {
                            index=msg.indexOf('\'', index+1);
                            if(index>=0) {
                                if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
                                    // double apostrophe inside quoted literal text
                                    // still encodes a single apostrophe, skip the second one
                                    addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
                                } else {
                                    // skip the quote-ending apostrophe
                                    addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                                    break;
                                }
                            } else {
                                // The quoted text reaches to the end of the message.
                                index=msg.length();
                                // Add a Part for auto-quoting.
                                addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                                needsAutoQuoting=true;
                                break;
                            }
                        }
                    } else {
                        // Interpret the apostrophe as literal text.
                        // Add a Part for auto-quoting.
                        addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                        needsAutoQuoting=true;
                    }
                }
            } else if(parentType.hasPluralStyle() && c=='#') {
                // The unquoted # in a plural message fragment will be replaced
                // with the (number-offset).
                addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
            } else if(c=='{') {
                // Nested argument: parseArg() returns the index after its closing '}'.
                index=parseArg(index-1, 1, nestingLevel);
            } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
                // Finish the message before the terminator.
                // In a choice style, report the "}" substring only for the following ARG_LIMIT,
                // not for this MSG_LIMIT.
                int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
                addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
                if(parentType==ArgType.CHOICE) {
                    // Let the choice style parser see the '}' or '|'.
                    return index-1;
                } else {
                    // continue parsing after the '}'
                    return index;
                }
            }  // else: c is part of literal text
        }
        // The end of the pattern string was reached without a message terminator.
        if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
        return index;
    }
1012
    /**
     * Parses one argument and appends its parts to the parts list:
     * ARG_START, then ARG_NUMBER or ARG_NAME, then for a typed argument an optional ARG_TYPE
     * (simple types only) plus the style parts, and finally ARG_LIMIT.
     * The ARG_START part is first added with type NONE and updated once the type is known.
     *
     * @param index pattern index of the syntax that starts the argument
     * @param argStartLength length of that start syntax
     * @param nestingLevel nesting depth of the message that contains this argument;
     *        passed on to the choice/plural/select style parsers
     * @return the index after the '}' that ends the argument
     * @throws IllegalArgumentException if the pattern ends inside the argument,
     *         or if the name, number, type or style syntax is invalid
     * @throws IndexOutOfBoundsException if the argument number is greater than Part.MAX_VALUE,
     *         or a name, number or type is longer than Part.MAX_LENGTH
     */
    private int parseArg(int index, int argStartLength, int nestingLevel) {
        // Remember the ARG_START part index for updating its type and linking its ARG_LIMIT.
        int argStart=parts.size();
        ArgType argType=ArgType.NONE;
        addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
        int nameIndex=index=skipWhiteSpace(index+argStartLength);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        // parse argument name or number
        index=skipIdentifier(index);
        int number=parseArgNumber(nameIndex, index);
        if(number>=0) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
                throw new IndexOutOfBoundsException(
                    "Argument number too large: "+prefix(nameIndex));
            }
            hasArgNumbers=true;
            addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
        } else if(number==ARG_NAME_NOT_NUMBER) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument name too long: "+prefix(nameIndex));
            }
            hasArgNames=true;
            addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
        } else {  // number<-1 (ARG_NAME_NOT_VALID)
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        }
        index=skipWhiteSpace(index);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        char c=msg.charAt(index);
        if(c=='}') {
            // all done
        } else if(c!=',') {
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        } else /* ',' */ {
            // parse argument type: case-sensitive a-zA-Z
            int typeIndex=index=skipWhiteSpace(index+1);
            while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
                ++index;
            }
            int length=index-typeIndex;
            index=skipWhiteSpace(index);
            if(index==msg.length()) {
                throw new IllegalArgumentException(
                    "Unmatched '{' braces in message "+prefix());
            }
            // The type name must not be empty and must be followed by ',' or '}'.
            // This also stores the character after the type name in c.
            if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) {
                throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
            }
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument type name too long: "+prefix(nameIndex));
            }
            // Any type name other than the complex-type keywords below is a simple type.
            argType=ArgType.SIMPLE;
            if(length==6) {
                // case-insensitive comparisons for complex-type names
                if(isChoice(typeIndex)) {
                    argType=ArgType.CHOICE;
                } else if(isPlural(typeIndex)) {
                    argType=ArgType.PLURAL;
                } else if(isSelect(typeIndex)) {
                    argType=ArgType.SELECT;
                }
            } else if(length==13) {
                // "select" (6 characters) immediately followed by "ordinal" (7 characters)
                if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
                    argType=ArgType.SELECTORDINAL;
                }
            }
            // change the ARG_START type from NONE to argType
            parts.get(argStart).value=(short)argType.ordinal();
            if(argType==ArgType.SIMPLE) {
                addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
            }
            // look for an argument style (pattern)
            if(c=='}') {
                if(argType!=ArgType.SIMPLE) {
                    throw new IllegalArgumentException(
                        "No style field for complex argument: "+prefix(nameIndex));
                }
            } else /* ',' */ {
                ++index;
                if(argType==ArgType.SIMPLE) {
                    index=parseSimpleStyle(index);
                } else if(argType==ArgType.CHOICE) {
                    index=parseChoiceStyle(index, nestingLevel);
                } else {
                    index=parsePluralOrSelectStyle(argType, index, nestingLevel);
                }
            }
        }
        // Argument parsing stopped on the '}'.
        addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
        return index+1;
    }
1114
1115    private int parseSimpleStyle(int index) {
1116        int start=index;
1117        int nestedBraces=0;
1118        while(index<msg.length()) {
1119            char c=msg.charAt(index++);
1120            if(c=='\'') {
1121                // Treat apostrophe as quoting but include it in the style part.
1122                // Find the end of the quoted literal text.
1123                index=msg.indexOf('\'', index);
1124                if(index<0) {
1125                    throw new IllegalArgumentException(
1126                        "Quoted literal argument style text reaches to the end of the message: "+
1127                        prefix(start));
1128                }
1129                // skip the quote-ending apostrophe
1130                ++index;
1131            } else if(c=='{') {
1132                ++nestedBraces;
1133            } else if(c=='}') {
1134                if(nestedBraces>0) {
1135                    --nestedBraces;
1136                } else {
1137                    int length=--index-start;
1138                    if(length>Part.MAX_LENGTH) {
1139                        throw new IndexOutOfBoundsException(
1140                            "Argument style text too long: "+prefix(start));
1141                    }
1142                    addPart(Part.Type.ARG_STYLE, start, length, 0);
1143                    return index;
1144                }
1145            }  // c is part of literal text
1146        }
1147        throw new IllegalArgumentException(
1148            "Unmatched '{' braces in message "+prefix());
1149    }
1150
    /**
     * Parses a choice argument style: one or more '|'-separated
     * (number, separator, message) triples.
     * For each triple this adds an ARG_INT or ARG_DOUBLE part for the number,
     * an ARG_SELECTOR part for the one-character separator ('#', '&lt;' or U+2264),
     * and the parts of the message via parseMessage().
     *
     * @param index pattern index where the choice style starts
     * @param nestingLevel nesting depth of the message that contains the choice argument;
     *        the choice messages are parsed at nestingLevel+1
     * @return the index of the '}' that ends the choice argument,
     *         or msg.length() if the last message extends to the end of the pattern
     * @throws IllegalArgumentException if the style is empty or its syntax is invalid
     * @throws IndexOutOfBoundsException if a number is longer than Part.MAX_LENGTH
     */
    private int parseChoiceStyle(int index, int nestingLevel) {
        int start=index;
        index=skipWhiteSpace(index);
        if(index==msg.length() || msg.charAt(index)=='}') {
            throw new IllegalArgumentException(
                "Missing choice argument pattern in "+prefix());
        }
        for(;;) {
            // The choice argument style contains |-separated (number, separator, message) triples.
            // Parse the number.
            int numberIndex=index;
            index=skipDouble(index);
            int length=index-numberIndex;
            if(length==0) {
                throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
            }
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Choice number too long: "+prefix(numberIndex));
            }
            parseDouble(numberIndex, index, true);  // adds ARG_INT or ARG_DOUBLE
            // Parse the separator.
            index=skipWhiteSpace(index);
            if(index==msg.length()) {
                throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
            }
            char c=msg.charAt(index);
            if(!(c=='#' || c=='<' || c=='\u2264')) {  // U+2264 is <=
                throw new IllegalArgumentException(
                    "Expected choice separator (#<\u2264) instead of '"+c+
                    "' in choice pattern "+prefix(start));
            }
            addPart(Part.Type.ARG_SELECTOR, index, 1, 0);
            // Parse the message fragment.
            // It starts right after the separator, with no start syntax of its own (length 0).
            index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE);
            // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
            if(index==msg.length()) {
                return index;
            }
            if(msg.charAt(index)=='}') {
                // A '}' terminator is only valid when the choice style is embedded
                // in a MessageFormat pattern.
                if(!inMessageFormatPattern(nestingLevel)) {
                    throw new IllegalArgumentException(
                        "Bad choice pattern syntax: "+prefix(start));
                }
                return index;
            }  // else the terminator is '|'
            index=skipWhiteSpace(index+1);
        }
    }
1200
1201    private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
1202        int start=index;
1203        boolean isEmpty=true;
1204        boolean hasOther=false;
1205        for(;;) {
1206            // First, collect the selector looking for a small set of terminators.
1207            // It would be a little faster to consider the syntax of each possible
1208            // token right here, but that makes the code too complicated.
1209            index=skipWhiteSpace(index);
1210            boolean eos=index==msg.length();
1211            if(eos || msg.charAt(index)=='}') {
1212                if(eos==inMessageFormatPattern(nestingLevel)) {
1213                    throw new IllegalArgumentException(
1214                        "Bad "+
1215                        argType.toString().toLowerCase(Locale.ENGLISH)+
1216                        " pattern syntax: "+prefix(start));
1217                }
1218                if(!hasOther) {
1219                    throw new IllegalArgumentException(
1220                        "Missing 'other' keyword in "+
1221                        argType.toString().toLowerCase(Locale.ENGLISH)+
1222                        " pattern in "+prefix());
1223                }
1224                return index;
1225            }
1226            int selectorIndex=index;
1227            if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
1228                // explicit-value plural selector: =double
1229                index=skipDouble(index+1);
1230                int length=index-selectorIndex;
1231                if(length==1) {
1232                    throw new IllegalArgumentException(
1233                        "Bad "+
1234                        argType.toString().toLowerCase(Locale.ENGLISH)+
1235                        " pattern syntax: "+prefix(start));
1236                }
1237                if(length>Part.MAX_LENGTH) {
1238                    throw new IndexOutOfBoundsException(
1239                        "Argument selector too long: "+prefix(selectorIndex));
1240                }
1241                addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1242                parseDouble(selectorIndex+1, index, false);  // adds ARG_INT or ARG_DOUBLE
1243            } else {
1244                index=skipIdentifier(index);
1245                int length=index-selectorIndex;
1246                if(length==0) {
1247                    throw new IllegalArgumentException(
1248                        "Bad "+
1249                        argType.toString().toLowerCase(Locale.ENGLISH)+
1250                        " pattern syntax: "+prefix(start));
1251                }
1252                // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
1253                if( argType.hasPluralStyle() && length==6 && index<msg.length() &&
1254                    msg.regionMatches(selectorIndex, "offset:", 0, 7)
1255                ) {
1256                    // plural offset, not a selector
1257                    if(!isEmpty) {
1258                        throw new IllegalArgumentException(
1259                            "Plural argument 'offset:' (if present) must precede key-message pairs: "+
1260                            prefix(start));
1261                    }
1262                    // allow whitespace between offset: and its value
1263                    int valueIndex=skipWhiteSpace(index+1);  // The ':' is at index.
1264                    index=skipDouble(valueIndex);
1265                    if(index==valueIndex) {
1266                        throw new IllegalArgumentException(
1267                            "Missing value for plural 'offset:' "+prefix(start));
1268                    }
1269                    if((index-valueIndex)>Part.MAX_LENGTH) {
1270                        throw new IndexOutOfBoundsException(
1271                            "Plural offset value too long: "+prefix(valueIndex));
1272                    }
1273                    parseDouble(valueIndex, index, false);  // adds ARG_INT or ARG_DOUBLE
1274                    isEmpty=false;
1275                    continue;  // no message fragment after the offset
1276                } else {
1277                    // normal selector word
1278                    if(length>Part.MAX_LENGTH) {
1279                        throw new IndexOutOfBoundsException(
1280                            "Argument selector too long: "+prefix(selectorIndex));
1281                    }
1282                    addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1283                    if(msg.regionMatches(selectorIndex, "other", 0, length)) {
1284                        hasOther=true;
1285                    }
1286                }
1287            }
1288
1289            // parse the message fragment following the selector
1290            index=skipWhiteSpace(index);
1291            if(index==msg.length() || msg.charAt(index)!='{') {
1292                throw new IllegalArgumentException(
1293                    "No message fragment after "+
1294                    argType.toString().toLowerCase(Locale.ENGLISH)+
1295                    " selector: "+prefix(selectorIndex));
1296            }
1297            index=parseMessage(index, 1, nestingLevel+1, argType);
1298            isEmpty=false;
1299        }
1300    }
1301
1302    /**
1303     * Validates and parses an argument name or argument number string.
1304     * This internal method assumes that the input substring is a "pattern identifier".
1305     * @return &gt;=0 if the name is a valid number,
1306     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
1307     *         ARG_NAME_NOT_VALID (-2) if it is neither.
1308     * @see #validateArgumentName(String)
1309     */
1310    private static int parseArgNumber(CharSequence s, int start, int limit) {
1311        // If the identifier contains only ASCII digits, then it is an argument _number_
1312        // and must not have leading zeros (except "0" itself).
1313        // Otherwise it is an argument _name_.
1314        if(start>=limit) {
1315            return ARG_NAME_NOT_VALID;
1316        }
1317        int number;
1318        // Defer numeric errors until we know there are only digits.
1319        boolean badNumber;
1320        char c=s.charAt(start++);
1321        if(c=='0') {
1322            if(start==limit) {
1323                return 0;
1324            } else {
1325                number=0;
1326                badNumber=true;  // leading zero
1327            }
1328        } else if('1'<=c && c<='9') {
1329            number=c-'0';
1330            badNumber=false;
1331        } else {
1332            return ARG_NAME_NOT_NUMBER;
1333        }
1334        while(start<limit) {
1335            c=s.charAt(start++);
1336            if('0'<=c && c<='9') {
1337                if(number>=Integer.MAX_VALUE/10) {
1338                    badNumber=true;  // overflow
1339                }
1340                number=number*10+(c-'0');
1341            } else {
1342                return ARG_NAME_NOT_NUMBER;
1343            }
1344        }
1345        // There are only ASCII digits.
1346        if(badNumber) {
1347            return ARG_NAME_NOT_VALID;
1348        } else {
1349            return number;
1350        }
1351    }
1352
    /**
     * Validates and parses the argument name or number in the msg[start, limit[ substring
     * of the current pattern string.
     * @param start start index into the pattern string
     * @param limit limit index into the pattern string
     * @return the result of the static parseArgNumber() for that substring
     */
    private int parseArgNumber(int start, int limit) {
        return parseArgNumber(msg, start, limit);
    }
1356
    /**
     * Parses a number from the specified message substring and adds a part for it:
     * ARG_INT if the value is an integer that fits into a Part value,
     * otherwise ARG_DOUBLE.
     * @param start start index into the message string
     * @param limit limit index into the message string, must be {@code start<limit}
     * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
     * @throws NumberFormatException if the substring is not a valid number
     */
    private void parseDouble(int start, int limit, boolean allowInfinity) {
        assert start<limit;
        // fake loop for easy exit and single throw statement
        for(;;) {
            // fast path for small integers and infinity
            int value=0;
            int isNegative=0;  // not boolean so that we can easily add it to value
            int index=start;
            char c=msg.charAt(index++);
            if(c=='-') {
                isNegative=1;
                if(index==limit) {
                    break;  // no number
                }
                c=msg.charAt(index++);
            } else if(c=='+') {
                if(index==limit) {
                    break;  // no number
                }
                c=msg.charAt(index++);
            }
            if(c==0x221e) {  // infinity
                // The infinity sign must be the last character of the substring.
                if(allowInfinity && index==limit) {
                    addArgDoublePart(
                        isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY,
                        start, limit-start);
                    return;
                } else {
                    break;
                }
            }
            // try to parse the number as a small integer but fall back to a double
            while('0'<=c && c<='9') {
                value=value*10+(c-'0');
                // A negative number may have a magnitude one greater than Part.MAX_VALUE.
                if(value>(Part.MAX_VALUE+isNegative)) {
                    break;  // not a small-enough integer
                }
                if(index==limit) {
                    addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
                    return;
                }
                c=msg.charAt(index++);
            }
            // We get here if the integer is too large or a non-digit character was found.
            // Let Double.parseDouble() throw a NumberFormatException.
            double numericValue=Double.parseDouble(msg.substring(start, limit));
            addArgDoublePart(numericValue, start, limit-start);
            return;
        }
        // Reached only via one of the break statements of the fake loop.
        throw new NumberFormatException(
            "Bad syntax for numeric value: "+msg.substring(start, limit));
    }
1414
1415    /**
1416     * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
1417     * according to JDK pattern behavior.
1418     * @internal
1419     */
1420    /* package */ static void appendReducedApostrophes(String s, int start, int limit,
1421                                                       StringBuilder sb) {
1422        int doubleApos=-1;
1423        for(;;) {
1424            int i=s.indexOf('\'', start);
1425            if(i<0 || i>=limit) {
1426                sb.append(s, start, limit);
1427                break;
1428            }
1429            if(i==doubleApos) {
1430                // Double apostrophe at start-1 and start==i, append one.
1431                sb.append('\'');
1432                ++start;
1433                doubleApos=-1;
1434            } else {
1435                // Append text between apostrophes and skip this one.
1436                sb.append(s, start, i);
1437                doubleApos=start=i+1;
1438            }
1439        }
1440    }
1441
    /**
     * Skips white space in the pattern string by delegating to PatternProps.skipWhiteSpace().
     * @param index start index into the pattern string
     * @return the index returned by PatternProps.skipWhiteSpace(); callers treat it as
     *         the index of the first character at or after index that is not white space,
     *         or msg.length() (NOTE(review): confirm against PatternProps)
     */
    private int skipWhiteSpace(int index) {
        return PatternProps.skipWhiteSpace(msg, index);
    }
1445
    /**
     * Skips a "pattern identifier" in the pattern string
     * by delegating to PatternProps.skipIdentifier().
     * @param index start index into the pattern string
     * @return the index returned by PatternProps.skipIdentifier(); callers treat it as
     *         the limit index of the identifier that starts at index
     *         (NOTE(review): confirm against PatternProps)
     */
    private int skipIdentifier(int index) {
        return PatternProps.skipIdentifier(msg, index);
    }
1449
1450    /**
1451     * Skips a sequence of characters that could occur in a double value.
1452     * Does not fully parse or validate the value.
1453     */
1454    private int skipDouble(int index) {
1455        while(index<msg.length()) {
1456            char c=msg.charAt(index);
1457            // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
1458            if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) {
1459                break;
1460            }
1461            ++index;
1462        }
1463        return index;
1464    }
1465
1466    private static boolean isArgTypeChar(int c) {
1467        return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
1468    }
1469
1470    private boolean isChoice(int index) {
1471        char c;
1472        return
1473            ((c=msg.charAt(index++))=='c' || c=='C') &&
1474            ((c=msg.charAt(index++))=='h' || c=='H') &&
1475            ((c=msg.charAt(index++))=='o' || c=='O') &&
1476            ((c=msg.charAt(index++))=='i' || c=='I') &&
1477            ((c=msg.charAt(index++))=='c' || c=='C') &&
1478            ((c=msg.charAt(index))=='e' || c=='E');
1479    }
1480
1481    private boolean isPlural(int index) {
1482        char c;
1483        return
1484            ((c=msg.charAt(index++))=='p' || c=='P') &&
1485            ((c=msg.charAt(index++))=='l' || c=='L') &&
1486            ((c=msg.charAt(index++))=='u' || c=='U') &&
1487            ((c=msg.charAt(index++))=='r' || c=='R') &&
1488            ((c=msg.charAt(index++))=='a' || c=='A') &&
1489            ((c=msg.charAt(index))=='l' || c=='L');
1490    }
1491
1492    private boolean isSelect(int index) {
1493        char c;
1494        return
1495            ((c=msg.charAt(index++))=='s' || c=='S') &&
1496            ((c=msg.charAt(index++))=='e' || c=='E') &&
1497            ((c=msg.charAt(index++))=='l' || c=='L') &&
1498            ((c=msg.charAt(index++))=='e' || c=='E') &&
1499            ((c=msg.charAt(index++))=='c' || c=='C') &&
1500            ((c=msg.charAt(index))=='t' || c=='T');
1501    }
1502
1503    private boolean isOrdinal(int index) {
1504        char c;
1505        return
1506            ((c=msg.charAt(index++))=='o' || c=='O') &&
1507            ((c=msg.charAt(index++))=='r' || c=='R') &&
1508            ((c=msg.charAt(index++))=='d' || c=='D') &&
1509            ((c=msg.charAt(index++))=='i' || c=='I') &&
1510            ((c=msg.charAt(index++))=='n' || c=='N') &&
1511            ((c=msg.charAt(index++))=='a' || c=='A') &&
1512            ((c=msg.charAt(index))=='l' || c=='L');
1513    }
1514
1515    /**
1516     * @return true if we are inside a MessageFormat (sub-)pattern,
1517     *         as opposed to inside a top-level choice/plural/select pattern.
1518     */
1519    private boolean inMessageFormatPattern(int nestingLevel) {
1520        return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START;
1521    }
1522
1523    /**
1524     * @return true if we are in a MessageFormat sub-pattern
1525     *         of a top-level ChoiceFormat pattern.
1526     */
1527    private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
1528        return
1529            nestingLevel==1 &&
1530            parentType==ArgType.CHOICE &&
1531            parts.get(0).type!=Part.Type.MSG_START;
1532    }
1533
    /**
     * Appends a new Part with the given properties to the parts list.
     * @param type the part type
     * @param index pattern string index of the part
     * @param length length of the pattern substring covered by the part
     * @param value the part's value; its meaning depends on the part type
     */
    private void addPart(Part.Type type, int index, int length, int value) {
        parts.add(new Part(type, index, length, value));
    }
1537
1538    private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
1539        parts.get(start).limitPartIndex=parts.size();
1540        addPart(type, index, length, value);
1541    }
1542
1543    private void addArgDoublePart(double numericValue, int start, int length) {
1544        int numericIndex;
1545        if(numericValues==null) {
1546            numericValues=new ArrayList<Double>();
1547            numericIndex=0;
1548        } else {
1549            numericIndex=numericValues.size();
1550            if(numericIndex>Part.MAX_VALUE) {
1551                throw new IndexOutOfBoundsException("Too many numeric values");
1552            }
1553        }
1554        numericValues.add(numericValue);
1555        addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
1556    }
1557
1558    private static final int MAX_PREFIX_LENGTH=24;
1559
1560    /**
1561     * Returns a prefix of s.substring(start). Used for Exception messages.
1562     * @param s
1563     * @param start start index in s
1564     * @return s.substring(start) or a prefix of that
1565     */
1566    private static String prefix(String s, int start) {
1567        StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20);
1568        if(start==0) {
1569            prefix.append("\"");
1570        } else {
1571            prefix.append("[at pattern index ").append(start).append("] \"");
1572        }
1573        int substringLength=s.length()-start;
1574        if(substringLength<=MAX_PREFIX_LENGTH) {
1575            prefix.append(start==0 ? s : s.substring(start));
1576        } else {
1577            int limit=start+MAX_PREFIX_LENGTH-4;
1578            if(Character.isHighSurrogate(s.charAt(limit-1))) {
1579                // remove lead surrogate from the end of the prefix
1580                --limit;
1581            }
1582            prefix.append(s, start, limit).append(" ...");
1583        }
1584        return prefix.append("\"").toString();
1585    }
1586
1587    private static String prefix(String s) {
1588        return prefix(s, 0);
1589    }
1590
1591    private String prefix(int start) {
1592        return prefix(msg, start);
1593    }
1594
1595    private String prefix() {
1596        return prefix(msg, 0);
1597    }
1598
    // Apostrophe mode of this instance.
    // NOTE(review): presumably set at construction time, with defaultAposMode as the default -- confirm.
    private ApostropheMode aposMode;
    // The string that the instance prefix() helpers excerpt for exception messages
    // (they label positions in it as "pattern index").
    private String msg;
    // The list of parsed parts; addPart() appends to it and addLimitPart() links
    // a start part to the position of its limit part.
    private ArrayList<Part> parts=new ArrayList<Part>();
    // Numeric values referenced by ARG_DOUBLE parts (the part value is the list position).
    // Not allocated here; addArgDoublePart() creates the list when it finds it null.
    private ArrayList<Double> numericValues;
    // NOTE(review): presumably true once a named argument has been parsed -- confirm against the parser.
    private boolean hasArgNames;
    // NOTE(review): presumably true once a numbered argument has been parsed -- confirm against the parser.
    private boolean hasArgNumbers;
    // NOTE(review): presumably records that the pattern would need apostrophes added
    // to be read the same way under a different apostrophe mode -- confirm.
    private boolean needsAutoQuoting;
    // NOTE(review): presumably the Freezable state flag (Freezable is imported by this file) -- confirm.
    private boolean frozen;
1607
    // Default apostrophe mode, computed once during class initialization:
    // the string returned by ICUConfig.get() for the key
    // "com.ibm.icu.text.MessagePattern.ApostropheMode" is converted with ApostropheMode.valueOf().
    // "DOUBLE_OPTIONAL" is passed as the second argument
    // (presumably the fallback when the key is not configured -- confirm against ICUConfig).
    private static final ApostropheMode defaultAposMode=
        ApostropheMode.valueOf(
            ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL"));

    // Result of a single ArgType.values() call, kept for reuse
    // (presumably so that lookups do not request a new array each time -- confirm at the use sites).
    private static final ArgType[] argTypes=ArgType.values();
1613}
1614