1/*
2*******************************************************************************
3*   Copyright (C) 2011-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  messagepattern.h
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2011mar14
12*   created by: Markus W. Scherer
13*/
14
15#ifndef __MESSAGEPATTERN_H__
16#define __MESSAGEPATTERN_H__
17
18/**
19 * \file
20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
21 */
22
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_FORMATTING
26
27#include "unicode/parseerr.h"
28#include "unicode/unistr.h"
29
30/**
31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
34 * <p>
35 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
36 * even when the pair is between two single, text-quoting apostrophes.
37 * <p>
38 * The following table shows examples of desired MessageFormat.format() output
39 * with the pattern strings that yield that output.
40 * <p>
41 * <table>
42 *   <tr>
43 *     <th>Desired output</th>
44 *     <th>DOUBLE_OPTIONAL</th>
45 *     <th>DOUBLE_REQUIRED</th>
46 *   </tr>
47 *   <tr>
48 *     <td>I see {many}</td>
49 *     <td>I see '{many}'</td>
50 *     <td>(same)</td>
51 *   </tr>
52 *   <tr>
53 *     <td>I said {'Wow!'}</td>
54 *     <td>I said '{''Wow!''}'</td>
55 *     <td>(same)</td>
56 *   </tr>
57 *   <tr>
58 *     <td>I don't know</td>
59 *     <td>I don't know OR<br> I don''t know</td>
60 *     <td>I don''t know</td>
61 *   </tr>
62 * </table>
63 * @stable ICU 4.8
64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
65 */
enum UMessagePatternApostropheMode {
    /**
     * Lenient mode: a literal apostrophe may be written in the pattern
     * as either one apostrophe or two adjacent apostrophes.
     * Inside a MessageFormat pattern, a lone apostrophe opens quoted literal text
     * only when it comes directly before a curly brace {},
     * or before a pipe symbol | while inside a choice format,
     * or before a pound symbol # while inside a plural format.
     * <p>
     * Default behavior beginning with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL = 0,
    /**
     * Strict mode: a literal apostrophe has to be written
     * as two adjacent apostrophes in the pattern.
     * Every single apostrophe opens quoted literal text.
     * <p>
     * Matches ICU 4.6 and earlier, and the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED = 1
};
/**
 * Typedef so the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
93
94/**
95 * MessagePattern::Part type constants.
96 * @stable ICU 4.8
97 */
enum UMessagePatternPartType {
    /**
     * Marks where a message pattern begins (the main message or a nested one).
     * Length: 0 for the top-level message and for a sub-message of a choice argument;
     * otherwise 1, covering the '{'.
     * Value: the nesting level; the main message is level 0.
     * <p>
     * A MSG_LIMIT part always follows later.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START = 0,
    /**
     * Marks where a message pattern ends (the main message or a nested one).
     * Length: 0 for the top-level message and for the last sub-message
     * of a choice argument; otherwise 1, covering the '}' or
     * (in a choice argument style) the '|'.
     * Value: the nesting level; the main message is level 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
    /**
     * Covers a piece of the pattern string that must be skipped while formatting,
     * such as an apostrophe that opens or closes quoted text.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
    /**
     * Signals that auto-quoting needs a syntax character inserted at this position.
     * Length: 0.
     * Value: the code of the character to insert. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
    /**
     * Covers a syntactic (non-escaped) # symbol inside a plural variant.
     * While formatting, this part's substring is replaced with the
     * (value-offset) of the plural argument value.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
    /**
     * Marks where an argument begins.
     * Length: 1, covering the '{'.
     * Value: the ordinal of the ArgType; read it with getArgType().
     * <p>
     * The next part is an ARG_NUMBER or an ARG_NAME, after which come
     * optional argument sub-parts (see UMessagePatternArgType constants)
     * and, last, an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START = 5,
    /**
     * Marks where an argument ends.
     * Length: 1, covering the '}'.
     * Value: the ordinal of the ArgType; read it with getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
    /**
     * An argument number; the number itself is the part value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
    /**
     * An argument name.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME = 8,
    /**
     * An argument type.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE = 9,
    /**
     * The style text of an argument.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE = 10,
    /**
     * A selector substring within a "complex" argument style.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
    /**
     * An integer, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * Value: the integer itself.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT = 12,
    /**
     * A numeric value, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * Value: an index into an internal array of numeric values;
     * retrieve the number with getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
};
/**
 * Typedef so the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
208
209/**
210 * Argument type constants.
211 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
212 *
213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
214 * with a nesting level one greater than the surrounding message.
215 * @stable ICU 4.8
216 */
enum UMessagePatternArgType {
    /**
     * No type was specified for the argument.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE = 0,
    /**
     * A "simple" type, supplied by the ARG_TYPE part.
     * That part may be followed by an ARG_STYLE part.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE = 1,
    /**
     * A ChoiceFormat argument, made of one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE = 2,
    /**
     * A cardinal-number PluralFormat argument: an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * followed by one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * When a selector carries an explicit value (e.g., =2), the ARG_INT or ARG_DOUBLE
     * part that precedes the message supplies that value.
     * Without one, the message comes directly after the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL = 3,
    /**
     * A SelectFormat argument, made of one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT = 4,
    /**
     * An ordinal-number PluralFormat argument; its style parts follow
     * the same sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL = 5
};
/**
 * Typedef so the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
261
/**
 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
 * Evaluates to TRUE when the argument type uses the plural style part sequence
 * and semantics, that is, for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
 * The macro argument appears twice in the expansion, so pass an expression
 * without side effects.
 * @stable ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    ( (argType) == UMSGPAT_ARG_TYPE_PLURAL || \
      (argType) == UMSGPAT_ARG_TYPE_SELECTORDINAL )
270
enum {
    /**
     * Result of MessagePattern.validateArgumentName() when the string
     * is a valid "pattern identifier" but is not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER = -1,

    /**
     * Result of MessagePattern.validateArgumentName() when the string is invalid.
     * Either it is not a valid "pattern identifier",
     * or it consists only of ASCII digits but has a leading zero
     * or denotes a number that is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID = -2
};
288
/**
 * Sentinel of type double that getNumericValue(Part) returns
 * when a part has no numeric value defined.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE (-123456789.0)
296
297U_NAMESPACE_BEGIN
298
299class MessagePatternDoubleList;
300class MessagePatternPartsList;
301
302/**
303 * Parses and represents ICU MessageFormat patterns.
304 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
305 * Used in the implementations of those classes as well as in tools
306 * for message validation, translation and format conversion.
307 * <p>
308 * The parser handles all syntax relevant for identifying message arguments.
309 * This includes "complex" arguments whose style strings contain
310 * nested MessageFormat pattern substrings.
311 * For "simple" arguments (with no nested MessageFormat pattern substrings),
312 * the argument style is not parsed any further.
313 * <p>
314 * The parser handles named and numbered message arguments and allows both in one message.
315 * <p>
316 * Once a pattern has been parsed successfully, iterate through the parsed data
317 * with countParts(), getPart() and related methods.
318 * <p>
319 * The data logically represents a parse tree, but is stored and accessed
320 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
321 * Arguments and nested messages are best handled via recursion.
322 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
323 * the index of the corresponding _LIMIT "part".
324 * <p>
325 * List of "parts":
326 * <pre>
327 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
328 * argument = noneArg | simpleArg | complexArg
329 * complexArg = choiceArg | pluralArg | selectArg
330 *
331 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
332 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
333 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
334 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
335 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
336 *
337 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
338 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
339 * selectStyle = (ARG_SELECTOR message)+
340 * </pre>
341 * <ul>
342 *   <li>Literal output text is not represented directly by "parts" but accessed
343 *       between parts of a message, from one part's getLimit() to the next part's getIndex().
344 *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
345 *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
346 *       the less-than-or-equal-to sign (U+2264).
347 *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
348 *       The optional numeric Part between each (ARG_SELECTOR, message) pair
349 *       is the value of an explicit-number selector like "=2",
350 *       otherwise the selector is a non-numeric identifier.
351 *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
352 * </ul>
353 * <p>
354 * This class is not intended for public subclassing.
355 *
356 * @stable ICU 4.8
357 */
358class U_COMMON_API MessagePattern : public UObject {
359public:
360    /**
361     * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
362     * @param errorCode Standard ICU error code. Its input value must
363     *                  pass the U_SUCCESS() test, or else the function returns
364     *                  immediately. Check for U_FAILURE() on output or use with
365     *                  function chaining. (See User Guide for details.)
366     * @stable ICU 4.8
367     */
368    MessagePattern(UErrorCode &errorCode);
369
370    /**
371     * Constructs an empty MessagePattern.
372     * @param mode Explicit UMessagePatternApostropheMode.
373     * @param errorCode Standard ICU error code. Its input value must
374     *                  pass the U_SUCCESS() test, or else the function returns
375     *                  immediately. Check for U_FAILURE() on output or use with
376     *                  function chaining. (See User Guide for details.)
377     * @stable ICU 4.8
378     */
379    MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
380
381    /**
382     * Constructs a MessagePattern with default UMessagePatternApostropheMode and
383     * parses the MessageFormat pattern string.
384     * @param pattern a MessageFormat pattern string
385     * @param parseError Struct to receive information on the position
386     *                   of an error within the pattern.
387     *                   Can be NULL.
388     * @param errorCode Standard ICU error code. Its input value must
389     *                  pass the U_SUCCESS() test, or else the function returns
390     *                  immediately. Check for U_FAILURE() on output or use with
391     *                  function chaining. (See User Guide for details.)
392     * TODO: turn @throws into UErrorCode specifics?
393     * @throws IllegalArgumentException for syntax errors in the pattern string
394     * @throws IndexOutOfBoundsException if certain limits are exceeded
395     *         (e.g., argument number too high, argument name too long, etc.)
396     * @throws NumberFormatException if a number could not be parsed
397     * @stable ICU 4.8
398     */
399    MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
400
401    /**
402     * Copy constructor.
403     * @param other Object to copy.
404     * @stable ICU 4.8
405     */
406    MessagePattern(const MessagePattern &other);
407
408    /**
409     * Assignment operator.
410     * @param other Object to copy.
411     * @return *this=other
412     * @stable ICU 4.8
413     */
414    MessagePattern &operator=(const MessagePattern &other);
415
416    /**
417     * Destructor.
418     * @stable ICU 4.8
419     */
420    virtual ~MessagePattern();
421
422    /**
423     * Parses a MessageFormat pattern string.
424     * @param pattern a MessageFormat pattern string
425     * @param parseError Struct to receive information on the position
426     *                   of an error within the pattern.
427     *                   Can be NULL.
428     * @param errorCode Standard ICU error code. Its input value must
429     *                  pass the U_SUCCESS() test, or else the function returns
430     *                  immediately. Check for U_FAILURE() on output or use with
431     *                  function chaining. (See User Guide for details.)
432     * @return *this
433     * @throws IllegalArgumentException for syntax errors in the pattern string
434     * @throws IndexOutOfBoundsException if certain limits are exceeded
435     *         (e.g., argument number too high, argument name too long, etc.)
436     * @throws NumberFormatException if a number could not be parsed
437     * @stable ICU 4.8
438     */
439    MessagePattern &parse(const UnicodeString &pattern,
440                          UParseError *parseError, UErrorCode &errorCode);
441
442    /**
443     * Parses a ChoiceFormat pattern string.
444     * @param pattern a ChoiceFormat pattern string
445     * @param parseError Struct to receive information on the position
446     *                   of an error within the pattern.
447     *                   Can be NULL.
448     * @param errorCode Standard ICU error code. Its input value must
449     *                  pass the U_SUCCESS() test, or else the function returns
450     *                  immediately. Check for U_FAILURE() on output or use with
451     *                  function chaining. (See User Guide for details.)
452     * @return *this
453     * @throws IllegalArgumentException for syntax errors in the pattern string
454     * @throws IndexOutOfBoundsException if certain limits are exceeded
455     *         (e.g., argument number too high, argument name too long, etc.)
456     * @throws NumberFormatException if a number could not be parsed
457     * @stable ICU 4.8
458     */
459    MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
460                                     UParseError *parseError, UErrorCode &errorCode);
461
462    /**
463     * Parses a PluralFormat pattern string.
464     * @param pattern a PluralFormat pattern string
465     * @param parseError Struct to receive information on the position
466     *                   of an error within the pattern.
467     *                   Can be NULL.
468     * @param errorCode Standard ICU error code. Its input value must
469     *                  pass the U_SUCCESS() test, or else the function returns
470     *                  immediately. Check for U_FAILURE() on output or use with
471     *                  function chaining. (See User Guide for details.)
472     * @return *this
473     * @throws IllegalArgumentException for syntax errors in the pattern string
474     * @throws IndexOutOfBoundsException if certain limits are exceeded
475     *         (e.g., argument number too high, argument name too long, etc.)
476     * @throws NumberFormatException if a number could not be parsed
477     * @stable ICU 4.8
478     */
479    MessagePattern &parsePluralStyle(const UnicodeString &pattern,
480                                     UParseError *parseError, UErrorCode &errorCode);
481
482    /**
483     * Parses a SelectFormat pattern string.
484     * @param pattern a SelectFormat pattern string
485     * @param parseError Struct to receive information on the position
486     *                   of an error within the pattern.
487     *                   Can be NULL.
488     * @param errorCode Standard ICU error code. Its input value must
489     *                  pass the U_SUCCESS() test, or else the function returns
490     *                  immediately. Check for U_FAILURE() on output or use with
491     *                  function chaining. (See User Guide for details.)
492     * @return *this
493     * @throws IllegalArgumentException for syntax errors in the pattern string
494     * @throws IndexOutOfBoundsException if certain limits are exceeded
495     *         (e.g., argument number too high, argument name too long, etc.)
496     * @throws NumberFormatException if a number could not be parsed
497     * @stable ICU 4.8
498     */
499    MessagePattern &parseSelectStyle(const UnicodeString &pattern,
500                                     UParseError *parseError, UErrorCode &errorCode);
501
502    /**
503     * Clears this MessagePattern.
504     * countParts() will return 0.
505     * @stable ICU 4.8
506     */
507    void clear();
508
509    /**
510     * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
511     * countParts() will return 0.
512     * @param mode The new UMessagePatternApostropheMode.
513     * @stable ICU 4.8
514     */
515    void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
516        clear();
517        aposMode=mode;
518    }
519
520    /**
521     * @param other another object to compare with.
522     * @return TRUE if this object is equivalent to the other one.
523     * @stable ICU 4.8
524     */
525    UBool operator==(const MessagePattern &other) const;
526
527    /**
528     * @param other another object to compare with.
529     * @return FALSE if this object is equivalent to the other one.
530     * @stable ICU 4.8
531     */
532    inline UBool operator!=(const MessagePattern &other) const {
533        return !operator==(other);
534    }
535
536    /**
537     * @return A hash code for this object.
538     * @stable ICU 4.8
539     */
540    int32_t hashCode() const;
541
542    /**
543     * @return this instance's UMessagePatternApostropheMode.
544     * @stable ICU 4.8
545     */
546    UMessagePatternApostropheMode getApostropheMode() const {
547        return aposMode;
548    }
549
550    // Java has package-private jdkAposMode() here.
551    // In C++, this is declared in the MessageImpl class.
552
553    /**
554     * @return the parsed pattern string (null if none was parsed).
555     * @stable ICU 4.8
556     */
557    const UnicodeString &getPatternString() const {
558        return msg;
559    }
560
561    /**
562     * Does the parsed pattern have named arguments like {first_name}?
563     * @return TRUE if the parsed pattern has at least one named argument.
564     * @stable ICU 4.8
565     */
566    UBool hasNamedArguments() const {
567        return hasArgNames;
568    }
569
570    /**
571     * Does the parsed pattern have numbered arguments like {2}?
572     * @return TRUE if the parsed pattern has at least one numbered argument.
573     * @stable ICU 4.8
574     */
575    UBool hasNumberedArguments() const {
576        return hasArgNumbers;
577    }
578
579    /**
580     * Validates and parses an argument name or argument number string.
581     * An argument name must be a "pattern identifier", that is, it must contain
582     * no Unicode Pattern_Syntax or Pattern_White_Space characters.
583     * If it only contains ASCII digits, then it must be a small integer with no leading zero.
584     * @param name Input string.
585     * @return &gt;=0 if the name is a valid number,
586     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
587     *         ARG_NAME_NOT_VALID (-2) if it is neither.
588     * @stable ICU 4.8
589     */
590    static int32_t validateArgumentName(const UnicodeString &name);
591
592    /**
593     * Returns a version of the parsed pattern string where each ASCII apostrophe
594     * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
595     * <p>
596     * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
597     * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
598     * @return the deep-auto-quoted version of the parsed pattern string.
599     * @see MessageFormat.autoQuoteApostrophe()
600     * @stable ICU 4.8
601     */
602    UnicodeString autoQuoteApostropheDeep() const;
603
604    class Part;
605
606    /**
607     * Returns the number of "parts" created by parsing the pattern string.
608     * Returns 0 if no pattern has been parsed or clear() was called.
609     * @return the number of pattern parts.
610     * @stable ICU 4.8
611     */
612    int32_t countParts() const {
613        return partsLength;
614    }
615
616    /**
617     * Gets the i-th pattern "part".
618     * @param i The index of the Part data. (0..countParts()-1)
619     * @return the i-th pattern "part".
620     * @stable ICU 4.8
621     */
622    const Part &getPart(int32_t i) const {
623        return parts[i];
624    }
625
626    /**
627     * Returns the UMessagePatternPartType of the i-th pattern "part".
628     * Convenience method for getPart(i).getType().
629     * @param i The index of the Part data. (0..countParts()-1)
630     * @return The UMessagePatternPartType of the i-th Part.
631     * @stable ICU 4.8
632     */
633    UMessagePatternPartType getPartType(int32_t i) const {
634        return getPart(i).type;
635    }
636
637    /**
638     * Returns the pattern index of the specified pattern "part".
639     * Convenience method for getPart(partIndex).getIndex().
640     * @param partIndex The index of the Part data. (0..countParts()-1)
641     * @return The pattern index of this Part.
642     * @stable ICU 4.8
643     */
644    int32_t getPatternIndex(int32_t partIndex) const {
645        return getPart(partIndex).index;
646    }
647
648    /**
649     * Returns the substring of the pattern string indicated by the Part.
650     * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
651     * @param part a part of this MessagePattern.
652     * @return the substring associated with part.
653     * @stable ICU 4.8
654     */
655    UnicodeString getSubstring(const Part &part) const {
656        return msg.tempSubString(part.index, part.length);
657    }
658
659    /**
660     * Compares the part's substring with the input string s.
661     * @param part a part of this MessagePattern.
662     * @param s a string.
663     * @return TRUE if getSubstring(part).equals(s).
664     * @stable ICU 4.8
665     */
666    UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
667        return 0==msg.compare(part.index, part.length, s);
668    }
669
670    /**
671     * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
672     * @param part a part of this MessagePattern.
673     * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
674     * @stable ICU 4.8
675     */
676    double getNumericValue(const Part &part) const;
677
678    /**
679     * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
680     * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
681     * @return the "offset:" value.
682     * @stable ICU 4.8
683     */
684    double getPluralOffset(int32_t pluralStart) const;
685
686    /**
687     * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
688     * @param start The index of some Part data (0..countParts()-1);
689     *        this Part should be of Type ARG_START or MSG_START.
690     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
691     *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
692     * @stable ICU 4.8
693     */
694    int32_t getLimitPartIndex(int32_t start) const {
695        int32_t limit=getPart(start).limitPartIndex;
696        if(limit<start) {
697            return start;
698        }
699        return limit;
700    }
701
702    /**
703     * A message pattern "part", representing a pattern parsing event.
704     * There is a part for the start and end of a message or argument,
705     * for quoting and escaping of and with ASCII apostrophes,
706     * and for syntax elements of "complex" arguments.
707     * @stable ICU 4.8
708     */
709    class Part : public UMemory {
710    public:
711        /**
712         * Default constructor, do not use.
713         * @internal
714         */
715        Part() {}
716
717        /**
718         * Returns the type of this part.
719         * @return the part type.
720         * @stable ICU 4.8
721         */
722        UMessagePatternPartType getType() const {
723            return type;
724        }
725
726        /**
727         * Returns the pattern string index associated with this Part.
728         * @return this part's pattern string index.
729         * @stable ICU 4.8
730         */
731        int32_t getIndex() const {
732            return index;
733        }
734
735        /**
736         * Returns the length of the pattern substring associated with this Part.
737         * This is 0 for some parts.
738         * @return this part's pattern substring length.
739         * @stable ICU 4.8
740         */
741        int32_t getLength() const {
742            return length;
743        }
744
745        /**
746         * Returns the pattern string limit (exclusive-end) index associated with this Part.
747         * Convenience method for getIndex()+getLength().
748         * @return this part's pattern string limit index, same as getIndex()+getLength().
749         * @stable ICU 4.8
750         */
751        int32_t getLimit() const {
752            return index+length;
753        }
754
755        /**
756         * Returns a value associated with this part.
757         * See the documentation of each part type for details.
758         * @return the part value.
759         * @stable ICU 4.8
760         */
761        int32_t getValue() const {
762            return value;
763        }
764
765        /**
766         * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
767         * otherwise UMSGPAT_ARG_TYPE_NONE.
768         * @return the argument type for this part.
769         * @stable ICU 4.8
770         */
771        UMessagePatternArgType getArgType() const {
772            UMessagePatternPartType type=getType();
773            if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
774                return (UMessagePatternArgType)value;
775            } else {
776                return UMSGPAT_ARG_TYPE_NONE;
777            }
778        }
779
780        /**
781         * Indicates whether the Part type has a numeric value.
782         * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
783         * @param type The Part type to be tested.
784         * @return TRUE if the Part type has a numeric value.
785         * @stable ICU 4.8
786         */
787        static UBool hasNumericValue(UMessagePatternPartType type) {
788            return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
789        }
790
791        /**
792         * @param other another object to compare with.
793         * @return TRUE if this object is equivalent to the other one.
794         * @stable ICU 4.8
795         */
796        UBool operator==(const Part &other) const;
797
798        /**
799         * @param other another object to compare with.
800         * @return FALSE if this object is equivalent to the other one.
801         * @stable ICU 4.8
802         */
803        inline UBool operator!=(const Part &other) const {
804            return !operator==(other);
805        }
806
807        /** Hashes type, index, length and value (multiplier 37) in unsigned math, so large inputs wrap instead of overflowing.
808         * @return A hash code for this object; same value as the signed formula whenever that one does not overflow.
809         * @stable ICU 4.8
810         */
811        int32_t hashCode() const {
812            return (int32_t)((((uint32_t)type*37u+(uint32_t)index)*37u+length)*37u+(uint32_t)value);
813        }
814
815    private:
816        friend class MessagePattern;
817
818        static const int32_t MAX_LENGTH=0xffff;  // largest value the uint16_t length field can hold
819        static const int32_t MAX_VALUE=0x7fff;   // largest value the int16_t value field can hold
820
821        // Some fields are not const because they are modified during pattern parsing.
822        // After pattern parsing, the parts are effectively immutable.
823        UMessagePatternPartType type;
824        int32_t index;  // NOTE(review): presumably the part's start offset in the pattern string -- confirm
825        uint16_t length;
826        int16_t value;  // getArgType() reinterprets this as UMessagePatternArgType for ARG_START/ARG_LIMIT parts
827        int32_t limitPartIndex;  // NOTE(review): presumably the index of the matching limit part -- confirm
828    };
829
830private:
831    void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
832    // NOTE(review): preParse()/postParse() are only declared here; presumably they run before/after the parse*() calls -- confirm in the .cpp.
833    void postParse();
834    // Each parse*() helper below takes an index and returns int32_t (NOTE(review): presumably the index after the parsed text -- confirm).
835    int32_t parseMessage(int32_t index, int32_t msgStartLength,
836                         int32_t nestingLevel, UMessagePatternArgType parentType,
837                         UParseError *parseError, UErrorCode &errorCode);
838
839    int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
840                     UParseError *parseError, UErrorCode &errorCode);
841    // The simple-style parser is the only parse*() helper in this group that takes no nestingLevel.
842    int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
843
844    int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
845                             UParseError *parseError, UErrorCode &errorCode);
846    // One declaration serves both styles; NOTE(review): argType presumably selects plural vs. select -- confirm.
847    int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
848                                     UParseError *parseError, UErrorCode &errorCode);
849
850    /**
851     * Validates and parses an argument name or argument number string.
852     * This internal method assumes that the input substring of s is a "pattern identifier".
853     * @return &gt;=0 if the name is a valid number,
854     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
855     *         ARG_NAME_NOT_VALID (-2) if it is neither.
856     * NOTE(review): start/limit presumably delimit the half-open range [start, limit) of s -- confirm.
857     */
858    static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
859    // Convenience overload: applies the static parseArgNumber() to this object's own msg string.
860    int32_t parseArgNumber(int32_t start, int32_t limit) {
861        return MessagePattern::parseArgNumber(msg, start, limit);
862    }
863
864    /**
865     * Parses a number from the specified message substring. Returns nothing (NOTE(review): where the result is stored is not visible here).
866     * @param start start index into the message string
867     * @param limit limit index into the message string, must be start<limit
868     * @param allowInfinity TRUE if U+221E (the infinity sign) is allowed (for ChoiceFormat)
869     * @param parseError pointer to a UParseError (NOTE(review): presumably receives error context; confirm whether NULL is allowed)
870     * @param errorCode ICU error code, passed by non-const reference so the callee can set it
871     */
872    void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
873                     UParseError *parseError, UErrorCode &errorCode);
874
875    // The Java implementation has a package-private appendReducedApostrophes() at this point.
876    // In C++, that function is declared in the MessageImpl class instead.
877
878    int32_t skipWhiteSpace(int32_t index);
879    // NOTE(review): skipWhiteSpace()/skipIdentifier() presumably return the index after the skipped characters -- confirm.
880    int32_t skipIdentifier(int32_t index);
881
882    /**
883     * Skips a sequence of characters that could occur in a double value.
884     * Does not fully parse or validate the value.
885     */
886    int32_t skipDouble(int32_t index);
887
888    static UBool isArgTypeChar(UChar32 c);  // static: takes only the code point c, no pattern index
889    // NOTE(review): the four is*(index) tests below presumably look for a keyword at that pattern index -- confirm.
890    UBool isChoice(int32_t index);
891
892    UBool isPlural(int32_t index);
893
894    UBool isSelect(int32_t index);
895
896    UBool isOrdinal(int32_t index);
897
898    /** Parsing-context test based on nestingLevel.
899     * @return TRUE if we are inside a MessageFormat (sub-)pattern,
900     *         as opposed to inside a top-level choice/plural/select pattern.
901     */
902    UBool inMessageFormatPattern(int32_t nestingLevel);
903
904    /** Parsing-context test based on nestingLevel and parentType (presumably the enclosing argument's type).
905     * @return TRUE if we are in a MessageFormat sub-pattern
906     *         of a top-level ChoiceFormat pattern.
907     */
908    UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
909    // add*Part(): type/index/length/value share their names with the Part fields; length and value arrive as int32_t while Part stores them in 16 bits.
910    void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
911                 int32_t value, UErrorCode &errorCode);
912    // NOTE(review): start presumably identifies the matching start part -- confirm.
913    void addLimitPart(int32_t start,
914                      UMessagePatternPartType type, int32_t index, int32_t length,
915                      int32_t value, UErrorCode &errorCode);
916
917    void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
918
919    void setParseError(UParseError *parseError, int32_t index);
920    // Both take an errorCode reference and return UBool (NOTE(review): presumably TRUE on success -- confirm).
921    UBool init(UErrorCode &errorCode);
922    UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
923
924    UMessagePatternApostropheMode aposMode;
925    UnicodeString msg;  // the string that parseArgNumber(start, limit) indexes into
926    // Java: ArrayList<Part> parts=new ArrayList<Part>(); here a list object, a raw Part pointer and a length are separate members.
927    MessagePatternPartsList *partsList;
928    Part *parts;
929    int32_t partsLength;
930    // Java: ArrayList<Double> numericValues; here a list object, a raw double pointer and a length are separate members.
931    MessagePatternDoubleList *numericValuesList;
932    double *numericValues;
933    int32_t numericValuesLength;
934    UBool hasArgNames;
935    UBool hasArgNumbers;
936    UBool needsAutoQuoting;
937};
938
939U_NAMESPACE_END
940
941#endif  // !UCONFIG_NO_FORMATTING
942
943#endif  // __MESSAGEPATTERN_H__
944