1/*
2*******************************************************************************
3*   Copyright (C) 2011-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  messagepattern.h
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2011mar14
12*   created by: Markus W. Scherer
13*/
14
15#ifndef __MESSAGEPATTERN_H__
16#define __MESSAGEPATTERN_H__
17
18/**
19 * \file
20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
21 */
22
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_FORMATTING
26
27#include "unicode/parseerr.h"
28#include "unicode/unistr.h"
29
30/**
31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
34 * <p>
35 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
36 * even when the pair is between two single, text-quoting apostrophes.
37 * <p>
38 * The following table shows examples of desired MessageFormat.format() output
39 * with the pattern strings that yield that output.
40 * <p>
41 * <table>
42 *   <tr>
43 *     <th>Desired output</th>
44 *     <th>DOUBLE_OPTIONAL</th>
45 *     <th>DOUBLE_REQUIRED</th>
46 *   </tr>
47 *   <tr>
48 *     <td>I see {many}</td>
49 *     <td>I see '{many}'</td>
50 *     <td>(same)</td>
51 *   </tr>
52 *   <tr>
53 *     <td>I said {'Wow!'}</td>
54 *     <td>I said '{''Wow!''}'</td>
55 *     <td>(same)</td>
56 *   </tr>
57 *   <tr>
58 *     <td>I don't know</td>
59 *     <td>I don't know OR<br> I don''t know</td>
60 *     <td>I don''t know</td>
61 *   </tr>
62 * </table>
63 * @stable ICU 4.8
64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
65 */
enum UMessagePatternApostropheMode {
    /**
     * Doubling is optional: a literal apostrophe may be written in the pattern
     * as one apostrophe or as two adjacent apostrophes.
     * Inside a MessageFormat pattern a single apostrophe starts quoted literal text
     * only when it comes immediately before a curly brace {},
     * or before a pipe symbol | inside a choice format,
     * or before a pound symbol # inside a plural format.
     * <p>
     * Default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL = 0,
    /**
     * Doubling is required: a literal apostrophe has to be written
     * as two adjacent apostrophes in the pattern.
     * Every single apostrophe starts quoted literal text.
     * <p>
     * Matches the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED = 1
};
/**
 * Lets the enum type be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
93
94/**
95 * MessagePattern::Part type constants.
96 * @stable ICU 4.8
97 */
enum UMessagePatternPartType {
    /**
     * Marks where a message pattern (main or nested) begins.
     * Length: 0 for the top-level message and for a choice argument sub-message,
     * otherwise 1 for the '{'.
     * Value: the nesting level, where the main message is level 0.
     * <p>
     * A MSG_LIMIT part always follows later.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START = 0,
    /**
     * Marks where a message pattern (main or nested) ends.
     * Length: 0 for the top-level message and for
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * Value: the nesting level, where the main message is level 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
    /**
     * Covers a substring of the pattern string that must be skipped when formatting,
     * for example an apostrophe that opens or closes quoted text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
    /**
     * Signals that a syntax character has to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
    /**
     * Marks a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, this part's substring is replaced with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
    /**
     * Marks where an argument begins.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * Next comes either an ARG_NUMBER or an ARG_NAME part,
     * then optional argument sub-parts (see UMessagePatternArgType constants),
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START = 5,
    /**
     * Marks where an argument ends.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
    /**
     * An argument number; the number itself is the part value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
    /**
     * An argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME = 8,
    /**
     * An argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE = 9,
    /**
     * The text of an argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE = 10,
    /**
     * A selector substring inside a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
    /**
     * An integer value, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * The integer is stored directly as the part value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT = 12,
    /**
     * A numeric value, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * retrieve the number with getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
};
/**
 * Lets the enum type be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
208
209/**
210 * Argument type constants.
211 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
212 *
213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
214 * with a nesting level one greater than the surrounding message.
215 * @stable ICU 4.8
216 */
enum UMessagePatternArgType {
    /**
     * The argument has no specified type.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE,
    /**
     * The argument has a "simple" type which is provided by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE,
    /**
     * The argument is a ChoiceFormat with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE,
    /**
     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL,
    /**
     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT
    /*
     * The separating comma lives inside the conditional section on purpose:
     * when U_HIDE_DRAFT_API is defined the enumerator list must not end in a
     * trailing comma, which compilers older than C++11/C99 reject.
     */
#ifndef U_HIDE_DRAFT_API
    ,
    /**
     * The argument is an ordinal-number PluralFormat
     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     * @draft ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL
#endif /* U_HIDE_DRAFT_API */
};
/**
 * Lets the enum type be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
263
#ifndef U_HIDE_DRAFT_API
/**
 * Tests whether an argument type uses the plural style part sequence and semantics.
 * Evaluates to TRUE for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
 * Note that the macro argument appears twice in the expansion, so pass an
 * expression without side effects.
 * @draft ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    ( \
        (argType) == UMSGPAT_ARG_TYPE_PLURAL || \
        (argType) == UMSGPAT_ARG_TYPE_SELECTORDINAL \
    )
#endif /* U_HIDE_DRAFT_API */
273
enum {
    /**
     * Result of MessagePattern.validateArgumentName() when
     * the string is a valid "pattern identifier" but is not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER = -1,

    /**
     * Result of MessagePattern.validateArgumentName() when
     * the string is invalid.
     * Either it is not a valid "pattern identifier",
     * or it has only ASCII digits but there is a leading zero or the number is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID = -2
};
291
/**
 * Sentinel returned by getNumericValue(Part) when the part
 * has no numeric value defined.
 * The constant has type double.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE (-123456789.0)
299
300U_NAMESPACE_BEGIN
301
// Forward declarations only; neither class is defined in the visible part of this header.
class MessagePatternPartsList;
class MessagePatternDoubleList;
304
305/**
306 * Parses and represents ICU MessageFormat patterns.
307 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
308 * Used in the implementations of those classes as well as in tools
309 * for message validation, translation and format conversion.
310 * <p>
311 * The parser handles all syntax relevant for identifying message arguments.
312 * This includes "complex" arguments whose style strings contain
313 * nested MessageFormat pattern substrings.
314 * For "simple" arguments (with no nested MessageFormat pattern substrings),
315 * the argument style is not parsed any further.
316 * <p>
317 * The parser handles named and numbered message arguments and allows both in one message.
318 * <p>
319 * Once a pattern has been parsed successfully, iterate through the parsed data
320 * with countParts(), getPart() and related methods.
321 * <p>
322 * The data logically represents a parse tree, but is stored and accessed
323 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
324 * Arguments and nested messages are best handled via recursion.
325 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
326 * the index of the corresponding _LIMIT "part".
327 * <p>
328 * List of "parts":
329 * <pre>
330 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
331 * argument = noneArg | simpleArg | complexArg
332 * complexArg = choiceArg | pluralArg | selectArg
333 *
334 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
335 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
336 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
337 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
338 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
339 *
340 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
341 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
342 * selectStyle = (ARG_SELECTOR message)+
343 * </pre>
344 * <ul>
345 *   <li>Literal output text is not represented directly by "parts" but accessed
346 *       between parts of a message, from one part's getLimit() to the next part's getIndex().
347 *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
348 *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
349 *       the less-than-or-equal-to sign (U+2264).
350 *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
351 *       The optional numeric Part between each (ARG_SELECTOR, message) pair
352 *       is the value of an explicit-number selector like "=2",
353 *       otherwise the selector is a non-numeric identifier.
354 *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
355 * </ul>
356 * <p>
357 * This class is not intended for public subclassing.
358 *
359 * @stable ICU 4.8
360 */
361class U_COMMON_API MessagePattern : public UObject {
362public:
363    /**
364     * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
365     * @param errorCode Standard ICU error code. Its input value must
366     *                  pass the U_SUCCESS() test, or else the function returns
367     *                  immediately. Check for U_FAILURE() on output or use with
368     *                  function chaining. (See User Guide for details.)
369     * @stable ICU 4.8
370     */
371    MessagePattern(UErrorCode &errorCode);
372
373    /**
374     * Constructs an empty MessagePattern.
375     * @param mode Explicit UMessagePatternApostropheMode.
376     * @param errorCode Standard ICU error code. Its input value must
377     *                  pass the U_SUCCESS() test, or else the function returns
378     *                  immediately. Check for U_FAILURE() on output or use with
379     *                  function chaining. (See User Guide for details.)
380     * @stable ICU 4.8
381     */
382    MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
383
384    /**
385     * Constructs a MessagePattern with default UMessagePatternApostropheMode and
386     * parses the MessageFormat pattern string.
387     * @param pattern a MessageFormat pattern string
388     * @param parseError Struct to receive information on the position
389     *                   of an error within the pattern.
390     *                   Can be NULL.
391     * @param errorCode Standard ICU error code. Its input value must
392     *                  pass the U_SUCCESS() test, or else the function returns
393     *                  immediately. Check for U_FAILURE() on output or use with
394     *                  function chaining. (See User Guide for details.)
395     * TODO: turn @throws into UErrorCode specifics?
396     * @throws IllegalArgumentException for syntax errors in the pattern string
397     * @throws IndexOutOfBoundsException if certain limits are exceeded
398     *         (e.g., argument number too high, argument name too long, etc.)
399     * @throws NumberFormatException if a number could not be parsed
400     * @stable ICU 4.8
401     */
402    MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
403
404    /**
405     * Copy constructor.
406     * @param other Object to copy.
407     * @stable ICU 4.8
408     */
409    MessagePattern(const MessagePattern &other);
410
411    /**
412     * Assignment operator.
413     * @param other Object to copy.
414     * @return *this=other
415     * @stable ICU 4.8
416     */
417    MessagePattern &operator=(const MessagePattern &other);
418
419    /**
420     * Destructor.
421     * @stable ICU 4.8
422     */
423    virtual ~MessagePattern();
424
425    /**
426     * Parses a MessageFormat pattern string.
427     * @param pattern a MessageFormat pattern string
428     * @param parseError Struct to receive information on the position
429     *                   of an error within the pattern.
430     *                   Can be NULL.
431     * @param errorCode Standard ICU error code. Its input value must
432     *                  pass the U_SUCCESS() test, or else the function returns
433     *                  immediately. Check for U_FAILURE() on output or use with
434     *                  function chaining. (See User Guide for details.)
435     * @return *this
436     * @throws IllegalArgumentException for syntax errors in the pattern string
437     * @throws IndexOutOfBoundsException if certain limits are exceeded
438     *         (e.g., argument number too high, argument name too long, etc.)
439     * @throws NumberFormatException if a number could not be parsed
440     * @stable ICU 4.8
441     */
442    MessagePattern &parse(const UnicodeString &pattern,
443                          UParseError *parseError, UErrorCode &errorCode);
444
445    /**
446     * Parses a ChoiceFormat pattern string.
447     * @param pattern a ChoiceFormat pattern string
448     * @param parseError Struct to receive information on the position
449     *                   of an error within the pattern.
450     *                   Can be NULL.
451     * @param errorCode Standard ICU error code. Its input value must
452     *                  pass the U_SUCCESS() test, or else the function returns
453     *                  immediately. Check for U_FAILURE() on output or use with
454     *                  function chaining. (See User Guide for details.)
455     * @return *this
456     * @throws IllegalArgumentException for syntax errors in the pattern string
457     * @throws IndexOutOfBoundsException if certain limits are exceeded
458     *         (e.g., argument number too high, argument name too long, etc.)
459     * @throws NumberFormatException if a number could not be parsed
460     * @stable ICU 4.8
461     */
462    MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
463                                     UParseError *parseError, UErrorCode &errorCode);
464
465    /**
466     * Parses a PluralFormat pattern string.
467     * @param pattern a PluralFormat pattern string
468     * @param parseError Struct to receive information on the position
469     *                   of an error within the pattern.
470     *                   Can be NULL.
471     * @param errorCode Standard ICU error code. Its input value must
472     *                  pass the U_SUCCESS() test, or else the function returns
473     *                  immediately. Check for U_FAILURE() on output or use with
474     *                  function chaining. (See User Guide for details.)
475     * @return *this
476     * @throws IllegalArgumentException for syntax errors in the pattern string
477     * @throws IndexOutOfBoundsException if certain limits are exceeded
478     *         (e.g., argument number too high, argument name too long, etc.)
479     * @throws NumberFormatException if a number could not be parsed
480     * @stable ICU 4.8
481     */
482    MessagePattern &parsePluralStyle(const UnicodeString &pattern,
483                                     UParseError *parseError, UErrorCode &errorCode);
484
485    /**
486     * Parses a SelectFormat pattern string.
487     * @param pattern a SelectFormat pattern string
488     * @param parseError Struct to receive information on the position
489     *                   of an error within the pattern.
490     *                   Can be NULL.
491     * @param errorCode Standard ICU error code. Its input value must
492     *                  pass the U_SUCCESS() test, or else the function returns
493     *                  immediately. Check for U_FAILURE() on output or use with
494     *                  function chaining. (See User Guide for details.)
495     * @return *this
496     * @throws IllegalArgumentException for syntax errors in the pattern string
497     * @throws IndexOutOfBoundsException if certain limits are exceeded
498     *         (e.g., argument number too high, argument name too long, etc.)
499     * @throws NumberFormatException if a number could not be parsed
500     * @stable ICU 4.8
501     */
502    MessagePattern &parseSelectStyle(const UnicodeString &pattern,
503                                     UParseError *parseError, UErrorCode &errorCode);
504
505    /**
506     * Clears this MessagePattern.
507     * countParts() will return 0.
508     * @stable ICU 4.8
509     */
510    void clear();
511
512    /**
513     * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
514     * countParts() will return 0.
515     * @param mode The new UMessagePatternApostropheMode.
516     * @stable ICU 4.8
517     */
518    void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
519        clear();
520        aposMode=mode;
521    }
522
523    /**
524     * @param other another object to compare with.
525     * @return TRUE if this object is equivalent to the other one.
526     * @stable ICU 4.8
527     */
528    UBool operator==(const MessagePattern &other) const;
529
530    /**
531     * @param other another object to compare with.
532     * @return FALSE if this object is equivalent to the other one.
533     * @stable ICU 4.8
534     */
535    inline UBool operator!=(const MessagePattern &other) const {
536        return !operator==(other);
537    }
538
539    /**
540     * @return A hash code for this object.
541     * @stable ICU 4.8
542     */
543    int32_t hashCode() const;
544
545    /**
546     * @return this instance's UMessagePatternApostropheMode.
547     * @stable ICU 4.8
548     */
549    UMessagePatternApostropheMode getApostropheMode() const {
550        return aposMode;
551    }
552
553    // Java has package-private jdkAposMode() here.
554    // In C++, this is declared in the MessageImpl class.
555
556    /**
557     * @return the parsed pattern string (null if none was parsed).
558     * @stable ICU 4.8
559     */
560    const UnicodeString &getPatternString() const {
561        return msg;
562    }
563
564    /**
565     * Does the parsed pattern have named arguments like {first_name}?
566     * @return TRUE if the parsed pattern has at least one named argument.
567     * @stable ICU 4.8
568     */
569    UBool hasNamedArguments() const {
570        return hasArgNames;
571    }
572
573    /**
574     * Does the parsed pattern have numbered arguments like {2}?
575     * @return TRUE if the parsed pattern has at least one numbered argument.
576     * @stable ICU 4.8
577     */
578    UBool hasNumberedArguments() const {
579        return hasArgNumbers;
580    }
581
582    /**
583     * Validates and parses an argument name or argument number string.
584     * An argument name must be a "pattern identifier", that is, it must contain
585     * no Unicode Pattern_Syntax or Pattern_White_Space characters.
586     * If it only contains ASCII digits, then it must be a small integer with no leading zero.
587     * @param name Input string.
588     * @return &gt;=0 if the name is a valid number,
589     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
590     *         ARG_NAME_NOT_VALID (-2) if it is neither.
591     * @stable ICU 4.8
592     */
593    static int32_t validateArgumentName(const UnicodeString &name);
594
595    /**
596     * Returns a version of the parsed pattern string where each ASCII apostrophe
597     * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
598     * <p>
599     * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
600     * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
601     * @return the deep-auto-quoted version of the parsed pattern string.
602     * @see MessageFormat.autoQuoteApostrophe()
603     * @stable ICU 4.8
604     */
605    UnicodeString autoQuoteApostropheDeep() const;
606
607    class Part;
608
609    /**
610     * Returns the number of "parts" created by parsing the pattern string.
611     * Returns 0 if no pattern has been parsed or clear() was called.
612     * @return the number of pattern parts.
613     * @stable ICU 4.8
614     */
615    int32_t countParts() const {
616        return partsLength;
617    }
618
619    /**
620     * Gets the i-th pattern "part".
621     * @param i The index of the Part data. (0..countParts()-1)
622     * @return the i-th pattern "part".
623     * @stable ICU 4.8
624     */
625    const Part &getPart(int32_t i) const {
626        return parts[i];
627    }
628
629    /**
630     * Returns the UMessagePatternPartType of the i-th pattern "part".
631     * Convenience method for getPart(i).getType().
632     * @param i The index of the Part data. (0..countParts()-1)
633     * @return The UMessagePatternPartType of the i-th Part.
634     * @stable ICU 4.8
635     */
636    UMessagePatternPartType getPartType(int32_t i) const {
637        return getPart(i).type;
638    }
639
640    /**
641     * Returns the pattern index of the specified pattern "part".
642     * Convenience method for getPart(partIndex).getIndex().
643     * @param partIndex The index of the Part data. (0..countParts()-1)
644     * @return The pattern index of this Part.
645     * @stable ICU 4.8
646     */
647    int32_t getPatternIndex(int32_t partIndex) const {
648        return getPart(partIndex).index;
649    }
650
651    /**
652     * Returns the substring of the pattern string indicated by the Part.
653     * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
654     * @param part a part of this MessagePattern.
655     * @return the substring associated with part.
656     * @stable ICU 4.8
657     */
658    UnicodeString getSubstring(const Part &part) const {
659        return msg.tempSubString(part.index, part.length);
660    }
661
662    /**
663     * Compares the part's substring with the input string s.
664     * @param part a part of this MessagePattern.
665     * @param s a string.
666     * @return TRUE if getSubstring(part).equals(s).
667     * @stable ICU 4.8
668     */
669    UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
670        return 0==msg.compare(part.index, part.length, s);
671    }
672
673    /**
674     * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
675     * @param part a part of this MessagePattern.
676     * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
677     * @stable ICU 4.8
678     */
679    double getNumericValue(const Part &part) const;
680
681    /**
682     * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
683     * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
684     * @return the "offset:" value.
685     * @stable ICU 4.8
686     */
687    double getPluralOffset(int32_t pluralStart) const;
688
689    /**
690     * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
691     * @param start The index of some Part data (0..countParts()-1);
692     *        this Part should be of Type ARG_START or MSG_START.
693     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
694     *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
695     * @stable ICU 4.8
696     */
697    int32_t getLimitPartIndex(int32_t start) const {
698        int32_t limit=getPart(start).limitPartIndex;
699        if(limit<start) {
700            return start;
701        }
702        return limit;
703    }
704
705    /**
706     * A message pattern "part", representing a pattern parsing event.
707     * There is a part for the start and end of a message or argument,
708     * for quoting and escaping of and with ASCII apostrophes,
709     * and for syntax elements of "complex" arguments.
710     * @stable ICU 4.8
711     */
712    class Part : public UMemory {
713    public:
714        /**
715         * Default constructor, do not use.
716         * @internal
717         */
718        Part() {}
719
720        /**
721         * Returns the type of this part.
722         * @return the part type.
723         * @stable ICU 4.8
724         */
725        UMessagePatternPartType getType() const {
726            return type;
727        }
728
729        /**
730         * Returns the pattern string index associated with this Part.
731         * @return this part's pattern string index.
732         * @stable ICU 4.8
733         */
734        int32_t getIndex() const {
735            return index;
736        }
737
738        /**
739         * Returns the length of the pattern substring associated with this Part.
740         * This is 0 for some parts.
741         * @return this part's pattern substring length.
742         * @stable ICU 4.8
743         */
744        int32_t getLength() const {
745            return length;
746        }
747
748        /**
749         * Returns the pattern string limit (exclusive-end) index associated with this Part.
750         * Convenience method for getIndex()+getLength().
751         * @return this part's pattern string limit index, same as getIndex()+getLength().
752         * @stable ICU 4.8
753         */
754        int32_t getLimit() const {
755            return index+length;
756        }
757
758        /**
759         * Returns a value associated with this part.
760         * See the documentation of each part type for details.
761         * @return the part value.
762         * @stable ICU 4.8
763         */
764        int32_t getValue() const {
765            return value;
766        }
767
768        /**
769         * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
770         * otherwise UMSGPAT_ARG_TYPE_NONE.
771         * @return the argument type for this part.
772         * @stable ICU 4.8
773         */
774        UMessagePatternArgType getArgType() const {
775            UMessagePatternPartType type=getType();
776            if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
777                return (UMessagePatternArgType)value;
778            } else {
779                return UMSGPAT_ARG_TYPE_NONE;
780            }
781        }
782
783        /**
784         * Indicates whether the Part type has a numeric value.
785         * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
786         * @param type The Part type to be tested.
787         * @return TRUE if the Part type has a numeric value.
788         * @stable ICU 4.8
789         */
790        static UBool hasNumericValue(UMessagePatternPartType type) {
791            return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
792        }
793
794        /**
795         * @param other another object to compare with.
796         * @return TRUE if this object is equivalent to the other one.
797         * @stable ICU 4.8
798         */
799        UBool operator==(const Part &other) const;
800
801        /**
802         * @param other another object to compare with.
803         * @return FALSE if this object is equivalent to the other one.
804         * @stable ICU 4.8
805         */
806        inline UBool operator!=(const Part &other) const {
807            return !operator==(other);
808        }
809
810        /**
811         * @return A hash code for this object.
812         * @stable ICU 4.8
813         */
814        int32_t hashCode() const {
815            return ((type*37+index)*37+length)*37+value;
816        }
817
818    private:
        // Grants MessagePattern access to the private members below.
        friend class MessagePattern;

        // NOTE(review): these limits equal the maximum values of the uint16_t length
        // and int16_t value fields declared below; presumably they bound what may be
        // stored in those fields -- confirm against the code that creates parts.
        static const int32_t MAX_LENGTH=0xffff;
        static const int32_t MAX_VALUE=0x7fff;

        // Some fields are not final because they are modified during pattern parsing.
        // After pattern parsing, the parts are effectively immutable.
        // ("final" is Java terminology; the C++ fields are simply non-const.)
        UMessagePatternPartType type;
        // NOTE(review): presumably the position of this part in the pattern string -- confirm.
        int32_t index;
        // NOTE(review): presumably the number of pattern string units covered by this part -- confirm.
        uint16_t length;
        // Returned by getValue(); getArgType() reinterprets it as a
        // UMessagePatternArgType for ARG_START and ARG_LIMIT parts.
        int16_t value;
        // Not included in hashCode().
        // NOTE(review): presumably the index of the matching "limit" part -- confirm.
        int32_t limitPartIndex;
831    };
832
833private:
    // Internal parsing functions. Only the declarations appear here.
    // NOTE(review): the notes on the individual functions are inferred from their
    // names and signatures -- confirm against the definitions.

    // Presumably prepares this object's state for parsing the given pattern.
    void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);

    // Presumably finalizes this object's state once parsing has finished.
    void postParse();

    // Each of the following parse functions takes a pattern index and returns an int32_t;
    // presumably the return value is the index at which parsing continues -- confirm.
    int32_t parseMessage(int32_t index, int32_t msgStartLength,
                         int32_t nestingLevel, UMessagePatternArgType parentType,
                         UParseError *parseError, UErrorCode &errorCode);

    int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
                     UParseError *parseError, UErrorCode &errorCode);

    int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);

    int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
                             UParseError *parseError, UErrorCode &errorCode);

    int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
                                     UParseError *parseError, UErrorCode &errorCode);
852
    /**
     * Validates and parses an argument name or argument number string.
     * This internal method assumes that the input substring is a "pattern identifier".
     * @param s the string that contains the substring to be parsed
     * @param start start index of the substring in s
     * @param limit limit index of the substring in s
     * @return &gt;=0 if the name is a valid number,
     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
     *         ARG_NAME_NOT_VALID (-2) if it is neither.
     * @see validateArgumentName
     */
    static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
862
863    int32_t parseArgNumber(int32_t start, int32_t limit) {
864        return parseArgNumber(msg, start, limit);
865    }
866
    /**
     * Parses a number from the specified message substring.
     * NOTE(review): this function returns void, so the parsed number is presumably
     * recorded in this object's state rather than returned -- confirm against the definition.
     * @param start start index into the message string
     * @param limit limit index into the message string, must be start&lt;limit
     * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
     * @param parseError NOTE(review): presumably receives error location details
     *                   when parsing fails; confirm whether NULL is accepted.
     * @param errorCode NOTE(review): presumably a standard ICU in/out error code
     *                  that is set when parsing fails -- confirm.
     */
    void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
                     UParseError *parseError, UErrorCode &errorCode);
877
878    // Java has package-private appendReducedApostrophes() here.
879    // In C++, this is declared in the MessageImpl class.
880
    // NOTE(review): the skip functions take an index and return an int32_t;
    // presumably the return value is the index just past the skipped characters -- confirm.
    int32_t skipWhiteSpace(int32_t index);

    int32_t skipIdentifier(int32_t index);

    /**
     * Skips a sequence of characters that could occur in a double value.
     * Does not fully parse or validate the value.
     */
    int32_t skipDouble(int32_t index);

    // NOTE(review): presumably tests whether c may occur in an argument type keyword -- confirm.
    static UBool isArgTypeChar(UChar32 c);

    // NOTE(review): the following predicates presumably test whether the message text
    // at the given index spells the corresponding keyword -- confirm against the definitions.
    UBool isChoice(int32_t index);

    UBool isPlural(int32_t index);

    UBool isSelect(int32_t index);

    UBool isOrdinal(int32_t index);
900
    /**
     * @param nestingLevel NOTE(review): presumably the nesting depth of the
     *        (sub-)pattern currently being parsed -- confirm.
     * @return TRUE if we are inside a MessageFormat (sub-)pattern,
     *         as opposed to inside a top-level choice/plural/select pattern.
     */
    UBool inMessageFormatPattern(int32_t nestingLevel);

    /**
     * @param nestingLevel NOTE(review): presumably the nesting depth of the
     *        (sub-)pattern currently being parsed -- confirm.
     * @param parentType NOTE(review): presumably the argument type of the
     *        enclosing argument -- confirm.
     * @return TRUE if we are in a MessageFormat sub-pattern
     *         of a top-level ChoiceFormat pattern.
     */
    UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
912
    // Internal functions for recording parse results; only declared here.
    // NOTE(review): the notes below are inferred from names and signatures -- confirm.

    // Presumably appends a new Part built from the given fields;
    // failures are presumably reported via errorCode.
    void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
                 int32_t value, UErrorCode &errorCode);

    // Like addPart() but with an additional start argument;
    // presumably start identifies the matching start part.
    void addLimitPart(int32_t start,
                      UMessagePatternPartType type, int32_t index, int32_t length,
                      int32_t value, UErrorCode &errorCode);

    // Presumably records numericValue together with a part of a numeric type.
    void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);

    // Presumably fills in *parseError for an error at the given pattern index.
    void setParseError(UParseError *parseError, int32_t index);

    // Presumably set up, respectively copy from other, this object's internal storage;
    // the UBool result presumably indicates success.
    UBool init(UErrorCode &errorCode);
    UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
926
    // Apostrophe mode of this pattern object (see UMessagePatternApostropheMode).
    UMessagePatternApostropheMode aposMode;
    // The message string; parseArgNumber(start, limit) parses a substring of it.
    UnicodeString msg;
    // Java equivalent, kept for reference:
    // ArrayList<Part> parts=new ArrayList<Part>();
    // NOTE(review): presumably partsList owns the storage, parts points to its
    // elements, and partsLength is the number of parts in use -- confirm.
    MessagePatternPartsList *partsList;
    Part *parts;
    int32_t partsLength;
    // Java equivalent, kept for reference:
    // ArrayList<Double> numericValues;
    // NOTE(review): presumably the same arrangement as for the parts above -- confirm.
    MessagePatternDoubleList *numericValuesList;
    double *numericValues;
    int32_t numericValuesLength;
    // NOTE(review): presumably set during parsing when named, respectively numbered,
    // arguments are encountered -- confirm.
    UBool hasArgNames;
    UBool hasArgNumbers;
    // NOTE(review): presumably records whether the pattern relies on optional
    // apostrophe quoting -- confirm against the parser.
    UBool needsAutoQuoting;
940};
941
942U_NAMESPACE_END
943
944#endif  // !UCONFIG_NO_FORMATTING
945
946#endif  // __MESSAGEPATTERN_H__
947