1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*   Copyright (C) 2011-2013, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   file name:  messagepattern.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2011mar14
14*   created by: Markus W. Scherer
15*/
16
17#ifndef __MESSAGEPATTERN_H__
18#define __MESSAGEPATTERN_H__
19
20/**
21 * \file
22 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
23 */
24
25#include "unicode/utypes.h"
26
27#if !UCONFIG_NO_FORMATTING
28
29#include "unicode/parseerr.h"
30#include "unicode/unistr.h"
31
32/**
33 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
34 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
35 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
36 * <p>
37 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
38 * even when the pair is between two single, text-quoting apostrophes.
39 * <p>
40 * The following table shows examples of desired MessageFormat.format() output
41 * with the pattern strings that yield that output.
42 * <p>
43 * <table>
44 *   <tr>
45 *     <th>Desired output</th>
46 *     <th>DOUBLE_OPTIONAL</th>
47 *     <th>DOUBLE_REQUIRED</th>
48 *   </tr>
49 *   <tr>
50 *     <td>I see {many}</td>
51 *     <td>I see '{many}'</td>
52 *     <td>(same)</td>
53 *   </tr>
54 *   <tr>
55 *     <td>I said {'Wow!'}</td>
56 *     <td>I said '{''Wow!''}'</td>
57 *     <td>(same)</td>
58 *   </tr>
59 *   <tr>
60 *     <td>I don't know</td>
61 *     <td>I don't know OR<br> I don''t know</td>
62 *     <td>I don''t know</td>
63 *   </tr>
64 * </table>
65 * @stable ICU 4.8
66 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
67 */
enum UMessagePatternApostropheMode {
    /**
     * Either one apostrophe or two adjacent apostrophes in the pattern
     * stand for a literal apostrophe.
     * In a MessageFormat pattern, a lone apostrophe begins quoted literal text
     * only when it comes directly before a curly brace {},
     * or before a pipe symbol | inside a choice format,
     * or before a pound symbol # inside a plural format.
     * <p>
     * Default behavior since ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL=0,
    /**
     * Only two adjacent apostrophes in the pattern
     * stand for a literal apostrophe.
     * A lone apostrophe always begins quoted literal text.
     * <p>
     * Matches the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED=1
};
/**
 * Type name for the apostrophe mode constants, usable without the enum keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
95
96/**
97 * MessagePattern::Part type constants.
98 * @stable ICU 4.8
99 */
enum UMessagePatternPartType {
    /**
     * Marks the beginning of a message pattern, either the main one or a nested one.
     * The length is 0 for the top-level message and for a sub-message
     * of a choice argument; otherwise it is 1, covering the '{'.
     * The value is the nesting level; the main message has level 0.
     * <p>
     * A MSG_LIMIT part always follows later.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START=0,
    /**
     * Marks the end of a message pattern, either the main one or a nested one.
     * The length is 0 for the top-level message and for
     * the last sub-message of a choice argument;
     * otherwise it is 1, covering the '}' or (in a choice argument style) the '|'.
     * The value is the nesting level; the main message has level 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT=1,
    /**
     * Marks a pattern substring that must be skipped when formatting.
     * An apostrophe that opens or closes quoted text, for example,
     * is reported with a part of this type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX=2,
    /**
     * Marks a position where a syntax character must be inserted for auto-quoting.
     * The length is 0.
     * The value is the code of the character to insert. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR=3,
    /**
     * Marks a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, the substring of this part is replaced with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER=4,
    /**
     * Marks the beginning of an argument.
     * The length is 1, covering the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * An ARG_NUMBER or ARG_NAME part comes next,
     * then optional argument sub-parts (see UMessagePatternArgType constants),
     * and at the end an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START=5,
    /**
     * Marks the end of an argument.
     * The length is 1, covering the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT=6,
    /**
     * An argument number; the number itself is the part value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER=7,
    /**
     * An argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME=8,
    /**
     * An argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE=9,
    /**
     * The style text of an argument.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE=10,
    /**
     * A selector substring inside a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR=11,
    /**
     * An integer, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * The integer itself is the part value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT=12,
    /**
     * A number, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value indexes an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE=13
};
/**
 * Type name for the Part type constants, usable without the enum keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
210
211/**
212 * Argument type constants.
213 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
214 *
215 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
216 * with a nesting level one greater than the surrounding message.
217 * @stable ICU 4.8
218 */
enum UMessagePatternArgType {
    /**
     * No type is specified for the argument.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE=0,
    /**
     * The argument has a "simple" type, given by the ARG_TYPE part.
     * That part may be followed by an ARG_STYLE part.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE=1,
    /**
     * The argument is a ChoiceFormat consisting of one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE=2,
    /**
     * The argument is a cardinal-number PluralFormat.
     * It has an optional ARG_INT or ARG_DOUBLE offset (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * When a selector carries an explicit value (e.g., =2),
     * the ARG_INT or ARG_DOUBLE part preceding the message provides that value.
     * Otherwise the message comes directly after the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL=3,
    /**
     * The argument is a SelectFormat consisting of one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT=4,
    /**
     * The argument is an ordinal-number PluralFormat.
     * Its style parts sequence and semantics are the same as for UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL=5
};
/**
 * Type name for the argument type constants, usable without the enum keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
263
/**
 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
 * Tests whether an argument type uses the plural style part sequence and semantics.
 * Evaluates to TRUE for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL,
 * and to FALSE for every other value.
 * The macro argument is expanded twice, so it should not have side effects.
 * @stable ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    (UMSGPAT_ARG_TYPE_PLURAL==(argType) || UMSGPAT_ARG_TYPE_SELECTORDINAL==(argType))
272
enum {
    /**
     * Result of MessagePattern.validateArgumentName() when
     * the string is a valid "pattern identifier" but is not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,

    /**
     * Result of MessagePattern.validateArgumentName() when
     * the string is invalid.
     * Either it is not a valid "pattern identifier",
     * or it consists only of ASCII digits but has a leading zero or the number is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID=-2
};
290
/**
 * Sentinel returned by getNumericValue(Part) when the part
 * has no numeric value defined.
 * The constant has type double.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE (-123456789.0)
298
299U_NAMESPACE_BEGIN
300
301class MessagePatternDoubleList;
302class MessagePatternPartsList;
303
304/**
305 * Parses and represents ICU MessageFormat patterns.
306 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
307 * Used in the implementations of those classes as well as in tools
308 * for message validation, translation and format conversion.
309 * <p>
310 * The parser handles all syntax relevant for identifying message arguments.
311 * This includes "complex" arguments whose style strings contain
312 * nested MessageFormat pattern substrings.
313 * For "simple" arguments (with no nested MessageFormat pattern substrings),
314 * the argument style is not parsed any further.
315 * <p>
316 * The parser handles named and numbered message arguments and allows both in one message.
317 * <p>
318 * Once a pattern has been parsed successfully, iterate through the parsed data
319 * with countParts(), getPart() and related methods.
320 * <p>
321 * The data logically represents a parse tree, but is stored and accessed
322 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
323 * Arguments and nested messages are best handled via recursion.
324 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
325 * the index of the corresponding _LIMIT "part".
326 * <p>
327 * List of "parts":
328 * <pre>
329 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
330 * argument = noneArg | simpleArg | complexArg
331 * complexArg = choiceArg | pluralArg | selectArg
332 *
333 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
334 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
335 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
336 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
337 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
338 *
339 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
340 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
341 * selectStyle = (ARG_SELECTOR message)+
342 * </pre>
343 * <ul>
344 *   <li>Literal output text is not represented directly by "parts" but accessed
345 *       between parts of a message, from one part's getLimit() to the next part's getIndex().
346 *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
347 *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
348 *       the less-than-or-equal-to sign (U+2264).
349 *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
350 *       The optional numeric Part between each (ARG_SELECTOR, message) pair
351 *       is the value of an explicit-number selector like "=2",
352 *       otherwise the selector is a non-numeric identifier.
353 *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
354 * </ul>
355 * <p>
356 * This class is not intended for public subclassing.
357 *
358 * @stable ICU 4.8
359 */
360class U_COMMON_API MessagePattern : public UObject {
361public:
362    /**
363     * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
364     * @param errorCode Standard ICU error code. Its input value must
365     *                  pass the U_SUCCESS() test, or else the function returns
366     *                  immediately. Check for U_FAILURE() on output or use with
367     *                  function chaining. (See User Guide for details.)
368     * @stable ICU 4.8
369     */
370    MessagePattern(UErrorCode &errorCode);
371
372    /**
373     * Constructs an empty MessagePattern.
374     * @param mode Explicit UMessagePatternApostropheMode.
375     * @param errorCode Standard ICU error code. Its input value must
376     *                  pass the U_SUCCESS() test, or else the function returns
377     *                  immediately. Check for U_FAILURE() on output or use with
378     *                  function chaining. (See User Guide for details.)
379     * @stable ICU 4.8
380     */
381    MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
382
383    /**
384     * Constructs a MessagePattern with default UMessagePatternApostropheMode and
385     * parses the MessageFormat pattern string.
386     * @param pattern a MessageFormat pattern string
387     * @param parseError Struct to receive information on the position
388     *                   of an error within the pattern.
389     *                   Can be NULL.
390     * @param errorCode Standard ICU error code. Its input value must
391     *                  pass the U_SUCCESS() test, or else the function returns
392     *                  immediately. Check for U_FAILURE() on output or use with
393     *                  function chaining. (See User Guide for details.)
394     * TODO: turn @throws into UErrorCode specifics?
395     * @throws IllegalArgumentException for syntax errors in the pattern string
396     * @throws IndexOutOfBoundsException if certain limits are exceeded
397     *         (e.g., argument number too high, argument name too long, etc.)
398     * @throws NumberFormatException if a number could not be parsed
399     * @stable ICU 4.8
400     */
401    MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
402
403    /**
404     * Copy constructor.
405     * @param other Object to copy.
406     * @stable ICU 4.8
407     */
408    MessagePattern(const MessagePattern &other);
409
410    /**
411     * Assignment operator.
412     * @param other Object to copy.
413     * @return *this=other
414     * @stable ICU 4.8
415     */
416    MessagePattern &operator=(const MessagePattern &other);
417
418    /**
419     * Destructor.
420     * @stable ICU 4.8
421     */
422    virtual ~MessagePattern();
423
424    /**
425     * Parses a MessageFormat pattern string.
426     * @param pattern a MessageFormat pattern string
427     * @param parseError Struct to receive information on the position
428     *                   of an error within the pattern.
429     *                   Can be NULL.
430     * @param errorCode Standard ICU error code. Its input value must
431     *                  pass the U_SUCCESS() test, or else the function returns
432     *                  immediately. Check for U_FAILURE() on output or use with
433     *                  function chaining. (See User Guide for details.)
434     * @return *this
435     * @throws IllegalArgumentException for syntax errors in the pattern string
436     * @throws IndexOutOfBoundsException if certain limits are exceeded
437     *         (e.g., argument number too high, argument name too long, etc.)
438     * @throws NumberFormatException if a number could not be parsed
439     * @stable ICU 4.8
440     */
441    MessagePattern &parse(const UnicodeString &pattern,
442                          UParseError *parseError, UErrorCode &errorCode);
443
444    /**
445     * Parses a ChoiceFormat pattern string.
446     * @param pattern a ChoiceFormat pattern string
447     * @param parseError Struct to receive information on the position
448     *                   of an error within the pattern.
449     *                   Can be NULL.
450     * @param errorCode Standard ICU error code. Its input value must
451     *                  pass the U_SUCCESS() test, or else the function returns
452     *                  immediately. Check for U_FAILURE() on output or use with
453     *                  function chaining. (See User Guide for details.)
454     * @return *this
455     * @throws IllegalArgumentException for syntax errors in the pattern string
456     * @throws IndexOutOfBoundsException if certain limits are exceeded
457     *         (e.g., argument number too high, argument name too long, etc.)
458     * @throws NumberFormatException if a number could not be parsed
459     * @stable ICU 4.8
460     */
461    MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
462                                     UParseError *parseError, UErrorCode &errorCode);
463
464    /**
465     * Parses a PluralFormat pattern string.
466     * @param pattern a PluralFormat pattern string
467     * @param parseError Struct to receive information on the position
468     *                   of an error within the pattern.
469     *                   Can be NULL.
470     * @param errorCode Standard ICU error code. Its input value must
471     *                  pass the U_SUCCESS() test, or else the function returns
472     *                  immediately. Check for U_FAILURE() on output or use with
473     *                  function chaining. (See User Guide for details.)
474     * @return *this
475     * @throws IllegalArgumentException for syntax errors in the pattern string
476     * @throws IndexOutOfBoundsException if certain limits are exceeded
477     *         (e.g., argument number too high, argument name too long, etc.)
478     * @throws NumberFormatException if a number could not be parsed
479     * @stable ICU 4.8
480     */
481    MessagePattern &parsePluralStyle(const UnicodeString &pattern,
482                                     UParseError *parseError, UErrorCode &errorCode);
483
484    /**
485     * Parses a SelectFormat pattern string.
486     * @param pattern a SelectFormat pattern string
487     * @param parseError Struct to receive information on the position
488     *                   of an error within the pattern.
489     *                   Can be NULL.
490     * @param errorCode Standard ICU error code. Its input value must
491     *                  pass the U_SUCCESS() test, or else the function returns
492     *                  immediately. Check for U_FAILURE() on output or use with
493     *                  function chaining. (See User Guide for details.)
494     * @return *this
495     * @throws IllegalArgumentException for syntax errors in the pattern string
496     * @throws IndexOutOfBoundsException if certain limits are exceeded
497     *         (e.g., argument number too high, argument name too long, etc.)
498     * @throws NumberFormatException if a number could not be parsed
499     * @stable ICU 4.8
500     */
501    MessagePattern &parseSelectStyle(const UnicodeString &pattern,
502                                     UParseError *parseError, UErrorCode &errorCode);
503
504    /**
505     * Clears this MessagePattern.
506     * countParts() will return 0.
507     * @stable ICU 4.8
508     */
509    void clear();
510
511    /**
512     * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
513     * countParts() will return 0.
514     * @param mode The new UMessagePatternApostropheMode.
515     * @stable ICU 4.8
516     */
517    void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
518        clear();
519        aposMode=mode;
520    }
521
522    /**
523     * @param other another object to compare with.
524     * @return TRUE if this object is equivalent to the other one.
525     * @stable ICU 4.8
526     */
527    UBool operator==(const MessagePattern &other) const;
528
529    /**
530     * @param other another object to compare with.
531     * @return FALSE if this object is equivalent to the other one.
532     * @stable ICU 4.8
533     */
534    inline UBool operator!=(const MessagePattern &other) const {
535        return !operator==(other);
536    }
537
538    /**
539     * @return A hash code for this object.
540     * @stable ICU 4.8
541     */
542    int32_t hashCode() const;
543
544    /**
545     * @return this instance's UMessagePatternApostropheMode.
546     * @stable ICU 4.8
547     */
548    UMessagePatternApostropheMode getApostropheMode() const {
549        return aposMode;
550    }
551
552    // Java has package-private jdkAposMode() here.
553    // In C++, this is declared in the MessageImpl class.
554
555    /**
     * @return a reference to the parsed pattern string (never null in C++; presumably empty if none was parsed).
557     * @stable ICU 4.8
558     */
559    const UnicodeString &getPatternString() const {
560        return msg;
561    }
562
563    /**
564     * Does the parsed pattern have named arguments like {first_name}?
565     * @return TRUE if the parsed pattern has at least one named argument.
566     * @stable ICU 4.8
567     */
568    UBool hasNamedArguments() const {
569        return hasArgNames;
570    }
571
572    /**
573     * Does the parsed pattern have numbered arguments like {2}?
574     * @return TRUE if the parsed pattern has at least one numbered argument.
575     * @stable ICU 4.8
576     */
577    UBool hasNumberedArguments() const {
578        return hasArgNumbers;
579    }
580
581    /**
582     * Validates and parses an argument name or argument number string.
583     * An argument name must be a "pattern identifier", that is, it must contain
584     * no Unicode Pattern_Syntax or Pattern_White_Space characters.
585     * If it only contains ASCII digits, then it must be a small integer with no leading zero.
586     * @param name Input string.
587     * @return &gt;=0 if the name is a valid number,
588     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
589     *         ARG_NAME_NOT_VALID (-2) if it is neither.
590     * @stable ICU 4.8
591     */
592    static int32_t validateArgumentName(const UnicodeString &name);
593
594    /**
595     * Returns a version of the parsed pattern string where each ASCII apostrophe
596     * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
597     * <p>
598     * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
599     * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
600     * @return the deep-auto-quoted version of the parsed pattern string.
601     * @see MessageFormat.autoQuoteApostrophe()
602     * @stable ICU 4.8
603     */
604    UnicodeString autoQuoteApostropheDeep() const;
605
606    class Part;
607
608    /**
609     * Returns the number of "parts" created by parsing the pattern string.
610     * Returns 0 if no pattern has been parsed or clear() was called.
611     * @return the number of pattern parts.
612     * @stable ICU 4.8
613     */
614    int32_t countParts() const {
615        return partsLength;
616    }
617
618    /**
619     * Gets the i-th pattern "part".
620     * @param i The index of the Part data. (0..countParts()-1)
621     * @return the i-th pattern "part".
622     * @stable ICU 4.8
623     */
624    const Part &getPart(int32_t i) const {
625        return parts[i];
626    }
627
628    /**
629     * Returns the UMessagePatternPartType of the i-th pattern "part".
630     * Convenience method for getPart(i).getType().
631     * @param i The index of the Part data. (0..countParts()-1)
632     * @return The UMessagePatternPartType of the i-th Part.
633     * @stable ICU 4.8
634     */
635    UMessagePatternPartType getPartType(int32_t i) const {
636        return getPart(i).type;
637    }
638
639    /**
640     * Returns the pattern index of the specified pattern "part".
641     * Convenience method for getPart(partIndex).getIndex().
642     * @param partIndex The index of the Part data. (0..countParts()-1)
643     * @return The pattern index of this Part.
644     * @stable ICU 4.8
645     */
646    int32_t getPatternIndex(int32_t partIndex) const {
647        return getPart(partIndex).index;
648    }
649
650    /**
651     * Returns the substring of the pattern string indicated by the Part.
652     * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
653     * @param part a part of this MessagePattern.
654     * @return the substring associated with part.
655     * @stable ICU 4.8
656     */
657    UnicodeString getSubstring(const Part &part) const {
658        return msg.tempSubString(part.index, part.length);
659    }
660
661    /**
662     * Compares the part's substring with the input string s.
663     * @param part a part of this MessagePattern.
664     * @param s a string.
665     * @return TRUE if getSubstring(part).equals(s).
666     * @stable ICU 4.8
667     */
668    UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
669        return 0==msg.compare(part.index, part.length, s);
670    }
671
672    /**
673     * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
674     * @param part a part of this MessagePattern.
675     * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
676     * @stable ICU 4.8
677     */
678    double getNumericValue(const Part &part) const;
679
680    /**
681     * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
682     * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
683     * @return the "offset:" value.
684     * @stable ICU 4.8
685     */
686    double getPluralOffset(int32_t pluralStart) const;
687
688    /**
689     * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
690     * @param start The index of some Part data (0..countParts()-1);
691     *        this Part should be of Type ARG_START or MSG_START.
692     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
     *         or start itself if getPartType(start)!=ARG|MSG_START.
694     * @stable ICU 4.8
695     */
696    int32_t getLimitPartIndex(int32_t start) const {
697        int32_t limit=getPart(start).limitPartIndex;
698        if(limit<start) {
699            return start;
700        }
701        return limit;
702    }
703
704    /**
705     * A message pattern "part", representing a pattern parsing event.
706     * There is a part for the start and end of a message or argument,
707     * for quoting and escaping of and with ASCII apostrophes,
708     * and for syntax elements of "complex" arguments.
709     * @stable ICU 4.8
710     */
711    class Part : public UMemory {
712    public:
713        /**
714         * Default constructor, do not use.
715         * @internal
716         */
717        Part() {}
718
719        /**
720         * Returns the type of this part.
721         * @return the part type.
722         * @stable ICU 4.8
723         */
724        UMessagePatternPartType getType() const {
725            return type;
726        }
727
728        /**
729         * Returns the pattern string index associated with this Part.
730         * @return this part's pattern string index.
731         * @stable ICU 4.8
732         */
733        int32_t getIndex() const {
734            return index;
735        }
736
737        /**
738         * Returns the length of the pattern substring associated with this Part.
739         * This is 0 for some parts.
740         * @return this part's pattern substring length.
741         * @stable ICU 4.8
742         */
743        int32_t getLength() const {
744            return length;
745        }
746
747        /**
748         * Returns the pattern string limit (exclusive-end) index associated with this Part.
749         * Convenience method for getIndex()+getLength().
750         * @return this part's pattern string limit index, same as getIndex()+getLength().
751         * @stable ICU 4.8
752         */
753        int32_t getLimit() const {
754            return index+length;
755        }
756
757        /**
758         * Returns a value associated with this part.
759         * See the documentation of each part type for details.
760         * @return the part value.
761         * @stable ICU 4.8
762         */
763        int32_t getValue() const {
764            return value;
765        }
766
767        /**
768         * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
769         * otherwise UMSGPAT_ARG_TYPE_NONE.
770         * @return the argument type for this part.
771         * @stable ICU 4.8
772         */
773        UMessagePatternArgType getArgType() const {
774            UMessagePatternPartType type=getType();
775            if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
776                return (UMessagePatternArgType)value;
777            } else {
778                return UMSGPAT_ARG_TYPE_NONE;
779            }
780        }
781
782        /**
783         * Indicates whether the Part type has a numeric value.
784         * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
785         * @param type The Part type to be tested.
786         * @return TRUE if the Part type has a numeric value.
787         * @stable ICU 4.8
788         */
789        static UBool hasNumericValue(UMessagePatternPartType type) {
790            return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
791        }
792
793        /**
794         * @param other another object to compare with.
795         * @return TRUE if this object is equivalent to the other one.
796         * @stable ICU 4.8
797         */
798        UBool operator==(const Part &other) const;
799
800        /**
801         * @param other another object to compare with.
802         * @return FALSE if this object is equivalent to the other one.
803         * @stable ICU 4.8
804         */
805        inline UBool operator!=(const Part &other) const {
806            return !operator==(other);
807        }
808
809        /**
810         * @return A hash code for this object.
811         * @stable ICU 4.8
812         */
813        int32_t hashCode() const {
814            return ((type*37+index)*37+length)*37+value;
815        }
816
817    private:
818        friend class MessagePattern;  // gives MessagePattern access to the private members below
819
820        static const int32_t MAX_LENGTH=0xffff;  // largest value that fits the uint16_t length field
821        static const int32_t MAX_VALUE=0x7fff;  // largest value that fits the int16_t value field
822
823        // The fields are not const because some of them are modified during pattern parsing.
824        // After pattern parsing, the parts are effectively immutable.
825        UMessagePatternPartType type;
826        int32_t index;
827        uint16_t length;
828        int16_t value;  // returned by getValue(); the argument type for ARG_START/ARG_LIMIT parts (see getArgType())
829        int32_t limitPartIndex;  // NOTE(review): presumably the index of the matching limit part -- confirm
830    };
831
832private:  // Parsing internals; only declarations appear here, the function bodies are not in this header section.
833    void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
834    // NOTE(review): undocumented; postParse() presumably completes what preParse() sets up -- confirm in the implementation.
835    void postParse();
836    // NOTE(review): each parse*() below takes a pattern index and returns int32_t, presumably the index after the parsed element -- confirm.
837    int32_t parseMessage(int32_t index, int32_t msgStartLength,
838                         int32_t nestingLevel, UMessagePatternArgType parentType,
839                         UParseError *parseError, UErrorCode &errorCode);
840
841    int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
842                     UParseError *parseError, UErrorCode &errorCode);
843
844    int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
845
846    int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
847                             UParseError *parseError, UErrorCode &errorCode);
848
849    int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
850                                     UParseError *parseError, UErrorCode &errorCode);
851
852    /** Validates and parses an argument name or argument number string.
853     * This internal method assumes that the input substring is a "pattern identifier".
854     * NOTE(review): the input substring is presumably s from start up to limit -- confirm.
855     * @return &gt;=0 if the name is a valid number,
856     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
857     *         ARG_NAME_NOT_VALID (-2) if it is neither.
858     * @see validateArgumentName
859     */
860    static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
861
862    int32_t parseArgNumber(int32_t start, int32_t limit) {
863        return MessagePattern::parseArgNumber(msg, start, limit);  // delegate to the static overload, passing this object's msg
864    }
865
866    /** Parses a number from the specified message substring.
867     * Returns void; NOTE(review): the parsed value is presumably recorded as a part (compare addArgDoublePart()) -- confirm.
868     * @param start start index into the message string
869     * @param limit limit index into the message string, must be start<limit
870     * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
871     * @param parseError NOTE(review): presumably receives error details when parsing fails -- confirm.
872     * @param errorCode NOTE(review): presumably the ICU in/out error code, set on failure -- confirm.
873     */
874    void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
875                     UParseError *parseError, UErrorCode &errorCode);
876
877    // Java has package-private appendReducedApostrophes() here.
878    // In C++, this is declared in the MessageImpl class.
879    // NOTE(review): the skip*() helpers take an index and return int32_t, presumably the index after the skipped text -- confirm.
880    int32_t skipWhiteSpace(int32_t index);
881
882    int32_t skipIdentifier(int32_t index);
883
884    /**
885     * Skips a sequence of characters that could occur in a double value.
886     * Does not fully parse or validate the value.
887     */
888    int32_t skipDouble(int32_t index);
889
890    static UBool isArgTypeChar(UChar32 c);
891    // NOTE(review): isChoice/isPlural/isSelect/isOrdinal are undocumented; presumably each tests for its keyword at index -- confirm.
892    UBool isChoice(int32_t index);
893
894    UBool isPlural(int32_t index);
895
896    UBool isSelect(int32_t index);
897
898    UBool isOrdinal(int32_t index);
899
900    /**
901     * @return TRUE if we are inside a MessageFormat (sub-)pattern,
902     *         as opposed to inside a top-level choice/plural/select pattern.
903     */
904    UBool inMessageFormatPattern(int32_t nestingLevel);
905
906    /**
907     * @return TRUE if we are in a MessageFormat sub-pattern
908     *         of a top-level ChoiceFormat pattern.
909     */
910    UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
911    // NOTE(review): addPart(), addLimitPart() and addArgDoublePart() presumably append to the parts / numeric-values storage -- confirm.
912    void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
913                 int32_t value, UErrorCode &errorCode);
914
915    void addLimitPart(int32_t start,
916                      UMessagePatternPartType type, int32_t index, int32_t length,
917                      int32_t value, UErrorCode &errorCode);
918
919    void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
920
921    void setParseError(UParseError *parseError, int32_t index);
922    // NOTE(review): init() and copyStorage() return UBool, presumably FALSE on failure with errorCode set -- confirm.
923    UBool init(UErrorCode &errorCode);
924    UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
925
926    UMessagePatternApostropheMode aposMode;
927    UnicodeString msg;  // handed to the static parseArgNumber() by the instance overload
928    // Java: ArrayList<Part> parts=new ArrayList<Part>();  NOTE(review): here presumably split into partsList, parts and partsLength -- confirm.
929    MessagePatternPartsList *partsList;
930    Part *parts;
931    int32_t partsLength;
932    // Java: ArrayList<Double> numericValues;  NOTE(review): here presumably split into numericValuesList, numericValues and numericValuesLength -- confirm.
933    MessagePatternDoubleList *numericValuesList;
934    double *numericValues;
935    int32_t numericValuesLength;
936    UBool hasArgNames;
937    UBool hasArgNumbers;
938    UBool needsAutoQuoting;
939};
940
941U_NAMESPACE_END
942
943#endif  // !UCONFIG_NO_FORMATTING
944
945#endif  // __MESSAGEPATTERN_H__
946