1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 * Copyright (C) 2010 , Yahoo! Inc.
6 ********************************************************************
7 *
8 * File SELFMT.H
9 *
10 * Modification History:
11 *
12 *   Date        Name        Description
13 *   11/11/09    kirtig      Finished first cut of implementation.
14 ********************************************************************/
15
16#ifndef SELFMT
17#define SELFMT
18
19#include "unicode/utypes.h"
20#include "unicode/numfmt.h"
21
22/**
23 * \file
24 * \brief C++ API: SelectFormat object
25 */
26
27#if !UCONFIG_NO_FORMATTING
28
29U_NAMESPACE_BEGIN
30
31class Hashtable;
32
33/**
34  * <p><code>SelectFormat</code> supports the creation of  internationalized
35  * messages by selecting phrases based on keywords. The pattern  specifies
36  * how to map keywords to phrases and provides a default phrase. The
37  * object provided to the format method is a string that's matched
38  * against the keywords. If there is a match, the corresponding phrase
39  * is selected; otherwise, the default phrase is used.</p>
40  *
41  * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
42  *
43  * <p>The main use case for the select format is gender based  inflection.
44  * When names or nouns are inserted into sentences, their gender can  affect pronouns,
45  * verb forms, articles, and adjectives. Special care needs to be
46  * taken for the case where the gender cannot be determined.
47  * The impact varies between languages:</p>
48  * \htmlonly
49  * <ul>
50  * <li>English has three genders, and unknown gender is handled as a  special
51  * case. Names use the gender of the named person (if known), nouns  referring
52  * to people use natural gender, and inanimate objects are usually  neutral.
53  * The gender only affects pronouns: "he", "she", "it", "they".
54  *
55  * <li>German differs from English in that the gender of nouns is  rather
56  * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
57  * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
58  * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
59  *
60  * <li>French has only two genders; as in German the gender of nouns
61  * is rather arbitrary - for sun and moon, the genders
62  * are the opposite of those in German. The gender affects
63  * pronouns ("il", "elle"), articles ("le", "la"),
64  * adjective forms ("bon", "bonne"), and sometimes
65  * verb forms ("all&#x00E9;", "all&#x00E9;e").
66  *
67  * <li>Polish distinguishes five genders (or noun classes),
68  * human masculine, animate non-human masculine, inanimate masculine,
69  * feminine, and neuter.
70  * </ul>
71  * \endhtmlonly
72  * <p>Some other languages have noun classes that are not related to  gender,
73  * but similar in grammatical use.
74  * Some African languages have around 20 noun classes.</p>
75  *
76  * <p>To enable localizers to create sentence patterns that take their
77  * language's gender dependencies into consideration, software has to  provide
78  * information about the gender associated with a noun or name to
79  * <code>MessageFormat</code>.
80  * Two main cases can be distinguished:</p>
81  *
82  * <ul>
83  * <li>For people, natural gender information should be maintained  for each person.
84  * The keywords "male", "female", "mixed" (for groups of people)
85  * and "unknown" are used.
86  *
87  * <li>For nouns, grammatical gender information should be maintained  for
88  * each noun and per language, e.g., in resource bundles.
89  * The keywords "masculine", "feminine", and "neuter" are commonly  used,
90  * but some languages may require other keywords.
91  * </ul>
92  *
93  * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
94  * parameter separate from the name or noun it's associated with. For  example,
95  * to generate a message such as "Jean went to Paris", three separate  arguments
96  * would be provided: The name of the person as argument 0, the  gender of
97  * the person as argument 1, and the name of the city as argument 2.
98  * The sentence pattern for English, where the gender of the person has
99  * no impact on this simple sentence, would not refer to argument 1  at all:</p>
100  *
101  * <pre>{0} went to {2}.</pre>
102  *
103  * <p>The sentence pattern for French, where the gender of the person affects
104  * the form of the participle, uses a select format based on argument 1:</p>
105  *
106  * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
107  *
108  * <p>Patterns can be nested, so that it's possible to handle  interactions of
109  * number and gender where necessary. For example, if the above  sentence should
110  * allow for the names of several people to be inserted, the  following sentence
111  * pattern can be used (with argument 0 the list of people's names,
112  * argument 1 the number of people, argument 2 their combined gender, and
113  * argument 3 the city name):</p>
114  *
115  * \htmlonly
116  * <pre>{0} {1, plural,
117  *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
118  *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
119  *          }&#x00E0; {3}.</pre>
120  * \endhtmlonly
121  *
122  * <h4>Patterns and Their Interpretation</h4>
123  *
124  * <p>The <code>SelectFormat</code> pattern text defines the phrase  output
125  * for each user-defined keyword.
126  * The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
127  * clauses.
128  * Each clause assigns the phrase <code><i>phrase</i></code>
129  * to the user-defined <code><i>keyword</i></code>.</p>
130  *
131  * <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
132  * that don't match this pattern result in the error code
133  * <code>U_ILLEGAL_CHARACTER</code>.
134  * You always have to define a phrase for the default keyword
135  * <code>other</code>; this phrase is returned when the keyword
136  * provided to
137  * the <code>format</code> method matches no other keyword.
138  * If a pattern does not provide a phrase for <code>other</code>, the  method
139  * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
140  * If a pattern provides more than one phrase for the same keyword, the
141  * error <code>U_DUPLICATE_KEYWORD</code> is returned.
142  * <br>
143  * Spaces between <code><i>keyword</i></code> and
144  * <code>{<i>phrase</i>}</code>  will be ignored; spaces within
145  * <code>{<i>phrase</i>}</code> will be preserved.</p>
146  *
147  * <p>The phrase for a particular select case may contain other message
148  * format patterns. <code>SelectFormat</code> preserves these so that  you
149  * can use the strings produced by <code>SelectFormat</code> with other
150  * formatters. If you are using <code>SelectFormat</code> inside a
151  * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
152  * automatically evaluate the resulting format pattern.
153  * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
154  * in phrases to define a nested format pattern.</p>
155  *
156  * <p>Example:
157  * \htmlonly
158  *
159  * UErrorCode status = U_ZERO_ERROR;
160  * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
161  * if (U_FAILURE(status)) {
162  *       return;
163  * }
164  * FieldPosition ignore(FieldPosition::DONT_CARE);
165  * UnicodeString result;
166  *
167  * const char* str1 = "Kirti,female";
168  * Formattable args1[] = {"Kirti","female"};
169  * msgFmt->format(args1, 2, result, ignore, status);
170  * cout << "Input is " << str1 << " and result is: " << result << endl;
171  * delete msgFmt;
172  *
173  * \endhtmlonly
174  * </p>
175  *
176  * Produces the output:<br>
177  * \htmlonly
178  * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
179  * \endhtmlonly
180  *
181  * @stable ICU 4.4
182  */
183
184class U_I18N_API SelectFormat : public Format {
185public:
186
187    /**
188     * Creates a new <code>SelectFormat</code> for a given pattern string.
189     * @param  pattern the pattern for this <code>SelectFormat</code>.
190     *                 Errors are returned to status if the pattern is invalid.
191     * @param status   output param set to success/failure code on exit, which
192     *                 must not indicate a failure before the function call.
193     * @stable ICU 4.4
194     */
195    SelectFormat(const UnicodeString& pattern, UErrorCode& status);
196
197    /**
198     * Copy constructor: creates a new <code>SelectFormat</code> from <code>other</code>.
199     * @stable ICU 4.4
200     */
201    SelectFormat(const SelectFormat& other);
202
203    /**
204     * Destructor.
205     * @stable ICU 4.4
206     */
207    virtual ~SelectFormat();
208
209    /**
210     * Sets the pattern used by this select format, which defines
211     * the phrases for the keywords.
212     * Patterns and their interpretation are specified in the class description.
213     *
214     * @param pattern the pattern for this select format.
215     *                Errors are returned to status if the pattern is invalid.
216     * @param status  output param set to success/failure code on exit, which
217     *                must not indicate a failure before the function call.
218     * @stable ICU 4.4
219     */
220    void applyPattern(const UnicodeString& pattern, UErrorCode& status);
221
222
223    using Format::format;
224
225    /**
226     * Selects the phrase for the given keyword.
227     *
228     * @param keyword  The keyword that is used to select an alternative.
229     * @param appendTo output parameter to receive result.
230     *                 Result is appended to existing contents.
231     * @param pos      On input: an alignment field, if desired.
232     *                 On output: the offsets of the alignment field.
233     * @param status   output param set to success/failure code on exit, which
234     *                 must not indicate a failure before the function call.
235     * @return         Reference to 'appendTo' parameter.
236     * @stable ICU 4.4
237     */
238    UnicodeString& format(const UnicodeString& keyword,
239                            UnicodeString& appendTo,
240                            FieldPosition& pos,
241                            UErrorCode& status) const;
242
243    /**
244     * Assignment operator.
245     *
246     * @param other    the SelectFormat object to copy from.
247     * @stable ICU 4.4
248     */
249    SelectFormat& operator=(const SelectFormat& other);
250
251    /**
252     * Return true if another object is semantically equal to this one.
253     *
254     * @param other    the Format object to be compared with.
255     * @return         true if other is semantically equal to this.
256     * @stable ICU 4.4
257     */
258    virtual UBool operator==(const Format& other) const;
259
260    /**
261     * Return true if another object is semantically unequal to this one.
262     *
263     * @param other    the Format object to be compared with.
264     * @return         true if other is semantically unequal to this.
265     * @stable ICU 4.4
266     */
267    virtual UBool operator!=(const Format& other) const;
268
269    /**
270     * Clones this Format object polymorphically.  The caller owns the
271     * result and should delete it when done.
272     * @stable ICU 4.4
273     */
274    virtual Format* clone(void) const;
275
276    /**
277     * Format an object to produce a string.
278     * This method handles keyword strings.
279     * If the Formattable object is not a <code>UnicodeString</code>,
280     * then a failing UErrorCode is reported through status.
281     *
282     * @param obj       A keyword string that is used to select an alternative.
283     * @param appendTo  output parameter to receive result.
284     *                  Result is appended to existing contents.
285     * @param pos       On input: an alignment field, if desired.
286     *                  On output: the offsets of the alignment field.
287     * @param status    output param filled with success/failure status.
288     * @return          Reference to 'appendTo' parameter.
289     * @stable ICU 4.4
290     */
291    UnicodeString& format(const Formattable& obj,
292                         UnicodeString& appendTo,
293                         FieldPosition& pos,
294                         UErrorCode& status) const;
295
296    /**
297     * Returns the pattern from applyPattern() or constructor.
298     *
299     * @param appendTo  output parameter to receive result.
300     *                  Result is appended to existing contents.
301     * @return the UnicodeString with inserted pattern.
302     * @stable ICU 4.4
303     */
304    UnicodeString& toPattern(UnicodeString& appendTo);
305
306    /**
307     * This method is not yet supported by <code>SelectFormat</code>.
308     * <P>
309     * Before calling, set parse_pos.index to the offset you want to start
310     * parsing at in the source. After calling, parse_pos.index is the end of
311     * the text you parsed. If an error occurs, index is unchanged.
312     * <P>
313     * When parsing, leading whitespace is discarded (with a successful parse),
314     * while trailing whitespace is left as is.
315     * <P>
316     * See Format::parseObject() for more.
317     *
318     * @param source     The string to be parsed into an object.
319     * @param result     Formattable to be set to the parse result.
320     *     If parse fails, return contents are undefined.
321     * @param parse_pos The position to start parsing at. Upon return
322     *     this param is set to the position after the
323     *     last character successfully parsed. If the
324     *     source is not parsed successfully, this param
325     *     will remain unchanged.
326     * @stable ICU 4.4
327     */
328    virtual void parseObject(const UnicodeString& source,
329                            Formattable& result,
330                            ParsePosition& parse_pos) const;
331
332    /**
333     * ICU "poor man's RTTI", returns a UClassID for this class.
334     * @stable ICU 4.4
335     */
336    static UClassID U_EXPORT2 getStaticClassID(void);
337
338    /**
339     * ICU "poor man's RTTI", returns a UClassID for the actual class.
340     * @stable ICU 4.4
341     */
342    virtual UClassID getDynamicClassID() const;
343
344private:
345    typedef enum classesForSelectFormat{
346        tStartKeyword,
347        tContinueKeyword,
348        tLeftBrace,
349        tRightBrace,
350        tSpace,
351        tOther
352    }CharacterClass;  // Result type of classifyCharacter().
353
354    UnicodeString pattern;  // NOTE(review): presumably the pattern text reported by toPattern() - confirm in the implementation.
355    // Hash table storing the (keyword, phrase) pairs.
356    Hashtable  *parsedValuesHash;
357
358    SelectFormat();   // Default constructor: declared private and not implemented.
359    void initHashTable(UErrorCode &status);  // NOTE(review): presumably sets up parsedValuesHash - body not visible here.
360    void cleanHashTable();  // NOTE(review): presumably releases parsedValuesHash - body not visible here.
361
362    // Used for applyPattern(): classifies a character into one of the CharacterClass values.
363    CharacterClass classifyCharacter(UChar ch) const;
364    // Checks if the "other" keyword is present in the pattern.
365    UBool checkSufficientDefinition();
366    // Checks if the keyword passed is valid.
367    UBool checkValidKeyword(const UnicodeString& argKeyword) const;
368    void parsingFailure();  // NOTE(review): presumably invoked when pattern parsing fails - confirm against applyPattern().
369    void copyHashtable(Hashtable *other, UErrorCode& status);  // NOTE(review): presumably copies the entries of other into this object - confirm.
370};
371
372U_NAMESPACE_END
373
374#endif /* #if !UCONFIG_NO_FORMATTING */
375
376#endif // SELFMT
377//eof
378